diff options
Diffstat (limited to 'arch/x86')
172 files changed, 4965 insertions, 2672 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index dcb0593b4a66..baa34e510222 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -55,6 +55,7 @@ config X86 | |||
55 | select HAVE_HW_BREAKPOINT | 55 | select HAVE_HW_BREAKPOINT |
56 | select HAVE_MIXED_BREAKPOINTS_REGS | 56 | select HAVE_MIXED_BREAKPOINTS_REGS |
57 | select PERF_EVENTS | 57 | select PERF_EVENTS |
58 | select HAVE_PERF_EVENTS_NMI | ||
58 | select ANON_INODES | 59 | select ANON_INODES |
59 | select HAVE_ARCH_KMEMCHECK | 60 | select HAVE_ARCH_KMEMCHECK |
60 | select HAVE_USER_RETURN_NOTIFIER | 61 | select HAVE_USER_RETURN_NOTIFIER |
@@ -72,9 +73,6 @@ config ARCH_DEFCONFIG | |||
72 | default "arch/x86/configs/i386_defconfig" if X86_32 | 73 | default "arch/x86/configs/i386_defconfig" if X86_32 |
73 | default "arch/x86/configs/x86_64_defconfig" if X86_64 | 74 | default "arch/x86/configs/x86_64_defconfig" if X86_64 |
74 | 75 | ||
75 | config GENERIC_TIME | ||
76 | def_bool y | ||
77 | |||
78 | config GENERIC_CMOS_UPDATE | 76 | config GENERIC_CMOS_UPDATE |
79 | def_bool y | 77 | def_bool y |
80 | 78 | ||
@@ -2046,7 +2044,7 @@ config SCx200 | |||
2046 | 2044 | ||
2047 | config SCx200HR_TIMER | 2045 | config SCx200HR_TIMER |
2048 | tristate "NatSemi SCx200 27MHz High-Resolution Timer Support" | 2046 | tristate "NatSemi SCx200 27MHz High-Resolution Timer Support" |
2049 | depends on SCx200 && GENERIC_TIME | 2047 | depends on SCx200 |
2050 | default y | 2048 | default y |
2051 | ---help--- | 2049 | ---help--- |
2052 | This driver provides a clocksource built upon the on-chip | 2050 | This driver provides a clocksource built upon the on-chip |
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index ec749c2bfdd7..f7cb086b4add 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile | |||
@@ -26,10 +26,10 @@ targets := vmlinux.bin setup.bin setup.elf bzImage | |||
26 | targets += fdimage fdimage144 fdimage288 image.iso mtools.conf | 26 | targets += fdimage fdimage144 fdimage288 image.iso mtools.conf |
27 | subdir- := compressed | 27 | subdir- := compressed |
28 | 28 | ||
29 | setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpucheck.o edd.o | 29 | setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpucheck.o |
30 | setup-y += header.o main.o mca.o memory.o pm.o pmjump.o | 30 | setup-y += early_serial_console.o edd.o header.o main.o mca.o memory.o |
31 | setup-y += printf.o regs.o string.o tty.o video.o video-mode.o | 31 | setup-y += pm.o pmjump.o printf.o regs.o string.o tty.o video.o |
32 | setup-y += version.o | 32 | setup-y += video-mode.o version.o |
33 | setup-$(CONFIG_X86_APM_BOOT) += apm.o | 33 | setup-$(CONFIG_X86_APM_BOOT) += apm.o |
34 | 34 | ||
35 | # The link order of the video-*.o modules can matter. In particular, | 35 | # The link order of the video-*.o modules can matter. In particular, |
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index 98239d2658f2..c7093bd9f2d3 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h | |||
@@ -28,6 +28,7 @@ | |||
28 | #include "bitops.h" | 28 | #include "bitops.h" |
29 | #include <asm/cpufeature.h> | 29 | #include <asm/cpufeature.h> |
30 | #include <asm/processor-flags.h> | 30 | #include <asm/processor-flags.h> |
31 | #include "ctype.h" | ||
31 | 32 | ||
32 | /* Useful macros */ | 33 | /* Useful macros */ |
33 | #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) | 34 | #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) |
@@ -37,6 +38,8 @@ | |||
37 | extern struct setup_header hdr; | 38 | extern struct setup_header hdr; |
38 | extern struct boot_params boot_params; | 39 | extern struct boot_params boot_params; |
39 | 40 | ||
41 | #define cpu_relax() asm volatile("rep; nop") | ||
42 | |||
40 | /* Basic port I/O */ | 43 | /* Basic port I/O */ |
41 | static inline void outb(u8 v, u16 port) | 44 | static inline void outb(u8 v, u16 port) |
42 | { | 45 | { |
@@ -198,11 +201,6 @@ static inline int memcmp_gs(const void *s1, addr_t s2, size_t len) | |||
198 | return diff; | 201 | return diff; |
199 | } | 202 | } |
200 | 203 | ||
201 | static inline int isdigit(int ch) | ||
202 | { | ||
203 | return (ch >= '0') && (ch <= '9'); | ||
204 | } | ||
205 | |||
206 | /* Heap -- available for dynamic lists. */ | 204 | /* Heap -- available for dynamic lists. */ |
207 | extern char _end[]; | 205 | extern char _end[]; |
208 | extern char *HEAP; | 206 | extern char *HEAP; |
@@ -287,8 +285,18 @@ struct biosregs { | |||
287 | void intcall(u8 int_no, const struct biosregs *ireg, struct biosregs *oreg); | 285 | void intcall(u8 int_no, const struct biosregs *ireg, struct biosregs *oreg); |
288 | 286 | ||
289 | /* cmdline.c */ | 287 | /* cmdline.c */ |
290 | int cmdline_find_option(const char *option, char *buffer, int bufsize); | 288 | int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int bufsize); |
291 | int cmdline_find_option_bool(const char *option); | 289 | int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option); |
290 | static inline int cmdline_find_option(const char *option, char *buffer, int bufsize) | ||
291 | { | ||
292 | return __cmdline_find_option(boot_params.hdr.cmd_line_ptr, option, buffer, bufsize); | ||
293 | } | ||
294 | |||
295 | static inline int cmdline_find_option_bool(const char *option) | ||
296 | { | ||
297 | return __cmdline_find_option_bool(boot_params.hdr.cmd_line_ptr, option); | ||
298 | } | ||
299 | |||
292 | 300 | ||
293 | /* cpu.c, cpucheck.c */ | 301 | /* cpu.c, cpucheck.c */ |
294 | struct cpu_features { | 302 | struct cpu_features { |
@@ -300,6 +308,10 @@ extern struct cpu_features cpu; | |||
300 | int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr); | 308 | int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr); |
301 | int validate_cpu(void); | 309 | int validate_cpu(void); |
302 | 310 | ||
311 | /* early_serial_console.c */ | ||
312 | extern int early_serial_base; | ||
313 | void console_init(void); | ||
314 | |||
303 | /* edd.c */ | 315 | /* edd.c */ |
304 | void query_edd(void); | 316 | void query_edd(void); |
305 | 317 | ||
@@ -329,8 +341,10 @@ void initregs(struct biosregs *regs); | |||
329 | 341 | ||
330 | /* string.c */ | 342 | /* string.c */ |
331 | int strcmp(const char *str1, const char *str2); | 343 | int strcmp(const char *str1, const char *str2); |
344 | int strncmp(const char *cs, const char *ct, size_t count); | ||
332 | size_t strnlen(const char *s, size_t maxlen); | 345 | size_t strnlen(const char *s, size_t maxlen); |
333 | unsigned int atou(const char *s); | 346 | unsigned int atou(const char *s); |
347 | unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base); | ||
334 | 348 | ||
335 | /* tty.c */ | 349 | /* tty.c */ |
336 | void puts(const char *); | 350 | void puts(const char *); |
diff --git a/arch/x86/boot/cmdline.c b/arch/x86/boot/cmdline.c index a1d35634bce0..6b3b6f708c04 100644 --- a/arch/x86/boot/cmdline.c +++ b/arch/x86/boot/cmdline.c | |||
@@ -27,9 +27,8 @@ static inline int myisspace(u8 c) | |||
27 | * Returns the length of the argument (regardless of if it was | 27 | * Returns the length of the argument (regardless of if it was |
28 | * truncated to fit in the buffer), or -1 on not found. | 28 | * truncated to fit in the buffer), or -1 on not found. |
29 | */ | 29 | */ |
30 | int cmdline_find_option(const char *option, char *buffer, int bufsize) | 30 | int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int bufsize) |
31 | { | 31 | { |
32 | u32 cmdline_ptr = boot_params.hdr.cmd_line_ptr; | ||
33 | addr_t cptr; | 32 | addr_t cptr; |
34 | char c; | 33 | char c; |
35 | int len = -1; | 34 | int len = -1; |
@@ -100,9 +99,8 @@ int cmdline_find_option(const char *option, char *buffer, int bufsize) | |||
100 | * Returns the position of that option (starts counting with 1) | 99 | * Returns the position of that option (starts counting with 1) |
101 | * or 0 on not found | 100 | * or 0 on not found |
102 | */ | 101 | */ |
103 | int cmdline_find_option_bool(const char *option) | 102 | int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option) |
104 | { | 103 | { |
105 | u32 cmdline_ptr = boot_params.hdr.cmd_line_ptr; | ||
106 | addr_t cptr; | 104 | addr_t cptr; |
107 | char c; | 105 | char c; |
108 | int pos = 0, wstart = 0; | 106 | int pos = 0, wstart = 0; |
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index fbb47daf2459..0c229551eead 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile | |||
@@ -4,7 +4,7 @@ | |||
4 | # create a compressed vmlinux image from the original vmlinux | 4 | # create a compressed vmlinux image from the original vmlinux |
5 | # | 5 | # |
6 | 6 | ||
7 | targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma vmlinux.bin.lzo head_$(BITS).o misc.o piggy.o | 7 | targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma vmlinux.bin.lzo head_$(BITS).o misc.o string.o cmdline.o early_serial_console.o piggy.o |
8 | 8 | ||
9 | KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 | 9 | KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 |
10 | KBUILD_CFLAGS += -fno-strict-aliasing -fPIC | 10 | KBUILD_CFLAGS += -fno-strict-aliasing -fPIC |
@@ -23,7 +23,7 @@ LDFLAGS_vmlinux := -T | |||
23 | 23 | ||
24 | hostprogs-y := mkpiggy | 24 | hostprogs-y := mkpiggy |
25 | 25 | ||
26 | $(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o $(obj)/piggy.o FORCE | 26 | $(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o $(obj)/piggy.o FORCE |
27 | $(call if_changed,ld) | 27 | $(call if_changed,ld) |
28 | @: | 28 | @: |
29 | 29 | ||
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c new file mode 100644 index 000000000000..cb62f786990d --- /dev/null +++ b/arch/x86/boot/compressed/cmdline.c | |||
@@ -0,0 +1,21 @@ | |||
1 | #include "misc.h" | ||
2 | |||
3 | static unsigned long fs; | ||
4 | static inline void set_fs(unsigned long seg) | ||
5 | { | ||
6 | fs = seg << 4; /* shift it back */ | ||
7 | } | ||
8 | typedef unsigned long addr_t; | ||
9 | static inline char rdfs8(addr_t addr) | ||
10 | { | ||
11 | return *((char *)(fs + addr)); | ||
12 | } | ||
13 | #include "../cmdline.c" | ||
14 | int cmdline_find_option(const char *option, char *buffer, int bufsize) | ||
15 | { | ||
16 | return __cmdline_find_option(real_mode->hdr.cmd_line_ptr, option, buffer, bufsize); | ||
17 | } | ||
18 | int cmdline_find_option_bool(const char *option) | ||
19 | { | ||
20 | return __cmdline_find_option_bool(real_mode->hdr.cmd_line_ptr, option); | ||
21 | } | ||
diff --git a/arch/x86/boot/compressed/early_serial_console.c b/arch/x86/boot/compressed/early_serial_console.c new file mode 100644 index 000000000000..261e81fb9582 --- /dev/null +++ b/arch/x86/boot/compressed/early_serial_console.c | |||
@@ -0,0 +1,5 @@ | |||
1 | #include "misc.h" | ||
2 | |||
3 | int early_serial_base; | ||
4 | |||
5 | #include "../early_serial_console.c" | ||
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index f543b70ffae2..67a655a39ce4 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S | |||
@@ -124,6 +124,19 @@ relocated: | |||
124 | rep stosl | 124 | rep stosl |
125 | 125 | ||
126 | /* | 126 | /* |
127 | * Adjust our own GOT | ||
128 | */ | ||
129 | leal _got(%ebx), %edx | ||
130 | leal _egot(%ebx), %ecx | ||
131 | 1: | ||
132 | cmpl %ecx, %edx | ||
133 | jae 2f | ||
134 | addl %ebx, (%edx) | ||
135 | addl $4, %edx | ||
136 | jmp 1b | ||
137 | 2: | ||
138 | |||
139 | /* | ||
127 | * Do the decompression, and jump to the new kernel.. | 140 | * Do the decompression, and jump to the new kernel.. |
128 | */ | 141 | */ |
129 | leal z_extract_offset_negative(%ebx), %ebp | 142 | leal z_extract_offset_negative(%ebx), %ebp |
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index faff0dc9c06a..52f85a196fa0 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S | |||
@@ -280,6 +280,19 @@ relocated: | |||
280 | rep stosq | 280 | rep stosq |
281 | 281 | ||
282 | /* | 282 | /* |
283 | * Adjust our own GOT | ||
284 | */ | ||
285 | leaq _got(%rip), %rdx | ||
286 | leaq _egot(%rip), %rcx | ||
287 | 1: | ||
288 | cmpq %rcx, %rdx | ||
289 | jae 2f | ||
290 | addq %rbx, (%rdx) | ||
291 | addq $8, %rdx | ||
292 | jmp 1b | ||
293 | 2: | ||
294 | |||
295 | /* | ||
283 | * Do the decompression, and jump to the new kernel.. | 296 | * Do the decompression, and jump to the new kernel.. |
284 | */ | 297 | */ |
285 | pushq %rsi /* Save the real mode argument */ | 298 | pushq %rsi /* Save the real mode argument */ |
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 51e240779a44..8f7bef8e9fff 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c | |||
@@ -9,23 +9,7 @@ | |||
9 | * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 | 9 | * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 |
10 | */ | 10 | */ |
11 | 11 | ||
12 | /* | 12 | #include "misc.h" |
13 | * we have to be careful, because no indirections are allowed here, and | ||
14 | * paravirt_ops is a kind of one. As it will only run in baremetal anyway, | ||
15 | * we just keep it from happening | ||
16 | */ | ||
17 | #undef CONFIG_PARAVIRT | ||
18 | #ifdef CONFIG_X86_32 | ||
19 | #define _ASM_X86_DESC_H 1 | ||
20 | #endif | ||
21 | |||
22 | #include <linux/linkage.h> | ||
23 | #include <linux/screen_info.h> | ||
24 | #include <linux/elf.h> | ||
25 | #include <linux/io.h> | ||
26 | #include <asm/page.h> | ||
27 | #include <asm/boot.h> | ||
28 | #include <asm/bootparam.h> | ||
29 | 13 | ||
30 | /* WARNING!! | 14 | /* WARNING!! |
31 | * This code is compiled with -fPIC and it is relocated dynamically | 15 | * This code is compiled with -fPIC and it is relocated dynamically |
@@ -123,15 +107,13 @@ static void error(char *m); | |||
123 | /* | 107 | /* |
124 | * This is set up by the setup-routine at boot-time | 108 | * This is set up by the setup-routine at boot-time |
125 | */ | 109 | */ |
126 | static struct boot_params *real_mode; /* Pointer to real-mode data */ | 110 | struct boot_params *real_mode; /* Pointer to real-mode data */ |
127 | static int quiet; | 111 | static int quiet; |
112 | static int debug; | ||
128 | 113 | ||
129 | void *memset(void *s, int c, size_t n); | 114 | void *memset(void *s, int c, size_t n); |
130 | void *memcpy(void *dest, const void *src, size_t n); | 115 | void *memcpy(void *dest, const void *src, size_t n); |
131 | 116 | ||
132 | static void __putstr(int, const char *); | ||
133 | #define putstr(__x) __putstr(0, __x) | ||
134 | |||
135 | #ifdef CONFIG_X86_64 | 117 | #ifdef CONFIG_X86_64 |
136 | #define memptr long | 118 | #define memptr long |
137 | #else | 119 | #else |
@@ -170,7 +152,21 @@ static void scroll(void) | |||
170 | vidmem[i] = ' '; | 152 | vidmem[i] = ' '; |
171 | } | 153 | } |
172 | 154 | ||
173 | static void __putstr(int error, const char *s) | 155 | #define XMTRDY 0x20 |
156 | |||
157 | #define TXR 0 /* Transmit register (WRITE) */ | ||
158 | #define LSR 5 /* Line Status */ | ||
159 | static void serial_putchar(int ch) | ||
160 | { | ||
161 | unsigned timeout = 0xffff; | ||
162 | |||
163 | while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout) | ||
164 | cpu_relax(); | ||
165 | |||
166 | outb(ch, early_serial_base + TXR); | ||
167 | } | ||
168 | |||
169 | void __putstr(int error, const char *s) | ||
174 | { | 170 | { |
175 | int x, y, pos; | 171 | int x, y, pos; |
176 | char c; | 172 | char c; |
@@ -179,6 +175,14 @@ static void __putstr(int error, const char *s) | |||
179 | if (!error) | 175 | if (!error) |
180 | return; | 176 | return; |
181 | #endif | 177 | #endif |
178 | if (early_serial_base) { | ||
179 | const char *str = s; | ||
180 | while (*str) { | ||
181 | if (*str == '\n') | ||
182 | serial_putchar('\r'); | ||
183 | serial_putchar(*str++); | ||
184 | } | ||
185 | } | ||
182 | 186 | ||
183 | if (real_mode->screen_info.orig_video_mode == 0 && | 187 | if (real_mode->screen_info.orig_video_mode == 0 && |
184 | lines == 0 && cols == 0) | 188 | lines == 0 && cols == 0) |
@@ -305,8 +309,10 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, | |||
305 | { | 309 | { |
306 | real_mode = rmode; | 310 | real_mode = rmode; |
307 | 311 | ||
308 | if (real_mode->hdr.loadflags & QUIET_FLAG) | 312 | if (cmdline_find_option_bool("quiet")) |
309 | quiet = 1; | 313 | quiet = 1; |
314 | if (cmdline_find_option_bool("debug")) | ||
315 | debug = 1; | ||
310 | 316 | ||
311 | if (real_mode->screen_info.orig_video_mode == 7) { | 317 | if (real_mode->screen_info.orig_video_mode == 7) { |
312 | vidmem = (char *) 0xb0000; | 318 | vidmem = (char *) 0xb0000; |
@@ -319,6 +325,10 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, | |||
319 | lines = real_mode->screen_info.orig_video_lines; | 325 | lines = real_mode->screen_info.orig_video_lines; |
320 | cols = real_mode->screen_info.orig_video_cols; | 326 | cols = real_mode->screen_info.orig_video_cols; |
321 | 327 | ||
328 | console_init(); | ||
329 | if (debug) | ||
330 | putstr("early console in decompress_kernel\n"); | ||
331 | |||
322 | free_mem_ptr = heap; /* Heap */ | 332 | free_mem_ptr = heap; /* Heap */ |
323 | free_mem_end_ptr = heap + BOOT_HEAP_SIZE; | 333 | free_mem_end_ptr = heap + BOOT_HEAP_SIZE; |
324 | 334 | ||
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h new file mode 100644 index 000000000000..3f19c81a6203 --- /dev/null +++ b/arch/x86/boot/compressed/misc.h | |||
@@ -0,0 +1,39 @@ | |||
1 | #ifndef BOOT_COMPRESSED_MISC_H | ||
2 | #define BOOT_COMPRESSED_MISC_H | ||
3 | |||
4 | /* | ||
5 | * we have to be careful, because no indirections are allowed here, and | ||
6 | * paravirt_ops is a kind of one. As it will only run in baremetal anyway, | ||
7 | * we just keep it from happening | ||
8 | */ | ||
9 | #undef CONFIG_PARAVIRT | ||
10 | #ifdef CONFIG_X86_32 | ||
11 | #define _ASM_X86_DESC_H 1 | ||
12 | #endif | ||
13 | |||
14 | #include <linux/linkage.h> | ||
15 | #include <linux/screen_info.h> | ||
16 | #include <linux/elf.h> | ||
17 | #include <linux/io.h> | ||
18 | #include <asm/page.h> | ||
19 | #include <asm/boot.h> | ||
20 | #include <asm/bootparam.h> | ||
21 | |||
22 | #define BOOT_BOOT_H | ||
23 | #include "../ctype.h" | ||
24 | |||
25 | /* misc.c */ | ||
26 | extern struct boot_params *real_mode; /* Pointer to real-mode data */ | ||
27 | void __putstr(int error, const char *s); | ||
28 | #define putstr(__x) __putstr(0, __x) | ||
29 | #define puts(__x) __putstr(0, __x) | ||
30 | |||
31 | /* cmdline.c */ | ||
32 | int cmdline_find_option(const char *option, char *buffer, int bufsize); | ||
33 | int cmdline_find_option_bool(const char *option); | ||
34 | |||
35 | /* early_serial_console.c */ | ||
36 | extern int early_serial_base; | ||
37 | void console_init(void); | ||
38 | |||
39 | #endif | ||
diff --git a/arch/x86/boot/compressed/string.c b/arch/x86/boot/compressed/string.c new file mode 100644 index 000000000000..19b3e693cd72 --- /dev/null +++ b/arch/x86/boot/compressed/string.c | |||
@@ -0,0 +1,2 @@ | |||
1 | #include "misc.h" | ||
2 | #include "../string.c" | ||
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S index 5ddabceee124..34d047c98284 100644 --- a/arch/x86/boot/compressed/vmlinux.lds.S +++ b/arch/x86/boot/compressed/vmlinux.lds.S | |||
@@ -41,6 +41,12 @@ SECTIONS | |||
41 | *(.rodata.*) | 41 | *(.rodata.*) |
42 | _erodata = . ; | 42 | _erodata = . ; |
43 | } | 43 | } |
44 | .got : { | ||
45 | _got = .; | ||
46 | KEEP(*(.got.plt)) | ||
47 | KEEP(*(.got)) | ||
48 | _egot = .; | ||
49 | } | ||
44 | .data : { | 50 | .data : { |
45 | _data = . ; | 51 | _data = . ; |
46 | *(.data) | 52 | *(.data) |
diff --git a/arch/x86/boot/ctype.h b/arch/x86/boot/ctype.h new file mode 100644 index 000000000000..25e13403193c --- /dev/null +++ b/arch/x86/boot/ctype.h | |||
@@ -0,0 +1,21 @@ | |||
1 | #ifndef BOOT_ISDIGIT_H | ||
2 | |||
3 | #define BOOT_ISDIGIT_H | ||
4 | |||
5 | static inline int isdigit(int ch) | ||
6 | { | ||
7 | return (ch >= '0') && (ch <= '9'); | ||
8 | } | ||
9 | |||
10 | static inline int isxdigit(int ch) | ||
11 | { | ||
12 | if (isdigit(ch)) | ||
13 | return true; | ||
14 | |||
15 | if ((ch >= 'a') && (ch <= 'f')) | ||
16 | return true; | ||
17 | |||
18 | return (ch >= 'A') && (ch <= 'F'); | ||
19 | } | ||
20 | |||
21 | #endif | ||
diff --git a/arch/x86/boot/early_serial_console.c b/arch/x86/boot/early_serial_console.c new file mode 100644 index 000000000000..030f4b93e255 --- /dev/null +++ b/arch/x86/boot/early_serial_console.c | |||
@@ -0,0 +1,139 @@ | |||
1 | #include "boot.h" | ||
2 | |||
3 | #define DEFAULT_SERIAL_PORT 0x3f8 /* ttyS0 */ | ||
4 | |||
5 | #define XMTRDY 0x20 | ||
6 | |||
7 | #define DLAB 0x80 | ||
8 | |||
9 | #define TXR 0 /* Transmit register (WRITE) */ | ||
10 | #define RXR 0 /* Receive register (READ) */ | ||
11 | #define IER 1 /* Interrupt Enable */ | ||
12 | #define IIR 2 /* Interrupt ID */ | ||
13 | #define FCR 2 /* FIFO control */ | ||
14 | #define LCR 3 /* Line control */ | ||
15 | #define MCR 4 /* Modem control */ | ||
16 | #define LSR 5 /* Line Status */ | ||
17 | #define MSR 6 /* Modem Status */ | ||
18 | #define DLL 0 /* Divisor Latch Low */ | ||
19 | #define DLH 1 /* Divisor latch High */ | ||
20 | |||
21 | #define DEFAULT_BAUD 9600 | ||
22 | |||
23 | static void early_serial_init(int port, int baud) | ||
24 | { | ||
25 | unsigned char c; | ||
26 | unsigned divisor; | ||
27 | |||
28 | outb(0x3, port + LCR); /* 8n1 */ | ||
29 | outb(0, port + IER); /* no interrupt */ | ||
30 | outb(0, port + FCR); /* no fifo */ | ||
31 | outb(0x3, port + MCR); /* DTR + RTS */ | ||
32 | |||
33 | divisor = 115200 / baud; | ||
34 | c = inb(port + LCR); | ||
35 | outb(c | DLAB, port + LCR); | ||
36 | outb(divisor & 0xff, port + DLL); | ||
37 | outb((divisor >> 8) & 0xff, port + DLH); | ||
38 | outb(c & ~DLAB, port + LCR); | ||
39 | |||
40 | early_serial_base = port; | ||
41 | } | ||
42 | |||
43 | static void parse_earlyprintk(void) | ||
44 | { | ||
45 | int baud = DEFAULT_BAUD; | ||
46 | char arg[32]; | ||
47 | int pos = 0; | ||
48 | int port = 0; | ||
49 | |||
50 | if (cmdline_find_option("earlyprintk", arg, sizeof arg) > 0) { | ||
51 | char *e; | ||
52 | |||
53 | if (!strncmp(arg, "serial", 6)) { | ||
54 | port = DEFAULT_SERIAL_PORT; | ||
55 | pos += 6; | ||
56 | } | ||
57 | |||
58 | if (arg[pos] == ',') | ||
59 | pos++; | ||
60 | |||
61 | if (!strncmp(arg, "ttyS", 4)) { | ||
62 | static const int bases[] = { 0x3f8, 0x2f8 }; | ||
63 | int idx = 0; | ||
64 | |||
65 | if (!strncmp(arg + pos, "ttyS", 4)) | ||
66 | pos += 4; | ||
67 | |||
68 | if (arg[pos++] == '1') | ||
69 | idx = 1; | ||
70 | |||
71 | port = bases[idx]; | ||
72 | } | ||
73 | |||
74 | if (arg[pos] == ',') | ||
75 | pos++; | ||
76 | |||
77 | baud = simple_strtoull(arg + pos, &e, 0); | ||
78 | if (baud == 0 || arg + pos == e) | ||
79 | baud = DEFAULT_BAUD; | ||
80 | } | ||
81 | |||
82 | if (port) | ||
83 | early_serial_init(port, baud); | ||
84 | } | ||
85 | |||
86 | #define BASE_BAUD (1843200/16) | ||
87 | static unsigned int probe_baud(int port) | ||
88 | { | ||
89 | unsigned char lcr, dll, dlh; | ||
90 | unsigned int quot; | ||
91 | |||
92 | lcr = inb(port + LCR); | ||
93 | outb(lcr | DLAB, port + LCR); | ||
94 | dll = inb(port + DLL); | ||
95 | dlh = inb(port + DLH); | ||
96 | outb(lcr, port + LCR); | ||
97 | quot = (dlh << 8) | dll; | ||
98 | |||
99 | return BASE_BAUD / quot; | ||
100 | } | ||
101 | |||
102 | static void parse_console_uart8250(void) | ||
103 | { | ||
104 | char optstr[64], *options; | ||
105 | int baud = DEFAULT_BAUD; | ||
106 | int port = 0; | ||
107 | |||
108 | /* | ||
109 | * console=uart8250,io,0x3f8,115200n8 | ||
110 | * need to make sure it is last one console ! | ||
111 | */ | ||
112 | if (cmdline_find_option("console", optstr, sizeof optstr) <= 0) | ||
113 | return; | ||
114 | |||
115 | options = optstr; | ||
116 | |||
117 | if (!strncmp(options, "uart8250,io,", 12)) | ||
118 | port = simple_strtoull(options + 12, &options, 0); | ||
119 | else if (!strncmp(options, "uart,io,", 8)) | ||
120 | port = simple_strtoull(options + 8, &options, 0); | ||
121 | else | ||
122 | return; | ||
123 | |||
124 | if (options && (options[0] == ',')) | ||
125 | baud = simple_strtoull(options + 1, &options, 0); | ||
126 | else | ||
127 | baud = probe_baud(port); | ||
128 | |||
129 | if (port) | ||
130 | early_serial_init(port, baud); | ||
131 | } | ||
132 | |||
133 | void console_init(void) | ||
134 | { | ||
135 | parse_earlyprintk(); | ||
136 | |||
137 | if (!early_serial_base) | ||
138 | parse_console_uart8250(); | ||
139 | } | ||
diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c index 140172b895bd..40358c8905be 100644 --- a/arch/x86/boot/main.c +++ b/arch/x86/boot/main.c | |||
@@ -130,6 +130,11 @@ void main(void) | |||
130 | /* First, copy the boot header into the "zeropage" */ | 130 | /* First, copy the boot header into the "zeropage" */ |
131 | copy_boot_params(); | 131 | copy_boot_params(); |
132 | 132 | ||
133 | /* Initialize the early-boot console */ | ||
134 | console_init(); | ||
135 | if (cmdline_find_option_bool("debug")) | ||
136 | puts("early console in setup code\n"); | ||
137 | |||
133 | /* End of heap check */ | 138 | /* End of heap check */ |
134 | init_heap(); | 139 | init_heap(); |
135 | 140 | ||
@@ -168,10 +173,6 @@ void main(void) | |||
168 | /* Set the video mode */ | 173 | /* Set the video mode */ |
169 | set_video(); | 174 | set_video(); |
170 | 175 | ||
171 | /* Parse command line for 'quiet' and pass it to decompressor. */ | ||
172 | if (cmdline_find_option_bool("quiet")) | ||
173 | boot_params.hdr.loadflags |= QUIET_FLAG; | ||
174 | |||
175 | /* Do the last things and invoke protected mode */ | 176 | /* Do the last things and invoke protected mode */ |
176 | go_to_protected_mode(); | 177 | go_to_protected_mode(); |
177 | } | 178 | } |
diff --git a/arch/x86/boot/printf.c b/arch/x86/boot/printf.c index 50e47cdbdddd..cdac91ca55d3 100644 --- a/arch/x86/boot/printf.c +++ b/arch/x86/boot/printf.c | |||
@@ -34,7 +34,7 @@ static int skip_atoi(const char **s) | |||
34 | #define SMALL 32 /* Must be 32 == 0x20 */ | 34 | #define SMALL 32 /* Must be 32 == 0x20 */ |
35 | #define SPECIAL 64 /* 0x */ | 35 | #define SPECIAL 64 /* 0x */ |
36 | 36 | ||
37 | #define do_div(n,base) ({ \ | 37 | #define __do_div(n, base) ({ \ |
38 | int __res; \ | 38 | int __res; \ |
39 | __res = ((unsigned long) n) % (unsigned) base; \ | 39 | __res = ((unsigned long) n) % (unsigned) base; \ |
40 | n = ((unsigned long) n) / (unsigned) base; \ | 40 | n = ((unsigned long) n) / (unsigned) base; \ |
@@ -83,7 +83,7 @@ static char *number(char *str, long num, int base, int size, int precision, | |||
83 | tmp[i++] = '0'; | 83 | tmp[i++] = '0'; |
84 | else | 84 | else |
85 | while (num != 0) | 85 | while (num != 0) |
86 | tmp[i++] = (digits[do_div(num, base)] | locase); | 86 | tmp[i++] = (digits[__do_div(num, base)] | locase); |
87 | if (i > precision) | 87 | if (i > precision) |
88 | precision = i; | 88 | precision = i; |
89 | size -= precision; | 89 | size -= precision; |
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index f94b7a0c2abf..3cbc4058dd26 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c | |||
@@ -30,6 +30,22 @@ int strcmp(const char *str1, const char *str2) | |||
30 | return 0; | 30 | return 0; |
31 | } | 31 | } |
32 | 32 | ||
33 | int strncmp(const char *cs, const char *ct, size_t count) | ||
34 | { | ||
35 | unsigned char c1, c2; | ||
36 | |||
37 | while (count) { | ||
38 | c1 = *cs++; | ||
39 | c2 = *ct++; | ||
40 | if (c1 != c2) | ||
41 | return c1 < c2 ? -1 : 1; | ||
42 | if (!c1) | ||
43 | break; | ||
44 | count--; | ||
45 | } | ||
46 | return 0; | ||
47 | } | ||
48 | |||
33 | size_t strnlen(const char *s, size_t maxlen) | 49 | size_t strnlen(const char *s, size_t maxlen) |
34 | { | 50 | { |
35 | const char *es = s; | 51 | const char *es = s; |
@@ -48,3 +64,50 @@ unsigned int atou(const char *s) | |||
48 | i = i * 10 + (*s++ - '0'); | 64 | i = i * 10 + (*s++ - '0'); |
49 | return i; | 65 | return i; |
50 | } | 66 | } |
67 | |||
68 | /* Works only for digits and letters, but small and fast */ | ||
69 | #define TOLOWER(x) ((x) | 0x20) | ||
70 | |||
71 | static unsigned int simple_guess_base(const char *cp) | ||
72 | { | ||
73 | if (cp[0] == '0') { | ||
74 | if (TOLOWER(cp[1]) == 'x' && isxdigit(cp[2])) | ||
75 | return 16; | ||
76 | else | ||
77 | return 8; | ||
78 | } else { | ||
79 | return 10; | ||
80 | } | ||
81 | } | ||
82 | |||
83 | /** | ||
84 | * simple_strtoull - convert a string to an unsigned long long | ||
85 | * @cp: The start of the string | ||
86 | * @endp: A pointer to the end of the parsed string will be placed here | ||
87 | * @base: The number base to use | ||
88 | */ | ||
89 | |||
90 | unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base) | ||
91 | { | ||
92 | unsigned long long result = 0; | ||
93 | |||
94 | if (!base) | ||
95 | base = simple_guess_base(cp); | ||
96 | |||
97 | if (base == 16 && cp[0] == '0' && TOLOWER(cp[1]) == 'x') | ||
98 | cp += 2; | ||
99 | |||
100 | while (isxdigit(*cp)) { | ||
101 | unsigned int value; | ||
102 | |||
103 | value = isdigit(*cp) ? *cp - '0' : TOLOWER(*cp) - 'a' + 10; | ||
104 | if (value >= base) | ||
105 | break; | ||
106 | result = result * base + value; | ||
107 | cp++; | ||
108 | } | ||
109 | if (endp) | ||
110 | *endp = (char *)cp; | ||
111 | |||
112 | return result; | ||
113 | } | ||
diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c index 01ec69c901c7..def2451f46ae 100644 --- a/arch/x86/boot/tty.c +++ b/arch/x86/boot/tty.c | |||
@@ -10,23 +10,36 @@ | |||
10 | * ----------------------------------------------------------------------- */ | 10 | * ----------------------------------------------------------------------- */ |
11 | 11 | ||
12 | /* | 12 | /* |
13 | * Very simple screen I/O | 13 | * Very simple screen and serial I/O |
14 | * XXX: Probably should add very simple serial I/O? | ||
15 | */ | 14 | */ |
16 | 15 | ||
17 | #include "boot.h" | 16 | #include "boot.h" |
18 | 17 | ||
18 | int early_serial_base; | ||
19 | |||
20 | #define XMTRDY 0x20 | ||
21 | |||
22 | #define TXR 0 /* Transmit register (WRITE) */ | ||
23 | #define LSR 5 /* Line Status */ | ||
24 | |||
19 | /* | 25 | /* |
20 | * These functions are in .inittext so they can be used to signal | 26 | * These functions are in .inittext so they can be used to signal |
21 | * error during initialization. | 27 | * error during initialization. |
22 | */ | 28 | */ |
23 | 29 | ||
24 | void __attribute__((section(".inittext"))) putchar(int ch) | 30 | static void __attribute__((section(".inittext"))) serial_putchar(int ch) |
25 | { | 31 | { |
26 | struct biosregs ireg; | 32 | unsigned timeout = 0xffff; |
27 | 33 | ||
28 | if (ch == '\n') | 34 | while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout) |
29 | putchar('\r'); /* \n -> \r\n */ | 35 | cpu_relax(); |
36 | |||
37 | outb(ch, early_serial_base + TXR); | ||
38 | } | ||
39 | |||
40 | static void __attribute__((section(".inittext"))) bios_putchar(int ch) | ||
41 | { | ||
42 | struct biosregs ireg; | ||
30 | 43 | ||
31 | initregs(&ireg); | 44 | initregs(&ireg); |
32 | ireg.bx = 0x0007; | 45 | ireg.bx = 0x0007; |
@@ -36,6 +49,17 @@ void __attribute__((section(".inittext"))) putchar(int ch) | |||
36 | intcall(0x10, &ireg, NULL); | 49 | intcall(0x10, &ireg, NULL); |
37 | } | 50 | } |
38 | 51 | ||
52 | void __attribute__((section(".inittext"))) putchar(int ch) | ||
53 | { | ||
54 | if (ch == '\n') | ||
55 | putchar('\r'); /* \n -> \r\n */ | ||
56 | |||
57 | bios_putchar(ch); | ||
58 | |||
59 | if (early_serial_base != 0) | ||
60 | serial_putchar(ch); | ||
61 | } | ||
62 | |||
39 | void __attribute__((section(".inittext"))) puts(const char *str) | 63 | void __attribute__((section(".inittext"))) puts(const char *str) |
40 | { | 64 | { |
41 | while (*str) | 65 | while (*str) |
@@ -112,3 +136,4 @@ int getchar_timeout(void) | |||
112 | 136 | ||
113 | return 0; /* Timeout! */ | 137 | return 0; /* Timeout! */ |
114 | } | 138 | } |
139 | |||
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index d28fad19654a..e3a32431ca1e 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig | |||
@@ -1471,6 +1471,7 @@ CONFIG_HWMON=y | |||
1471 | # CONFIG_SENSORS_GL518SM is not set | 1471 | # CONFIG_SENSORS_GL518SM is not set |
1472 | # CONFIG_SENSORS_GL520SM is not set | 1472 | # CONFIG_SENSORS_GL520SM is not set |
1473 | # CONFIG_SENSORS_CORETEMP is not set | 1473 | # CONFIG_SENSORS_CORETEMP is not set |
1474 | # CONFIG_SENSORS_PKGTEMP is not set | ||
1474 | # CONFIG_SENSORS_IT87 is not set | 1475 | # CONFIG_SENSORS_IT87 is not set |
1475 | # CONFIG_SENSORS_LM63 is not set | 1476 | # CONFIG_SENSORS_LM63 is not set |
1476 | # CONFIG_SENSORS_LM75 is not set | 1477 | # CONFIG_SENSORS_LM75 is not set |
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 6c86acd847a4..4251f8372050 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig | |||
@@ -1456,6 +1456,7 @@ CONFIG_HWMON=y | |||
1456 | # CONFIG_SENSORS_GL518SM is not set | 1456 | # CONFIG_SENSORS_GL518SM is not set |
1457 | # CONFIG_SENSORS_GL520SM is not set | 1457 | # CONFIG_SENSORS_GL520SM is not set |
1458 | # CONFIG_SENSORS_CORETEMP is not set | 1458 | # CONFIG_SENSORS_CORETEMP is not set |
1459 | # CONFIG_SENSORS_PKGTEMP is not set | ||
1459 | # CONFIG_SENSORS_IT87 is not set | 1460 | # CONFIG_SENSORS_IT87 is not set |
1460 | # CONFIG_SENSORS_LM63 is not set | 1461 | # CONFIG_SENSORS_LM63 is not set |
1461 | # CONFIG_SENSORS_LM75 is not set | 1462 | # CONFIG_SENSORS_LM75 is not set |
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index aa2c39d968fc..92091de11113 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h | |||
@@ -134,7 +134,7 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) | |||
134 | boot_cpu_data.x86_model <= 0x05 && | 134 | boot_cpu_data.x86_model <= 0x05 && |
135 | boot_cpu_data.x86_mask < 0x0A) | 135 | boot_cpu_data.x86_mask < 0x0A) |
136 | return 1; | 136 | return 1; |
137 | else if (boot_cpu_has(X86_FEATURE_AMDC1E)) | 137 | else if (c1e_detected) |
138 | return 1; | 138 | return 1; |
139 | else | 139 | else |
140 | return max_cstate; | 140 | return max_cstate; |
diff --git a/arch/x86/include/asm/apb_timer.h b/arch/x86/include/asm/apb_timer.h index c74a2eebe570..a69b1ac9eaf8 100644 --- a/arch/x86/include/asm/apb_timer.h +++ b/arch/x86/include/asm/apb_timer.h | |||
@@ -55,7 +55,6 @@ extern unsigned long apbt_quick_calibrate(void); | |||
55 | extern int arch_setup_apbt_irqs(int irq, int trigger, int mask, int cpu); | 55 | extern int arch_setup_apbt_irqs(int irq, int trigger, int mask, int cpu); |
56 | extern void apbt_setup_secondary_clock(void); | 56 | extern void apbt_setup_secondary_clock(void); |
57 | extern unsigned int boot_cpu_id; | 57 | extern unsigned int boot_cpu_id; |
58 | extern int disable_apbt_percpu; | ||
59 | 58 | ||
60 | extern struct sfi_timer_table_entry *sfi_get_mtmr(int hint); | 59 | extern struct sfi_timer_table_entry *sfi_get_mtmr(int hint); |
61 | extern void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr); | 60 | extern void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr); |
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index 8859e12dd3cf..284a6e8f7ce1 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h | |||
@@ -11,38 +11,42 @@ | |||
11 | extern void __xchg_wrong_size(void); | 11 | extern void __xchg_wrong_size(void); |
12 | 12 | ||
13 | /* | 13 | /* |
14 | * Note: no "lock" prefix even on SMP: xchg always implies lock anyway | 14 | * Note: no "lock" prefix even on SMP: xchg always implies lock anyway. |
15 | * Note 2: xchg has side effect, so that attribute volatile is necessary, | 15 | * Since this is generally used to protect other memory information, we |
16 | * but generally the primitive is invalid, *ptr is output argument. --ANK | 16 | * use "asm volatile" and "memory" clobbers to prevent gcc from moving |
17 | * information around. | ||
17 | */ | 18 | */ |
18 | |||
19 | struct __xchg_dummy { | ||
20 | unsigned long a[100]; | ||
21 | }; | ||
22 | #define __xg(x) ((struct __xchg_dummy *)(x)) | ||
23 | |||
24 | #define __xchg(x, ptr, size) \ | 19 | #define __xchg(x, ptr, size) \ |
25 | ({ \ | 20 | ({ \ |
26 | __typeof(*(ptr)) __x = (x); \ | 21 | __typeof(*(ptr)) __x = (x); \ |
27 | switch (size) { \ | 22 | switch (size) { \ |
28 | case 1: \ | 23 | case 1: \ |
29 | asm volatile("xchgb %b0,%1" \ | 24 | { \ |
30 | : "=q" (__x) \ | 25 | volatile u8 *__ptr = (volatile u8 *)(ptr); \ |
31 | : "m" (*__xg(ptr)), "0" (__x) \ | 26 | asm volatile("xchgb %0,%1" \ |
27 | : "=q" (__x), "+m" (*__ptr) \ | ||
28 | : "0" (__x) \ | ||
32 | : "memory"); \ | 29 | : "memory"); \ |
33 | break; \ | 30 | break; \ |
31 | } \ | ||
34 | case 2: \ | 32 | case 2: \ |
35 | asm volatile("xchgw %w0,%1" \ | 33 | { \ |
36 | : "=r" (__x) \ | 34 | volatile u16 *__ptr = (volatile u16 *)(ptr); \ |
37 | : "m" (*__xg(ptr)), "0" (__x) \ | 35 | asm volatile("xchgw %0,%1" \ |
36 | : "=r" (__x), "+m" (*__ptr) \ | ||
37 | : "0" (__x) \ | ||
38 | : "memory"); \ | 38 | : "memory"); \ |
39 | break; \ | 39 | break; \ |
40 | } \ | ||
40 | case 4: \ | 41 | case 4: \ |
42 | { \ | ||
43 | volatile u32 *__ptr = (volatile u32 *)(ptr); \ | ||
41 | asm volatile("xchgl %0,%1" \ | 44 | asm volatile("xchgl %0,%1" \ |
42 | : "=r" (__x) \ | 45 | : "=r" (__x), "+m" (*__ptr) \ |
43 | : "m" (*__xg(ptr)), "0" (__x) \ | 46 | : "0" (__x) \ |
44 | : "memory"); \ | 47 | : "memory"); \ |
45 | break; \ | 48 | break; \ |
49 | } \ | ||
46 | default: \ | 50 | default: \ |
47 | __xchg_wrong_size(); \ | 51 | __xchg_wrong_size(); \ |
48 | } \ | 52 | } \ |
@@ -53,60 +57,33 @@ struct __xchg_dummy { | |||
53 | __xchg((v), (ptr), sizeof(*ptr)) | 57 | __xchg((v), (ptr), sizeof(*ptr)) |
54 | 58 | ||
55 | /* | 59 | /* |
56 | * The semantics of XCHGCMP8B are a bit strange, this is why | 60 | * CMPXCHG8B only writes to the target if we had the previous |
57 | * there is a loop and the loading of %%eax and %%edx has to | 61 | * value in registers, otherwise it acts as a read and gives us the |
58 | * be inside. This inlines well in most cases, the cached | 62 | * "new previous" value. That is why there is a loop. Preloading |
59 | * cost is around ~38 cycles. (in the future we might want | 63 | * EDX:EAX is a performance optimization: in the common case it means |
60 | * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that | 64 | * we need only one locked operation. |
61 | * might have an implicit FPU-save as a cost, so it's not | ||
62 | * clear which path to go.) | ||
63 | * | 65 | * |
64 | * cmpxchg8b must be used with the lock prefix here to allow | 66 | * A SIMD/3DNOW!/MMX/FPU 64-bit store here would require at the very |
65 | * the instruction to be executed atomically, see page 3-102 | 67 | * least an FPU save and/or %cr0.ts manipulation. |
66 | * of the instruction set reference 24319102.pdf. We need | 68 | * |
67 | * the reader side to see the coherent 64bit value. | 69 | * cmpxchg8b must be used with the lock prefix here to allow the |
70 | * instruction to be executed atomically. We need to have the reader | ||
71 | * side to see the coherent 64bit value. | ||
68 | */ | 72 | */ |
69 | static inline void __set_64bit(unsigned long long *ptr, | 73 | static inline void set_64bit(volatile u64 *ptr, u64 value) |
70 | unsigned int low, unsigned int high) | ||
71 | { | 74 | { |
75 | u32 low = value; | ||
76 | u32 high = value >> 32; | ||
77 | u64 prev = *ptr; | ||
78 | |||
72 | asm volatile("\n1:\t" | 79 | asm volatile("\n1:\t" |
73 | "movl (%0), %%eax\n\t" | 80 | LOCK_PREFIX "cmpxchg8b %0\n\t" |
74 | "movl 4(%0), %%edx\n\t" | ||
75 | LOCK_PREFIX "cmpxchg8b (%0)\n\t" | ||
76 | "jnz 1b" | 81 | "jnz 1b" |
77 | : /* no outputs */ | 82 | : "=m" (*ptr), "+A" (prev) |
78 | : "D"(ptr), | 83 | : "b" (low), "c" (high) |
79 | "b"(low), | 84 | : "memory"); |
80 | "c"(high) | ||
81 | : "ax", "dx", "memory"); | ||
82 | } | ||
83 | |||
84 | static inline void __set_64bit_constant(unsigned long long *ptr, | ||
85 | unsigned long long value) | ||
86 | { | ||
87 | __set_64bit(ptr, (unsigned int)value, (unsigned int)(value >> 32)); | ||
88 | } | ||
89 | |||
90 | #define ll_low(x) *(((unsigned int *)&(x)) + 0) | ||
91 | #define ll_high(x) *(((unsigned int *)&(x)) + 1) | ||
92 | |||
93 | static inline void __set_64bit_var(unsigned long long *ptr, | ||
94 | unsigned long long value) | ||
95 | { | ||
96 | __set_64bit(ptr, ll_low(value), ll_high(value)); | ||
97 | } | 85 | } |
98 | 86 | ||
99 | #define set_64bit(ptr, value) \ | ||
100 | (__builtin_constant_p((value)) \ | ||
101 | ? __set_64bit_constant((ptr), (value)) \ | ||
102 | : __set_64bit_var((ptr), (value))) | ||
103 | |||
104 | #define _set_64bit(ptr, value) \ | ||
105 | (__builtin_constant_p(value) \ | ||
106 | ? __set_64bit(ptr, (unsigned int)(value), \ | ||
107 | (unsigned int)((value) >> 32)) \ | ||
108 | : __set_64bit(ptr, ll_low((value)), ll_high((value)))) | ||
109 | |||
110 | extern void __cmpxchg_wrong_size(void); | 87 | extern void __cmpxchg_wrong_size(void); |
111 | 88 | ||
112 | /* | 89 | /* |
@@ -121,23 +98,32 @@ extern void __cmpxchg_wrong_size(void); | |||
121 | __typeof__(*(ptr)) __new = (new); \ | 98 | __typeof__(*(ptr)) __new = (new); \ |
122 | switch (size) { \ | 99 | switch (size) { \ |
123 | case 1: \ | 100 | case 1: \ |
124 | asm volatile(lock "cmpxchgb %b1,%2" \ | 101 | { \ |
125 | : "=a"(__ret) \ | 102 | volatile u8 *__ptr = (volatile u8 *)(ptr); \ |
126 | : "q"(__new), "m"(*__xg(ptr)), "0"(__old) \ | 103 | asm volatile(lock "cmpxchgb %2,%1" \ |
104 | : "=a" (__ret), "+m" (*__ptr) \ | ||
105 | : "q" (__new), "0" (__old) \ | ||
127 | : "memory"); \ | 106 | : "memory"); \ |
128 | break; \ | 107 | break; \ |
108 | } \ | ||
129 | case 2: \ | 109 | case 2: \ |
130 | asm volatile(lock "cmpxchgw %w1,%2" \ | 110 | { \ |
131 | : "=a"(__ret) \ | 111 | volatile u16 *__ptr = (volatile u16 *)(ptr); \ |
132 | : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ | 112 | asm volatile(lock "cmpxchgw %2,%1" \ |
113 | : "=a" (__ret), "+m" (*__ptr) \ | ||
114 | : "r" (__new), "0" (__old) \ | ||
133 | : "memory"); \ | 115 | : "memory"); \ |
134 | break; \ | 116 | break; \ |
117 | } \ | ||
135 | case 4: \ | 118 | case 4: \ |
136 | asm volatile(lock "cmpxchgl %1,%2" \ | 119 | { \ |
137 | : "=a"(__ret) \ | 120 | volatile u32 *__ptr = (volatile u32 *)(ptr); \ |
138 | : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ | 121 | asm volatile(lock "cmpxchgl %2,%1" \ |
122 | : "=a" (__ret), "+m" (*__ptr) \ | ||
123 | : "r" (__new), "0" (__old) \ | ||
139 | : "memory"); \ | 124 | : "memory"); \ |
140 | break; \ | 125 | break; \ |
126 | } \ | ||
141 | default: \ | 127 | default: \ |
142 | __cmpxchg_wrong_size(); \ | 128 | __cmpxchg_wrong_size(); \ |
143 | } \ | 129 | } \ |
@@ -175,32 +161,28 @@ extern void __cmpxchg_wrong_size(void); | |||
175 | (unsigned long long)(n))) | 161 | (unsigned long long)(n))) |
176 | #endif | 162 | #endif |
177 | 163 | ||
178 | static inline unsigned long long __cmpxchg64(volatile void *ptr, | 164 | static inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new) |
179 | unsigned long long old, | ||
180 | unsigned long long new) | ||
181 | { | 165 | { |
182 | unsigned long long prev; | 166 | u64 prev; |
183 | asm volatile(LOCK_PREFIX "cmpxchg8b %3" | 167 | asm volatile(LOCK_PREFIX "cmpxchg8b %1" |
184 | : "=A"(prev) | 168 | : "=A" (prev), |
185 | : "b"((unsigned long)new), | 169 | "+m" (*ptr) |
186 | "c"((unsigned long)(new >> 32)), | 170 | : "b" ((u32)new), |
187 | "m"(*__xg(ptr)), | 171 | "c" ((u32)(new >> 32)), |
188 | "0"(old) | 172 | "0" (old) |
189 | : "memory"); | 173 | : "memory"); |
190 | return prev; | 174 | return prev; |
191 | } | 175 | } |
192 | 176 | ||
193 | static inline unsigned long long __cmpxchg64_local(volatile void *ptr, | 177 | static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new) |
194 | unsigned long long old, | ||
195 | unsigned long long new) | ||
196 | { | 178 | { |
197 | unsigned long long prev; | 179 | u64 prev; |
198 | asm volatile("cmpxchg8b %3" | 180 | asm volatile("cmpxchg8b %1" |
199 | : "=A"(prev) | 181 | : "=A" (prev), |
200 | : "b"((unsigned long)new), | 182 | "+m" (*ptr) |
201 | "c"((unsigned long)(new >> 32)), | 183 | : "b" ((u32)new), |
202 | "m"(*__xg(ptr)), | 184 | "c" ((u32)(new >> 32)), |
203 | "0"(old) | 185 | "0" (old) |
204 | : "memory"); | 186 | : "memory"); |
205 | return prev; | 187 | return prev; |
206 | } | 188 | } |
@@ -264,8 +246,6 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old, | |||
264 | * to simulate the cmpxchg8b on the 80386 and 80486 CPU. | 246 | * to simulate the cmpxchg8b on the 80386 and 80486 CPU. |
265 | */ | 247 | */ |
266 | 248 | ||
267 | extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64); | ||
268 | |||
269 | #define cmpxchg64(ptr, o, n) \ | 249 | #define cmpxchg64(ptr, o, n) \ |
270 | ({ \ | 250 | ({ \ |
271 | __typeof__(*(ptr)) __ret; \ | 251 | __typeof__(*(ptr)) __ret; \ |
@@ -283,20 +263,20 @@ extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64); | |||
283 | __ret; }) | 263 | __ret; }) |
284 | 264 | ||
285 | 265 | ||
286 | 266 | #define cmpxchg64_local(ptr, o, n) \ | |
287 | #define cmpxchg64_local(ptr, o, n) \ | 267 | ({ \ |
288 | ({ \ | 268 | __typeof__(*(ptr)) __ret; \ |
289 | __typeof__(*(ptr)) __ret; \ | 269 | __typeof__(*(ptr)) __old = (o); \ |
290 | if (likely(boot_cpu_data.x86 > 4)) \ | 270 | __typeof__(*(ptr)) __new = (n); \ |
291 | __ret = (__typeof__(*(ptr)))__cmpxchg64_local((ptr), \ | 271 | alternative_io("call cmpxchg8b_emu", \ |
292 | (unsigned long long)(o), \ | 272 | "cmpxchg8b (%%esi)" , \ |
293 | (unsigned long long)(n)); \ | 273 | X86_FEATURE_CX8, \ |
294 | else \ | 274 | "=A" (__ret), \ |
295 | __ret = (__typeof__(*(ptr)))cmpxchg_486_u64((ptr), \ | 275 | "S" ((ptr)), "0" (__old), \ |
296 | (unsigned long long)(o), \ | 276 | "b" ((unsigned int)__new), \ |
297 | (unsigned long long)(n)); \ | 277 | "c" ((unsigned int)(__new>>32)) \ |
298 | __ret; \ | 278 | : "memory"); \ |
299 | }) | 279 | __ret; }) |
300 | 280 | ||
301 | #endif | 281 | #endif |
302 | 282 | ||
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 485ae415faec..423ae58aa020 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h | |||
@@ -3,51 +3,60 @@ | |||
3 | 3 | ||
4 | #include <asm/alternative.h> /* Provides LOCK_PREFIX */ | 4 | #include <asm/alternative.h> /* Provides LOCK_PREFIX */ |
5 | 5 | ||
6 | #define __xg(x) ((volatile long *)(x)) | 6 | static inline void set_64bit(volatile u64 *ptr, u64 val) |
7 | |||
8 | static inline void set_64bit(volatile unsigned long *ptr, unsigned long val) | ||
9 | { | 7 | { |
10 | *ptr = val; | 8 | *ptr = val; |
11 | } | 9 | } |
12 | 10 | ||
13 | #define _set_64bit set_64bit | ||
14 | |||
15 | extern void __xchg_wrong_size(void); | 11 | extern void __xchg_wrong_size(void); |
16 | extern void __cmpxchg_wrong_size(void); | 12 | extern void __cmpxchg_wrong_size(void); |
17 | 13 | ||
18 | /* | 14 | /* |
19 | * Note: no "lock" prefix even on SMP: xchg always implies lock anyway | 15 | * Note: no "lock" prefix even on SMP: xchg always implies lock anyway. |
20 | * Note 2: xchg has side effect, so that attribute volatile is necessary, | 16 | * Since this is generally used to protect other memory information, we |
21 | * but generally the primitive is invalid, *ptr is output argument. --ANK | 17 | * use "asm volatile" and "memory" clobbers to prevent gcc from moving |
18 | * information around. | ||
22 | */ | 19 | */ |
23 | #define __xchg(x, ptr, size) \ | 20 | #define __xchg(x, ptr, size) \ |
24 | ({ \ | 21 | ({ \ |
25 | __typeof(*(ptr)) __x = (x); \ | 22 | __typeof(*(ptr)) __x = (x); \ |
26 | switch (size) { \ | 23 | switch (size) { \ |
27 | case 1: \ | 24 | case 1: \ |
28 | asm volatile("xchgb %b0,%1" \ | 25 | { \ |
29 | : "=q" (__x) \ | 26 | volatile u8 *__ptr = (volatile u8 *)(ptr); \ |
30 | : "m" (*__xg(ptr)), "0" (__x) \ | 27 | asm volatile("xchgb %0,%1" \ |
28 | : "=q" (__x), "+m" (*__ptr) \ | ||
29 | : "0" (__x) \ | ||
31 | : "memory"); \ | 30 | : "memory"); \ |
32 | break; \ | 31 | break; \ |
32 | } \ | ||
33 | case 2: \ | 33 | case 2: \ |
34 | asm volatile("xchgw %w0,%1" \ | 34 | { \ |
35 | : "=r" (__x) \ | 35 | volatile u16 *__ptr = (volatile u16 *)(ptr); \ |
36 | : "m" (*__xg(ptr)), "0" (__x) \ | 36 | asm volatile("xchgw %0,%1" \ |
37 | : "=r" (__x), "+m" (*__ptr) \ | ||
38 | : "0" (__x) \ | ||
37 | : "memory"); \ | 39 | : "memory"); \ |
38 | break; \ | 40 | break; \ |
41 | } \ | ||
39 | case 4: \ | 42 | case 4: \ |
40 | asm volatile("xchgl %k0,%1" \ | 43 | { \ |
41 | : "=r" (__x) \ | 44 | volatile u32 *__ptr = (volatile u32 *)(ptr); \ |
42 | : "m" (*__xg(ptr)), "0" (__x) \ | 45 | asm volatile("xchgl %0,%1" \ |
46 | : "=r" (__x), "+m" (*__ptr) \ | ||
47 | : "0" (__x) \ | ||
43 | : "memory"); \ | 48 | : "memory"); \ |
44 | break; \ | 49 | break; \ |
50 | } \ | ||
45 | case 8: \ | 51 | case 8: \ |
52 | { \ | ||
53 | volatile u64 *__ptr = (volatile u64 *)(ptr); \ | ||
46 | asm volatile("xchgq %0,%1" \ | 54 | asm volatile("xchgq %0,%1" \ |
47 | : "=r" (__x) \ | 55 | : "=r" (__x), "+m" (*__ptr) \ |
48 | : "m" (*__xg(ptr)), "0" (__x) \ | 56 | : "0" (__x) \ |
49 | : "memory"); \ | 57 | : "memory"); \ |
50 | break; \ | 58 | break; \ |
59 | } \ | ||
51 | default: \ | 60 | default: \ |
52 | __xchg_wrong_size(); \ | 61 | __xchg_wrong_size(); \ |
53 | } \ | 62 | } \ |
@@ -71,29 +80,41 @@ extern void __cmpxchg_wrong_size(void); | |||
71 | __typeof__(*(ptr)) __new = (new); \ | 80 | __typeof__(*(ptr)) __new = (new); \ |
72 | switch (size) { \ | 81 | switch (size) { \ |
73 | case 1: \ | 82 | case 1: \ |
74 | asm volatile(lock "cmpxchgb %b1,%2" \ | 83 | { \ |
75 | : "=a"(__ret) \ | 84 | volatile u8 *__ptr = (volatile u8 *)(ptr); \ |
76 | : "q"(__new), "m"(*__xg(ptr)), "0"(__old) \ | 85 | asm volatile(lock "cmpxchgb %2,%1" \ |
86 | : "=a" (__ret), "+m" (*__ptr) \ | ||
87 | : "q" (__new), "0" (__old) \ | ||
77 | : "memory"); \ | 88 | : "memory"); \ |
78 | break; \ | 89 | break; \ |
90 | } \ | ||
79 | case 2: \ | 91 | case 2: \ |
80 | asm volatile(lock "cmpxchgw %w1,%2" \ | 92 | { \ |
81 | : "=a"(__ret) \ | 93 | volatile u16 *__ptr = (volatile u16 *)(ptr); \ |
82 | : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ | 94 | asm volatile(lock "cmpxchgw %2,%1" \ |
95 | : "=a" (__ret), "+m" (*__ptr) \ | ||
96 | : "r" (__new), "0" (__old) \ | ||
83 | : "memory"); \ | 97 | : "memory"); \ |
84 | break; \ | 98 | break; \ |
99 | } \ | ||
85 | case 4: \ | 100 | case 4: \ |
86 | asm volatile(lock "cmpxchgl %k1,%2" \ | 101 | { \ |
87 | : "=a"(__ret) \ | 102 | volatile u32 *__ptr = (volatile u32 *)(ptr); \ |
88 | : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ | 103 | asm volatile(lock "cmpxchgl %2,%1" \ |
104 | : "=a" (__ret), "+m" (*__ptr) \ | ||
105 | : "r" (__new), "0" (__old) \ | ||
89 | : "memory"); \ | 106 | : "memory"); \ |
90 | break; \ | 107 | break; \ |
108 | } \ | ||
91 | case 8: \ | 109 | case 8: \ |
92 | asm volatile(lock "cmpxchgq %1,%2" \ | 110 | { \ |
93 | : "=a"(__ret) \ | 111 | volatile u64 *__ptr = (volatile u64 *)(ptr); \ |
94 | : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ | 112 | asm volatile(lock "cmpxchgq %2,%1" \ |
113 | : "=a" (__ret), "+m" (*__ptr) \ | ||
114 | : "r" (__new), "0" (__old) \ | ||
95 | : "memory"); \ | 115 | : "memory"); \ |
96 | break; \ | 116 | break; \ |
117 | } \ | ||
97 | default: \ | 118 | default: \ |
98 | __cmpxchg_wrong_size(); \ | 119 | __cmpxchg_wrong_size(); \ |
99 | } \ | 120 | } \ |
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 468145914389..0b205b8a4308 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
@@ -6,7 +6,7 @@ | |||
6 | 6 | ||
7 | #include <asm/required-features.h> | 7 | #include <asm/required-features.h> |
8 | 8 | ||
9 | #define NCAPINTS 9 /* N 32-bit words worth of info */ | 9 | #define NCAPINTS 10 /* N 32-bit words worth of info */ |
10 | 10 | ||
11 | /* | 11 | /* |
12 | * Note: If the comment begins with a quoted string, that string is used | 12 | * Note: If the comment begins with a quoted string, that string is used |
@@ -89,7 +89,7 @@ | |||
89 | #define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* "" Lfence synchronizes RDTSC */ | 89 | #define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* "" Lfence synchronizes RDTSC */ |
90 | #define X86_FEATURE_11AP (3*32+19) /* "" Bad local APIC aka 11AP */ | 90 | #define X86_FEATURE_11AP (3*32+19) /* "" Bad local APIC aka 11AP */ |
91 | #define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ | 91 | #define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ |
92 | #define X86_FEATURE_AMDC1E (3*32+21) /* AMD C1E detected */ | 92 | /* 21 available, was AMD_C1E */ |
93 | #define X86_FEATURE_XTOPOLOGY (3*32+22) /* cpu topology enum extensions */ | 93 | #define X86_FEATURE_XTOPOLOGY (3*32+22) /* cpu topology enum extensions */ |
94 | #define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */ | 94 | #define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */ |
95 | #define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */ | 95 | #define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */ |
@@ -124,6 +124,8 @@ | |||
124 | #define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ | 124 | #define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ |
125 | #define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ | 125 | #define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ |
126 | #define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ | 126 | #define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ |
127 | #define X86_FEATURE_F16C (4*32+29) /* 16-bit fp conversions */ | ||
128 | #define X86_FEATURE_RDRND (4*32+30) /* The RDRAND instruction */ | ||
127 | #define X86_FEATURE_HYPERVISOR (4*32+31) /* Running on a hypervisor */ | 129 | #define X86_FEATURE_HYPERVISOR (4*32+31) /* Running on a hypervisor */ |
128 | 130 | ||
129 | /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ | 131 | /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ |
@@ -157,22 +159,29 @@ | |||
157 | 159 | ||
158 | /* | 160 | /* |
159 | * Auxiliary flags: Linux defined - For features scattered in various | 161 | * Auxiliary flags: Linux defined - For features scattered in various |
160 | * CPUID levels like 0x6, 0xA etc | 162 | * CPUID levels like 0x6, 0xA etc, word 7 |
161 | */ | 163 | */ |
162 | #define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */ | 164 | #define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */ |
163 | #define X86_FEATURE_ARAT (7*32+ 1) /* Always Running APIC Timer */ | 165 | #define X86_FEATURE_ARAT (7*32+ 1) /* Always Running APIC Timer */ |
164 | #define X86_FEATURE_CPB (7*32+ 2) /* AMD Core Performance Boost */ | 166 | #define X86_FEATURE_CPB (7*32+ 2) /* AMD Core Performance Boost */ |
167 | #define X86_FEATURE_EPB (7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ | ||
168 | #define X86_FEATURE_XSAVEOPT (7*32+ 4) /* Optimized Xsave */ | ||
169 | #define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */ | ||
170 | #define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */ | ||
165 | 171 | ||
166 | /* Virtualization flags: Linux defined */ | 172 | /* Virtualization flags: Linux defined, word 8 */ |
167 | #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ | 173 | #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ |
168 | #define X86_FEATURE_VNMI (8*32+ 1) /* Intel Virtual NMI */ | 174 | #define X86_FEATURE_VNMI (8*32+ 1) /* Intel Virtual NMI */ |
169 | #define X86_FEATURE_FLEXPRIORITY (8*32+ 2) /* Intel FlexPriority */ | 175 | #define X86_FEATURE_FLEXPRIORITY (8*32+ 2) /* Intel FlexPriority */ |
170 | #define X86_FEATURE_EPT (8*32+ 3) /* Intel Extended Page Table */ | 176 | #define X86_FEATURE_EPT (8*32+ 3) /* Intel Extended Page Table */ |
171 | #define X86_FEATURE_VPID (8*32+ 4) /* Intel Virtual Processor ID */ | 177 | #define X86_FEATURE_VPID (8*32+ 4) /* Intel Virtual Processor ID */ |
172 | #define X86_FEATURE_NPT (8*32+5) /* AMD Nested Page Table support */ | 178 | #define X86_FEATURE_NPT (8*32+ 5) /* AMD Nested Page Table support */ |
173 | #define X86_FEATURE_LBRV (8*32+6) /* AMD LBR Virtualization support */ | 179 | #define X86_FEATURE_LBRV (8*32+ 6) /* AMD LBR Virtualization support */ |
174 | #define X86_FEATURE_SVML (8*32+7) /* "svm_lock" AMD SVM locking MSR */ | 180 | #define X86_FEATURE_SVML (8*32+ 7) /* "svm_lock" AMD SVM locking MSR */ |
175 | #define X86_FEATURE_NRIPS (8*32+8) /* "nrip_save" AMD SVM next_rip save */ | 181 | #define X86_FEATURE_NRIPS (8*32+ 8) /* "nrip_save" AMD SVM next_rip save */ |
182 | |||
183 | /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ | ||
184 | #define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ | ||
176 | 185 | ||
177 | #if defined(__KERNEL__) && !defined(__ASSEMBLY__) | 186 | #if defined(__KERNEL__) && !defined(__ASSEMBLY__) |
178 | 187 | ||
@@ -194,7 +203,9 @@ extern const char * const x86_power_flags[32]; | |||
194 | (((bit)>>5)==4 && (1UL<<((bit)&31) & REQUIRED_MASK4)) || \ | 203 | (((bit)>>5)==4 && (1UL<<((bit)&31) & REQUIRED_MASK4)) || \ |
195 | (((bit)>>5)==5 && (1UL<<((bit)&31) & REQUIRED_MASK5)) || \ | 204 | (((bit)>>5)==5 && (1UL<<((bit)&31) & REQUIRED_MASK5)) || \ |
196 | (((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) || \ | 205 | (((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) || \ |
197 | (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) ) \ | 206 | (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) || \ |
207 | (((bit)>>5)==8 && (1UL<<((bit)&31) & REQUIRED_MASK8)) || \ | ||
208 | (((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) ) \ | ||
198 | ? 1 : \ | 209 | ? 1 : \ |
199 | test_cpu_cap(c, bit)) | 210 | test_cpu_cap(c, bit)) |
200 | 211 | ||
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index 942255310e6a..528a11e8d3e3 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h | |||
@@ -20,10 +20,10 @@ struct arch_hw_breakpoint { | |||
20 | #include <linux/list.h> | 20 | #include <linux/list.h> |
21 | 21 | ||
22 | /* Available HW breakpoint length encodings */ | 22 | /* Available HW breakpoint length encodings */ |
23 | #define X86_BREAKPOINT_LEN_X 0x00 | ||
23 | #define X86_BREAKPOINT_LEN_1 0x40 | 24 | #define X86_BREAKPOINT_LEN_1 0x40 |
24 | #define X86_BREAKPOINT_LEN_2 0x44 | 25 | #define X86_BREAKPOINT_LEN_2 0x44 |
25 | #define X86_BREAKPOINT_LEN_4 0x4c | 26 | #define X86_BREAKPOINT_LEN_4 0x4c |
26 | #define X86_BREAKPOINT_LEN_EXECUTE 0x40 | ||
27 | 27 | ||
28 | #ifdef CONFIG_X86_64 | 28 | #ifdef CONFIG_X86_64 |
29 | #define X86_BREAKPOINT_LEN_8 0x48 | 29 | #define X86_BREAKPOINT_LEN_8 0x48 |
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index 70abda7058c8..ff2546ce7178 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h | |||
@@ -45,5 +45,6 @@ extern const struct hypervisor_x86 *x86_hyper; | |||
45 | /* Recognized hypervisors */ | 45 | /* Recognized hypervisors */ |
46 | extern const struct hypervisor_x86 x86_hyper_vmware; | 46 | extern const struct hypervisor_x86 x86_hyper_vmware; |
47 | extern const struct hypervisor_x86 x86_hyper_ms_hyperv; | 47 | extern const struct hypervisor_x86 x86_hyper_ms_hyperv; |
48 | extern const struct hypervisor_x86 x86_hyper_xen_hvm; | ||
48 | 49 | ||
49 | #endif | 50 | #endif |
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 0f1cf5d53dd8..f1accc625beb 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h | |||
@@ -491,6 +491,8 @@ static inline void fpu_copy(struct fpu *dst, struct fpu *src) | |||
491 | memcpy(dst->state, src->state, xstate_size); | 491 | memcpy(dst->state, src->state, xstate_size); |
492 | } | 492 | } |
493 | 493 | ||
494 | extern void fpu_finit(struct fpu *fpu); | ||
495 | |||
494 | #endif /* __ASSEMBLY__ */ | 496 | #endif /* __ASSEMBLY__ */ |
495 | 497 | ||
496 | #define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 | 498 | #define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 |
diff --git a/arch/x86/include/asm/intel_scu_ipc.h b/arch/x86/include/asm/intel_scu_ipc.h index 4470c9ad4a3e..29f66793cc55 100644 --- a/arch/x86/include/asm/intel_scu_ipc.h +++ b/arch/x86/include/asm/intel_scu_ipc.h | |||
@@ -1,6 +1,12 @@ | |||
1 | #ifndef _ASM_X86_INTEL_SCU_IPC_H_ | 1 | #ifndef _ASM_X86_INTEL_SCU_IPC_H_ |
2 | #define _ASM_X86_INTEL_SCU_IPC_H_ | 2 | #define _ASM_X86_INTEL_SCU_IPC_H_ |
3 | 3 | ||
4 | #define IPCMSG_VRTC 0xFA /* Set vRTC device */ | ||
5 | |||
6 | /* Command id associated with message IPCMSG_VRTC */ | ||
7 | #define IPC_CMD_VRTC_SETTIME 1 /* Set time */ | ||
8 | #define IPC_CMD_VRTC_SETALARM 2 /* Set alarm */ | ||
9 | |||
4 | /* Read single register */ | 10 | /* Read single register */ |
5 | int intel_scu_ipc_ioread8(u16 addr, u8 *data); | 11 | int intel_scu_ipc_ioread8(u16 addr, u8 *data); |
6 | 12 | ||
@@ -28,20 +34,6 @@ int intel_scu_ipc_writev(u16 *addr, u8 *data, int len); | |||
28 | /* Update single register based on the mask */ | 34 | /* Update single register based on the mask */ |
29 | int intel_scu_ipc_update_register(u16 addr, u8 data, u8 mask); | 35 | int intel_scu_ipc_update_register(u16 addr, u8 data, u8 mask); |
30 | 36 | ||
31 | /* | ||
32 | * Indirect register read | ||
33 | * Can be used when SCCB(System Controller Configuration Block) register | ||
34 | * HRIM(Honor Restricted IPC Messages) is set (bit 23) | ||
35 | */ | ||
36 | int intel_scu_ipc_register_read(u32 addr, u32 *data); | ||
37 | |||
38 | /* | ||
39 | * Indirect register write | ||
40 | * Can be used when SCCB(System Controller Configuration Block) register | ||
41 | * HRIM(Honor Restricted IPC Messages) is set (bit 23) | ||
42 | */ | ||
43 | int intel_scu_ipc_register_write(u32 addr, u32 data); | ||
44 | |||
45 | /* Issue commands to the SCU with or without data */ | 37 | /* Issue commands to the SCU with or without data */ |
46 | int intel_scu_ipc_simple_command(int cmd, int sub); | 38 | int intel_scu_ipc_simple_command(int cmd, int sub); |
47 | int intel_scu_ipc_command(int cmd, int sub, u32 *in, int inlen, | 39 | int intel_scu_ipc_command(int cmd, int sub, u32 *in, int inlen, |
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 8767d99c4f64..e2ca30092557 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h | |||
@@ -125,6 +125,9 @@ | |||
125 | */ | 125 | */ |
126 | #define MCE_SELF_VECTOR 0xeb | 126 | #define MCE_SELF_VECTOR 0xeb |
127 | 127 | ||
128 | /* Xen vector callback to receive events in a HVM domain */ | ||
129 | #define XEN_HVM_EVTCHN_CALLBACK 0xe9 | ||
130 | |||
128 | #define NR_VECTORS 256 | 131 | #define NR_VECTORS 256 |
129 | 132 | ||
130 | #define FPU_IRQ 13 | 133 | #define FPU_IRQ 13 |
diff --git a/arch/x86/include/asm/kgdb.h b/arch/x86/include/asm/kgdb.h index 006da3687cdc..396f5b5fc4d7 100644 --- a/arch/x86/include/asm/kgdb.h +++ b/arch/x86/include/asm/kgdb.h | |||
@@ -39,9 +39,11 @@ enum regnames { | |||
39 | GDB_FS, /* 14 */ | 39 | GDB_FS, /* 14 */ |
40 | GDB_GS, /* 15 */ | 40 | GDB_GS, /* 15 */ |
41 | }; | 41 | }; |
42 | #define GDB_ORIG_AX 41 | ||
43 | #define DBG_MAX_REG_NUM 16 | ||
42 | #define NUMREGBYTES ((GDB_GS+1)*4) | 44 | #define NUMREGBYTES ((GDB_GS+1)*4) |
43 | #else /* ! CONFIG_X86_32 */ | 45 | #else /* ! CONFIG_X86_32 */ |
44 | enum regnames64 { | 46 | enum regnames { |
45 | GDB_AX, /* 0 */ | 47 | GDB_AX, /* 0 */ |
46 | GDB_BX, /* 1 */ | 48 | GDB_BX, /* 1 */ |
47 | GDB_CX, /* 2 */ | 49 | GDB_CX, /* 2 */ |
@@ -59,15 +61,15 @@ enum regnames64 { | |||
59 | GDB_R14, /* 14 */ | 61 | GDB_R14, /* 14 */ |
60 | GDB_R15, /* 15 */ | 62 | GDB_R15, /* 15 */ |
61 | GDB_PC, /* 16 */ | 63 | GDB_PC, /* 16 */ |
64 | GDB_PS, /* 17 */ | ||
65 | GDB_CS, /* 18 */ | ||
66 | GDB_SS, /* 19 */ | ||
62 | }; | 67 | }; |
63 | 68 | #define GDB_ORIG_AX 57 | |
64 | enum regnames32 { | 69 | #define DBG_MAX_REG_NUM 20 |
65 | GDB_PS = 34, | 70 | /* 17 64 bit regs and 3 32 bit regs */ |
66 | GDB_CS, | 71 | #define NUMREGBYTES ((17 * 8) + (3 * 4)) |
67 | GDB_SS, | 72 | #endif /* ! CONFIG_X86_32 */ |
68 | }; | ||
69 | #define NUMREGBYTES ((GDB_SS+1)*4) | ||
70 | #endif /* CONFIG_X86_32 */ | ||
71 | 73 | ||
72 | static inline void arch_kgdb_breakpoint(void) | 74 | static inline void arch_kgdb_breakpoint(void) |
73 | { | 75 | { |
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index ff90055c7f0b..4d8dcbdfc120 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h | |||
@@ -22,6 +22,8 @@ | |||
22 | #define __KVM_HAVE_XEN_HVM | 22 | #define __KVM_HAVE_XEN_HVM |
23 | #define __KVM_HAVE_VCPU_EVENTS | 23 | #define __KVM_HAVE_VCPU_EVENTS |
24 | #define __KVM_HAVE_DEBUGREGS | 24 | #define __KVM_HAVE_DEBUGREGS |
25 | #define __KVM_HAVE_XSAVE | ||
26 | #define __KVM_HAVE_XCRS | ||
25 | 27 | ||
26 | /* Architectural interrupt line count. */ | 28 | /* Architectural interrupt line count. */ |
27 | #define KVM_NR_INTERRUPTS 256 | 29 | #define KVM_NR_INTERRUPTS 256 |
@@ -299,4 +301,24 @@ struct kvm_debugregs { | |||
299 | __u64 reserved[9]; | 301 | __u64 reserved[9]; |
300 | }; | 302 | }; |
301 | 303 | ||
304 | /* for KVM_CAP_XSAVE */ | ||
305 | struct kvm_xsave { | ||
306 | __u32 region[1024]; | ||
307 | }; | ||
308 | |||
309 | #define KVM_MAX_XCRS 16 | ||
310 | |||
311 | struct kvm_xcr { | ||
312 | __u32 xcr; | ||
313 | __u32 reserved; | ||
314 | __u64 value; | ||
315 | }; | ||
316 | |||
317 | struct kvm_xcrs { | ||
318 | __u32 nr_xcrs; | ||
319 | __u32 flags; | ||
320 | struct kvm_xcr xcrs[KVM_MAX_XCRS]; | ||
321 | __u64 padding[16]; | ||
322 | }; | ||
323 | |||
302 | #endif /* _ASM_X86_KVM_H */ | 324 | #endif /* _ASM_X86_KVM_H */ |
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 0b2729bf2070..51cfd730ac5d 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h | |||
@@ -51,8 +51,10 @@ struct x86_emulate_ctxt; | |||
51 | #define X86EMUL_UNHANDLEABLE 1 | 51 | #define X86EMUL_UNHANDLEABLE 1 |
52 | /* Terminate emulation but return success to the caller. */ | 52 | /* Terminate emulation but return success to the caller. */ |
53 | #define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */ | 53 | #define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */ |
54 | #define X86EMUL_RETRY_INSTR 2 /* retry the instruction for some reason */ | 54 | #define X86EMUL_RETRY_INSTR 3 /* retry the instruction for some reason */ |
55 | #define X86EMUL_CMPXCHG_FAILED 2 /* cmpxchg did not see expected value */ | 55 | #define X86EMUL_CMPXCHG_FAILED 4 /* cmpxchg did not see expected value */ |
56 | #define X86EMUL_IO_NEEDED 5 /* IO is needed to complete emulation */ | ||
57 | |||
56 | struct x86_emulate_ops { | 58 | struct x86_emulate_ops { |
57 | /* | 59 | /* |
58 | * read_std: Read bytes of standard (non-emulated/special) memory. | 60 | * read_std: Read bytes of standard (non-emulated/special) memory. |
@@ -92,6 +94,7 @@ struct x86_emulate_ops { | |||
92 | int (*read_emulated)(unsigned long addr, | 94 | int (*read_emulated)(unsigned long addr, |
93 | void *val, | 95 | void *val, |
94 | unsigned int bytes, | 96 | unsigned int bytes, |
97 | unsigned int *error, | ||
95 | struct kvm_vcpu *vcpu); | 98 | struct kvm_vcpu *vcpu); |
96 | 99 | ||
97 | /* | 100 | /* |
@@ -104,6 +107,7 @@ struct x86_emulate_ops { | |||
104 | int (*write_emulated)(unsigned long addr, | 107 | int (*write_emulated)(unsigned long addr, |
105 | const void *val, | 108 | const void *val, |
106 | unsigned int bytes, | 109 | unsigned int bytes, |
110 | unsigned int *error, | ||
107 | struct kvm_vcpu *vcpu); | 111 | struct kvm_vcpu *vcpu); |
108 | 112 | ||
109 | /* | 113 | /* |
@@ -118,6 +122,7 @@ struct x86_emulate_ops { | |||
118 | const void *old, | 122 | const void *old, |
119 | const void *new, | 123 | const void *new, |
120 | unsigned int bytes, | 124 | unsigned int bytes, |
125 | unsigned int *error, | ||
121 | struct kvm_vcpu *vcpu); | 126 | struct kvm_vcpu *vcpu); |
122 | 127 | ||
123 | int (*pio_in_emulated)(int size, unsigned short port, void *val, | 128 | int (*pio_in_emulated)(int size, unsigned short port, void *val, |
@@ -132,18 +137,26 @@ struct x86_emulate_ops { | |||
132 | int seg, struct kvm_vcpu *vcpu); | 137 | int seg, struct kvm_vcpu *vcpu); |
133 | u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu); | 138 | u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu); |
134 | void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu); | 139 | void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu); |
140 | unsigned long (*get_cached_segment_base)(int seg, struct kvm_vcpu *vcpu); | ||
135 | void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); | 141 | void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); |
136 | ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu); | 142 | ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu); |
137 | void (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu); | 143 | int (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu); |
138 | int (*cpl)(struct kvm_vcpu *vcpu); | 144 | int (*cpl)(struct kvm_vcpu *vcpu); |
139 | void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); | 145 | int (*get_dr)(int dr, unsigned long *dest, struct kvm_vcpu *vcpu); |
146 | int (*set_dr)(int dr, unsigned long value, struct kvm_vcpu *vcpu); | ||
147 | int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); | ||
148 | int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); | ||
140 | }; | 149 | }; |
141 | 150 | ||
142 | /* Type, address-of, and value of an instruction's operand. */ | 151 | /* Type, address-of, and value of an instruction's operand. */ |
143 | struct operand { | 152 | struct operand { |
144 | enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type; | 153 | enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type; |
145 | unsigned int bytes; | 154 | unsigned int bytes; |
146 | unsigned long val, orig_val, *ptr; | 155 | unsigned long orig_val, *ptr; |
156 | union { | ||
157 | unsigned long val; | ||
158 | char valptr[sizeof(unsigned long) + 2]; | ||
159 | }; | ||
147 | }; | 160 | }; |
148 | 161 | ||
149 | struct fetch_cache { | 162 | struct fetch_cache { |
@@ -186,6 +199,7 @@ struct decode_cache { | |||
186 | unsigned long modrm_val; | 199 | unsigned long modrm_val; |
187 | struct fetch_cache fetch; | 200 | struct fetch_cache fetch; |
188 | struct read_cache io_read; | 201 | struct read_cache io_read; |
202 | struct read_cache mem_read; | ||
189 | }; | 203 | }; |
190 | 204 | ||
191 | struct x86_emulate_ctxt { | 205 | struct x86_emulate_ctxt { |
@@ -202,6 +216,12 @@ struct x86_emulate_ctxt { | |||
202 | int interruptibility; | 216 | int interruptibility; |
203 | 217 | ||
204 | bool restart; /* restart string instruction after writeback */ | 218 | bool restart; /* restart string instruction after writeback */ |
219 | |||
220 | int exception; /* exception that happens during emulation or -1 */ | ||
221 | u32 error_code; /* error code for exception */ | ||
222 | bool error_code_valid; | ||
223 | unsigned long cr2; /* faulted address in case of #PF */ | ||
224 | |||
205 | /* decode cache */ | 225 | /* decode cache */ |
206 | struct decode_cache decode; | 226 | struct decode_cache decode; |
207 | }; | 227 | }; |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 76f5483cffec..502e53f999cf 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/mm.h> | 15 | #include <linux/mm.h> |
16 | #include <linux/mmu_notifier.h> | 16 | #include <linux/mmu_notifier.h> |
17 | #include <linux/tracepoint.h> | 17 | #include <linux/tracepoint.h> |
18 | #include <linux/cpumask.h> | ||
18 | 19 | ||
19 | #include <linux/kvm.h> | 20 | #include <linux/kvm.h> |
20 | #include <linux/kvm_para.h> | 21 | #include <linux/kvm_para.h> |
@@ -39,11 +40,14 @@ | |||
39 | 0xFFFFFF0000000000ULL) | 40 | 0xFFFFFF0000000000ULL) |
40 | 41 | ||
41 | #define INVALID_PAGE (~(hpa_t)0) | 42 | #define INVALID_PAGE (~(hpa_t)0) |
43 | #define VALID_PAGE(x) ((x) != INVALID_PAGE) | ||
44 | |||
42 | #define UNMAPPED_GVA (~(gpa_t)0) | 45 | #define UNMAPPED_GVA (~(gpa_t)0) |
43 | 46 | ||
44 | /* KVM Hugepage definitions for x86 */ | 47 | /* KVM Hugepage definitions for x86 */ |
45 | #define KVM_NR_PAGE_SIZES 3 | 48 | #define KVM_NR_PAGE_SIZES 3 |
46 | #define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + (((x) - 1) * 9)) | 49 | #define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 9) |
50 | #define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x)) | ||
47 | #define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) | 51 | #define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) |
48 | #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) | 52 | #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) |
49 | #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) | 53 | #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) |
@@ -69,8 +73,6 @@ | |||
69 | 73 | ||
70 | #define IOPL_SHIFT 12 | 74 | #define IOPL_SHIFT 12 |
71 | 75 | ||
72 | #define KVM_ALIAS_SLOTS 4 | ||
73 | |||
74 | #define KVM_PERMILLE_MMU_PAGES 20 | 76 | #define KVM_PERMILLE_MMU_PAGES 20 |
75 | #define KVM_MIN_ALLOC_MMU_PAGES 64 | 77 | #define KVM_MIN_ALLOC_MMU_PAGES 64 |
76 | #define KVM_MMU_HASH_SHIFT 10 | 78 | #define KVM_MMU_HASH_SHIFT 10 |
@@ -241,7 +243,7 @@ struct kvm_mmu { | |||
241 | void (*prefetch_page)(struct kvm_vcpu *vcpu, | 243 | void (*prefetch_page)(struct kvm_vcpu *vcpu, |
242 | struct kvm_mmu_page *page); | 244 | struct kvm_mmu_page *page); |
243 | int (*sync_page)(struct kvm_vcpu *vcpu, | 245 | int (*sync_page)(struct kvm_vcpu *vcpu, |
244 | struct kvm_mmu_page *sp); | 246 | struct kvm_mmu_page *sp, bool clear_unsync); |
245 | void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); | 247 | void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); |
246 | hpa_t root_hpa; | 248 | hpa_t root_hpa; |
247 | int root_level; | 249 | int root_level; |
@@ -301,8 +303,8 @@ struct kvm_vcpu_arch { | |||
301 | unsigned long mmu_seq; | 303 | unsigned long mmu_seq; |
302 | } update_pte; | 304 | } update_pte; |
303 | 305 | ||
304 | struct i387_fxsave_struct host_fx_image; | 306 | struct fpu guest_fpu; |
305 | struct i387_fxsave_struct guest_fx_image; | 307 | u64 xcr0; |
306 | 308 | ||
307 | gva_t mmio_fault_cr2; | 309 | gva_t mmio_fault_cr2; |
308 | struct kvm_pio_request pio; | 310 | struct kvm_pio_request pio; |
@@ -360,26 +362,11 @@ struct kvm_vcpu_arch { | |||
360 | 362 | ||
361 | /* fields used by HYPER-V emulation */ | 363 | /* fields used by HYPER-V emulation */ |
362 | u64 hv_vapic; | 364 | u64 hv_vapic; |
363 | }; | ||
364 | |||
365 | struct kvm_mem_alias { | ||
366 | gfn_t base_gfn; | ||
367 | unsigned long npages; | ||
368 | gfn_t target_gfn; | ||
369 | #define KVM_ALIAS_INVALID 1UL | ||
370 | unsigned long flags; | ||
371 | }; | ||
372 | 365 | ||
373 | #define KVM_ARCH_HAS_UNALIAS_INSTANTIATION | 366 | cpumask_var_t wbinvd_dirty_mask; |
374 | |||
375 | struct kvm_mem_aliases { | ||
376 | struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS]; | ||
377 | int naliases; | ||
378 | }; | 367 | }; |
379 | 368 | ||
380 | struct kvm_arch { | 369 | struct kvm_arch { |
381 | struct kvm_mem_aliases *aliases; | ||
382 | |||
383 | unsigned int n_free_mmu_pages; | 370 | unsigned int n_free_mmu_pages; |
384 | unsigned int n_requested_mmu_pages; | 371 | unsigned int n_requested_mmu_pages; |
385 | unsigned int n_alloc_mmu_pages; | 372 | unsigned int n_alloc_mmu_pages; |
@@ -533,6 +520,8 @@ struct kvm_x86_ops { | |||
533 | 520 | ||
534 | void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry); | 521 | void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry); |
535 | 522 | ||
523 | bool (*has_wbinvd_exit)(void); | ||
524 | |||
536 | const struct trace_print_flags *exit_reasons_str; | 525 | const struct trace_print_flags *exit_reasons_str; |
537 | }; | 526 | }; |
538 | 527 | ||
@@ -576,7 +565,6 @@ enum emulation_result { | |||
576 | #define EMULTYPE_SKIP (1 << 2) | 565 | #define EMULTYPE_SKIP (1 << 2) |
577 | int emulate_instruction(struct kvm_vcpu *vcpu, | 566 | int emulate_instruction(struct kvm_vcpu *vcpu, |
578 | unsigned long cr2, u16 error_code, int emulation_type); | 567 | unsigned long cr2, u16 error_code, int emulation_type); |
579 | void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); | ||
580 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); | 568 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); |
581 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); | 569 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); |
582 | 570 | ||
@@ -591,10 +579,7 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); | |||
591 | int kvm_emulate_halt(struct kvm_vcpu *vcpu); | 579 | int kvm_emulate_halt(struct kvm_vcpu *vcpu); |
592 | int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); | 580 | int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); |
593 | int emulate_clts(struct kvm_vcpu *vcpu); | 581 | int emulate_clts(struct kvm_vcpu *vcpu); |
594 | int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, | 582 | int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); |
595 | unsigned long *dest); | ||
596 | int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, | ||
597 | unsigned long value); | ||
598 | 583 | ||
599 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); | 584 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); |
600 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); | 585 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); |
@@ -602,15 +587,16 @@ int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); | |||
602 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, | 587 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, |
603 | bool has_error_code, u32 error_code); | 588 | bool has_error_code, u32 error_code); |
604 | 589 | ||
605 | void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); | 590 | int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); |
606 | void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); | 591 | int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); |
607 | void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); | 592 | int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); |
608 | void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); | 593 | void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); |
609 | int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); | 594 | int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); |
610 | int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val); | 595 | int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val); |
611 | unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); | 596 | unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); |
612 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); | 597 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); |
613 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); | 598 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); |
599 | int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr); | ||
614 | 600 | ||
615 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); | 601 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); |
616 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); | 602 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); |
@@ -630,12 +616,7 @@ int kvm_pic_set_irq(void *opaque, int irq, int level); | |||
630 | 616 | ||
631 | void kvm_inject_nmi(struct kvm_vcpu *vcpu); | 617 | void kvm_inject_nmi(struct kvm_vcpu *vcpu); |
632 | 618 | ||
633 | void fx_init(struct kvm_vcpu *vcpu); | 619 | int fx_init(struct kvm_vcpu *vcpu); |
634 | |||
635 | int emulator_write_emulated(unsigned long addr, | ||
636 | const void *val, | ||
637 | unsigned int bytes, | ||
638 | struct kvm_vcpu *vcpu); | ||
639 | 620 | ||
640 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); | 621 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); |
641 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 622 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
@@ -664,8 +645,6 @@ void kvm_disable_tdp(void); | |||
664 | int complete_pio(struct kvm_vcpu *vcpu); | 645 | int complete_pio(struct kvm_vcpu *vcpu); |
665 | bool kvm_check_iopl(struct kvm_vcpu *vcpu); | 646 | bool kvm_check_iopl(struct kvm_vcpu *vcpu); |
666 | 647 | ||
667 | struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn); | ||
668 | |||
669 | static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) | 648 | static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) |
670 | { | 649 | { |
671 | struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); | 650 | struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); |
@@ -719,21 +698,6 @@ static inline unsigned long read_msr(unsigned long msr) | |||
719 | } | 698 | } |
720 | #endif | 699 | #endif |
721 | 700 | ||
722 | static inline void kvm_fx_save(struct i387_fxsave_struct *image) | ||
723 | { | ||
724 | asm("fxsave (%0)":: "r" (image)); | ||
725 | } | ||
726 | |||
727 | static inline void kvm_fx_restore(struct i387_fxsave_struct *image) | ||
728 | { | ||
729 | asm("fxrstor (%0)":: "r" (image)); | ||
730 | } | ||
731 | |||
732 | static inline void kvm_fx_finit(void) | ||
733 | { | ||
734 | asm("finit"); | ||
735 | } | ||
736 | |||
737 | static inline u32 get_rdx_init_val(void) | 701 | static inline u32 get_rdx_init_val(void) |
738 | { | 702 | { |
739 | return 0x600; /* P6 family */ | 703 | return 0x600; /* P6 family */ |
diff --git a/arch/x86/include/asm/local64.h b/arch/x86/include/asm/local64.h new file mode 100644 index 000000000000..36c93b5cc239 --- /dev/null +++ b/arch/x86/include/asm/local64.h | |||
@@ -0,0 +1 @@ | |||
#include <asm-generic/local64.h> | |||
diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h index 451d30e7f62d..16350740edf6 100644 --- a/arch/x86/include/asm/mrst.h +++ b/arch/x86/include/asm/mrst.h | |||
@@ -13,6 +13,32 @@ | |||
13 | extern int pci_mrst_init(void); | 13 | extern int pci_mrst_init(void); |
14 | int __init sfi_parse_mrtc(struct sfi_table_header *table); | 14 | int __init sfi_parse_mrtc(struct sfi_table_header *table); |
15 | 15 | ||
16 | /* | ||
17 | * Medfield is the follow-up of Moorestown, it combines two chip solution into | ||
18 | * one. Other than that it also added always-on and constant tsc and lapic | ||
19 | * timers. Medfield is the platform name, and the chip name is called Penwell | ||
20 | * we treat Medfield/Penwell as a variant of Moorestown. Penwell can be | ||
21 | * identified via MSRs. | ||
22 | */ | ||
23 | enum mrst_cpu_type { | ||
24 | MRST_CPU_CHIP_LINCROFT = 1, | ||
25 | MRST_CPU_CHIP_PENWELL, | ||
26 | }; | ||
27 | |||
28 | extern enum mrst_cpu_type __mrst_cpu_chip; | ||
29 | static enum mrst_cpu_type mrst_identify_cpu(void) | ||
30 | { | ||
31 | return __mrst_cpu_chip; | ||
32 | } | ||
33 | |||
34 | enum mrst_timer_options { | ||
35 | MRST_TIMER_DEFAULT, | ||
36 | MRST_TIMER_APBT_ONLY, | ||
37 | MRST_TIMER_LAPIC_APBT, | ||
38 | }; | ||
39 | |||
40 | extern enum mrst_timer_options mrst_timer_options; | ||
41 | |||
16 | #define SFI_MTMR_MAX_NUM 8 | 42 | #define SFI_MTMR_MAX_NUM 8 |
17 | #define SFI_MRTC_MAX 8 | 43 | #define SFI_MRTC_MAX 8 |
18 | 44 | ||
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 8c7ae4318629..65bbec2093aa 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -20,6 +20,7 @@ | |||
20 | #define _EFER_LMA 10 /* Long mode active (read-only) */ | 20 | #define _EFER_LMA 10 /* Long mode active (read-only) */ |
21 | #define _EFER_NX 11 /* No execute enable */ | 21 | #define _EFER_NX 11 /* No execute enable */ |
22 | #define _EFER_SVME 12 /* Enable virtualization */ | 22 | #define _EFER_SVME 12 /* Enable virtualization */ |
23 | #define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ | ||
23 | #define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ | 24 | #define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ |
24 | 25 | ||
25 | #define EFER_SCE (1<<_EFER_SCE) | 26 | #define EFER_SCE (1<<_EFER_SCE) |
@@ -27,6 +28,7 @@ | |||
27 | #define EFER_LMA (1<<_EFER_LMA) | 28 | #define EFER_LMA (1<<_EFER_LMA) |
28 | #define EFER_NX (1<<_EFER_NX) | 29 | #define EFER_NX (1<<_EFER_NX) |
29 | #define EFER_SVME (1<<_EFER_SVME) | 30 | #define EFER_SVME (1<<_EFER_SVME) |
31 | #define EFER_LMSLE (1<<_EFER_LMSLE) | ||
30 | #define EFER_FFXSR (1<<_EFER_FFXSR) | 32 | #define EFER_FFXSR (1<<_EFER_FFXSR) |
31 | 33 | ||
32 | /* Intel MSRs. Some also available on other CPUs */ | 34 | /* Intel MSRs. Some also available on other CPUs */ |
@@ -159,8 +161,6 @@ | |||
159 | #define MSR_K7_FID_VID_STATUS 0xc0010042 | 161 | #define MSR_K7_FID_VID_STATUS 0xc0010042 |
160 | 162 | ||
161 | /* K6 MSRs */ | 163 | /* K6 MSRs */ |
162 | #define MSR_K6_EFER 0xc0000080 | ||
163 | #define MSR_K6_STAR 0xc0000081 | ||
164 | #define MSR_K6_WHCR 0xc0000082 | 164 | #define MSR_K6_WHCR 0xc0000082 |
165 | #define MSR_K6_UWCCR 0xc0000085 | 165 | #define MSR_K6_UWCCR 0xc0000085 |
166 | #define MSR_K6_EPMR 0xc0000086 | 166 | #define MSR_K6_EPMR 0xc0000086 |
@@ -224,12 +224,14 @@ | |||
224 | #define MSR_IA32_THERM_CONTROL 0x0000019a | 224 | #define MSR_IA32_THERM_CONTROL 0x0000019a |
225 | #define MSR_IA32_THERM_INTERRUPT 0x0000019b | 225 | #define MSR_IA32_THERM_INTERRUPT 0x0000019b |
226 | 226 | ||
227 | #define THERM_INT_LOW_ENABLE (1 << 0) | 227 | #define THERM_INT_HIGH_ENABLE (1 << 0) |
228 | #define THERM_INT_HIGH_ENABLE (1 << 1) | 228 | #define THERM_INT_LOW_ENABLE (1 << 1) |
229 | #define THERM_INT_PLN_ENABLE (1 << 24) | ||
229 | 230 | ||
230 | #define MSR_IA32_THERM_STATUS 0x0000019c | 231 | #define MSR_IA32_THERM_STATUS 0x0000019c |
231 | 232 | ||
232 | #define THERM_STATUS_PROCHOT (1 << 0) | 233 | #define THERM_STATUS_PROCHOT (1 << 0) |
234 | #define THERM_STATUS_POWER_LIMIT (1 << 10) | ||
233 | 235 | ||
234 | #define MSR_THERM2_CTL 0x0000019d | 236 | #define MSR_THERM2_CTL 0x0000019d |
235 | 237 | ||
@@ -239,6 +241,19 @@ | |||
239 | 241 | ||
240 | #define MSR_IA32_TEMPERATURE_TARGET 0x000001a2 | 242 | #define MSR_IA32_TEMPERATURE_TARGET 0x000001a2 |
241 | 243 | ||
244 | #define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0 | ||
245 | |||
246 | #define MSR_IA32_PACKAGE_THERM_STATUS 0x000001b1 | ||
247 | |||
248 | #define PACKAGE_THERM_STATUS_PROCHOT (1 << 0) | ||
249 | #define PACKAGE_THERM_STATUS_POWER_LIMIT (1 << 10) | ||
250 | |||
251 | #define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x000001b2 | ||
252 | |||
253 | #define PACKAGE_THERM_INT_HIGH_ENABLE (1 << 0) | ||
254 | #define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1) | ||
255 | #define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24) | ||
256 | |||
242 | /* MISC_ENABLE bits: architectural */ | 257 | /* MISC_ENABLE bits: architectural */ |
243 | #define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << 0) | 258 | #define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << 0) |
244 | #define MSR_IA32_MISC_ENABLE_TCC (1ULL << 1) | 259 | #define MSR_IA32_MISC_ENABLE_TCC (1ULL << 1) |
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index c5bc4c2d33f5..084ef95274cd 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h | |||
@@ -148,8 +148,8 @@ static inline unsigned long long native_read_pmc(int counter) | |||
148 | #define rdmsr(msr, val1, val2) \ | 148 | #define rdmsr(msr, val1, val2) \ |
149 | do { \ | 149 | do { \ |
150 | u64 __val = native_read_msr((msr)); \ | 150 | u64 __val = native_read_msr((msr)); \ |
151 | (val1) = (u32)__val; \ | 151 | (void)((val1) = (u32)__val); \ |
152 | (val2) = (u32)(__val >> 32); \ | 152 | (void)((val2) = (u32)(__val >> 32)); \ |
153 | } while (0) | 153 | } while (0) |
154 | 154 | ||
155 | static inline void wrmsr(unsigned msr, unsigned low, unsigned high) | 155 | static inline void wrmsr(unsigned msr, unsigned low, unsigned high) |
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 93da9c3f3341..932f0f86b4b7 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h | |||
@@ -17,7 +17,9 @@ int do_nmi_callback(struct pt_regs *regs, int cpu); | |||
17 | 17 | ||
18 | extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); | 18 | extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); |
19 | extern int check_nmi_watchdog(void); | 19 | extern int check_nmi_watchdog(void); |
20 | #if !defined(CONFIG_LOCKUP_DETECTOR) | ||
20 | extern int nmi_watchdog_enabled; | 21 | extern int nmi_watchdog_enabled; |
22 | #endif | ||
21 | extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); | 23 | extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); |
22 | extern int reserve_perfctr_nmi(unsigned int); | 24 | extern int reserve_perfctr_nmi(unsigned int); |
23 | extern void release_perfctr_nmi(unsigned int); | 25 | extern void release_perfctr_nmi(unsigned int); |
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index cd2a31dc5fb8..49c7219826f9 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h | |||
@@ -30,6 +30,7 @@ | |||
30 | #define PCI_HAS_IO_ECS 0x40000 | 30 | #define PCI_HAS_IO_ECS 0x40000 |
31 | #define PCI_NOASSIGN_ROMS 0x80000 | 31 | #define PCI_NOASSIGN_ROMS 0x80000 |
32 | #define PCI_ROOT_NO_CRS 0x100000 | 32 | #define PCI_ROOT_NO_CRS 0x100000 |
33 | #define PCI_NOASSIGN_BARS 0x200000 | ||
33 | 34 | ||
34 | extern unsigned int pci_probe; | 35 | extern unsigned int pci_probe; |
35 | extern unsigned long pirq_table_addr; | 36 | extern unsigned long pirq_table_addr; |
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 254883d0c7e0..6e742cc4251b 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h | |||
@@ -68,8 +68,9 @@ union cpuid10_eax { | |||
68 | 68 | ||
69 | union cpuid10_edx { | 69 | union cpuid10_edx { |
70 | struct { | 70 | struct { |
71 | unsigned int num_counters_fixed:4; | 71 | unsigned int num_counters_fixed:5; |
72 | unsigned int reserved:28; | 72 | unsigned int bit_width_fixed:8; |
73 | unsigned int reserved:19; | ||
73 | } split; | 74 | } split; |
74 | unsigned int full; | 75 | unsigned int full; |
75 | }; | 76 | }; |
@@ -140,6 +141,19 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs); | |||
140 | extern unsigned long perf_misc_flags(struct pt_regs *regs); | 141 | extern unsigned long perf_misc_flags(struct pt_regs *regs); |
141 | #define perf_misc_flags(regs) perf_misc_flags(regs) | 142 | #define perf_misc_flags(regs) perf_misc_flags(regs) |
142 | 143 | ||
144 | #include <asm/stacktrace.h> | ||
145 | |||
146 | /* | ||
147 | * We abuse bit 3 from flags to pass exact information, see perf_misc_flags | ||
148 | * and the comment with PERF_EFLAGS_EXACT. | ||
149 | */ | ||
150 | #define perf_arch_fetch_caller_regs(regs, __ip) { \ | ||
151 | (regs)->ip = (__ip); \ | ||
152 | (regs)->bp = caller_frame_pointer(); \ | ||
153 | (regs)->cs = __KERNEL_CS; \ | ||
154 | regs->flags = 0; \ | ||
155 | } | ||
156 | |||
143 | #else | 157 | #else |
144 | static inline void init_hw_perf_events(void) { } | 158 | static inline void init_hw_perf_events(void) { } |
145 | static inline void perf_events_lapic_init(void) { } | 159 | static inline void perf_events_lapic_init(void) { } |
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index 64a8ebff06fc..def500776b16 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h | |||
@@ -19,7 +19,6 @@ | |||
19 | #define ARCH_P4_RESERVED_ESCR (2) /* IQ_ESCR(0,1) not always present */ | 19 | #define ARCH_P4_RESERVED_ESCR (2) /* IQ_ESCR(0,1) not always present */ |
20 | #define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR) | 20 | #define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR) |
21 | #define ARCH_P4_MAX_CCCR (18) | 21 | #define ARCH_P4_MAX_CCCR (18) |
22 | #define ARCH_P4_MAX_COUNTER (ARCH_P4_MAX_CCCR / 2) | ||
23 | 22 | ||
24 | #define P4_ESCR_EVENT_MASK 0x7e000000U | 23 | #define P4_ESCR_EVENT_MASK 0x7e000000U |
25 | #define P4_ESCR_EVENT_SHIFT 25 | 24 | #define P4_ESCR_EVENT_SHIFT 25 |
@@ -71,10 +70,6 @@ | |||
71 | #define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) | 70 | #define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) |
72 | #define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT) | 71 | #define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT) |
73 | 72 | ||
74 | /* Custom bits in reerved CCCR area */ | ||
75 | #define P4_CCCR_CACHE_OPS_MASK 0x0000003fU | ||
76 | |||
77 | |||
78 | /* Non HT mask */ | 73 | /* Non HT mask */ |
79 | #define P4_CCCR_MASK \ | 74 | #define P4_CCCR_MASK \ |
80 | (P4_CCCR_OVF | \ | 75 | (P4_CCCR_OVF | \ |
@@ -106,8 +101,7 @@ | |||
106 | * ESCR and CCCR but rather an only packed value should | 101 | * ESCR and CCCR but rather an only packed value should |
107 | * be unpacked and written to a proper addresses | 102 | * be unpacked and written to a proper addresses |
108 | * | 103 | * |
109 | * the base idea is to pack as much info as | 104 | * the base idea is to pack as much info as possible |
110 | * possible | ||
111 | */ | 105 | */ |
112 | #define p4_config_pack_escr(v) (((u64)(v)) << 32) | 106 | #define p4_config_pack_escr(v) (((u64)(v)) << 32) |
113 | #define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL) | 107 | #define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL) |
@@ -130,8 +124,6 @@ | |||
130 | t; \ | 124 | t; \ |
131 | }) | 125 | }) |
132 | 126 | ||
133 | #define p4_config_unpack_cache_event(v) (((u64)(v)) & P4_CCCR_CACHE_OPS_MASK) | ||
134 | |||
135 | #define P4_CONFIG_HT_SHIFT 63 | 127 | #define P4_CONFIG_HT_SHIFT 63 |
136 | #define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) | 128 | #define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) |
137 | 129 | ||
@@ -214,6 +206,12 @@ static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr) | |||
214 | return escr; | 206 | return escr; |
215 | } | 207 | } |
216 | 208 | ||
209 | /* | ||
210 | * This are the events which should be used in "Event Select" | ||
211 | * field of ESCR register, they are like unique keys which allow | ||
212 | * the kernel to determinate which CCCR and COUNTER should be | ||
213 | * used to track an event | ||
214 | */ | ||
217 | enum P4_EVENTS { | 215 | enum P4_EVENTS { |
218 | P4_EVENT_TC_DELIVER_MODE, | 216 | P4_EVENT_TC_DELIVER_MODE, |
219 | P4_EVENT_BPU_FETCH_REQUEST, | 217 | P4_EVENT_BPU_FETCH_REQUEST, |
@@ -561,7 +559,7 @@ enum P4_EVENT_OPCODES { | |||
561 | * a caller should use P4_ESCR_EMASK_NAME helper to | 559 | * a caller should use P4_ESCR_EMASK_NAME helper to |
562 | * pick the EventMask needed, for example | 560 | * pick the EventMask needed, for example |
563 | * | 561 | * |
564 | * P4_ESCR_EMASK_NAME(P4_EVENT_TC_DELIVER_MODE, DD) | 562 | * P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD) |
565 | */ | 563 | */ |
566 | enum P4_ESCR_EMASKS { | 564 | enum P4_ESCR_EMASKS { |
567 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DD, 0), | 565 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DD, 0), |
@@ -753,43 +751,50 @@ enum P4_ESCR_EMASKS { | |||
753 | P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, BOGUS, 1), | 751 | P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, BOGUS, 1), |
754 | }; | 752 | }; |
755 | 753 | ||
756 | /* P4 PEBS: stale for a while */ | 754 | /* |
757 | #define P4_PEBS_METRIC_MASK 0x00001fffU | 755 | * P4 PEBS specifics (Replay Event only) |
758 | #define P4_PEBS_UOB_TAG 0x01000000U | 756 | * |
759 | #define P4_PEBS_ENABLE 0x02000000U | 757 | * Format (bits): |
760 | 758 | * 0-6: metric from P4_PEBS_METRIC enum | |
761 | /* Replay metrics for MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT */ | 759 | * 7 : reserved |
762 | #define P4_PEBS__1stl_cache_load_miss_retired 0x3000001 | 760 | * 8 : reserved |
763 | #define P4_PEBS__2ndl_cache_load_miss_retired 0x3000002 | 761 | * 9-11 : reserved |
764 | #define P4_PEBS__dtlb_load_miss_retired 0x3000004 | 762 | * |
765 | #define P4_PEBS__dtlb_store_miss_retired 0x3000004 | 763 | * Note we have UOP and PEBS bits reserved for now |
766 | #define P4_PEBS__dtlb_all_miss_retired 0x3000004 | 764 | * just in case if we will need them once |
767 | #define P4_PEBS__tagged_mispred_branch 0x3018000 | 765 | */ |
768 | #define P4_PEBS__mob_load_replay_retired 0x3000200 | 766 | #define P4_PEBS_CONFIG_ENABLE (1 << 7) |
769 | #define P4_PEBS__split_load_retired 0x3000400 | 767 | #define P4_PEBS_CONFIG_UOP_TAG (1 << 8) |
770 | #define P4_PEBS__split_store_retired 0x3000400 | 768 | #define P4_PEBS_CONFIG_METRIC_MASK 0x3f |
771 | 769 | #define P4_PEBS_CONFIG_MASK 0xff | |
772 | #define P4_VERT__1stl_cache_load_miss_retired 0x0000001 | 770 | |
773 | #define P4_VERT__2ndl_cache_load_miss_retired 0x0000001 | 771 | /* |
774 | #define P4_VERT__dtlb_load_miss_retired 0x0000001 | 772 | * mem: Only counters MSR_IQ_COUNTER4 (16) and |
775 | #define P4_VERT__dtlb_store_miss_retired 0x0000002 | 773 | * MSR_IQ_COUNTER5 (17) are allowed for PEBS sampling |
776 | #define P4_VERT__dtlb_all_miss_retired 0x0000003 | 774 | */ |
777 | #define P4_VERT__tagged_mispred_branch 0x0000010 | 775 | #define P4_PEBS_ENABLE 0x02000000U |
778 | #define P4_VERT__mob_load_replay_retired 0x0000001 | 776 | #define P4_PEBS_ENABLE_UOP_TAG 0x01000000U |
779 | #define P4_VERT__split_load_retired 0x0000001 | 777 | |
780 | #define P4_VERT__split_store_retired 0x0000002 | 778 | #define p4_config_unpack_metric(v) (((u64)(v)) & P4_PEBS_CONFIG_METRIC_MASK) |
781 | 779 | #define p4_config_unpack_pebs(v) (((u64)(v)) & P4_PEBS_CONFIG_MASK) | |
782 | enum P4_CACHE_EVENTS { | 780 | |
783 | P4_CACHE__NONE, | 781 | #define p4_config_pebs_has(v, mask) (p4_config_unpack_pebs(v) & (mask)) |
784 | 782 | ||
785 | P4_CACHE__1stl_cache_load_miss_retired, | 783 | enum P4_PEBS_METRIC { |
786 | P4_CACHE__2ndl_cache_load_miss_retired, | 784 | P4_PEBS_METRIC__none, |
787 | P4_CACHE__dtlb_load_miss_retired, | 785 | |
788 | P4_CACHE__dtlb_store_miss_retired, | 786 | P4_PEBS_METRIC__1stl_cache_load_miss_retired, |
789 | P4_CACHE__itlb_reference_hit, | 787 | P4_PEBS_METRIC__2ndl_cache_load_miss_retired, |
790 | P4_CACHE__itlb_reference_miss, | 788 | P4_PEBS_METRIC__dtlb_load_miss_retired, |
791 | 789 | P4_PEBS_METRIC__dtlb_store_miss_retired, | |
792 | P4_CACHE__MAX | 790 | P4_PEBS_METRIC__dtlb_all_miss_retired, |
791 | P4_PEBS_METRIC__tagged_mispred_branch, | ||
792 | P4_PEBS_METRIC__mob_load_replay_retired, | ||
793 | P4_PEBS_METRIC__split_load_retired, | ||
794 | P4_PEBS_METRIC__split_store_retired, | ||
795 | |||
796 | P4_PEBS_METRIC__max | ||
793 | }; | 797 | }; |
794 | 798 | ||
795 | #endif /* PERF_EVENT_P4_H */ | 799 | #endif /* PERF_EVENT_P4_H */ |
800 | |||
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 7e5c6a60b8ee..325b7bdbebaa 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -762,6 +762,7 @@ extern void init_c1e_mask(void); | |||
762 | extern unsigned long boot_option_idle_override; | 762 | extern unsigned long boot_option_idle_override; |
763 | extern unsigned long idle_halt; | 763 | extern unsigned long idle_halt; |
764 | extern unsigned long idle_nomwait; | 764 | extern unsigned long idle_nomwait; |
765 | extern bool c1e_detected; | ||
765 | 766 | ||
766 | /* | 767 | /* |
767 | * on systems with caches, caches must be flashed as the absolute | 768 | * on systems with caches, caches must be flashed as the absolute |
@@ -1025,4 +1026,24 @@ unsigned long calc_aperfmperf_ratio(struct aperfmperf *old, | |||
1025 | return ratio; | 1026 | return ratio; |
1026 | } | 1027 | } |
1027 | 1028 | ||
1029 | /* | ||
1030 | * AMD errata checking | ||
1031 | */ | ||
1032 | #ifdef CONFIG_CPU_SUP_AMD | ||
1033 | extern const int amd_erratum_383[]; | ||
1034 | extern const int amd_erratum_400[]; | ||
1035 | extern bool cpu_has_amd_erratum(const int *); | ||
1036 | |||
1037 | #define AMD_LEGACY_ERRATUM(...) { -1, __VA_ARGS__, 0 } | ||
1038 | #define AMD_OSVW_ERRATUM(osvw_id, ...) { osvw_id, __VA_ARGS__, 0 } | ||
1039 | #define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \ | ||
1040 | ((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end)) | ||
1041 | #define AMD_MODEL_RANGE_FAMILY(range) (((range) >> 24) & 0xff) | ||
1042 | #define AMD_MODEL_RANGE_START(range) (((range) >> 12) & 0xfff) | ||
1043 | #define AMD_MODEL_RANGE_END(range) ((range) & 0xfff) | ||
1044 | |||
1045 | #else | ||
1046 | #define cpu_has_amd_erratum(x) (false) | ||
1047 | #endif /* CONFIG_CPU_SUP_AMD */ | ||
1048 | |||
1028 | #endif /* _ASM_X86_PROCESSOR_H */ | 1049 | #endif /* _ASM_X86_PROCESSOR_H */ |
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index 64cf2d24fad1..6c7fc25f2c34 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h | |||
@@ -84,5 +84,7 @@ | |||
84 | #define REQUIRED_MASK5 0 | 84 | #define REQUIRED_MASK5 0 |
85 | #define REQUIRED_MASK6 0 | 85 | #define REQUIRED_MASK6 0 |
86 | #define REQUIRED_MASK7 0 | 86 | #define REQUIRED_MASK7 0 |
87 | #define REQUIRED_MASK8 0 | ||
88 | #define REQUIRED_MASK9 0 | ||
87 | 89 | ||
88 | #endif /* _ASM_X86_REQUIRED_FEATURES_H */ | 90 | #endif /* _ASM_X86_REQUIRED_FEATURES_H */ |
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index 606ede126972..d1e41b0f9b60 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h | |||
@@ -118,7 +118,7 @@ static inline void __down_read(struct rw_semaphore *sem) | |||
118 | { | 118 | { |
119 | asm volatile("# beginning down_read\n\t" | 119 | asm volatile("# beginning down_read\n\t" |
120 | LOCK_PREFIX _ASM_INC "(%1)\n\t" | 120 | LOCK_PREFIX _ASM_INC "(%1)\n\t" |
121 | /* adds 0x00000001, returns the old value */ | 121 | /* adds 0x00000001 */ |
122 | " jns 1f\n" | 122 | " jns 1f\n" |
123 | " call call_rwsem_down_read_failed\n" | 123 | " call call_rwsem_down_read_failed\n" |
124 | "1:\n\t" | 124 | "1:\n\t" |
@@ -156,11 +156,9 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) | |||
156 | static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) | 156 | static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) |
157 | { | 157 | { |
158 | rwsem_count_t tmp; | 158 | rwsem_count_t tmp; |
159 | |||
160 | tmp = RWSEM_ACTIVE_WRITE_BIAS; | ||
161 | asm volatile("# beginning down_write\n\t" | 159 | asm volatile("# beginning down_write\n\t" |
162 | LOCK_PREFIX " xadd %1,(%2)\n\t" | 160 | LOCK_PREFIX " xadd %1,(%2)\n\t" |
163 | /* subtract 0x0000ffff, returns the old value */ | 161 | /* adds 0xffff0001, returns the old value */ |
164 | " test %1,%1\n\t" | 162 | " test %1,%1\n\t" |
165 | /* was the count 0 before? */ | 163 | /* was the count 0 before? */ |
166 | " jz 1f\n" | 164 | " jz 1f\n" |
@@ -168,7 +166,7 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) | |||
168 | "1:\n" | 166 | "1:\n" |
169 | "# ending down_write" | 167 | "# ending down_write" |
170 | : "+m" (sem->count), "=d" (tmp) | 168 | : "+m" (sem->count), "=d" (tmp) |
171 | : "a" (sem), "1" (tmp) | 169 | : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS) |
172 | : "memory", "cc"); | 170 | : "memory", "cc"); |
173 | } | 171 | } |
174 | 172 | ||
@@ -195,16 +193,16 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) | |||
195 | */ | 193 | */ |
196 | static inline void __up_read(struct rw_semaphore *sem) | 194 | static inline void __up_read(struct rw_semaphore *sem) |
197 | { | 195 | { |
198 | rwsem_count_t tmp = -RWSEM_ACTIVE_READ_BIAS; | 196 | rwsem_count_t tmp; |
199 | asm volatile("# beginning __up_read\n\t" | 197 | asm volatile("# beginning __up_read\n\t" |
200 | LOCK_PREFIX " xadd %1,(%2)\n\t" | 198 | LOCK_PREFIX " xadd %1,(%2)\n\t" |
201 | /* subtracts 1, returns the old value */ | 199 | /* subtracts 1, returns the old value */ |
202 | " jns 1f\n\t" | 200 | " jns 1f\n\t" |
203 | " call call_rwsem_wake\n" | 201 | " call call_rwsem_wake\n" /* expects old value in %edx */ |
204 | "1:\n" | 202 | "1:\n" |
205 | "# ending __up_read\n" | 203 | "# ending __up_read\n" |
206 | : "+m" (sem->count), "=d" (tmp) | 204 | : "+m" (sem->count), "=d" (tmp) |
207 | : "a" (sem), "1" (tmp) | 205 | : "a" (sem), "1" (-RWSEM_ACTIVE_READ_BIAS) |
208 | : "memory", "cc"); | 206 | : "memory", "cc"); |
209 | } | 207 | } |
210 | 208 | ||
@@ -216,10 +214,9 @@ static inline void __up_write(struct rw_semaphore *sem) | |||
216 | rwsem_count_t tmp; | 214 | rwsem_count_t tmp; |
217 | asm volatile("# beginning __up_write\n\t" | 215 | asm volatile("# beginning __up_write\n\t" |
218 | LOCK_PREFIX " xadd %1,(%2)\n\t" | 216 | LOCK_PREFIX " xadd %1,(%2)\n\t" |
219 | /* tries to transition | 217 | /* subtracts 0xffff0001, returns the old value */ |
220 | 0xffff0001 -> 0x00000000 */ | 218 | " jns 1f\n\t" |
221 | " jz 1f\n" | 219 | " call call_rwsem_wake\n" /* expects old value in %edx */ |
222 | " call call_rwsem_wake\n" | ||
223 | "1:\n\t" | 220 | "1:\n\t" |
224 | "# ending __up_write\n" | 221 | "# ending __up_write\n" |
225 | : "+m" (sem->count), "=d" (tmp) | 222 | : "+m" (sem->count), "=d" (tmp) |
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 86b1506f4179..ef292c792d74 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h | |||
@@ -82,7 +82,7 @@ void *extend_brk(size_t size, size_t align); | |||
82 | * executable.) | 82 | * executable.) |
83 | */ | 83 | */ |
84 | #define RESERVE_BRK(name,sz) \ | 84 | #define RESERVE_BRK(name,sz) \ |
85 | static void __section(.discard) __used \ | 85 | static void __section(.discard.text) __used \ |
86 | __brk_reservation_fn_##name##__(void) { \ | 86 | __brk_reservation_fn_##name##__(void) { \ |
87 | asm volatile ( \ | 87 | asm volatile ( \ |
88 | ".pushsection .brk_reservation,\"aw\",@nobits;" \ | 88 | ".pushsection .brk_reservation,\"aw\",@nobits;" \ |
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 4dab78edbad9..2b16a2ad23dc 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h | |||
@@ -1,6 +1,13 @@ | |||
1 | /* | ||
2 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs | ||
4 | */ | ||
5 | |||
1 | #ifndef _ASM_X86_STACKTRACE_H | 6 | #ifndef _ASM_X86_STACKTRACE_H |
2 | #define _ASM_X86_STACKTRACE_H | 7 | #define _ASM_X86_STACKTRACE_H |
3 | 8 | ||
9 | #include <linux/uaccess.h> | ||
10 | |||
4 | extern int kstack_depth_to_print; | 11 | extern int kstack_depth_to_print; |
5 | 12 | ||
6 | struct thread_info; | 13 | struct thread_info; |
@@ -42,4 +49,46 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, | |||
42 | unsigned long *stack, unsigned long bp, | 49 | unsigned long *stack, unsigned long bp, |
43 | const struct stacktrace_ops *ops, void *data); | 50 | const struct stacktrace_ops *ops, void *data); |
44 | 51 | ||
52 | #ifdef CONFIG_X86_32 | ||
53 | #define STACKSLOTS_PER_LINE 8 | ||
54 | #define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :) | ||
55 | #else | ||
56 | #define STACKSLOTS_PER_LINE 4 | ||
57 | #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) | ||
58 | #endif | ||
59 | |||
60 | extern void | ||
61 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
62 | unsigned long *stack, unsigned long bp, char *log_lvl); | ||
63 | |||
64 | extern void | ||
65 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
66 | unsigned long *sp, unsigned long bp, char *log_lvl); | ||
67 | |||
68 | extern unsigned int code_bytes; | ||
69 | |||
70 | /* The form of the top of the frame on the stack */ | ||
71 | struct stack_frame { | ||
72 | struct stack_frame *next_frame; | ||
73 | unsigned long return_address; | ||
74 | }; | ||
75 | |||
76 | struct stack_frame_ia32 { | ||
77 | u32 next_frame; | ||
78 | u32 return_address; | ||
79 | }; | ||
80 | |||
81 | static inline unsigned long caller_frame_pointer(void) | ||
82 | { | ||
83 | struct stack_frame *frame; | ||
84 | |||
85 | get_bp(frame); | ||
86 | |||
87 | #ifdef CONFIG_FRAME_POINTER | ||
88 | frame = frame->next_frame; | ||
89 | #endif | ||
90 | |||
91 | return (unsigned long)frame; | ||
92 | } | ||
93 | |||
45 | #endif /* _ASM_X86_STACKTRACE_H */ | 94 | #endif /* _ASM_X86_STACKTRACE_H */ |
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index e7f4d33c55ed..33ecc3ea8782 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h | |||
@@ -457,4 +457,11 @@ static __always_inline void rdtsc_barrier(void) | |||
457 | alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); | 457 | alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); |
458 | } | 458 | } |
459 | 459 | ||
460 | /* | ||
461 | * We handle most unaligned accesses in hardware. On the other hand | ||
462 | * unaligned DMA can be quite expensive on some Nehalem processors. | ||
463 | * | ||
464 | * Based on this we disable the IP header alignment in network drivers. | ||
465 | */ | ||
466 | #define NET_IP_ALIGN 0 | ||
460 | #endif /* _ASM_X86_SYSTEM_H */ | 467 | #endif /* _ASM_X86_SYSTEM_H */ |
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 9e6779f7cf2d..9f0cbd987d50 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
@@ -257,6 +257,7 @@ enum vmcs_field { | |||
257 | #define EXIT_REASON_IO_INSTRUCTION 30 | 257 | #define EXIT_REASON_IO_INSTRUCTION 30 |
258 | #define EXIT_REASON_MSR_READ 31 | 258 | #define EXIT_REASON_MSR_READ 31 |
259 | #define EXIT_REASON_MSR_WRITE 32 | 259 | #define EXIT_REASON_MSR_WRITE 32 |
260 | #define EXIT_REASON_INVALID_STATE 33 | ||
260 | #define EXIT_REASON_MWAIT_INSTRUCTION 36 | 261 | #define EXIT_REASON_MWAIT_INSTRUCTION 36 |
261 | #define EXIT_REASON_MONITOR_INSTRUCTION 39 | 262 | #define EXIT_REASON_MONITOR_INSTRUCTION 39 |
262 | #define EXIT_REASON_PAUSE_INSTRUCTION 40 | 263 | #define EXIT_REASON_PAUSE_INSTRUCTION 40 |
@@ -266,6 +267,7 @@ enum vmcs_field { | |||
266 | #define EXIT_REASON_EPT_VIOLATION 48 | 267 | #define EXIT_REASON_EPT_VIOLATION 48 |
267 | #define EXIT_REASON_EPT_MISCONFIG 49 | 268 | #define EXIT_REASON_EPT_MISCONFIG 49 |
268 | #define EXIT_REASON_WBINVD 54 | 269 | #define EXIT_REASON_WBINVD 54 |
270 | #define EXIT_REASON_XSETBV 55 | ||
269 | 271 | ||
270 | /* | 272 | /* |
271 | * Interruption-information format | 273 | * Interruption-information format |
@@ -375,6 +377,9 @@ enum vmcs_field { | |||
375 | #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) | 377 | #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) |
376 | #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) | 378 | #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) |
377 | 379 | ||
380 | #define VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT (1ull << 9) /* (41 - 32) */ | ||
381 | #define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT (1ull << 10) /* (42 - 32) */ | ||
382 | |||
378 | #define VMX_EPT_DEFAULT_GAW 3 | 383 | #define VMX_EPT_DEFAULT_GAW 3 |
379 | #define VMX_EPT_MAX_GAW 0x4 | 384 | #define VMX_EPT_MAX_GAW 0x4 |
380 | #define VMX_EPT_MT_EPTE_SHIFT 3 | 385 | #define VMX_EPT_MT_EPTE_SHIFT 3 |
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 519b54327d75..baa579c8e038 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h | |||
@@ -142,6 +142,7 @@ struct x86_cpuinit_ops { | |||
142 | * @set_wallclock: set time back to HW clock | 142 | * @set_wallclock: set time back to HW clock |
143 | * @is_untracked_pat_range exclude from PAT logic | 143 | * @is_untracked_pat_range exclude from PAT logic |
144 | * @nmi_init enable NMI on cpus | 144 | * @nmi_init enable NMI on cpus |
145 | * @i8042_detect pre-detect if i8042 controller exists | ||
145 | */ | 146 | */ |
146 | struct x86_platform_ops { | 147 | struct x86_platform_ops { |
147 | unsigned long (*calibrate_tsc)(void); | 148 | unsigned long (*calibrate_tsc)(void); |
@@ -150,6 +151,7 @@ struct x86_platform_ops { | |||
150 | void (*iommu_shutdown)(void); | 151 | void (*iommu_shutdown)(void); |
151 | bool (*is_untracked_pat_range)(u64 start, u64 end); | 152 | bool (*is_untracked_pat_range)(u64 start, u64 end); |
152 | void (*nmi_init)(void); | 153 | void (*nmi_init)(void); |
154 | int (*i8042_detect)(void); | ||
153 | }; | 155 | }; |
154 | 156 | ||
155 | extern struct x86_init_ops x86_init; | 157 | extern struct x86_init_ops x86_init; |
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 9c371e4a9fa6..7fda040a76cd 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h | |||
@@ -417,6 +417,12 @@ HYPERVISOR_nmi_op(unsigned long op, unsigned long arg) | |||
417 | return _hypercall2(int, nmi_op, op, arg); | 417 | return _hypercall2(int, nmi_op, op, arg); |
418 | } | 418 | } |
419 | 419 | ||
420 | static inline unsigned long __must_check | ||
421 | HYPERVISOR_hvm_op(int op, void *arg) | ||
422 | { | ||
423 | return _hypercall2(unsigned long, hvm_op, op, arg); | ||
424 | } | ||
425 | |||
420 | static inline void | 426 | static inline void |
421 | MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) | 427 | MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) |
422 | { | 428 | { |
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 30dfc81804d5..06acdbd7570a 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h | |||
@@ -13,6 +13,12 @@ | |||
13 | 13 | ||
14 | #define FXSAVE_SIZE 512 | 14 | #define FXSAVE_SIZE 512 |
15 | 15 | ||
16 | #define XSAVE_HDR_SIZE 64 | ||
17 | #define XSAVE_HDR_OFFSET FXSAVE_SIZE | ||
18 | |||
19 | #define XSAVE_YMM_SIZE 256 | ||
20 | #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) | ||
21 | |||
16 | /* | 22 | /* |
17 | * These are the features that the OS can handle currently. | 23 | * These are the features that the OS can handle currently. |
18 | */ | 24 | */ |
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 2e837f5080fe..fb7a5f052e2b 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c | |||
@@ -145,6 +145,15 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, | |||
145 | percpu_entry->states[cx->index].eax = cx->address; | 145 | percpu_entry->states[cx->index].eax = cx->address; |
146 | percpu_entry->states[cx->index].ecx = MWAIT_ECX_INTERRUPT_BREAK; | 146 | percpu_entry->states[cx->index].ecx = MWAIT_ECX_INTERRUPT_BREAK; |
147 | } | 147 | } |
148 | |||
149 | /* | ||
150 | * For _CST FFH on Intel, if GAS.access_size bit 1 is cleared, | ||
151 | * then we should skip checking BM_STS for this C-state. | ||
152 | * ref: "Intel Processor Vendor-Specific ACPI Interface Specification" | ||
153 | */ | ||
154 | if ((c->x86_vendor == X86_VENDOR_INTEL) && !(reg->access_size & 0x2)) | ||
155 | cx->bm_sts_skip = 1; | ||
156 | |||
148 | return retval; | 157 | return retval; |
149 | } | 158 | } |
150 | EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe); | 159 | EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe); |
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S index 580b4e296010..28595d6df47c 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.S +++ b/arch/x86/kernel/acpi/realmode/wakeup.S | |||
@@ -104,7 +104,7 @@ _start: | |||
104 | movl %eax, %ecx | 104 | movl %eax, %ecx |
105 | orl %edx, %ecx | 105 | orl %edx, %ecx |
106 | jz 1f | 106 | jz 1f |
107 | movl $0xc0000080, %ecx | 107 | movl $MSR_EFER, %ecx |
108 | wrmsr | 108 | wrmsr |
109 | 1: | 109 | 1: |
110 | 110 | ||
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 82e508677b91..33cec152070d 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c | |||
@@ -2,7 +2,7 @@ | |||
2 | * sleep.c - x86-specific ACPI sleep support. | 2 | * sleep.c - x86-specific ACPI sleep support. |
3 | * | 3 | * |
4 | * Copyright (C) 2001-2003 Patrick Mochel | 4 | * Copyright (C) 2001-2003 Patrick Mochel |
5 | * Copyright (C) 2001-2003 Pavel Machek <pavel@suse.cz> | 5 | * Copyright (C) 2001-2003 Pavel Machek <pavel@ucw.cz> |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include <linux/acpi.h> | 8 | #include <linux/acpi.h> |
@@ -157,9 +157,14 @@ static int __init acpi_sleep_setup(char *str) | |||
157 | #ifdef CONFIG_HIBERNATION | 157 | #ifdef CONFIG_HIBERNATION |
158 | if (strncmp(str, "s4_nohwsig", 10) == 0) | 158 | if (strncmp(str, "s4_nohwsig", 10) == 0) |
159 | acpi_no_s4_hw_signature(); | 159 | acpi_no_s4_hw_signature(); |
160 | if (strncmp(str, "s4_nonvs", 8) == 0) | 160 | if (strncmp(str, "s4_nonvs", 8) == 0) { |
161 | acpi_s4_no_nvs(); | 161 | pr_warning("ACPI: acpi_sleep=s4_nonvs is deprecated, " |
162 | "please use acpi_sleep=nonvs instead"); | ||
163 | acpi_nvs_nosave(); | ||
164 | } | ||
162 | #endif | 165 | #endif |
166 | if (strncmp(str, "nonvs", 5) == 0) | ||
167 | acpi_nvs_nosave(); | ||
163 | if (strncmp(str, "old_ordering", 12) == 0) | 168 | if (strncmp(str, "old_ordering", 12) == 0) |
164 | acpi_old_suspend_ordering(); | 169 | acpi_old_suspend_ordering(); |
165 | str = strchr(str, ','); | 170 | str = strchr(str, ','); |
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0d20286d78c6..fa044e1e30a2 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c | |||
@@ -2572,6 +2572,11 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, | |||
2572 | static int amd_iommu_domain_has_cap(struct iommu_domain *domain, | 2572 | static int amd_iommu_domain_has_cap(struct iommu_domain *domain, |
2573 | unsigned long cap) | 2573 | unsigned long cap) |
2574 | { | 2574 | { |
2575 | switch (cap) { | ||
2576 | case IOMMU_CAP_CACHE_COHERENCY: | ||
2577 | return 1; | ||
2578 | } | ||
2579 | |||
2575 | return 0; | 2580 | return 0; |
2576 | } | 2581 | } |
2577 | 2582 | ||
@@ -2609,8 +2614,7 @@ int __init amd_iommu_init_passthrough(void) | |||
2609 | 2614 | ||
2610 | pt_domain->mode |= PAGE_MODE_NONE; | 2615 | pt_domain->mode |= PAGE_MODE_NONE; |
2611 | 2616 | ||
2612 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | 2617 | for_each_pci_dev(dev) { |
2613 | |||
2614 | if (!check_device(&dev->dev)) | 2618 | if (!check_device(&dev->dev)) |
2615 | continue; | 2619 | continue; |
2616 | 2620 | ||
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index a35347501d36..8dd77800ff5d 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c | |||
@@ -43,10 +43,11 @@ | |||
43 | 43 | ||
44 | #include <asm/fixmap.h> | 44 | #include <asm/fixmap.h> |
45 | #include <asm/apb_timer.h> | 45 | #include <asm/apb_timer.h> |
46 | #include <asm/mrst.h> | ||
46 | 47 | ||
47 | #define APBT_MASK CLOCKSOURCE_MASK(32) | 48 | #define APBT_MASK CLOCKSOURCE_MASK(32) |
48 | #define APBT_SHIFT 22 | 49 | #define APBT_SHIFT 22 |
49 | #define APBT_CLOCKEVENT_RATING 150 | 50 | #define APBT_CLOCKEVENT_RATING 110 |
50 | #define APBT_CLOCKSOURCE_RATING 250 | 51 | #define APBT_CLOCKSOURCE_RATING 250 |
51 | #define APBT_MIN_DELTA_USEC 200 | 52 | #define APBT_MIN_DELTA_USEC 200 |
52 | 53 | ||
@@ -83,8 +84,6 @@ struct apbt_dev { | |||
83 | char name[10]; | 84 | char name[10]; |
84 | }; | 85 | }; |
85 | 86 | ||
86 | int disable_apbt_percpu __cpuinitdata; | ||
87 | |||
88 | static DEFINE_PER_CPU(struct apbt_dev, cpu_apbt_dev); | 87 | static DEFINE_PER_CPU(struct apbt_dev, cpu_apbt_dev); |
89 | 88 | ||
90 | #ifdef CONFIG_SMP | 89 | #ifdef CONFIG_SMP |
@@ -195,29 +194,6 @@ static struct clock_event_device apbt_clockevent = { | |||
195 | }; | 194 | }; |
196 | 195 | ||
197 | /* | 196 | /* |
198 | * if user does not want to use per CPU apb timer, just give it a lower rating | ||
199 | * than local apic timer and skip the late per cpu timer init. | ||
200 | */ | ||
201 | static inline int __init setup_x86_mrst_timer(char *arg) | ||
202 | { | ||
203 | if (!arg) | ||
204 | return -EINVAL; | ||
205 | |||
206 | if (strcmp("apbt_only", arg) == 0) | ||
207 | disable_apbt_percpu = 0; | ||
208 | else if (strcmp("lapic_and_apbt", arg) == 0) | ||
209 | disable_apbt_percpu = 1; | ||
210 | else { | ||
211 | pr_warning("X86 MRST timer option %s not recognised" | ||
212 | " use x86_mrst_timer=apbt_only or lapic_and_apbt\n", | ||
213 | arg); | ||
214 | return -EINVAL; | ||
215 | } | ||
216 | return 0; | ||
217 | } | ||
218 | __setup("x86_mrst_timer=", setup_x86_mrst_timer); | ||
219 | |||
220 | /* | ||
221 | * start count down from 0xffff_ffff. this is done by toggling the enable bit | 197 | * start count down from 0xffff_ffff. this is done by toggling the enable bit |
222 | * then load initial load count to ~0. | 198 | * then load initial load count to ~0. |
223 | */ | 199 | */ |
@@ -335,7 +311,7 @@ static int __init apbt_clockevent_register(void) | |||
335 | adev->num = smp_processor_id(); | 311 | adev->num = smp_processor_id(); |
336 | memcpy(&adev->evt, &apbt_clockevent, sizeof(struct clock_event_device)); | 312 | memcpy(&adev->evt, &apbt_clockevent, sizeof(struct clock_event_device)); |
337 | 313 | ||
338 | if (disable_apbt_percpu) { | 314 | if (mrst_timer_options == MRST_TIMER_LAPIC_APBT) { |
339 | apbt_clockevent.rating = APBT_CLOCKEVENT_RATING - 100; | 315 | apbt_clockevent.rating = APBT_CLOCKEVENT_RATING - 100; |
340 | global_clock_event = &adev->evt; | 316 | global_clock_event = &adev->evt; |
341 | printk(KERN_DEBUG "%s clockevent registered as global\n", | 317 | printk(KERN_DEBUG "%s clockevent registered as global\n", |
@@ -429,7 +405,8 @@ static int apbt_cpuhp_notify(struct notifier_block *n, | |||
429 | 405 | ||
430 | static __init int apbt_late_init(void) | 406 | static __init int apbt_late_init(void) |
431 | { | 407 | { |
432 | if (disable_apbt_percpu || !apb_timer_block_enabled) | 408 | if (mrst_timer_options == MRST_TIMER_LAPIC_APBT || |
409 | !apb_timer_block_enabled) | ||
433 | return 0; | 410 | return 0; |
434 | /* This notifier should be called after workqueue is ready */ | 411 | /* This notifier should be called after workqueue is ready */ |
435 | hotcpu_notifier(apbt_cpuhp_notify, -20); | 412 | hotcpu_notifier(apbt_cpuhp_notify, -20); |
@@ -450,6 +427,8 @@ static void apbt_set_mode(enum clock_event_mode mode, | |||
450 | int timer_num; | 427 | int timer_num; |
451 | struct apbt_dev *adev = EVT_TO_APBT_DEV(evt); | 428 | struct apbt_dev *adev = EVT_TO_APBT_DEV(evt); |
452 | 429 | ||
430 | BUG_ON(!apbt_virt_address); | ||
431 | |||
453 | timer_num = adev->num; | 432 | timer_num = adev->num; |
454 | pr_debug("%s CPU %d timer %d mode=%d\n", | 433 | pr_debug("%s CPU %d timer %d mode=%d\n", |
455 | __func__, first_cpu(*evt->cpumask), timer_num, mode); | 434 | __func__, first_cpu(*evt->cpumask), timer_num, mode); |
@@ -676,7 +655,7 @@ void __init apbt_time_init(void) | |||
676 | } | 655 | } |
677 | #ifdef CONFIG_SMP | 656 | #ifdef CONFIG_SMP |
678 | /* kernel cmdline disable apb timer, so we will use lapic timers */ | 657 | /* kernel cmdline disable apb timer, so we will use lapic timers */ |
679 | if (disable_apbt_percpu) { | 658 | if (mrst_timer_options == MRST_TIMER_LAPIC_APBT) { |
680 | printk(KERN_INFO "apbt: disabled per cpu timer\n"); | 659 | printk(KERN_INFO "apbt: disabled per cpu timer\n"); |
681 | return; | 660 | return; |
682 | } | 661 | } |
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index b5d8b0bcf235..a2e0caf26e17 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c | |||
@@ -280,7 +280,7 @@ void __init early_gart_iommu_check(void) | |||
280 | * or BIOS forget to put that in reserved. | 280 | * or BIOS forget to put that in reserved. |
281 | * try to update e820 to make that region as reserved. | 281 | * try to update e820 to make that region as reserved. |
282 | */ | 282 | */ |
283 | u32 agp_aper_base = 0, agp_aper_order = 0; | 283 | u32 agp_aper_order = 0; |
284 | int i, fix, slot, valid_agp = 0; | 284 | int i, fix, slot, valid_agp = 0; |
285 | u32 ctl; | 285 | u32 ctl; |
286 | u32 aper_size = 0, aper_order = 0, last_aper_order = 0; | 286 | u32 aper_size = 0, aper_order = 0, last_aper_order = 0; |
@@ -291,7 +291,7 @@ void __init early_gart_iommu_check(void) | |||
291 | return; | 291 | return; |
292 | 292 | ||
293 | /* This is mostly duplicate of iommu_hole_init */ | 293 | /* This is mostly duplicate of iommu_hole_init */ |
294 | agp_aper_base = search_agp_bridge(&agp_aper_order, &valid_agp); | 294 | search_agp_bridge(&agp_aper_order, &valid_agp); |
295 | 295 | ||
296 | fix = 0; | 296 | fix = 0; |
297 | for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { | 297 | for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { |
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index 565c1bfc507d..910f20b457c4 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile | |||
@@ -2,7 +2,12 @@ | |||
2 | # Makefile for local APIC drivers and for the IO-APIC code | 2 | # Makefile for local APIC drivers and for the IO-APIC code |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o nmi.o | 5 | obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o |
6 | ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y) | ||
7 | obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o | ||
8 | endif | ||
9 | obj-$(CONFIG_HARDLOCKUP_DETECTOR) += hw_nmi.o | ||
10 | |||
6 | obj-$(CONFIG_X86_IO_APIC) += io_apic.o | 11 | obj-$(CONFIG_X86_IO_APIC) += io_apic.o |
7 | obj-$(CONFIG_SMP) += ipi.o | 12 | obj-$(CONFIG_SMP) += ipi.o |
8 | 13 | ||
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index c02cc692985c..980508c79082 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -460,7 +460,7 @@ static void lapic_timer_broadcast(const struct cpumask *mask) | |||
460 | } | 460 | } |
461 | 461 | ||
462 | /* | 462 | /* |
463 | * Setup the local APIC timer for this CPU. Copy the initilized values | 463 | * Setup the local APIC timer for this CPU. Copy the initialized values |
464 | * of the boot CPU and register the clock event in the framework. | 464 | * of the boot CPU and register the clock event in the framework. |
465 | */ | 465 | */ |
466 | static void __cpuinit setup_APIC_timer(void) | 466 | static void __cpuinit setup_APIC_timer(void) |
@@ -921,7 +921,7 @@ void disable_local_APIC(void) | |||
921 | unsigned int value; | 921 | unsigned int value; |
922 | 922 | ||
923 | /* APIC hasn't been mapped yet */ | 923 | /* APIC hasn't been mapped yet */ |
924 | if (!apic_phys) | 924 | if (!x2apic_mode && !apic_phys) |
925 | return; | 925 | return; |
926 | 926 | ||
927 | clear_local_APIC(); | 927 | clear_local_APIC(); |
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 425e53a87feb..8593582d8022 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c | |||
@@ -129,7 +129,6 @@ int es7000_plat; | |||
129 | * GSI override for ES7000 platforms. | 129 | * GSI override for ES7000 platforms. |
130 | */ | 130 | */ |
131 | 131 | ||
132 | static unsigned int base; | ||
133 | 132 | ||
134 | static int __cpuinit wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) | 133 | static int __cpuinit wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) |
135 | { | 134 | { |
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c new file mode 100644 index 000000000000..cefd6942f0e9 --- /dev/null +++ b/arch/x86/kernel/apic/hw_nmi.c | |||
@@ -0,0 +1,107 @@ | |||
1 | /* | ||
2 | * HW NMI watchdog support | ||
3 | * | ||
4 | * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc. | ||
5 | * | ||
6 | * Arch specific calls to support NMI watchdog | ||
7 | * | ||
8 | * Bits copied from original nmi.c file | ||
9 | * | ||
10 | */ | ||
11 | #include <asm/apic.h> | ||
12 | |||
13 | #include <linux/cpumask.h> | ||
14 | #include <linux/kdebug.h> | ||
15 | #include <linux/notifier.h> | ||
16 | #include <linux/kprobes.h> | ||
17 | #include <linux/nmi.h> | ||
18 | #include <linux/module.h> | ||
19 | |||
20 | /* For reliability, we're prepared to waste bits here. */ | ||
21 | static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; | ||
22 | |||
23 | u64 hw_nmi_get_sample_period(void) | ||
24 | { | ||
25 | return (u64)(cpu_khz) * 1000 * 60; | ||
26 | } | ||
27 | |||
28 | #ifdef ARCH_HAS_NMI_WATCHDOG | ||
29 | void arch_trigger_all_cpu_backtrace(void) | ||
30 | { | ||
31 | int i; | ||
32 | |||
33 | cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); | ||
34 | |||
35 | printk(KERN_INFO "sending NMI to all CPUs:\n"); | ||
36 | apic->send_IPI_all(NMI_VECTOR); | ||
37 | |||
38 | /* Wait for up to 10 seconds for all CPUs to do the backtrace */ | ||
39 | for (i = 0; i < 10 * 1000; i++) { | ||
40 | if (cpumask_empty(to_cpumask(backtrace_mask))) | ||
41 | break; | ||
42 | mdelay(1); | ||
43 | } | ||
44 | } | ||
45 | |||
46 | static int __kprobes | ||
47 | arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, | ||
48 | unsigned long cmd, void *__args) | ||
49 | { | ||
50 | struct die_args *args = __args; | ||
51 | struct pt_regs *regs; | ||
52 | int cpu = smp_processor_id(); | ||
53 | |||
54 | switch (cmd) { | ||
55 | case DIE_NMI: | ||
56 | case DIE_NMI_IPI: | ||
57 | break; | ||
58 | |||
59 | default: | ||
60 | return NOTIFY_DONE; | ||
61 | } | ||
62 | |||
63 | regs = args->regs; | ||
64 | |||
65 | if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { | ||
66 | static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED; | ||
67 | |||
68 | arch_spin_lock(&lock); | ||
69 | printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); | ||
70 | show_regs(regs); | ||
71 | dump_stack(); | ||
72 | arch_spin_unlock(&lock); | ||
73 | cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); | ||
74 | return NOTIFY_STOP; | ||
75 | } | ||
76 | |||
77 | return NOTIFY_DONE; | ||
78 | } | ||
79 | |||
80 | static __read_mostly struct notifier_block backtrace_notifier = { | ||
81 | .notifier_call = arch_trigger_all_cpu_backtrace_handler, | ||
82 | .next = NULL, | ||
83 | .priority = 1 | ||
84 | }; | ||
85 | |||
86 | static int __init register_trigger_all_cpu_backtrace(void) | ||
87 | { | ||
88 | register_die_notifier(&backtrace_notifier); | ||
89 | return 0; | ||
90 | } | ||
91 | early_initcall(register_trigger_all_cpu_backtrace); | ||
92 | #endif | ||
93 | |||
94 | /* STUB calls to mimic old nmi_watchdog behaviour */ | ||
95 | #if defined(CONFIG_X86_LOCAL_APIC) | ||
96 | unsigned int nmi_watchdog = NMI_NONE; | ||
97 | EXPORT_SYMBOL(nmi_watchdog); | ||
98 | void acpi_nmi_enable(void) { return; } | ||
99 | void acpi_nmi_disable(void) { return; } | ||
100 | #endif | ||
101 | atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ | ||
102 | EXPORT_SYMBOL(nmi_active); | ||
103 | int unknown_nmi_panic; | ||
104 | void cpu_nmi_set_wd_enabled(void) { return; } | ||
105 | void stop_apic_nmi_watchdog(void *unused) { return; } | ||
106 | void setup_apic_nmi_watchdog(void *unused) { return; } | ||
107 | int __init check_nmi_watchdog(void) { return 0; } | ||
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index e41ed24ab26d..4dc0084ec1b1 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -3397,7 +3397,7 @@ static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) | |||
3397 | 3397 | ||
3398 | cfg = desc->chip_data; | 3398 | cfg = desc->chip_data; |
3399 | 3399 | ||
3400 | read_msi_msg_desc(desc, &msg); | 3400 | get_cached_msi_msg_desc(desc, &msg); |
3401 | 3401 | ||
3402 | msg.data &= ~MSI_DATA_VECTOR_MASK; | 3402 | msg.data &= ~MSI_DATA_VECTOR_MASK; |
3403 | msg.data |= MSI_DATA_VECTOR(cfg->vector); | 3403 | msg.data |= MSI_DATA_VECTOR(cfg->vector); |
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index 1edaf15c0b8e..a43f71cb30f8 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c | |||
@@ -401,13 +401,6 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) | |||
401 | int cpu = smp_processor_id(); | 401 | int cpu = smp_processor_id(); |
402 | int rc = 0; | 402 | int rc = 0; |
403 | 403 | ||
404 | /* check for other users first */ | ||
405 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) | ||
406 | == NOTIFY_STOP) { | ||
407 | rc = 1; | ||
408 | touched = 1; | ||
409 | } | ||
410 | |||
411 | sum = get_timer_irqs(cpu); | 404 | sum = get_timer_irqs(cpu); |
412 | 405 | ||
413 | if (__get_cpu_var(nmi_touch)) { | 406 | if (__get_cpu_var(nmi_touch)) { |
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index c4f9182ca3ac..4c9c67bf09b7 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
@@ -140,7 +140,7 @@ | |||
140 | * is now the way life works). | 140 | * is now the way life works). |
141 | * Fix thinko in suspend() (wrong return). | 141 | * Fix thinko in suspend() (wrong return). |
142 | * Notify drivers on critical suspend. | 142 | * Notify drivers on critical suspend. |
143 | * Make kapmd absorb more idle time (Pavel Machek <pavel@suse.cz> | 143 | * Make kapmd absorb more idle time (Pavel Machek <pavel@ucw.cz> |
144 | * modified by sfr). | 144 | * modified by sfr). |
145 | * Disable interrupts while we are suspended (Andy Henroid | 145 | * Disable interrupts while we are suspended (Andy Henroid |
146 | * <andy_henroid@yahoo.com> fixed by sfr). | 146 | * <andy_henroid@yahoo.com> fixed by sfr). |
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 3a785da34b6f..3f0ebe429a01 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -12,11 +12,11 @@ endif | |||
12 | nostackp := $(call cc-option, -fno-stack-protector) | 12 | nostackp := $(call cc-option, -fno-stack-protector) |
13 | CFLAGS_common.o := $(nostackp) | 13 | CFLAGS_common.o := $(nostackp) |
14 | 14 | ||
15 | obj-y := intel_cacheinfo.o addon_cpuid_features.o | 15 | obj-y := intel_cacheinfo.o scattered.o topology.o |
16 | obj-y += proc.o capflags.o powerflags.o common.o | 16 | obj-y += proc.o capflags.o powerflags.o common.o |
17 | obj-y += vmware.o hypervisor.o sched.o mshyperv.o | 17 | obj-y += vmware.o hypervisor.o sched.o mshyperv.o |
18 | 18 | ||
19 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o | 19 | obj-$(CONFIG_X86_32) += bugs.o |
20 | obj-$(CONFIG_X86_64) += bugs_64.o | 20 | obj-$(CONFIG_X86_64) += bugs_64.o |
21 | 21 | ||
22 | obj-$(CONFIG_CPU_SUP_INTEL) += intel.o | 22 | obj-$(CONFIG_CPU_SUP_INTEL) += intel.o |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index e485825130d2..60a57b13082d 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -466,7 +466,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
466 | } | 466 | } |
467 | 467 | ||
468 | } | 468 | } |
469 | if (c->x86 == 0x10 || c->x86 == 0x11) | 469 | if (c->x86 >= 0x10) |
470 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | 470 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); |
471 | 471 | ||
472 | /* get apicid instead of initial apic id from cpuid */ | 472 | /* get apicid instead of initial apic id from cpuid */ |
@@ -529,7 +529,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
529 | num_cache_leaves = 3; | 529 | num_cache_leaves = 3; |
530 | } | 530 | } |
531 | 531 | ||
532 | if (c->x86 >= 0xf && c->x86 <= 0x11) | 532 | if (c->x86 >= 0xf) |
533 | set_cpu_cap(c, X86_FEATURE_K8); | 533 | set_cpu_cap(c, X86_FEATURE_K8); |
534 | 534 | ||
535 | if (cpu_has_xmm2) { | 535 | if (cpu_has_xmm2) { |
@@ -546,7 +546,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
546 | fam10h_check_enable_mmcfg(); | 546 | fam10h_check_enable_mmcfg(); |
547 | } | 547 | } |
548 | 548 | ||
549 | if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { | 549 | if (c == &boot_cpu_data && c->x86 >= 0xf) { |
550 | unsigned long long tseg; | 550 | unsigned long long tseg; |
551 | 551 | ||
552 | /* | 552 | /* |
@@ -609,3 +609,74 @@ static const struct cpu_dev __cpuinitconst amd_cpu_dev = { | |||
609 | }; | 609 | }; |
610 | 610 | ||
611 | cpu_dev_register(amd_cpu_dev); | 611 | cpu_dev_register(amd_cpu_dev); |
612 | |||
613 | /* | ||
614 | * AMD errata checking | ||
615 | * | ||
616 | * Errata are defined as arrays of ints using the AMD_LEGACY_ERRATUM() or | ||
617 | * AMD_OSVW_ERRATUM() macros. The latter is intended for newer errata that | ||
618 | * have an OSVW id assigned, which it takes as first argument. Both take a | ||
619 | * variable number of family-specific model-stepping ranges created by | ||
620 | * AMD_MODEL_RANGE(). Each erratum also has to be declared as extern const | ||
621 | * int[] in arch/x86/include/asm/processor.h. | ||
622 | * | ||
623 | * Example: | ||
624 | * | ||
625 | * const int amd_erratum_319[] = | ||
626 | * AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2), | ||
627 | * AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0), | ||
628 | * AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0)); | ||
629 | */ | ||
630 | |||
631 | const int amd_erratum_400[] = | ||
632 | AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf), | ||
633 | AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf)); | ||
634 | EXPORT_SYMBOL_GPL(amd_erratum_400); | ||
635 | |||
636 | const int amd_erratum_383[] = | ||
637 | AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf)); | ||
638 | EXPORT_SYMBOL_GPL(amd_erratum_383); | ||
639 | |||
640 | bool cpu_has_amd_erratum(const int *erratum) | ||
641 | { | ||
642 | struct cpuinfo_x86 *cpu = ¤t_cpu_data; | ||
643 | int osvw_id = *erratum++; | ||
644 | u32 range; | ||
645 | u32 ms; | ||
646 | |||
647 | /* | ||
648 | * If called early enough that current_cpu_data hasn't been initialized | ||
649 | * yet, fall back to boot_cpu_data. | ||
650 | */ | ||
651 | if (cpu->x86 == 0) | ||
652 | cpu = &boot_cpu_data; | ||
653 | |||
654 | if (cpu->x86_vendor != X86_VENDOR_AMD) | ||
655 | return false; | ||
656 | |||
657 | if (osvw_id >= 0 && osvw_id < 65536 && | ||
658 | cpu_has(cpu, X86_FEATURE_OSVW)) { | ||
659 | u64 osvw_len; | ||
660 | |||
661 | rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, osvw_len); | ||
662 | if (osvw_id < osvw_len) { | ||
663 | u64 osvw_bits; | ||
664 | |||
665 | rdmsrl(MSR_AMD64_OSVW_STATUS + (osvw_id >> 6), | ||
666 | osvw_bits); | ||
667 | return osvw_bits & (1ULL << (osvw_id & 0x3f)); | ||
668 | } | ||
669 | } | ||
670 | |||
671 | /* OSVW unavailable or ID unknown, match family-model-stepping range */ | ||
672 | ms = (cpu->x86_model << 8) | cpu->x86_mask; | ||
673 | while ((range = *erratum++)) | ||
674 | if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) && | ||
675 | (ms >= AMD_MODEL_RANGE_START(range)) && | ||
676 | (ms <= AMD_MODEL_RANGE_END(range))) | ||
677 | return true; | ||
678 | |||
679 | return false; | ||
680 | } | ||
681 | |||
682 | EXPORT_SYMBOL_GPL(cpu_has_amd_erratum); | ||
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 68e4a6f2211e..f10273138382 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -551,6 +551,16 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) | |||
551 | c->x86_capability[4] = excap; | 551 | c->x86_capability[4] = excap; |
552 | } | 552 | } |
553 | 553 | ||
554 | /* Additional Intel-defined flags: level 0x00000007 */ | ||
555 | if (c->cpuid_level >= 0x00000007) { | ||
556 | u32 eax, ebx, ecx, edx; | ||
557 | |||
558 | cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); | ||
559 | |||
560 | if (eax > 0) | ||
561 | c->x86_capability[9] = ebx; | ||
562 | } | ||
563 | |||
554 | /* AMD-defined flags: level 0x80000001 */ | 564 | /* AMD-defined flags: level 0x80000001 */ |
555 | xlvl = cpuid_eax(0x80000000); | 565 | xlvl = cpuid_eax(0x80000000); |
556 | c->extended_cpuid_level = xlvl; | 566 | c->extended_cpuid_level = xlvl; |
@@ -576,6 +586,7 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) | |||
576 | if (c->extended_cpuid_level >= 0x80000007) | 586 | if (c->extended_cpuid_level >= 0x80000007) |
577 | c->x86_power = cpuid_edx(0x80000007); | 587 | c->x86_power = cpuid_edx(0x80000007); |
578 | 588 | ||
589 | init_scattered_cpuid_features(c); | ||
579 | } | 590 | } |
580 | 591 | ||
581 | static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c) | 592 | static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c) |
@@ -731,7 +742,6 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) | |||
731 | 742 | ||
732 | get_model_name(c); /* Default name */ | 743 | get_model_name(c); /* Default name */ |
733 | 744 | ||
734 | init_scattered_cpuid_features(c); | ||
735 | detect_nopl(c); | 745 | detect_nopl(c); |
736 | } | 746 | } |
737 | 747 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 1d3cddaa40ee..246cd3afbb5f 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | |||
@@ -34,7 +34,6 @@ | |||
34 | #include <linux/compiler.h> | 34 | #include <linux/compiler.h> |
35 | #include <linux/dmi.h> | 35 | #include <linux/dmi.h> |
36 | #include <linux/slab.h> | 36 | #include <linux/slab.h> |
37 | #include <trace/events/power.h> | ||
38 | 37 | ||
39 | #include <linux/acpi.h> | 38 | #include <linux/acpi.h> |
40 | #include <linux/io.h> | 39 | #include <linux/io.h> |
@@ -324,8 +323,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, | |||
324 | } | 323 | } |
325 | } | 324 | } |
326 | 325 | ||
327 | trace_power_frequency(POWER_PSTATE, data->freq_table[next_state].frequency); | ||
328 | |||
329 | switch (data->cpu_feature) { | 326 | switch (data->cpu_feature) { |
330 | case SYSTEM_INTEL_MSR_CAPABLE: | 327 | case SYSTEM_INTEL_MSR_CAPABLE: |
331 | cmd.type = SYSTEM_INTEL_MSR_CAPABLE; | 328 | cmd.type = SYSTEM_INTEL_MSR_CAPABLE; |
@@ -351,7 +348,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, | |||
351 | 348 | ||
352 | freqs.old = perf->states[perf->state].core_frequency * 1000; | 349 | freqs.old = perf->states[perf->state].core_frequency * 1000; |
353 | freqs.new = data->freq_table[next_state].frequency; | 350 | freqs.new = data->freq_table[next_state].frequency; |
354 | for_each_cpu(i, cmd.mask) { | 351 | for_each_cpu(i, policy->cpus) { |
355 | freqs.cpu = i; | 352 | freqs.cpu = i; |
356 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 353 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
357 | } | 354 | } |
@@ -367,7 +364,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, | |||
367 | } | 364 | } |
368 | } | 365 | } |
369 | 366 | ||
370 | for_each_cpu(i, cmd.mask) { | 367 | for_each_cpu(i, policy->cpus) { |
371 | freqs.cpu = i; | 368 | freqs.cpu = i; |
372 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 369 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
373 | } | 370 | } |
diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c index 16e3483be9e3..32974cf84232 100644 --- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c +++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c | |||
@@ -169,12 +169,9 @@ static int gx_freq_mult[16] = { | |||
169 | * Low Level chipset interface * | 169 | * Low Level chipset interface * |
170 | ****************************************************************/ | 170 | ****************************************************************/ |
171 | static struct pci_device_id gx_chipset_tbl[] __initdata = { | 171 | static struct pci_device_id gx_chipset_tbl[] __initdata = { |
172 | { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, | 172 | { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY), }, |
173 | PCI_ANY_ID, PCI_ANY_ID }, | 173 | { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5520), }, |
174 | { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, | 174 | { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5510), }, |
175 | PCI_ANY_ID, PCI_ANY_ID }, | ||
176 | { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510, | ||
177 | PCI_ANY_ID, PCI_ANY_ID }, | ||
178 | { 0, }, | 175 | { 0, }, |
179 | }; | 176 | }; |
180 | 177 | ||
@@ -199,7 +196,7 @@ static __init struct pci_dev *gx_detect_chipset(void) | |||
199 | } | 196 | } |
200 | 197 | ||
201 | /* detect which companion chip is used */ | 198 | /* detect which companion chip is used */ |
202 | while ((gx_pci = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, gx_pci)) != NULL) { | 199 | for_each_pci_dev(gx_pci) { |
203 | if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL) | 200 | if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL) |
204 | return gx_pci; | 201 | return gx_pci; |
205 | } | 202 | } |
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index 7e7eea4f8261..03162dac6271 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c | |||
@@ -426,7 +426,7 @@ static int guess_fsb(int mult) | |||
426 | } | 426 | } |
427 | 427 | ||
428 | 428 | ||
429 | static int __init longhaul_get_ranges(void) | 429 | static int __cpuinit longhaul_get_ranges(void) |
430 | { | 430 | { |
431 | unsigned int i, j, k = 0; | 431 | unsigned int i, j, k = 0; |
432 | unsigned int ratio; | 432 | unsigned int ratio; |
@@ -530,7 +530,7 @@ static int __init longhaul_get_ranges(void) | |||
530 | } | 530 | } |
531 | 531 | ||
532 | 532 | ||
533 | static void __init longhaul_setup_voltagescaling(void) | 533 | static void __cpuinit longhaul_setup_voltagescaling(void) |
534 | { | 534 | { |
535 | union msr_longhaul longhaul; | 535 | union msr_longhaul longhaul; |
536 | struct mV_pos minvid, maxvid, vid; | 536 | struct mV_pos minvid, maxvid, vid; |
@@ -784,7 +784,7 @@ static int longhaul_setup_southbridge(void) | |||
784 | return 0; | 784 | return 0; |
785 | } | 785 | } |
786 | 786 | ||
787 | static int __init longhaul_cpu_init(struct cpufreq_policy *policy) | 787 | static int __cpuinit longhaul_cpu_init(struct cpufreq_policy *policy) |
788 | { | 788 | { |
789 | struct cpuinfo_x86 *c = &cpu_data(0); | 789 | struct cpuinfo_x86 *c = &cpu_data(0); |
790 | char *cpuname = NULL; | 790 | char *cpuname = NULL; |
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.h b/arch/x86/kernel/cpu/cpufreq/longhaul.h index e2360a469f79..cbf48fbca881 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.h +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.h | |||
@@ -56,7 +56,7 @@ union msr_longhaul { | |||
56 | /* | 56 | /* |
57 | * VIA C3 Samuel 1 & Samuel 2 (stepping 0) | 57 | * VIA C3 Samuel 1 & Samuel 2 (stepping 0) |
58 | */ | 58 | */ |
59 | static const int __initdata samuel1_mults[16] = { | 59 | static const int __cpuinitdata samuel1_mults[16] = { |
60 | -1, /* 0000 -> RESERVED */ | 60 | -1, /* 0000 -> RESERVED */ |
61 | 30, /* 0001 -> 3.0x */ | 61 | 30, /* 0001 -> 3.0x */ |
62 | 40, /* 0010 -> 4.0x */ | 62 | 40, /* 0010 -> 4.0x */ |
@@ -75,7 +75,7 @@ static const int __initdata samuel1_mults[16] = { | |||
75 | -1, /* 1111 -> RESERVED */ | 75 | -1, /* 1111 -> RESERVED */ |
76 | }; | 76 | }; |
77 | 77 | ||
78 | static const int __initdata samuel1_eblcr[16] = { | 78 | static const int __cpuinitdata samuel1_eblcr[16] = { |
79 | 50, /* 0000 -> RESERVED */ | 79 | 50, /* 0000 -> RESERVED */ |
80 | 30, /* 0001 -> 3.0x */ | 80 | 30, /* 0001 -> 3.0x */ |
81 | 40, /* 0010 -> 4.0x */ | 81 | 40, /* 0010 -> 4.0x */ |
@@ -97,7 +97,7 @@ static const int __initdata samuel1_eblcr[16] = { | |||
97 | /* | 97 | /* |
98 | * VIA C3 Samuel2 Stepping 1->15 | 98 | * VIA C3 Samuel2 Stepping 1->15 |
99 | */ | 99 | */ |
100 | static const int __initdata samuel2_eblcr[16] = { | 100 | static const int __cpuinitdata samuel2_eblcr[16] = { |
101 | 50, /* 0000 -> 5.0x */ | 101 | 50, /* 0000 -> 5.0x */ |
102 | 30, /* 0001 -> 3.0x */ | 102 | 30, /* 0001 -> 3.0x */ |
103 | 40, /* 0010 -> 4.0x */ | 103 | 40, /* 0010 -> 4.0x */ |
@@ -119,7 +119,7 @@ static const int __initdata samuel2_eblcr[16] = { | |||
119 | /* | 119 | /* |
120 | * VIA C3 Ezra | 120 | * VIA C3 Ezra |
121 | */ | 121 | */ |
122 | static const int __initdata ezra_mults[16] = { | 122 | static const int __cpuinitdata ezra_mults[16] = { |
123 | 100, /* 0000 -> 10.0x */ | 123 | 100, /* 0000 -> 10.0x */ |
124 | 30, /* 0001 -> 3.0x */ | 124 | 30, /* 0001 -> 3.0x */ |
125 | 40, /* 0010 -> 4.0x */ | 125 | 40, /* 0010 -> 4.0x */ |
@@ -138,7 +138,7 @@ static const int __initdata ezra_mults[16] = { | |||
138 | 120, /* 1111 -> 12.0x */ | 138 | 120, /* 1111 -> 12.0x */ |
139 | }; | 139 | }; |
140 | 140 | ||
141 | static const int __initdata ezra_eblcr[16] = { | 141 | static const int __cpuinitdata ezra_eblcr[16] = { |
142 | 50, /* 0000 -> 5.0x */ | 142 | 50, /* 0000 -> 5.0x */ |
143 | 30, /* 0001 -> 3.0x */ | 143 | 30, /* 0001 -> 3.0x */ |
144 | 40, /* 0010 -> 4.0x */ | 144 | 40, /* 0010 -> 4.0x */ |
@@ -160,7 +160,7 @@ static const int __initdata ezra_eblcr[16] = { | |||
160 | /* | 160 | /* |
161 | * VIA C3 (Ezra-T) [C5M]. | 161 | * VIA C3 (Ezra-T) [C5M]. |
162 | */ | 162 | */ |
163 | static const int __initdata ezrat_mults[32] = { | 163 | static const int __cpuinitdata ezrat_mults[32] = { |
164 | 100, /* 0000 -> 10.0x */ | 164 | 100, /* 0000 -> 10.0x */ |
165 | 30, /* 0001 -> 3.0x */ | 165 | 30, /* 0001 -> 3.0x */ |
166 | 40, /* 0010 -> 4.0x */ | 166 | 40, /* 0010 -> 4.0x */ |
@@ -196,7 +196,7 @@ static const int __initdata ezrat_mults[32] = { | |||
196 | -1, /* 1111 -> RESERVED (12.0x) */ | 196 | -1, /* 1111 -> RESERVED (12.0x) */ |
197 | }; | 197 | }; |
198 | 198 | ||
199 | static const int __initdata ezrat_eblcr[32] = { | 199 | static const int __cpuinitdata ezrat_eblcr[32] = { |
200 | 50, /* 0000 -> 5.0x */ | 200 | 50, /* 0000 -> 5.0x */ |
201 | 30, /* 0001 -> 3.0x */ | 201 | 30, /* 0001 -> 3.0x */ |
202 | 40, /* 0010 -> 4.0x */ | 202 | 40, /* 0010 -> 4.0x */ |
@@ -235,7 +235,7 @@ static const int __initdata ezrat_eblcr[32] = { | |||
235 | /* | 235 | /* |
236 | * VIA C3 Nehemiah */ | 236 | * VIA C3 Nehemiah */ |
237 | 237 | ||
238 | static const int __initdata nehemiah_mults[32] = { | 238 | static const int __cpuinitdata nehemiah_mults[32] = { |
239 | 100, /* 0000 -> 10.0x */ | 239 | 100, /* 0000 -> 10.0x */ |
240 | -1, /* 0001 -> 16.0x */ | 240 | -1, /* 0001 -> 16.0x */ |
241 | 40, /* 0010 -> 4.0x */ | 241 | 40, /* 0010 -> 4.0x */ |
@@ -270,7 +270,7 @@ static const int __initdata nehemiah_mults[32] = { | |||
270 | -1, /* 1111 -> 12.0x */ | 270 | -1, /* 1111 -> 12.0x */ |
271 | }; | 271 | }; |
272 | 272 | ||
273 | static const int __initdata nehemiah_eblcr[32] = { | 273 | static const int __cpuinitdata nehemiah_eblcr[32] = { |
274 | 50, /* 0000 -> 5.0x */ | 274 | 50, /* 0000 -> 5.0x */ |
275 | 160, /* 0001 -> 16.0x */ | 275 | 160, /* 0001 -> 16.0x */ |
276 | 40, /* 0010 -> 4.0x */ | 276 | 40, /* 0010 -> 4.0x */ |
@@ -315,7 +315,7 @@ struct mV_pos { | |||
315 | unsigned short pos; | 315 | unsigned short pos; |
316 | }; | 316 | }; |
317 | 317 | ||
318 | static const struct mV_pos __initdata vrm85_mV[32] = { | 318 | static const struct mV_pos __cpuinitdata vrm85_mV[32] = { |
319 | {1250, 8}, {1200, 6}, {1150, 4}, {1100, 2}, | 319 | {1250, 8}, {1200, 6}, {1150, 4}, {1100, 2}, |
320 | {1050, 0}, {1800, 30}, {1750, 28}, {1700, 26}, | 320 | {1050, 0}, {1800, 30}, {1750, 28}, {1700, 26}, |
321 | {1650, 24}, {1600, 22}, {1550, 20}, {1500, 18}, | 321 | {1650, 24}, {1600, 22}, {1550, 20}, {1500, 18}, |
@@ -326,14 +326,14 @@ static const struct mV_pos __initdata vrm85_mV[32] = { | |||
326 | {1475, 17}, {1425, 15}, {1375, 13}, {1325, 11} | 326 | {1475, 17}, {1425, 15}, {1375, 13}, {1325, 11} |
327 | }; | 327 | }; |
328 | 328 | ||
329 | static const unsigned char __initdata mV_vrm85[32] = { | 329 | static const unsigned char __cpuinitdata mV_vrm85[32] = { |
330 | 0x04, 0x14, 0x03, 0x13, 0x02, 0x12, 0x01, 0x11, | 330 | 0x04, 0x14, 0x03, 0x13, 0x02, 0x12, 0x01, 0x11, |
331 | 0x00, 0x10, 0x0f, 0x1f, 0x0e, 0x1e, 0x0d, 0x1d, | 331 | 0x00, 0x10, 0x0f, 0x1f, 0x0e, 0x1e, 0x0d, 0x1d, |
332 | 0x0c, 0x1c, 0x0b, 0x1b, 0x0a, 0x1a, 0x09, 0x19, | 332 | 0x0c, 0x1c, 0x0b, 0x1b, 0x0a, 0x1a, 0x09, 0x19, |
333 | 0x08, 0x18, 0x07, 0x17, 0x06, 0x16, 0x05, 0x15 | 333 | 0x08, 0x18, 0x07, 0x17, 0x06, 0x16, 0x05, 0x15 |
334 | }; | 334 | }; |
335 | 335 | ||
336 | static const struct mV_pos __initdata mobilevrm_mV[32] = { | 336 | static const struct mV_pos __cpuinitdata mobilevrm_mV[32] = { |
337 | {1750, 31}, {1700, 30}, {1650, 29}, {1600, 28}, | 337 | {1750, 31}, {1700, 30}, {1650, 29}, {1600, 28}, |
338 | {1550, 27}, {1500, 26}, {1450, 25}, {1400, 24}, | 338 | {1550, 27}, {1500, 26}, {1450, 25}, {1400, 24}, |
339 | {1350, 23}, {1300, 22}, {1250, 21}, {1200, 20}, | 339 | {1350, 23}, {1300, 22}, {1250, 21}, {1200, 20}, |
@@ -344,7 +344,7 @@ static const struct mV_pos __initdata mobilevrm_mV[32] = { | |||
344 | {675, 3}, {650, 2}, {625, 1}, {600, 0} | 344 | {675, 3}, {650, 2}, {625, 1}, {600, 0} |
345 | }; | 345 | }; |
346 | 346 | ||
347 | static const unsigned char __initdata mV_mobilevrm[32] = { | 347 | static const unsigned char __cpuinitdata mV_mobilevrm[32] = { |
348 | 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, | 348 | 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, |
349 | 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, | 349 | 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, |
350 | 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, | 350 | 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, |
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c index e7b559d74c52..fc09f142d94d 100644 --- a/arch/x86/kernel/cpu/cpufreq/longrun.c +++ b/arch/x86/kernel/cpu/cpufreq/longrun.c | |||
@@ -165,8 +165,8 @@ static unsigned int longrun_get(unsigned int cpu) | |||
165 | * TMTA rules: | 165 | * TMTA rules: |
166 | * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq) | 166 | * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq) |
167 | */ | 167 | */ |
168 | static unsigned int __init longrun_determine_freqs(unsigned int *low_freq, | 168 | static unsigned int __cpuinit longrun_determine_freqs(unsigned int *low_freq, |
169 | unsigned int *high_freq) | 169 | unsigned int *high_freq) |
170 | { | 170 | { |
171 | u32 msr_lo, msr_hi; | 171 | u32 msr_lo, msr_hi; |
172 | u32 save_lo, save_hi; | 172 | u32 save_lo, save_hi; |
@@ -258,7 +258,7 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq, | |||
258 | } | 258 | } |
259 | 259 | ||
260 | 260 | ||
261 | static int __init longrun_cpu_init(struct cpufreq_policy *policy) | 261 | static int __cpuinit longrun_cpu_init(struct cpufreq_policy *policy) |
262 | { | 262 | { |
263 | int result = 0; | 263 | int result = 0; |
264 | 264 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 7b8a8ba67b07..bd1cac747f67 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | |||
@@ -178,13 +178,8 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) | |||
178 | } | 178 | } |
179 | } | 179 | } |
180 | 180 | ||
181 | if (c->x86 != 0xF) { | 181 | if (c->x86 != 0xF) |
182 | if (!cpu_has(c, X86_FEATURE_EST)) | ||
183 | printk(KERN_WARNING PFX "Unknown CPU. " | ||
184 | "Please send an e-mail to " | ||
185 | "<cpufreq@vger.kernel.org>\n"); | ||
186 | return 0; | 182 | return 0; |
187 | } | ||
188 | 183 | ||
189 | /* on P-4s, the TSC runs with constant frequency independent whether | 184 | /* on P-4s, the TSC runs with constant frequency independent whether |
190 | * throttling is active or not. */ | 185 | * throttling is active or not. */ |
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c index ce7cde713e71..a36de5bbb622 100644 --- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c | |||
@@ -368,22 +368,16 @@ static int __init pcc_cpufreq_do_osc(acpi_handle *handle) | |||
368 | return -ENODEV; | 368 | return -ENODEV; |
369 | 369 | ||
370 | out_obj = output.pointer; | 370 | out_obj = output.pointer; |
371 | if (out_obj->type != ACPI_TYPE_BUFFER) { | 371 | if (out_obj->type != ACPI_TYPE_BUFFER) |
372 | ret = -ENODEV; | 372 | return -ENODEV; |
373 | goto out_free; | ||
374 | } | ||
375 | 373 | ||
376 | errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); | 374 | errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); |
377 | if (errors) { | 375 | if (errors) |
378 | ret = -ENODEV; | 376 | return -ENODEV; |
379 | goto out_free; | ||
380 | } | ||
381 | 377 | ||
382 | supported = *((u32 *)(out_obj->buffer.pointer + 4)); | 378 | supported = *((u32 *)(out_obj->buffer.pointer + 4)); |
383 | if (!(supported & 0x1)) { | 379 | if (!(supported & 0x1)) |
384 | ret = -ENODEV; | 380 | return -ENODEV; |
385 | goto out_free; | ||
386 | } | ||
387 | 381 | ||
388 | out_free: | 382 | out_free: |
389 | kfree(output.pointer); | 383 | kfree(output.pointer); |
@@ -397,13 +391,17 @@ static int __init pcc_cpufreq_probe(void) | |||
397 | struct pcc_memory_resource *mem_resource; | 391 | struct pcc_memory_resource *mem_resource; |
398 | struct pcc_register_resource *reg_resource; | 392 | struct pcc_register_resource *reg_resource; |
399 | union acpi_object *out_obj, *member; | 393 | union acpi_object *out_obj, *member; |
400 | acpi_handle handle, osc_handle; | 394 | acpi_handle handle, osc_handle, pcch_handle; |
401 | int ret = 0; | 395 | int ret = 0; |
402 | 396 | ||
403 | status = acpi_get_handle(NULL, "\\_SB", &handle); | 397 | status = acpi_get_handle(NULL, "\\_SB", &handle); |
404 | if (ACPI_FAILURE(status)) | 398 | if (ACPI_FAILURE(status)) |
405 | return -ENODEV; | 399 | return -ENODEV; |
406 | 400 | ||
401 | status = acpi_get_handle(handle, "PCCH", &pcch_handle); | ||
402 | if (ACPI_FAILURE(status)) | ||
403 | return -ENODEV; | ||
404 | |||
407 | status = acpi_get_handle(handle, "_OSC", &osc_handle); | 405 | status = acpi_get_handle(handle, "_OSC", &osc_handle); |
408 | if (ACPI_SUCCESS(status)) { | 406 | if (ACPI_SUCCESS(status)) { |
409 | ret = pcc_cpufreq_do_osc(&osc_handle); | 407 | ret = pcc_cpufreq_do_osc(&osc_handle); |
@@ -543,13 +541,13 @@ static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy) | |||
543 | 541 | ||
544 | if (!pcch_virt_addr) { | 542 | if (!pcch_virt_addr) { |
545 | result = -1; | 543 | result = -1; |
546 | goto pcch_null; | 544 | goto out; |
547 | } | 545 | } |
548 | 546 | ||
549 | result = pcc_get_offset(cpu); | 547 | result = pcc_get_offset(cpu); |
550 | if (result) { | 548 | if (result) { |
551 | dprintk("init: PCCP evaluation failed\n"); | 549 | dprintk("init: PCCP evaluation failed\n"); |
552 | goto free; | 550 | goto out; |
553 | } | 551 | } |
554 | 552 | ||
555 | policy->max = policy->cpuinfo.max_freq = | 553 | policy->max = policy->cpuinfo.max_freq = |
@@ -558,14 +556,15 @@ static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy) | |||
558 | ioread32(&pcch_hdr->minimum_frequency) * 1000; | 556 | ioread32(&pcch_hdr->minimum_frequency) * 1000; |
559 | policy->cur = pcc_get_freq(cpu); | 557 | policy->cur = pcc_get_freq(cpu); |
560 | 558 | ||
559 | if (!policy->cur) { | ||
560 | dprintk("init: Unable to get current CPU frequency\n"); | ||
561 | result = -EINVAL; | ||
562 | goto out; | ||
563 | } | ||
564 | |||
561 | dprintk("init: policy->max is %d, policy->min is %d\n", | 565 | dprintk("init: policy->max is %d, policy->min is %d\n", |
562 | policy->max, policy->min); | 566 | policy->max, policy->min); |
563 | 567 | out: | |
564 | return 0; | ||
565 | free: | ||
566 | pcc_clear_mapping(); | ||
567 | free_percpu(pcc_cpu_info); | ||
568 | pcch_null: | ||
569 | return result; | 568 | return result; |
570 | } | 569 | } |
571 | 570 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index 9a97116f89e5..4a45fd6e41ba 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c | |||
@@ -569,7 +569,7 @@ static int powernow_verify(struct cpufreq_policy *policy) | |||
569 | * We will then get the same kind of behaviour already tested under | 569 | * We will then get the same kind of behaviour already tested under |
570 | * the "well-known" other OS. | 570 | * the "well-known" other OS. |
571 | */ | 571 | */ |
572 | static int __init fixup_sgtc(void) | 572 | static int __cpuinit fixup_sgtc(void) |
573 | { | 573 | { |
574 | unsigned int sgtc; | 574 | unsigned int sgtc; |
575 | unsigned int m; | 575 | unsigned int m; |
@@ -603,7 +603,7 @@ static unsigned int powernow_get(unsigned int cpu) | |||
603 | } | 603 | } |
604 | 604 | ||
605 | 605 | ||
606 | static int __init acer_cpufreq_pst(const struct dmi_system_id *d) | 606 | static int __cpuinit acer_cpufreq_pst(const struct dmi_system_id *d) |
607 | { | 607 | { |
608 | printk(KERN_WARNING PFX | 608 | printk(KERN_WARNING PFX |
609 | "%s laptop with broken PST tables in BIOS detected.\n", | 609 | "%s laptop with broken PST tables in BIOS detected.\n", |
@@ -621,7 +621,7 @@ static int __init acer_cpufreq_pst(const struct dmi_system_id *d) | |||
621 | * A BIOS update is all that can save them. | 621 | * A BIOS update is all that can save them. |
622 | * Mention this, and disable cpufreq. | 622 | * Mention this, and disable cpufreq. |
623 | */ | 623 | */ |
624 | static struct dmi_system_id __initdata powernow_dmi_table[] = { | 624 | static struct dmi_system_id __cpuinitdata powernow_dmi_table[] = { |
625 | { | 625 | { |
626 | .callback = acer_cpufreq_pst, | 626 | .callback = acer_cpufreq_pst, |
627 | .ident = "Acer Aspire", | 627 | .ident = "Acer Aspire", |
@@ -633,7 +633,7 @@ static struct dmi_system_id __initdata powernow_dmi_table[] = { | |||
633 | { } | 633 | { } |
634 | }; | 634 | }; |
635 | 635 | ||
636 | static int __init powernow_cpu_init(struct cpufreq_policy *policy) | 636 | static int __cpuinit powernow_cpu_init(struct cpufreq_policy *policy) |
637 | { | 637 | { |
638 | union msr_fidvidstatus fidvidstatus; | 638 | union msr_fidvidstatus fidvidstatus; |
639 | int result; | 639 | int result; |
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 7ec2123838e6..491977baf6c0 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c | |||
@@ -9,7 +9,7 @@ | |||
9 | * Based on the powernow-k7.c module written by Dave Jones. | 9 | * Based on the powernow-k7.c module written by Dave Jones. |
10 | * (C) 2003 Dave Jones on behalf of SuSE Labs | 10 | * (C) 2003 Dave Jones on behalf of SuSE Labs |
11 | * (C) 2004 Dominik Brodowski <linux@brodo.de> | 11 | * (C) 2004 Dominik Brodowski <linux@brodo.de> |
12 | * (C) 2004 Pavel Machek <pavel@suse.cz> | 12 | * (C) 2004 Pavel Machek <pavel@ucw.cz> |
13 | * Licensed under the terms of the GNU GPL License version 2. | 13 | * Licensed under the terms of the GNU GPL License version 2. |
14 | * Based upon datasheets & sample CPUs kindly provided by AMD. | 14 | * Based upon datasheets & sample CPUs kindly provided by AMD. |
15 | * | 15 | * |
@@ -806,6 +806,8 @@ static int find_psb_table(struct powernow_k8_data *data) | |||
806 | * www.amd.com | 806 | * www.amd.com |
807 | */ | 807 | */ |
808 | printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n"); | 808 | printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n"); |
809 | printk(KERN_ERR PFX "Make sure that your BIOS is up to date" | ||
810 | " and Cool'N'Quiet support is enabled in BIOS setup\n"); | ||
809 | return -ENODEV; | 811 | return -ENODEV; |
810 | } | 812 | } |
811 | 813 | ||
@@ -910,8 +912,8 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, | |||
910 | { | 912 | { |
911 | int i; | 913 | int i; |
912 | u32 hi = 0, lo = 0; | 914 | u32 hi = 0, lo = 0; |
913 | rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo); | 915 | rdmsr(MSR_PSTATE_CUR_LIMIT, lo, hi); |
914 | data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT; | 916 | data->max_hw_pstate = (lo & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT; |
915 | 917 | ||
916 | for (i = 0; i < data->acpi_data.state_count; i++) { | 918 | for (i = 0; i < data->acpi_data.state_count; i++) { |
917 | u32 index; | 919 | u32 index; |
@@ -1023,13 +1025,12 @@ static int get_transition_latency(struct powernow_k8_data *data) | |||
1023 | } | 1025 | } |
1024 | if (max_latency == 0) { | 1026 | if (max_latency == 0) { |
1025 | /* | 1027 | /* |
1026 | * Fam 11h always returns 0 as transition latency. | 1028 | * Fam 11h and later may return 0 as transition latency. This |
1027 | * This is intended and means "very fast". While cpufreq core | 1029 | * is intended and means "very fast". While cpufreq core and |
1028 | * and governors currently can handle that gracefully, better | 1030 | * governors currently can handle that gracefully, better set it |
1029 | * set it to 1 to avoid problems in the future. | 1031 | * to 1 to avoid problems in the future. |
1030 | * For all others it's a BIOS bug. | ||
1031 | */ | 1032 | */ |
1032 | if (boot_cpu_data.x86 != 0x11) | 1033 | if (boot_cpu_data.x86 < 0x11) |
1033 | printk(KERN_ERR FW_WARN PFX "Invalid zero transition " | 1034 | printk(KERN_ERR FW_WARN PFX "Invalid zero transition " |
1034 | "latency\n"); | 1035 | "latency\n"); |
1035 | max_latency = 1; | 1036 | max_latency = 1; |
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index dd531cc56a8f..8095f8611f8a 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c | |||
@@ -34,6 +34,9 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] = | |||
34 | { | 34 | { |
35 | &x86_hyper_vmware, | 35 | &x86_hyper_vmware, |
36 | &x86_hyper_ms_hyperv, | 36 | &x86_hyper_ms_hyperv, |
37 | #ifdef CONFIG_XEN_PVHVM | ||
38 | &x86_hyper_xen_hvm, | ||
39 | #endif | ||
37 | }; | 40 | }; |
38 | 41 | ||
39 | const struct hypervisor_x86 *x86_hyper; | 42 | const struct hypervisor_x86 *x86_hyper; |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 33eae2062cf5..898c2f4eab88 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -347,8 +347,8 @@ static struct amd_l3_cache * __cpuinit amd_init_l3_cache(int node) | |||
347 | return l3; | 347 | return l3; |
348 | } | 348 | } |
349 | 349 | ||
350 | static void __cpuinit | 350 | static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, |
351 | amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) | 351 | int index) |
352 | { | 352 | { |
353 | int node; | 353 | int node; |
354 | 354 | ||
@@ -396,20 +396,39 @@ amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) | |||
396 | this_leaf->l3 = l3_caches[node]; | 396 | this_leaf->l3 = l3_caches[node]; |
397 | } | 397 | } |
398 | 398 | ||
399 | /* | ||
400 | * check whether a slot used for disabling an L3 index is occupied. | ||
401 | * @l3: L3 cache descriptor | ||
402 | * @slot: slot number (0..1) | ||
403 | * | ||
404 | * @returns: the disabled index if used or negative value if slot free. | ||
405 | */ | ||
406 | int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot) | ||
407 | { | ||
408 | unsigned int reg = 0; | ||
409 | |||
410 | pci_read_config_dword(l3->dev, 0x1BC + slot * 4, ®); | ||
411 | |||
412 | /* check whether this slot is activated already */ | ||
413 | if (reg & (3UL << 30)) | ||
414 | return reg & 0xfff; | ||
415 | |||
416 | return -1; | ||
417 | } | ||
418 | |||
399 | static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, | 419 | static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, |
400 | unsigned int slot) | 420 | unsigned int slot) |
401 | { | 421 | { |
402 | struct pci_dev *dev = this_leaf->l3->dev; | 422 | int index; |
403 | unsigned int reg = 0; | ||
404 | 423 | ||
405 | if (!this_leaf->l3 || !this_leaf->l3->can_disable) | 424 | if (!this_leaf->l3 || !this_leaf->l3->can_disable) |
406 | return -EINVAL; | 425 | return -EINVAL; |
407 | 426 | ||
408 | if (!dev) | 427 | index = amd_get_l3_disable_slot(this_leaf->l3, slot); |
409 | return -EINVAL; | 428 | if (index >= 0) |
429 | return sprintf(buf, "%d\n", index); | ||
410 | 430 | ||
411 | pci_read_config_dword(dev, 0x1BC + slot * 4, ®); | 431 | return sprintf(buf, "FREE\n"); |
412 | return sprintf(buf, "0x%08x\n", reg); | ||
413 | } | 432 | } |
414 | 433 | ||
415 | #define SHOW_CACHE_DISABLE(slot) \ | 434 | #define SHOW_CACHE_DISABLE(slot) \ |
@@ -451,37 +470,74 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu, | |||
451 | } | 470 | } |
452 | } | 471 | } |
453 | 472 | ||
454 | 473 | /* | |
455 | static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, | 474 | * disable a L3 cache index by using a disable-slot |
456 | const char *buf, size_t count, | 475 | * |
457 | unsigned int slot) | 476 | * @l3: L3 cache descriptor |
477 | * @cpu: A CPU on the node containing the L3 cache | ||
478 | * @slot: slot number (0..1) | ||
479 | * @index: index to disable | ||
480 | * | ||
481 | * @return: 0 on success, error status on failure | ||
482 | */ | ||
483 | int amd_set_l3_disable_slot(struct amd_l3_cache *l3, int cpu, unsigned slot, | ||
484 | unsigned long index) | ||
458 | { | 485 | { |
459 | struct pci_dev *dev = this_leaf->l3->dev; | 486 | int ret = 0; |
460 | int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); | ||
461 | unsigned long val = 0; | ||
462 | 487 | ||
463 | #define SUBCACHE_MASK (3UL << 20) | 488 | #define SUBCACHE_MASK (3UL << 20) |
464 | #define SUBCACHE_INDEX 0xfff | 489 | #define SUBCACHE_INDEX 0xfff |
465 | 490 | ||
466 | if (!this_leaf->l3 || !this_leaf->l3->can_disable) | 491 | /* |
492 | * check whether this slot is already used or | ||
493 | * the index is already disabled | ||
494 | */ | ||
495 | ret = amd_get_l3_disable_slot(l3, slot); | ||
496 | if (ret >= 0) | ||
467 | return -EINVAL; | 497 | return -EINVAL; |
468 | 498 | ||
499 | /* | ||
500 | * check whether the other slot has disabled the | ||
501 | * same index already | ||
502 | */ | ||
503 | if (index == amd_get_l3_disable_slot(l3, !slot)) | ||
504 | return -EINVAL; | ||
505 | |||
506 | /* do not allow writes outside of allowed bits */ | ||
507 | if ((index & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) || | ||
508 | ((index & SUBCACHE_INDEX) > l3->indices)) | ||
509 | return -EINVAL; | ||
510 | |||
511 | amd_l3_disable_index(l3, cpu, slot, index); | ||
512 | |||
513 | return 0; | ||
514 | } | ||
515 | |||
516 | static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, | ||
517 | const char *buf, size_t count, | ||
518 | unsigned int slot) | ||
519 | { | ||
520 | unsigned long val = 0; | ||
521 | int cpu, err = 0; | ||
522 | |||
469 | if (!capable(CAP_SYS_ADMIN)) | 523 | if (!capable(CAP_SYS_ADMIN)) |
470 | return -EPERM; | 524 | return -EPERM; |
471 | 525 | ||
472 | if (!dev) | 526 | if (!this_leaf->l3 || !this_leaf->l3->can_disable) |
473 | return -EINVAL; | 527 | return -EINVAL; |
474 | 528 | ||
475 | if (strict_strtoul(buf, 10, &val) < 0) | 529 | cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); |
476 | return -EINVAL; | ||
477 | 530 | ||
478 | /* do not allow writes outside of allowed bits */ | 531 | if (strict_strtoul(buf, 10, &val) < 0) |
479 | if ((val & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) || | ||
480 | ((val & SUBCACHE_INDEX) > this_leaf->l3->indices)) | ||
481 | return -EINVAL; | 532 | return -EINVAL; |
482 | 533 | ||
483 | amd_l3_disable_index(this_leaf->l3, cpu, slot, val); | 534 | err = amd_set_l3_disable_slot(this_leaf->l3, cpu, slot, val); |
484 | 535 | if (err) { | |
536 | if (err == -EEXIST) | ||
537 | printk(KERN_WARNING "L3 disable slot %d in use!\n", | ||
538 | slot); | ||
539 | return err; | ||
540 | } | ||
485 | return count; | 541 | return count; |
486 | } | 542 | } |
487 | 543 | ||
@@ -502,7 +558,7 @@ static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, | |||
502 | 558 | ||
503 | #else /* CONFIG_CPU_SUP_AMD */ | 559 | #else /* CONFIG_CPU_SUP_AMD */ |
504 | static void __cpuinit | 560 | static void __cpuinit |
505 | amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) | 561 | amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index) |
506 | { | 562 | { |
507 | }; | 563 | }; |
508 | #endif /* CONFIG_CPU_SUP_AMD */ | 564 | #endif /* CONFIG_CPU_SUP_AMD */ |
@@ -518,7 +574,7 @@ __cpuinit cpuid4_cache_lookup_regs(int index, | |||
518 | 574 | ||
519 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { | 575 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { |
520 | amd_cpuid4(index, &eax, &ebx, &ecx); | 576 | amd_cpuid4(index, &eax, &ebx, &ecx); |
521 | amd_check_l3_disable(index, this_leaf); | 577 | amd_check_l3_disable(this_leaf, index); |
522 | } else { | 578 | } else { |
523 | cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); | 579 | cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); |
524 | } | 580 | } |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 18cc42562250..e1269d62c569 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -51,7 +51,7 @@ | |||
51 | static DEFINE_MUTEX(mce_read_mutex); | 51 | static DEFINE_MUTEX(mce_read_mutex); |
52 | 52 | ||
53 | #define rcu_dereference_check_mce(p) \ | 53 | #define rcu_dereference_check_mce(p) \ |
54 | rcu_dereference_check((p), \ | 54 | rcu_dereference_index_check((p), \ |
55 | rcu_read_lock_sched_held() || \ | 55 | rcu_read_lock_sched_held() || \ |
56 | lockdep_is_held(&mce_read_mutex)) | 56 | lockdep_is_held(&mce_read_mutex)) |
57 | 57 | ||
@@ -600,6 +600,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
600 | */ | 600 | */ |
601 | if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { | 601 | if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { |
602 | mce_log(&m); | 602 | mce_log(&m); |
603 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, &m); | ||
603 | add_taint(TAINT_MACHINE_CHECK); | 604 | add_taint(TAINT_MACHINE_CHECK); |
604 | } | 605 | } |
605 | 606 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index e1a0a3bf9716..c2a8b26d4fea 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -34,15 +34,25 @@ | |||
34 | /* How long to wait between reporting thermal events */ | 34 | /* How long to wait between reporting thermal events */ |
35 | #define CHECK_INTERVAL (300 * HZ) | 35 | #define CHECK_INTERVAL (300 * HZ) |
36 | 36 | ||
37 | #define THERMAL_THROTTLING_EVENT 0 | ||
38 | #define POWER_LIMIT_EVENT 1 | ||
39 | |||
37 | /* | 40 | /* |
38 | * Current thermal throttling state: | 41 | * Current thermal event state: |
39 | */ | 42 | */ |
40 | struct thermal_state { | 43 | struct _thermal_state { |
41 | bool is_throttled; | 44 | bool new_event; |
42 | 45 | int event; | |
43 | u64 next_check; | 46 | u64 next_check; |
44 | unsigned long throttle_count; | 47 | unsigned long count; |
45 | unsigned long last_throttle_count; | 48 | unsigned long last_count; |
49 | }; | ||
50 | |||
51 | struct thermal_state { | ||
52 | struct _thermal_state core_throttle; | ||
53 | struct _thermal_state core_power_limit; | ||
54 | struct _thermal_state package_throttle; | ||
55 | struct _thermal_state package_power_limit; | ||
46 | }; | 56 | }; |
47 | 57 | ||
48 | static DEFINE_PER_CPU(struct thermal_state, thermal_state); | 58 | static DEFINE_PER_CPU(struct thermal_state, thermal_state); |
@@ -53,11 +63,13 @@ static u32 lvtthmr_init __read_mostly; | |||
53 | 63 | ||
54 | #ifdef CONFIG_SYSFS | 64 | #ifdef CONFIG_SYSFS |
55 | #define define_therm_throt_sysdev_one_ro(_name) \ | 65 | #define define_therm_throt_sysdev_one_ro(_name) \ |
56 | static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) | 66 | static SYSDEV_ATTR(_name, 0444, \ |
67 | therm_throt_sysdev_show_##_name, \ | ||
68 | NULL) \ | ||
57 | 69 | ||
58 | #define define_therm_throt_sysdev_show_func(name) \ | 70 | #define define_therm_throt_sysdev_show_func(event, name) \ |
59 | \ | 71 | \ |
60 | static ssize_t therm_throt_sysdev_show_##name( \ | 72 | static ssize_t therm_throt_sysdev_show_##event##_##name( \ |
61 | struct sys_device *dev, \ | 73 | struct sys_device *dev, \ |
62 | struct sysdev_attribute *attr, \ | 74 | struct sysdev_attribute *attr, \ |
63 | char *buf) \ | 75 | char *buf) \ |
@@ -66,30 +78,42 @@ static ssize_t therm_throt_sysdev_show_##name( \ | |||
66 | ssize_t ret; \ | 78 | ssize_t ret; \ |
67 | \ | 79 | \ |
68 | preempt_disable(); /* CPU hotplug */ \ | 80 | preempt_disable(); /* CPU hotplug */ \ |
69 | if (cpu_online(cpu)) \ | 81 | if (cpu_online(cpu)) { \ |
70 | ret = sprintf(buf, "%lu\n", \ | 82 | ret = sprintf(buf, "%lu\n", \ |
71 | per_cpu(thermal_state, cpu).name); \ | 83 | per_cpu(thermal_state, cpu).event.name); \ |
72 | else \ | 84 | } else \ |
73 | ret = 0; \ | 85 | ret = 0; \ |
74 | preempt_enable(); \ | 86 | preempt_enable(); \ |
75 | \ | 87 | \ |
76 | return ret; \ | 88 | return ret; \ |
77 | } | 89 | } |
78 | 90 | ||
79 | define_therm_throt_sysdev_show_func(throttle_count); | 91 | define_therm_throt_sysdev_show_func(core_throttle, count); |
80 | define_therm_throt_sysdev_one_ro(throttle_count); | 92 | define_therm_throt_sysdev_one_ro(core_throttle_count); |
93 | |||
94 | define_therm_throt_sysdev_show_func(core_power_limit, count); | ||
95 | define_therm_throt_sysdev_one_ro(core_power_limit_count); | ||
96 | |||
97 | define_therm_throt_sysdev_show_func(package_throttle, count); | ||
98 | define_therm_throt_sysdev_one_ro(package_throttle_count); | ||
99 | |||
100 | define_therm_throt_sysdev_show_func(package_power_limit, count); | ||
101 | define_therm_throt_sysdev_one_ro(package_power_limit_count); | ||
81 | 102 | ||
82 | static struct attribute *thermal_throttle_attrs[] = { | 103 | static struct attribute *thermal_throttle_attrs[] = { |
83 | &attr_throttle_count.attr, | 104 | &attr_core_throttle_count.attr, |
84 | NULL | 105 | NULL |
85 | }; | 106 | }; |
86 | 107 | ||
87 | static struct attribute_group thermal_throttle_attr_group = { | 108 | static struct attribute_group thermal_attr_group = { |
88 | .attrs = thermal_throttle_attrs, | 109 | .attrs = thermal_throttle_attrs, |
89 | .name = "thermal_throttle" | 110 | .name = "thermal_throttle" |
90 | }; | 111 | }; |
91 | #endif /* CONFIG_SYSFS */ | 112 | #endif /* CONFIG_SYSFS */ |
92 | 113 | ||
114 | #define CORE_LEVEL 0 | ||
115 | #define PACKAGE_LEVEL 1 | ||
116 | |||
93 | /*** | 117 | /*** |
94 | * therm_throt_process - Process thermal throttling event from interrupt | 118 | * therm_throt_process - Process thermal throttling event from interrupt |
95 | * @curr: Whether the condition is current or not (boolean), since the | 119 | * @curr: Whether the condition is current or not (boolean), since the |
@@ -106,39 +130,70 @@ static struct attribute_group thermal_throttle_attr_group = { | |||
106 | * 1 : Event should be logged further, and a message has been | 130 | * 1 : Event should be logged further, and a message has been |
107 | * printed to the syslog. | 131 | * printed to the syslog. |
108 | */ | 132 | */ |
109 | static int therm_throt_process(bool is_throttled) | 133 | static int therm_throt_process(bool new_event, int event, int level) |
110 | { | 134 | { |
111 | struct thermal_state *state; | 135 | struct _thermal_state *state; |
112 | unsigned int this_cpu; | 136 | unsigned int this_cpu = smp_processor_id(); |
113 | bool was_throttled; | 137 | bool old_event; |
114 | u64 now; | 138 | u64 now; |
139 | struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu); | ||
115 | 140 | ||
116 | this_cpu = smp_processor_id(); | ||
117 | now = get_jiffies_64(); | 141 | now = get_jiffies_64(); |
118 | state = &per_cpu(thermal_state, this_cpu); | 142 | if (level == CORE_LEVEL) { |
143 | if (event == THERMAL_THROTTLING_EVENT) | ||
144 | state = &pstate->core_throttle; | ||
145 | else if (event == POWER_LIMIT_EVENT) | ||
146 | state = &pstate->core_power_limit; | ||
147 | else | ||
148 | return 0; | ||
149 | } else if (level == PACKAGE_LEVEL) { | ||
150 | if (event == THERMAL_THROTTLING_EVENT) | ||
151 | state = &pstate->package_throttle; | ||
152 | else if (event == POWER_LIMIT_EVENT) | ||
153 | state = &pstate->package_power_limit; | ||
154 | else | ||
155 | return 0; | ||
156 | } else | ||
157 | return 0; | ||
119 | 158 | ||
120 | was_throttled = state->is_throttled; | 159 | old_event = state->new_event; |
121 | state->is_throttled = is_throttled; | 160 | state->new_event = new_event; |
122 | 161 | ||
123 | if (is_throttled) | 162 | if (new_event) |
124 | state->throttle_count++; | 163 | state->count++; |
125 | 164 | ||
126 | if (time_before64(now, state->next_check) && | 165 | if (time_before64(now, state->next_check) && |
127 | state->throttle_count != state->last_throttle_count) | 166 | state->count != state->last_count) |
128 | return 0; | 167 | return 0; |
129 | 168 | ||
130 | state->next_check = now + CHECK_INTERVAL; | 169 | state->next_check = now + CHECK_INTERVAL; |
131 | state->last_throttle_count = state->throttle_count; | 170 | state->last_count = state->count; |
132 | 171 | ||
133 | /* if we just entered the thermal event */ | 172 | /* if we just entered the thermal event */ |
134 | if (is_throttled) { | 173 | if (new_event) { |
135 | printk(KERN_CRIT "CPU%d: Temperature above threshold, cpu clock throttled (total events = %lu)\n", this_cpu, state->throttle_count); | 174 | if (event == THERMAL_THROTTLING_EVENT) |
175 | printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n", | ||
176 | this_cpu, | ||
177 | level == CORE_LEVEL ? "Core" : "Package", | ||
178 | state->count); | ||
179 | else | ||
180 | printk(KERN_CRIT "CPU%d: %s power limit notification (total events = %lu)\n", | ||
181 | this_cpu, | ||
182 | level == CORE_LEVEL ? "Core" : "Package", | ||
183 | state->count); | ||
136 | 184 | ||
137 | add_taint(TAINT_MACHINE_CHECK); | 185 | add_taint(TAINT_MACHINE_CHECK); |
138 | return 1; | 186 | return 1; |
139 | } | 187 | } |
140 | if (was_throttled) { | 188 | if (old_event) { |
141 | printk(KERN_INFO "CPU%d: Temperature/speed normal\n", this_cpu); | 189 | if (event == THERMAL_THROTTLING_EVENT) |
190 | printk(KERN_INFO "CPU%d: %s temperature/speed normal\n", | ||
191 | this_cpu, | ||
192 | level == CORE_LEVEL ? "Core" : "Package"); | ||
193 | else | ||
194 | printk(KERN_INFO "CPU%d: %s power limit normal\n", | ||
195 | this_cpu, | ||
196 | level == CORE_LEVEL ? "Core" : "Package"); | ||
142 | return 1; | 197 | return 1; |
143 | } | 198 | } |
144 | 199 | ||
@@ -149,13 +204,32 @@ static int therm_throt_process(bool is_throttled) | |||
149 | /* Add/Remove thermal_throttle interface for CPU device: */ | 204 | /* Add/Remove thermal_throttle interface for CPU device: */ |
150 | static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) | 205 | static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) |
151 | { | 206 | { |
152 | return sysfs_create_group(&sys_dev->kobj, | 207 | int err; |
153 | &thermal_throttle_attr_group); | 208 | struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); |
209 | |||
210 | err = sysfs_create_group(&sys_dev->kobj, &thermal_attr_group); | ||
211 | if (err) | ||
212 | return err; | ||
213 | |||
214 | if (cpu_has(c, X86_FEATURE_PLN)) | ||
215 | err = sysfs_add_file_to_group(&sys_dev->kobj, | ||
216 | &attr_core_power_limit_count.attr, | ||
217 | thermal_attr_group.name); | ||
218 | if (cpu_has(c, X86_FEATURE_PTS)) | ||
219 | err = sysfs_add_file_to_group(&sys_dev->kobj, | ||
220 | &attr_package_throttle_count.attr, | ||
221 | thermal_attr_group.name); | ||
222 | if (cpu_has(c, X86_FEATURE_PLN)) | ||
223 | err = sysfs_add_file_to_group(&sys_dev->kobj, | ||
224 | &attr_package_power_limit_count.attr, | ||
225 | thermal_attr_group.name); | ||
226 | |||
227 | return err; | ||
154 | } | 228 | } |
155 | 229 | ||
156 | static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) | 230 | static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) |
157 | { | 231 | { |
158 | sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); | 232 | sysfs_remove_group(&sys_dev->kobj, &thermal_attr_group); |
159 | } | 233 | } |
160 | 234 | ||
161 | /* Mutex protecting device creation against CPU hotplug: */ | 235 | /* Mutex protecting device creation against CPU hotplug: */ |
@@ -226,14 +300,50 @@ device_initcall(thermal_throttle_init_device); | |||
226 | 300 | ||
227 | #endif /* CONFIG_SYSFS */ | 301 | #endif /* CONFIG_SYSFS */ |
228 | 302 | ||
303 | /* | ||
304 | * Set up the most two significant bit to notify mce log that this thermal | ||
305 | * event type. | ||
306 | * This is a temp solution. May be changed in the future with mce log | ||
307 | * infrasture. | ||
308 | */ | ||
309 | #define CORE_THROTTLED (0) | ||
310 | #define CORE_POWER_LIMIT ((__u64)1 << 62) | ||
311 | #define PACKAGE_THROTTLED ((__u64)2 << 62) | ||
312 | #define PACKAGE_POWER_LIMIT ((__u64)3 << 62) | ||
313 | |||
229 | /* Thermal transition interrupt handler */ | 314 | /* Thermal transition interrupt handler */ |
230 | static void intel_thermal_interrupt(void) | 315 | static void intel_thermal_interrupt(void) |
231 | { | 316 | { |
232 | __u64 msr_val; | 317 | __u64 msr_val; |
318 | struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); | ||
233 | 319 | ||
234 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); | 320 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); |
235 | if (therm_throt_process((msr_val & THERM_STATUS_PROCHOT) != 0)) | 321 | |
236 | mce_log_therm_throt_event(msr_val); | 322 | if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, |
323 | THERMAL_THROTTLING_EVENT, | ||
324 | CORE_LEVEL) != 0) | ||
325 | mce_log_therm_throt_event(CORE_THROTTLED | msr_val); | ||
326 | |||
327 | if (cpu_has(c, X86_FEATURE_PLN)) | ||
328 | if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, | ||
329 | POWER_LIMIT_EVENT, | ||
330 | CORE_LEVEL) != 0) | ||
331 | mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val); | ||
332 | |||
333 | if (cpu_has(c, X86_FEATURE_PTS)) { | ||
334 | rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); | ||
335 | if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, | ||
336 | THERMAL_THROTTLING_EVENT, | ||
337 | PACKAGE_LEVEL) != 0) | ||
338 | mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val); | ||
339 | if (cpu_has(c, X86_FEATURE_PLN)) | ||
340 | if (therm_throt_process(msr_val & | ||
341 | PACKAGE_THERM_STATUS_POWER_LIMIT, | ||
342 | POWER_LIMIT_EVENT, | ||
343 | PACKAGE_LEVEL) != 0) | ||
344 | mce_log_therm_throt_event(PACKAGE_POWER_LIMIT | ||
345 | | msr_val); | ||
346 | } | ||
237 | } | 347 | } |
238 | 348 | ||
239 | static void unexpected_thermal_interrupt(void) | 349 | static void unexpected_thermal_interrupt(void) |
@@ -335,8 +445,26 @@ void intel_init_thermal(struct cpuinfo_x86 *c) | |||
335 | apic_write(APIC_LVTTHMR, h); | 445 | apic_write(APIC_LVTTHMR, h); |
336 | 446 | ||
337 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); | 447 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); |
338 | wrmsr(MSR_IA32_THERM_INTERRUPT, | 448 | if (cpu_has(c, X86_FEATURE_PLN)) |
339 | l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); | 449 | wrmsr(MSR_IA32_THERM_INTERRUPT, |
450 | l | (THERM_INT_LOW_ENABLE | ||
451 | | THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h); | ||
452 | else | ||
453 | wrmsr(MSR_IA32_THERM_INTERRUPT, | ||
454 | l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); | ||
455 | |||
456 | if (cpu_has(c, X86_FEATURE_PTS)) { | ||
457 | rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); | ||
458 | if (cpu_has(c, X86_FEATURE_PLN)) | ||
459 | wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, | ||
460 | l | (PACKAGE_THERM_INT_LOW_ENABLE | ||
461 | | PACKAGE_THERM_INT_HIGH_ENABLE | ||
462 | | PACKAGE_THERM_INT_PLN_ENABLE), h); | ||
463 | else | ||
464 | wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, | ||
465 | l | (PACKAGE_THERM_INT_LOW_ENABLE | ||
466 | | PACKAGE_THERM_INT_HIGH_ENABLE), h); | ||
467 | } | ||
340 | 468 | ||
341 | smp_thermal_vector = intel_thermal_interrupt; | 469 | smp_thermal_vector = intel_thermal_interrupt; |
342 | 470 | ||
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 16f41bbe46b6..d944bf6c50e9 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <asm/mshyperv.h> | 18 | #include <asm/mshyperv.h> |
19 | 19 | ||
20 | struct ms_hyperv_info ms_hyperv; | 20 | struct ms_hyperv_info ms_hyperv; |
21 | EXPORT_SYMBOL_GPL(ms_hyperv); | ||
21 | 22 | ||
22 | static bool __init ms_hyperv_platform(void) | 23 | static bool __init ms_hyperv_platform(void) |
23 | { | 24 | { |
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 06130b52f012..c5f59d071425 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c | |||
@@ -632,9 +632,9 @@ static void __init mtrr_print_out_one_result(int i) | |||
632 | unsigned long gran_base, chunk_base, lose_base; | 632 | unsigned long gran_base, chunk_base, lose_base; |
633 | char gran_factor, chunk_factor, lose_factor; | 633 | char gran_factor, chunk_factor, lose_factor; |
634 | 634 | ||
635 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), | 635 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor); |
636 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), | 636 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor); |
637 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), | 637 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor); |
638 | 638 | ||
639 | pr_info("%sgran_size: %ld%c \tchunk_size: %ld%c \t", | 639 | pr_info("%sgran_size: %ld%c \tchunk_size: %ld%c \t", |
640 | result[i].bad ? "*BAD*" : " ", | 640 | result[i].bad ? "*BAD*" : " ", |
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index fd31a441c61c..7d28d7d03885 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
@@ -433,13 +433,12 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, | |||
433 | { | 433 | { |
434 | unsigned int mask_lo, mask_hi, base_lo, base_hi; | 434 | unsigned int mask_lo, mask_hi, base_lo, base_hi; |
435 | unsigned int tmp, hi; | 435 | unsigned int tmp, hi; |
436 | int cpu; | ||
437 | 436 | ||
438 | /* | 437 | /* |
439 | * get_mtrr doesn't need to update mtrr_state, also it could be called | 438 | * get_mtrr doesn't need to update mtrr_state, also it could be called |
440 | * from any cpu, so try to print it out directly. | 439 | * from any cpu, so try to print it out directly. |
441 | */ | 440 | */ |
442 | cpu = get_cpu(); | 441 | get_cpu(); |
443 | 442 | ||
444 | rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); | 443 | rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); |
445 | 444 | ||
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 79556bd9b602..01c0f3ee6cc3 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
@@ -35,6 +35,7 @@ | |||
35 | 35 | ||
36 | #include <linux/types.h> /* FIXME: kvm_para.h needs this */ | 36 | #include <linux/types.h> /* FIXME: kvm_para.h needs this */ |
37 | 37 | ||
38 | #include <linux/stop_machine.h> | ||
38 | #include <linux/kvm_para.h> | 39 | #include <linux/kvm_para.h> |
39 | #include <linux/uaccess.h> | 40 | #include <linux/uaccess.h> |
40 | #include <linux/module.h> | 41 | #include <linux/module.h> |
@@ -143,22 +144,28 @@ struct set_mtrr_data { | |||
143 | mtrr_type smp_type; | 144 | mtrr_type smp_type; |
144 | }; | 145 | }; |
145 | 146 | ||
147 | static DEFINE_PER_CPU(struct cpu_stop_work, mtrr_work); | ||
148 | |||
146 | /** | 149 | /** |
147 | * ipi_handler - Synchronisation handler. Executed by "other" CPUs. | 150 | * mtrr_work_handler - Synchronisation handler. Executed by "other" CPUs. |
148 | * @info: pointer to mtrr configuration data | 151 | * @info: pointer to mtrr configuration data |
149 | * | 152 | * |
150 | * Returns nothing. | 153 | * Returns nothing. |
151 | */ | 154 | */ |
152 | static void ipi_handler(void *info) | 155 | static int mtrr_work_handler(void *info) |
153 | { | 156 | { |
154 | #ifdef CONFIG_SMP | 157 | #ifdef CONFIG_SMP |
155 | struct set_mtrr_data *data = info; | 158 | struct set_mtrr_data *data = info; |
156 | unsigned long flags; | 159 | unsigned long flags; |
157 | 160 | ||
161 | atomic_dec(&data->count); | ||
162 | while (!atomic_read(&data->gate)) | ||
163 | cpu_relax(); | ||
164 | |||
158 | local_irq_save(flags); | 165 | local_irq_save(flags); |
159 | 166 | ||
160 | atomic_dec(&data->count); | 167 | atomic_dec(&data->count); |
161 | while (!atomic_read(&data->gate)) | 168 | while (atomic_read(&data->gate)) |
162 | cpu_relax(); | 169 | cpu_relax(); |
163 | 170 | ||
164 | /* The master has cleared me to execute */ | 171 | /* The master has cleared me to execute */ |
@@ -173,12 +180,13 @@ static void ipi_handler(void *info) | |||
173 | } | 180 | } |
174 | 181 | ||
175 | atomic_dec(&data->count); | 182 | atomic_dec(&data->count); |
176 | while (atomic_read(&data->gate)) | 183 | while (!atomic_read(&data->gate)) |
177 | cpu_relax(); | 184 | cpu_relax(); |
178 | 185 | ||
179 | atomic_dec(&data->count); | 186 | atomic_dec(&data->count); |
180 | local_irq_restore(flags); | 187 | local_irq_restore(flags); |
181 | #endif | 188 | #endif |
189 | return 0; | ||
182 | } | 190 | } |
183 | 191 | ||
184 | static inline int types_compatible(mtrr_type type1, mtrr_type type2) | 192 | static inline int types_compatible(mtrr_type type1, mtrr_type type2) |
@@ -198,7 +206,7 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2) | |||
198 | * | 206 | * |
199 | * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly: | 207 | * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly: |
200 | * | 208 | * |
201 | * 1. Send IPI to do the following: | 209 | * 1. Queue work to do the following on all processors: |
202 | * 2. Disable Interrupts | 210 | * 2. Disable Interrupts |
203 | * 3. Wait for all procs to do so | 211 | * 3. Wait for all procs to do so |
204 | * 4. Enter no-fill cache mode | 212 | * 4. Enter no-fill cache mode |
@@ -215,14 +223,17 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2) | |||
215 | * 15. Enable interrupts. | 223 | * 15. Enable interrupts. |
216 | * | 224 | * |
217 | * What does that mean for us? Well, first we set data.count to the number | 225 | * What does that mean for us? Well, first we set data.count to the number |
218 | * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait | 226 | * of CPUs. As each CPU announces that it started the rendezvous handler by |
219 | * until it hits 0 and proceed. We set the data.gate flag and reset data.count. | 227 | * decrementing the count, We reset data.count and set the data.gate flag |
220 | * Meanwhile, they are waiting for that flag to be set. Once it's set, each | 228 | * allowing all the cpu's to proceed with the work. As each cpu disables |
229 | * interrupts, it'll decrement data.count once. We wait until it hits 0 and | ||
230 | * proceed. We clear the data.gate flag and reset data.count. Meanwhile, they | ||
231 | * are waiting for that flag to be cleared. Once it's cleared, each | ||
221 | * CPU goes through the transition of updating MTRRs. | 232 | * CPU goes through the transition of updating MTRRs. |
222 | * The CPU vendors may each do it differently, | 233 | * The CPU vendors may each do it differently, |
223 | * so we call mtrr_if->set() callback and let them take care of it. | 234 | * so we call mtrr_if->set() callback and let them take care of it. |
224 | * When they're done, they again decrement data->count and wait for data.gate | 235 | * When they're done, they again decrement data->count and wait for data.gate |
225 | * to be reset. | 236 | * to be set. |
226 | * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag | 237 | * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag |
227 | * Everyone then enables interrupts and we all continue on. | 238 | * Everyone then enables interrupts and we all continue on. |
228 | * | 239 | * |
@@ -234,6 +245,9 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ | |||
234 | { | 245 | { |
235 | struct set_mtrr_data data; | 246 | struct set_mtrr_data data; |
236 | unsigned long flags; | 247 | unsigned long flags; |
248 | int cpu; | ||
249 | |||
250 | preempt_disable(); | ||
237 | 251 | ||
238 | data.smp_reg = reg; | 252 | data.smp_reg = reg; |
239 | data.smp_base = base; | 253 | data.smp_base = base; |
@@ -246,10 +260,15 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ | |||
246 | atomic_set(&data.gate, 0); | 260 | atomic_set(&data.gate, 0); |
247 | 261 | ||
248 | /* Start the ball rolling on other CPUs */ | 262 | /* Start the ball rolling on other CPUs */ |
249 | if (smp_call_function(ipi_handler, &data, 0) != 0) | 263 | for_each_online_cpu(cpu) { |
250 | panic("mtrr: timed out waiting for other CPUs\n"); | 264 | struct cpu_stop_work *work = &per_cpu(mtrr_work, cpu); |
265 | |||
266 | if (cpu == smp_processor_id()) | ||
267 | continue; | ||
268 | |||
269 | stop_one_cpu_nowait(cpu, mtrr_work_handler, &data, work); | ||
270 | } | ||
251 | 271 | ||
252 | local_irq_save(flags); | ||
253 | 272 | ||
254 | while (atomic_read(&data.count)) | 273 | while (atomic_read(&data.count)) |
255 | cpu_relax(); | 274 | cpu_relax(); |
@@ -259,6 +278,16 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ | |||
259 | smp_wmb(); | 278 | smp_wmb(); |
260 | atomic_set(&data.gate, 1); | 279 | atomic_set(&data.gate, 1); |
261 | 280 | ||
281 | local_irq_save(flags); | ||
282 | |||
283 | while (atomic_read(&data.count)) | ||
284 | cpu_relax(); | ||
285 | |||
286 | /* Ok, reset count and toggle gate */ | ||
287 | atomic_set(&data.count, num_booting_cpus() - 1); | ||
288 | smp_wmb(); | ||
289 | atomic_set(&data.gate, 0); | ||
290 | |||
262 | /* Do our MTRR business */ | 291 | /* Do our MTRR business */ |
263 | 292 | ||
264 | /* | 293 | /* |
@@ -279,7 +308,7 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ | |||
279 | 308 | ||
280 | atomic_set(&data.count, num_booting_cpus() - 1); | 309 | atomic_set(&data.count, num_booting_cpus() - 1); |
281 | smp_wmb(); | 310 | smp_wmb(); |
282 | atomic_set(&data.gate, 0); | 311 | atomic_set(&data.gate, 1); |
283 | 312 | ||
284 | /* | 313 | /* |
285 | * Wait here for everyone to have seen the gate change | 314 | * Wait here for everyone to have seen the gate change |
@@ -289,6 +318,7 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ | |||
289 | cpu_relax(); | 318 | cpu_relax(); |
290 | 319 | ||
291 | local_irq_restore(flags); | 320 | local_irq_restore(flags); |
321 | preempt_enable(); | ||
292 | } | 322 | } |
293 | 323 | ||
294 | /** | 324 | /** |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 5db5b7d65a18..f2da20fda02d 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -220,6 +220,7 @@ struct x86_pmu { | |||
220 | struct perf_event *event); | 220 | struct perf_event *event); |
221 | struct event_constraint *event_constraints; | 221 | struct event_constraint *event_constraints; |
222 | void (*quirks)(void); | 222 | void (*quirks)(void); |
223 | int perfctr_second_write; | ||
223 | 224 | ||
224 | int (*cpu_prepare)(int cpu); | 225 | int (*cpu_prepare)(int cpu); |
225 | void (*cpu_starting)(int cpu); | 226 | void (*cpu_starting)(int cpu); |
@@ -295,10 +296,10 @@ x86_perf_event_update(struct perf_event *event) | |||
295 | * count to the generic event atomically: | 296 | * count to the generic event atomically: |
296 | */ | 297 | */ |
297 | again: | 298 | again: |
298 | prev_raw_count = atomic64_read(&hwc->prev_count); | 299 | prev_raw_count = local64_read(&hwc->prev_count); |
299 | rdmsrl(hwc->event_base + idx, new_raw_count); | 300 | rdmsrl(hwc->event_base + idx, new_raw_count); |
300 | 301 | ||
301 | if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, | 302 | if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, |
302 | new_raw_count) != prev_raw_count) | 303 | new_raw_count) != prev_raw_count) |
303 | goto again; | 304 | goto again; |
304 | 305 | ||
@@ -313,8 +314,8 @@ again: | |||
313 | delta = (new_raw_count << shift) - (prev_raw_count << shift); | 314 | delta = (new_raw_count << shift) - (prev_raw_count << shift); |
314 | delta >>= shift; | 315 | delta >>= shift; |
315 | 316 | ||
316 | atomic64_add(delta, &event->count); | 317 | local64_add(delta, &event->count); |
317 | atomic64_sub(delta, &hwc->period_left); | 318 | local64_sub(delta, &hwc->period_left); |
318 | 319 | ||
319 | return new_raw_count; | 320 | return new_raw_count; |
320 | } | 321 | } |
@@ -438,7 +439,7 @@ static int x86_setup_perfctr(struct perf_event *event) | |||
438 | if (!hwc->sample_period) { | 439 | if (!hwc->sample_period) { |
439 | hwc->sample_period = x86_pmu.max_period; | 440 | hwc->sample_period = x86_pmu.max_period; |
440 | hwc->last_period = hwc->sample_period; | 441 | hwc->last_period = hwc->sample_period; |
441 | atomic64_set(&hwc->period_left, hwc->sample_period); | 442 | local64_set(&hwc->period_left, hwc->sample_period); |
442 | } else { | 443 | } else { |
443 | /* | 444 | /* |
444 | * If we have a PMU initialized but no APIC | 445 | * If we have a PMU initialized but no APIC |
@@ -885,7 +886,7 @@ static int | |||
885 | x86_perf_event_set_period(struct perf_event *event) | 886 | x86_perf_event_set_period(struct perf_event *event) |
886 | { | 887 | { |
887 | struct hw_perf_event *hwc = &event->hw; | 888 | struct hw_perf_event *hwc = &event->hw; |
888 | s64 left = atomic64_read(&hwc->period_left); | 889 | s64 left = local64_read(&hwc->period_left); |
889 | s64 period = hwc->sample_period; | 890 | s64 period = hwc->sample_period; |
890 | int ret = 0, idx = hwc->idx; | 891 | int ret = 0, idx = hwc->idx; |
891 | 892 | ||
@@ -897,14 +898,14 @@ x86_perf_event_set_period(struct perf_event *event) | |||
897 | */ | 898 | */ |
898 | if (unlikely(left <= -period)) { | 899 | if (unlikely(left <= -period)) { |
899 | left = period; | 900 | left = period; |
900 | atomic64_set(&hwc->period_left, left); | 901 | local64_set(&hwc->period_left, left); |
901 | hwc->last_period = period; | 902 | hwc->last_period = period; |
902 | ret = 1; | 903 | ret = 1; |
903 | } | 904 | } |
904 | 905 | ||
905 | if (unlikely(left <= 0)) { | 906 | if (unlikely(left <= 0)) { |
906 | left += period; | 907 | left += period; |
907 | atomic64_set(&hwc->period_left, left); | 908 | local64_set(&hwc->period_left, left); |
908 | hwc->last_period = period; | 909 | hwc->last_period = period; |
909 | ret = 1; | 910 | ret = 1; |
910 | } | 911 | } |
@@ -923,10 +924,19 @@ x86_perf_event_set_period(struct perf_event *event) | |||
923 | * The hw event starts counting from this event offset, | 924 | * The hw event starts counting from this event offset, |
924 | * mark it to be able to extra future deltas: | 925 | * mark it to be able to extra future deltas: |
925 | */ | 926 | */ |
926 | atomic64_set(&hwc->prev_count, (u64)-left); | 927 | local64_set(&hwc->prev_count, (u64)-left); |
927 | 928 | ||
928 | wrmsrl(hwc->event_base + idx, | 929 | wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask); |
930 | |||
931 | /* | ||
932 | * Due to erratum on certan cpu we need | ||
933 | * a second write to be sure the register | ||
934 | * is updated properly | ||
935 | */ | ||
936 | if (x86_pmu.perfctr_second_write) { | ||
937 | wrmsrl(hwc->event_base + idx, | ||
929 | (u64)(-left) & x86_pmu.cntval_mask); | 938 | (u64)(-left) & x86_pmu.cntval_mask); |
939 | } | ||
930 | 940 | ||
931 | perf_event_update_userpage(event); | 941 | perf_event_update_userpage(event); |
932 | 942 | ||
@@ -969,7 +979,7 @@ static int x86_pmu_enable(struct perf_event *event) | |||
969 | * skip the schedulability test here, it will be peformed | 979 | * skip the schedulability test here, it will be peformed |
970 | * at commit time(->commit_txn) as a whole | 980 | * at commit time(->commit_txn) as a whole |
971 | */ | 981 | */ |
972 | if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) | 982 | if (cpuc->group_flag & PERF_EVENT_TXN) |
973 | goto out; | 983 | goto out; |
974 | 984 | ||
975 | ret = x86_pmu.schedule_events(cpuc, n, assign); | 985 | ret = x86_pmu.schedule_events(cpuc, n, assign); |
@@ -1096,7 +1106,7 @@ static void x86_pmu_disable(struct perf_event *event) | |||
1096 | * The events never got scheduled and ->cancel_txn will truncate | 1106 | * The events never got scheduled and ->cancel_txn will truncate |
1097 | * the event_list. | 1107 | * the event_list. |
1098 | */ | 1108 | */ |
1099 | if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) | 1109 | if (cpuc->group_flag & PERF_EVENT_TXN) |
1100 | return; | 1110 | return; |
1101 | 1111 | ||
1102 | x86_pmu_stop(event); | 1112 | x86_pmu_stop(event); |
@@ -1388,7 +1398,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu) | |||
1388 | { | 1398 | { |
1389 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1399 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1390 | 1400 | ||
1391 | cpuc->group_flag |= PERF_EVENT_TXN_STARTED; | 1401 | cpuc->group_flag |= PERF_EVENT_TXN; |
1392 | cpuc->n_txn = 0; | 1402 | cpuc->n_txn = 0; |
1393 | } | 1403 | } |
1394 | 1404 | ||
@@ -1401,7 +1411,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu) | |||
1401 | { | 1411 | { |
1402 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1412 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1403 | 1413 | ||
1404 | cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED; | 1414 | cpuc->group_flag &= ~PERF_EVENT_TXN; |
1405 | /* | 1415 | /* |
1406 | * Truncate the collected events. | 1416 | * Truncate the collected events. |
1407 | */ | 1417 | */ |
@@ -1435,11 +1445,7 @@ static int x86_pmu_commit_txn(const struct pmu *pmu) | |||
1435 | */ | 1445 | */ |
1436 | memcpy(cpuc->assign, assign, n*sizeof(int)); | 1446 | memcpy(cpuc->assign, assign, n*sizeof(int)); |
1437 | 1447 | ||
1438 | /* | 1448 | cpuc->group_flag &= ~PERF_EVENT_TXN; |
1439 | * Clear out the txn count so that ->cancel_txn() which gets | ||
1440 | * run after ->commit_txn() doesn't undo things. | ||
1441 | */ | ||
1442 | cpuc->n_txn = 0; | ||
1443 | 1449 | ||
1444 | return 0; | 1450 | return 0; |
1445 | } | 1451 | } |
@@ -1607,8 +1613,6 @@ static const struct stacktrace_ops backtrace_ops = { | |||
1607 | .walk_stack = print_context_stack_bp, | 1613 | .walk_stack = print_context_stack_bp, |
1608 | }; | 1614 | }; |
1609 | 1615 | ||
1610 | #include "../dumpstack.h" | ||
1611 | |||
1612 | static void | 1616 | static void |
1613 | perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) | 1617 | perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) |
1614 | { | 1618 | { |
@@ -1730,22 +1734,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | |||
1730 | return entry; | 1734 | return entry; |
1731 | } | 1735 | } |
1732 | 1736 | ||
1733 | void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip) | ||
1734 | { | ||
1735 | regs->ip = ip; | ||
1736 | /* | ||
1737 | * perf_arch_fetch_caller_regs adds another call, we need to increment | ||
1738 | * the skip level | ||
1739 | */ | ||
1740 | regs->bp = rewind_frame_pointer(skip + 1); | ||
1741 | regs->cs = __KERNEL_CS; | ||
1742 | /* | ||
1743 | * We abuse bit 3 to pass exact information, see perf_misc_flags | ||
1744 | * and the comment with PERF_EFLAGS_EXACT. | ||
1745 | */ | ||
1746 | regs->flags = 0; | ||
1747 | } | ||
1748 | |||
1749 | unsigned long perf_instruction_pointer(struct pt_regs *regs) | 1737 | unsigned long perf_instruction_pointer(struct pt_regs *regs) |
1750 | { | 1738 | { |
1751 | unsigned long ip; | 1739 | unsigned long ip; |
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index ae85d69644d1..107711bf0ee8 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
@@ -21,22 +21,36 @@ struct p4_event_bind { | |||
21 | char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */ | 21 | char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */ |
22 | }; | 22 | }; |
23 | 23 | ||
24 | struct p4_cache_event_bind { | 24 | struct p4_pebs_bind { |
25 | unsigned int metric_pebs; | 25 | unsigned int metric_pebs; |
26 | unsigned int metric_vert; | 26 | unsigned int metric_vert; |
27 | }; | 27 | }; |
28 | 28 | ||
29 | #define P4_GEN_CACHE_EVENT_BIND(name) \ | 29 | /* it sets P4_PEBS_ENABLE_UOP_TAG as well */ |
30 | [P4_CACHE__##name] = { \ | 30 | #define P4_GEN_PEBS_BIND(name, pebs, vert) \ |
31 | .metric_pebs = P4_PEBS__##name, \ | 31 | [P4_PEBS_METRIC__##name] = { \ |
32 | .metric_vert = P4_VERT__##name, \ | 32 | .metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG, \ |
33 | .metric_vert = vert, \ | ||
33 | } | 34 | } |
34 | 35 | ||
35 | static struct p4_cache_event_bind p4_cache_event_bind_map[] = { | 36 | /* |
36 | P4_GEN_CACHE_EVENT_BIND(1stl_cache_load_miss_retired), | 37 | * note we have P4_PEBS_ENABLE_UOP_TAG always set here |
37 | P4_GEN_CACHE_EVENT_BIND(2ndl_cache_load_miss_retired), | 38 | * |
38 | P4_GEN_CACHE_EVENT_BIND(dtlb_load_miss_retired), | 39 | * it's needed for mapping P4_PEBS_CONFIG_METRIC_MASK bits of |
39 | P4_GEN_CACHE_EVENT_BIND(dtlb_store_miss_retired), | 40 | * event configuration to find out which values are to be |
41 | * written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT | ||
42 | * resgisters | ||
43 | */ | ||
44 | static struct p4_pebs_bind p4_pebs_bind_map[] = { | ||
45 | P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired, 0x0000001, 0x0000001), | ||
46 | P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired, 0x0000002, 0x0000001), | ||
47 | P4_GEN_PEBS_BIND(dtlb_load_miss_retired, 0x0000004, 0x0000001), | ||
48 | P4_GEN_PEBS_BIND(dtlb_store_miss_retired, 0x0000004, 0x0000002), | ||
49 | P4_GEN_PEBS_BIND(dtlb_all_miss_retired, 0x0000004, 0x0000003), | ||
50 | P4_GEN_PEBS_BIND(tagged_mispred_branch, 0x0018000, 0x0000010), | ||
51 | P4_GEN_PEBS_BIND(mob_load_replay_retired, 0x0000200, 0x0000001), | ||
52 | P4_GEN_PEBS_BIND(split_load_retired, 0x0000400, 0x0000001), | ||
53 | P4_GEN_PEBS_BIND(split_store_retired, 0x0000400, 0x0000002), | ||
40 | }; | 54 | }; |
41 | 55 | ||
42 | /* | 56 | /* |
@@ -281,10 +295,10 @@ static struct p4_event_bind p4_event_bind_map[] = { | |||
281 | }, | 295 | }, |
282 | }; | 296 | }; |
283 | 297 | ||
284 | #define P4_GEN_CACHE_EVENT(event, bit, cache_event) \ | 298 | #define P4_GEN_CACHE_EVENT(event, bit, metric) \ |
285 | p4_config_pack_escr(P4_ESCR_EVENT(event) | \ | 299 | p4_config_pack_escr(P4_ESCR_EVENT(event) | \ |
286 | P4_ESCR_EMASK_BIT(event, bit)) | \ | 300 | P4_ESCR_EMASK_BIT(event, bit)) | \ |
287 | p4_config_pack_cccr(cache_event | \ | 301 | p4_config_pack_cccr(metric | \ |
288 | P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event)))) | 302 | P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event)))) |
289 | 303 | ||
290 | static __initconst const u64 p4_hw_cache_event_ids | 304 | static __initconst const u64 p4_hw_cache_event_ids |
@@ -296,34 +310,34 @@ static __initconst const u64 p4_hw_cache_event_ids | |||
296 | [ C(OP_READ) ] = { | 310 | [ C(OP_READ) ] = { |
297 | [ C(RESULT_ACCESS) ] = 0x0, | 311 | [ C(RESULT_ACCESS) ] = 0x0, |
298 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | 312 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, |
299 | P4_CACHE__1stl_cache_load_miss_retired), | 313 | P4_PEBS_METRIC__1stl_cache_load_miss_retired), |
300 | }, | 314 | }, |
301 | }, | 315 | }, |
302 | [ C(LL ) ] = { | 316 | [ C(LL ) ] = { |
303 | [ C(OP_READ) ] = { | 317 | [ C(OP_READ) ] = { |
304 | [ C(RESULT_ACCESS) ] = 0x0, | 318 | [ C(RESULT_ACCESS) ] = 0x0, |
305 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | 319 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, |
306 | P4_CACHE__2ndl_cache_load_miss_retired), | 320 | P4_PEBS_METRIC__2ndl_cache_load_miss_retired), |
307 | }, | 321 | }, |
308 | }, | 322 | }, |
309 | [ C(DTLB) ] = { | 323 | [ C(DTLB) ] = { |
310 | [ C(OP_READ) ] = { | 324 | [ C(OP_READ) ] = { |
311 | [ C(RESULT_ACCESS) ] = 0x0, | 325 | [ C(RESULT_ACCESS) ] = 0x0, |
312 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | 326 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, |
313 | P4_CACHE__dtlb_load_miss_retired), | 327 | P4_PEBS_METRIC__dtlb_load_miss_retired), |
314 | }, | 328 | }, |
315 | [ C(OP_WRITE) ] = { | 329 | [ C(OP_WRITE) ] = { |
316 | [ C(RESULT_ACCESS) ] = 0x0, | 330 | [ C(RESULT_ACCESS) ] = 0x0, |
317 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | 331 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, |
318 | P4_CACHE__dtlb_store_miss_retired), | 332 | P4_PEBS_METRIC__dtlb_store_miss_retired), |
319 | }, | 333 | }, |
320 | }, | 334 | }, |
321 | [ C(ITLB) ] = { | 335 | [ C(ITLB) ] = { |
322 | [ C(OP_READ) ] = { | 336 | [ C(OP_READ) ] = { |
323 | [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT, | 337 | [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT, |
324 | P4_CACHE__itlb_reference_hit), | 338 | P4_PEBS_METRIC__none), |
325 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS, | 339 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS, |
326 | P4_CACHE__itlb_reference_miss), | 340 | P4_PEBS_METRIC__none), |
327 | }, | 341 | }, |
328 | [ C(OP_WRITE) ] = { | 342 | [ C(OP_WRITE) ] = { |
329 | [ C(RESULT_ACCESS) ] = -1, | 343 | [ C(RESULT_ACCESS) ] = -1, |
@@ -414,11 +428,37 @@ static u64 p4_pmu_event_map(int hw_event) | |||
414 | return config; | 428 | return config; |
415 | } | 429 | } |
416 | 430 | ||
431 | static int p4_validate_raw_event(struct perf_event *event) | ||
432 | { | ||
433 | unsigned int v; | ||
434 | |||
435 | /* user data may have out-of-bound event index */ | ||
436 | v = p4_config_unpack_event(event->attr.config); | ||
437 | if (v >= ARRAY_SIZE(p4_event_bind_map)) { | ||
438 | pr_warning("P4 PMU: Unknown event code: %d\n", v); | ||
439 | return -EINVAL; | ||
440 | } | ||
441 | |||
442 | /* | ||
443 | * it may have some screwed PEBS bits | ||
444 | */ | ||
445 | if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) { | ||
446 | pr_warning("P4 PMU: PEBS are not supported yet\n"); | ||
447 | return -EINVAL; | ||
448 | } | ||
449 | v = p4_config_unpack_metric(event->attr.config); | ||
450 | if (v >= ARRAY_SIZE(p4_pebs_bind_map)) { | ||
451 | pr_warning("P4 PMU: Unknown metric code: %d\n", v); | ||
452 | return -EINVAL; | ||
453 | } | ||
454 | |||
455 | return 0; | ||
456 | } | ||
457 | |||
417 | static int p4_hw_config(struct perf_event *event) | 458 | static int p4_hw_config(struct perf_event *event) |
418 | { | 459 | { |
419 | int cpu = get_cpu(); | 460 | int cpu = get_cpu(); |
420 | int rc = 0; | 461 | int rc = 0; |
421 | unsigned int evnt; | ||
422 | u32 escr, cccr; | 462 | u32 escr, cccr; |
423 | 463 | ||
424 | /* | 464 | /* |
@@ -438,12 +478,9 @@ static int p4_hw_config(struct perf_event *event) | |||
438 | 478 | ||
439 | if (event->attr.type == PERF_TYPE_RAW) { | 479 | if (event->attr.type == PERF_TYPE_RAW) { |
440 | 480 | ||
441 | /* user data may have out-of-bound event index */ | 481 | rc = p4_validate_raw_event(event); |
442 | evnt = p4_config_unpack_event(event->attr.config); | 482 | if (rc) |
443 | if (evnt >= ARRAY_SIZE(p4_event_bind_map)) { | ||
444 | rc = -EINVAL; | ||
445 | goto out; | 483 | goto out; |
446 | } | ||
447 | 484 | ||
448 | /* | 485 | /* |
449 | * We don't control raw events so it's up to the caller | 486 | * We don't control raw events so it's up to the caller |
@@ -451,12 +488,15 @@ static int p4_hw_config(struct perf_event *event) | |||
451 | * on HT machine but allow HT-compatible specifics to be | 488 | * on HT machine but allow HT-compatible specifics to be |
452 | * passed on) | 489 | * passed on) |
453 | * | 490 | * |
491 | * Note that for RAW events we allow user to use P4_CCCR_RESERVED | ||
492 | * bits since we keep additional info here (for cache events and etc) | ||
493 | * | ||
454 | * XXX: HT wide things should check perf_paranoid_cpu() && | 494 | * XXX: HT wide things should check perf_paranoid_cpu() && |
455 | * CAP_SYS_ADMIN | 495 | * CAP_SYS_ADMIN |
456 | */ | 496 | */ |
457 | event->hw.config |= event->attr.config & | 497 | event->hw.config |= event->attr.config & |
458 | (p4_config_pack_escr(P4_ESCR_MASK_HT) | | 498 | (p4_config_pack_escr(P4_ESCR_MASK_HT) | |
459 | p4_config_pack_cccr(P4_CCCR_MASK_HT)); | 499 | p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED)); |
460 | } | 500 | } |
461 | 501 | ||
462 | rc = x86_setup_perfctr(event); | 502 | rc = x86_setup_perfctr(event); |
@@ -482,6 +522,29 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) | |||
482 | return overflow; | 522 | return overflow; |
483 | } | 523 | } |
484 | 524 | ||
525 | static void p4_pmu_disable_pebs(void) | ||
526 | { | ||
527 | /* | ||
528 | * FIXME | ||
529 | * | ||
530 | * It's still allowed that two threads setup same cache | ||
531 | * events so we can't simply clear metrics until we knew | ||
532 | * noone is depending on us, so we need kind of counter | ||
533 | * for "ReplayEvent" users. | ||
534 | * | ||
535 | * What is more complex -- RAW events, if user (for some | ||
536 | * reason) will pass some cache event metric with improper | ||
537 | * event opcode -- it's fine from hardware point of view | ||
538 | * but completely nonsence from "meaning" of such action. | ||
539 | * | ||
540 | * So at moment let leave metrics turned on forever -- it's | ||
541 | * ok for now but need to be revisited! | ||
542 | * | ||
543 | * (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)0); | ||
544 | * (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)0); | ||
545 | */ | ||
546 | } | ||
547 | |||
485 | static inline void p4_pmu_disable_event(struct perf_event *event) | 548 | static inline void p4_pmu_disable_event(struct perf_event *event) |
486 | { | 549 | { |
487 | struct hw_perf_event *hwc = &event->hw; | 550 | struct hw_perf_event *hwc = &event->hw; |
@@ -507,6 +570,26 @@ static void p4_pmu_disable_all(void) | |||
507 | continue; | 570 | continue; |
508 | p4_pmu_disable_event(event); | 571 | p4_pmu_disable_event(event); |
509 | } | 572 | } |
573 | |||
574 | p4_pmu_disable_pebs(); | ||
575 | } | ||
576 | |||
577 | /* configuration must be valid */ | ||
578 | static void p4_pmu_enable_pebs(u64 config) | ||
579 | { | ||
580 | struct p4_pebs_bind *bind; | ||
581 | unsigned int idx; | ||
582 | |||
583 | BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK); | ||
584 | |||
585 | idx = p4_config_unpack_metric(config); | ||
586 | if (idx == P4_PEBS_METRIC__none) | ||
587 | return; | ||
588 | |||
589 | bind = &p4_pebs_bind_map[idx]; | ||
590 | |||
591 | (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs); | ||
592 | (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert); | ||
510 | } | 593 | } |
511 | 594 | ||
512 | static void p4_pmu_enable_event(struct perf_event *event) | 595 | static void p4_pmu_enable_event(struct perf_event *event) |
@@ -515,9 +598,7 @@ static void p4_pmu_enable_event(struct perf_event *event) | |||
515 | int thread = p4_ht_config_thread(hwc->config); | 598 | int thread = p4_ht_config_thread(hwc->config); |
516 | u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config)); | 599 | u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config)); |
517 | unsigned int idx = p4_config_unpack_event(hwc->config); | 600 | unsigned int idx = p4_config_unpack_event(hwc->config); |
518 | unsigned int idx_cache = p4_config_unpack_cache_event(hwc->config); | ||
519 | struct p4_event_bind *bind; | 601 | struct p4_event_bind *bind; |
520 | struct p4_cache_event_bind *bind_cache; | ||
521 | u64 escr_addr, cccr; | 602 | u64 escr_addr, cccr; |
522 | 603 | ||
523 | bind = &p4_event_bind_map[idx]; | 604 | bind = &p4_event_bind_map[idx]; |
@@ -537,16 +618,10 @@ static void p4_pmu_enable_event(struct perf_event *event) | |||
537 | cccr = p4_config_unpack_cccr(hwc->config); | 618 | cccr = p4_config_unpack_cccr(hwc->config); |
538 | 619 | ||
539 | /* | 620 | /* |
540 | * it could be Cache event so that we need to | 621 | * it could be Cache event so we need to write metrics |
541 | * set metrics into additional MSRs | 622 | * into additional MSRs |
542 | */ | 623 | */ |
543 | BUILD_BUG_ON(P4_CACHE__MAX > P4_CCCR_CACHE_OPS_MASK); | 624 | p4_pmu_enable_pebs(hwc->config); |
544 | if (idx_cache > P4_CACHE__NONE && | ||
545 | idx_cache < ARRAY_SIZE(p4_cache_event_bind_map)) { | ||
546 | bind_cache = &p4_cache_event_bind_map[idx_cache]; | ||
547 | (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind_cache->metric_pebs); | ||
548 | (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind_cache->metric_vert); | ||
549 | } | ||
550 | 625 | ||
551 | (void)checking_wrmsrl(escr_addr, escr_conf); | 626 | (void)checking_wrmsrl(escr_addr, escr_conf); |
552 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | 627 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, |
@@ -829,6 +904,15 @@ static __initconst const struct x86_pmu p4_pmu = { | |||
829 | .max_period = (1ULL << 39) - 1, | 904 | .max_period = (1ULL << 39) - 1, |
830 | .hw_config = p4_hw_config, | 905 | .hw_config = p4_hw_config, |
831 | .schedule_events = p4_pmu_schedule_events, | 906 | .schedule_events = p4_pmu_schedule_events, |
907 | /* | ||
908 | * This handles erratum N15 in intel doc 249199-029, | ||
909 | * the counter may not be updated correctly on write | ||
910 | * so we need a second write operation to do the trick | ||
911 | * (the official workaround didn't work) | ||
912 | * | ||
913 | * the former idea is taken from OProfile code | ||
914 | */ | ||
915 | .perfctr_second_write = 1, | ||
832 | }; | 916 | }; |
833 | 917 | ||
834 | static __init int p4_pmu_init(void) | 918 | static __init int p4_pmu_init(void) |
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c new file mode 100644 index 000000000000..34b4dad6f0b8 --- /dev/null +++ b/arch/x86/kernel/cpu/scattered.c | |||
@@ -0,0 +1,63 @@ | |||
1 | /* | ||
2 | * Routines to indentify additional cpu features that are scattered in | ||
3 | * cpuid space. | ||
4 | */ | ||
5 | #include <linux/cpu.h> | ||
6 | |||
7 | #include <asm/pat.h> | ||
8 | #include <asm/processor.h> | ||
9 | |||
10 | #include <asm/apic.h> | ||
11 | |||
12 | struct cpuid_bit { | ||
13 | u16 feature; | ||
14 | u8 reg; | ||
15 | u8 bit; | ||
16 | u32 level; | ||
17 | u32 sub_leaf; | ||
18 | }; | ||
19 | |||
20 | enum cpuid_regs { | ||
21 | CR_EAX = 0, | ||
22 | CR_ECX, | ||
23 | CR_EDX, | ||
24 | CR_EBX | ||
25 | }; | ||
26 | |||
27 | void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) | ||
28 | { | ||
29 | u32 max_level; | ||
30 | u32 regs[4]; | ||
31 | const struct cpuid_bit *cb; | ||
32 | |||
33 | static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { | ||
34 | { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 }, | ||
35 | { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 }, | ||
36 | { X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 }, | ||
37 | { X86_FEATURE_PTS, CR_EAX, 6, 0x00000006, 0 }, | ||
38 | { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 }, | ||
39 | { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, | ||
40 | { X86_FEATURE_XSAVEOPT, CR_EAX, 0, 0x0000000d, 1 }, | ||
41 | { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 }, | ||
42 | { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a, 0 }, | ||
43 | { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 }, | ||
44 | { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 }, | ||
45 | { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a, 0 }, | ||
46 | { 0, 0, 0, 0, 0 } | ||
47 | }; | ||
48 | |||
49 | for (cb = cpuid_bits; cb->feature; cb++) { | ||
50 | |||
51 | /* Verify that the level is valid */ | ||
52 | max_level = cpuid_eax(cb->level & 0xffff0000); | ||
53 | if (max_level < cb->level || | ||
54 | max_level > (cb->level | 0xffff)) | ||
55 | continue; | ||
56 | |||
57 | cpuid_count(cb->level, cb->sub_leaf, ®s[CR_EAX], | ||
58 | ®s[CR_EBX], ®s[CR_ECX], ®s[CR_EDX]); | ||
59 | |||
60 | if (regs[cb->reg] & (1 << cb->bit)) | ||
61 | set_cpu_cap(c, cb->feature); | ||
62 | } | ||
63 | } | ||
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/topology.c index 10fa5684a662..4397e987a1cf 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/topology.c | |||
@@ -1,62 +1,14 @@ | |||
1 | /* | 1 | /* |
2 | * Routines to indentify additional cpu features that are scattered in | 2 | * Check for extended topology enumeration cpuid leaf 0xb and if it |
3 | * cpuid space. | 3 | * exists, use it for populating initial_apicid and cpu topology |
4 | * detection. | ||
4 | */ | 5 | */ |
5 | #include <linux/cpu.h> | ||
6 | 6 | ||
7 | #include <linux/cpu.h> | ||
8 | #include <asm/apic.h> | ||
7 | #include <asm/pat.h> | 9 | #include <asm/pat.h> |
8 | #include <asm/processor.h> | 10 | #include <asm/processor.h> |
9 | 11 | ||
10 | #include <asm/apic.h> | ||
11 | |||
12 | struct cpuid_bit { | ||
13 | u16 feature; | ||
14 | u8 reg; | ||
15 | u8 bit; | ||
16 | u32 level; | ||
17 | }; | ||
18 | |||
19 | enum cpuid_regs { | ||
20 | CR_EAX = 0, | ||
21 | CR_ECX, | ||
22 | CR_EDX, | ||
23 | CR_EBX | ||
24 | }; | ||
25 | |||
26 | void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) | ||
27 | { | ||
28 | u32 max_level; | ||
29 | u32 regs[4]; | ||
30 | const struct cpuid_bit *cb; | ||
31 | |||
32 | static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { | ||
33 | { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, | ||
34 | { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 }, | ||
35 | { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006 }, | ||
36 | { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007 }, | ||
37 | { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a }, | ||
38 | { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a }, | ||
39 | { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a }, | ||
40 | { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a }, | ||
41 | { 0, 0, 0, 0 } | ||
42 | }; | ||
43 | |||
44 | for (cb = cpuid_bits; cb->feature; cb++) { | ||
45 | |||
46 | /* Verify that the level is valid */ | ||
47 | max_level = cpuid_eax(cb->level & 0xffff0000); | ||
48 | if (max_level < cb->level || | ||
49 | max_level > (cb->level | 0xffff)) | ||
50 | continue; | ||
51 | |||
52 | cpuid(cb->level, ®s[CR_EAX], ®s[CR_EBX], | ||
53 | ®s[CR_ECX], ®s[CR_EDX]); | ||
54 | |||
55 | if (regs[cb->reg] & (1 << cb->bit)) | ||
56 | set_cpu_cap(c, cb->feature); | ||
57 | } | ||
58 | } | ||
59 | |||
60 | /* leaf 0xb SMT level */ | 12 | /* leaf 0xb SMT level */ |
61 | #define SMT_LEVEL 0 | 13 | #define SMT_LEVEL 0 |
62 | 14 | ||
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index b9d1ff588445..227b0448960d 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c | |||
@@ -51,7 +51,7 @@ static inline int __vmware_platform(void) | |||
51 | 51 | ||
52 | static unsigned long vmware_get_tsc_khz(void) | 52 | static unsigned long vmware_get_tsc_khz(void) |
53 | { | 53 | { |
54 | uint64_t tsc_hz; | 54 | uint64_t tsc_hz, lpj; |
55 | uint32_t eax, ebx, ecx, edx; | 55 | uint32_t eax, ebx, ecx, edx; |
56 | 56 | ||
57 | VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); | 57 | VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); |
@@ -62,6 +62,13 @@ static unsigned long vmware_get_tsc_khz(void) | |||
62 | printk(KERN_INFO "TSC freq read from hypervisor : %lu.%03lu MHz\n", | 62 | printk(KERN_INFO "TSC freq read from hypervisor : %lu.%03lu MHz\n", |
63 | (unsigned long) tsc_hz / 1000, | 63 | (unsigned long) tsc_hz / 1000, |
64 | (unsigned long) tsc_hz % 1000); | 64 | (unsigned long) tsc_hz % 1000); |
65 | |||
66 | if (!preset_lpj) { | ||
67 | lpj = ((u64)tsc_hz * 1000); | ||
68 | do_div(lpj, HZ); | ||
69 | preset_lpj = lpj; | ||
70 | } | ||
71 | |||
65 | return tsc_hz; | 72 | return tsc_hz; |
66 | } | 73 | } |
67 | 74 | ||
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index c89a386930b7..6e8752c1bd52 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -18,7 +18,6 @@ | |||
18 | 18 | ||
19 | #include <asm/stacktrace.h> | 19 | #include <asm/stacktrace.h> |
20 | 20 | ||
21 | #include "dumpstack.h" | ||
22 | 21 | ||
23 | int panic_on_unrecovered_nmi; | 22 | int panic_on_unrecovered_nmi; |
24 | int panic_on_io_nmi; | 23 | int panic_on_io_nmi; |
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h deleted file mode 100644 index e1a93be4fd44..000000000000 --- a/arch/x86/kernel/dumpstack.h +++ /dev/null | |||
@@ -1,56 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs | ||
4 | */ | ||
5 | |||
6 | #ifndef DUMPSTACK_H | ||
7 | #define DUMPSTACK_H | ||
8 | |||
9 | #ifdef CONFIG_X86_32 | ||
10 | #define STACKSLOTS_PER_LINE 8 | ||
11 | #define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :) | ||
12 | #else | ||
13 | #define STACKSLOTS_PER_LINE 4 | ||
14 | #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) | ||
15 | #endif | ||
16 | |||
17 | #include <linux/uaccess.h> | ||
18 | |||
19 | extern void | ||
20 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
21 | unsigned long *stack, unsigned long bp, char *log_lvl); | ||
22 | |||
23 | extern void | ||
24 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
25 | unsigned long *sp, unsigned long bp, char *log_lvl); | ||
26 | |||
27 | extern unsigned int code_bytes; | ||
28 | |||
29 | /* The form of the top of the frame on the stack */ | ||
30 | struct stack_frame { | ||
31 | struct stack_frame *next_frame; | ||
32 | unsigned long return_address; | ||
33 | }; | ||
34 | |||
35 | struct stack_frame_ia32 { | ||
36 | u32 next_frame; | ||
37 | u32 return_address; | ||
38 | }; | ||
39 | |||
40 | static inline unsigned long rewind_frame_pointer(int n) | ||
41 | { | ||
42 | struct stack_frame *frame; | ||
43 | |||
44 | get_bp(frame); | ||
45 | |||
46 | #ifdef CONFIG_FRAME_POINTER | ||
47 | while (n--) { | ||
48 | if (probe_kernel_address(&frame->next_frame, frame)) | ||
49 | break; | ||
50 | } | ||
51 | #endif | ||
52 | |||
53 | return (unsigned long)frame; | ||
54 | } | ||
55 | |||
56 | #endif /* DUMPSTACK_H */ | ||
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 11540a189d93..0f6376ffa2d9 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c | |||
@@ -16,8 +16,6 @@ | |||
16 | 16 | ||
17 | #include <asm/stacktrace.h> | 17 | #include <asm/stacktrace.h> |
18 | 18 | ||
19 | #include "dumpstack.h" | ||
20 | |||
21 | 19 | ||
22 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | 20 | void dump_trace(struct task_struct *task, struct pt_regs *regs, |
23 | unsigned long *stack, unsigned long bp, | 21 | unsigned long *stack, unsigned long bp, |
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 272c9f1f05f3..57a21f11c791 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c | |||
@@ -16,7 +16,6 @@ | |||
16 | 16 | ||
17 | #include <asm/stacktrace.h> | 17 | #include <asm/stacktrace.h> |
18 | 18 | ||
19 | #include "dumpstack.h" | ||
20 | 19 | ||
21 | #define N_EXCEPTION_STACKS_END \ | 20 | #define N_EXCEPTION_STACKS_END \ |
22 | (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2) | 21 | (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2) |
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index ebdb85cf2686..e5cc7e82e60d 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <asm/apic.h> | 18 | #include <asm/apic.h> |
19 | #include <asm/iommu.h> | 19 | #include <asm/iommu.h> |
20 | #include <asm/gart.h> | 20 | #include <asm/gart.h> |
21 | #include <asm/hpet.h> | ||
21 | 22 | ||
22 | static void __init fix_hypertransport_config(int num, int slot, int func) | 23 | static void __init fix_hypertransport_config(int num, int slot, int func) |
23 | { | 24 | { |
@@ -191,6 +192,21 @@ static void __init ati_bugs_contd(int num, int slot, int func) | |||
191 | } | 192 | } |
192 | #endif | 193 | #endif |
193 | 194 | ||
195 | /* | ||
196 | * Force the read back of the CMP register in hpet_next_event() | ||
197 | * to work around the problem that the CMP register write seems to be | ||
198 | * delayed. See hpet_next_event() for details. | ||
199 | * | ||
200 | * We do this on all SMBUS incarnations for now until we have more | ||
201 | * information about the affected chipsets. | ||
202 | */ | ||
203 | static void __init ati_hpet_bugs(int num, int slot, int func) | ||
204 | { | ||
205 | #ifdef CONFIG_HPET_TIMER | ||
206 | hpet_readback_cmp = 1; | ||
207 | #endif | ||
208 | } | ||
209 | |||
194 | #define QFLAG_APPLY_ONCE 0x1 | 210 | #define QFLAG_APPLY_ONCE 0x1 |
195 | #define QFLAG_APPLIED 0x2 | 211 | #define QFLAG_APPLIED 0x2 |
196 | #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) | 212 | #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) |
@@ -220,6 +236,8 @@ static struct chipset early_qrk[] __initdata = { | |||
220 | PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs }, | 236 | PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs }, |
221 | { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, | 237 | { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, |
222 | PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd }, | 238 | PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd }, |
239 | { PCI_VENDOR_ID_ATI, PCI_ANY_ID, | ||
240 | PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_hpet_bugs }, | ||
223 | {} | 241 | {} |
224 | }; | 242 | }; |
225 | 243 | ||
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index cd49141cf153..258e93fa2630 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -611,14 +611,14 @@ ldt_ss: | |||
611 | * compensating for the offset by changing to the ESPFIX segment with | 611 | * compensating for the offset by changing to the ESPFIX segment with |
612 | * a base address that matches for the difference. | 612 | * a base address that matches for the difference. |
613 | */ | 613 | */ |
614 | #define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8) | ||
614 | mov %esp, %edx /* load kernel esp */ | 615 | mov %esp, %edx /* load kernel esp */ |
615 | mov PT_OLDESP(%esp), %eax /* load userspace esp */ | 616 | mov PT_OLDESP(%esp), %eax /* load userspace esp */ |
616 | mov %dx, %ax /* eax: new kernel esp */ | 617 | mov %dx, %ax /* eax: new kernel esp */ |
617 | sub %eax, %edx /* offset (low word is 0) */ | 618 | sub %eax, %edx /* offset (low word is 0) */ |
618 | PER_CPU(gdt_page, %ebx) | ||
619 | shr $16, %edx | 619 | shr $16, %edx |
620 | mov %dl, GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx) /* bits 16..23 */ | 620 | mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */ |
621 | mov %dh, GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx) /* bits 24..31 */ | 621 | mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */ |
622 | pushl $__ESPFIX_SS | 622 | pushl $__ESPFIX_SS |
623 | CFI_ADJUST_CFA_OFFSET 4 | 623 | CFI_ADJUST_CFA_OFFSET 4 |
624 | push %eax /* new kernel esp */ | 624 | push %eax /* new kernel esp */ |
@@ -791,9 +791,8 @@ ptregs_clone: | |||
791 | * normal stack and adjusts ESP with the matching offset. | 791 | * normal stack and adjusts ESP with the matching offset. |
792 | */ | 792 | */ |
793 | /* fixup the stack */ | 793 | /* fixup the stack */ |
794 | PER_CPU(gdt_page, %ebx) | 794 | mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */ |
795 | mov GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx), %al /* bits 16..23 */ | 795 | mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */ |
796 | mov GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx), %ah /* bits 24..31 */ | ||
797 | shl $16, %eax | 796 | shl $16, %eax |
798 | addl %esp, %eax /* the adjusted stack pointer */ | 797 | addl %esp, %eax /* the adjusted stack pointer */ |
799 | pushl $__KERNEL_DS | 798 | pushl $__KERNEL_DS |
@@ -1166,6 +1165,9 @@ ENTRY(xen_failsafe_callback) | |||
1166 | .previous | 1165 | .previous |
1167 | ENDPROC(xen_failsafe_callback) | 1166 | ENDPROC(xen_failsafe_callback) |
1168 | 1167 | ||
1168 | BUILD_INTERRUPT3(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK, | ||
1169 | xen_evtchn_do_upcall) | ||
1170 | |||
1169 | #endif /* CONFIG_XEN */ | 1171 | #endif /* CONFIG_XEN */ |
1170 | 1172 | ||
1171 | #ifdef CONFIG_FUNCTION_TRACER | 1173 | #ifdef CONFIG_FUNCTION_TRACER |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 0697ff139837..c5ea5cdbe7b3 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -571,8 +571,8 @@ auditsys: | |||
571 | * masked off. | 571 | * masked off. |
572 | */ | 572 | */ |
573 | sysret_audit: | 573 | sysret_audit: |
574 | movq %rax,%rsi /* second arg, syscall return value */ | 574 | movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */ |
575 | cmpq $0,%rax /* is it < 0? */ | 575 | cmpq $0,%rsi /* is it < 0? */ |
576 | setl %al /* 1 if so, 0 if not */ | 576 | setl %al /* 1 if so, 0 if not */ |
577 | movzbl %al,%edi /* zero-extend that into %edi */ | 577 | movzbl %al,%edi /* zero-extend that into %edi */ |
578 | inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ | 578 | inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ |
@@ -1065,6 +1065,7 @@ ENTRY(\sym) | |||
1065 | END(\sym) | 1065 | END(\sym) |
1066 | .endm | 1066 | .endm |
1067 | 1067 | ||
1068 | #define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8) | ||
1068 | .macro paranoidzeroentry_ist sym do_sym ist | 1069 | .macro paranoidzeroentry_ist sym do_sym ist |
1069 | ENTRY(\sym) | 1070 | ENTRY(\sym) |
1070 | INTR_FRAME | 1071 | INTR_FRAME |
@@ -1076,10 +1077,9 @@ ENTRY(\sym) | |||
1076 | TRACE_IRQS_OFF | 1077 | TRACE_IRQS_OFF |
1077 | movq %rsp,%rdi /* pt_regs pointer */ | 1078 | movq %rsp,%rdi /* pt_regs pointer */ |
1078 | xorl %esi,%esi /* no error code */ | 1079 | xorl %esi,%esi /* no error code */ |
1079 | PER_CPU(init_tss, %r12) | 1080 | subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) |
1080 | subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12) | ||
1081 | call \do_sym | 1081 | call \do_sym |
1082 | addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12) | 1082 | addq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) |
1083 | jmp paranoid_exit /* %ebx: no swapgs flag */ | 1083 | jmp paranoid_exit /* %ebx: no swapgs flag */ |
1084 | CFI_ENDPROC | 1084 | CFI_ENDPROC |
1085 | END(\sym) | 1085 | END(\sym) |
@@ -1329,6 +1329,9 @@ ENTRY(xen_failsafe_callback) | |||
1329 | CFI_ENDPROC | 1329 | CFI_ENDPROC |
1330 | END(xen_failsafe_callback) | 1330 | END(xen_failsafe_callback) |
1331 | 1331 | ||
1332 | apicinterrupt XEN_HVM_EVTCHN_CALLBACK \ | ||
1333 | xen_hvm_callback_vector xen_evtchn_do_upcall | ||
1334 | |||
1332 | #endif /* CONFIG_XEN */ | 1335 | #endif /* CONFIG_XEN */ |
1333 | 1336 | ||
1334 | /* | 1337 | /* |
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index b2e246037392..784360c0625c 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c | |||
@@ -20,7 +20,7 @@ | |||
20 | 20 | ||
21 | static void __init i386_default_early_setup(void) | 21 | static void __init i386_default_early_setup(void) |
22 | { | 22 | { |
23 | /* Initilize 32bit specific setup functions */ | 23 | /* Initialize 32bit specific setup functions */ |
24 | x86_init.resources.probe_roms = probe_roms; | 24 | x86_init.resources.probe_roms = probe_roms; |
25 | x86_init.resources.reserve_resources = i386_reserve_resources; | 25 | x86_init.resources.reserve_resources = i386_reserve_resources; |
26 | x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc; | 26 | x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc; |
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 3d1e6f16b7a6..239046bd447f 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -234,9 +234,8 @@ ENTRY(secondary_startup_64) | |||
234 | * init data section till per cpu areas are set up. | 234 | * init data section till per cpu areas are set up. |
235 | */ | 235 | */ |
236 | movl $MSR_GS_BASE,%ecx | 236 | movl $MSR_GS_BASE,%ecx |
237 | movq initial_gs(%rip),%rax | 237 | movl initial_gs(%rip),%eax |
238 | movq %rax,%rdx | 238 | movl initial_gs+4(%rip),%edx |
239 | shrq $32,%rdx | ||
240 | wrmsr | 239 | wrmsr |
241 | 240 | ||
242 | /* esi is pointer to real mode structure with interesting info. | 241 | /* esi is pointer to real mode structure with interesting info. |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index a198b7c87a12..33dbcc4ec5ff 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -16,7 +16,6 @@ | |||
16 | #include <asm/hpet.h> | 16 | #include <asm/hpet.h> |
17 | 17 | ||
18 | #define HPET_MASK CLOCKSOURCE_MASK(32) | 18 | #define HPET_MASK CLOCKSOURCE_MASK(32) |
19 | #define HPET_SHIFT 22 | ||
20 | 19 | ||
21 | /* FSEC = 10^-15 | 20 | /* FSEC = 10^-15 |
22 | NSEC = 10^-9 */ | 21 | NSEC = 10^-9 */ |
@@ -787,7 +786,6 @@ static struct clocksource clocksource_hpet = { | |||
787 | .rating = 250, | 786 | .rating = 250, |
788 | .read = read_hpet, | 787 | .read = read_hpet, |
789 | .mask = HPET_MASK, | 788 | .mask = HPET_MASK, |
790 | .shift = HPET_SHIFT, | ||
791 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | 789 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
792 | .resume = hpet_resume_counter, | 790 | .resume = hpet_resume_counter, |
793 | #ifdef CONFIG_X86_64 | 791 | #ifdef CONFIG_X86_64 |
@@ -798,6 +796,7 @@ static struct clocksource clocksource_hpet = { | |||
798 | static int hpet_clocksource_register(void) | 796 | static int hpet_clocksource_register(void) |
799 | { | 797 | { |
800 | u64 start, now; | 798 | u64 start, now; |
799 | u64 hpet_freq; | ||
801 | cycle_t t1; | 800 | cycle_t t1; |
802 | 801 | ||
803 | /* Start the counter */ | 802 | /* Start the counter */ |
@@ -832,9 +831,15 @@ static int hpet_clocksource_register(void) | |||
832 | * mult = (hpet_period * 2^shift)/10^6 | 831 | * mult = (hpet_period * 2^shift)/10^6 |
833 | * mult = (hpet_period << shift)/FSEC_PER_NSEC | 832 | * mult = (hpet_period << shift)/FSEC_PER_NSEC |
834 | */ | 833 | */ |
835 | clocksource_hpet.mult = div_sc(hpet_period, FSEC_PER_NSEC, HPET_SHIFT); | ||
836 | 834 | ||
837 | clocksource_register(&clocksource_hpet); | 835 | /* Need to convert hpet_period (fsec/cyc) to cyc/sec: |
836 | * | ||
837 | * cyc/sec = FSEC_PER_SEC/hpet_period(fsec/cyc) | ||
838 | * cyc/sec = (FSEC_PER_NSEC * NSEC_PER_SEC)/hpet_period | ||
839 | */ | ||
840 | hpet_freq = FSEC_PER_NSEC * NSEC_PER_SEC; | ||
841 | do_div(hpet_freq, hpet_period); | ||
842 | clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq); | ||
838 | 843 | ||
839 | return 0; | 844 | return 0; |
840 | } | 845 | } |
@@ -964,7 +969,7 @@ fs_initcall(hpet_late_init); | |||
964 | 969 | ||
965 | void hpet_disable(void) | 970 | void hpet_disable(void) |
966 | { | 971 | { |
967 | if (is_hpet_capable()) { | 972 | if (is_hpet_capable() && hpet_virt_address) { |
968 | unsigned int cfg = hpet_readl(HPET_CFG); | 973 | unsigned int cfg = hpet_readl(HPET_CFG); |
969 | 974 | ||
970 | if (hpet_legacy_int_enabled) { | 975 | if (hpet_legacy_int_enabled) { |
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index a8f1b803d2fd..a474ec37c32f 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c | |||
@@ -208,6 +208,9 @@ int arch_bp_generic_fields(int x86_len, int x86_type, | |||
208 | { | 208 | { |
209 | /* Len */ | 209 | /* Len */ |
210 | switch (x86_len) { | 210 | switch (x86_len) { |
211 | case X86_BREAKPOINT_LEN_X: | ||
212 | *gen_len = sizeof(long); | ||
213 | break; | ||
211 | case X86_BREAKPOINT_LEN_1: | 214 | case X86_BREAKPOINT_LEN_1: |
212 | *gen_len = HW_BREAKPOINT_LEN_1; | 215 | *gen_len = HW_BREAKPOINT_LEN_1; |
213 | break; | 216 | break; |
@@ -251,6 +254,29 @@ static int arch_build_bp_info(struct perf_event *bp) | |||
251 | 254 | ||
252 | info->address = bp->attr.bp_addr; | 255 | info->address = bp->attr.bp_addr; |
253 | 256 | ||
257 | /* Type */ | ||
258 | switch (bp->attr.bp_type) { | ||
259 | case HW_BREAKPOINT_W: | ||
260 | info->type = X86_BREAKPOINT_WRITE; | ||
261 | break; | ||
262 | case HW_BREAKPOINT_W | HW_BREAKPOINT_R: | ||
263 | info->type = X86_BREAKPOINT_RW; | ||
264 | break; | ||
265 | case HW_BREAKPOINT_X: | ||
266 | info->type = X86_BREAKPOINT_EXECUTE; | ||
267 | /* | ||
268 | * x86 inst breakpoints need to have a specific undefined len. | ||
269 | * But we still need to check userspace is not trying to setup | ||
270 | * an unsupported length, to get a range breakpoint for example. | ||
271 | */ | ||
272 | if (bp->attr.bp_len == sizeof(long)) { | ||
273 | info->len = X86_BREAKPOINT_LEN_X; | ||
274 | return 0; | ||
275 | } | ||
276 | default: | ||
277 | return -EINVAL; | ||
278 | } | ||
279 | |||
254 | /* Len */ | 280 | /* Len */ |
255 | switch (bp->attr.bp_len) { | 281 | switch (bp->attr.bp_len) { |
256 | case HW_BREAKPOINT_LEN_1: | 282 | case HW_BREAKPOINT_LEN_1: |
@@ -271,21 +297,6 @@ static int arch_build_bp_info(struct perf_event *bp) | |||
271 | return -EINVAL; | 297 | return -EINVAL; |
272 | } | 298 | } |
273 | 299 | ||
274 | /* Type */ | ||
275 | switch (bp->attr.bp_type) { | ||
276 | case HW_BREAKPOINT_W: | ||
277 | info->type = X86_BREAKPOINT_WRITE; | ||
278 | break; | ||
279 | case HW_BREAKPOINT_W | HW_BREAKPOINT_R: | ||
280 | info->type = X86_BREAKPOINT_RW; | ||
281 | break; | ||
282 | case HW_BREAKPOINT_X: | ||
283 | info->type = X86_BREAKPOINT_EXECUTE; | ||
284 | break; | ||
285 | default: | ||
286 | return -EINVAL; | ||
287 | } | ||
288 | |||
289 | return 0; | 300 | return 0; |
290 | } | 301 | } |
291 | /* | 302 | /* |
@@ -305,6 +316,9 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) | |||
305 | ret = -EINVAL; | 316 | ret = -EINVAL; |
306 | 317 | ||
307 | switch (info->len) { | 318 | switch (info->len) { |
319 | case X86_BREAKPOINT_LEN_X: | ||
320 | align = sizeof(long) -1; | ||
321 | break; | ||
308 | case X86_BREAKPOINT_LEN_1: | 322 | case X86_BREAKPOINT_LEN_1: |
309 | align = 0; | 323 | align = 0; |
310 | break; | 324 | break; |
@@ -466,6 +480,13 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) | |||
466 | 480 | ||
467 | perf_bp_event(bp, args->regs); | 481 | perf_bp_event(bp, args->regs); |
468 | 482 | ||
483 | /* | ||
484 | * Set up resume flag to avoid breakpoint recursion when | ||
485 | * returning back to origin. | ||
486 | */ | ||
487 | if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE) | ||
488 | args->regs->flags |= X86_EFLAGS_RF; | ||
489 | |||
469 | rcu_read_unlock(); | 490 | rcu_read_unlock(); |
470 | } | 491 | } |
471 | /* | 492 | /* |
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 86cef6b32253..c4444bce8469 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
@@ -107,7 +107,7 @@ void __cpuinit fpu_init(void) | |||
107 | } | 107 | } |
108 | #endif /* CONFIG_X86_64 */ | 108 | #endif /* CONFIG_X86_64 */ |
109 | 109 | ||
110 | static void fpu_finit(struct fpu *fpu) | 110 | void fpu_finit(struct fpu *fpu) |
111 | { | 111 | { |
112 | #ifdef CONFIG_X86_32 | 112 | #ifdef CONFIG_X86_32 |
113 | if (!HAVE_HWFP) { | 113 | if (!HAVE_HWFP) { |
@@ -132,6 +132,7 @@ static void fpu_finit(struct fpu *fpu) | |||
132 | fp->fos = 0xffff0000u; | 132 | fp->fos = 0xffff0000u; |
133 | } | 133 | } |
134 | } | 134 | } |
135 | EXPORT_SYMBOL_GPL(fpu_finit); | ||
135 | 136 | ||
136 | /* | 137 | /* |
137 | * The _current_ task is using the FPU for the first time | 138 | * The _current_ task is using the FPU for the first time |
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 7c9f02c130f3..cafa7c80ac95 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c | |||
@@ -276,16 +276,6 @@ static struct sys_device device_i8259A = { | |||
276 | .cls = &i8259_sysdev_class, | 276 | .cls = &i8259_sysdev_class, |
277 | }; | 277 | }; |
278 | 278 | ||
279 | static int __init i8259A_init_sysfs(void) | ||
280 | { | ||
281 | int error = sysdev_class_register(&i8259_sysdev_class); | ||
282 | if (!error) | ||
283 | error = sysdev_register(&device_i8259A); | ||
284 | return error; | ||
285 | } | ||
286 | |||
287 | device_initcall(i8259A_init_sysfs); | ||
288 | |||
289 | static void mask_8259A(void) | 279 | static void mask_8259A(void) |
290 | { | 280 | { |
291 | unsigned long flags; | 281 | unsigned long flags; |
@@ -407,3 +397,18 @@ struct legacy_pic default_legacy_pic = { | |||
407 | }; | 397 | }; |
408 | 398 | ||
409 | struct legacy_pic *legacy_pic = &default_legacy_pic; | 399 | struct legacy_pic *legacy_pic = &default_legacy_pic; |
400 | |||
401 | static int __init i8259A_init_sysfs(void) | ||
402 | { | ||
403 | int error; | ||
404 | |||
405 | if (legacy_pic != &default_legacy_pic) | ||
406 | return 0; | ||
407 | |||
408 | error = sysdev_class_register(&i8259_sysdev_class); | ||
409 | if (!error) | ||
410 | error = sysdev_register(&device_i8259A); | ||
411 | return error; | ||
412 | } | ||
413 | |||
414 | device_initcall(i8259A_init_sysfs); | ||
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 4f4af75b9482..ef10940e1af0 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
@@ -49,55 +49,94 @@ | |||
49 | #include <asm/system.h> | 49 | #include <asm/system.h> |
50 | #include <asm/apic.h> | 50 | #include <asm/apic.h> |
51 | 51 | ||
52 | /** | 52 | struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = |
53 | * pt_regs_to_gdb_regs - Convert ptrace regs to GDB regs | ||
54 | * @gdb_regs: A pointer to hold the registers in the order GDB wants. | ||
55 | * @regs: The &struct pt_regs of the current process. | ||
56 | * | ||
57 | * Convert the pt_regs in @regs into the format for registers that | ||
58 | * GDB expects, stored in @gdb_regs. | ||
59 | */ | ||
60 | void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) | ||
61 | { | 53 | { |
62 | #ifndef CONFIG_X86_32 | 54 | #ifdef CONFIG_X86_32 |
63 | u32 *gdb_regs32 = (u32 *)gdb_regs; | 55 | { "ax", 4, offsetof(struct pt_regs, ax) }, |
56 | { "cx", 4, offsetof(struct pt_regs, cx) }, | ||
57 | { "dx", 4, offsetof(struct pt_regs, dx) }, | ||
58 | { "bx", 4, offsetof(struct pt_regs, bx) }, | ||
59 | { "sp", 4, offsetof(struct pt_regs, sp) }, | ||
60 | { "bp", 4, offsetof(struct pt_regs, bp) }, | ||
61 | { "si", 4, offsetof(struct pt_regs, si) }, | ||
62 | { "di", 4, offsetof(struct pt_regs, di) }, | ||
63 | { "ip", 4, offsetof(struct pt_regs, ip) }, | ||
64 | { "flags", 4, offsetof(struct pt_regs, flags) }, | ||
65 | { "cs", 4, offsetof(struct pt_regs, cs) }, | ||
66 | { "ss", 4, offsetof(struct pt_regs, ss) }, | ||
67 | { "ds", 4, offsetof(struct pt_regs, ds) }, | ||
68 | { "es", 4, offsetof(struct pt_regs, es) }, | ||
69 | { "fs", 4, -1 }, | ||
70 | { "gs", 4, -1 }, | ||
71 | #else | ||
72 | { "ax", 8, offsetof(struct pt_regs, ax) }, | ||
73 | { "bx", 8, offsetof(struct pt_regs, bx) }, | ||
74 | { "cx", 8, offsetof(struct pt_regs, cx) }, | ||
75 | { "dx", 8, offsetof(struct pt_regs, dx) }, | ||
76 | { "si", 8, offsetof(struct pt_regs, dx) }, | ||
77 | { "di", 8, offsetof(struct pt_regs, di) }, | ||
78 | { "bp", 8, offsetof(struct pt_regs, bp) }, | ||
79 | { "sp", 8, offsetof(struct pt_regs, sp) }, | ||
80 | { "r8", 8, offsetof(struct pt_regs, r8) }, | ||
81 | { "r9", 8, offsetof(struct pt_regs, r9) }, | ||
82 | { "r10", 8, offsetof(struct pt_regs, r10) }, | ||
83 | { "r11", 8, offsetof(struct pt_regs, r11) }, | ||
84 | { "r12", 8, offsetof(struct pt_regs, r12) }, | ||
85 | { "r13", 8, offsetof(struct pt_regs, r13) }, | ||
86 | { "r14", 8, offsetof(struct pt_regs, r14) }, | ||
87 | { "r15", 8, offsetof(struct pt_regs, r15) }, | ||
88 | { "ip", 8, offsetof(struct pt_regs, ip) }, | ||
89 | { "flags", 4, offsetof(struct pt_regs, flags) }, | ||
90 | { "cs", 4, offsetof(struct pt_regs, cs) }, | ||
91 | { "ss", 4, offsetof(struct pt_regs, ss) }, | ||
64 | #endif | 92 | #endif |
65 | gdb_regs[GDB_AX] = regs->ax; | 93 | }; |
66 | gdb_regs[GDB_BX] = regs->bx; | 94 | |
67 | gdb_regs[GDB_CX] = regs->cx; | 95 | int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) |
68 | gdb_regs[GDB_DX] = regs->dx; | 96 | { |
69 | gdb_regs[GDB_SI] = regs->si; | 97 | if ( |
70 | gdb_regs[GDB_DI] = regs->di; | ||
71 | gdb_regs[GDB_BP] = regs->bp; | ||
72 | gdb_regs[GDB_PC] = regs->ip; | ||
73 | #ifdef CONFIG_X86_32 | 98 | #ifdef CONFIG_X86_32 |
74 | gdb_regs[GDB_PS] = regs->flags; | 99 | regno == GDB_SS || regno == GDB_FS || regno == GDB_GS || |
75 | gdb_regs[GDB_DS] = regs->ds; | 100 | #endif |
76 | gdb_regs[GDB_ES] = regs->es; | 101 | regno == GDB_SP || regno == GDB_ORIG_AX) |
77 | gdb_regs[GDB_CS] = regs->cs; | 102 | return 0; |
78 | gdb_regs[GDB_FS] = 0xFFFF; | 103 | |
79 | gdb_regs[GDB_GS] = 0xFFFF; | 104 | if (dbg_reg_def[regno].offset != -1) |
80 | if (user_mode_vm(regs)) { | 105 | memcpy((void *)regs + dbg_reg_def[regno].offset, mem, |
81 | gdb_regs[GDB_SS] = regs->ss; | 106 | dbg_reg_def[regno].size); |
82 | gdb_regs[GDB_SP] = regs->sp; | 107 | return 0; |
83 | } else { | 108 | } |
84 | gdb_regs[GDB_SS] = __KERNEL_DS; | 109 | |
85 | gdb_regs[GDB_SP] = kernel_stack_pointer(regs); | 110 | char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) |
111 | { | ||
112 | if (regno == GDB_ORIG_AX) { | ||
113 | memcpy(mem, ®s->orig_ax, sizeof(regs->orig_ax)); | ||
114 | return "orig_ax"; | ||
86 | } | 115 | } |
87 | #else | 116 | if (regno >= DBG_MAX_REG_NUM || regno < 0) |
88 | gdb_regs[GDB_R8] = regs->r8; | 117 | return NULL; |
89 | gdb_regs[GDB_R9] = regs->r9; | 118 | |
90 | gdb_regs[GDB_R10] = regs->r10; | 119 | if (dbg_reg_def[regno].offset != -1) |
91 | gdb_regs[GDB_R11] = regs->r11; | 120 | memcpy(mem, (void *)regs + dbg_reg_def[regno].offset, |
92 | gdb_regs[GDB_R12] = regs->r12; | 121 | dbg_reg_def[regno].size); |
93 | gdb_regs[GDB_R13] = regs->r13; | 122 | |
94 | gdb_regs[GDB_R14] = regs->r14; | 123 | switch (regno) { |
95 | gdb_regs[GDB_R15] = regs->r15; | 124 | #ifdef CONFIG_X86_32 |
96 | gdb_regs32[GDB_PS] = regs->flags; | 125 | case GDB_SS: |
97 | gdb_regs32[GDB_CS] = regs->cs; | 126 | if (!user_mode_vm(regs)) |
98 | gdb_regs32[GDB_SS] = regs->ss; | 127 | *(unsigned long *)mem = __KERNEL_DS; |
99 | gdb_regs[GDB_SP] = kernel_stack_pointer(regs); | 128 | break; |
129 | case GDB_SP: | ||
130 | if (!user_mode_vm(regs)) | ||
131 | *(unsigned long *)mem = kernel_stack_pointer(regs); | ||
132 | break; | ||
133 | case GDB_GS: | ||
134 | case GDB_FS: | ||
135 | *(unsigned long *)mem = 0xFFFF; | ||
136 | break; | ||
100 | #endif | 137 | #endif |
138 | } | ||
139 | return dbg_reg_def[regno].name; | ||
101 | } | 140 | } |
102 | 141 | ||
103 | /** | 142 | /** |
@@ -150,54 +189,13 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) | |||
150 | gdb_regs[GDB_SP] = p->thread.sp; | 189 | gdb_regs[GDB_SP] = p->thread.sp; |
151 | } | 190 | } |
152 | 191 | ||
153 | /** | ||
154 | * gdb_regs_to_pt_regs - Convert GDB regs to ptrace regs. | ||
155 | * @gdb_regs: A pointer to hold the registers we've received from GDB. | ||
156 | * @regs: A pointer to a &struct pt_regs to hold these values in. | ||
157 | * | ||
158 | * Convert the GDB regs in @gdb_regs into the pt_regs, and store them | ||
159 | * in @regs. | ||
160 | */ | ||
161 | void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) | ||
162 | { | ||
163 | #ifndef CONFIG_X86_32 | ||
164 | u32 *gdb_regs32 = (u32 *)gdb_regs; | ||
165 | #endif | ||
166 | regs->ax = gdb_regs[GDB_AX]; | ||
167 | regs->bx = gdb_regs[GDB_BX]; | ||
168 | regs->cx = gdb_regs[GDB_CX]; | ||
169 | regs->dx = gdb_regs[GDB_DX]; | ||
170 | regs->si = gdb_regs[GDB_SI]; | ||
171 | regs->di = gdb_regs[GDB_DI]; | ||
172 | regs->bp = gdb_regs[GDB_BP]; | ||
173 | regs->ip = gdb_regs[GDB_PC]; | ||
174 | #ifdef CONFIG_X86_32 | ||
175 | regs->flags = gdb_regs[GDB_PS]; | ||
176 | regs->ds = gdb_regs[GDB_DS]; | ||
177 | regs->es = gdb_regs[GDB_ES]; | ||
178 | regs->cs = gdb_regs[GDB_CS]; | ||
179 | #else | ||
180 | regs->r8 = gdb_regs[GDB_R8]; | ||
181 | regs->r9 = gdb_regs[GDB_R9]; | ||
182 | regs->r10 = gdb_regs[GDB_R10]; | ||
183 | regs->r11 = gdb_regs[GDB_R11]; | ||
184 | regs->r12 = gdb_regs[GDB_R12]; | ||
185 | regs->r13 = gdb_regs[GDB_R13]; | ||
186 | regs->r14 = gdb_regs[GDB_R14]; | ||
187 | regs->r15 = gdb_regs[GDB_R15]; | ||
188 | regs->flags = gdb_regs32[GDB_PS]; | ||
189 | regs->cs = gdb_regs32[GDB_CS]; | ||
190 | regs->ss = gdb_regs32[GDB_SS]; | ||
191 | #endif | ||
192 | } | ||
193 | |||
194 | static struct hw_breakpoint { | 192 | static struct hw_breakpoint { |
195 | unsigned enabled; | 193 | unsigned enabled; |
196 | unsigned long addr; | 194 | unsigned long addr; |
197 | int len; | 195 | int len; |
198 | int type; | 196 | int type; |
199 | struct perf_event **pev; | 197 | struct perf_event **pev; |
200 | } breakinfo[4]; | 198 | } breakinfo[HBP_NUM]; |
201 | 199 | ||
202 | static unsigned long early_dr7; | 200 | static unsigned long early_dr7; |
203 | 201 | ||
@@ -205,7 +203,7 @@ static void kgdb_correct_hw_break(void) | |||
205 | { | 203 | { |
206 | int breakno; | 204 | int breakno; |
207 | 205 | ||
208 | for (breakno = 0; breakno < 4; breakno++) { | 206 | for (breakno = 0; breakno < HBP_NUM; breakno++) { |
209 | struct perf_event *bp; | 207 | struct perf_event *bp; |
210 | struct arch_hw_breakpoint *info; | 208 | struct arch_hw_breakpoint *info; |
211 | int val; | 209 | int val; |
@@ -292,10 +290,10 @@ kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) | |||
292 | { | 290 | { |
293 | int i; | 291 | int i; |
294 | 292 | ||
295 | for (i = 0; i < 4; i++) | 293 | for (i = 0; i < HBP_NUM; i++) |
296 | if (breakinfo[i].addr == addr && breakinfo[i].enabled) | 294 | if (breakinfo[i].addr == addr && breakinfo[i].enabled) |
297 | break; | 295 | break; |
298 | if (i == 4) | 296 | if (i == HBP_NUM) |
299 | return -1; | 297 | return -1; |
300 | 298 | ||
301 | if (hw_break_release_slot(i)) { | 299 | if (hw_break_release_slot(i)) { |
@@ -313,7 +311,7 @@ static void kgdb_remove_all_hw_break(void) | |||
313 | int cpu = raw_smp_processor_id(); | 311 | int cpu = raw_smp_processor_id(); |
314 | struct perf_event *bp; | 312 | struct perf_event *bp; |
315 | 313 | ||
316 | for (i = 0; i < 4; i++) { | 314 | for (i = 0; i < HBP_NUM; i++) { |
317 | if (!breakinfo[i].enabled) | 315 | if (!breakinfo[i].enabled) |
318 | continue; | 316 | continue; |
319 | bp = *per_cpu_ptr(breakinfo[i].pev, cpu); | 317 | bp = *per_cpu_ptr(breakinfo[i].pev, cpu); |
@@ -333,10 +331,10 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) | |||
333 | { | 331 | { |
334 | int i; | 332 | int i; |
335 | 333 | ||
336 | for (i = 0; i < 4; i++) | 334 | for (i = 0; i < HBP_NUM; i++) |
337 | if (!breakinfo[i].enabled) | 335 | if (!breakinfo[i].enabled) |
338 | break; | 336 | break; |
339 | if (i == 4) | 337 | if (i == HBP_NUM) |
340 | return -1; | 338 | return -1; |
341 | 339 | ||
342 | switch (bptype) { | 340 | switch (bptype) { |
@@ -397,7 +395,7 @@ void kgdb_disable_hw_debug(struct pt_regs *regs) | |||
397 | 395 | ||
398 | /* Disable hardware debugging while we are in kgdb: */ | 396 | /* Disable hardware debugging while we are in kgdb: */ |
399 | set_debugreg(0UL, 7); | 397 | set_debugreg(0UL, 7); |
400 | for (i = 0; i < 4; i++) { | 398 | for (i = 0; i < HBP_NUM; i++) { |
401 | if (!breakinfo[i].enabled) | 399 | if (!breakinfo[i].enabled) |
402 | continue; | 400 | continue; |
403 | if (dbg_is_early) { | 401 | if (dbg_is_early) { |
@@ -458,7 +456,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, | |||
458 | { | 456 | { |
459 | unsigned long addr; | 457 | unsigned long addr; |
460 | char *ptr; | 458 | char *ptr; |
461 | int newPC; | ||
462 | 459 | ||
463 | switch (remcomInBuffer[0]) { | 460 | switch (remcomInBuffer[0]) { |
464 | case 'c': | 461 | case 'c': |
@@ -469,8 +466,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, | |||
469 | linux_regs->ip = addr; | 466 | linux_regs->ip = addr; |
470 | case 'D': | 467 | case 'D': |
471 | case 'k': | 468 | case 'k': |
472 | newPC = linux_regs->ip; | ||
473 | |||
474 | /* clear the trace bit */ | 469 | /* clear the trace bit */ |
475 | linux_regs->flags &= ~X86_EFLAGS_TF; | 470 | linux_regs->flags &= ~X86_EFLAGS_TF; |
476 | atomic_set(&kgdb_cpu_doing_single_step, -1); | 471 | atomic_set(&kgdb_cpu_doing_single_step, -1); |
@@ -572,7 +567,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) | |||
572 | return NOTIFY_STOP; | 567 | return NOTIFY_STOP; |
573 | } | 568 | } |
574 | 569 | ||
575 | #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP | ||
576 | int kgdb_ll_trap(int cmd, const char *str, | 570 | int kgdb_ll_trap(int cmd, const char *str, |
577 | struct pt_regs *regs, long err, int trap, int sig) | 571 | struct pt_regs *regs, long err, int trap, int sig) |
578 | { | 572 | { |
@@ -590,7 +584,6 @@ int kgdb_ll_trap(int cmd, const char *str, | |||
590 | 584 | ||
591 | return __kgdb_notify(&args, cmd); | 585 | return __kgdb_notify(&args, cmd); |
592 | } | 586 | } |
593 | #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ | ||
594 | 587 | ||
595 | static int | 588 | static int |
596 | kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr) | 589 | kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr) |
@@ -625,6 +618,12 @@ int kgdb_arch_init(void) | |||
625 | return register_die_notifier(&kgdb_notifier); | 618 | return register_die_notifier(&kgdb_notifier); |
626 | } | 619 | } |
627 | 620 | ||
621 | static void kgdb_hw_overflow_handler(struct perf_event *event, int nmi, | ||
622 | struct perf_sample_data *data, struct pt_regs *regs) | ||
623 | { | ||
624 | kgdb_ll_trap(DIE_DEBUG, "debug", regs, 0, 0, SIGTRAP); | ||
625 | } | ||
626 | |||
628 | void kgdb_arch_late(void) | 627 | void kgdb_arch_late(void) |
629 | { | 628 | { |
630 | int i, cpu; | 629 | int i, cpu; |
@@ -641,7 +640,7 @@ void kgdb_arch_late(void) | |||
641 | attr.bp_len = HW_BREAKPOINT_LEN_1; | 640 | attr.bp_len = HW_BREAKPOINT_LEN_1; |
642 | attr.bp_type = HW_BREAKPOINT_W; | 641 | attr.bp_type = HW_BREAKPOINT_W; |
643 | attr.disabled = 1; | 642 | attr.disabled = 1; |
644 | for (i = 0; i < 4; i++) { | 643 | for (i = 0; i < HBP_NUM; i++) { |
645 | if (breakinfo[i].pev) | 644 | if (breakinfo[i].pev) |
646 | continue; | 645 | continue; |
647 | breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL); | 646 | breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL); |
@@ -655,6 +654,7 @@ void kgdb_arch_late(void) | |||
655 | for_each_online_cpu(cpu) { | 654 | for_each_online_cpu(cpu) { |
656 | pevent = per_cpu_ptr(breakinfo[i].pev, cpu); | 655 | pevent = per_cpu_ptr(breakinfo[i].pev, cpu); |
657 | pevent[0]->hw.sample_period = 1; | 656 | pevent[0]->hw.sample_period = 1; |
657 | pevent[0]->overflow_handler = kgdb_hw_overflow_handler; | ||
658 | if (pevent[0]->destroy != NULL) { | 658 | if (pevent[0]->destroy != NULL) { |
659 | pevent[0]->destroy = NULL; | 659 | pevent[0]->destroy = NULL; |
660 | release_bp_slot(*pevent); | 660 | release_bp_slot(*pevent); |
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 345a4b1fe144..1bfb6cf4dd55 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -126,16 +126,22 @@ static void __kprobes synthesize_reljump(void *from, void *to) | |||
126 | } | 126 | } |
127 | 127 | ||
128 | /* | 128 | /* |
129 | * Check for the REX prefix which can only exist on X86_64 | 129 | * Skip the prefixes of the instruction. |
130 | * X86_32 always returns 0 | ||
131 | */ | 130 | */ |
132 | static int __kprobes is_REX_prefix(kprobe_opcode_t *insn) | 131 | static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn) |
133 | { | 132 | { |
133 | insn_attr_t attr; | ||
134 | |||
135 | attr = inat_get_opcode_attribute((insn_byte_t)*insn); | ||
136 | while (inat_is_legacy_prefix(attr)) { | ||
137 | insn++; | ||
138 | attr = inat_get_opcode_attribute((insn_byte_t)*insn); | ||
139 | } | ||
134 | #ifdef CONFIG_X86_64 | 140 | #ifdef CONFIG_X86_64 |
135 | if ((*insn & 0xf0) == 0x40) | 141 | if (inat_is_rex_prefix(attr)) |
136 | return 1; | 142 | insn++; |
137 | #endif | 143 | #endif |
138 | return 0; | 144 | return insn; |
139 | } | 145 | } |
140 | 146 | ||
141 | /* | 147 | /* |
@@ -272,6 +278,9 @@ static int __kprobes can_probe(unsigned long paddr) | |||
272 | */ | 278 | */ |
273 | static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) | 279 | static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) |
274 | { | 280 | { |
281 | /* Skip prefixes */ | ||
282 | insn = skip_prefixes(insn); | ||
283 | |||
275 | switch (*insn) { | 284 | switch (*insn) { |
276 | case 0xfa: /* cli */ | 285 | case 0xfa: /* cli */ |
277 | case 0xfb: /* sti */ | 286 | case 0xfb: /* sti */ |
@@ -280,13 +289,6 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) | |||
280 | return 1; | 289 | return 1; |
281 | } | 290 | } |
282 | 291 | ||
283 | /* | ||
284 | * on X86_64, 0x40-0x4f are REX prefixes so we need to look | ||
285 | * at the next byte instead.. but of course not recurse infinitely | ||
286 | */ | ||
287 | if (is_REX_prefix(insn)) | ||
288 | return is_IF_modifier(++insn); | ||
289 | |||
290 | return 0; | 292 | return 0; |
291 | } | 293 | } |
292 | 294 | ||
@@ -640,8 +642,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) | |||
640 | /* Skip cs, ip, orig_ax and gs. */ \ | 642 | /* Skip cs, ip, orig_ax and gs. */ \ |
641 | " subl $16, %esp\n" \ | 643 | " subl $16, %esp\n" \ |
642 | " pushl %fs\n" \ | 644 | " pushl %fs\n" \ |
643 | " pushl %ds\n" \ | ||
644 | " pushl %es\n" \ | 645 | " pushl %es\n" \ |
646 | " pushl %ds\n" \ | ||
645 | " pushl %eax\n" \ | 647 | " pushl %eax\n" \ |
646 | " pushl %ebp\n" \ | 648 | " pushl %ebp\n" \ |
647 | " pushl %edi\n" \ | 649 | " pushl %edi\n" \ |
@@ -803,9 +805,8 @@ static void __kprobes resume_execution(struct kprobe *p, | |||
803 | unsigned long orig_ip = (unsigned long)p->addr; | 805 | unsigned long orig_ip = (unsigned long)p->addr; |
804 | kprobe_opcode_t *insn = p->ainsn.insn; | 806 | kprobe_opcode_t *insn = p->ainsn.insn; |
805 | 807 | ||
806 | /*skip the REX prefix*/ | 808 | /* Skip prefixes */ |
807 | if (is_REX_prefix(insn)) | 809 | insn = skip_prefixes(insn); |
808 | insn++; | ||
809 | 810 | ||
810 | regs->flags &= ~X86_EFLAGS_TF; | 811 | regs->flags &= ~X86_EFLAGS_TF; |
811 | switch (*insn) { | 812 | switch (*insn) { |
diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c index e796448f0eb5..79ae68154e87 100644 --- a/arch/x86/kernel/mrst.c +++ b/arch/x86/kernel/mrst.c | |||
@@ -25,8 +25,34 @@ | |||
25 | #include <asm/i8259.h> | 25 | #include <asm/i8259.h> |
26 | #include <asm/apb_timer.h> | 26 | #include <asm/apb_timer.h> |
27 | 27 | ||
28 | /* | ||
29 | * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock, | ||
30 | * cmdline option x86_mrst_timer can be used to override the configuration | ||
31 | * to prefer one or the other. | ||
32 | * at runtime, there are basically three timer configurations: | ||
33 | * 1. per cpu apbt clock only | ||
34 | * 2. per cpu always-on lapic clocks only, this is Penwell/Medfield only | ||
35 | * 3. per cpu lapic clock (C3STOP) and one apbt clock, with broadcast. | ||
36 | * | ||
37 | * by default (without cmdline option), platform code first detects cpu type | ||
38 | * to see if we are on lincroft or penwell, then set up both lapic or apbt | ||
39 | * clocks accordingly. | ||
40 | * i.e. by default, medfield uses configuration #2, moorestown uses #1. | ||
41 | * config #3 is supported but not recommended on medfield. | ||
42 | * | ||
43 | * rating and feature summary: | ||
44 | * lapic (with C3STOP) --------- 100 | ||
45 | * apbt (always-on) ------------ 110 | ||
46 | * lapic (always-on,ARAT) ------ 150 | ||
47 | */ | ||
48 | |||
49 | __cpuinitdata enum mrst_timer_options mrst_timer_options; | ||
50 | |||
28 | static u32 sfi_mtimer_usage[SFI_MTMR_MAX_NUM]; | 51 | static u32 sfi_mtimer_usage[SFI_MTMR_MAX_NUM]; |
29 | static struct sfi_timer_table_entry sfi_mtimer_array[SFI_MTMR_MAX_NUM]; | 52 | static struct sfi_timer_table_entry sfi_mtimer_array[SFI_MTMR_MAX_NUM]; |
53 | enum mrst_cpu_type __mrst_cpu_chip; | ||
54 | EXPORT_SYMBOL_GPL(__mrst_cpu_chip); | ||
55 | |||
30 | int sfi_mtimer_num; | 56 | int sfi_mtimer_num; |
31 | 57 | ||
32 | struct sfi_rtc_table_entry sfi_mrtc_array[SFI_MRTC_MAX]; | 58 | struct sfi_rtc_table_entry sfi_mrtc_array[SFI_MRTC_MAX]; |
@@ -167,18 +193,6 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table) | |||
167 | return 0; | 193 | return 0; |
168 | } | 194 | } |
169 | 195 | ||
170 | /* | ||
171 | * the secondary clock in Moorestown can be APBT or LAPIC clock, default to | ||
172 | * APBT but cmdline option can also override it. | ||
173 | */ | ||
174 | static void __cpuinit mrst_setup_secondary_clock(void) | ||
175 | { | ||
176 | /* restore default lapic clock if disabled by cmdline */ | ||
177 | if (disable_apbt_percpu) | ||
178 | return setup_secondary_APIC_clock(); | ||
179 | apbt_setup_secondary_clock(); | ||
180 | } | ||
181 | |||
182 | static unsigned long __init mrst_calibrate_tsc(void) | 196 | static unsigned long __init mrst_calibrate_tsc(void) |
183 | { | 197 | { |
184 | unsigned long flags, fast_calibrate; | 198 | unsigned long flags, fast_calibrate; |
@@ -195,6 +209,21 @@ static unsigned long __init mrst_calibrate_tsc(void) | |||
195 | 209 | ||
196 | void __init mrst_time_init(void) | 210 | void __init mrst_time_init(void) |
197 | { | 211 | { |
212 | switch (mrst_timer_options) { | ||
213 | case MRST_TIMER_APBT_ONLY: | ||
214 | break; | ||
215 | case MRST_TIMER_LAPIC_APBT: | ||
216 | x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock; | ||
217 | x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock; | ||
218 | break; | ||
219 | default: | ||
220 | if (!boot_cpu_has(X86_FEATURE_ARAT)) | ||
221 | break; | ||
222 | x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock; | ||
223 | x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock; | ||
224 | return; | ||
225 | } | ||
226 | /* we need at least one APB timer */ | ||
198 | sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr); | 227 | sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr); |
199 | pre_init_apic_IRQ0(); | 228 | pre_init_apic_IRQ0(); |
200 | apbt_time_init(); | 229 | apbt_time_init(); |
@@ -205,16 +234,27 @@ void __init mrst_rtc_init(void) | |||
205 | sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc); | 234 | sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc); |
206 | } | 235 | } |
207 | 236 | ||
208 | /* | 237 | void __cpuinit mrst_arch_setup(void) |
209 | * if we use per cpu apb timer, the bootclock already setup. if we use lapic | ||
210 | * timer and one apbt timer for broadcast, we need to set up lapic boot clock. | ||
211 | */ | ||
212 | static void __init mrst_setup_boot_clock(void) | ||
213 | { | 238 | { |
214 | pr_info("%s: per cpu apbt flag %d \n", __func__, disable_apbt_percpu); | 239 | if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27) |
215 | if (disable_apbt_percpu) | 240 | __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL; |
216 | setup_boot_APIC_clock(); | 241 | else if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x26) |
217 | }; | 242 | __mrst_cpu_chip = MRST_CPU_CHIP_LINCROFT; |
243 | else { | ||
244 | pr_err("Unknown Moorestown CPU (%d:%d), default to Lincroft\n", | ||
245 | boot_cpu_data.x86, boot_cpu_data.x86_model); | ||
246 | __mrst_cpu_chip = MRST_CPU_CHIP_LINCROFT; | ||
247 | } | ||
248 | pr_debug("Moorestown CPU %s identified\n", | ||
249 | (__mrst_cpu_chip == MRST_CPU_CHIP_LINCROFT) ? | ||
250 | "Lincroft" : "Penwell"); | ||
251 | } | ||
252 | |||
253 | /* MID systems don't have i8042 controller */ | ||
254 | static int mrst_i8042_detect(void) | ||
255 | { | ||
256 | return 0; | ||
257 | } | ||
218 | 258 | ||
219 | /* | 259 | /* |
220 | * Moorestown specific x86_init function overrides and early setup | 260 | * Moorestown specific x86_init function overrides and early setup |
@@ -226,13 +266,16 @@ void __init x86_mrst_early_setup(void) | |||
226 | x86_init.resources.reserve_resources = x86_init_noop; | 266 | x86_init.resources.reserve_resources = x86_init_noop; |
227 | 267 | ||
228 | x86_init.timers.timer_init = mrst_time_init; | 268 | x86_init.timers.timer_init = mrst_time_init; |
229 | x86_init.timers.setup_percpu_clockev = mrst_setup_boot_clock; | 269 | x86_init.timers.setup_percpu_clockev = x86_init_noop; |
230 | 270 | ||
231 | x86_init.irqs.pre_vector_init = x86_init_noop; | 271 | x86_init.irqs.pre_vector_init = x86_init_noop; |
232 | 272 | ||
233 | x86_cpuinit.setup_percpu_clockev = mrst_setup_secondary_clock; | 273 | x86_init.oem.arch_setup = mrst_arch_setup; |
274 | |||
275 | x86_cpuinit.setup_percpu_clockev = apbt_setup_secondary_clock; | ||
234 | 276 | ||
235 | x86_platform.calibrate_tsc = mrst_calibrate_tsc; | 277 | x86_platform.calibrate_tsc = mrst_calibrate_tsc; |
278 | x86_platform.i8042_detect = mrst_i8042_detect; | ||
236 | x86_init.pci.init = pci_mrst_init; | 279 | x86_init.pci.init = pci_mrst_init; |
237 | x86_init.pci.fixup_irqs = x86_init_noop; | 280 | x86_init.pci.fixup_irqs = x86_init_noop; |
238 | 281 | ||
@@ -243,3 +286,26 @@ void __init x86_mrst_early_setup(void) | |||
243 | x86_init.mpparse.get_smp_config = x86_init_uint_noop; | 286 | x86_init.mpparse.get_smp_config = x86_init_uint_noop; |
244 | 287 | ||
245 | } | 288 | } |
289 | |||
290 | /* | ||
291 | * if user does not want to use per CPU apb timer, just give it a lower rating | ||
292 | * than local apic timer and skip the late per cpu timer init. | ||
293 | */ | ||
294 | static inline int __init setup_x86_mrst_timer(char *arg) | ||
295 | { | ||
296 | if (!arg) | ||
297 | return -EINVAL; | ||
298 | |||
299 | if (strcmp("apbt_only", arg) == 0) | ||
300 | mrst_timer_options = MRST_TIMER_APBT_ONLY; | ||
301 | else if (strcmp("lapic_and_apbt", arg) == 0) | ||
302 | mrst_timer_options = MRST_TIMER_LAPIC_APBT; | ||
303 | else { | ||
304 | pr_warning("X86 MRST timer option %s not recognised" | ||
305 | " use x86_mrst_timer=apbt_only or lapic_and_apbt\n", | ||
306 | arg); | ||
307 | return -EINVAL; | ||
308 | } | ||
309 | return 0; | ||
310 | } | ||
311 | __setup("x86_mrst_timer=", setup_x86_mrst_timer); | ||
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 0b96b5589f08..078d4ec1a9d9 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c | |||
@@ -110,7 +110,7 @@ int use_calgary __read_mostly = 0; | |||
110 | * x3950 (PCIE): 8 chassis, 32 PHBs per chassis = 256 | 110 | * x3950 (PCIE): 8 chassis, 32 PHBs per chassis = 256 |
111 | * x3950 (PCIX): 8 chassis, 16 PHBs per chassis = 128 | 111 | * x3950 (PCIX): 8 chassis, 16 PHBs per chassis = 128 |
112 | */ | 112 | */ |
113 | #define MAX_PHB_BUS_NUM 384 | 113 | #define MAX_PHB_BUS_NUM 256 |
114 | 114 | ||
115 | #define PHBS_PER_CALGARY 4 | 115 | #define PHBS_PER_CALGARY 4 |
116 | 116 | ||
@@ -1056,8 +1056,6 @@ static int __init calgary_init_one(struct pci_dev *dev) | |||
1056 | struct iommu_table *tbl; | 1056 | struct iommu_table *tbl; |
1057 | int ret; | 1057 | int ret; |
1058 | 1058 | ||
1059 | BUG_ON(dev->bus->number >= MAX_PHB_BUS_NUM); | ||
1060 | |||
1061 | bbar = busno_to_bbar(dev->bus->number); | 1059 | bbar = busno_to_bbar(dev->bus->number); |
1062 | ret = calgary_setup_tar(dev, bbar); | 1060 | ret = calgary_setup_tar(dev, bbar); |
1063 | if (ret) | 1061 | if (ret) |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e7e35219b32f..d401f1d2d06e 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -28,6 +28,7 @@ unsigned long idle_nomwait; | |||
28 | EXPORT_SYMBOL(idle_nomwait); | 28 | EXPORT_SYMBOL(idle_nomwait); |
29 | 29 | ||
30 | struct kmem_cache *task_xstate_cachep; | 30 | struct kmem_cache *task_xstate_cachep; |
31 | EXPORT_SYMBOL_GPL(task_xstate_cachep); | ||
31 | 32 | ||
32 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) | 33 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) |
33 | { | 34 | { |
@@ -371,7 +372,7 @@ static inline int hlt_use_halt(void) | |||
371 | void default_idle(void) | 372 | void default_idle(void) |
372 | { | 373 | { |
373 | if (hlt_use_halt()) { | 374 | if (hlt_use_halt()) { |
374 | trace_power_start(POWER_CSTATE, 1); | 375 | trace_power_start(POWER_CSTATE, 1, smp_processor_id()); |
375 | current_thread_info()->status &= ~TS_POLLING; | 376 | current_thread_info()->status &= ~TS_POLLING; |
376 | /* | 377 | /* |
377 | * TS_POLLING-cleared state must be visible before we | 378 | * TS_POLLING-cleared state must be visible before we |
@@ -441,7 +442,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); | |||
441 | */ | 442 | */ |
442 | void mwait_idle_with_hints(unsigned long ax, unsigned long cx) | 443 | void mwait_idle_with_hints(unsigned long ax, unsigned long cx) |
443 | { | 444 | { |
444 | trace_power_start(POWER_CSTATE, (ax>>4)+1); | 445 | trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id()); |
445 | if (!need_resched()) { | 446 | if (!need_resched()) { |
446 | if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) | 447 | if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) |
447 | clflush((void *)¤t_thread_info()->flags); | 448 | clflush((void *)¤t_thread_info()->flags); |
@@ -457,7 +458,7 @@ void mwait_idle_with_hints(unsigned long ax, unsigned long cx) | |||
457 | static void mwait_idle(void) | 458 | static void mwait_idle(void) |
458 | { | 459 | { |
459 | if (!need_resched()) { | 460 | if (!need_resched()) { |
460 | trace_power_start(POWER_CSTATE, 1); | 461 | trace_power_start(POWER_CSTATE, 1, smp_processor_id()); |
461 | if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) | 462 | if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) |
462 | clflush((void *)¤t_thread_info()->flags); | 463 | clflush((void *)¤t_thread_info()->flags); |
463 | 464 | ||
@@ -478,7 +479,7 @@ static void mwait_idle(void) | |||
478 | */ | 479 | */ |
479 | static void poll_idle(void) | 480 | static void poll_idle(void) |
480 | { | 481 | { |
481 | trace_power_start(POWER_CSTATE, 0); | 482 | trace_power_start(POWER_CSTATE, 0, smp_processor_id()); |
482 | local_irq_enable(); | 483 | local_irq_enable(); |
483 | while (!need_resched()) | 484 | while (!need_resched()) |
484 | cpu_relax(); | 485 | cpu_relax(); |
@@ -525,44 +526,10 @@ static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c) | |||
525 | return (edx & MWAIT_EDX_C1); | 526 | return (edx & MWAIT_EDX_C1); |
526 | } | 527 | } |
527 | 528 | ||
528 | /* | 529 | bool c1e_detected; |
529 | * Check for AMD CPUs, where APIC timer interrupt does not wake up CPU from C1e. | 530 | EXPORT_SYMBOL(c1e_detected); |
530 | * For more information see | ||
531 | * - Erratum #400 for NPT family 0xf and family 0x10 CPUs | ||
532 | * - Erratum #365 for family 0x11 (not affected because C1e not in use) | ||
533 | */ | ||
534 | static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) | ||
535 | { | ||
536 | u64 val; | ||
537 | if (c->x86_vendor != X86_VENDOR_AMD) | ||
538 | goto no_c1e_idle; | ||
539 | |||
540 | /* Family 0x0f models < rev F do not have C1E */ | ||
541 | if (c->x86 == 0x0F && c->x86_model >= 0x40) | ||
542 | return 1; | ||
543 | |||
544 | if (c->x86 == 0x10) { | ||
545 | /* | ||
546 | * check OSVW bit for CPUs that are not affected | ||
547 | * by erratum #400 | ||
548 | */ | ||
549 | if (cpu_has(c, X86_FEATURE_OSVW)) { | ||
550 | rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, val); | ||
551 | if (val >= 2) { | ||
552 | rdmsrl(MSR_AMD64_OSVW_STATUS, val); | ||
553 | if (!(val & BIT(1))) | ||
554 | goto no_c1e_idle; | ||
555 | } | ||
556 | } | ||
557 | return 1; | ||
558 | } | ||
559 | |||
560 | no_c1e_idle: | ||
561 | return 0; | ||
562 | } | ||
563 | 531 | ||
564 | static cpumask_var_t c1e_mask; | 532 | static cpumask_var_t c1e_mask; |
565 | static int c1e_detected; | ||
566 | 533 | ||
567 | void c1e_remove_cpu(int cpu) | 534 | void c1e_remove_cpu(int cpu) |
568 | { | 535 | { |
@@ -584,12 +551,12 @@ static void c1e_idle(void) | |||
584 | u32 lo, hi; | 551 | u32 lo, hi; |
585 | 552 | ||
586 | rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); | 553 | rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); |
554 | |||
587 | if (lo & K8_INTP_C1E_ACTIVE_MASK) { | 555 | if (lo & K8_INTP_C1E_ACTIVE_MASK) { |
588 | c1e_detected = 1; | 556 | c1e_detected = true; |
589 | if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) | 557 | if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) |
590 | mark_tsc_unstable("TSC halt in AMD C1E"); | 558 | mark_tsc_unstable("TSC halt in AMD C1E"); |
591 | printk(KERN_INFO "System has AMD C1E enabled\n"); | 559 | printk(KERN_INFO "System has AMD C1E enabled\n"); |
592 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E); | ||
593 | } | 560 | } |
594 | } | 561 | } |
595 | 562 | ||
@@ -638,7 +605,8 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) | |||
638 | */ | 605 | */ |
639 | printk(KERN_INFO "using mwait in idle threads.\n"); | 606 | printk(KERN_INFO "using mwait in idle threads.\n"); |
640 | pm_idle = mwait_idle; | 607 | pm_idle = mwait_idle; |
641 | } else if (check_c1e_idle(c)) { | 608 | } else if (cpu_has_amd_erratum(amd_erratum_400)) { |
609 | /* E400: APIC timer interrupt does not wake up CPU from C1e */ | ||
642 | printk(KERN_INFO "using C1E aware idle routine\n"); | 610 | printk(KERN_INFO "using C1E aware idle routine\n"); |
643 | pm_idle = c1e_idle; | 611 | pm_idle = c1e_idle; |
644 | } else | 612 | } else |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 8d128783af47..96586c3cbbbf 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -57,6 +57,8 @@ | |||
57 | #include <asm/syscalls.h> | 57 | #include <asm/syscalls.h> |
58 | #include <asm/debugreg.h> | 58 | #include <asm/debugreg.h> |
59 | 59 | ||
60 | #include <trace/events/power.h> | ||
61 | |||
60 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | 62 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
61 | 63 | ||
62 | /* | 64 | /* |
@@ -111,6 +113,8 @@ void cpu_idle(void) | |||
111 | stop_critical_timings(); | 113 | stop_critical_timings(); |
112 | pm_idle(); | 114 | pm_idle(); |
113 | start_critical_timings(); | 115 | start_critical_timings(); |
116 | |||
117 | trace_power_end(smp_processor_id()); | ||
114 | } | 118 | } |
115 | tick_nohz_restart_sched_tick(); | 119 | tick_nohz_restart_sched_tick(); |
116 | preempt_enable_no_resched(); | 120 | preempt_enable_no_resched(); |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 3c2422a99f1f..3d9ea531ddd1 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -51,6 +51,8 @@ | |||
51 | #include <asm/syscalls.h> | 51 | #include <asm/syscalls.h> |
52 | #include <asm/debugreg.h> | 52 | #include <asm/debugreg.h> |
53 | 53 | ||
54 | #include <trace/events/power.h> | ||
55 | |||
54 | asmlinkage extern void ret_from_fork(void); | 56 | asmlinkage extern void ret_from_fork(void); |
55 | 57 | ||
56 | DEFINE_PER_CPU(unsigned long, old_rsp); | 58 | DEFINE_PER_CPU(unsigned long, old_rsp); |
@@ -138,6 +140,9 @@ void cpu_idle(void) | |||
138 | stop_critical_timings(); | 140 | stop_critical_timings(); |
139 | pm_idle(); | 141 | pm_idle(); |
140 | start_critical_timings(); | 142 | start_critical_timings(); |
143 | |||
144 | trace_power_end(smp_processor_id()); | ||
145 | |||
141 | /* In many cases the interrupt that ended idle | 146 | /* In many cases the interrupt that ended idle |
142 | has already called exit_idle. But some idle | 147 | has already called exit_idle. But some idle |
143 | loops can be woken up without interrupt. */ | 148 | loops can be woken up without interrupt. */ |
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index e72d3fc6547d..939b9e98245f 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c | |||
@@ -498,15 +498,10 @@ void force_hpet_resume(void) | |||
498 | * See erratum #27 (Misinterpreted MSI Requests May Result in | 498 | * See erratum #27 (Misinterpreted MSI Requests May Result in |
499 | * Corrupted LPC DMA Data) in AMD Publication #46837, | 499 | * Corrupted LPC DMA Data) in AMD Publication #46837, |
500 | * "SB700 Family Product Errata", Rev. 1.0, March 2010. | 500 | * "SB700 Family Product Errata", Rev. 1.0, March 2010. |
501 | * | ||
502 | * Also force the read back of the CMP register in hpet_next_event() | ||
503 | * to work around the problem that the CMP register write seems to be | ||
504 | * delayed. See hpet_next_event() for details. | ||
505 | */ | 501 | */ |
506 | static void force_disable_hpet_msi(struct pci_dev *unused) | 502 | static void force_disable_hpet_msi(struct pci_dev *unused) |
507 | { | 503 | { |
508 | hpet_msi_disable = 1; | 504 | hpet_msi_disable = 1; |
509 | hpet_readback_cmp = 1; | ||
510 | } | 505 | } |
511 | 506 | ||
512 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, | 507 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, |
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index de3b63ae3da2..a60df9ae6454 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -238,6 +238,15 @@ void __init setup_per_cpu_areas(void) | |||
238 | #ifdef CONFIG_NUMA | 238 | #ifdef CONFIG_NUMA |
239 | per_cpu(x86_cpu_to_node_map, cpu) = | 239 | per_cpu(x86_cpu_to_node_map, cpu) = |
240 | early_per_cpu_map(x86_cpu_to_node_map, cpu); | 240 | early_per_cpu_map(x86_cpu_to_node_map, cpu); |
241 | /* | ||
242 | * Ensure that the boot cpu numa_node is correct when the boot | ||
243 | * cpu is on a node that doesn't have memory installed. | ||
244 | * Also cpu_up() will call cpu_to_node() for APs when | ||
245 | * MEMORY_HOTPLUG is defined, before per_cpu(numa_node) is set | ||
246 | * up later with c_init aka intel_init/amd_init. | ||
247 | * So set them all (boot cpu and all APs). | ||
248 | */ | ||
249 | set_cpu_numa_node(cpu, early_cpu_to_node(cpu)); | ||
241 | #endif | 250 | #endif |
242 | #endif | 251 | #endif |
243 | /* | 252 | /* |
@@ -257,14 +266,6 @@ void __init setup_per_cpu_areas(void) | |||
257 | early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; | 266 | early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; |
258 | #endif | 267 | #endif |
259 | 268 | ||
260 | #if defined(CONFIG_X86_64) && defined(CONFIG_NUMA) | ||
261 | /* | ||
262 | * make sure boot cpu numa_node is right, when boot cpu is on the | ||
263 | * node that doesn't have mem installed | ||
264 | */ | ||
265 | set_cpu_numa_node(boot_cpu_id, early_cpu_to_node(boot_cpu_id)); | ||
266 | #endif | ||
267 | |||
268 | /* Setup node to cpumask map */ | 269 | /* Setup node to cpumask map */ |
269 | setup_node_to_cpumask_map(); | 270 | setup_node_to_cpumask_map(); |
270 | 271 | ||
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c4f33b2e77d6..11015fd1abbc 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -816,6 +816,13 @@ do_rest: | |||
816 | if (cpumask_test_cpu(cpu, cpu_callin_mask)) | 816 | if (cpumask_test_cpu(cpu, cpu_callin_mask)) |
817 | break; /* It has booted */ | 817 | break; /* It has booted */ |
818 | udelay(100); | 818 | udelay(100); |
819 | /* | ||
820 | * Allow other tasks to run while we wait for the | ||
821 | * AP to come online. This also gives a chance | ||
822 | * for the MTRR work(triggered by the AP coming online) | ||
823 | * to be completed in the stop machine context. | ||
824 | */ | ||
825 | schedule(); | ||
819 | } | 826 | } |
820 | 827 | ||
821 | if (cpumask_test_cpu(cpu, cpu_callin_mask)) | 828 | if (cpumask_test_cpu(cpu, cpu_callin_mask)) |
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 922eefbb3f6c..b53c525368a7 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c | |||
@@ -23,11 +23,16 @@ static int save_stack_stack(void *data, char *name) | |||
23 | return 0; | 23 | return 0; |
24 | } | 24 | } |
25 | 25 | ||
26 | static void save_stack_address(void *data, unsigned long addr, int reliable) | 26 | static void |
27 | __save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched) | ||
27 | { | 28 | { |
28 | struct stack_trace *trace = data; | 29 | struct stack_trace *trace = data; |
30 | #ifdef CONFIG_FRAME_POINTER | ||
29 | if (!reliable) | 31 | if (!reliable) |
30 | return; | 32 | return; |
33 | #endif | ||
34 | if (nosched && in_sched_functions(addr)) | ||
35 | return; | ||
31 | if (trace->skip > 0) { | 36 | if (trace->skip > 0) { |
32 | trace->skip--; | 37 | trace->skip--; |
33 | return; | 38 | return; |
@@ -36,20 +41,15 @@ static void save_stack_address(void *data, unsigned long addr, int reliable) | |||
36 | trace->entries[trace->nr_entries++] = addr; | 41 | trace->entries[trace->nr_entries++] = addr; |
37 | } | 42 | } |
38 | 43 | ||
44 | static void save_stack_address(void *data, unsigned long addr, int reliable) | ||
45 | { | ||
46 | return __save_stack_address(data, addr, reliable, false); | ||
47 | } | ||
48 | |||
39 | static void | 49 | static void |
40 | save_stack_address_nosched(void *data, unsigned long addr, int reliable) | 50 | save_stack_address_nosched(void *data, unsigned long addr, int reliable) |
41 | { | 51 | { |
42 | struct stack_trace *trace = (struct stack_trace *)data; | 52 | return __save_stack_address(data, addr, reliable, true); |
43 | if (!reliable) | ||
44 | return; | ||
45 | if (in_sched_functions(addr)) | ||
46 | return; | ||
47 | if (trace->skip > 0) { | ||
48 | trace->skip--; | ||
49 | return; | ||
50 | } | ||
51 | if (trace->nr_entries < trace->max_entries) | ||
52 | trace->entries[trace->nr_entries++] = addr; | ||
53 | } | 53 | } |
54 | 54 | ||
55 | static const struct stacktrace_ops save_stack_ops = { | 55 | static const struct stacktrace_ops save_stack_ops = { |
@@ -96,12 +96,13 @@ EXPORT_SYMBOL_GPL(save_stack_trace_tsk); | |||
96 | 96 | ||
97 | /* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */ | 97 | /* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */ |
98 | 98 | ||
99 | struct stack_frame { | 99 | struct stack_frame_user { |
100 | const void __user *next_fp; | 100 | const void __user *next_fp; |
101 | unsigned long ret_addr; | 101 | unsigned long ret_addr; |
102 | }; | 102 | }; |
103 | 103 | ||
104 | static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) | 104 | static int |
105 | copy_stack_frame(const void __user *fp, struct stack_frame_user *frame) | ||
105 | { | 106 | { |
106 | int ret; | 107 | int ret; |
107 | 108 | ||
@@ -126,7 +127,7 @@ static inline void __save_stack_trace_user(struct stack_trace *trace) | |||
126 | trace->entries[trace->nr_entries++] = regs->ip; | 127 | trace->entries[trace->nr_entries++] = regs->ip; |
127 | 128 | ||
128 | while (trace->nr_entries < trace->max_entries) { | 129 | while (trace->nr_entries < trace->max_entries) { |
129 | struct stack_frame frame; | 130 | struct stack_frame_user frame; |
130 | 131 | ||
131 | frame.next_fp = NULL; | 132 | frame.next_fp = NULL; |
132 | frame.ret_addr = 0; | 133 | frame.ret_addr = 0; |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 725ef4d17cd5..60788dee0f8a 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -392,7 +392,13 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) | |||
392 | if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) | 392 | if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) |
393 | == NOTIFY_STOP) | 393 | == NOTIFY_STOP) |
394 | return; | 394 | return; |
395 | |||
395 | #ifdef CONFIG_X86_LOCAL_APIC | 396 | #ifdef CONFIG_X86_LOCAL_APIC |
397 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) | ||
398 | == NOTIFY_STOP) | ||
399 | return; | ||
400 | |||
401 | #ifndef CONFIG_LOCKUP_DETECTOR | ||
396 | /* | 402 | /* |
397 | * Ok, so this is none of the documented NMI sources, | 403 | * Ok, so this is none of the documented NMI sources, |
398 | * so it must be the NMI watchdog. | 404 | * so it must be the NMI watchdog. |
@@ -400,6 +406,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) | |||
400 | if (nmi_watchdog_tick(regs, reason)) | 406 | if (nmi_watchdog_tick(regs, reason)) |
401 | return; | 407 | return; |
402 | if (!do_nmi_callback(regs, cpu)) | 408 | if (!do_nmi_callback(regs, cpu)) |
409 | #endif /* !CONFIG_LOCKUP_DETECTOR */ | ||
403 | unknown_nmi_error(reason, regs); | 410 | unknown_nmi_error(reason, regs); |
404 | #else | 411 | #else |
405 | unknown_nmi_error(reason, regs); | 412 | unknown_nmi_error(reason, regs); |
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 9faf91ae1841..ce8e50239332 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -751,7 +751,6 @@ static struct clocksource clocksource_tsc = { | |||
751 | .read = read_tsc, | 751 | .read = read_tsc, |
752 | .resume = resume_tsc, | 752 | .resume = resume_tsc, |
753 | .mask = CLOCKSOURCE_MASK(64), | 753 | .mask = CLOCKSOURCE_MASK(64), |
754 | .shift = 22, | ||
755 | .flags = CLOCK_SOURCE_IS_CONTINUOUS | | 754 | .flags = CLOCK_SOURCE_IS_CONTINUOUS | |
756 | CLOCK_SOURCE_MUST_VERIFY, | 755 | CLOCK_SOURCE_MUST_VERIFY, |
757 | #ifdef CONFIG_X86_64 | 756 | #ifdef CONFIG_X86_64 |
@@ -845,8 +844,6 @@ __cpuinit int unsynchronized_tsc(void) | |||
845 | 844 | ||
846 | static void __init init_tsc_clocksource(void) | 845 | static void __init init_tsc_clocksource(void) |
847 | { | 846 | { |
848 | clocksource_tsc.mult = clocksource_khz2mult(tsc_khz, | ||
849 | clocksource_tsc.shift); | ||
850 | if (tsc_clocksource_reliable) | 847 | if (tsc_clocksource_reliable) |
851 | clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; | 848 | clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; |
852 | /* lower the rating if we already know its unstable: */ | 849 | /* lower the rating if we already know its unstable: */ |
@@ -854,7 +851,7 @@ static void __init init_tsc_clocksource(void) | |||
854 | clocksource_tsc.rating = 0; | 851 | clocksource_tsc.rating = 0; |
855 | clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; | 852 | clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; |
856 | } | 853 | } |
857 | clocksource_register(&clocksource_tsc); | 854 | clocksource_register_khz(&clocksource_tsc, tsc_khz); |
858 | } | 855 | } |
859 | 856 | ||
860 | #ifdef CONFIG_X86_64 | 857 | #ifdef CONFIG_X86_64 |
diff --git a/arch/x86/kernel/verify_cpu_64.S b/arch/x86/kernel/verify_cpu_64.S index 45b6f8a975a1..56a8c2a867d9 100644 --- a/arch/x86/kernel/verify_cpu_64.S +++ b/arch/x86/kernel/verify_cpu_64.S | |||
@@ -31,6 +31,7 @@ | |||
31 | */ | 31 | */ |
32 | 32 | ||
33 | #include <asm/cpufeature.h> | 33 | #include <asm/cpufeature.h> |
34 | #include <asm/msr-index.h> | ||
34 | 35 | ||
35 | verify_cpu: | 36 | verify_cpu: |
36 | pushfl # Save caller passed flags | 37 | pushfl # Save caller passed flags |
@@ -88,7 +89,7 @@ verify_cpu_sse_test: | |||
88 | je verify_cpu_sse_ok | 89 | je verify_cpu_sse_ok |
89 | test %di,%di | 90 | test %di,%di |
90 | jz verify_cpu_no_longmode # only try to force SSE on AMD | 91 | jz verify_cpu_no_longmode # only try to force SSE on AMD |
91 | movl $0xc0010015,%ecx # HWCR | 92 | movl $MSR_K7_HWCR,%ecx |
92 | rdmsr | 93 | rdmsr |
93 | btr $15,%eax # enable SSE | 94 | btr $15,%eax # enable SSE |
94 | wrmsr | 95 | wrmsr |
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 1c0c6ab9c60f..dcbb28c4b694 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -73,8 +73,8 @@ void update_vsyscall_tz(void) | |||
73 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); | 73 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); |
74 | } | 74 | } |
75 | 75 | ||
76 | void update_vsyscall(struct timespec *wall_time, struct clocksource *clock, | 76 | void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, |
77 | u32 mult) | 77 | struct clocksource *clock, u32 mult) |
78 | { | 78 | { |
79 | unsigned long flags; | 79 | unsigned long flags; |
80 | 80 | ||
@@ -87,7 +87,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock, | |||
87 | vsyscall_gtod_data.clock.shift = clock->shift; | 87 | vsyscall_gtod_data.clock.shift = clock->shift; |
88 | vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; | 88 | vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; |
89 | vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; | 89 | vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; |
90 | vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic; | 90 | vsyscall_gtod_data.wall_to_monotonic = *wtm; |
91 | vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); | 91 | vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); |
92 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); | 92 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); |
93 | } | 93 | } |
@@ -169,13 +169,18 @@ int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) | |||
169 | * unlikely */ | 169 | * unlikely */ |
170 | time_t __vsyscall(1) vtime(time_t *t) | 170 | time_t __vsyscall(1) vtime(time_t *t) |
171 | { | 171 | { |
172 | struct timeval tv; | 172 | unsigned seq; |
173 | time_t result; | 173 | time_t result; |
174 | if (unlikely(!__vsyscall_gtod_data.sysctl_enabled)) | 174 | if (unlikely(!__vsyscall_gtod_data.sysctl_enabled)) |
175 | return time_syscall(t); | 175 | return time_syscall(t); |
176 | 176 | ||
177 | vgettimeofday(&tv, NULL); | 177 | do { |
178 | result = tv.tv_sec; | 178 | seq = read_seqbegin(&__vsyscall_gtod_data.lock); |
179 | |||
180 | result = __vsyscall_gtod_data.wall_time_sec; | ||
181 | |||
182 | } while (read_seqretry(&__vsyscall_gtod_data.lock, seq)); | ||
183 | |||
179 | if (t) | 184 | if (t) |
180 | *t = result; | 185 | *t = result; |
181 | return result; | 186 | return result; |
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 61a1e8c7e19f..cd6da6bf3eca 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c | |||
@@ -5,6 +5,7 @@ | |||
5 | */ | 5 | */ |
6 | #include <linux/init.h> | 6 | #include <linux/init.h> |
7 | #include <linux/ioport.h> | 7 | #include <linux/ioport.h> |
8 | #include <linux/module.h> | ||
8 | 9 | ||
9 | #include <asm/bios_ebda.h> | 10 | #include <asm/bios_ebda.h> |
10 | #include <asm/paravirt.h> | 11 | #include <asm/paravirt.h> |
@@ -85,6 +86,7 @@ struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { | |||
85 | }; | 86 | }; |
86 | 87 | ||
87 | static void default_nmi_init(void) { }; | 88 | static void default_nmi_init(void) { }; |
89 | static int default_i8042_detect(void) { return 1; }; | ||
88 | 90 | ||
89 | struct x86_platform_ops x86_platform = { | 91 | struct x86_platform_ops x86_platform = { |
90 | .calibrate_tsc = native_calibrate_tsc, | 92 | .calibrate_tsc = native_calibrate_tsc, |
@@ -92,5 +94,8 @@ struct x86_platform_ops x86_platform = { | |||
92 | .set_wallclock = mach_set_rtc_mmss, | 94 | .set_wallclock = mach_set_rtc_mmss, |
93 | .iommu_shutdown = iommu_shutdown_noop, | 95 | .iommu_shutdown = iommu_shutdown_noop, |
94 | .is_untracked_pat_range = is_ISA_range, | 96 | .is_untracked_pat_range = is_ISA_range, |
95 | .nmi_init = default_nmi_init | 97 | .nmi_init = default_nmi_init, |
98 | .i8042_detect = default_i8042_detect | ||
96 | }; | 99 | }; |
100 | |||
101 | EXPORT_SYMBOL_GPL(x86_platform); | ||
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 6e73db1b7b4e..a4ae302f03aa 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c | |||
@@ -36,15 +36,14 @@ int check_for_xstate(struct i387_fxsave_struct __user *buf, | |||
36 | 36 | ||
37 | err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0], | 37 | err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0], |
38 | sizeof(struct _fpx_sw_bytes)); | 38 | sizeof(struct _fpx_sw_bytes)); |
39 | |||
40 | if (err) | 39 | if (err) |
41 | return err; | 40 | return -EFAULT; |
42 | 41 | ||
43 | /* | 42 | /* |
44 | * First Magic check failed. | 43 | * First Magic check failed. |
45 | */ | 44 | */ |
46 | if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1) | 45 | if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1) |
47 | return -1; | 46 | return -EINVAL; |
48 | 47 | ||
49 | /* | 48 | /* |
50 | * Check for error scenarios. | 49 | * Check for error scenarios. |
@@ -52,19 +51,21 @@ int check_for_xstate(struct i387_fxsave_struct __user *buf, | |||
52 | if (fx_sw_user->xstate_size < min_xstate_size || | 51 | if (fx_sw_user->xstate_size < min_xstate_size || |
53 | fx_sw_user->xstate_size > xstate_size || | 52 | fx_sw_user->xstate_size > xstate_size || |
54 | fx_sw_user->xstate_size > fx_sw_user->extended_size) | 53 | fx_sw_user->xstate_size > fx_sw_user->extended_size) |
55 | return -1; | 54 | return -EINVAL; |
56 | 55 | ||
57 | err = __get_user(magic2, (__u32 *) (((void *)fpstate) + | 56 | err = __get_user(magic2, (__u32 *) (((void *)fpstate) + |
58 | fx_sw_user->extended_size - | 57 | fx_sw_user->extended_size - |
59 | FP_XSTATE_MAGIC2_SIZE)); | 58 | FP_XSTATE_MAGIC2_SIZE)); |
59 | if (err) | ||
60 | return err; | ||
60 | /* | 61 | /* |
61 | * Check for the presence of second magic word at the end of memory | 62 | * Check for the presence of second magic word at the end of memory |
62 | * layout. This detects the case where the user just copied the legacy | 63 | * layout. This detects the case where the user just copied the legacy |
63 | * fpstate layout with out copying the extended state information | 64 | * fpstate layout with out copying the extended state information |
64 | * in the memory layout. | 65 | * in the memory layout. |
65 | */ | 66 | */ |
66 | if (err || magic2 != FP_XSTATE_MAGIC2) | 67 | if (magic2 != FP_XSTATE_MAGIC2) |
67 | return -1; | 68 | return -EFAULT; |
68 | 69 | ||
69 | return 0; | 70 | return 0; |
70 | } | 71 | } |
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 5ac0bb465ed6..b38bd8b92aa6 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -9,6 +9,7 @@ | |||
9 | * privileged instructions: | 9 | * privileged instructions: |
10 | * | 10 | * |
11 | * Copyright (C) 2006 Qumranet | 11 | * Copyright (C) 2006 Qumranet |
12 | * Copyright 2010 Red Hat, Inc. and/or its affilates. | ||
12 | * | 13 | * |
13 | * Avi Kivity <avi@qumranet.com> | 14 | * Avi Kivity <avi@qumranet.com> |
14 | * Yaniv Kamay <yaniv@qumranet.com> | 15 | * Yaniv Kamay <yaniv@qumranet.com> |
@@ -67,6 +68,9 @@ | |||
67 | #define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */ | 68 | #define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */ |
68 | #define SrcImmU (9<<4) /* Immediate operand, unsigned */ | 69 | #define SrcImmU (9<<4) /* Immediate operand, unsigned */ |
69 | #define SrcSI (0xa<<4) /* Source is in the DS:RSI */ | 70 | #define SrcSI (0xa<<4) /* Source is in the DS:RSI */ |
71 | #define SrcImmFAddr (0xb<<4) /* Source is immediate far address */ | ||
72 | #define SrcMemFAddr (0xc<<4) /* Source is far address in memory */ | ||
73 | #define SrcAcc (0xd<<4) /* Source Accumulator */ | ||
70 | #define SrcMask (0xf<<4) | 74 | #define SrcMask (0xf<<4) |
71 | /* Generic ModRM decode. */ | 75 | /* Generic ModRM decode. */ |
72 | #define ModRM (1<<8) | 76 | #define ModRM (1<<8) |
@@ -88,10 +92,6 @@ | |||
88 | #define Src2CL (1<<29) | 92 | #define Src2CL (1<<29) |
89 | #define Src2ImmByte (2<<29) | 93 | #define Src2ImmByte (2<<29) |
90 | #define Src2One (3<<29) | 94 | #define Src2One (3<<29) |
91 | #define Src2Imm16 (4<<29) | ||
92 | #define Src2Mem16 (5<<29) /* Used for Ep encoding. First argument has to be | ||
93 | in memory and second argument is located | ||
94 | immediately after the first one in memory. */ | ||
95 | #define Src2Mask (7<<29) | 95 | #define Src2Mask (7<<29) |
96 | 96 | ||
97 | enum { | 97 | enum { |
@@ -124,15 +124,15 @@ static u32 opcode_table[256] = { | |||
124 | /* 0x20 - 0x27 */ | 124 | /* 0x20 - 0x27 */ |
125 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, | 125 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
126 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 126 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
127 | DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0, | 127 | ByteOp | DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0, |
128 | /* 0x28 - 0x2F */ | 128 | /* 0x28 - 0x2F */ |
129 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, | 129 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
130 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 130 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
131 | 0, 0, 0, 0, | 131 | ByteOp | DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0, |
132 | /* 0x30 - 0x37 */ | 132 | /* 0x30 - 0x37 */ |
133 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, | 133 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
134 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 134 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
135 | 0, 0, 0, 0, | 135 | ByteOp | DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0, |
136 | /* 0x38 - 0x3F */ | 136 | /* 0x38 - 0x3F */ |
137 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 137 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
138 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 138 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
@@ -170,20 +170,20 @@ static u32 opcode_table[256] = { | |||
170 | /* 0x88 - 0x8F */ | 170 | /* 0x88 - 0x8F */ |
171 | ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, | 171 | ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, |
172 | ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, | 172 | ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, |
173 | DstMem | SrcReg | ModRM | Mov, ModRM | DstReg, | 173 | DstMem | SrcNone | ModRM | Mov, ModRM | DstReg, |
174 | DstReg | SrcMem | ModRM | Mov, Group | Group1A, | 174 | ImplicitOps | SrcMem16 | ModRM, Group | Group1A, |
175 | /* 0x90 - 0x97 */ | 175 | /* 0x90 - 0x97 */ |
176 | DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, | 176 | DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, |
177 | /* 0x98 - 0x9F */ | 177 | /* 0x98 - 0x9F */ |
178 | 0, 0, SrcImm | Src2Imm16 | No64, 0, | 178 | 0, 0, SrcImmFAddr | No64, 0, |
179 | ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, | 179 | ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, |
180 | /* 0xA0 - 0xA7 */ | 180 | /* 0xA0 - 0xA7 */ |
181 | ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, | 181 | ByteOp | DstAcc | SrcMem | Mov | MemAbs, DstAcc | SrcMem | Mov | MemAbs, |
182 | ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs, | 182 | ByteOp | DstMem | SrcAcc | Mov | MemAbs, DstMem | SrcAcc | Mov | MemAbs, |
183 | ByteOp | SrcSI | DstDI | Mov | String, SrcSI | DstDI | Mov | String, | 183 | ByteOp | SrcSI | DstDI | Mov | String, SrcSI | DstDI | Mov | String, |
184 | ByteOp | SrcSI | DstDI | String, SrcSI | DstDI | String, | 184 | ByteOp | SrcSI | DstDI | String, SrcSI | DstDI | String, |
185 | /* 0xA8 - 0xAF */ | 185 | /* 0xA8 - 0xAF */ |
186 | 0, 0, ByteOp | DstDI | Mov | String, DstDI | Mov | String, | 186 | DstAcc | SrcImmByte | ByteOp, DstAcc | SrcImm, ByteOp | DstDI | Mov | String, DstDI | Mov | String, |
187 | ByteOp | SrcSI | DstAcc | Mov | String, SrcSI | DstAcc | Mov | String, | 187 | ByteOp | SrcSI | DstAcc | Mov | String, SrcSI | DstAcc | Mov | String, |
188 | ByteOp | DstDI | String, DstDI | String, | 188 | ByteOp | DstDI | String, DstDI | String, |
189 | /* 0xB0 - 0xB7 */ | 189 | /* 0xB0 - 0xB7 */ |
@@ -215,7 +215,7 @@ static u32 opcode_table[256] = { | |||
215 | ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc, | 215 | ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc, |
216 | /* 0xE8 - 0xEF */ | 216 | /* 0xE8 - 0xEF */ |
217 | SrcImm | Stack, SrcImm | ImplicitOps, | 217 | SrcImm | Stack, SrcImm | ImplicitOps, |
218 | SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps, | 218 | SrcImmFAddr | No64, SrcImmByte | ImplicitOps, |
219 | SrcNone | ByteOp | DstAcc, SrcNone | DstAcc, | 219 | SrcNone | ByteOp | DstAcc, SrcNone | DstAcc, |
220 | SrcNone | ByteOp | DstAcc, SrcNone | DstAcc, | 220 | SrcNone | ByteOp | DstAcc, SrcNone | DstAcc, |
221 | /* 0xF0 - 0xF7 */ | 221 | /* 0xF0 - 0xF7 */ |
@@ -337,20 +337,20 @@ static u32 group_table[] = { | |||
337 | [Group1A*8] = | 337 | [Group1A*8] = |
338 | DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0, | 338 | DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0, |
339 | [Group3_Byte*8] = | 339 | [Group3_Byte*8] = |
340 | ByteOp | SrcImm | DstMem | ModRM, 0, | 340 | ByteOp | SrcImm | DstMem | ModRM, ByteOp | SrcImm | DstMem | ModRM, |
341 | ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM, | 341 | ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM, |
342 | 0, 0, 0, 0, | 342 | 0, 0, 0, 0, |
343 | [Group3*8] = | 343 | [Group3*8] = |
344 | DstMem | SrcImm | ModRM, 0, | 344 | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, |
345 | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, | 345 | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, |
346 | 0, 0, 0, 0, | 346 | 0, 0, 0, 0, |
347 | [Group4*8] = | 347 | [Group4*8] = |
348 | ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM, | 348 | ByteOp | DstMem | SrcNone | ModRM | Lock, ByteOp | DstMem | SrcNone | ModRM | Lock, |
349 | 0, 0, 0, 0, 0, 0, | 349 | 0, 0, 0, 0, 0, 0, |
350 | [Group5*8] = | 350 | [Group5*8] = |
351 | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, | 351 | DstMem | SrcNone | ModRM | Lock, DstMem | SrcNone | ModRM | Lock, |
352 | SrcMem | ModRM | Stack, 0, | 352 | SrcMem | ModRM | Stack, 0, |
353 | SrcMem | ModRM | Stack, SrcMem | ModRM | Src2Mem16 | ImplicitOps, | 353 | SrcMem | ModRM | Stack, SrcMemFAddr | ModRM | ImplicitOps, |
354 | SrcMem | ModRM | Stack, 0, | 354 | SrcMem | ModRM | Stack, 0, |
355 | [Group7*8] = | 355 | [Group7*8] = |
356 | 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv, | 356 | 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv, |
@@ -576,6 +576,13 @@ static u32 group2_table[] = { | |||
576 | (_type)_x; \ | 576 | (_type)_x; \ |
577 | }) | 577 | }) |
578 | 578 | ||
579 | #define insn_fetch_arr(_arr, _size, _eip) \ | ||
580 | ({ rc = do_insn_fetch(ctxt, ops, (_eip), _arr, (_size)); \ | ||
581 | if (rc != X86EMUL_CONTINUE) \ | ||
582 | goto done; \ | ||
583 | (_eip) += (_size); \ | ||
584 | }) | ||
585 | |||
579 | static inline unsigned long ad_mask(struct decode_cache *c) | 586 | static inline unsigned long ad_mask(struct decode_cache *c) |
580 | { | 587 | { |
581 | return (1UL << (c->ad_bytes << 3)) - 1; | 588 | return (1UL << (c->ad_bytes << 3)) - 1; |
@@ -617,31 +624,66 @@ static void set_seg_override(struct decode_cache *c, int seg) | |||
617 | c->seg_override = seg; | 624 | c->seg_override = seg; |
618 | } | 625 | } |
619 | 626 | ||
620 | static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg) | 627 | static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, |
628 | struct x86_emulate_ops *ops, int seg) | ||
621 | { | 629 | { |
622 | if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS) | 630 | if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS) |
623 | return 0; | 631 | return 0; |
624 | 632 | ||
625 | return kvm_x86_ops->get_segment_base(ctxt->vcpu, seg); | 633 | return ops->get_cached_segment_base(seg, ctxt->vcpu); |
626 | } | 634 | } |
627 | 635 | ||
628 | static unsigned long seg_override_base(struct x86_emulate_ctxt *ctxt, | 636 | static unsigned long seg_override_base(struct x86_emulate_ctxt *ctxt, |
637 | struct x86_emulate_ops *ops, | ||
629 | struct decode_cache *c) | 638 | struct decode_cache *c) |
630 | { | 639 | { |
631 | if (!c->has_seg_override) | 640 | if (!c->has_seg_override) |
632 | return 0; | 641 | return 0; |
633 | 642 | ||
634 | return seg_base(ctxt, c->seg_override); | 643 | return seg_base(ctxt, ops, c->seg_override); |
644 | } | ||
645 | |||
646 | static unsigned long es_base(struct x86_emulate_ctxt *ctxt, | ||
647 | struct x86_emulate_ops *ops) | ||
648 | { | ||
649 | return seg_base(ctxt, ops, VCPU_SREG_ES); | ||
650 | } | ||
651 | |||
652 | static unsigned long ss_base(struct x86_emulate_ctxt *ctxt, | ||
653 | struct x86_emulate_ops *ops) | ||
654 | { | ||
655 | return seg_base(ctxt, ops, VCPU_SREG_SS); | ||
656 | } | ||
657 | |||
658 | static void emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, | ||
659 | u32 error, bool valid) | ||
660 | { | ||
661 | ctxt->exception = vec; | ||
662 | ctxt->error_code = error; | ||
663 | ctxt->error_code_valid = valid; | ||
664 | ctxt->restart = false; | ||
665 | } | ||
666 | |||
667 | static void emulate_gp(struct x86_emulate_ctxt *ctxt, int err) | ||
668 | { | ||
669 | emulate_exception(ctxt, GP_VECTOR, err, true); | ||
635 | } | 670 | } |
636 | 671 | ||
637 | static unsigned long es_base(struct x86_emulate_ctxt *ctxt) | 672 | static void emulate_pf(struct x86_emulate_ctxt *ctxt, unsigned long addr, |
673 | int err) | ||
638 | { | 674 | { |
639 | return seg_base(ctxt, VCPU_SREG_ES); | 675 | ctxt->cr2 = addr; |
676 | emulate_exception(ctxt, PF_VECTOR, err, true); | ||
640 | } | 677 | } |
641 | 678 | ||
642 | static unsigned long ss_base(struct x86_emulate_ctxt *ctxt) | 679 | static void emulate_ud(struct x86_emulate_ctxt *ctxt) |
643 | { | 680 | { |
644 | return seg_base(ctxt, VCPU_SREG_SS); | 681 | emulate_exception(ctxt, UD_VECTOR, 0, false); |
682 | } | ||
683 | |||
684 | static void emulate_ts(struct x86_emulate_ctxt *ctxt, int err) | ||
685 | { | ||
686 | emulate_exception(ctxt, TS_VECTOR, err, true); | ||
645 | } | 687 | } |
646 | 688 | ||
647 | static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, | 689 | static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, |
@@ -932,12 +974,9 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
932 | /* we cannot decode insn before we complete previous rep insn */ | 974 | /* we cannot decode insn before we complete previous rep insn */ |
933 | WARN_ON(ctxt->restart); | 975 | WARN_ON(ctxt->restart); |
934 | 976 | ||
935 | /* Shadow copy of register state. Committed on successful emulation. */ | ||
936 | memset(c, 0, sizeof(struct decode_cache)); | ||
937 | c->eip = ctxt->eip; | 977 | c->eip = ctxt->eip; |
938 | c->fetch.start = c->fetch.end = c->eip; | 978 | c->fetch.start = c->fetch.end = c->eip; |
939 | ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); | 979 | ctxt->cs_base = seg_base(ctxt, ops, VCPU_SREG_CS); |
940 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | ||
941 | 980 | ||
942 | switch (mode) { | 981 | switch (mode) { |
943 | case X86EMUL_MODE_REAL: | 982 | case X86EMUL_MODE_REAL: |
@@ -1060,7 +1099,7 @@ done_prefixes: | |||
1060 | set_seg_override(c, VCPU_SREG_DS); | 1099 | set_seg_override(c, VCPU_SREG_DS); |
1061 | 1100 | ||
1062 | if (!(!c->twobyte && c->b == 0x8d)) | 1101 | if (!(!c->twobyte && c->b == 0x8d)) |
1063 | c->modrm_ea += seg_override_base(ctxt, c); | 1102 | c->modrm_ea += seg_override_base(ctxt, ops, c); |
1064 | 1103 | ||
1065 | if (c->ad_bytes != 8) | 1104 | if (c->ad_bytes != 8) |
1066 | c->modrm_ea = (u32)c->modrm_ea; | 1105 | c->modrm_ea = (u32)c->modrm_ea; |
@@ -1148,6 +1187,25 @@ done_prefixes: | |||
1148 | else | 1187 | else |
1149 | c->src.val = insn_fetch(u8, 1, c->eip); | 1188 | c->src.val = insn_fetch(u8, 1, c->eip); |
1150 | break; | 1189 | break; |
1190 | case SrcAcc: | ||
1191 | c->src.type = OP_REG; | ||
1192 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
1193 | c->src.ptr = &c->regs[VCPU_REGS_RAX]; | ||
1194 | switch (c->src.bytes) { | ||
1195 | case 1: | ||
1196 | c->src.val = *(u8 *)c->src.ptr; | ||
1197 | break; | ||
1198 | case 2: | ||
1199 | c->src.val = *(u16 *)c->src.ptr; | ||
1200 | break; | ||
1201 | case 4: | ||
1202 | c->src.val = *(u32 *)c->src.ptr; | ||
1203 | break; | ||
1204 | case 8: | ||
1205 | c->src.val = *(u64 *)c->src.ptr; | ||
1206 | break; | ||
1207 | } | ||
1208 | break; | ||
1151 | case SrcOne: | 1209 | case SrcOne: |
1152 | c->src.bytes = 1; | 1210 | c->src.bytes = 1; |
1153 | c->src.val = 1; | 1211 | c->src.val = 1; |
@@ -1156,10 +1214,21 @@ done_prefixes: | |||
1156 | c->src.type = OP_MEM; | 1214 | c->src.type = OP_MEM; |
1157 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 1215 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
1158 | c->src.ptr = (unsigned long *) | 1216 | c->src.ptr = (unsigned long *) |
1159 | register_address(c, seg_override_base(ctxt, c), | 1217 | register_address(c, seg_override_base(ctxt, ops, c), |
1160 | c->regs[VCPU_REGS_RSI]); | 1218 | c->regs[VCPU_REGS_RSI]); |
1161 | c->src.val = 0; | 1219 | c->src.val = 0; |
1162 | break; | 1220 | break; |
1221 | case SrcImmFAddr: | ||
1222 | c->src.type = OP_IMM; | ||
1223 | c->src.ptr = (unsigned long *)c->eip; | ||
1224 | c->src.bytes = c->op_bytes + 2; | ||
1225 | insn_fetch_arr(c->src.valptr, c->src.bytes, c->eip); | ||
1226 | break; | ||
1227 | case SrcMemFAddr: | ||
1228 | c->src.type = OP_MEM; | ||
1229 | c->src.ptr = (unsigned long *)c->modrm_ea; | ||
1230 | c->src.bytes = c->op_bytes + 2; | ||
1231 | break; | ||
1163 | } | 1232 | } |
1164 | 1233 | ||
1165 | /* | 1234 | /* |
@@ -1179,22 +1248,10 @@ done_prefixes: | |||
1179 | c->src2.bytes = 1; | 1248 | c->src2.bytes = 1; |
1180 | c->src2.val = insn_fetch(u8, 1, c->eip); | 1249 | c->src2.val = insn_fetch(u8, 1, c->eip); |
1181 | break; | 1250 | break; |
1182 | case Src2Imm16: | ||
1183 | c->src2.type = OP_IMM; | ||
1184 | c->src2.ptr = (unsigned long *)c->eip; | ||
1185 | c->src2.bytes = 2; | ||
1186 | c->src2.val = insn_fetch(u16, 2, c->eip); | ||
1187 | break; | ||
1188 | case Src2One: | 1251 | case Src2One: |
1189 | c->src2.bytes = 1; | 1252 | c->src2.bytes = 1; |
1190 | c->src2.val = 1; | 1253 | c->src2.val = 1; |
1191 | break; | 1254 | break; |
1192 | case Src2Mem16: | ||
1193 | c->src2.type = OP_MEM; | ||
1194 | c->src2.bytes = 2; | ||
1195 | c->src2.ptr = (unsigned long *)(c->modrm_ea + c->src.bytes); | ||
1196 | c->src2.val = 0; | ||
1197 | break; | ||
1198 | } | 1255 | } |
1199 | 1256 | ||
1200 | /* Decode and fetch the destination operand: register or memory. */ | 1257 | /* Decode and fetch the destination operand: register or memory. */ |
@@ -1253,7 +1310,7 @@ done_prefixes: | |||
1253 | c->dst.type = OP_MEM; | 1310 | c->dst.type = OP_MEM; |
1254 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 1311 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
1255 | c->dst.ptr = (unsigned long *) | 1312 | c->dst.ptr = (unsigned long *) |
1256 | register_address(c, es_base(ctxt), | 1313 | register_address(c, es_base(ctxt, ops), |
1257 | c->regs[VCPU_REGS_RDI]); | 1314 | c->regs[VCPU_REGS_RDI]); |
1258 | c->dst.val = 0; | 1315 | c->dst.val = 0; |
1259 | break; | 1316 | break; |
@@ -1263,6 +1320,37 @@ done: | |||
1263 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; | 1320 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; |
1264 | } | 1321 | } |
1265 | 1322 | ||
1323 | static int read_emulated(struct x86_emulate_ctxt *ctxt, | ||
1324 | struct x86_emulate_ops *ops, | ||
1325 | unsigned long addr, void *dest, unsigned size) | ||
1326 | { | ||
1327 | int rc; | ||
1328 | struct read_cache *mc = &ctxt->decode.mem_read; | ||
1329 | u32 err; | ||
1330 | |||
1331 | while (size) { | ||
1332 | int n = min(size, 8u); | ||
1333 | size -= n; | ||
1334 | if (mc->pos < mc->end) | ||
1335 | goto read_cached; | ||
1336 | |||
1337 | rc = ops->read_emulated(addr, mc->data + mc->end, n, &err, | ||
1338 | ctxt->vcpu); | ||
1339 | if (rc == X86EMUL_PROPAGATE_FAULT) | ||
1340 | emulate_pf(ctxt, addr, err); | ||
1341 | if (rc != X86EMUL_CONTINUE) | ||
1342 | return rc; | ||
1343 | mc->end += n; | ||
1344 | |||
1345 | read_cached: | ||
1346 | memcpy(dest, mc->data + mc->pos, n); | ||
1347 | mc->pos += n; | ||
1348 | dest += n; | ||
1349 | addr += n; | ||
1350 | } | ||
1351 | return X86EMUL_CONTINUE; | ||
1352 | } | ||
1353 | |||
1266 | static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, | 1354 | static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, |
1267 | struct x86_emulate_ops *ops, | 1355 | struct x86_emulate_ops *ops, |
1268 | unsigned int size, unsigned short port, | 1356 | unsigned int size, unsigned short port, |
@@ -1330,13 +1418,13 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1330 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); | 1418 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); |
1331 | 1419 | ||
1332 | if (dt.size < index * 8 + 7) { | 1420 | if (dt.size < index * 8 + 7) { |
1333 | kvm_inject_gp(ctxt->vcpu, selector & 0xfffc); | 1421 | emulate_gp(ctxt, selector & 0xfffc); |
1334 | return X86EMUL_PROPAGATE_FAULT; | 1422 | return X86EMUL_PROPAGATE_FAULT; |
1335 | } | 1423 | } |
1336 | addr = dt.address + index * 8; | 1424 | addr = dt.address + index * 8; |
1337 | ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); | 1425 | ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); |
1338 | if (ret == X86EMUL_PROPAGATE_FAULT) | 1426 | if (ret == X86EMUL_PROPAGATE_FAULT) |
1339 | kvm_inject_page_fault(ctxt->vcpu, addr, err); | 1427 | emulate_pf(ctxt, addr, err); |
1340 | 1428 | ||
1341 | return ret; | 1429 | return ret; |
1342 | } | 1430 | } |
@@ -1355,14 +1443,14 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1355 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); | 1443 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); |
1356 | 1444 | ||
1357 | if (dt.size < index * 8 + 7) { | 1445 | if (dt.size < index * 8 + 7) { |
1358 | kvm_inject_gp(ctxt->vcpu, selector & 0xfffc); | 1446 | emulate_gp(ctxt, selector & 0xfffc); |
1359 | return X86EMUL_PROPAGATE_FAULT; | 1447 | return X86EMUL_PROPAGATE_FAULT; |
1360 | } | 1448 | } |
1361 | 1449 | ||
1362 | addr = dt.address + index * 8; | 1450 | addr = dt.address + index * 8; |
1363 | ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); | 1451 | ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); |
1364 | if (ret == X86EMUL_PROPAGATE_FAULT) | 1452 | if (ret == X86EMUL_PROPAGATE_FAULT) |
1365 | kvm_inject_page_fault(ctxt->vcpu, addr, err); | 1453 | emulate_pf(ctxt, addr, err); |
1366 | 1454 | ||
1367 | return ret; | 1455 | return ret; |
1368 | } | 1456 | } |
@@ -1481,11 +1569,70 @@ load: | |||
1481 | ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu); | 1569 | ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu); |
1482 | return X86EMUL_CONTINUE; | 1570 | return X86EMUL_CONTINUE; |
1483 | exception: | 1571 | exception: |
1484 | kvm_queue_exception_e(ctxt->vcpu, err_vec, err_code); | 1572 | emulate_exception(ctxt, err_vec, err_code, true); |
1485 | return X86EMUL_PROPAGATE_FAULT; | 1573 | return X86EMUL_PROPAGATE_FAULT; |
1486 | } | 1574 | } |
1487 | 1575 | ||
1488 | static inline void emulate_push(struct x86_emulate_ctxt *ctxt) | 1576 | static inline int writeback(struct x86_emulate_ctxt *ctxt, |
1577 | struct x86_emulate_ops *ops) | ||
1578 | { | ||
1579 | int rc; | ||
1580 | struct decode_cache *c = &ctxt->decode; | ||
1581 | u32 err; | ||
1582 | |||
1583 | switch (c->dst.type) { | ||
1584 | case OP_REG: | ||
1585 | /* The 4-byte case *is* correct: | ||
1586 | * in 64-bit mode we zero-extend. | ||
1587 | */ | ||
1588 | switch (c->dst.bytes) { | ||
1589 | case 1: | ||
1590 | *(u8 *)c->dst.ptr = (u8)c->dst.val; | ||
1591 | break; | ||
1592 | case 2: | ||
1593 | *(u16 *)c->dst.ptr = (u16)c->dst.val; | ||
1594 | break; | ||
1595 | case 4: | ||
1596 | *c->dst.ptr = (u32)c->dst.val; | ||
1597 | break; /* 64b: zero-ext */ | ||
1598 | case 8: | ||
1599 | *c->dst.ptr = c->dst.val; | ||
1600 | break; | ||
1601 | } | ||
1602 | break; | ||
1603 | case OP_MEM: | ||
1604 | if (c->lock_prefix) | ||
1605 | rc = ops->cmpxchg_emulated( | ||
1606 | (unsigned long)c->dst.ptr, | ||
1607 | &c->dst.orig_val, | ||
1608 | &c->dst.val, | ||
1609 | c->dst.bytes, | ||
1610 | &err, | ||
1611 | ctxt->vcpu); | ||
1612 | else | ||
1613 | rc = ops->write_emulated( | ||
1614 | (unsigned long)c->dst.ptr, | ||
1615 | &c->dst.val, | ||
1616 | c->dst.bytes, | ||
1617 | &err, | ||
1618 | ctxt->vcpu); | ||
1619 | if (rc == X86EMUL_PROPAGATE_FAULT) | ||
1620 | emulate_pf(ctxt, | ||
1621 | (unsigned long)c->dst.ptr, err); | ||
1622 | if (rc != X86EMUL_CONTINUE) | ||
1623 | return rc; | ||
1624 | break; | ||
1625 | case OP_NONE: | ||
1626 | /* no writeback */ | ||
1627 | break; | ||
1628 | default: | ||
1629 | break; | ||
1630 | } | ||
1631 | return X86EMUL_CONTINUE; | ||
1632 | } | ||
1633 | |||
1634 | static inline void emulate_push(struct x86_emulate_ctxt *ctxt, | ||
1635 | struct x86_emulate_ops *ops) | ||
1489 | { | 1636 | { |
1490 | struct decode_cache *c = &ctxt->decode; | 1637 | struct decode_cache *c = &ctxt->decode; |
1491 | 1638 | ||
@@ -1493,7 +1640,7 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt) | |||
1493 | c->dst.bytes = c->op_bytes; | 1640 | c->dst.bytes = c->op_bytes; |
1494 | c->dst.val = c->src.val; | 1641 | c->dst.val = c->src.val; |
1495 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); | 1642 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); |
1496 | c->dst.ptr = (void *) register_address(c, ss_base(ctxt), | 1643 | c->dst.ptr = (void *) register_address(c, ss_base(ctxt, ops), |
1497 | c->regs[VCPU_REGS_RSP]); | 1644 | c->regs[VCPU_REGS_RSP]); |
1498 | } | 1645 | } |
1499 | 1646 | ||
@@ -1504,9 +1651,9 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt, | |||
1504 | struct decode_cache *c = &ctxt->decode; | 1651 | struct decode_cache *c = &ctxt->decode; |
1505 | int rc; | 1652 | int rc; |
1506 | 1653 | ||
1507 | rc = ops->read_emulated(register_address(c, ss_base(ctxt), | 1654 | rc = read_emulated(ctxt, ops, register_address(c, ss_base(ctxt, ops), |
1508 | c->regs[VCPU_REGS_RSP]), | 1655 | c->regs[VCPU_REGS_RSP]), |
1509 | dest, len, ctxt->vcpu); | 1656 | dest, len); |
1510 | if (rc != X86EMUL_CONTINUE) | 1657 | if (rc != X86EMUL_CONTINUE) |
1511 | return rc; | 1658 | return rc; |
1512 | 1659 | ||
@@ -1541,7 +1688,7 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, | |||
1541 | break; | 1688 | break; |
1542 | case X86EMUL_MODE_VM86: | 1689 | case X86EMUL_MODE_VM86: |
1543 | if (iopl < 3) { | 1690 | if (iopl < 3) { |
1544 | kvm_inject_gp(ctxt->vcpu, 0); | 1691 | emulate_gp(ctxt, 0); |
1545 | return X86EMUL_PROPAGATE_FAULT; | 1692 | return X86EMUL_PROPAGATE_FAULT; |
1546 | } | 1693 | } |
1547 | change_mask |= EFLG_IF; | 1694 | change_mask |= EFLG_IF; |
@@ -1557,15 +1704,14 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, | |||
1557 | return rc; | 1704 | return rc; |
1558 | } | 1705 | } |
1559 | 1706 | ||
1560 | static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg) | 1707 | static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, |
1708 | struct x86_emulate_ops *ops, int seg) | ||
1561 | { | 1709 | { |
1562 | struct decode_cache *c = &ctxt->decode; | 1710 | struct decode_cache *c = &ctxt->decode; |
1563 | struct kvm_segment segment; | ||
1564 | 1711 | ||
1565 | kvm_x86_ops->get_segment(ctxt->vcpu, &segment, seg); | 1712 | c->src.val = ops->get_segment_selector(seg, ctxt->vcpu); |
1566 | 1713 | ||
1567 | c->src.val = segment.selector; | 1714 | emulate_push(ctxt, ops); |
1568 | emulate_push(ctxt); | ||
1569 | } | 1715 | } |
1570 | 1716 | ||
1571 | static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, | 1717 | static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, |
@@ -1583,19 +1729,31 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, | |||
1583 | return rc; | 1729 | return rc; |
1584 | } | 1730 | } |
1585 | 1731 | ||
1586 | static void emulate_pusha(struct x86_emulate_ctxt *ctxt) | 1732 | static int emulate_pusha(struct x86_emulate_ctxt *ctxt, |
1733 | struct x86_emulate_ops *ops) | ||
1587 | { | 1734 | { |
1588 | struct decode_cache *c = &ctxt->decode; | 1735 | struct decode_cache *c = &ctxt->decode; |
1589 | unsigned long old_esp = c->regs[VCPU_REGS_RSP]; | 1736 | unsigned long old_esp = c->regs[VCPU_REGS_RSP]; |
1737 | int rc = X86EMUL_CONTINUE; | ||
1590 | int reg = VCPU_REGS_RAX; | 1738 | int reg = VCPU_REGS_RAX; |
1591 | 1739 | ||
1592 | while (reg <= VCPU_REGS_RDI) { | 1740 | while (reg <= VCPU_REGS_RDI) { |
1593 | (reg == VCPU_REGS_RSP) ? | 1741 | (reg == VCPU_REGS_RSP) ? |
1594 | (c->src.val = old_esp) : (c->src.val = c->regs[reg]); | 1742 | (c->src.val = old_esp) : (c->src.val = c->regs[reg]); |
1595 | 1743 | ||
1596 | emulate_push(ctxt); | 1744 | emulate_push(ctxt, ops); |
1745 | |||
1746 | rc = writeback(ctxt, ops); | ||
1747 | if (rc != X86EMUL_CONTINUE) | ||
1748 | return rc; | ||
1749 | |||
1597 | ++reg; | 1750 | ++reg; |
1598 | } | 1751 | } |
1752 | |||
1753 | /* Disable writeback. */ | ||
1754 | c->dst.type = OP_NONE; | ||
1755 | |||
1756 | return rc; | ||
1599 | } | 1757 | } |
1600 | 1758 | ||
1601 | static int emulate_popa(struct x86_emulate_ctxt *ctxt, | 1759 | static int emulate_popa(struct x86_emulate_ctxt *ctxt, |
@@ -1695,14 +1853,14 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, | |||
1695 | old_eip = c->eip; | 1853 | old_eip = c->eip; |
1696 | c->eip = c->src.val; | 1854 | c->eip = c->src.val; |
1697 | c->src.val = old_eip; | 1855 | c->src.val = old_eip; |
1698 | emulate_push(ctxt); | 1856 | emulate_push(ctxt, ops); |
1699 | break; | 1857 | break; |
1700 | } | 1858 | } |
1701 | case 4: /* jmp abs */ | 1859 | case 4: /* jmp abs */ |
1702 | c->eip = c->src.val; | 1860 | c->eip = c->src.val; |
1703 | break; | 1861 | break; |
1704 | case 6: /* push */ | 1862 | case 6: /* push */ |
1705 | emulate_push(ctxt); | 1863 | emulate_push(ctxt, ops); |
1706 | break; | 1864 | break; |
1707 | } | 1865 | } |
1708 | return X86EMUL_CONTINUE; | 1866 | return X86EMUL_CONTINUE; |
@@ -1748,145 +1906,82 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, | |||
1748 | return rc; | 1906 | return rc; |
1749 | } | 1907 | } |
1750 | 1908 | ||
1751 | static inline int writeback(struct x86_emulate_ctxt *ctxt, | ||
1752 | struct x86_emulate_ops *ops) | ||
1753 | { | ||
1754 | int rc; | ||
1755 | struct decode_cache *c = &ctxt->decode; | ||
1756 | |||
1757 | switch (c->dst.type) { | ||
1758 | case OP_REG: | ||
1759 | /* The 4-byte case *is* correct: | ||
1760 | * in 64-bit mode we zero-extend. | ||
1761 | */ | ||
1762 | switch (c->dst.bytes) { | ||
1763 | case 1: | ||
1764 | *(u8 *)c->dst.ptr = (u8)c->dst.val; | ||
1765 | break; | ||
1766 | case 2: | ||
1767 | *(u16 *)c->dst.ptr = (u16)c->dst.val; | ||
1768 | break; | ||
1769 | case 4: | ||
1770 | *c->dst.ptr = (u32)c->dst.val; | ||
1771 | break; /* 64b: zero-ext */ | ||
1772 | case 8: | ||
1773 | *c->dst.ptr = c->dst.val; | ||
1774 | break; | ||
1775 | } | ||
1776 | break; | ||
1777 | case OP_MEM: | ||
1778 | if (c->lock_prefix) | ||
1779 | rc = ops->cmpxchg_emulated( | ||
1780 | (unsigned long)c->dst.ptr, | ||
1781 | &c->dst.orig_val, | ||
1782 | &c->dst.val, | ||
1783 | c->dst.bytes, | ||
1784 | ctxt->vcpu); | ||
1785 | else | ||
1786 | rc = ops->write_emulated( | ||
1787 | (unsigned long)c->dst.ptr, | ||
1788 | &c->dst.val, | ||
1789 | c->dst.bytes, | ||
1790 | ctxt->vcpu); | ||
1791 | if (rc != X86EMUL_CONTINUE) | ||
1792 | return rc; | ||
1793 | break; | ||
1794 | case OP_NONE: | ||
1795 | /* no writeback */ | ||
1796 | break; | ||
1797 | default: | ||
1798 | break; | ||
1799 | } | ||
1800 | return X86EMUL_CONTINUE; | ||
1801 | } | ||
1802 | |||
1803 | static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask) | ||
1804 | { | ||
1805 | u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask); | ||
1806 | /* | ||
1807 | * an sti; sti; sequence only disable interrupts for the first | ||
1808 | * instruction. So, if the last instruction, be it emulated or | ||
1809 | * not, left the system with the INT_STI flag enabled, it | ||
1810 | * means that the last instruction is an sti. We should not | ||
1811 | * leave the flag on in this case. The same goes for mov ss | ||
1812 | */ | ||
1813 | if (!(int_shadow & mask)) | ||
1814 | ctxt->interruptibility = mask; | ||
1815 | } | ||
1816 | |||
1817 | static inline void | 1909 | static inline void |
1818 | setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, | 1910 | setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, |
1819 | struct kvm_segment *cs, struct kvm_segment *ss) | 1911 | struct x86_emulate_ops *ops, struct desc_struct *cs, |
1912 | struct desc_struct *ss) | ||
1820 | { | 1913 | { |
1821 | memset(cs, 0, sizeof(struct kvm_segment)); | 1914 | memset(cs, 0, sizeof(struct desc_struct)); |
1822 | kvm_x86_ops->get_segment(ctxt->vcpu, cs, VCPU_SREG_CS); | 1915 | ops->get_cached_descriptor(cs, VCPU_SREG_CS, ctxt->vcpu); |
1823 | memset(ss, 0, sizeof(struct kvm_segment)); | 1916 | memset(ss, 0, sizeof(struct desc_struct)); |
1824 | 1917 | ||
1825 | cs->l = 0; /* will be adjusted later */ | 1918 | cs->l = 0; /* will be adjusted later */ |
1826 | cs->base = 0; /* flat segment */ | 1919 | set_desc_base(cs, 0); /* flat segment */ |
1827 | cs->g = 1; /* 4kb granularity */ | 1920 | cs->g = 1; /* 4kb granularity */ |
1828 | cs->limit = 0xffffffff; /* 4GB limit */ | 1921 | set_desc_limit(cs, 0xfffff); /* 4GB limit */ |
1829 | cs->type = 0x0b; /* Read, Execute, Accessed */ | 1922 | cs->type = 0x0b; /* Read, Execute, Accessed */ |
1830 | cs->s = 1; | 1923 | cs->s = 1; |
1831 | cs->dpl = 0; /* will be adjusted later */ | 1924 | cs->dpl = 0; /* will be adjusted later */ |
1832 | cs->present = 1; | 1925 | cs->p = 1; |
1833 | cs->db = 1; | 1926 | cs->d = 1; |
1834 | 1927 | ||
1835 | ss->unusable = 0; | 1928 | set_desc_base(ss, 0); /* flat segment */ |
1836 | ss->base = 0; /* flat segment */ | 1929 | set_desc_limit(ss, 0xfffff); /* 4GB limit */ |
1837 | ss->limit = 0xffffffff; /* 4GB limit */ | ||
1838 | ss->g = 1; /* 4kb granularity */ | 1930 | ss->g = 1; /* 4kb granularity */ |
1839 | ss->s = 1; | 1931 | ss->s = 1; |
1840 | ss->type = 0x03; /* Read/Write, Accessed */ | 1932 | ss->type = 0x03; /* Read/Write, Accessed */ |
1841 | ss->db = 1; /* 32bit stack segment */ | 1933 | ss->d = 1; /* 32bit stack segment */ |
1842 | ss->dpl = 0; | 1934 | ss->dpl = 0; |
1843 | ss->present = 1; | 1935 | ss->p = 1; |
1844 | } | 1936 | } |
1845 | 1937 | ||
1846 | static int | 1938 | static int |
1847 | emulate_syscall(struct x86_emulate_ctxt *ctxt) | 1939 | emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) |
1848 | { | 1940 | { |
1849 | struct decode_cache *c = &ctxt->decode; | 1941 | struct decode_cache *c = &ctxt->decode; |
1850 | struct kvm_segment cs, ss; | 1942 | struct desc_struct cs, ss; |
1851 | u64 msr_data; | 1943 | u64 msr_data; |
1944 | u16 cs_sel, ss_sel; | ||
1852 | 1945 | ||
1853 | /* syscall is not available in real mode */ | 1946 | /* syscall is not available in real mode */ |
1854 | if (ctxt->mode == X86EMUL_MODE_REAL || | 1947 | if (ctxt->mode == X86EMUL_MODE_REAL || |
1855 | ctxt->mode == X86EMUL_MODE_VM86) { | 1948 | ctxt->mode == X86EMUL_MODE_VM86) { |
1856 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 1949 | emulate_ud(ctxt); |
1857 | return X86EMUL_PROPAGATE_FAULT; | 1950 | return X86EMUL_PROPAGATE_FAULT; |
1858 | } | 1951 | } |
1859 | 1952 | ||
1860 | setup_syscalls_segments(ctxt, &cs, &ss); | 1953 | setup_syscalls_segments(ctxt, ops, &cs, &ss); |
1861 | 1954 | ||
1862 | kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); | 1955 | ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); |
1863 | msr_data >>= 32; | 1956 | msr_data >>= 32; |
1864 | cs.selector = (u16)(msr_data & 0xfffc); | 1957 | cs_sel = (u16)(msr_data & 0xfffc); |
1865 | ss.selector = (u16)(msr_data + 8); | 1958 | ss_sel = (u16)(msr_data + 8); |
1866 | 1959 | ||
1867 | if (is_long_mode(ctxt->vcpu)) { | 1960 | if (is_long_mode(ctxt->vcpu)) { |
1868 | cs.db = 0; | 1961 | cs.d = 0; |
1869 | cs.l = 1; | 1962 | cs.l = 1; |
1870 | } | 1963 | } |
1871 | kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS); | 1964 | ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu); |
1872 | kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS); | 1965 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); |
1966 | ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu); | ||
1967 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); | ||
1873 | 1968 | ||
1874 | c->regs[VCPU_REGS_RCX] = c->eip; | 1969 | c->regs[VCPU_REGS_RCX] = c->eip; |
1875 | if (is_long_mode(ctxt->vcpu)) { | 1970 | if (is_long_mode(ctxt->vcpu)) { |
1876 | #ifdef CONFIG_X86_64 | 1971 | #ifdef CONFIG_X86_64 |
1877 | c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF; | 1972 | c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF; |
1878 | 1973 | ||
1879 | kvm_x86_ops->get_msr(ctxt->vcpu, | 1974 | ops->get_msr(ctxt->vcpu, |
1880 | ctxt->mode == X86EMUL_MODE_PROT64 ? | 1975 | ctxt->mode == X86EMUL_MODE_PROT64 ? |
1881 | MSR_LSTAR : MSR_CSTAR, &msr_data); | 1976 | MSR_LSTAR : MSR_CSTAR, &msr_data); |
1882 | c->eip = msr_data; | 1977 | c->eip = msr_data; |
1883 | 1978 | ||
1884 | kvm_x86_ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data); | 1979 | ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data); |
1885 | ctxt->eflags &= ~(msr_data | EFLG_RF); | 1980 | ctxt->eflags &= ~(msr_data | EFLG_RF); |
1886 | #endif | 1981 | #endif |
1887 | } else { | 1982 | } else { |
1888 | /* legacy mode */ | 1983 | /* legacy mode */ |
1889 | kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); | 1984 | ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); |
1890 | c->eip = (u32)msr_data; | 1985 | c->eip = (u32)msr_data; |
1891 | 1986 | ||
1892 | ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); | 1987 | ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); |
@@ -1896,15 +1991,16 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt) | |||
1896 | } | 1991 | } |
1897 | 1992 | ||
1898 | static int | 1993 | static int |
1899 | emulate_sysenter(struct x86_emulate_ctxt *ctxt) | 1994 | emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) |
1900 | { | 1995 | { |
1901 | struct decode_cache *c = &ctxt->decode; | 1996 | struct decode_cache *c = &ctxt->decode; |
1902 | struct kvm_segment cs, ss; | 1997 | struct desc_struct cs, ss; |
1903 | u64 msr_data; | 1998 | u64 msr_data; |
1999 | u16 cs_sel, ss_sel; | ||
1904 | 2000 | ||
1905 | /* inject #GP if in real mode */ | 2001 | /* inject #GP if in real mode */ |
1906 | if (ctxt->mode == X86EMUL_MODE_REAL) { | 2002 | if (ctxt->mode == X86EMUL_MODE_REAL) { |
1907 | kvm_inject_gp(ctxt->vcpu, 0); | 2003 | emulate_gp(ctxt, 0); |
1908 | return X86EMUL_PROPAGATE_FAULT; | 2004 | return X86EMUL_PROPAGATE_FAULT; |
1909 | } | 2005 | } |
1910 | 2006 | ||
@@ -1912,67 +2008,70 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt) | |||
1912 | * Therefore, we inject an #UD. | 2008 | * Therefore, we inject an #UD. |
1913 | */ | 2009 | */ |
1914 | if (ctxt->mode == X86EMUL_MODE_PROT64) { | 2010 | if (ctxt->mode == X86EMUL_MODE_PROT64) { |
1915 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 2011 | emulate_ud(ctxt); |
1916 | return X86EMUL_PROPAGATE_FAULT; | 2012 | return X86EMUL_PROPAGATE_FAULT; |
1917 | } | 2013 | } |
1918 | 2014 | ||
1919 | setup_syscalls_segments(ctxt, &cs, &ss); | 2015 | setup_syscalls_segments(ctxt, ops, &cs, &ss); |
1920 | 2016 | ||
1921 | kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); | 2017 | ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); |
1922 | switch (ctxt->mode) { | 2018 | switch (ctxt->mode) { |
1923 | case X86EMUL_MODE_PROT32: | 2019 | case X86EMUL_MODE_PROT32: |
1924 | if ((msr_data & 0xfffc) == 0x0) { | 2020 | if ((msr_data & 0xfffc) == 0x0) { |
1925 | kvm_inject_gp(ctxt->vcpu, 0); | 2021 | emulate_gp(ctxt, 0); |
1926 | return X86EMUL_PROPAGATE_FAULT; | 2022 | return X86EMUL_PROPAGATE_FAULT; |
1927 | } | 2023 | } |
1928 | break; | 2024 | break; |
1929 | case X86EMUL_MODE_PROT64: | 2025 | case X86EMUL_MODE_PROT64: |
1930 | if (msr_data == 0x0) { | 2026 | if (msr_data == 0x0) { |
1931 | kvm_inject_gp(ctxt->vcpu, 0); | 2027 | emulate_gp(ctxt, 0); |
1932 | return X86EMUL_PROPAGATE_FAULT; | 2028 | return X86EMUL_PROPAGATE_FAULT; |
1933 | } | 2029 | } |
1934 | break; | 2030 | break; |
1935 | } | 2031 | } |
1936 | 2032 | ||
1937 | ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); | 2033 | ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); |
1938 | cs.selector = (u16)msr_data; | 2034 | cs_sel = (u16)msr_data; |
1939 | cs.selector &= ~SELECTOR_RPL_MASK; | 2035 | cs_sel &= ~SELECTOR_RPL_MASK; |
1940 | ss.selector = cs.selector + 8; | 2036 | ss_sel = cs_sel + 8; |
1941 | ss.selector &= ~SELECTOR_RPL_MASK; | 2037 | ss_sel &= ~SELECTOR_RPL_MASK; |
1942 | if (ctxt->mode == X86EMUL_MODE_PROT64 | 2038 | if (ctxt->mode == X86EMUL_MODE_PROT64 |
1943 | || is_long_mode(ctxt->vcpu)) { | 2039 | || is_long_mode(ctxt->vcpu)) { |
1944 | cs.db = 0; | 2040 | cs.d = 0; |
1945 | cs.l = 1; | 2041 | cs.l = 1; |
1946 | } | 2042 | } |
1947 | 2043 | ||
1948 | kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS); | 2044 | ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu); |
1949 | kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS); | 2045 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); |
2046 | ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu); | ||
2047 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); | ||
1950 | 2048 | ||
1951 | kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data); | 2049 | ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data); |
1952 | c->eip = msr_data; | 2050 | c->eip = msr_data; |
1953 | 2051 | ||
1954 | kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data); | 2052 | ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data); |
1955 | c->regs[VCPU_REGS_RSP] = msr_data; | 2053 | c->regs[VCPU_REGS_RSP] = msr_data; |
1956 | 2054 | ||
1957 | return X86EMUL_CONTINUE; | 2055 | return X86EMUL_CONTINUE; |
1958 | } | 2056 | } |
1959 | 2057 | ||
1960 | static int | 2058 | static int |
1961 | emulate_sysexit(struct x86_emulate_ctxt *ctxt) | 2059 | emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) |
1962 | { | 2060 | { |
1963 | struct decode_cache *c = &ctxt->decode; | 2061 | struct decode_cache *c = &ctxt->decode; |
1964 | struct kvm_segment cs, ss; | 2062 | struct desc_struct cs, ss; |
1965 | u64 msr_data; | 2063 | u64 msr_data; |
1966 | int usermode; | 2064 | int usermode; |
2065 | u16 cs_sel, ss_sel; | ||
1967 | 2066 | ||
1968 | /* inject #GP if in real mode or Virtual 8086 mode */ | 2067 | /* inject #GP if in real mode or Virtual 8086 mode */ |
1969 | if (ctxt->mode == X86EMUL_MODE_REAL || | 2068 | if (ctxt->mode == X86EMUL_MODE_REAL || |
1970 | ctxt->mode == X86EMUL_MODE_VM86) { | 2069 | ctxt->mode == X86EMUL_MODE_VM86) { |
1971 | kvm_inject_gp(ctxt->vcpu, 0); | 2070 | emulate_gp(ctxt, 0); |
1972 | return X86EMUL_PROPAGATE_FAULT; | 2071 | return X86EMUL_PROPAGATE_FAULT; |
1973 | } | 2072 | } |
1974 | 2073 | ||
1975 | setup_syscalls_segments(ctxt, &cs, &ss); | 2074 | setup_syscalls_segments(ctxt, ops, &cs, &ss); |
1976 | 2075 | ||
1977 | if ((c->rex_prefix & 0x8) != 0x0) | 2076 | if ((c->rex_prefix & 0x8) != 0x0) |
1978 | usermode = X86EMUL_MODE_PROT64; | 2077 | usermode = X86EMUL_MODE_PROT64; |
@@ -1981,35 +2080,37 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt) | |||
1981 | 2080 | ||
1982 | cs.dpl = 3; | 2081 | cs.dpl = 3; |
1983 | ss.dpl = 3; | 2082 | ss.dpl = 3; |
1984 | kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); | 2083 | ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); |
1985 | switch (usermode) { | 2084 | switch (usermode) { |
1986 | case X86EMUL_MODE_PROT32: | 2085 | case X86EMUL_MODE_PROT32: |
1987 | cs.selector = (u16)(msr_data + 16); | 2086 | cs_sel = (u16)(msr_data + 16); |
1988 | if ((msr_data & 0xfffc) == 0x0) { | 2087 | if ((msr_data & 0xfffc) == 0x0) { |
1989 | kvm_inject_gp(ctxt->vcpu, 0); | 2088 | emulate_gp(ctxt, 0); |
1990 | return X86EMUL_PROPAGATE_FAULT; | 2089 | return X86EMUL_PROPAGATE_FAULT; |
1991 | } | 2090 | } |
1992 | ss.selector = (u16)(msr_data + 24); | 2091 | ss_sel = (u16)(msr_data + 24); |
1993 | break; | 2092 | break; |
1994 | case X86EMUL_MODE_PROT64: | 2093 | case X86EMUL_MODE_PROT64: |
1995 | cs.selector = (u16)(msr_data + 32); | 2094 | cs_sel = (u16)(msr_data + 32); |
1996 | if (msr_data == 0x0) { | 2095 | if (msr_data == 0x0) { |
1997 | kvm_inject_gp(ctxt->vcpu, 0); | 2096 | emulate_gp(ctxt, 0); |
1998 | return X86EMUL_PROPAGATE_FAULT; | 2097 | return X86EMUL_PROPAGATE_FAULT; |
1999 | } | 2098 | } |
2000 | ss.selector = cs.selector + 8; | 2099 | ss_sel = cs_sel + 8; |
2001 | cs.db = 0; | 2100 | cs.d = 0; |
2002 | cs.l = 1; | 2101 | cs.l = 1; |
2003 | break; | 2102 | break; |
2004 | } | 2103 | } |
2005 | cs.selector |= SELECTOR_RPL_MASK; | 2104 | cs_sel |= SELECTOR_RPL_MASK; |
2006 | ss.selector |= SELECTOR_RPL_MASK; | 2105 | ss_sel |= SELECTOR_RPL_MASK; |
2007 | 2106 | ||
2008 | kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS); | 2107 | ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu); |
2009 | kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS); | 2108 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); |
2109 | ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu); | ||
2110 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); | ||
2010 | 2111 | ||
2011 | c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX]; | 2112 | c->eip = c->regs[VCPU_REGS_RDX]; |
2012 | c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX]; | 2113 | c->regs[VCPU_REGS_RSP] = c->regs[VCPU_REGS_RCX]; |
2013 | 2114 | ||
2014 | return X86EMUL_CONTINUE; | 2115 | return X86EMUL_CONTINUE; |
2015 | } | 2116 | } |
@@ -2030,25 +2131,25 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, | |||
2030 | struct x86_emulate_ops *ops, | 2131 | struct x86_emulate_ops *ops, |
2031 | u16 port, u16 len) | 2132 | u16 port, u16 len) |
2032 | { | 2133 | { |
2033 | struct kvm_segment tr_seg; | 2134 | struct desc_struct tr_seg; |
2034 | int r; | 2135 | int r; |
2035 | u16 io_bitmap_ptr; | 2136 | u16 io_bitmap_ptr; |
2036 | u8 perm, bit_idx = port & 0x7; | 2137 | u8 perm, bit_idx = port & 0x7; |
2037 | unsigned mask = (1 << len) - 1; | 2138 | unsigned mask = (1 << len) - 1; |
2038 | 2139 | ||
2039 | kvm_get_segment(ctxt->vcpu, &tr_seg, VCPU_SREG_TR); | 2140 | ops->get_cached_descriptor(&tr_seg, VCPU_SREG_TR, ctxt->vcpu); |
2040 | if (tr_seg.unusable) | 2141 | if (!tr_seg.p) |
2041 | return false; | 2142 | return false; |
2042 | if (tr_seg.limit < 103) | 2143 | if (desc_limit_scaled(&tr_seg) < 103) |
2043 | return false; | 2144 | return false; |
2044 | r = ops->read_std(tr_seg.base + 102, &io_bitmap_ptr, 2, ctxt->vcpu, | 2145 | r = ops->read_std(get_desc_base(&tr_seg) + 102, &io_bitmap_ptr, 2, |
2045 | NULL); | 2146 | ctxt->vcpu, NULL); |
2046 | if (r != X86EMUL_CONTINUE) | 2147 | if (r != X86EMUL_CONTINUE) |
2047 | return false; | 2148 | return false; |
2048 | if (io_bitmap_ptr + port/8 > tr_seg.limit) | 2149 | if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg)) |
2049 | return false; | 2150 | return false; |
2050 | r = ops->read_std(tr_seg.base + io_bitmap_ptr + port/8, &perm, 1, | 2151 | r = ops->read_std(get_desc_base(&tr_seg) + io_bitmap_ptr + port/8, |
2051 | ctxt->vcpu, NULL); | 2152 | &perm, 1, ctxt->vcpu, NULL); |
2052 | if (r != X86EMUL_CONTINUE) | 2153 | if (r != X86EMUL_CONTINUE) |
2053 | return false; | 2154 | return false; |
2054 | if ((perm >> bit_idx) & mask) | 2155 | if ((perm >> bit_idx) & mask) |
@@ -2066,17 +2167,6 @@ static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt, | |||
2066 | return true; | 2167 | return true; |
2067 | } | 2168 | } |
2068 | 2169 | ||
2069 | static u32 get_cached_descriptor_base(struct x86_emulate_ctxt *ctxt, | ||
2070 | struct x86_emulate_ops *ops, | ||
2071 | int seg) | ||
2072 | { | ||
2073 | struct desc_struct desc; | ||
2074 | if (ops->get_cached_descriptor(&desc, seg, ctxt->vcpu)) | ||
2075 | return get_desc_base(&desc); | ||
2076 | else | ||
2077 | return ~0; | ||
2078 | } | ||
2079 | |||
2080 | static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, | 2170 | static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, |
2081 | struct x86_emulate_ops *ops, | 2171 | struct x86_emulate_ops *ops, |
2082 | struct tss_segment_16 *tss) | 2172 | struct tss_segment_16 *tss) |
@@ -2165,7 +2255,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, | |||
2165 | &err); | 2255 | &err); |
2166 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 2256 | if (ret == X86EMUL_PROPAGATE_FAULT) { |
2167 | /* FIXME: need to provide precise fault address */ | 2257 | /* FIXME: need to provide precise fault address */ |
2168 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | 2258 | emulate_pf(ctxt, old_tss_base, err); |
2169 | return ret; | 2259 | return ret; |
2170 | } | 2260 | } |
2171 | 2261 | ||
@@ -2175,7 +2265,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, | |||
2175 | &err); | 2265 | &err); |
2176 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 2266 | if (ret == X86EMUL_PROPAGATE_FAULT) { |
2177 | /* FIXME: need to provide precise fault address */ | 2267 | /* FIXME: need to provide precise fault address */ |
2178 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | 2268 | emulate_pf(ctxt, old_tss_base, err); |
2179 | return ret; | 2269 | return ret; |
2180 | } | 2270 | } |
2181 | 2271 | ||
@@ -2183,7 +2273,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, | |||
2183 | &err); | 2273 | &err); |
2184 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 2274 | if (ret == X86EMUL_PROPAGATE_FAULT) { |
2185 | /* FIXME: need to provide precise fault address */ | 2275 | /* FIXME: need to provide precise fault address */ |
2186 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | 2276 | emulate_pf(ctxt, new_tss_base, err); |
2187 | return ret; | 2277 | return ret; |
2188 | } | 2278 | } |
2189 | 2279 | ||
@@ -2196,7 +2286,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, | |||
2196 | ctxt->vcpu, &err); | 2286 | ctxt->vcpu, &err); |
2197 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 2287 | if (ret == X86EMUL_PROPAGATE_FAULT) { |
2198 | /* FIXME: need to provide precise fault address */ | 2288 | /* FIXME: need to provide precise fault address */ |
2199 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | 2289 | emulate_pf(ctxt, new_tss_base, err); |
2200 | return ret; | 2290 | return ret; |
2201 | } | 2291 | } |
2202 | } | 2292 | } |
@@ -2238,7 +2328,10 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, | |||
2238 | struct decode_cache *c = &ctxt->decode; | 2328 | struct decode_cache *c = &ctxt->decode; |
2239 | int ret; | 2329 | int ret; |
2240 | 2330 | ||
2241 | ops->set_cr(3, tss->cr3, ctxt->vcpu); | 2331 | if (ops->set_cr(3, tss->cr3, ctxt->vcpu)) { |
2332 | emulate_gp(ctxt, 0); | ||
2333 | return X86EMUL_PROPAGATE_FAULT; | ||
2334 | } | ||
2242 | c->eip = tss->eip; | 2335 | c->eip = tss->eip; |
2243 | ctxt->eflags = tss->eflags | 2; | 2336 | ctxt->eflags = tss->eflags | 2; |
2244 | c->regs[VCPU_REGS_RAX] = tss->eax; | 2337 | c->regs[VCPU_REGS_RAX] = tss->eax; |
@@ -2304,7 +2397,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, | |||
2304 | &err); | 2397 | &err); |
2305 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 2398 | if (ret == X86EMUL_PROPAGATE_FAULT) { |
2306 | /* FIXME: need to provide precise fault address */ | 2399 | /* FIXME: need to provide precise fault address */ |
2307 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | 2400 | emulate_pf(ctxt, old_tss_base, err); |
2308 | return ret; | 2401 | return ret; |
2309 | } | 2402 | } |
2310 | 2403 | ||
@@ -2314,7 +2407,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, | |||
2314 | &err); | 2407 | &err); |
2315 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 2408 | if (ret == X86EMUL_PROPAGATE_FAULT) { |
2316 | /* FIXME: need to provide precise fault address */ | 2409 | /* FIXME: need to provide precise fault address */ |
2317 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | 2410 | emulate_pf(ctxt, old_tss_base, err); |
2318 | return ret; | 2411 | return ret; |
2319 | } | 2412 | } |
2320 | 2413 | ||
@@ -2322,7 +2415,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, | |||
2322 | &err); | 2415 | &err); |
2323 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 2416 | if (ret == X86EMUL_PROPAGATE_FAULT) { |
2324 | /* FIXME: need to provide precise fault address */ | 2417 | /* FIXME: need to provide precise fault address */ |
2325 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | 2418 | emulate_pf(ctxt, new_tss_base, err); |
2326 | return ret; | 2419 | return ret; |
2327 | } | 2420 | } |
2328 | 2421 | ||
@@ -2335,7 +2428,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, | |||
2335 | ctxt->vcpu, &err); | 2428 | ctxt->vcpu, &err); |
2336 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 2429 | if (ret == X86EMUL_PROPAGATE_FAULT) { |
2337 | /* FIXME: need to provide precise fault address */ | 2430 | /* FIXME: need to provide precise fault address */ |
2338 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | 2431 | emulate_pf(ctxt, new_tss_base, err); |
2339 | return ret; | 2432 | return ret; |
2340 | } | 2433 | } |
2341 | } | 2434 | } |
@@ -2352,7 +2445,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2352 | int ret; | 2445 | int ret; |
2353 | u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu); | 2446 | u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu); |
2354 | ulong old_tss_base = | 2447 | ulong old_tss_base = |
2355 | get_cached_descriptor_base(ctxt, ops, VCPU_SREG_TR); | 2448 | ops->get_cached_segment_base(VCPU_SREG_TR, ctxt->vcpu); |
2356 | u32 desc_limit; | 2449 | u32 desc_limit; |
2357 | 2450 | ||
2358 | /* FIXME: old_tss_base == ~0 ? */ | 2451 | /* FIXME: old_tss_base == ~0 ? */ |
@@ -2369,7 +2462,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2369 | if (reason != TASK_SWITCH_IRET) { | 2462 | if (reason != TASK_SWITCH_IRET) { |
2370 | if ((tss_selector & 3) > next_tss_desc.dpl || | 2463 | if ((tss_selector & 3) > next_tss_desc.dpl || |
2371 | ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) { | 2464 | ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) { |
2372 | kvm_inject_gp(ctxt->vcpu, 0); | 2465 | emulate_gp(ctxt, 0); |
2373 | return X86EMUL_PROPAGATE_FAULT; | 2466 | return X86EMUL_PROPAGATE_FAULT; |
2374 | } | 2467 | } |
2375 | } | 2468 | } |
@@ -2378,8 +2471,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2378 | if (!next_tss_desc.p || | 2471 | if (!next_tss_desc.p || |
2379 | ((desc_limit < 0x67 && (next_tss_desc.type & 8)) || | 2472 | ((desc_limit < 0x67 && (next_tss_desc.type & 8)) || |
2380 | desc_limit < 0x2b)) { | 2473 | desc_limit < 0x2b)) { |
2381 | kvm_queue_exception_e(ctxt->vcpu, TS_VECTOR, | 2474 | emulate_ts(ctxt, tss_selector & 0xfffc); |
2382 | tss_selector & 0xfffc); | ||
2383 | return X86EMUL_PROPAGATE_FAULT; | 2475 | return X86EMUL_PROPAGATE_FAULT; |
2384 | } | 2476 | } |
2385 | 2477 | ||
@@ -2425,7 +2517,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2425 | c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2; | 2517 | c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2; |
2426 | c->lock_prefix = 0; | 2518 | c->lock_prefix = 0; |
2427 | c->src.val = (unsigned long) error_code; | 2519 | c->src.val = (unsigned long) error_code; |
2428 | emulate_push(ctxt); | 2520 | emulate_push(ctxt, ops); |
2429 | } | 2521 | } |
2430 | 2522 | ||
2431 | return ret; | 2523 | return ret; |
@@ -2439,18 +2531,16 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2439 | struct decode_cache *c = &ctxt->decode; | 2531 | struct decode_cache *c = &ctxt->decode; |
2440 | int rc; | 2532 | int rc; |
2441 | 2533 | ||
2442 | memset(c, 0, sizeof(struct decode_cache)); | ||
2443 | c->eip = ctxt->eip; | 2534 | c->eip = ctxt->eip; |
2444 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | ||
2445 | c->dst.type = OP_NONE; | 2535 | c->dst.type = OP_NONE; |
2446 | 2536 | ||
2447 | rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason, | 2537 | rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason, |
2448 | has_error_code, error_code); | 2538 | has_error_code, error_code); |
2449 | 2539 | ||
2450 | if (rc == X86EMUL_CONTINUE) { | 2540 | if (rc == X86EMUL_CONTINUE) { |
2451 | memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); | ||
2452 | kvm_rip_write(ctxt->vcpu, c->eip); | ||
2453 | rc = writeback(ctxt, ops); | 2541 | rc = writeback(ctxt, ops); |
2542 | if (rc == X86EMUL_CONTINUE) | ||
2543 | ctxt->eip = c->eip; | ||
2454 | } | 2544 | } |
2455 | 2545 | ||
2456 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; | 2546 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; |
@@ -2474,29 +2564,22 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
2474 | int rc = X86EMUL_CONTINUE; | 2564 | int rc = X86EMUL_CONTINUE; |
2475 | int saved_dst_type = c->dst.type; | 2565 | int saved_dst_type = c->dst.type; |
2476 | 2566 | ||
2477 | ctxt->interruptibility = 0; | 2567 | ctxt->decode.mem_read.pos = 0; |
2478 | |||
2479 | /* Shadow copy of register state. Committed on successful emulation. | ||
2480 | * NOTE: we can copy them from vcpu as x86_decode_insn() doesn't | ||
2481 | * modify them. | ||
2482 | */ | ||
2483 | |||
2484 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | ||
2485 | 2568 | ||
2486 | if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { | 2569 | if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { |
2487 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 2570 | emulate_ud(ctxt); |
2488 | goto done; | 2571 | goto done; |
2489 | } | 2572 | } |
2490 | 2573 | ||
2491 | /* LOCK prefix is allowed only with some instructions */ | 2574 | /* LOCK prefix is allowed only with some instructions */ |
2492 | if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) { | 2575 | if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) { |
2493 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 2576 | emulate_ud(ctxt); |
2494 | goto done; | 2577 | goto done; |
2495 | } | 2578 | } |
2496 | 2579 | ||
2497 | /* Privileged instruction can be executed only in CPL=0 */ | 2580 | /* Privileged instruction can be executed only in CPL=0 */ |
2498 | if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { | 2581 | if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { |
2499 | kvm_inject_gp(ctxt->vcpu, 0); | 2582 | emulate_gp(ctxt, 0); |
2500 | goto done; | 2583 | goto done; |
2501 | } | 2584 | } |
2502 | 2585 | ||
@@ -2506,7 +2589,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
2506 | if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) { | 2589 | if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) { |
2507 | string_done: | 2590 | string_done: |
2508 | ctxt->restart = false; | 2591 | ctxt->restart = false; |
2509 | kvm_rip_write(ctxt->vcpu, c->eip); | 2592 | ctxt->eip = c->eip; |
2510 | goto done; | 2593 | goto done; |
2511 | } | 2594 | } |
2512 | /* The second termination condition only applies for REPE | 2595 | /* The second termination condition only applies for REPE |
@@ -2529,20 +2612,16 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
2529 | } | 2612 | } |
2530 | 2613 | ||
2531 | if (c->src.type == OP_MEM) { | 2614 | if (c->src.type == OP_MEM) { |
2532 | rc = ops->read_emulated((unsigned long)c->src.ptr, | 2615 | rc = read_emulated(ctxt, ops, (unsigned long)c->src.ptr, |
2533 | &c->src.val, | 2616 | c->src.valptr, c->src.bytes); |
2534 | c->src.bytes, | ||
2535 | ctxt->vcpu); | ||
2536 | if (rc != X86EMUL_CONTINUE) | 2617 | if (rc != X86EMUL_CONTINUE) |
2537 | goto done; | 2618 | goto done; |
2538 | c->src.orig_val = c->src.val; | 2619 | c->src.orig_val = c->src.val; |
2539 | } | 2620 | } |
2540 | 2621 | ||
2541 | if (c->src2.type == OP_MEM) { | 2622 | if (c->src2.type == OP_MEM) { |
2542 | rc = ops->read_emulated((unsigned long)c->src2.ptr, | 2623 | rc = read_emulated(ctxt, ops, (unsigned long)c->src2.ptr, |
2543 | &c->src2.val, | 2624 | &c->src2.val, c->src2.bytes); |
2544 | c->src2.bytes, | ||
2545 | ctxt->vcpu); | ||
2546 | if (rc != X86EMUL_CONTINUE) | 2625 | if (rc != X86EMUL_CONTINUE) |
2547 | goto done; | 2626 | goto done; |
2548 | } | 2627 | } |
@@ -2553,8 +2632,8 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
2553 | 2632 | ||
2554 | if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { | 2633 | if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { |
2555 | /* optimisation - avoid slow emulated read if Mov */ | 2634 | /* optimisation - avoid slow emulated read if Mov */ |
2556 | rc = ops->read_emulated((unsigned long)c->dst.ptr, &c->dst.val, | 2635 | rc = read_emulated(ctxt, ops, (unsigned long)c->dst.ptr, |
2557 | c->dst.bytes, ctxt->vcpu); | 2636 | &c->dst.val, c->dst.bytes); |
2558 | if (rc != X86EMUL_CONTINUE) | 2637 | if (rc != X86EMUL_CONTINUE) |
2559 | goto done; | 2638 | goto done; |
2560 | } | 2639 | } |
@@ -2571,7 +2650,7 @@ special_insn: | |||
2571 | emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); | 2650 | emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); |
2572 | break; | 2651 | break; |
2573 | case 0x06: /* push es */ | 2652 | case 0x06: /* push es */ |
2574 | emulate_push_sreg(ctxt, VCPU_SREG_ES); | 2653 | emulate_push_sreg(ctxt, ops, VCPU_SREG_ES); |
2575 | break; | 2654 | break; |
2576 | case 0x07: /* pop es */ | 2655 | case 0x07: /* pop es */ |
2577 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); | 2656 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); |
@@ -2583,14 +2662,14 @@ special_insn: | |||
2583 | emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); | 2662 | emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); |
2584 | break; | 2663 | break; |
2585 | case 0x0e: /* push cs */ | 2664 | case 0x0e: /* push cs */ |
2586 | emulate_push_sreg(ctxt, VCPU_SREG_CS); | 2665 | emulate_push_sreg(ctxt, ops, VCPU_SREG_CS); |
2587 | break; | 2666 | break; |
2588 | case 0x10 ... 0x15: | 2667 | case 0x10 ... 0x15: |
2589 | adc: /* adc */ | 2668 | adc: /* adc */ |
2590 | emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); | 2669 | emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); |
2591 | break; | 2670 | break; |
2592 | case 0x16: /* push ss */ | 2671 | case 0x16: /* push ss */ |
2593 | emulate_push_sreg(ctxt, VCPU_SREG_SS); | 2672 | emulate_push_sreg(ctxt, ops, VCPU_SREG_SS); |
2594 | break; | 2673 | break; |
2595 | case 0x17: /* pop ss */ | 2674 | case 0x17: /* pop ss */ |
2596 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); | 2675 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); |
@@ -2602,7 +2681,7 @@ special_insn: | |||
2602 | emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); | 2681 | emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); |
2603 | break; | 2682 | break; |
2604 | case 0x1e: /* push ds */ | 2683 | case 0x1e: /* push ds */ |
2605 | emulate_push_sreg(ctxt, VCPU_SREG_DS); | 2684 | emulate_push_sreg(ctxt, ops, VCPU_SREG_DS); |
2606 | break; | 2685 | break; |
2607 | case 0x1f: /* pop ds */ | 2686 | case 0x1f: /* pop ds */ |
2608 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); | 2687 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); |
@@ -2632,7 +2711,7 @@ special_insn: | |||
2632 | emulate_1op("dec", c->dst, ctxt->eflags); | 2711 | emulate_1op("dec", c->dst, ctxt->eflags); |
2633 | break; | 2712 | break; |
2634 | case 0x50 ... 0x57: /* push reg */ | 2713 | case 0x50 ... 0x57: /* push reg */ |
2635 | emulate_push(ctxt); | 2714 | emulate_push(ctxt, ops); |
2636 | break; | 2715 | break; |
2637 | case 0x58 ... 0x5f: /* pop reg */ | 2716 | case 0x58 ... 0x5f: /* pop reg */ |
2638 | pop_instruction: | 2717 | pop_instruction: |
@@ -2641,7 +2720,9 @@ special_insn: | |||
2641 | goto done; | 2720 | goto done; |
2642 | break; | 2721 | break; |
2643 | case 0x60: /* pusha */ | 2722 | case 0x60: /* pusha */ |
2644 | emulate_pusha(ctxt); | 2723 | rc = emulate_pusha(ctxt, ops); |
2724 | if (rc != X86EMUL_CONTINUE) | ||
2725 | goto done; | ||
2645 | break; | 2726 | break; |
2646 | case 0x61: /* popa */ | 2727 | case 0x61: /* popa */ |
2647 | rc = emulate_popa(ctxt, ops); | 2728 | rc = emulate_popa(ctxt, ops); |
@@ -2655,14 +2736,14 @@ special_insn: | |||
2655 | break; | 2736 | break; |
2656 | case 0x68: /* push imm */ | 2737 | case 0x68: /* push imm */ |
2657 | case 0x6a: /* push imm8 */ | 2738 | case 0x6a: /* push imm8 */ |
2658 | emulate_push(ctxt); | 2739 | emulate_push(ctxt, ops); |
2659 | break; | 2740 | break; |
2660 | case 0x6c: /* insb */ | 2741 | case 0x6c: /* insb */ |
2661 | case 0x6d: /* insw/insd */ | 2742 | case 0x6d: /* insw/insd */ |
2662 | c->dst.bytes = min(c->dst.bytes, 4u); | 2743 | c->dst.bytes = min(c->dst.bytes, 4u); |
2663 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], | 2744 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], |
2664 | c->dst.bytes)) { | 2745 | c->dst.bytes)) { |
2665 | kvm_inject_gp(ctxt->vcpu, 0); | 2746 | emulate_gp(ctxt, 0); |
2666 | goto done; | 2747 | goto done; |
2667 | } | 2748 | } |
2668 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, | 2749 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, |
@@ -2674,7 +2755,7 @@ special_insn: | |||
2674 | c->src.bytes = min(c->src.bytes, 4u); | 2755 | c->src.bytes = min(c->src.bytes, 4u); |
2675 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], | 2756 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], |
2676 | c->src.bytes)) { | 2757 | c->src.bytes)) { |
2677 | kvm_inject_gp(ctxt->vcpu, 0); | 2758 | emulate_gp(ctxt, 0); |
2678 | goto done; | 2759 | goto done; |
2679 | } | 2760 | } |
2680 | ops->pio_out_emulated(c->src.bytes, c->regs[VCPU_REGS_RDX], | 2761 | ops->pio_out_emulated(c->src.bytes, c->regs[VCPU_REGS_RDX], |
@@ -2707,6 +2788,7 @@ special_insn: | |||
2707 | } | 2788 | } |
2708 | break; | 2789 | break; |
2709 | case 0x84 ... 0x85: | 2790 | case 0x84 ... 0x85: |
2791 | test: | ||
2710 | emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags); | 2792 | emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags); |
2711 | break; | 2793 | break; |
2712 | case 0x86 ... 0x87: /* xchg */ | 2794 | case 0x86 ... 0x87: /* xchg */ |
@@ -2735,18 +2817,13 @@ special_insn: | |||
2735 | break; | 2817 | break; |
2736 | case 0x88 ... 0x8b: /* mov */ | 2818 | case 0x88 ... 0x8b: /* mov */ |
2737 | goto mov; | 2819 | goto mov; |
2738 | case 0x8c: { /* mov r/m, sreg */ | 2820 | case 0x8c: /* mov r/m, sreg */ |
2739 | struct kvm_segment segreg; | 2821 | if (c->modrm_reg > VCPU_SREG_GS) { |
2740 | 2822 | emulate_ud(ctxt); | |
2741 | if (c->modrm_reg <= VCPU_SREG_GS) | ||
2742 | kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg); | ||
2743 | else { | ||
2744 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | ||
2745 | goto done; | 2823 | goto done; |
2746 | } | 2824 | } |
2747 | c->dst.val = segreg.selector; | 2825 | c->dst.val = ops->get_segment_selector(c->modrm_reg, ctxt->vcpu); |
2748 | break; | 2826 | break; |
2749 | } | ||
2750 | case 0x8d: /* lea r16/r32, m */ | 2827 | case 0x8d: /* lea r16/r32, m */ |
2751 | c->dst.val = c->modrm_ea; | 2828 | c->dst.val = c->modrm_ea; |
2752 | break; | 2829 | break; |
@@ -2757,12 +2834,12 @@ special_insn: | |||
2757 | 2834 | ||
2758 | if (c->modrm_reg == VCPU_SREG_CS || | 2835 | if (c->modrm_reg == VCPU_SREG_CS || |
2759 | c->modrm_reg > VCPU_SREG_GS) { | 2836 | c->modrm_reg > VCPU_SREG_GS) { |
2760 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 2837 | emulate_ud(ctxt); |
2761 | goto done; | 2838 | goto done; |
2762 | } | 2839 | } |
2763 | 2840 | ||
2764 | if (c->modrm_reg == VCPU_SREG_SS) | 2841 | if (c->modrm_reg == VCPU_SREG_SS) |
2765 | toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_MOV_SS); | 2842 | ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS; |
2766 | 2843 | ||
2767 | rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg); | 2844 | rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg); |
2768 | 2845 | ||
@@ -2775,19 +2852,19 @@ special_insn: | |||
2775 | goto done; | 2852 | goto done; |
2776 | break; | 2853 | break; |
2777 | case 0x90: /* nop / xchg r8,rax */ | 2854 | case 0x90: /* nop / xchg r8,rax */ |
2778 | if (!(c->rex_prefix & 1)) { /* nop */ | 2855 | if (c->dst.ptr == (unsigned long *)&c->regs[VCPU_REGS_RAX]) { |
2779 | c->dst.type = OP_NONE; | 2856 | c->dst.type = OP_NONE; /* nop */ |
2780 | break; | 2857 | break; |
2781 | } | 2858 | } |
2782 | case 0x91 ... 0x97: /* xchg reg,rax */ | 2859 | case 0x91 ... 0x97: /* xchg reg,rax */ |
2783 | c->src.type = c->dst.type = OP_REG; | 2860 | c->src.type = OP_REG; |
2784 | c->src.bytes = c->dst.bytes = c->op_bytes; | 2861 | c->src.bytes = c->op_bytes; |
2785 | c->src.ptr = (unsigned long *) &c->regs[VCPU_REGS_RAX]; | 2862 | c->src.ptr = (unsigned long *) &c->regs[VCPU_REGS_RAX]; |
2786 | c->src.val = *(c->src.ptr); | 2863 | c->src.val = *(c->src.ptr); |
2787 | goto xchg; | 2864 | goto xchg; |
2788 | case 0x9c: /* pushf */ | 2865 | case 0x9c: /* pushf */ |
2789 | c->src.val = (unsigned long) ctxt->eflags; | 2866 | c->src.val = (unsigned long) ctxt->eflags; |
2790 | emulate_push(ctxt); | 2867 | emulate_push(ctxt, ops); |
2791 | break; | 2868 | break; |
2792 | case 0x9d: /* popf */ | 2869 | case 0x9d: /* popf */ |
2793 | c->dst.type = OP_REG; | 2870 | c->dst.type = OP_REG; |
@@ -2797,19 +2874,15 @@ special_insn: | |||
2797 | if (rc != X86EMUL_CONTINUE) | 2874 | if (rc != X86EMUL_CONTINUE) |
2798 | goto done; | 2875 | goto done; |
2799 | break; | 2876 | break; |
2800 | case 0xa0 ... 0xa1: /* mov */ | 2877 | case 0xa0 ... 0xa3: /* mov */ |
2801 | c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; | ||
2802 | c->dst.val = c->src.val; | ||
2803 | break; | ||
2804 | case 0xa2 ... 0xa3: /* mov */ | ||
2805 | c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX]; | ||
2806 | break; | ||
2807 | case 0xa4 ... 0xa5: /* movs */ | 2878 | case 0xa4 ... 0xa5: /* movs */ |
2808 | goto mov; | 2879 | goto mov; |
2809 | case 0xa6 ... 0xa7: /* cmps */ | 2880 | case 0xa6 ... 0xa7: /* cmps */ |
2810 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2881 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2811 | DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); | 2882 | DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); |
2812 | goto cmp; | 2883 | goto cmp; |
2884 | case 0xa8 ... 0xa9: /* test ax, imm */ | ||
2885 | goto test; | ||
2813 | case 0xaa ... 0xab: /* stos */ | 2886 | case 0xaa ... 0xab: /* stos */ |
2814 | c->dst.val = c->regs[VCPU_REGS_RAX]; | 2887 | c->dst.val = c->regs[VCPU_REGS_RAX]; |
2815 | break; | 2888 | break; |
@@ -2855,19 +2928,23 @@ special_insn: | |||
2855 | long int rel = c->src.val; | 2928 | long int rel = c->src.val; |
2856 | c->src.val = (unsigned long) c->eip; | 2929 | c->src.val = (unsigned long) c->eip; |
2857 | jmp_rel(c, rel); | 2930 | jmp_rel(c, rel); |
2858 | emulate_push(ctxt); | 2931 | emulate_push(ctxt, ops); |
2859 | break; | 2932 | break; |
2860 | } | 2933 | } |
2861 | case 0xe9: /* jmp rel */ | 2934 | case 0xe9: /* jmp rel */ |
2862 | goto jmp; | 2935 | goto jmp; |
2863 | case 0xea: /* jmp far */ | 2936 | case 0xea: { /* jmp far */ |
2937 | unsigned short sel; | ||
2864 | jump_far: | 2938 | jump_far: |
2865 | if (load_segment_descriptor(ctxt, ops, c->src2.val, | 2939 | memcpy(&sel, c->src.valptr + c->op_bytes, 2); |
2866 | VCPU_SREG_CS)) | 2940 | |
2941 | if (load_segment_descriptor(ctxt, ops, sel, VCPU_SREG_CS)) | ||
2867 | goto done; | 2942 | goto done; |
2868 | 2943 | ||
2869 | c->eip = c->src.val; | 2944 | c->eip = 0; |
2945 | memcpy(&c->eip, c->src.valptr, c->op_bytes); | ||
2870 | break; | 2946 | break; |
2947 | } | ||
2871 | case 0xeb: | 2948 | case 0xeb: |
2872 | jmp: /* jmp rel short */ | 2949 | jmp: /* jmp rel short */ |
2873 | jmp_rel(c, c->src.val); | 2950 | jmp_rel(c, c->src.val); |
@@ -2879,20 +2956,20 @@ special_insn: | |||
2879 | do_io_in: | 2956 | do_io_in: |
2880 | c->dst.bytes = min(c->dst.bytes, 4u); | 2957 | c->dst.bytes = min(c->dst.bytes, 4u); |
2881 | if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { | 2958 | if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { |
2882 | kvm_inject_gp(ctxt->vcpu, 0); | 2959 | emulate_gp(ctxt, 0); |
2883 | goto done; | 2960 | goto done; |
2884 | } | 2961 | } |
2885 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, | 2962 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, |
2886 | &c->dst.val)) | 2963 | &c->dst.val)) |
2887 | goto done; /* IO is needed */ | 2964 | goto done; /* IO is needed */ |
2888 | break; | 2965 | break; |
2889 | case 0xee: /* out al,dx */ | 2966 | case 0xee: /* out dx,al */ |
2890 | case 0xef: /* out (e/r)ax,dx */ | 2967 | case 0xef: /* out dx,(e/r)ax */ |
2891 | c->src.val = c->regs[VCPU_REGS_RDX]; | 2968 | c->src.val = c->regs[VCPU_REGS_RDX]; |
2892 | do_io_out: | 2969 | do_io_out: |
2893 | c->dst.bytes = min(c->dst.bytes, 4u); | 2970 | c->dst.bytes = min(c->dst.bytes, 4u); |
2894 | if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { | 2971 | if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { |
2895 | kvm_inject_gp(ctxt->vcpu, 0); | 2972 | emulate_gp(ctxt, 0); |
2896 | goto done; | 2973 | goto done; |
2897 | } | 2974 | } |
2898 | ops->pio_out_emulated(c->dst.bytes, c->src.val, &c->dst.val, 1, | 2975 | ops->pio_out_emulated(c->dst.bytes, c->src.val, &c->dst.val, 1, |
@@ -2916,18 +2993,20 @@ special_insn: | |||
2916 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2993 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2917 | break; | 2994 | break; |
2918 | case 0xfa: /* cli */ | 2995 | case 0xfa: /* cli */ |
2919 | if (emulator_bad_iopl(ctxt, ops)) | 2996 | if (emulator_bad_iopl(ctxt, ops)) { |
2920 | kvm_inject_gp(ctxt->vcpu, 0); | 2997 | emulate_gp(ctxt, 0); |
2921 | else { | 2998 | goto done; |
2999 | } else { | ||
2922 | ctxt->eflags &= ~X86_EFLAGS_IF; | 3000 | ctxt->eflags &= ~X86_EFLAGS_IF; |
2923 | c->dst.type = OP_NONE; /* Disable writeback. */ | 3001 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2924 | } | 3002 | } |
2925 | break; | 3003 | break; |
2926 | case 0xfb: /* sti */ | 3004 | case 0xfb: /* sti */ |
2927 | if (emulator_bad_iopl(ctxt, ops)) | 3005 | if (emulator_bad_iopl(ctxt, ops)) { |
2928 | kvm_inject_gp(ctxt->vcpu, 0); | 3006 | emulate_gp(ctxt, 0); |
2929 | else { | 3007 | goto done; |
2930 | toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_STI); | 3008 | } else { |
3009 | ctxt->interruptibility = KVM_X86_SHADOW_INT_STI; | ||
2931 | ctxt->eflags |= X86_EFLAGS_IF; | 3010 | ctxt->eflags |= X86_EFLAGS_IF; |
2932 | c->dst.type = OP_NONE; /* Disable writeback. */ | 3011 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2933 | } | 3012 | } |
@@ -2964,11 +3043,12 @@ writeback: | |||
2964 | c->dst.type = saved_dst_type; | 3043 | c->dst.type = saved_dst_type; |
2965 | 3044 | ||
2966 | if ((c->d & SrcMask) == SrcSI) | 3045 | if ((c->d & SrcMask) == SrcSI) |
2967 | string_addr_inc(ctxt, seg_override_base(ctxt, c), VCPU_REGS_RSI, | 3046 | string_addr_inc(ctxt, seg_override_base(ctxt, ops, c), |
2968 | &c->src); | 3047 | VCPU_REGS_RSI, &c->src); |
2969 | 3048 | ||
2970 | if ((c->d & DstMask) == DstDI) | 3049 | if ((c->d & DstMask) == DstDI) |
2971 | string_addr_inc(ctxt, es_base(ctxt), VCPU_REGS_RDI, &c->dst); | 3050 | string_addr_inc(ctxt, es_base(ctxt, ops), VCPU_REGS_RDI, |
3051 | &c->dst); | ||
2972 | 3052 | ||
2973 | if (c->rep_prefix && (c->d & String)) { | 3053 | if (c->rep_prefix && (c->d & String)) { |
2974 | struct read_cache *rc = &ctxt->decode.io_read; | 3054 | struct read_cache *rc = &ctxt->decode.io_read; |
@@ -2981,11 +3061,12 @@ writeback: | |||
2981 | (rc->end != 0 && rc->end == rc->pos)) | 3061 | (rc->end != 0 && rc->end == rc->pos)) |
2982 | ctxt->restart = false; | 3062 | ctxt->restart = false; |
2983 | } | 3063 | } |
2984 | 3064 | /* | |
2985 | /* Commit shadow register state. */ | 3065 | * reset read cache here in case string instruction is restared |
2986 | memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); | 3066 | * without decoding |
2987 | kvm_rip_write(ctxt->vcpu, c->eip); | 3067 | */ |
2988 | ops->set_rflags(ctxt->vcpu, ctxt->eflags); | 3068 | ctxt->decode.mem_read.end = 0; |
3069 | ctxt->eip = c->eip; | ||
2989 | 3070 | ||
2990 | done: | 3071 | done: |
2991 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; | 3072 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; |
@@ -3051,7 +3132,7 @@ twobyte_insn: | |||
3051 | c->dst.type = OP_NONE; | 3132 | c->dst.type = OP_NONE; |
3052 | break; | 3133 | break; |
3053 | case 5: /* not defined */ | 3134 | case 5: /* not defined */ |
3054 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 3135 | emulate_ud(ctxt); |
3055 | goto done; | 3136 | goto done; |
3056 | case 7: /* invlpg*/ | 3137 | case 7: /* invlpg*/ |
3057 | emulate_invlpg(ctxt->vcpu, c->modrm_ea); | 3138 | emulate_invlpg(ctxt->vcpu, c->modrm_ea); |
@@ -3063,7 +3144,7 @@ twobyte_insn: | |||
3063 | } | 3144 | } |
3064 | break; | 3145 | break; |
3065 | case 0x05: /* syscall */ | 3146 | case 0x05: /* syscall */ |
3066 | rc = emulate_syscall(ctxt); | 3147 | rc = emulate_syscall(ctxt, ops); |
3067 | if (rc != X86EMUL_CONTINUE) | 3148 | if (rc != X86EMUL_CONTINUE) |
3068 | goto done; | 3149 | goto done; |
3069 | else | 3150 | else |
@@ -3073,8 +3154,11 @@ twobyte_insn: | |||
3073 | emulate_clts(ctxt->vcpu); | 3154 | emulate_clts(ctxt->vcpu); |
3074 | c->dst.type = OP_NONE; | 3155 | c->dst.type = OP_NONE; |
3075 | break; | 3156 | break; |
3076 | case 0x08: /* invd */ | ||
3077 | case 0x09: /* wbinvd */ | 3157 | case 0x09: /* wbinvd */ |
3158 | kvm_emulate_wbinvd(ctxt->vcpu); | ||
3159 | c->dst.type = OP_NONE; | ||
3160 | break; | ||
3161 | case 0x08: /* invd */ | ||
3078 | case 0x0d: /* GrpP (prefetch) */ | 3162 | case 0x0d: /* GrpP (prefetch) */ |
3079 | case 0x18: /* Grp16 (prefetch/nop) */ | 3163 | case 0x18: /* Grp16 (prefetch/nop) */ |
3080 | c->dst.type = OP_NONE; | 3164 | c->dst.type = OP_NONE; |
@@ -3084,7 +3168,7 @@ twobyte_insn: | |||
3084 | case 1: | 3168 | case 1: |
3085 | case 5 ... 7: | 3169 | case 5 ... 7: |
3086 | case 9 ... 15: | 3170 | case 9 ... 15: |
3087 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 3171 | emulate_ud(ctxt); |
3088 | goto done; | 3172 | goto done; |
3089 | } | 3173 | } |
3090 | c->regs[c->modrm_rm] = ops->get_cr(c->modrm_reg, ctxt->vcpu); | 3174 | c->regs[c->modrm_rm] = ops->get_cr(c->modrm_reg, ctxt->vcpu); |
@@ -3093,31 +3177,42 @@ twobyte_insn: | |||
3093 | case 0x21: /* mov from dr to reg */ | 3177 | case 0x21: /* mov from dr to reg */ |
3094 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && | 3178 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && |
3095 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { | 3179 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { |
3096 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 3180 | emulate_ud(ctxt); |
3097 | goto done; | 3181 | goto done; |
3098 | } | 3182 | } |
3099 | emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]); | 3183 | ops->get_dr(c->modrm_reg, &c->regs[c->modrm_rm], ctxt->vcpu); |
3100 | c->dst.type = OP_NONE; /* no writeback */ | 3184 | c->dst.type = OP_NONE; /* no writeback */ |
3101 | break; | 3185 | break; |
3102 | case 0x22: /* mov reg, cr */ | 3186 | case 0x22: /* mov reg, cr */ |
3103 | ops->set_cr(c->modrm_reg, c->modrm_val, ctxt->vcpu); | 3187 | if (ops->set_cr(c->modrm_reg, c->modrm_val, ctxt->vcpu)) { |
3188 | emulate_gp(ctxt, 0); | ||
3189 | goto done; | ||
3190 | } | ||
3104 | c->dst.type = OP_NONE; | 3191 | c->dst.type = OP_NONE; |
3105 | break; | 3192 | break; |
3106 | case 0x23: /* mov from reg to dr */ | 3193 | case 0x23: /* mov from reg to dr */ |
3107 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && | 3194 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && |
3108 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { | 3195 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { |
3109 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 3196 | emulate_ud(ctxt); |
3197 | goto done; | ||
3198 | } | ||
3199 | |||
3200 | if (ops->set_dr(c->modrm_reg, c->regs[c->modrm_rm] & | ||
3201 | ((ctxt->mode == X86EMUL_MODE_PROT64) ? | ||
3202 | ~0ULL : ~0U), ctxt->vcpu) < 0) { | ||
3203 | /* #UD condition is already handled by the code above */ | ||
3204 | emulate_gp(ctxt, 0); | ||
3110 | goto done; | 3205 | goto done; |
3111 | } | 3206 | } |
3112 | emulator_set_dr(ctxt, c->modrm_reg, c->regs[c->modrm_rm]); | 3207 | |
3113 | c->dst.type = OP_NONE; /* no writeback */ | 3208 | c->dst.type = OP_NONE; /* no writeback */ |
3114 | break; | 3209 | break; |
3115 | case 0x30: | 3210 | case 0x30: |
3116 | /* wrmsr */ | 3211 | /* wrmsr */ |
3117 | msr_data = (u32)c->regs[VCPU_REGS_RAX] | 3212 | msr_data = (u32)c->regs[VCPU_REGS_RAX] |
3118 | | ((u64)c->regs[VCPU_REGS_RDX] << 32); | 3213 | | ((u64)c->regs[VCPU_REGS_RDX] << 32); |
3119 | if (kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) { | 3214 | if (ops->set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) { |
3120 | kvm_inject_gp(ctxt->vcpu, 0); | 3215 | emulate_gp(ctxt, 0); |
3121 | goto done; | 3216 | goto done; |
3122 | } | 3217 | } |
3123 | rc = X86EMUL_CONTINUE; | 3218 | rc = X86EMUL_CONTINUE; |
@@ -3125,8 +3220,8 @@ twobyte_insn: | |||
3125 | break; | 3220 | break; |
3126 | case 0x32: | 3221 | case 0x32: |
3127 | /* rdmsr */ | 3222 | /* rdmsr */ |
3128 | if (kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) { | 3223 | if (ops->get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) { |
3129 | kvm_inject_gp(ctxt->vcpu, 0); | 3224 | emulate_gp(ctxt, 0); |
3130 | goto done; | 3225 | goto done; |
3131 | } else { | 3226 | } else { |
3132 | c->regs[VCPU_REGS_RAX] = (u32)msr_data; | 3227 | c->regs[VCPU_REGS_RAX] = (u32)msr_data; |
@@ -3136,14 +3231,14 @@ twobyte_insn: | |||
3136 | c->dst.type = OP_NONE; | 3231 | c->dst.type = OP_NONE; |
3137 | break; | 3232 | break; |
3138 | case 0x34: /* sysenter */ | 3233 | case 0x34: /* sysenter */ |
3139 | rc = emulate_sysenter(ctxt); | 3234 | rc = emulate_sysenter(ctxt, ops); |
3140 | if (rc != X86EMUL_CONTINUE) | 3235 | if (rc != X86EMUL_CONTINUE) |
3141 | goto done; | 3236 | goto done; |
3142 | else | 3237 | else |
3143 | goto writeback; | 3238 | goto writeback; |
3144 | break; | 3239 | break; |
3145 | case 0x35: /* sysexit */ | 3240 | case 0x35: /* sysexit */ |
3146 | rc = emulate_sysexit(ctxt); | 3241 | rc = emulate_sysexit(ctxt, ops); |
3147 | if (rc != X86EMUL_CONTINUE) | 3242 | if (rc != X86EMUL_CONTINUE) |
3148 | goto done; | 3243 | goto done; |
3149 | else | 3244 | else |
@@ -3160,7 +3255,7 @@ twobyte_insn: | |||
3160 | c->dst.type = OP_NONE; | 3255 | c->dst.type = OP_NONE; |
3161 | break; | 3256 | break; |
3162 | case 0xa0: /* push fs */ | 3257 | case 0xa0: /* push fs */ |
3163 | emulate_push_sreg(ctxt, VCPU_SREG_FS); | 3258 | emulate_push_sreg(ctxt, ops, VCPU_SREG_FS); |
3164 | break; | 3259 | break; |
3165 | case 0xa1: /* pop fs */ | 3260 | case 0xa1: /* pop fs */ |
3166 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); | 3261 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); |
@@ -3179,7 +3274,7 @@ twobyte_insn: | |||
3179 | emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); | 3274 | emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); |
3180 | break; | 3275 | break; |
3181 | case 0xa8: /* push gs */ | 3276 | case 0xa8: /* push gs */ |
3182 | emulate_push_sreg(ctxt, VCPU_SREG_GS); | 3277 | emulate_push_sreg(ctxt, ops, VCPU_SREG_GS); |
3183 | break; | 3278 | break; |
3184 | case 0xa9: /* pop gs */ | 3279 | case 0xa9: /* pop gs */ |
3185 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); | 3280 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); |
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 0150affad25d..0fd6378981f4 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
@@ -5,6 +5,7 @@ | |||
5 | * Copyright (c) 2006 Intel Corporation | 5 | * Copyright (c) 2006 Intel Corporation |
6 | * Copyright (c) 2007 Keir Fraser, XenSource Inc | 6 | * Copyright (c) 2007 Keir Fraser, XenSource Inc |
7 | * Copyright (c) 2008 Intel Corporation | 7 | * Copyright (c) 2008 Intel Corporation |
8 | * Copyright 2009 Red Hat, Inc. and/or its affilates. | ||
8 | * | 9 | * |
9 | * Permission is hereby granted, free of charge, to any person obtaining a copy | 10 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
10 | * of this software and associated documentation files (the "Software"), to deal | 11 | * of this software and associated documentation files (the "Software"), to deal |
@@ -33,6 +34,7 @@ | |||
33 | 34 | ||
34 | #include <linux/kvm_host.h> | 35 | #include <linux/kvm_host.h> |
35 | #include <linux/slab.h> | 36 | #include <linux/slab.h> |
37 | #include <linux/workqueue.h> | ||
36 | 38 | ||
37 | #include "irq.h" | 39 | #include "irq.h" |
38 | #include "i8254.h" | 40 | #include "i8254.h" |
@@ -243,11 +245,22 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian) | |||
243 | { | 245 | { |
244 | struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state, | 246 | struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state, |
245 | irq_ack_notifier); | 247 | irq_ack_notifier); |
246 | raw_spin_lock(&ps->inject_lock); | 248 | int value; |
247 | if (atomic_dec_return(&ps->pit_timer.pending) < 0) | 249 | |
250 | spin_lock(&ps->inject_lock); | ||
251 | value = atomic_dec_return(&ps->pit_timer.pending); | ||
252 | if (value < 0) | ||
253 | /* spurious acks can be generated if, for example, the | ||
254 | * PIC is being reset. Handle it gracefully here | ||
255 | */ | ||
248 | atomic_inc(&ps->pit_timer.pending); | 256 | atomic_inc(&ps->pit_timer.pending); |
257 | else if (value > 0) | ||
258 | /* in this case, we had multiple outstanding pit interrupts | ||
259 | * that we needed to inject. Reinject | ||
260 | */ | ||
261 | queue_work(ps->pit->wq, &ps->pit->expired); | ||
249 | ps->irq_ack = 1; | 262 | ps->irq_ack = 1; |
250 | raw_spin_unlock(&ps->inject_lock); | 263 | spin_unlock(&ps->inject_lock); |
251 | } | 264 | } |
252 | 265 | ||
253 | void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) | 266 | void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) |
@@ -263,10 +276,10 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) | |||
263 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); | 276 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); |
264 | } | 277 | } |
265 | 278 | ||
266 | static void destroy_pit_timer(struct kvm_timer *pt) | 279 | static void destroy_pit_timer(struct kvm_pit *pit) |
267 | { | 280 | { |
268 | pr_debug("execute del timer!\n"); | 281 | hrtimer_cancel(&pit->pit_state.pit_timer.timer); |
269 | hrtimer_cancel(&pt->timer); | 282 | cancel_work_sync(&pit->expired); |
270 | } | 283 | } |
271 | 284 | ||
272 | static bool kpit_is_periodic(struct kvm_timer *ktimer) | 285 | static bool kpit_is_periodic(struct kvm_timer *ktimer) |
@@ -280,6 +293,60 @@ static struct kvm_timer_ops kpit_ops = { | |||
280 | .is_periodic = kpit_is_periodic, | 293 | .is_periodic = kpit_is_periodic, |
281 | }; | 294 | }; |
282 | 295 | ||
296 | static void pit_do_work(struct work_struct *work) | ||
297 | { | ||
298 | struct kvm_pit *pit = container_of(work, struct kvm_pit, expired); | ||
299 | struct kvm *kvm = pit->kvm; | ||
300 | struct kvm_vcpu *vcpu; | ||
301 | int i; | ||
302 | struct kvm_kpit_state *ps = &pit->pit_state; | ||
303 | int inject = 0; | ||
304 | |||
305 | /* Try to inject pending interrupts when | ||
306 | * last one has been acked. | ||
307 | */ | ||
308 | spin_lock(&ps->inject_lock); | ||
309 | if (ps->irq_ack) { | ||
310 | ps->irq_ack = 0; | ||
311 | inject = 1; | ||
312 | } | ||
313 | spin_unlock(&ps->inject_lock); | ||
314 | if (inject) { | ||
315 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); | ||
316 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); | ||
317 | |||
318 | /* | ||
319 | * Provides NMI watchdog support via Virtual Wire mode. | ||
320 | * The route is: PIT -> PIC -> LVT0 in NMI mode. | ||
321 | * | ||
322 | * Note: Our Virtual Wire implementation is simplified, only | ||
323 | * propagating PIT interrupts to all VCPUs when they have set | ||
324 | * LVT0 to NMI delivery. Other PIC interrupts are just sent to | ||
325 | * VCPU0, and only if its LVT0 is in EXTINT mode. | ||
326 | */ | ||
327 | if (kvm->arch.vapics_in_nmi_mode > 0) | ||
328 | kvm_for_each_vcpu(i, vcpu, kvm) | ||
329 | kvm_apic_nmi_wd_deliver(vcpu); | ||
330 | } | ||
331 | } | ||
332 | |||
333 | static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) | ||
334 | { | ||
335 | struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); | ||
336 | struct kvm_pit *pt = ktimer->kvm->arch.vpit; | ||
337 | |||
338 | if (ktimer->reinject || !atomic_read(&ktimer->pending)) { | ||
339 | atomic_inc(&ktimer->pending); | ||
340 | queue_work(pt->wq, &pt->expired); | ||
341 | } | ||
342 | |||
343 | if (ktimer->t_ops->is_periodic(ktimer)) { | ||
344 | hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); | ||
345 | return HRTIMER_RESTART; | ||
346 | } else | ||
347 | return HRTIMER_NORESTART; | ||
348 | } | ||
349 | |||
283 | static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) | 350 | static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) |
284 | { | 351 | { |
285 | struct kvm_timer *pt = &ps->pit_timer; | 352 | struct kvm_timer *pt = &ps->pit_timer; |
@@ -291,13 +358,13 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) | |||
291 | 358 | ||
292 | /* TODO The new value only affected after the retriggered */ | 359 | /* TODO The new value only affected after the retriggered */ |
293 | hrtimer_cancel(&pt->timer); | 360 | hrtimer_cancel(&pt->timer); |
361 | cancel_work_sync(&ps->pit->expired); | ||
294 | pt->period = interval; | 362 | pt->period = interval; |
295 | ps->is_periodic = is_period; | 363 | ps->is_periodic = is_period; |
296 | 364 | ||
297 | pt->timer.function = kvm_timer_fn; | 365 | pt->timer.function = pit_timer_fn; |
298 | pt->t_ops = &kpit_ops; | 366 | pt->t_ops = &kpit_ops; |
299 | pt->kvm = ps->pit->kvm; | 367 | pt->kvm = ps->pit->kvm; |
300 | pt->vcpu = pt->kvm->bsp_vcpu; | ||
301 | 368 | ||
302 | atomic_set(&pt->pending, 0); | 369 | atomic_set(&pt->pending, 0); |
303 | ps->irq_ack = 1; | 370 | ps->irq_ack = 1; |
@@ -346,7 +413,7 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val) | |||
346 | } | 413 | } |
347 | break; | 414 | break; |
348 | default: | 415 | default: |
349 | destroy_pit_timer(&ps->pit_timer); | 416 | destroy_pit_timer(kvm->arch.vpit); |
350 | } | 417 | } |
351 | } | 418 | } |
352 | 419 | ||
@@ -625,7 +692,15 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) | |||
625 | 692 | ||
626 | mutex_init(&pit->pit_state.lock); | 693 | mutex_init(&pit->pit_state.lock); |
627 | mutex_lock(&pit->pit_state.lock); | 694 | mutex_lock(&pit->pit_state.lock); |
628 | raw_spin_lock_init(&pit->pit_state.inject_lock); | 695 | spin_lock_init(&pit->pit_state.inject_lock); |
696 | |||
697 | pit->wq = create_singlethread_workqueue("kvm-pit-wq"); | ||
698 | if (!pit->wq) { | ||
699 | mutex_unlock(&pit->pit_state.lock); | ||
700 | kfree(pit); | ||
701 | return NULL; | ||
702 | } | ||
703 | INIT_WORK(&pit->expired, pit_do_work); | ||
629 | 704 | ||
630 | kvm->arch.vpit = pit; | 705 | kvm->arch.vpit = pit; |
631 | pit->kvm = kvm; | 706 | pit->kvm = kvm; |
@@ -677,6 +752,9 @@ void kvm_free_pit(struct kvm *kvm) | |||
677 | struct hrtimer *timer; | 752 | struct hrtimer *timer; |
678 | 753 | ||
679 | if (kvm->arch.vpit) { | 754 | if (kvm->arch.vpit) { |
755 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &kvm->arch.vpit->dev); | ||
756 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, | ||
757 | &kvm->arch.vpit->speaker_dev); | ||
680 | kvm_unregister_irq_mask_notifier(kvm, 0, | 758 | kvm_unregister_irq_mask_notifier(kvm, 0, |
681 | &kvm->arch.vpit->mask_notifier); | 759 | &kvm->arch.vpit->mask_notifier); |
682 | kvm_unregister_irq_ack_notifier(kvm, | 760 | kvm_unregister_irq_ack_notifier(kvm, |
@@ -684,54 +762,10 @@ void kvm_free_pit(struct kvm *kvm) | |||
684 | mutex_lock(&kvm->arch.vpit->pit_state.lock); | 762 | mutex_lock(&kvm->arch.vpit->pit_state.lock); |
685 | timer = &kvm->arch.vpit->pit_state.pit_timer.timer; | 763 | timer = &kvm->arch.vpit->pit_state.pit_timer.timer; |
686 | hrtimer_cancel(timer); | 764 | hrtimer_cancel(timer); |
765 | cancel_work_sync(&kvm->arch.vpit->expired); | ||
687 | kvm_free_irq_source_id(kvm, kvm->arch.vpit->irq_source_id); | 766 | kvm_free_irq_source_id(kvm, kvm->arch.vpit->irq_source_id); |
688 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); | 767 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); |
768 | destroy_workqueue(kvm->arch.vpit->wq); | ||
689 | kfree(kvm->arch.vpit); | 769 | kfree(kvm->arch.vpit); |
690 | } | 770 | } |
691 | } | 771 | } |
692 | |||
693 | static void __inject_pit_timer_intr(struct kvm *kvm) | ||
694 | { | ||
695 | struct kvm_vcpu *vcpu; | ||
696 | int i; | ||
697 | |||
698 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); | ||
699 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); | ||
700 | |||
701 | /* | ||
702 | * Provides NMI watchdog support via Virtual Wire mode. | ||
703 | * The route is: PIT -> PIC -> LVT0 in NMI mode. | ||
704 | * | ||
705 | * Note: Our Virtual Wire implementation is simplified, only | ||
706 | * propagating PIT interrupts to all VCPUs when they have set | ||
707 | * LVT0 to NMI delivery. Other PIC interrupts are just sent to | ||
708 | * VCPU0, and only if its LVT0 is in EXTINT mode. | ||
709 | */ | ||
710 | if (kvm->arch.vapics_in_nmi_mode > 0) | ||
711 | kvm_for_each_vcpu(i, vcpu, kvm) | ||
712 | kvm_apic_nmi_wd_deliver(vcpu); | ||
713 | } | ||
714 | |||
715 | void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu) | ||
716 | { | ||
717 | struct kvm_pit *pit = vcpu->kvm->arch.vpit; | ||
718 | struct kvm *kvm = vcpu->kvm; | ||
719 | struct kvm_kpit_state *ps; | ||
720 | |||
721 | if (pit) { | ||
722 | int inject = 0; | ||
723 | ps = &pit->pit_state; | ||
724 | |||
725 | /* Try to inject pending interrupts when | ||
726 | * last one has been acked. | ||
727 | */ | ||
728 | raw_spin_lock(&ps->inject_lock); | ||
729 | if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) { | ||
730 | ps->irq_ack = 0; | ||
731 | inject = 1; | ||
732 | } | ||
733 | raw_spin_unlock(&ps->inject_lock); | ||
734 | if (inject) | ||
735 | __inject_pit_timer_intr(kvm); | ||
736 | } | ||
737 | } | ||
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index 900d6b0ba7c2..46d08ca0b48f 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h | |||
@@ -27,7 +27,7 @@ struct kvm_kpit_state { | |||
27 | u32 speaker_data_on; | 27 | u32 speaker_data_on; |
28 | struct mutex lock; | 28 | struct mutex lock; |
29 | struct kvm_pit *pit; | 29 | struct kvm_pit *pit; |
30 | raw_spinlock_t inject_lock; | 30 | spinlock_t inject_lock; |
31 | unsigned long irq_ack; | 31 | unsigned long irq_ack; |
32 | struct kvm_irq_ack_notifier irq_ack_notifier; | 32 | struct kvm_irq_ack_notifier irq_ack_notifier; |
33 | }; | 33 | }; |
@@ -40,6 +40,8 @@ struct kvm_pit { | |||
40 | struct kvm_kpit_state pit_state; | 40 | struct kvm_kpit_state pit_state; |
41 | int irq_source_id; | 41 | int irq_source_id; |
42 | struct kvm_irq_mask_notifier mask_notifier; | 42 | struct kvm_irq_mask_notifier mask_notifier; |
43 | struct workqueue_struct *wq; | ||
44 | struct work_struct expired; | ||
43 | }; | 45 | }; |
44 | 46 | ||
45 | #define KVM_PIT_BASE_ADDRESS 0x40 | 47 | #define KVM_PIT_BASE_ADDRESS 0x40 |
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 93825ff3338f..8d10c063d7f2 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
@@ -3,6 +3,7 @@ | |||
3 | * | 3 | * |
4 | * Copyright (c) 2003-2004 Fabrice Bellard | 4 | * Copyright (c) 2003-2004 Fabrice Bellard |
5 | * Copyright (c) 2007 Intel Corporation | 5 | * Copyright (c) 2007 Intel Corporation |
6 | * Copyright 2009 Red Hat, Inc. and/or its affilates. | ||
6 | * | 7 | * |
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy | 8 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
8 | * of this software and associated documentation files (the "Software"), to deal | 9 | * of this software and associated documentation files (the "Software"), to deal |
@@ -33,6 +34,8 @@ | |||
33 | #include <linux/kvm_host.h> | 34 | #include <linux/kvm_host.h> |
34 | #include "trace.h" | 35 | #include "trace.h" |
35 | 36 | ||
37 | static void pic_irq_request(struct kvm *kvm, int level); | ||
38 | |||
36 | static void pic_lock(struct kvm_pic *s) | 39 | static void pic_lock(struct kvm_pic *s) |
37 | __acquires(&s->lock) | 40 | __acquires(&s->lock) |
38 | { | 41 | { |
@@ -43,16 +46,25 @@ static void pic_unlock(struct kvm_pic *s) | |||
43 | __releases(&s->lock) | 46 | __releases(&s->lock) |
44 | { | 47 | { |
45 | bool wakeup = s->wakeup_needed; | 48 | bool wakeup = s->wakeup_needed; |
46 | struct kvm_vcpu *vcpu; | 49 | struct kvm_vcpu *vcpu, *found = NULL; |
50 | int i; | ||
47 | 51 | ||
48 | s->wakeup_needed = false; | 52 | s->wakeup_needed = false; |
49 | 53 | ||
50 | raw_spin_unlock(&s->lock); | 54 | raw_spin_unlock(&s->lock); |
51 | 55 | ||
52 | if (wakeup) { | 56 | if (wakeup) { |
53 | vcpu = s->kvm->bsp_vcpu; | 57 | kvm_for_each_vcpu(i, vcpu, s->kvm) { |
54 | if (vcpu) | 58 | if (kvm_apic_accept_pic_intr(vcpu)) { |
55 | kvm_vcpu_kick(vcpu); | 59 | found = vcpu; |
60 | break; | ||
61 | } | ||
62 | } | ||
63 | |||
64 | if (!found) | ||
65 | found = s->kvm->bsp_vcpu; | ||
66 | |||
67 | kvm_vcpu_kick(found); | ||
56 | } | 68 | } |
57 | } | 69 | } |
58 | 70 | ||
@@ -173,10 +185,7 @@ static void pic_update_irq(struct kvm_pic *s) | |||
173 | pic_set_irq1(&s->pics[0], 2, 0); | 185 | pic_set_irq1(&s->pics[0], 2, 0); |
174 | } | 186 | } |
175 | irq = pic_get_irq(&s->pics[0]); | 187 | irq = pic_get_irq(&s->pics[0]); |
176 | if (irq >= 0) | 188 | pic_irq_request(s->kvm, irq >= 0); |
177 | s->irq_request(s->irq_request_opaque, 1); | ||
178 | else | ||
179 | s->irq_request(s->irq_request_opaque, 0); | ||
180 | } | 189 | } |
181 | 190 | ||
182 | void kvm_pic_update_irq(struct kvm_pic *s) | 191 | void kvm_pic_update_irq(struct kvm_pic *s) |
@@ -261,8 +270,7 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
261 | void kvm_pic_reset(struct kvm_kpic_state *s) | 270 | void kvm_pic_reset(struct kvm_kpic_state *s) |
262 | { | 271 | { |
263 | int irq; | 272 | int irq; |
264 | struct kvm *kvm = s->pics_state->irq_request_opaque; | 273 | struct kvm_vcpu *vcpu0 = s->pics_state->kvm->bsp_vcpu; |
265 | struct kvm_vcpu *vcpu0 = kvm->bsp_vcpu; | ||
266 | u8 irr = s->irr, isr = s->imr; | 274 | u8 irr = s->irr, isr = s->imr; |
267 | 275 | ||
268 | s->last_irr = 0; | 276 | s->last_irr = 0; |
@@ -301,8 +309,7 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val) | |||
301 | /* | 309 | /* |
302 | * deassert a pending interrupt | 310 | * deassert a pending interrupt |
303 | */ | 311 | */ |
304 | s->pics_state->irq_request(s->pics_state-> | 312 | pic_irq_request(s->pics_state->kvm, 0); |
305 | irq_request_opaque, 0); | ||
306 | s->init_state = 1; | 313 | s->init_state = 1; |
307 | s->init4 = val & 1; | 314 | s->init4 = val & 1; |
308 | if (val & 0x02) | 315 | if (val & 0x02) |
@@ -356,10 +363,20 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val) | |||
356 | } | 363 | } |
357 | } else | 364 | } else |
358 | switch (s->init_state) { | 365 | switch (s->init_state) { |
359 | case 0: /* normal mode */ | 366 | case 0: { /* normal mode */ |
367 | u8 imr_diff = s->imr ^ val, | ||
368 | off = (s == &s->pics_state->pics[0]) ? 0 : 8; | ||
360 | s->imr = val; | 369 | s->imr = val; |
370 | for (irq = 0; irq < PIC_NUM_PINS/2; irq++) | ||
371 | if (imr_diff & (1 << irq)) | ||
372 | kvm_fire_mask_notifiers( | ||
373 | s->pics_state->kvm, | ||
374 | SELECT_PIC(irq + off), | ||
375 | irq + off, | ||
376 | !!(s->imr & (1 << irq))); | ||
361 | pic_update_irq(s->pics_state); | 377 | pic_update_irq(s->pics_state); |
362 | break; | 378 | break; |
379 | } | ||
363 | case 1: | 380 | case 1: |
364 | s->irq_base = val & 0xf8; | 381 | s->irq_base = val & 0xf8; |
365 | s->init_state = 2; | 382 | s->init_state = 2; |
@@ -518,9 +535,8 @@ static int picdev_read(struct kvm_io_device *this, | |||
518 | /* | 535 | /* |
519 | * callback when PIC0 irq status changed | 536 | * callback when PIC0 irq status changed |
520 | */ | 537 | */ |
521 | static void pic_irq_request(void *opaque, int level) | 538 | static void pic_irq_request(struct kvm *kvm, int level) |
522 | { | 539 | { |
523 | struct kvm *kvm = opaque; | ||
524 | struct kvm_vcpu *vcpu = kvm->bsp_vcpu; | 540 | struct kvm_vcpu *vcpu = kvm->bsp_vcpu; |
525 | struct kvm_pic *s = pic_irqchip(kvm); | 541 | struct kvm_pic *s = pic_irqchip(kvm); |
526 | int irq = pic_get_irq(&s->pics[0]); | 542 | int irq = pic_get_irq(&s->pics[0]); |
@@ -549,8 +565,6 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) | |||
549 | s->kvm = kvm; | 565 | s->kvm = kvm; |
550 | s->pics[0].elcr_mask = 0xf8; | 566 | s->pics[0].elcr_mask = 0xf8; |
551 | s->pics[1].elcr_mask = 0xde; | 567 | s->pics[1].elcr_mask = 0xde; |
552 | s->irq_request = pic_irq_request; | ||
553 | s->irq_request_opaque = kvm; | ||
554 | s->pics[0].pics_state = s; | 568 | s->pics[0].pics_state = s; |
555 | s->pics[1].pics_state = s; | 569 | s->pics[1].pics_state = s; |
556 | 570 | ||
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 96dfbb6ad2a9..2095a049835e 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c | |||
@@ -1,6 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * irq.c: API for in kernel interrupt controller | 2 | * irq.c: API for in kernel interrupt controller |
3 | * Copyright (c) 2007, Intel Corporation. | 3 | * Copyright (c) 2007, Intel Corporation. |
4 | * Copyright 2009 Red Hat, Inc. and/or its affilates. | ||
4 | * | 5 | * |
5 | * This program is free software; you can redistribute it and/or modify it | 6 | * This program is free software; you can redistribute it and/or modify it |
6 | * under the terms and conditions of the GNU General Public License, | 7 | * under the terms and conditions of the GNU General Public License, |
@@ -89,7 +90,6 @@ EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt); | |||
89 | void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) | 90 | void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) |
90 | { | 91 | { |
91 | kvm_inject_apic_timer_irqs(vcpu); | 92 | kvm_inject_apic_timer_irqs(vcpu); |
92 | kvm_inject_pit_timer_irqs(vcpu); | ||
93 | /* TODO: PIT, RTC etc. */ | 93 | /* TODO: PIT, RTC etc. */ |
94 | } | 94 | } |
95 | EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); | 95 | EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); |
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index cd1f362f413d..ffed06871c5c 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
@@ -38,8 +38,6 @@ | |||
38 | struct kvm; | 38 | struct kvm; |
39 | struct kvm_vcpu; | 39 | struct kvm_vcpu; |
40 | 40 | ||
41 | typedef void irq_request_func(void *opaque, int level); | ||
42 | |||
43 | struct kvm_kpic_state { | 41 | struct kvm_kpic_state { |
44 | u8 last_irr; /* edge detection */ | 42 | u8 last_irr; /* edge detection */ |
45 | u8 irr; /* interrupt request register */ | 43 | u8 irr; /* interrupt request register */ |
@@ -67,8 +65,6 @@ struct kvm_pic { | |||
67 | unsigned pending_acks; | 65 | unsigned pending_acks; |
68 | struct kvm *kvm; | 66 | struct kvm *kvm; |
69 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ | 67 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ |
70 | irq_request_func *irq_request; | ||
71 | void *irq_request_opaque; | ||
72 | int output; /* intr from master PIC */ | 68 | int output; /* intr from master PIC */ |
73 | struct kvm_io_device dev; | 69 | struct kvm_io_device dev; |
74 | void (*ack_notifier)(void *opaque, int irq); | 70 | void (*ack_notifier)(void *opaque, int irq); |
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index cff851cf5322..6491ac8e755b 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h | |||
@@ -36,6 +36,8 @@ static inline void kvm_rip_write(struct kvm_vcpu *vcpu, unsigned long val) | |||
36 | 36 | ||
37 | static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) | 37 | static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) |
38 | { | 38 | { |
39 | might_sleep(); /* on svm */ | ||
40 | |||
39 | if (!test_bit(VCPU_EXREG_PDPTR, | 41 | if (!test_bit(VCPU_EXREG_PDPTR, |
40 | (unsigned long *)&vcpu->arch.regs_avail)) | 42 | (unsigned long *)&vcpu->arch.regs_avail)) |
41 | kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR); | 43 | kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR); |
@@ -69,4 +71,10 @@ static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu) | |||
69 | return kvm_read_cr4_bits(vcpu, ~0UL); | 71 | return kvm_read_cr4_bits(vcpu, ~0UL); |
70 | } | 72 | } |
71 | 73 | ||
74 | static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu) | ||
75 | { | ||
76 | return (kvm_register_read(vcpu, VCPU_REGS_RAX) & -1u) | ||
77 | | ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32); | ||
78 | } | ||
79 | |||
72 | #endif | 80 | #endif |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 1eb7a4ae0c9c..77d8c0f4817d 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -5,6 +5,7 @@ | |||
5 | * Copyright (C) 2006 Qumranet, Inc. | 5 | * Copyright (C) 2006 Qumranet, Inc. |
6 | * Copyright (C) 2007 Novell | 6 | * Copyright (C) 2007 Novell |
7 | * Copyright (C) 2007 Intel | 7 | * Copyright (C) 2007 Intel |
8 | * Copyright 2009 Red Hat, Inc. and/or its affilates. | ||
8 | * | 9 | * |
9 | * Authors: | 10 | * Authors: |
10 | * Dor Laor <dor.laor@qumranet.com> | 11 | * Dor Laor <dor.laor@qumranet.com> |
@@ -328,7 +329,7 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, | |||
328 | "dest_mode 0x%x, short_hand 0x%x\n", | 329 | "dest_mode 0x%x, short_hand 0x%x\n", |
329 | target, source, dest, dest_mode, short_hand); | 330 | target, source, dest, dest_mode, short_hand); |
330 | 331 | ||
331 | ASSERT(!target); | 332 | ASSERT(target); |
332 | switch (short_hand) { | 333 | switch (short_hand) { |
333 | case APIC_DEST_NOSHORT: | 334 | case APIC_DEST_NOSHORT: |
334 | if (dest_mode == 0) | 335 | if (dest_mode == 0) |
@@ -533,7 +534,7 @@ static void __report_tpr_access(struct kvm_lapic *apic, bool write) | |||
533 | struct kvm_vcpu *vcpu = apic->vcpu; | 534 | struct kvm_vcpu *vcpu = apic->vcpu; |
534 | struct kvm_run *run = vcpu->run; | 535 | struct kvm_run *run = vcpu->run; |
535 | 536 | ||
536 | set_bit(KVM_REQ_REPORT_TPR_ACCESS, &vcpu->requests); | 537 | kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu); |
537 | run->tpr_access.rip = kvm_rip_read(vcpu); | 538 | run->tpr_access.rip = kvm_rip_read(vcpu); |
538 | run->tpr_access.is_write = write; | 539 | run->tpr_access.is_write = write; |
539 | } | 540 | } |
@@ -1106,13 +1107,11 @@ int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu) | |||
1106 | u32 lvt0 = apic_get_reg(vcpu->arch.apic, APIC_LVT0); | 1107 | u32 lvt0 = apic_get_reg(vcpu->arch.apic, APIC_LVT0); |
1107 | int r = 0; | 1108 | int r = 0; |
1108 | 1109 | ||
1109 | if (kvm_vcpu_is_bsp(vcpu)) { | 1110 | if (!apic_hw_enabled(vcpu->arch.apic)) |
1110 | if (!apic_hw_enabled(vcpu->arch.apic)) | 1111 | r = 1; |
1111 | r = 1; | 1112 | if ((lvt0 & APIC_LVT_MASKED) == 0 && |
1112 | if ((lvt0 & APIC_LVT_MASKED) == 0 && | 1113 | GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT) |
1113 | GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT) | 1114 | r = 1; |
1114 | r = 1; | ||
1115 | } | ||
1116 | return r; | 1115 | return r; |
1117 | } | 1116 | } |
1118 | 1117 | ||
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index a6f695d76928..311f6dad8951 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -7,6 +7,7 @@ | |||
7 | * MMU support | 7 | * MMU support |
8 | * | 8 | * |
9 | * Copyright (C) 2006 Qumranet, Inc. | 9 | * Copyright (C) 2006 Qumranet, Inc. |
10 | * Copyright 2010 Red Hat, Inc. and/or its affilates. | ||
10 | * | 11 | * |
11 | * Authors: | 12 | * Authors: |
12 | * Yaniv Kamay <yaniv@qumranet.com> | 13 | * Yaniv Kamay <yaniv@qumranet.com> |
@@ -32,6 +33,7 @@ | |||
32 | #include <linux/compiler.h> | 33 | #include <linux/compiler.h> |
33 | #include <linux/srcu.h> | 34 | #include <linux/srcu.h> |
34 | #include <linux/slab.h> | 35 | #include <linux/slab.h> |
36 | #include <linux/uaccess.h> | ||
35 | 37 | ||
36 | #include <asm/page.h> | 38 | #include <asm/page.h> |
37 | #include <asm/cmpxchg.h> | 39 | #include <asm/cmpxchg.h> |
@@ -90,8 +92,6 @@ module_param(oos_shadow, bool, 0644); | |||
90 | #define PT_FIRST_AVAIL_BITS_SHIFT 9 | 92 | #define PT_FIRST_AVAIL_BITS_SHIFT 9 |
91 | #define PT64_SECOND_AVAIL_BITS_SHIFT 52 | 93 | #define PT64_SECOND_AVAIL_BITS_SHIFT 52 |
92 | 94 | ||
93 | #define VALID_PAGE(x) ((x) != INVALID_PAGE) | ||
94 | |||
95 | #define PT64_LEVEL_BITS 9 | 95 | #define PT64_LEVEL_BITS 9 |
96 | 96 | ||
97 | #define PT64_LEVEL_SHIFT(level) \ | 97 | #define PT64_LEVEL_SHIFT(level) \ |
@@ -173,7 +173,7 @@ struct kvm_shadow_walk_iterator { | |||
173 | shadow_walk_okay(&(_walker)); \ | 173 | shadow_walk_okay(&(_walker)); \ |
174 | shadow_walk_next(&(_walker))) | 174 | shadow_walk_next(&(_walker))) |
175 | 175 | ||
176 | typedef int (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp); | 176 | typedef void (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp, u64 *spte); |
177 | 177 | ||
178 | static struct kmem_cache *pte_chain_cache; | 178 | static struct kmem_cache *pte_chain_cache; |
179 | static struct kmem_cache *rmap_desc_cache; | 179 | static struct kmem_cache *rmap_desc_cache; |
@@ -281,13 +281,38 @@ static gfn_t pse36_gfn_delta(u32 gpte) | |||
281 | 281 | ||
282 | static void __set_spte(u64 *sptep, u64 spte) | 282 | static void __set_spte(u64 *sptep, u64 spte) |
283 | { | 283 | { |
284 | set_64bit(sptep, spte); | ||
285 | } | ||
286 | |||
287 | static u64 __xchg_spte(u64 *sptep, u64 new_spte) | ||
288 | { | ||
284 | #ifdef CONFIG_X86_64 | 289 | #ifdef CONFIG_X86_64 |
285 | set_64bit((unsigned long *)sptep, spte); | 290 | return xchg(sptep, new_spte); |
286 | #else | 291 | #else |
287 | set_64bit((unsigned long long *)sptep, spte); | 292 | u64 old_spte; |
293 | |||
294 | do { | ||
295 | old_spte = *sptep; | ||
296 | } while (cmpxchg64(sptep, old_spte, new_spte) != old_spte); | ||
297 | |||
298 | return old_spte; | ||
288 | #endif | 299 | #endif |
289 | } | 300 | } |
290 | 301 | ||
302 | static void update_spte(u64 *sptep, u64 new_spte) | ||
303 | { | ||
304 | u64 old_spte; | ||
305 | |||
306 | if (!shadow_accessed_mask || (new_spte & shadow_accessed_mask) || | ||
307 | !is_rmap_spte(*sptep)) | ||
308 | __set_spte(sptep, new_spte); | ||
309 | else { | ||
310 | old_spte = __xchg_spte(sptep, new_spte); | ||
311 | if (old_spte & shadow_accessed_mask) | ||
312 | mark_page_accessed(pfn_to_page(spte_to_pfn(old_spte))); | ||
313 | } | ||
314 | } | ||
315 | |||
291 | static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, | 316 | static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, |
292 | struct kmem_cache *base_cache, int min) | 317 | struct kmem_cache *base_cache, int min) |
293 | { | 318 | { |
@@ -304,10 +329,11 @@ static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, | |||
304 | return 0; | 329 | return 0; |
305 | } | 330 | } |
306 | 331 | ||
307 | static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc) | 332 | static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc, |
333 | struct kmem_cache *cache) | ||
308 | { | 334 | { |
309 | while (mc->nobjs) | 335 | while (mc->nobjs) |
310 | kfree(mc->objects[--mc->nobjs]); | 336 | kmem_cache_free(cache, mc->objects[--mc->nobjs]); |
311 | } | 337 | } |
312 | 338 | ||
313 | static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, | 339 | static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, |
@@ -355,10 +381,11 @@ out: | |||
355 | 381 | ||
356 | static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) | 382 | static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) |
357 | { | 383 | { |
358 | mmu_free_memory_cache(&vcpu->arch.mmu_pte_chain_cache); | 384 | mmu_free_memory_cache(&vcpu->arch.mmu_pte_chain_cache, pte_chain_cache); |
359 | mmu_free_memory_cache(&vcpu->arch.mmu_rmap_desc_cache); | 385 | mmu_free_memory_cache(&vcpu->arch.mmu_rmap_desc_cache, rmap_desc_cache); |
360 | mmu_free_memory_cache_page(&vcpu->arch.mmu_page_cache); | 386 | mmu_free_memory_cache_page(&vcpu->arch.mmu_page_cache); |
361 | mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache); | 387 | mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache, |
388 | mmu_page_header_cache); | ||
362 | } | 389 | } |
363 | 390 | ||
364 | static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc, | 391 | static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc, |
@@ -379,7 +406,7 @@ static struct kvm_pte_chain *mmu_alloc_pte_chain(struct kvm_vcpu *vcpu) | |||
379 | 406 | ||
380 | static void mmu_free_pte_chain(struct kvm_pte_chain *pc) | 407 | static void mmu_free_pte_chain(struct kvm_pte_chain *pc) |
381 | { | 408 | { |
382 | kfree(pc); | 409 | kmem_cache_free(pte_chain_cache, pc); |
383 | } | 410 | } |
384 | 411 | ||
385 | static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu) | 412 | static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu) |
@@ -390,7 +417,23 @@ static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu) | |||
390 | 417 | ||
391 | static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd) | 418 | static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd) |
392 | { | 419 | { |
393 | kfree(rd); | 420 | kmem_cache_free(rmap_desc_cache, rd); |
421 | } | ||
422 | |||
423 | static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index) | ||
424 | { | ||
425 | if (!sp->role.direct) | ||
426 | return sp->gfns[index]; | ||
427 | |||
428 | return sp->gfn + (index << ((sp->role.level - 1) * PT64_LEVEL_BITS)); | ||
429 | } | ||
430 | |||
431 | static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn) | ||
432 | { | ||
433 | if (sp->role.direct) | ||
434 | BUG_ON(gfn != kvm_mmu_page_get_gfn(sp, index)); | ||
435 | else | ||
436 | sp->gfns[index] = gfn; | ||
394 | } | 437 | } |
395 | 438 | ||
396 | /* | 439 | /* |
@@ -403,8 +446,8 @@ static int *slot_largepage_idx(gfn_t gfn, | |||
403 | { | 446 | { |
404 | unsigned long idx; | 447 | unsigned long idx; |
405 | 448 | ||
406 | idx = (gfn / KVM_PAGES_PER_HPAGE(level)) - | 449 | idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - |
407 | (slot->base_gfn / KVM_PAGES_PER_HPAGE(level)); | 450 | (slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); |
408 | return &slot->lpage_info[level - 2][idx].write_count; | 451 | return &slot->lpage_info[level - 2][idx].write_count; |
409 | } | 452 | } |
410 | 453 | ||
@@ -414,9 +457,7 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn) | |||
414 | int *write_count; | 457 | int *write_count; |
415 | int i; | 458 | int i; |
416 | 459 | ||
417 | gfn = unalias_gfn(kvm, gfn); | 460 | slot = gfn_to_memslot(kvm, gfn); |
418 | |||
419 | slot = gfn_to_memslot_unaliased(kvm, gfn); | ||
420 | for (i = PT_DIRECTORY_LEVEL; | 461 | for (i = PT_DIRECTORY_LEVEL; |
421 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | 462 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { |
422 | write_count = slot_largepage_idx(gfn, slot, i); | 463 | write_count = slot_largepage_idx(gfn, slot, i); |
@@ -430,8 +471,7 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) | |||
430 | int *write_count; | 471 | int *write_count; |
431 | int i; | 472 | int i; |
432 | 473 | ||
433 | gfn = unalias_gfn(kvm, gfn); | 474 | slot = gfn_to_memslot(kvm, gfn); |
434 | slot = gfn_to_memslot_unaliased(kvm, gfn); | ||
435 | for (i = PT_DIRECTORY_LEVEL; | 475 | for (i = PT_DIRECTORY_LEVEL; |
436 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | 476 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { |
437 | write_count = slot_largepage_idx(gfn, slot, i); | 477 | write_count = slot_largepage_idx(gfn, slot, i); |
@@ -447,8 +487,7 @@ static int has_wrprotected_page(struct kvm *kvm, | |||
447 | struct kvm_memory_slot *slot; | 487 | struct kvm_memory_slot *slot; |
448 | int *largepage_idx; | 488 | int *largepage_idx; |
449 | 489 | ||
450 | gfn = unalias_gfn(kvm, gfn); | 490 | slot = gfn_to_memslot(kvm, gfn); |
451 | slot = gfn_to_memslot_unaliased(kvm, gfn); | ||
452 | if (slot) { | 491 | if (slot) { |
453 | largepage_idx = slot_largepage_idx(gfn, slot, level); | 492 | largepage_idx = slot_largepage_idx(gfn, slot, level); |
454 | return *largepage_idx; | 493 | return *largepage_idx; |
@@ -501,7 +540,6 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) | |||
501 | 540 | ||
502 | /* | 541 | /* |
503 | * Take gfn and return the reverse mapping to it. | 542 | * Take gfn and return the reverse mapping to it. |
504 | * Note: gfn must be unaliased before this function get called | ||
505 | */ | 543 | */ |
506 | 544 | ||
507 | static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level) | 545 | static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level) |
@@ -513,8 +551,8 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level) | |||
513 | if (likely(level == PT_PAGE_TABLE_LEVEL)) | 551 | if (likely(level == PT_PAGE_TABLE_LEVEL)) |
514 | return &slot->rmap[gfn - slot->base_gfn]; | 552 | return &slot->rmap[gfn - slot->base_gfn]; |
515 | 553 | ||
516 | idx = (gfn / KVM_PAGES_PER_HPAGE(level)) - | 554 | idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - |
517 | (slot->base_gfn / KVM_PAGES_PER_HPAGE(level)); | 555 | (slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); |
518 | 556 | ||
519 | return &slot->lpage_info[level - 2][idx].rmap_pde; | 557 | return &slot->lpage_info[level - 2][idx].rmap_pde; |
520 | } | 558 | } |
@@ -541,9 +579,8 @@ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) | |||
541 | 579 | ||
542 | if (!is_rmap_spte(*spte)) | 580 | if (!is_rmap_spte(*spte)) |
543 | return count; | 581 | return count; |
544 | gfn = unalias_gfn(vcpu->kvm, gfn); | ||
545 | sp = page_header(__pa(spte)); | 582 | sp = page_header(__pa(spte)); |
546 | sp->gfns[spte - sp->spt] = gfn; | 583 | kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn); |
547 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); | 584 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); |
548 | if (!*rmapp) { | 585 | if (!*rmapp) { |
549 | rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte); | 586 | rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte); |
@@ -600,19 +637,13 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) | |||
600 | struct kvm_rmap_desc *desc; | 637 | struct kvm_rmap_desc *desc; |
601 | struct kvm_rmap_desc *prev_desc; | 638 | struct kvm_rmap_desc *prev_desc; |
602 | struct kvm_mmu_page *sp; | 639 | struct kvm_mmu_page *sp; |
603 | pfn_t pfn; | 640 | gfn_t gfn; |
604 | unsigned long *rmapp; | 641 | unsigned long *rmapp; |
605 | int i; | 642 | int i; |
606 | 643 | ||
607 | if (!is_rmap_spte(*spte)) | ||
608 | return; | ||
609 | sp = page_header(__pa(spte)); | 644 | sp = page_header(__pa(spte)); |
610 | pfn = spte_to_pfn(*spte); | 645 | gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt); |
611 | if (*spte & shadow_accessed_mask) | 646 | rmapp = gfn_to_rmap(kvm, gfn, sp->role.level); |
612 | kvm_set_pfn_accessed(pfn); | ||
613 | if (is_writable_pte(*spte)) | ||
614 | kvm_set_pfn_dirty(pfn); | ||
615 | rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level); | ||
616 | if (!*rmapp) { | 647 | if (!*rmapp) { |
617 | printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); | 648 | printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); |
618 | BUG(); | 649 | BUG(); |
@@ -644,6 +675,32 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) | |||
644 | } | 675 | } |
645 | } | 676 | } |
646 | 677 | ||
678 | static void set_spte_track_bits(u64 *sptep, u64 new_spte) | ||
679 | { | ||
680 | pfn_t pfn; | ||
681 | u64 old_spte = *sptep; | ||
682 | |||
683 | if (!shadow_accessed_mask || !is_shadow_present_pte(old_spte) || | ||
684 | old_spte & shadow_accessed_mask) { | ||
685 | __set_spte(sptep, new_spte); | ||
686 | } else | ||
687 | old_spte = __xchg_spte(sptep, new_spte); | ||
688 | |||
689 | if (!is_rmap_spte(old_spte)) | ||
690 | return; | ||
691 | pfn = spte_to_pfn(old_spte); | ||
692 | if (!shadow_accessed_mask || old_spte & shadow_accessed_mask) | ||
693 | kvm_set_pfn_accessed(pfn); | ||
694 | if (is_writable_pte(old_spte)) | ||
695 | kvm_set_pfn_dirty(pfn); | ||
696 | } | ||
697 | |||
698 | static void drop_spte(struct kvm *kvm, u64 *sptep, u64 new_spte) | ||
699 | { | ||
700 | set_spte_track_bits(sptep, new_spte); | ||
701 | rmap_remove(kvm, sptep); | ||
702 | } | ||
703 | |||
647 | static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) | 704 | static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) |
648 | { | 705 | { |
649 | struct kvm_rmap_desc *desc; | 706 | struct kvm_rmap_desc *desc; |
@@ -676,7 +733,6 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
676 | u64 *spte; | 733 | u64 *spte; |
677 | int i, write_protected = 0; | 734 | int i, write_protected = 0; |
678 | 735 | ||
679 | gfn = unalias_gfn(kvm, gfn); | ||
680 | rmapp = gfn_to_rmap(kvm, gfn, PT_PAGE_TABLE_LEVEL); | 736 | rmapp = gfn_to_rmap(kvm, gfn, PT_PAGE_TABLE_LEVEL); |
681 | 737 | ||
682 | spte = rmap_next(kvm, rmapp, NULL); | 738 | spte = rmap_next(kvm, rmapp, NULL); |
@@ -685,7 +741,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
685 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | 741 | BUG_ON(!(*spte & PT_PRESENT_MASK)); |
686 | rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); | 742 | rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); |
687 | if (is_writable_pte(*spte)) { | 743 | if (is_writable_pte(*spte)) { |
688 | __set_spte(spte, *spte & ~PT_WRITABLE_MASK); | 744 | update_spte(spte, *spte & ~PT_WRITABLE_MASK); |
689 | write_protected = 1; | 745 | write_protected = 1; |
690 | } | 746 | } |
691 | spte = rmap_next(kvm, rmapp, spte); | 747 | spte = rmap_next(kvm, rmapp, spte); |
@@ -709,9 +765,9 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
709 | BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)); | 765 | BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)); |
710 | pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn); | 766 | pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn); |
711 | if (is_writable_pte(*spte)) { | 767 | if (is_writable_pte(*spte)) { |
712 | rmap_remove(kvm, spte); | 768 | drop_spte(kvm, spte, |
769 | shadow_trap_nonpresent_pte); | ||
713 | --kvm->stat.lpages; | 770 | --kvm->stat.lpages; |
714 | __set_spte(spte, shadow_trap_nonpresent_pte); | ||
715 | spte = NULL; | 771 | spte = NULL; |
716 | write_protected = 1; | 772 | write_protected = 1; |
717 | } | 773 | } |
@@ -731,8 +787,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
731 | while ((spte = rmap_next(kvm, rmapp, NULL))) { | 787 | while ((spte = rmap_next(kvm, rmapp, NULL))) { |
732 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | 788 | BUG_ON(!(*spte & PT_PRESENT_MASK)); |
733 | rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); | 789 | rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); |
734 | rmap_remove(kvm, spte); | 790 | drop_spte(kvm, spte, shadow_trap_nonpresent_pte); |
735 | __set_spte(spte, shadow_trap_nonpresent_pte); | ||
736 | need_tlb_flush = 1; | 791 | need_tlb_flush = 1; |
737 | } | 792 | } |
738 | return need_tlb_flush; | 793 | return need_tlb_flush; |
@@ -754,8 +809,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
754 | rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte); | 809 | rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte); |
755 | need_flush = 1; | 810 | need_flush = 1; |
756 | if (pte_write(*ptep)) { | 811 | if (pte_write(*ptep)) { |
757 | rmap_remove(kvm, spte); | 812 | drop_spte(kvm, spte, shadow_trap_nonpresent_pte); |
758 | __set_spte(spte, shadow_trap_nonpresent_pte); | ||
759 | spte = rmap_next(kvm, rmapp, NULL); | 813 | spte = rmap_next(kvm, rmapp, NULL); |
760 | } else { | 814 | } else { |
761 | new_spte = *spte &~ (PT64_BASE_ADDR_MASK); | 815 | new_spte = *spte &~ (PT64_BASE_ADDR_MASK); |
@@ -763,9 +817,8 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
763 | 817 | ||
764 | new_spte &= ~PT_WRITABLE_MASK; | 818 | new_spte &= ~PT_WRITABLE_MASK; |
765 | new_spte &= ~SPTE_HOST_WRITEABLE; | 819 | new_spte &= ~SPTE_HOST_WRITEABLE; |
766 | if (is_writable_pte(*spte)) | 820 | new_spte &= ~shadow_accessed_mask; |
767 | kvm_set_pfn_dirty(spte_to_pfn(*spte)); | 821 | set_spte_track_bits(spte, new_spte); |
768 | __set_spte(spte, new_spte); | ||
769 | spte = rmap_next(kvm, rmapp, spte); | 822 | spte = rmap_next(kvm, rmapp, spte); |
770 | } | 823 | } |
771 | } | 824 | } |
@@ -799,8 +852,12 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | |||
799 | ret = handler(kvm, &memslot->rmap[gfn_offset], data); | 852 | ret = handler(kvm, &memslot->rmap[gfn_offset], data); |
800 | 853 | ||
801 | for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { | 854 | for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { |
802 | int idx = gfn_offset; | 855 | unsigned long idx; |
803 | idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j); | 856 | int sh; |
857 | |||
858 | sh = KVM_HPAGE_GFN_SHIFT(PT_DIRECTORY_LEVEL+j); | ||
859 | idx = ((memslot->base_gfn+gfn_offset) >> sh) - | ||
860 | (memslot->base_gfn >> sh); | ||
804 | ret |= handler(kvm, | 861 | ret |= handler(kvm, |
805 | &memslot->lpage_info[j][idx].rmap_pde, | 862 | &memslot->lpage_info[j][idx].rmap_pde, |
806 | data); | 863 | data); |
@@ -863,7 +920,6 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) | |||
863 | 920 | ||
864 | sp = page_header(__pa(spte)); | 921 | sp = page_header(__pa(spte)); |
865 | 922 | ||
866 | gfn = unalias_gfn(vcpu->kvm, gfn); | ||
867 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); | 923 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); |
868 | 924 | ||
869 | kvm_unmap_rmapp(vcpu->kvm, rmapp, 0); | 925 | kvm_unmap_rmapp(vcpu->kvm, rmapp, 0); |
@@ -894,10 +950,12 @@ static int is_empty_shadow_page(u64 *spt) | |||
894 | static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp) | 950 | static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp) |
895 | { | 951 | { |
896 | ASSERT(is_empty_shadow_page(sp->spt)); | 952 | ASSERT(is_empty_shadow_page(sp->spt)); |
953 | hlist_del(&sp->hash_link); | ||
897 | list_del(&sp->link); | 954 | list_del(&sp->link); |
898 | __free_page(virt_to_page(sp->spt)); | 955 | __free_page(virt_to_page(sp->spt)); |
899 | __free_page(virt_to_page(sp->gfns)); | 956 | if (!sp->role.direct) |
900 | kfree(sp); | 957 | __free_page(virt_to_page(sp->gfns)); |
958 | kmem_cache_free(mmu_page_header_cache, sp); | ||
901 | ++kvm->arch.n_free_mmu_pages; | 959 | ++kvm->arch.n_free_mmu_pages; |
902 | } | 960 | } |
903 | 961 | ||
@@ -907,13 +965,15 @@ static unsigned kvm_page_table_hashfn(gfn_t gfn) | |||
907 | } | 965 | } |
908 | 966 | ||
909 | static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, | 967 | static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, |
910 | u64 *parent_pte) | 968 | u64 *parent_pte, int direct) |
911 | { | 969 | { |
912 | struct kvm_mmu_page *sp; | 970 | struct kvm_mmu_page *sp; |
913 | 971 | ||
914 | sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache, sizeof *sp); | 972 | sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache, sizeof *sp); |
915 | sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); | 973 | sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); |
916 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); | 974 | if (!direct) |
975 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, | ||
976 | PAGE_SIZE); | ||
917 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); | 977 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); |
918 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); | 978 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); |
919 | bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); | 979 | bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); |
@@ -998,7 +1058,6 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp, | |||
998 | BUG(); | 1058 | BUG(); |
999 | } | 1059 | } |
1000 | 1060 | ||
1001 | |||
1002 | static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn) | 1061 | static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn) |
1003 | { | 1062 | { |
1004 | struct kvm_pte_chain *pte_chain; | 1063 | struct kvm_pte_chain *pte_chain; |
@@ -1008,63 +1067,37 @@ static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn) | |||
1008 | 1067 | ||
1009 | if (!sp->multimapped && sp->parent_pte) { | 1068 | if (!sp->multimapped && sp->parent_pte) { |
1010 | parent_sp = page_header(__pa(sp->parent_pte)); | 1069 | parent_sp = page_header(__pa(sp->parent_pte)); |
1011 | fn(parent_sp); | 1070 | fn(parent_sp, sp->parent_pte); |
1012 | mmu_parent_walk(parent_sp, fn); | ||
1013 | return; | 1071 | return; |
1014 | } | 1072 | } |
1073 | |||
1015 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) | 1074 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) |
1016 | for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) { | 1075 | for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) { |
1017 | if (!pte_chain->parent_ptes[i]) | 1076 | u64 *spte = pte_chain->parent_ptes[i]; |
1077 | |||
1078 | if (!spte) | ||
1018 | break; | 1079 | break; |
1019 | parent_sp = page_header(__pa(pte_chain->parent_ptes[i])); | 1080 | parent_sp = page_header(__pa(spte)); |
1020 | fn(parent_sp); | 1081 | fn(parent_sp, spte); |
1021 | mmu_parent_walk(parent_sp, fn); | ||
1022 | } | 1082 | } |
1023 | } | 1083 | } |
1024 | 1084 | ||
1025 | static void kvm_mmu_update_unsync_bitmap(u64 *spte) | 1085 | static void mark_unsync(struct kvm_mmu_page *sp, u64 *spte); |
1086 | static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) | ||
1026 | { | 1087 | { |
1027 | unsigned int index; | 1088 | mmu_parent_walk(sp, mark_unsync); |
1028 | struct kvm_mmu_page *sp = page_header(__pa(spte)); | ||
1029 | |||
1030 | index = spte - sp->spt; | ||
1031 | if (!__test_and_set_bit(index, sp->unsync_child_bitmap)) | ||
1032 | sp->unsync_children++; | ||
1033 | WARN_ON(!sp->unsync_children); | ||
1034 | } | 1089 | } |
1035 | 1090 | ||
1036 | static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) | 1091 | static void mark_unsync(struct kvm_mmu_page *sp, u64 *spte) |
1037 | { | 1092 | { |
1038 | struct kvm_pte_chain *pte_chain; | 1093 | unsigned int index; |
1039 | struct hlist_node *node; | ||
1040 | int i; | ||
1041 | 1094 | ||
1042 | if (!sp->parent_pte) | 1095 | index = spte - sp->spt; |
1096 | if (__test_and_set_bit(index, sp->unsync_child_bitmap)) | ||
1043 | return; | 1097 | return; |
1044 | 1098 | if (sp->unsync_children++) | |
1045 | if (!sp->multimapped) { | ||
1046 | kvm_mmu_update_unsync_bitmap(sp->parent_pte); | ||
1047 | return; | 1099 | return; |
1048 | } | 1100 | kvm_mmu_mark_parents_unsync(sp); |
1049 | |||
1050 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) | ||
1051 | for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) { | ||
1052 | if (!pte_chain->parent_ptes[i]) | ||
1053 | break; | ||
1054 | kvm_mmu_update_unsync_bitmap(pte_chain->parent_ptes[i]); | ||
1055 | } | ||
1056 | } | ||
1057 | |||
1058 | static int unsync_walk_fn(struct kvm_mmu_page *sp) | ||
1059 | { | ||
1060 | kvm_mmu_update_parents_unsync(sp); | ||
1061 | return 1; | ||
1062 | } | ||
1063 | |||
1064 | static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) | ||
1065 | { | ||
1066 | mmu_parent_walk(sp, unsync_walk_fn); | ||
1067 | kvm_mmu_update_parents_unsync(sp); | ||
1068 | } | 1101 | } |
1069 | 1102 | ||
1070 | static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, | 1103 | static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, |
@@ -1077,7 +1110,7 @@ static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, | |||
1077 | } | 1110 | } |
1078 | 1111 | ||
1079 | static int nonpaging_sync_page(struct kvm_vcpu *vcpu, | 1112 | static int nonpaging_sync_page(struct kvm_vcpu *vcpu, |
1080 | struct kvm_mmu_page *sp) | 1113 | struct kvm_mmu_page *sp, bool clear_unsync) |
1081 | { | 1114 | { |
1082 | return 1; | 1115 | return 1; |
1083 | } | 1116 | } |
@@ -1123,35 +1156,40 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp, | |||
1123 | int i, ret, nr_unsync_leaf = 0; | 1156 | int i, ret, nr_unsync_leaf = 0; |
1124 | 1157 | ||
1125 | for_each_unsync_children(sp->unsync_child_bitmap, i) { | 1158 | for_each_unsync_children(sp->unsync_child_bitmap, i) { |
1159 | struct kvm_mmu_page *child; | ||
1126 | u64 ent = sp->spt[i]; | 1160 | u64 ent = sp->spt[i]; |
1127 | 1161 | ||
1128 | if (is_shadow_present_pte(ent) && !is_large_pte(ent)) { | 1162 | if (!is_shadow_present_pte(ent) || is_large_pte(ent)) |
1129 | struct kvm_mmu_page *child; | 1163 | goto clear_child_bitmap; |
1130 | child = page_header(ent & PT64_BASE_ADDR_MASK); | 1164 | |
1131 | 1165 | child = page_header(ent & PT64_BASE_ADDR_MASK); | |
1132 | if (child->unsync_children) { | 1166 | |
1133 | if (mmu_pages_add(pvec, child, i)) | 1167 | if (child->unsync_children) { |
1134 | return -ENOSPC; | 1168 | if (mmu_pages_add(pvec, child, i)) |
1135 | 1169 | return -ENOSPC; | |
1136 | ret = __mmu_unsync_walk(child, pvec); | 1170 | |
1137 | if (!ret) | 1171 | ret = __mmu_unsync_walk(child, pvec); |
1138 | __clear_bit(i, sp->unsync_child_bitmap); | 1172 | if (!ret) |
1139 | else if (ret > 0) | 1173 | goto clear_child_bitmap; |
1140 | nr_unsync_leaf += ret; | 1174 | else if (ret > 0) |
1141 | else | 1175 | nr_unsync_leaf += ret; |
1142 | return ret; | 1176 | else |
1143 | } | 1177 | return ret; |
1178 | } else if (child->unsync) { | ||
1179 | nr_unsync_leaf++; | ||
1180 | if (mmu_pages_add(pvec, child, i)) | ||
1181 | return -ENOSPC; | ||
1182 | } else | ||
1183 | goto clear_child_bitmap; | ||
1144 | 1184 | ||
1145 | if (child->unsync) { | 1185 | continue; |
1146 | nr_unsync_leaf++; | 1186 | |
1147 | if (mmu_pages_add(pvec, child, i)) | 1187 | clear_child_bitmap: |
1148 | return -ENOSPC; | 1188 | __clear_bit(i, sp->unsync_child_bitmap); |
1149 | } | 1189 | sp->unsync_children--; |
1150 | } | 1190 | WARN_ON((int)sp->unsync_children < 0); |
1151 | } | 1191 | } |
1152 | 1192 | ||
1153 | if (find_first_bit(sp->unsync_child_bitmap, 512) == 512) | ||
1154 | sp->unsync_children = 0; | ||
1155 | 1193 | ||
1156 | return nr_unsync_leaf; | 1194 | return nr_unsync_leaf; |
1157 | } | 1195 | } |
@@ -1166,26 +1204,6 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp, | |||
1166 | return __mmu_unsync_walk(sp, pvec); | 1204 | return __mmu_unsync_walk(sp, pvec); |
1167 | } | 1205 | } |
1168 | 1206 | ||
1169 | static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) | ||
1170 | { | ||
1171 | unsigned index; | ||
1172 | struct hlist_head *bucket; | ||
1173 | struct kvm_mmu_page *sp; | ||
1174 | struct hlist_node *node; | ||
1175 | |||
1176 | pgprintk("%s: looking for gfn %lx\n", __func__, gfn); | ||
1177 | index = kvm_page_table_hashfn(gfn); | ||
1178 | bucket = &kvm->arch.mmu_page_hash[index]; | ||
1179 | hlist_for_each_entry(sp, node, bucket, hash_link) | ||
1180 | if (sp->gfn == gfn && !sp->role.direct | ||
1181 | && !sp->role.invalid) { | ||
1182 | pgprintk("%s: found role %x\n", | ||
1183 | __func__, sp->role.word); | ||
1184 | return sp; | ||
1185 | } | ||
1186 | return NULL; | ||
1187 | } | ||
1188 | |||
1189 | static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) | 1207 | static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) |
1190 | { | 1208 | { |
1191 | WARN_ON(!sp->unsync); | 1209 | WARN_ON(!sp->unsync); |
@@ -1194,20 +1212,36 @@ static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
1194 | --kvm->stat.mmu_unsync; | 1212 | --kvm->stat.mmu_unsync; |
1195 | } | 1213 | } |
1196 | 1214 | ||
1197 | static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp); | 1215 | static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, |
1216 | struct list_head *invalid_list); | ||
1217 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, | ||
1218 | struct list_head *invalid_list); | ||
1198 | 1219 | ||
1199 | static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 1220 | #define for_each_gfn_sp(kvm, sp, gfn, pos) \ |
1221 | hlist_for_each_entry(sp, pos, \ | ||
1222 | &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ | ||
1223 | if ((sp)->gfn != (gfn)) {} else | ||
1224 | |||
1225 | #define for_each_gfn_indirect_valid_sp(kvm, sp, gfn, pos) \ | ||
1226 | hlist_for_each_entry(sp, pos, \ | ||
1227 | &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ | ||
1228 | if ((sp)->gfn != (gfn) || (sp)->role.direct || \ | ||
1229 | (sp)->role.invalid) {} else | ||
1230 | |||
1231 | /* @sp->gfn should be write-protected at the call site */ | ||
1232 | static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | ||
1233 | struct list_head *invalid_list, bool clear_unsync) | ||
1200 | { | 1234 | { |
1201 | if (sp->role.cr4_pae != !!is_pae(vcpu)) { | 1235 | if (sp->role.cr4_pae != !!is_pae(vcpu)) { |
1202 | kvm_mmu_zap_page(vcpu->kvm, sp); | 1236 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); |
1203 | return 1; | 1237 | return 1; |
1204 | } | 1238 | } |
1205 | 1239 | ||
1206 | if (rmap_write_protect(vcpu->kvm, sp->gfn)) | 1240 | if (clear_unsync) |
1207 | kvm_flush_remote_tlbs(vcpu->kvm); | 1241 | kvm_unlink_unsync_page(vcpu->kvm, sp); |
1208 | kvm_unlink_unsync_page(vcpu->kvm, sp); | 1242 | |
1209 | if (vcpu->arch.mmu.sync_page(vcpu, sp)) { | 1243 | if (vcpu->arch.mmu.sync_page(vcpu, sp, clear_unsync)) { |
1210 | kvm_mmu_zap_page(vcpu->kvm, sp); | 1244 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); |
1211 | return 1; | 1245 | return 1; |
1212 | } | 1246 | } |
1213 | 1247 | ||
@@ -1215,6 +1249,52 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
1215 | return 0; | 1249 | return 0; |
1216 | } | 1250 | } |
1217 | 1251 | ||
1252 | static int kvm_sync_page_transient(struct kvm_vcpu *vcpu, | ||
1253 | struct kvm_mmu_page *sp) | ||
1254 | { | ||
1255 | LIST_HEAD(invalid_list); | ||
1256 | int ret; | ||
1257 | |||
1258 | ret = __kvm_sync_page(vcpu, sp, &invalid_list, false); | ||
1259 | if (ret) | ||
1260 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | ||
1261 | |||
1262 | return ret; | ||
1263 | } | ||
1264 | |||
1265 | static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | ||
1266 | struct list_head *invalid_list) | ||
1267 | { | ||
1268 | return __kvm_sync_page(vcpu, sp, invalid_list, true); | ||
1269 | } | ||
1270 | |||
1271 | /* @gfn should be write-protected at the call site */ | ||
1272 | static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
1273 | { | ||
1274 | struct kvm_mmu_page *s; | ||
1275 | struct hlist_node *node; | ||
1276 | LIST_HEAD(invalid_list); | ||
1277 | bool flush = false; | ||
1278 | |||
1279 | for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { | ||
1280 | if (!s->unsync) | ||
1281 | continue; | ||
1282 | |||
1283 | WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); | ||
1284 | if ((s->role.cr4_pae != !!is_pae(vcpu)) || | ||
1285 | (vcpu->arch.mmu.sync_page(vcpu, s, true))) { | ||
1286 | kvm_mmu_prepare_zap_page(vcpu->kvm, s, &invalid_list); | ||
1287 | continue; | ||
1288 | } | ||
1289 | kvm_unlink_unsync_page(vcpu->kvm, s); | ||
1290 | flush = true; | ||
1291 | } | ||
1292 | |||
1293 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | ||
1294 | if (flush) | ||
1295 | kvm_mmu_flush_tlb(vcpu); | ||
1296 | } | ||
1297 | |||
1218 | struct mmu_page_path { | 1298 | struct mmu_page_path { |
1219 | struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1]; | 1299 | struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1]; |
1220 | unsigned int idx[PT64_ROOT_LEVEL-1]; | 1300 | unsigned int idx[PT64_ROOT_LEVEL-1]; |
@@ -1281,6 +1361,7 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu, | |||
1281 | struct kvm_mmu_page *sp; | 1361 | struct kvm_mmu_page *sp; |
1282 | struct mmu_page_path parents; | 1362 | struct mmu_page_path parents; |
1283 | struct kvm_mmu_pages pages; | 1363 | struct kvm_mmu_pages pages; |
1364 | LIST_HEAD(invalid_list); | ||
1284 | 1365 | ||
1285 | kvm_mmu_pages_init(parent, &parents, &pages); | 1366 | kvm_mmu_pages_init(parent, &parents, &pages); |
1286 | while (mmu_unsync_walk(parent, &pages)) { | 1367 | while (mmu_unsync_walk(parent, &pages)) { |
@@ -1293,9 +1374,10 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu, | |||
1293 | kvm_flush_remote_tlbs(vcpu->kvm); | 1374 | kvm_flush_remote_tlbs(vcpu->kvm); |
1294 | 1375 | ||
1295 | for_each_sp(pages, sp, parents, i) { | 1376 | for_each_sp(pages, sp, parents, i) { |
1296 | kvm_sync_page(vcpu, sp); | 1377 | kvm_sync_page(vcpu, sp, &invalid_list); |
1297 | mmu_pages_clear_parents(&parents); | 1378 | mmu_pages_clear_parents(&parents); |
1298 | } | 1379 | } |
1380 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | ||
1299 | cond_resched_lock(&vcpu->kvm->mmu_lock); | 1381 | cond_resched_lock(&vcpu->kvm->mmu_lock); |
1300 | kvm_mmu_pages_init(parent, &parents, &pages); | 1382 | kvm_mmu_pages_init(parent, &parents, &pages); |
1301 | } | 1383 | } |
@@ -1310,11 +1392,10 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
1310 | u64 *parent_pte) | 1392 | u64 *parent_pte) |
1311 | { | 1393 | { |
1312 | union kvm_mmu_page_role role; | 1394 | union kvm_mmu_page_role role; |
1313 | unsigned index; | ||
1314 | unsigned quadrant; | 1395 | unsigned quadrant; |
1315 | struct hlist_head *bucket; | ||
1316 | struct kvm_mmu_page *sp; | 1396 | struct kvm_mmu_page *sp; |
1317 | struct hlist_node *node, *tmp; | 1397 | struct hlist_node *node; |
1398 | bool need_sync = false; | ||
1318 | 1399 | ||
1319 | role = vcpu->arch.mmu.base_role; | 1400 | role = vcpu->arch.mmu.base_role; |
1320 | role.level = level; | 1401 | role.level = level; |
@@ -1322,40 +1403,45 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
1322 | if (role.direct) | 1403 | if (role.direct) |
1323 | role.cr4_pae = 0; | 1404 | role.cr4_pae = 0; |
1324 | role.access = access; | 1405 | role.access = access; |
1325 | if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { | 1406 | if (!tdp_enabled && vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { |
1326 | quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); | 1407 | quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); |
1327 | quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; | 1408 | quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; |
1328 | role.quadrant = quadrant; | 1409 | role.quadrant = quadrant; |
1329 | } | 1410 | } |
1330 | index = kvm_page_table_hashfn(gfn); | 1411 | for_each_gfn_sp(vcpu->kvm, sp, gfn, node) { |
1331 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; | 1412 | if (!need_sync && sp->unsync) |
1332 | hlist_for_each_entry_safe(sp, node, tmp, bucket, hash_link) | 1413 | need_sync = true; |
1333 | if (sp->gfn == gfn) { | ||
1334 | if (sp->unsync) | ||
1335 | if (kvm_sync_page(vcpu, sp)) | ||
1336 | continue; | ||
1337 | 1414 | ||
1338 | if (sp->role.word != role.word) | 1415 | if (sp->role.word != role.word) |
1339 | continue; | 1416 | continue; |
1340 | 1417 | ||
1341 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); | 1418 | if (sp->unsync && kvm_sync_page_transient(vcpu, sp)) |
1342 | if (sp->unsync_children) { | 1419 | break; |
1343 | set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests); | 1420 | |
1344 | kvm_mmu_mark_parents_unsync(sp); | 1421 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); |
1345 | } | 1422 | if (sp->unsync_children) { |
1346 | trace_kvm_mmu_get_page(sp, false); | 1423 | kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); |
1347 | return sp; | 1424 | kvm_mmu_mark_parents_unsync(sp); |
1348 | } | 1425 | } else if (sp->unsync) |
1426 | kvm_mmu_mark_parents_unsync(sp); | ||
1427 | |||
1428 | trace_kvm_mmu_get_page(sp, false); | ||
1429 | return sp; | ||
1430 | } | ||
1349 | ++vcpu->kvm->stat.mmu_cache_miss; | 1431 | ++vcpu->kvm->stat.mmu_cache_miss; |
1350 | sp = kvm_mmu_alloc_page(vcpu, parent_pte); | 1432 | sp = kvm_mmu_alloc_page(vcpu, parent_pte, direct); |
1351 | if (!sp) | 1433 | if (!sp) |
1352 | return sp; | 1434 | return sp; |
1353 | sp->gfn = gfn; | 1435 | sp->gfn = gfn; |
1354 | sp->role = role; | 1436 | sp->role = role; |
1355 | hlist_add_head(&sp->hash_link, bucket); | 1437 | hlist_add_head(&sp->hash_link, |
1438 | &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]); | ||
1356 | if (!direct) { | 1439 | if (!direct) { |
1357 | if (rmap_write_protect(vcpu->kvm, gfn)) | 1440 | if (rmap_write_protect(vcpu->kvm, gfn)) |
1358 | kvm_flush_remote_tlbs(vcpu->kvm); | 1441 | kvm_flush_remote_tlbs(vcpu->kvm); |
1442 | if (level > PT_PAGE_TABLE_LEVEL && need_sync) | ||
1443 | kvm_sync_pages(vcpu, gfn); | ||
1444 | |||
1359 | account_shadowed(vcpu->kvm, gfn); | 1445 | account_shadowed(vcpu->kvm, gfn); |
1360 | } | 1446 | } |
1361 | if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) | 1447 | if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) |
@@ -1402,6 +1488,47 @@ static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator) | |||
1402 | --iterator->level; | 1488 | --iterator->level; |
1403 | } | 1489 | } |
1404 | 1490 | ||
1491 | static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp) | ||
1492 | { | ||
1493 | u64 spte; | ||
1494 | |||
1495 | spte = __pa(sp->spt) | ||
1496 | | PT_PRESENT_MASK | PT_ACCESSED_MASK | ||
1497 | | PT_WRITABLE_MASK | PT_USER_MASK; | ||
1498 | __set_spte(sptep, spte); | ||
1499 | } | ||
1500 | |||
1501 | static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep) | ||
1502 | { | ||
1503 | if (is_large_pte(*sptep)) { | ||
1504 | drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte); | ||
1505 | kvm_flush_remote_tlbs(vcpu->kvm); | ||
1506 | } | ||
1507 | } | ||
1508 | |||
1509 | static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, | ||
1510 | unsigned direct_access) | ||
1511 | { | ||
1512 | if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) { | ||
1513 | struct kvm_mmu_page *child; | ||
1514 | |||
1515 | /* | ||
1516 | * For the direct sp, if the guest pte's dirty bit | ||
1517 | * changed form clean to dirty, it will corrupt the | ||
1518 | * sp's access: allow writable in the read-only sp, | ||
1519 | * so we should update the spte at this point to get | ||
1520 | * a new sp with the correct access. | ||
1521 | */ | ||
1522 | child = page_header(*sptep & PT64_BASE_ADDR_MASK); | ||
1523 | if (child->role.access == direct_access) | ||
1524 | return; | ||
1525 | |||
1526 | mmu_page_remove_parent_pte(child, sptep); | ||
1527 | __set_spte(sptep, shadow_trap_nonpresent_pte); | ||
1528 | kvm_flush_remote_tlbs(vcpu->kvm); | ||
1529 | } | ||
1530 | } | ||
1531 | |||
1405 | static void kvm_mmu_page_unlink_children(struct kvm *kvm, | 1532 | static void kvm_mmu_page_unlink_children(struct kvm *kvm, |
1406 | struct kvm_mmu_page *sp) | 1533 | struct kvm_mmu_page *sp) |
1407 | { | 1534 | { |
@@ -1422,7 +1549,8 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm, | |||
1422 | } else { | 1549 | } else { |
1423 | if (is_large_pte(ent)) | 1550 | if (is_large_pte(ent)) |
1424 | --kvm->stat.lpages; | 1551 | --kvm->stat.lpages; |
1425 | rmap_remove(kvm, &pt[i]); | 1552 | drop_spte(kvm, &pt[i], |
1553 | shadow_trap_nonpresent_pte); | ||
1426 | } | 1554 | } |
1427 | } | 1555 | } |
1428 | pt[i] = shadow_trap_nonpresent_pte; | 1556 | pt[i] = shadow_trap_nonpresent_pte; |
@@ -1464,7 +1592,8 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
1464 | } | 1592 | } |
1465 | 1593 | ||
1466 | static int mmu_zap_unsync_children(struct kvm *kvm, | 1594 | static int mmu_zap_unsync_children(struct kvm *kvm, |
1467 | struct kvm_mmu_page *parent) | 1595 | struct kvm_mmu_page *parent, |
1596 | struct list_head *invalid_list) | ||
1468 | { | 1597 | { |
1469 | int i, zapped = 0; | 1598 | int i, zapped = 0; |
1470 | struct mmu_page_path parents; | 1599 | struct mmu_page_path parents; |
@@ -1478,7 +1607,7 @@ static int mmu_zap_unsync_children(struct kvm *kvm, | |||
1478 | struct kvm_mmu_page *sp; | 1607 | struct kvm_mmu_page *sp; |
1479 | 1608 | ||
1480 | for_each_sp(pages, sp, parents, i) { | 1609 | for_each_sp(pages, sp, parents, i) { |
1481 | kvm_mmu_zap_page(kvm, sp); | 1610 | kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); |
1482 | mmu_pages_clear_parents(&parents); | 1611 | mmu_pages_clear_parents(&parents); |
1483 | zapped++; | 1612 | zapped++; |
1484 | } | 1613 | } |
@@ -1488,32 +1617,52 @@ static int mmu_zap_unsync_children(struct kvm *kvm, | |||
1488 | return zapped; | 1617 | return zapped; |
1489 | } | 1618 | } |
1490 | 1619 | ||
1491 | static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) | 1620 | static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, |
1621 | struct list_head *invalid_list) | ||
1492 | { | 1622 | { |
1493 | int ret; | 1623 | int ret; |
1494 | 1624 | ||
1495 | trace_kvm_mmu_zap_page(sp); | 1625 | trace_kvm_mmu_prepare_zap_page(sp); |
1496 | ++kvm->stat.mmu_shadow_zapped; | 1626 | ++kvm->stat.mmu_shadow_zapped; |
1497 | ret = mmu_zap_unsync_children(kvm, sp); | 1627 | ret = mmu_zap_unsync_children(kvm, sp, invalid_list); |
1498 | kvm_mmu_page_unlink_children(kvm, sp); | 1628 | kvm_mmu_page_unlink_children(kvm, sp); |
1499 | kvm_mmu_unlink_parents(kvm, sp); | 1629 | kvm_mmu_unlink_parents(kvm, sp); |
1500 | kvm_flush_remote_tlbs(kvm); | ||
1501 | if (!sp->role.invalid && !sp->role.direct) | 1630 | if (!sp->role.invalid && !sp->role.direct) |
1502 | unaccount_shadowed(kvm, sp->gfn); | 1631 | unaccount_shadowed(kvm, sp->gfn); |
1503 | if (sp->unsync) | 1632 | if (sp->unsync) |
1504 | kvm_unlink_unsync_page(kvm, sp); | 1633 | kvm_unlink_unsync_page(kvm, sp); |
1505 | if (!sp->root_count) { | 1634 | if (!sp->root_count) { |
1506 | hlist_del(&sp->hash_link); | 1635 | /* Count self */ |
1507 | kvm_mmu_free_page(kvm, sp); | 1636 | ret++; |
1637 | list_move(&sp->link, invalid_list); | ||
1508 | } else { | 1638 | } else { |
1509 | sp->role.invalid = 1; | ||
1510 | list_move(&sp->link, &kvm->arch.active_mmu_pages); | 1639 | list_move(&sp->link, &kvm->arch.active_mmu_pages); |
1511 | kvm_reload_remote_mmus(kvm); | 1640 | kvm_reload_remote_mmus(kvm); |
1512 | } | 1641 | } |
1642 | |||
1643 | sp->role.invalid = 1; | ||
1513 | kvm_mmu_reset_last_pte_updated(kvm); | 1644 | kvm_mmu_reset_last_pte_updated(kvm); |
1514 | return ret; | 1645 | return ret; |
1515 | } | 1646 | } |
1516 | 1647 | ||
1648 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, | ||
1649 | struct list_head *invalid_list) | ||
1650 | { | ||
1651 | struct kvm_mmu_page *sp; | ||
1652 | |||
1653 | if (list_empty(invalid_list)) | ||
1654 | return; | ||
1655 | |||
1656 | kvm_flush_remote_tlbs(kvm); | ||
1657 | |||
1658 | do { | ||
1659 | sp = list_first_entry(invalid_list, struct kvm_mmu_page, link); | ||
1660 | WARN_ON(!sp->role.invalid || sp->root_count); | ||
1661 | kvm_mmu_free_page(kvm, sp); | ||
1662 | } while (!list_empty(invalid_list)); | ||
1663 | |||
1664 | } | ||
1665 | |||
1517 | /* | 1666 | /* |
1518 | * Changing the number of mmu pages allocated to the vm | 1667 | * Changing the number of mmu pages allocated to the vm |
1519 | * Note: if kvm_nr_mmu_pages is too small, you will get dead lock | 1668 | * Note: if kvm_nr_mmu_pages is too small, you will get dead lock |
@@ -1521,6 +1670,7 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
1521 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) | 1670 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) |
1522 | { | 1671 | { |
1523 | int used_pages; | 1672 | int used_pages; |
1673 | LIST_HEAD(invalid_list); | ||
1524 | 1674 | ||
1525 | used_pages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages; | 1675 | used_pages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages; |
1526 | used_pages = max(0, used_pages); | 1676 | used_pages = max(0, used_pages); |
@@ -1538,9 +1688,10 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) | |||
1538 | 1688 | ||
1539 | page = container_of(kvm->arch.active_mmu_pages.prev, | 1689 | page = container_of(kvm->arch.active_mmu_pages.prev, |
1540 | struct kvm_mmu_page, link); | 1690 | struct kvm_mmu_page, link); |
1541 | used_pages -= kvm_mmu_zap_page(kvm, page); | 1691 | used_pages -= kvm_mmu_prepare_zap_page(kvm, page, |
1542 | used_pages--; | 1692 | &invalid_list); |
1543 | } | 1693 | } |
1694 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | ||
1544 | kvm_nr_mmu_pages = used_pages; | 1695 | kvm_nr_mmu_pages = used_pages; |
1545 | kvm->arch.n_free_mmu_pages = 0; | 1696 | kvm->arch.n_free_mmu_pages = 0; |
1546 | } | 1697 | } |
@@ -1553,47 +1704,36 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) | |||
1553 | 1704 | ||
1554 | static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) | 1705 | static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) |
1555 | { | 1706 | { |
1556 | unsigned index; | ||
1557 | struct hlist_head *bucket; | ||
1558 | struct kvm_mmu_page *sp; | 1707 | struct kvm_mmu_page *sp; |
1559 | struct hlist_node *node, *n; | 1708 | struct hlist_node *node; |
1709 | LIST_HEAD(invalid_list); | ||
1560 | int r; | 1710 | int r; |
1561 | 1711 | ||
1562 | pgprintk("%s: looking for gfn %lx\n", __func__, gfn); | 1712 | pgprintk("%s: looking for gfn %lx\n", __func__, gfn); |
1563 | r = 0; | 1713 | r = 0; |
1564 | index = kvm_page_table_hashfn(gfn); | 1714 | |
1565 | bucket = &kvm->arch.mmu_page_hash[index]; | 1715 | for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { |
1566 | restart: | 1716 | pgprintk("%s: gfn %lx role %x\n", __func__, gfn, |
1567 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) | 1717 | sp->role.word); |
1568 | if (sp->gfn == gfn && !sp->role.direct) { | 1718 | r = 1; |
1569 | pgprintk("%s: gfn %lx role %x\n", __func__, gfn, | 1719 | kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); |
1570 | sp->role.word); | 1720 | } |
1571 | r = 1; | 1721 | kvm_mmu_commit_zap_page(kvm, &invalid_list); |
1572 | if (kvm_mmu_zap_page(kvm, sp)) | ||
1573 | goto restart; | ||
1574 | } | ||
1575 | return r; | 1722 | return r; |
1576 | } | 1723 | } |
1577 | 1724 | ||
1578 | static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) | 1725 | static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) |
1579 | { | 1726 | { |
1580 | unsigned index; | ||
1581 | struct hlist_head *bucket; | ||
1582 | struct kvm_mmu_page *sp; | 1727 | struct kvm_mmu_page *sp; |
1583 | struct hlist_node *node, *nn; | 1728 | struct hlist_node *node; |
1729 | LIST_HEAD(invalid_list); | ||
1584 | 1730 | ||
1585 | index = kvm_page_table_hashfn(gfn); | 1731 | for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { |
1586 | bucket = &kvm->arch.mmu_page_hash[index]; | 1732 | pgprintk("%s: zap %lx %x\n", |
1587 | restart: | 1733 | __func__, gfn, sp->role.word); |
1588 | hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) { | 1734 | kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); |
1589 | if (sp->gfn == gfn && !sp->role.direct | ||
1590 | && !sp->role.invalid) { | ||
1591 | pgprintk("%s: zap %lx %x\n", | ||
1592 | __func__, gfn, sp->role.word); | ||
1593 | if (kvm_mmu_zap_page(kvm, sp)) | ||
1594 | goto restart; | ||
1595 | } | ||
1596 | } | 1735 | } |
1736 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | ||
1597 | } | 1737 | } |
1598 | 1738 | ||
1599 | static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) | 1739 | static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) |
@@ -1723,47 +1863,51 @@ u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) | |||
1723 | } | 1863 | } |
1724 | EXPORT_SYMBOL_GPL(kvm_get_guest_memory_type); | 1864 | EXPORT_SYMBOL_GPL(kvm_get_guest_memory_type); |
1725 | 1865 | ||
1726 | static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 1866 | static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) |
1727 | { | 1867 | { |
1728 | unsigned index; | ||
1729 | struct hlist_head *bucket; | ||
1730 | struct kvm_mmu_page *s; | ||
1731 | struct hlist_node *node, *n; | ||
1732 | |||
1733 | index = kvm_page_table_hashfn(sp->gfn); | ||
1734 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; | ||
1735 | /* don't unsync if pagetable is shadowed with multiple roles */ | ||
1736 | hlist_for_each_entry_safe(s, node, n, bucket, hash_link) { | ||
1737 | if (s->gfn != sp->gfn || s->role.direct) | ||
1738 | continue; | ||
1739 | if (s->role.word != sp->role.word) | ||
1740 | return 1; | ||
1741 | } | ||
1742 | trace_kvm_mmu_unsync_page(sp); | 1868 | trace_kvm_mmu_unsync_page(sp); |
1743 | ++vcpu->kvm->stat.mmu_unsync; | 1869 | ++vcpu->kvm->stat.mmu_unsync; |
1744 | sp->unsync = 1; | 1870 | sp->unsync = 1; |
1745 | 1871 | ||
1746 | kvm_mmu_mark_parents_unsync(sp); | 1872 | kvm_mmu_mark_parents_unsync(sp); |
1747 | |||
1748 | mmu_convert_notrap(sp); | 1873 | mmu_convert_notrap(sp); |
1749 | return 0; | 1874 | } |
1875 | |||
1876 | static void kvm_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
1877 | { | ||
1878 | struct kvm_mmu_page *s; | ||
1879 | struct hlist_node *node; | ||
1880 | |||
1881 | for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { | ||
1882 | if (s->unsync) | ||
1883 | continue; | ||
1884 | WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); | ||
1885 | __kvm_unsync_page(vcpu, s); | ||
1886 | } | ||
1750 | } | 1887 | } |
1751 | 1888 | ||
1752 | static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, | 1889 | static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, |
1753 | bool can_unsync) | 1890 | bool can_unsync) |
1754 | { | 1891 | { |
1755 | struct kvm_mmu_page *shadow; | 1892 | struct kvm_mmu_page *s; |
1893 | struct hlist_node *node; | ||
1894 | bool need_unsync = false; | ||
1756 | 1895 | ||
1757 | shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn); | 1896 | for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { |
1758 | if (shadow) { | 1897 | if (!can_unsync) |
1759 | if (shadow->role.level != PT_PAGE_TABLE_LEVEL) | ||
1760 | return 1; | 1898 | return 1; |
1761 | if (shadow->unsync) | 1899 | |
1762 | return 0; | 1900 | if (s->role.level != PT_PAGE_TABLE_LEVEL) |
1763 | if (can_unsync && oos_shadow) | 1901 | return 1; |
1764 | return kvm_unsync_page(vcpu, shadow); | 1902 | |
1765 | return 1; | 1903 | if (!need_unsync && !s->unsync) { |
1904 | if (!oos_shadow) | ||
1905 | return 1; | ||
1906 | need_unsync = true; | ||
1907 | } | ||
1766 | } | 1908 | } |
1909 | if (need_unsync) | ||
1910 | kvm_unsync_pages(vcpu, gfn); | ||
1767 | return 0; | 1911 | return 0; |
1768 | } | 1912 | } |
1769 | 1913 | ||
@@ -1804,13 +1948,14 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1804 | spte |= (u64)pfn << PAGE_SHIFT; | 1948 | spte |= (u64)pfn << PAGE_SHIFT; |
1805 | 1949 | ||
1806 | if ((pte_access & ACC_WRITE_MASK) | 1950 | if ((pte_access & ACC_WRITE_MASK) |
1807 | || (write_fault && !is_write_protection(vcpu) && !user_fault)) { | 1951 | || (!tdp_enabled && write_fault && !is_write_protection(vcpu) |
1952 | && !user_fault)) { | ||
1808 | 1953 | ||
1809 | if (level > PT_PAGE_TABLE_LEVEL && | 1954 | if (level > PT_PAGE_TABLE_LEVEL && |
1810 | has_wrprotected_page(vcpu->kvm, gfn, level)) { | 1955 | has_wrprotected_page(vcpu->kvm, gfn, level)) { |
1811 | ret = 1; | 1956 | ret = 1; |
1812 | spte = shadow_trap_nonpresent_pte; | 1957 | drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte); |
1813 | goto set_pte; | 1958 | goto done; |
1814 | } | 1959 | } |
1815 | 1960 | ||
1816 | spte |= PT_WRITABLE_MASK; | 1961 | spte |= PT_WRITABLE_MASK; |
@@ -1841,7 +1986,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1841 | mark_page_dirty(vcpu->kvm, gfn); | 1986 | mark_page_dirty(vcpu->kvm, gfn); |
1842 | 1987 | ||
1843 | set_pte: | 1988 | set_pte: |
1844 | __set_spte(sptep, spte); | 1989 | if (is_writable_pte(*sptep) && !is_writable_pte(spte)) |
1990 | kvm_set_pfn_dirty(pfn); | ||
1991 | update_spte(sptep, spte); | ||
1992 | done: | ||
1845 | return ret; | 1993 | return ret; |
1846 | } | 1994 | } |
1847 | 1995 | ||
@@ -1853,7 +2001,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1853 | bool reset_host_protection) | 2001 | bool reset_host_protection) |
1854 | { | 2002 | { |
1855 | int was_rmapped = 0; | 2003 | int was_rmapped = 0; |
1856 | int was_writable = is_writable_pte(*sptep); | ||
1857 | int rmap_count; | 2004 | int rmap_count; |
1858 | 2005 | ||
1859 | pgprintk("%s: spte %llx access %x write_fault %d" | 2006 | pgprintk("%s: spte %llx access %x write_fault %d" |
@@ -1878,7 +2025,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1878 | } else if (pfn != spte_to_pfn(*sptep)) { | 2025 | } else if (pfn != spte_to_pfn(*sptep)) { |
1879 | pgprintk("hfn old %lx new %lx\n", | 2026 | pgprintk("hfn old %lx new %lx\n", |
1880 | spte_to_pfn(*sptep), pfn); | 2027 | spte_to_pfn(*sptep), pfn); |
1881 | rmap_remove(vcpu->kvm, sptep); | 2028 | drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte); |
2029 | kvm_flush_remote_tlbs(vcpu->kvm); | ||
1882 | } else | 2030 | } else |
1883 | was_rmapped = 1; | 2031 | was_rmapped = 1; |
1884 | } | 2032 | } |
@@ -1888,7 +2036,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1888 | reset_host_protection)) { | 2036 | reset_host_protection)) { |
1889 | if (write_fault) | 2037 | if (write_fault) |
1890 | *ptwrite = 1; | 2038 | *ptwrite = 1; |
1891 | kvm_x86_ops->tlb_flush(vcpu); | 2039 | kvm_mmu_flush_tlb(vcpu); |
1892 | } | 2040 | } |
1893 | 2041 | ||
1894 | pgprintk("%s: setting spte %llx\n", __func__, *sptep); | 2042 | pgprintk("%s: setting spte %llx\n", __func__, *sptep); |
@@ -1902,15 +2050,10 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1902 | page_header_update_slot(vcpu->kvm, sptep, gfn); | 2050 | page_header_update_slot(vcpu->kvm, sptep, gfn); |
1903 | if (!was_rmapped) { | 2051 | if (!was_rmapped) { |
1904 | rmap_count = rmap_add(vcpu, sptep, gfn); | 2052 | rmap_count = rmap_add(vcpu, sptep, gfn); |
1905 | kvm_release_pfn_clean(pfn); | ||
1906 | if (rmap_count > RMAP_RECYCLE_THRESHOLD) | 2053 | if (rmap_count > RMAP_RECYCLE_THRESHOLD) |
1907 | rmap_recycle(vcpu, sptep, gfn); | 2054 | rmap_recycle(vcpu, sptep, gfn); |
1908 | } else { | ||
1909 | if (was_writable) | ||
1910 | kvm_release_pfn_dirty(pfn); | ||
1911 | else | ||
1912 | kvm_release_pfn_clean(pfn); | ||
1913 | } | 2055 | } |
2056 | kvm_release_pfn_clean(pfn); | ||
1914 | if (speculative) { | 2057 | if (speculative) { |
1915 | vcpu->arch.last_pte_updated = sptep; | 2058 | vcpu->arch.last_pte_updated = sptep; |
1916 | vcpu->arch.last_pte_gfn = gfn; | 2059 | vcpu->arch.last_pte_gfn = gfn; |
@@ -1939,7 +2082,10 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
1939 | } | 2082 | } |
1940 | 2083 | ||
1941 | if (*iterator.sptep == shadow_trap_nonpresent_pte) { | 2084 | if (*iterator.sptep == shadow_trap_nonpresent_pte) { |
1942 | pseudo_gfn = (iterator.addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT; | 2085 | u64 base_addr = iterator.addr; |
2086 | |||
2087 | base_addr &= PT64_LVL_ADDR_MASK(iterator.level); | ||
2088 | pseudo_gfn = base_addr >> PAGE_SHIFT; | ||
1943 | sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr, | 2089 | sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr, |
1944 | iterator.level - 1, | 2090 | iterator.level - 1, |
1945 | 1, ACC_ALL, iterator.sptep); | 2091 | 1, ACC_ALL, iterator.sptep); |
@@ -1958,6 +2104,29 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
1958 | return pt_write; | 2104 | return pt_write; |
1959 | } | 2105 | } |
1960 | 2106 | ||
2107 | static void kvm_send_hwpoison_signal(struct kvm *kvm, gfn_t gfn) | ||
2108 | { | ||
2109 | char buf[1]; | ||
2110 | void __user *hva; | ||
2111 | int r; | ||
2112 | |||
2113 | /* Touch the page, so send SIGBUS */ | ||
2114 | hva = (void __user *)gfn_to_hva(kvm, gfn); | ||
2115 | r = copy_from_user(buf, hva, 1); | ||
2116 | } | ||
2117 | |||
2118 | static int kvm_handle_bad_page(struct kvm *kvm, gfn_t gfn, pfn_t pfn) | ||
2119 | { | ||
2120 | kvm_release_pfn_clean(pfn); | ||
2121 | if (is_hwpoison_pfn(pfn)) { | ||
2122 | kvm_send_hwpoison_signal(kvm, gfn); | ||
2123 | return 0; | ||
2124 | } else if (is_fault_pfn(pfn)) | ||
2125 | return -EFAULT; | ||
2126 | |||
2127 | return 1; | ||
2128 | } | ||
2129 | |||
1961 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | 2130 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) |
1962 | { | 2131 | { |
1963 | int r; | 2132 | int r; |
@@ -1981,10 +2150,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | |||
1981 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | 2150 | pfn = gfn_to_pfn(vcpu->kvm, gfn); |
1982 | 2151 | ||
1983 | /* mmio */ | 2152 | /* mmio */ |
1984 | if (is_error_pfn(pfn)) { | 2153 | if (is_error_pfn(pfn)) |
1985 | kvm_release_pfn_clean(pfn); | 2154 | return kvm_handle_bad_page(vcpu->kvm, gfn, pfn); |
1986 | return 1; | ||
1987 | } | ||
1988 | 2155 | ||
1989 | spin_lock(&vcpu->kvm->mmu_lock); | 2156 | spin_lock(&vcpu->kvm->mmu_lock); |
1990 | if (mmu_notifier_retry(vcpu, mmu_seq)) | 2157 | if (mmu_notifier_retry(vcpu, mmu_seq)) |
@@ -2007,6 +2174,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) | |||
2007 | { | 2174 | { |
2008 | int i; | 2175 | int i; |
2009 | struct kvm_mmu_page *sp; | 2176 | struct kvm_mmu_page *sp; |
2177 | LIST_HEAD(invalid_list); | ||
2010 | 2178 | ||
2011 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | 2179 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) |
2012 | return; | 2180 | return; |
@@ -2016,8 +2184,10 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) | |||
2016 | 2184 | ||
2017 | sp = page_header(root); | 2185 | sp = page_header(root); |
2018 | --sp->root_count; | 2186 | --sp->root_count; |
2019 | if (!sp->root_count && sp->role.invalid) | 2187 | if (!sp->root_count && sp->role.invalid) { |
2020 | kvm_mmu_zap_page(vcpu->kvm, sp); | 2188 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); |
2189 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | ||
2190 | } | ||
2021 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; | 2191 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; |
2022 | spin_unlock(&vcpu->kvm->mmu_lock); | 2192 | spin_unlock(&vcpu->kvm->mmu_lock); |
2023 | return; | 2193 | return; |
@@ -2030,10 +2200,12 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) | |||
2030 | sp = page_header(root); | 2200 | sp = page_header(root); |
2031 | --sp->root_count; | 2201 | --sp->root_count; |
2032 | if (!sp->root_count && sp->role.invalid) | 2202 | if (!sp->root_count && sp->role.invalid) |
2033 | kvm_mmu_zap_page(vcpu->kvm, sp); | 2203 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, |
2204 | &invalid_list); | ||
2034 | } | 2205 | } |
2035 | vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; | 2206 | vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; |
2036 | } | 2207 | } |
2208 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | ||
2037 | spin_unlock(&vcpu->kvm->mmu_lock); | 2209 | spin_unlock(&vcpu->kvm->mmu_lock); |
2038 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; | 2210 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; |
2039 | } | 2211 | } |
@@ -2043,7 +2215,7 @@ static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn) | |||
2043 | int ret = 0; | 2215 | int ret = 0; |
2044 | 2216 | ||
2045 | if (!kvm_is_visible_gfn(vcpu->kvm, root_gfn)) { | 2217 | if (!kvm_is_visible_gfn(vcpu->kvm, root_gfn)) { |
2046 | set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); | 2218 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); |
2047 | ret = 1; | 2219 | ret = 1; |
2048 | } | 2220 | } |
2049 | 2221 | ||
@@ -2071,6 +2243,7 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
2071 | root_gfn = 0; | 2243 | root_gfn = 0; |
2072 | } | 2244 | } |
2073 | spin_lock(&vcpu->kvm->mmu_lock); | 2245 | spin_lock(&vcpu->kvm->mmu_lock); |
2246 | kvm_mmu_free_some_pages(vcpu); | ||
2074 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, | 2247 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, |
2075 | PT64_ROOT_LEVEL, direct, | 2248 | PT64_ROOT_LEVEL, direct, |
2076 | ACC_ALL, NULL); | 2249 | ACC_ALL, NULL); |
@@ -2101,6 +2274,7 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
2101 | root_gfn = i << 30; | 2274 | root_gfn = i << 30; |
2102 | } | 2275 | } |
2103 | spin_lock(&vcpu->kvm->mmu_lock); | 2276 | spin_lock(&vcpu->kvm->mmu_lock); |
2277 | kvm_mmu_free_some_pages(vcpu); | ||
2104 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, | 2278 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, |
2105 | PT32_ROOT_LEVEL, direct, | 2279 | PT32_ROOT_LEVEL, direct, |
2106 | ACC_ALL, NULL); | 2280 | ACC_ALL, NULL); |
@@ -2196,10 +2370,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, | |||
2196 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | 2370 | mmu_seq = vcpu->kvm->mmu_notifier_seq; |
2197 | smp_rmb(); | 2371 | smp_rmb(); |
2198 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | 2372 | pfn = gfn_to_pfn(vcpu->kvm, gfn); |
2199 | if (is_error_pfn(pfn)) { | 2373 | if (is_error_pfn(pfn)) |
2200 | kvm_release_pfn_clean(pfn); | 2374 | return kvm_handle_bad_page(vcpu->kvm, gfn, pfn); |
2201 | return 1; | ||
2202 | } | ||
2203 | spin_lock(&vcpu->kvm->mmu_lock); | 2375 | spin_lock(&vcpu->kvm->mmu_lock); |
2204 | if (mmu_notifier_retry(vcpu, mmu_seq)) | 2376 | if (mmu_notifier_retry(vcpu, mmu_seq)) |
2205 | goto out_unlock; | 2377 | goto out_unlock; |
@@ -2241,7 +2413,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu) | |||
2241 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) | 2413 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) |
2242 | { | 2414 | { |
2243 | ++vcpu->stat.tlb_flush; | 2415 | ++vcpu->stat.tlb_flush; |
2244 | kvm_x86_ops->tlb_flush(vcpu); | 2416 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); |
2245 | } | 2417 | } |
2246 | 2418 | ||
2247 | static void paging_new_cr3(struct kvm_vcpu *vcpu) | 2419 | static void paging_new_cr3(struct kvm_vcpu *vcpu) |
@@ -2455,10 +2627,9 @@ static int init_kvm_mmu(struct kvm_vcpu *vcpu) | |||
2455 | static void destroy_kvm_mmu(struct kvm_vcpu *vcpu) | 2627 | static void destroy_kvm_mmu(struct kvm_vcpu *vcpu) |
2456 | { | 2628 | { |
2457 | ASSERT(vcpu); | 2629 | ASSERT(vcpu); |
2458 | if (VALID_PAGE(vcpu->arch.mmu.root_hpa)) { | 2630 | if (VALID_PAGE(vcpu->arch.mmu.root_hpa)) |
2631 | /* mmu.free() should set root_hpa = INVALID_PAGE */ | ||
2459 | vcpu->arch.mmu.free(vcpu); | 2632 | vcpu->arch.mmu.free(vcpu); |
2460 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; | ||
2461 | } | ||
2462 | } | 2633 | } |
2463 | 2634 | ||
2464 | int kvm_mmu_reset_context(struct kvm_vcpu *vcpu) | 2635 | int kvm_mmu_reset_context(struct kvm_vcpu *vcpu) |
@@ -2475,9 +2646,6 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) | |||
2475 | r = mmu_topup_memory_caches(vcpu); | 2646 | r = mmu_topup_memory_caches(vcpu); |
2476 | if (r) | 2647 | if (r) |
2477 | goto out; | 2648 | goto out; |
2478 | spin_lock(&vcpu->kvm->mmu_lock); | ||
2479 | kvm_mmu_free_some_pages(vcpu); | ||
2480 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
2481 | r = mmu_alloc_roots(vcpu); | 2649 | r = mmu_alloc_roots(vcpu); |
2482 | spin_lock(&vcpu->kvm->mmu_lock); | 2650 | spin_lock(&vcpu->kvm->mmu_lock); |
2483 | mmu_sync_roots(vcpu); | 2651 | mmu_sync_roots(vcpu); |
@@ -2506,7 +2674,7 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, | |||
2506 | pte = *spte; | 2674 | pte = *spte; |
2507 | if (is_shadow_present_pte(pte)) { | 2675 | if (is_shadow_present_pte(pte)) { |
2508 | if (is_last_spte(pte, sp->role.level)) | 2676 | if (is_last_spte(pte, sp->role.level)) |
2509 | rmap_remove(vcpu->kvm, spte); | 2677 | drop_spte(vcpu->kvm, spte, shadow_trap_nonpresent_pte); |
2510 | else { | 2678 | else { |
2511 | child = page_header(pte & PT64_BASE_ADDR_MASK); | 2679 | child = page_header(pte & PT64_BASE_ADDR_MASK); |
2512 | mmu_page_remove_parent_pte(child, spte); | 2680 | mmu_page_remove_parent_pte(child, spte); |
@@ -2527,6 +2695,9 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, | |||
2527 | return; | 2695 | return; |
2528 | } | 2696 | } |
2529 | 2697 | ||
2698 | if (is_rsvd_bits_set(vcpu, *(u64 *)new, PT_PAGE_TABLE_LEVEL)) | ||
2699 | return; | ||
2700 | |||
2530 | ++vcpu->kvm->stat.mmu_pte_updated; | 2701 | ++vcpu->kvm->stat.mmu_pte_updated; |
2531 | if (!sp->role.cr4_pae) | 2702 | if (!sp->role.cr4_pae) |
2532 | paging32_update_pte(vcpu, sp, spte, new); | 2703 | paging32_update_pte(vcpu, sp, spte, new); |
@@ -2547,11 +2718,15 @@ static bool need_remote_flush(u64 old, u64 new) | |||
2547 | return (old & ~new & PT64_PERM_MASK) != 0; | 2718 | return (old & ~new & PT64_PERM_MASK) != 0; |
2548 | } | 2719 | } |
2549 | 2720 | ||
2550 | static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, u64 old, u64 new) | 2721 | static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page, |
2722 | bool remote_flush, bool local_flush) | ||
2551 | { | 2723 | { |
2552 | if (need_remote_flush(old, new)) | 2724 | if (zap_page) |
2725 | return; | ||
2726 | |||
2727 | if (remote_flush) | ||
2553 | kvm_flush_remote_tlbs(vcpu->kvm); | 2728 | kvm_flush_remote_tlbs(vcpu->kvm); |
2554 | else | 2729 | else if (local_flush) |
2555 | kvm_mmu_flush_tlb(vcpu); | 2730 | kvm_mmu_flush_tlb(vcpu); |
2556 | } | 2731 | } |
2557 | 2732 | ||
@@ -2601,10 +2776,10 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
2601 | bool guest_initiated) | 2776 | bool guest_initiated) |
2602 | { | 2777 | { |
2603 | gfn_t gfn = gpa >> PAGE_SHIFT; | 2778 | gfn_t gfn = gpa >> PAGE_SHIFT; |
2779 | union kvm_mmu_page_role mask = { .word = 0 }; | ||
2604 | struct kvm_mmu_page *sp; | 2780 | struct kvm_mmu_page *sp; |
2605 | struct hlist_node *node, *n; | 2781 | struct hlist_node *node; |
2606 | struct hlist_head *bucket; | 2782 | LIST_HEAD(invalid_list); |
2607 | unsigned index; | ||
2608 | u64 entry, gentry; | 2783 | u64 entry, gentry; |
2609 | u64 *spte; | 2784 | u64 *spte; |
2610 | unsigned offset = offset_in_page(gpa); | 2785 | unsigned offset = offset_in_page(gpa); |
@@ -2617,6 +2792,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
2617 | int npte; | 2792 | int npte; |
2618 | int r; | 2793 | int r; |
2619 | int invlpg_counter; | 2794 | int invlpg_counter; |
2795 | bool remote_flush, local_flush, zap_page; | ||
2796 | |||
2797 | zap_page = remote_flush = local_flush = false; | ||
2620 | 2798 | ||
2621 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); | 2799 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); |
2622 | 2800 | ||
@@ -2672,13 +2850,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
2672 | vcpu->arch.last_pte_updated = NULL; | 2850 | vcpu->arch.last_pte_updated = NULL; |
2673 | } | 2851 | } |
2674 | } | 2852 | } |
2675 | index = kvm_page_table_hashfn(gfn); | ||
2676 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; | ||
2677 | 2853 | ||
2678 | restart: | 2854 | mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; |
2679 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { | 2855 | for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) { |
2680 | if (sp->gfn != gfn || sp->role.direct || sp->role.invalid) | ||
2681 | continue; | ||
2682 | pte_size = sp->role.cr4_pae ? 8 : 4; | 2856 | pte_size = sp->role.cr4_pae ? 8 : 4; |
2683 | misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); | 2857 | misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); |
2684 | misaligned |= bytes < 4; | 2858 | misaligned |= bytes < 4; |
@@ -2695,8 +2869,8 @@ restart: | |||
2695 | */ | 2869 | */ |
2696 | pgprintk("misaligned: gpa %llx bytes %d role %x\n", | 2870 | pgprintk("misaligned: gpa %llx bytes %d role %x\n", |
2697 | gpa, bytes, sp->role.word); | 2871 | gpa, bytes, sp->role.word); |
2698 | if (kvm_mmu_zap_page(vcpu->kvm, sp)) | 2872 | zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, |
2699 | goto restart; | 2873 | &invalid_list); |
2700 | ++vcpu->kvm->stat.mmu_flooded; | 2874 | ++vcpu->kvm->stat.mmu_flooded; |
2701 | continue; | 2875 | continue; |
2702 | } | 2876 | } |
@@ -2720,16 +2894,22 @@ restart: | |||
2720 | if (quadrant != sp->role.quadrant) | 2894 | if (quadrant != sp->role.quadrant) |
2721 | continue; | 2895 | continue; |
2722 | } | 2896 | } |
2897 | local_flush = true; | ||
2723 | spte = &sp->spt[page_offset / sizeof(*spte)]; | 2898 | spte = &sp->spt[page_offset / sizeof(*spte)]; |
2724 | while (npte--) { | 2899 | while (npte--) { |
2725 | entry = *spte; | 2900 | entry = *spte; |
2726 | mmu_pte_write_zap_pte(vcpu, sp, spte); | 2901 | mmu_pte_write_zap_pte(vcpu, sp, spte); |
2727 | if (gentry) | 2902 | if (gentry && |
2903 | !((sp->role.word ^ vcpu->arch.mmu.base_role.word) | ||
2904 | & mask.word)) | ||
2728 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); | 2905 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); |
2729 | mmu_pte_write_flush_tlb(vcpu, entry, *spte); | 2906 | if (!remote_flush && need_remote_flush(entry, *spte)) |
2907 | remote_flush = true; | ||
2730 | ++spte; | 2908 | ++spte; |
2731 | } | 2909 | } |
2732 | } | 2910 | } |
2911 | mmu_pte_write_flush_tlb(vcpu, zap_page, remote_flush, local_flush); | ||
2912 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | ||
2733 | kvm_mmu_audit(vcpu, "post pte write"); | 2913 | kvm_mmu_audit(vcpu, "post pte write"); |
2734 | spin_unlock(&vcpu->kvm->mmu_lock); | 2914 | spin_unlock(&vcpu->kvm->mmu_lock); |
2735 | if (!is_error_pfn(vcpu->arch.update_pte.pfn)) { | 2915 | if (!is_error_pfn(vcpu->arch.update_pte.pfn)) { |
@@ -2757,15 +2937,21 @@ EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); | |||
2757 | 2937 | ||
2758 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) | 2938 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) |
2759 | { | 2939 | { |
2760 | while (vcpu->kvm->arch.n_free_mmu_pages < KVM_REFILL_PAGES && | 2940 | int free_pages; |
2941 | LIST_HEAD(invalid_list); | ||
2942 | |||
2943 | free_pages = vcpu->kvm->arch.n_free_mmu_pages; | ||
2944 | while (free_pages < KVM_REFILL_PAGES && | ||
2761 | !list_empty(&vcpu->kvm->arch.active_mmu_pages)) { | 2945 | !list_empty(&vcpu->kvm->arch.active_mmu_pages)) { |
2762 | struct kvm_mmu_page *sp; | 2946 | struct kvm_mmu_page *sp; |
2763 | 2947 | ||
2764 | sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev, | 2948 | sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev, |
2765 | struct kvm_mmu_page, link); | 2949 | struct kvm_mmu_page, link); |
2766 | kvm_mmu_zap_page(vcpu->kvm, sp); | 2950 | free_pages += kvm_mmu_prepare_zap_page(vcpu->kvm, sp, |
2951 | &invalid_list); | ||
2767 | ++vcpu->kvm->stat.mmu_recycled; | 2952 | ++vcpu->kvm->stat.mmu_recycled; |
2768 | } | 2953 | } |
2954 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | ||
2769 | } | 2955 | } |
2770 | 2956 | ||
2771 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) | 2957 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) |
@@ -2793,11 +2979,8 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) | |||
2793 | return 1; | 2979 | return 1; |
2794 | case EMULATE_DO_MMIO: | 2980 | case EMULATE_DO_MMIO: |
2795 | ++vcpu->stat.mmio_exits; | 2981 | ++vcpu->stat.mmio_exits; |
2796 | return 0; | 2982 | /* fall through */ |
2797 | case EMULATE_FAIL: | 2983 | case EMULATE_FAIL: |
2798 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
2799 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | ||
2800 | vcpu->run->internal.ndata = 0; | ||
2801 | return 0; | 2984 | return 0; |
2802 | default: | 2985 | default: |
2803 | BUG(); | 2986 | BUG(); |
@@ -2894,7 +3077,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | |||
2894 | pt = sp->spt; | 3077 | pt = sp->spt; |
2895 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) | 3078 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) |
2896 | /* avoid RMW */ | 3079 | /* avoid RMW */ |
2897 | if (pt[i] & PT_WRITABLE_MASK) | 3080 | if (is_writable_pte(pt[i])) |
2898 | pt[i] &= ~PT_WRITABLE_MASK; | 3081 | pt[i] &= ~PT_WRITABLE_MASK; |
2899 | } | 3082 | } |
2900 | kvm_flush_remote_tlbs(kvm); | 3083 | kvm_flush_remote_tlbs(kvm); |
@@ -2903,28 +3086,29 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | |||
2903 | void kvm_mmu_zap_all(struct kvm *kvm) | 3086 | void kvm_mmu_zap_all(struct kvm *kvm) |
2904 | { | 3087 | { |
2905 | struct kvm_mmu_page *sp, *node; | 3088 | struct kvm_mmu_page *sp, *node; |
3089 | LIST_HEAD(invalid_list); | ||
2906 | 3090 | ||
2907 | spin_lock(&kvm->mmu_lock); | 3091 | spin_lock(&kvm->mmu_lock); |
2908 | restart: | 3092 | restart: |
2909 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) | 3093 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) |
2910 | if (kvm_mmu_zap_page(kvm, sp)) | 3094 | if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list)) |
2911 | goto restart; | 3095 | goto restart; |
2912 | 3096 | ||
3097 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | ||
2913 | spin_unlock(&kvm->mmu_lock); | 3098 | spin_unlock(&kvm->mmu_lock); |
2914 | |||
2915 | kvm_flush_remote_tlbs(kvm); | ||
2916 | } | 3099 | } |
2917 | 3100 | ||
2918 | static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm) | 3101 | static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm, |
3102 | struct list_head *invalid_list) | ||
2919 | { | 3103 | { |
2920 | struct kvm_mmu_page *page; | 3104 | struct kvm_mmu_page *page; |
2921 | 3105 | ||
2922 | page = container_of(kvm->arch.active_mmu_pages.prev, | 3106 | page = container_of(kvm->arch.active_mmu_pages.prev, |
2923 | struct kvm_mmu_page, link); | 3107 | struct kvm_mmu_page, link); |
2924 | return kvm_mmu_zap_page(kvm, page) + 1; | 3108 | return kvm_mmu_prepare_zap_page(kvm, page, invalid_list); |
2925 | } | 3109 | } |
2926 | 3110 | ||
2927 | static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) | 3111 | static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) |
2928 | { | 3112 | { |
2929 | struct kvm *kvm; | 3113 | struct kvm *kvm; |
2930 | struct kvm *kvm_freed = NULL; | 3114 | struct kvm *kvm_freed = NULL; |
@@ -2934,6 +3118,7 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) | |||
2934 | 3118 | ||
2935 | list_for_each_entry(kvm, &vm_list, vm_list) { | 3119 | list_for_each_entry(kvm, &vm_list, vm_list) { |
2936 | int npages, idx, freed_pages; | 3120 | int npages, idx, freed_pages; |
3121 | LIST_HEAD(invalid_list); | ||
2937 | 3122 | ||
2938 | idx = srcu_read_lock(&kvm->srcu); | 3123 | idx = srcu_read_lock(&kvm->srcu); |
2939 | spin_lock(&kvm->mmu_lock); | 3124 | spin_lock(&kvm->mmu_lock); |
@@ -2941,12 +3126,14 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) | |||
2941 | kvm->arch.n_free_mmu_pages; | 3126 | kvm->arch.n_free_mmu_pages; |
2942 | cache_count += npages; | 3127 | cache_count += npages; |
2943 | if (!kvm_freed && nr_to_scan > 0 && npages > 0) { | 3128 | if (!kvm_freed && nr_to_scan > 0 && npages > 0) { |
2944 | freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm); | 3129 | freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm, |
3130 | &invalid_list); | ||
2945 | cache_count -= freed_pages; | 3131 | cache_count -= freed_pages; |
2946 | kvm_freed = kvm; | 3132 | kvm_freed = kvm; |
2947 | } | 3133 | } |
2948 | nr_to_scan--; | 3134 | nr_to_scan--; |
2949 | 3135 | ||
3136 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | ||
2950 | spin_unlock(&kvm->mmu_lock); | 3137 | spin_unlock(&kvm->mmu_lock); |
2951 | srcu_read_unlock(&kvm->srcu, idx); | 3138 | srcu_read_unlock(&kvm->srcu, idx); |
2952 | } | 3139 | } |
@@ -3072,7 +3259,7 @@ static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu, | |||
3072 | 3259 | ||
3073 | static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu) | 3260 | static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu) |
3074 | { | 3261 | { |
3075 | kvm_set_cr3(vcpu, vcpu->arch.cr3); | 3262 | (void)kvm_set_cr3(vcpu, vcpu->arch.cr3); |
3076 | return 1; | 3263 | return 1; |
3077 | } | 3264 | } |
3078 | 3265 | ||
@@ -3329,9 +3516,9 @@ void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) | |||
3329 | struct kvm_mmu_page *rev_sp; | 3516 | struct kvm_mmu_page *rev_sp; |
3330 | gfn_t gfn; | 3517 | gfn_t gfn; |
3331 | 3518 | ||
3332 | if (*sptep & PT_WRITABLE_MASK) { | 3519 | if (is_writable_pte(*sptep)) { |
3333 | rev_sp = page_header(__pa(sptep)); | 3520 | rev_sp = page_header(__pa(sptep)); |
3334 | gfn = rev_sp->gfns[sptep - rev_sp->spt]; | 3521 | gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt); |
3335 | 3522 | ||
3336 | if (!gfn_to_memslot(kvm, gfn)) { | 3523 | if (!gfn_to_memslot(kvm, gfn)) { |
3337 | if (!printk_ratelimit()) | 3524 | if (!printk_ratelimit()) |
@@ -3345,8 +3532,7 @@ void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) | |||
3345 | return; | 3532 | return; |
3346 | } | 3533 | } |
3347 | 3534 | ||
3348 | rmapp = gfn_to_rmap(kvm, rev_sp->gfns[sptep - rev_sp->spt], | 3535 | rmapp = gfn_to_rmap(kvm, gfn, rev_sp->role.level); |
3349 | rev_sp->role.level); | ||
3350 | if (!*rmapp) { | 3536 | if (!*rmapp) { |
3351 | if (!printk_ratelimit()) | 3537 | if (!printk_ratelimit()) |
3352 | return; | 3538 | return; |
@@ -3379,7 +3565,7 @@ static void check_writable_mappings_rmap(struct kvm_vcpu *vcpu) | |||
3379 | 3565 | ||
3380 | if (!(ent & PT_PRESENT_MASK)) | 3566 | if (!(ent & PT_PRESENT_MASK)) |
3381 | continue; | 3567 | continue; |
3382 | if (!(ent & PT_WRITABLE_MASK)) | 3568 | if (!is_writable_pte(ent)) |
3383 | continue; | 3569 | continue; |
3384 | inspect_spte_has_rmap(vcpu->kvm, &pt[i]); | 3570 | inspect_spte_has_rmap(vcpu->kvm, &pt[i]); |
3385 | } | 3571 | } |
@@ -3407,13 +3593,12 @@ static void audit_write_protection(struct kvm_vcpu *vcpu) | |||
3407 | if (sp->unsync) | 3593 | if (sp->unsync) |
3408 | continue; | 3594 | continue; |
3409 | 3595 | ||
3410 | gfn = unalias_gfn(vcpu->kvm, sp->gfn); | 3596 | slot = gfn_to_memslot(vcpu->kvm, sp->gfn); |
3411 | slot = gfn_to_memslot_unaliased(vcpu->kvm, sp->gfn); | ||
3412 | rmapp = &slot->rmap[gfn - slot->base_gfn]; | 3597 | rmapp = &slot->rmap[gfn - slot->base_gfn]; |
3413 | 3598 | ||
3414 | spte = rmap_next(vcpu->kvm, rmapp, NULL); | 3599 | spte = rmap_next(vcpu->kvm, rmapp, NULL); |
3415 | while (spte) { | 3600 | while (spte) { |
3416 | if (*spte & PT_WRITABLE_MASK) | 3601 | if (is_writable_pte(*spte)) |
3417 | printk(KERN_ERR "%s: (%s) shadow page has " | 3602 | printk(KERN_ERR "%s: (%s) shadow page has " |
3418 | "writable mappings: gfn %lx role %x\n", | 3603 | "writable mappings: gfn %lx role %x\n", |
3419 | __func__, audit_msg, sp->gfn, | 3604 | __func__, audit_msg, sp->gfn, |
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index 42f07b1bfbc9..3aab0f0930ef 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h | |||
@@ -190,7 +190,7 @@ DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_unsync_page, | |||
190 | TP_ARGS(sp) | 190 | TP_ARGS(sp) |
191 | ); | 191 | ); |
192 | 192 | ||
193 | DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_zap_page, | 193 | DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_prepare_zap_page, |
194 | TP_PROTO(struct kvm_mmu_page *sp), | 194 | TP_PROTO(struct kvm_mmu_page *sp), |
195 | 195 | ||
196 | TP_ARGS(sp) | 196 | TP_ARGS(sp) |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 89d66ca4d87c..51ef9097960d 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -7,6 +7,7 @@ | |||
7 | * MMU support | 7 | * MMU support |
8 | * | 8 | * |
9 | * Copyright (C) 2006 Qumranet, Inc. | 9 | * Copyright (C) 2006 Qumranet, Inc. |
10 | * Copyright 2010 Red Hat, Inc. and/or its affilates. | ||
10 | * | 11 | * |
11 | * Authors: | 12 | * Authors: |
12 | * Yaniv Kamay <yaniv@qumranet.com> | 13 | * Yaniv Kamay <yaniv@qumranet.com> |
@@ -118,21 +119,25 @@ static int FNAME(walk_addr)(struct guest_walker *walker, | |||
118 | { | 119 | { |
119 | pt_element_t pte; | 120 | pt_element_t pte; |
120 | gfn_t table_gfn; | 121 | gfn_t table_gfn; |
121 | unsigned index, pt_access, pte_access; | 122 | unsigned index, pt_access, uninitialized_var(pte_access); |
122 | gpa_t pte_gpa; | 123 | gpa_t pte_gpa; |
123 | int rsvd_fault = 0; | 124 | bool eperm, present, rsvd_fault; |
124 | 125 | ||
125 | trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault, | 126 | trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault, |
126 | fetch_fault); | 127 | fetch_fault); |
127 | walk: | 128 | walk: |
129 | present = true; | ||
130 | eperm = rsvd_fault = false; | ||
128 | walker->level = vcpu->arch.mmu.root_level; | 131 | walker->level = vcpu->arch.mmu.root_level; |
129 | pte = vcpu->arch.cr3; | 132 | pte = vcpu->arch.cr3; |
130 | #if PTTYPE == 64 | 133 | #if PTTYPE == 64 |
131 | if (!is_long_mode(vcpu)) { | 134 | if (!is_long_mode(vcpu)) { |
132 | pte = kvm_pdptr_read(vcpu, (addr >> 30) & 3); | 135 | pte = kvm_pdptr_read(vcpu, (addr >> 30) & 3); |
133 | trace_kvm_mmu_paging_element(pte, walker->level); | 136 | trace_kvm_mmu_paging_element(pte, walker->level); |
134 | if (!is_present_gpte(pte)) | 137 | if (!is_present_gpte(pte)) { |
135 | goto not_present; | 138 | present = false; |
139 | goto error; | ||
140 | } | ||
136 | --walker->level; | 141 | --walker->level; |
137 | } | 142 | } |
138 | #endif | 143 | #endif |
@@ -150,37 +155,42 @@ walk: | |||
150 | walker->table_gfn[walker->level - 1] = table_gfn; | 155 | walker->table_gfn[walker->level - 1] = table_gfn; |
151 | walker->pte_gpa[walker->level - 1] = pte_gpa; | 156 | walker->pte_gpa[walker->level - 1] = pte_gpa; |
152 | 157 | ||
153 | if (kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte))) | 158 | if (kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte))) { |
154 | goto not_present; | 159 | present = false; |
160 | break; | ||
161 | } | ||
155 | 162 | ||
156 | trace_kvm_mmu_paging_element(pte, walker->level); | 163 | trace_kvm_mmu_paging_element(pte, walker->level); |
157 | 164 | ||
158 | if (!is_present_gpte(pte)) | 165 | if (!is_present_gpte(pte)) { |
159 | goto not_present; | 166 | present = false; |
167 | break; | ||
168 | } | ||
160 | 169 | ||
161 | rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level); | 170 | if (is_rsvd_bits_set(vcpu, pte, walker->level)) { |
162 | if (rsvd_fault) | 171 | rsvd_fault = true; |
163 | goto access_error; | 172 | break; |
173 | } | ||
164 | 174 | ||
165 | if (write_fault && !is_writable_pte(pte)) | 175 | if (write_fault && !is_writable_pte(pte)) |
166 | if (user_fault || is_write_protection(vcpu)) | 176 | if (user_fault || is_write_protection(vcpu)) |
167 | goto access_error; | 177 | eperm = true; |
168 | 178 | ||
169 | if (user_fault && !(pte & PT_USER_MASK)) | 179 | if (user_fault && !(pte & PT_USER_MASK)) |
170 | goto access_error; | 180 | eperm = true; |
171 | 181 | ||
172 | #if PTTYPE == 64 | 182 | #if PTTYPE == 64 |
173 | if (fetch_fault && (pte & PT64_NX_MASK)) | 183 | if (fetch_fault && (pte & PT64_NX_MASK)) |
174 | goto access_error; | 184 | eperm = true; |
175 | #endif | 185 | #endif |
176 | 186 | ||
177 | if (!(pte & PT_ACCESSED_MASK)) { | 187 | if (!eperm && !rsvd_fault && !(pte & PT_ACCESSED_MASK)) { |
178 | trace_kvm_mmu_set_accessed_bit(table_gfn, index, | 188 | trace_kvm_mmu_set_accessed_bit(table_gfn, index, |
179 | sizeof(pte)); | 189 | sizeof(pte)); |
180 | mark_page_dirty(vcpu->kvm, table_gfn); | ||
181 | if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, | 190 | if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, |
182 | index, pte, pte|PT_ACCESSED_MASK)) | 191 | index, pte, pte|PT_ACCESSED_MASK)) |
183 | goto walk; | 192 | goto walk; |
193 | mark_page_dirty(vcpu->kvm, table_gfn); | ||
184 | pte |= PT_ACCESSED_MASK; | 194 | pte |= PT_ACCESSED_MASK; |
185 | } | 195 | } |
186 | 196 | ||
@@ -213,15 +223,18 @@ walk: | |||
213 | --walker->level; | 223 | --walker->level; |
214 | } | 224 | } |
215 | 225 | ||
226 | if (!present || eperm || rsvd_fault) | ||
227 | goto error; | ||
228 | |||
216 | if (write_fault && !is_dirty_gpte(pte)) { | 229 | if (write_fault && !is_dirty_gpte(pte)) { |
217 | bool ret; | 230 | bool ret; |
218 | 231 | ||
219 | trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); | 232 | trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); |
220 | mark_page_dirty(vcpu->kvm, table_gfn); | ||
221 | ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte, | 233 | ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte, |
222 | pte|PT_DIRTY_MASK); | 234 | pte|PT_DIRTY_MASK); |
223 | if (ret) | 235 | if (ret) |
224 | goto walk; | 236 | goto walk; |
237 | mark_page_dirty(vcpu->kvm, table_gfn); | ||
225 | pte |= PT_DIRTY_MASK; | 238 | pte |= PT_DIRTY_MASK; |
226 | walker->ptes[walker->level - 1] = pte; | 239 | walker->ptes[walker->level - 1] = pte; |
227 | } | 240 | } |
@@ -229,22 +242,18 @@ walk: | |||
229 | walker->pt_access = pt_access; | 242 | walker->pt_access = pt_access; |
230 | walker->pte_access = pte_access; | 243 | walker->pte_access = pte_access; |
231 | pgprintk("%s: pte %llx pte_access %x pt_access %x\n", | 244 | pgprintk("%s: pte %llx pte_access %x pt_access %x\n", |
232 | __func__, (u64)pte, pt_access, pte_access); | 245 | __func__, (u64)pte, pte_access, pt_access); |
233 | return 1; | 246 | return 1; |
234 | 247 | ||
235 | not_present: | 248 | error: |
236 | walker->error_code = 0; | 249 | walker->error_code = 0; |
237 | goto err; | 250 | if (present) |
238 | 251 | walker->error_code |= PFERR_PRESENT_MASK; | |
239 | access_error: | ||
240 | walker->error_code = PFERR_PRESENT_MASK; | ||
241 | |||
242 | err: | ||
243 | if (write_fault) | 252 | if (write_fault) |
244 | walker->error_code |= PFERR_WRITE_MASK; | 253 | walker->error_code |= PFERR_WRITE_MASK; |
245 | if (user_fault) | 254 | if (user_fault) |
246 | walker->error_code |= PFERR_USER_MASK; | 255 | walker->error_code |= PFERR_USER_MASK; |
247 | if (fetch_fault) | 256 | if (fetch_fault && is_nx(vcpu)) |
248 | walker->error_code |= PFERR_FETCH_MASK; | 257 | walker->error_code |= PFERR_FETCH_MASK; |
249 | if (rsvd_fault) | 258 | if (rsvd_fault) |
250 | walker->error_code |= PFERR_RSVD_MASK; | 259 | walker->error_code |= PFERR_RSVD_MASK; |
@@ -252,7 +261,7 @@ err: | |||
252 | return 0; | 261 | return 0; |
253 | } | 262 | } |
254 | 263 | ||
255 | static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | 264 | static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
256 | u64 *spte, const void *pte) | 265 | u64 *spte, const void *pte) |
257 | { | 266 | { |
258 | pt_element_t gpte; | 267 | pt_element_t gpte; |
@@ -263,7 +272,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
263 | gpte = *(const pt_element_t *)pte; | 272 | gpte = *(const pt_element_t *)pte; |
264 | if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { | 273 | if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { |
265 | if (!is_present_gpte(gpte)) { | 274 | if (!is_present_gpte(gpte)) { |
266 | if (page->unsync) | 275 | if (sp->unsync) |
267 | new_spte = shadow_trap_nonpresent_pte; | 276 | new_spte = shadow_trap_nonpresent_pte; |
268 | else | 277 | else |
269 | new_spte = shadow_notrap_nonpresent_pte; | 278 | new_spte = shadow_notrap_nonpresent_pte; |
@@ -272,7 +281,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
272 | return; | 281 | return; |
273 | } | 282 | } |
274 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); | 283 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); |
275 | pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte); | 284 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); |
276 | if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn) | 285 | if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn) |
277 | return; | 286 | return; |
278 | pfn = vcpu->arch.update_pte.pfn; | 287 | pfn = vcpu->arch.update_pte.pfn; |
@@ -285,11 +294,22 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
285 | * we call mmu_set_spte() with reset_host_protection = true beacuse that | 294 | * we call mmu_set_spte() with reset_host_protection = true beacuse that |
286 | * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1). | 295 | * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1). |
287 | */ | 296 | */ |
288 | mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, | 297 | mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, |
289 | gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL, | 298 | is_dirty_gpte(gpte), NULL, PT_PAGE_TABLE_LEVEL, |
290 | gpte_to_gfn(gpte), pfn, true, true); | 299 | gpte_to_gfn(gpte), pfn, true, true); |
291 | } | 300 | } |
292 | 301 | ||
302 | static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu, | ||
303 | struct guest_walker *gw, int level) | ||
304 | { | ||
305 | int r; | ||
306 | pt_element_t curr_pte; | ||
307 | |||
308 | r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 1], | ||
309 | &curr_pte, sizeof(curr_pte)); | ||
310 | return r || curr_pte != gw->ptes[level - 1]; | ||
311 | } | ||
312 | |||
293 | /* | 313 | /* |
294 | * Fetch a shadow pte for a specific level in the paging hierarchy. | 314 | * Fetch a shadow pte for a specific level in the paging hierarchy. |
295 | */ | 315 | */ |
@@ -299,74 +319,86 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
299 | int *ptwrite, pfn_t pfn) | 319 | int *ptwrite, pfn_t pfn) |
300 | { | 320 | { |
301 | unsigned access = gw->pt_access; | 321 | unsigned access = gw->pt_access; |
302 | struct kvm_mmu_page *shadow_page; | 322 | struct kvm_mmu_page *sp = NULL; |
303 | u64 spte, *sptep = NULL; | 323 | bool dirty = is_dirty_gpte(gw->ptes[gw->level - 1]); |
304 | int direct; | 324 | int top_level; |
305 | gfn_t table_gfn; | 325 | unsigned direct_access; |
306 | int r; | 326 | struct kvm_shadow_walk_iterator it; |
307 | int level; | ||
308 | pt_element_t curr_pte; | ||
309 | struct kvm_shadow_walk_iterator iterator; | ||
310 | 327 | ||
311 | if (!is_present_gpte(gw->ptes[gw->level - 1])) | 328 | if (!is_present_gpte(gw->ptes[gw->level - 1])) |
312 | return NULL; | 329 | return NULL; |
313 | 330 | ||
314 | for_each_shadow_entry(vcpu, addr, iterator) { | 331 | direct_access = gw->pt_access & gw->pte_access; |
315 | level = iterator.level; | 332 | if (!dirty) |
316 | sptep = iterator.sptep; | 333 | direct_access &= ~ACC_WRITE_MASK; |
317 | if (iterator.level == hlevel) { | ||
318 | mmu_set_spte(vcpu, sptep, access, | ||
319 | gw->pte_access & access, | ||
320 | user_fault, write_fault, | ||
321 | gw->ptes[gw->level-1] & PT_DIRTY_MASK, | ||
322 | ptwrite, level, | ||
323 | gw->gfn, pfn, false, true); | ||
324 | break; | ||
325 | } | ||
326 | 334 | ||
327 | if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) | 335 | top_level = vcpu->arch.mmu.root_level; |
328 | continue; | 336 | if (top_level == PT32E_ROOT_LEVEL) |
337 | top_level = PT32_ROOT_LEVEL; | ||
338 | /* | ||
339 | * Verify that the top-level gpte is still there. Since the page | ||
340 | * is a root page, it is either write protected (and cannot be | ||
341 | * changed from now on) or it is invalid (in which case, we don't | ||
342 | * really care if it changes underneath us after this point). | ||
343 | */ | ||
344 | if (FNAME(gpte_changed)(vcpu, gw, top_level)) | ||
345 | goto out_gpte_changed; | ||
329 | 346 | ||
330 | if (is_large_pte(*sptep)) { | 347 | for (shadow_walk_init(&it, vcpu, addr); |
331 | rmap_remove(vcpu->kvm, sptep); | 348 | shadow_walk_okay(&it) && it.level > gw->level; |
332 | __set_spte(sptep, shadow_trap_nonpresent_pte); | 349 | shadow_walk_next(&it)) { |
333 | kvm_flush_remote_tlbs(vcpu->kvm); | 350 | gfn_t table_gfn; |
334 | } | ||
335 | 351 | ||
336 | if (level <= gw->level) { | 352 | drop_large_spte(vcpu, it.sptep); |
337 | int delta = level - gw->level + 1; | 353 | |
338 | direct = 1; | 354 | sp = NULL; |
339 | if (!is_dirty_gpte(gw->ptes[level - delta])) | 355 | if (!is_shadow_present_pte(*it.sptep)) { |
340 | access &= ~ACC_WRITE_MASK; | 356 | table_gfn = gw->table_gfn[it.level - 2]; |
341 | table_gfn = gpte_to_gfn(gw->ptes[level - delta]); | 357 | sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1, |
342 | /* advance table_gfn when emulating 1gb pages with 4k */ | 358 | false, access, it.sptep); |
343 | if (delta == 0) | ||
344 | table_gfn += PT_INDEX(addr, level); | ||
345 | } else { | ||
346 | direct = 0; | ||
347 | table_gfn = gw->table_gfn[level - 2]; | ||
348 | } | ||
349 | shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1, | ||
350 | direct, access, sptep); | ||
351 | if (!direct) { | ||
352 | r = kvm_read_guest_atomic(vcpu->kvm, | ||
353 | gw->pte_gpa[level - 2], | ||
354 | &curr_pte, sizeof(curr_pte)); | ||
355 | if (r || curr_pte != gw->ptes[level - 2]) { | ||
356 | kvm_mmu_put_page(shadow_page, sptep); | ||
357 | kvm_release_pfn_clean(pfn); | ||
358 | sptep = NULL; | ||
359 | break; | ||
360 | } | ||
361 | } | 359 | } |
362 | 360 | ||
363 | spte = __pa(shadow_page->spt) | 361 | /* |
364 | | PT_PRESENT_MASK | PT_ACCESSED_MASK | 362 | * Verify that the gpte in the page we've just write |
365 | | PT_WRITABLE_MASK | PT_USER_MASK; | 363 | * protected is still there. |
366 | *sptep = spte; | 364 | */ |
365 | if (FNAME(gpte_changed)(vcpu, gw, it.level - 1)) | ||
366 | goto out_gpte_changed; | ||
367 | |||
368 | if (sp) | ||
369 | link_shadow_page(it.sptep, sp); | ||
367 | } | 370 | } |
368 | 371 | ||
369 | return sptep; | 372 | for (; |
373 | shadow_walk_okay(&it) && it.level > hlevel; | ||
374 | shadow_walk_next(&it)) { | ||
375 | gfn_t direct_gfn; | ||
376 | |||
377 | validate_direct_spte(vcpu, it.sptep, direct_access); | ||
378 | |||
379 | drop_large_spte(vcpu, it.sptep); | ||
380 | |||
381 | if (is_shadow_present_pte(*it.sptep)) | ||
382 | continue; | ||
383 | |||
384 | direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); | ||
385 | |||
386 | sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1, | ||
387 | true, direct_access, it.sptep); | ||
388 | link_shadow_page(it.sptep, sp); | ||
389 | } | ||
390 | |||
391 | mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access, | ||
392 | user_fault, write_fault, dirty, ptwrite, it.level, | ||
393 | gw->gfn, pfn, false, true); | ||
394 | |||
395 | return it.sptep; | ||
396 | |||
397 | out_gpte_changed: | ||
398 | if (sp) | ||
399 | kvm_mmu_put_page(sp, it.sptep); | ||
400 | kvm_release_pfn_clean(pfn); | ||
401 | return NULL; | ||
370 | } | 402 | } |
371 | 403 | ||
372 | /* | 404 | /* |
@@ -430,11 +462,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
430 | pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); | 462 | pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); |
431 | 463 | ||
432 | /* mmio */ | 464 | /* mmio */ |
433 | if (is_error_pfn(pfn)) { | 465 | if (is_error_pfn(pfn)) |
434 | pgprintk("gfn %lx is mmio\n", walker.gfn); | 466 | return kvm_handle_bad_page(vcpu->kvm, walker.gfn, pfn); |
435 | kvm_release_pfn_clean(pfn); | ||
436 | return 1; | ||
437 | } | ||
438 | 467 | ||
439 | spin_lock(&vcpu->kvm->mmu_lock); | 468 | spin_lock(&vcpu->kvm->mmu_lock); |
440 | if (mmu_notifier_retry(vcpu, mmu_seq)) | 469 | if (mmu_notifier_retry(vcpu, mmu_seq)) |
@@ -442,6 +471,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
442 | kvm_mmu_free_some_pages(vcpu); | 471 | kvm_mmu_free_some_pages(vcpu); |
443 | sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, | 472 | sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, |
444 | level, &write_pt, pfn); | 473 | level, &write_pt, pfn); |
474 | (void)sptep; | ||
445 | pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__, | 475 | pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__, |
446 | sptep, *sptep, write_pt); | 476 | sptep, *sptep, write_pt); |
447 | 477 | ||
@@ -463,6 +493,7 @@ out_unlock: | |||
463 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | 493 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) |
464 | { | 494 | { |
465 | struct kvm_shadow_walk_iterator iterator; | 495 | struct kvm_shadow_walk_iterator iterator; |
496 | struct kvm_mmu_page *sp; | ||
466 | gpa_t pte_gpa = -1; | 497 | gpa_t pte_gpa = -1; |
467 | int level; | 498 | int level; |
468 | u64 *sptep; | 499 | u64 *sptep; |
@@ -474,10 +505,13 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
474 | level = iterator.level; | 505 | level = iterator.level; |
475 | sptep = iterator.sptep; | 506 | sptep = iterator.sptep; |
476 | 507 | ||
508 | sp = page_header(__pa(sptep)); | ||
477 | if (is_last_spte(*sptep, level)) { | 509 | if (is_last_spte(*sptep, level)) { |
478 | struct kvm_mmu_page *sp = page_header(__pa(sptep)); | ||
479 | int offset, shift; | 510 | int offset, shift; |
480 | 511 | ||
512 | if (!sp->unsync) | ||
513 | break; | ||
514 | |||
481 | shift = PAGE_SHIFT - | 515 | shift = PAGE_SHIFT - |
482 | (PT_LEVEL_BITS - PT64_LEVEL_BITS) * level; | 516 | (PT_LEVEL_BITS - PT64_LEVEL_BITS) * level; |
483 | offset = sp->role.quadrant << shift; | 517 | offset = sp->role.quadrant << shift; |
@@ -486,16 +520,17 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
486 | pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); | 520 | pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); |
487 | 521 | ||
488 | if (is_shadow_present_pte(*sptep)) { | 522 | if (is_shadow_present_pte(*sptep)) { |
489 | rmap_remove(vcpu->kvm, sptep); | ||
490 | if (is_large_pte(*sptep)) | 523 | if (is_large_pte(*sptep)) |
491 | --vcpu->kvm->stat.lpages; | 524 | --vcpu->kvm->stat.lpages; |
525 | drop_spte(vcpu->kvm, sptep, | ||
526 | shadow_trap_nonpresent_pte); | ||
492 | need_flush = 1; | 527 | need_flush = 1; |
493 | } | 528 | } else |
494 | __set_spte(sptep, shadow_trap_nonpresent_pte); | 529 | __set_spte(sptep, shadow_trap_nonpresent_pte); |
495 | break; | 530 | break; |
496 | } | 531 | } |
497 | 532 | ||
498 | if (!is_shadow_present_pte(*sptep)) | 533 | if (!is_shadow_present_pte(*sptep) || !sp->unsync_children) |
499 | break; | 534 | break; |
500 | } | 535 | } |
501 | 536 | ||
@@ -569,9 +604,9 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu, | |||
569 | * Using the cached information from sp->gfns is safe because: | 604 | * Using the cached information from sp->gfns is safe because: |
570 | * - The spte has a reference to the struct page, so the pfn for a given gfn | 605 | * - The spte has a reference to the struct page, so the pfn for a given gfn |
571 | * can't change unless all sptes pointing to it are nuked first. | 606 | * can't change unless all sptes pointing to it are nuked first. |
572 | * - Alias changes zap the entire shadow cache. | ||
573 | */ | 607 | */ |
574 | static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 608 | static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
609 | bool clear_unsync) | ||
575 | { | 610 | { |
576 | int i, offset, nr_present; | 611 | int i, offset, nr_present; |
577 | bool reset_host_protection; | 612 | bool reset_host_protection; |
@@ -579,6 +614,9 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
579 | 614 | ||
580 | offset = nr_present = 0; | 615 | offset = nr_present = 0; |
581 | 616 | ||
617 | /* direct kvm_mmu_page can not be unsync. */ | ||
618 | BUG_ON(sp->role.direct); | ||
619 | |||
582 | if (PTTYPE == 32) | 620 | if (PTTYPE == 32) |
583 | offset = sp->role.quadrant << PT64_LEVEL_BITS; | 621 | offset = sp->role.quadrant << PT64_LEVEL_BITS; |
584 | 622 | ||
@@ -588,7 +626,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
588 | unsigned pte_access; | 626 | unsigned pte_access; |
589 | pt_element_t gpte; | 627 | pt_element_t gpte; |
590 | gpa_t pte_gpa; | 628 | gpa_t pte_gpa; |
591 | gfn_t gfn = sp->gfns[i]; | 629 | gfn_t gfn; |
592 | 630 | ||
593 | if (!is_shadow_present_pte(sp->spt[i])) | 631 | if (!is_shadow_present_pte(sp->spt[i])) |
594 | continue; | 632 | continue; |
@@ -599,16 +637,17 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
599 | sizeof(pt_element_t))) | 637 | sizeof(pt_element_t))) |
600 | return -EINVAL; | 638 | return -EINVAL; |
601 | 639 | ||
602 | if (gpte_to_gfn(gpte) != gfn || !is_present_gpte(gpte) || | 640 | gfn = gpte_to_gfn(gpte); |
603 | !(gpte & PT_ACCESSED_MASK)) { | 641 | if (is_rsvd_bits_set(vcpu, gpte, PT_PAGE_TABLE_LEVEL) |
642 | || gfn != sp->gfns[i] || !is_present_gpte(gpte) | ||
643 | || !(gpte & PT_ACCESSED_MASK)) { | ||
604 | u64 nonpresent; | 644 | u64 nonpresent; |
605 | 645 | ||
606 | rmap_remove(vcpu->kvm, &sp->spt[i]); | 646 | if (is_present_gpte(gpte) || !clear_unsync) |
607 | if (is_present_gpte(gpte)) | ||
608 | nonpresent = shadow_trap_nonpresent_pte; | 647 | nonpresent = shadow_trap_nonpresent_pte; |
609 | else | 648 | else |
610 | nonpresent = shadow_notrap_nonpresent_pte; | 649 | nonpresent = shadow_notrap_nonpresent_pte; |
611 | __set_spte(&sp->spt[i], nonpresent); | 650 | drop_spte(vcpu->kvm, &sp->spt[i], nonpresent); |
612 | continue; | 651 | continue; |
613 | } | 652 | } |
614 | 653 | ||
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ce438e0fdd26..bc5b9b8d4a33 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -4,6 +4,7 @@ | |||
4 | * AMD SVM support | 4 | * AMD SVM support |
5 | * | 5 | * |
6 | * Copyright (C) 2006 Qumranet, Inc. | 6 | * Copyright (C) 2006 Qumranet, Inc. |
7 | * Copyright 2010 Red Hat, Inc. and/or its affilates. | ||
7 | * | 8 | * |
8 | * Authors: | 9 | * Authors: |
9 | * Yaniv Kamay <yaniv@qumranet.com> | 10 | * Yaniv Kamay <yaniv@qumranet.com> |
@@ -130,7 +131,7 @@ static struct svm_direct_access_msrs { | |||
130 | u32 index; /* Index of the MSR */ | 131 | u32 index; /* Index of the MSR */ |
131 | bool always; /* True if intercept is always on */ | 132 | bool always; /* True if intercept is always on */ |
132 | } direct_access_msrs[] = { | 133 | } direct_access_msrs[] = { |
133 | { .index = MSR_K6_STAR, .always = true }, | 134 | { .index = MSR_STAR, .always = true }, |
134 | { .index = MSR_IA32_SYSENTER_CS, .always = true }, | 135 | { .index = MSR_IA32_SYSENTER_CS, .always = true }, |
135 | #ifdef CONFIG_X86_64 | 136 | #ifdef CONFIG_X86_64 |
136 | { .index = MSR_GS_BASE, .always = true }, | 137 | { .index = MSR_GS_BASE, .always = true }, |
@@ -285,11 +286,11 @@ static inline void flush_guest_tlb(struct kvm_vcpu *vcpu) | |||
285 | 286 | ||
286 | static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) | 287 | static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) |
287 | { | 288 | { |
289 | vcpu->arch.efer = efer; | ||
288 | if (!npt_enabled && !(efer & EFER_LMA)) | 290 | if (!npt_enabled && !(efer & EFER_LMA)) |
289 | efer &= ~EFER_LME; | 291 | efer &= ~EFER_LME; |
290 | 292 | ||
291 | to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; | 293 | to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; |
292 | vcpu->arch.efer = efer; | ||
293 | } | 294 | } |
294 | 295 | ||
295 | static int is_external_interrupt(u32 info) | 296 | static int is_external_interrupt(u32 info) |
@@ -383,8 +384,7 @@ static void svm_init_erratum_383(void) | |||
383 | int err; | 384 | int err; |
384 | u64 val; | 385 | u64 val; |
385 | 386 | ||
386 | /* Only Fam10h is affected */ | 387 | if (!cpu_has_amd_erratum(amd_erratum_383)) |
387 | if (boot_cpu_data.x86 != 0x10) | ||
388 | return; | 388 | return; |
389 | 389 | ||
390 | /* Use _safe variants to not break nested virtualization */ | 390 | /* Use _safe variants to not break nested virtualization */ |
@@ -640,7 +640,7 @@ static __init int svm_hardware_setup(void) | |||
640 | 640 | ||
641 | if (nested) { | 641 | if (nested) { |
642 | printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); | 642 | printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); |
643 | kvm_enable_efer_bits(EFER_SVME); | 643 | kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); |
644 | } | 644 | } |
645 | 645 | ||
646 | for_each_possible_cpu(cpu) { | 646 | for_each_possible_cpu(cpu) { |
@@ -806,7 +806,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
806 | * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. | 806 | * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. |
807 | */ | 807 | */ |
808 | svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; | 808 | svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; |
809 | kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0); | 809 | (void)kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0); |
810 | 810 | ||
811 | save->cr4 = X86_CR4_PAE; | 811 | save->cr4 = X86_CR4_PAE; |
812 | /* rdx = ?? */ | 812 | /* rdx = ?? */ |
@@ -903,13 +903,18 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
903 | svm->asid_generation = 0; | 903 | svm->asid_generation = 0; |
904 | init_vmcb(svm); | 904 | init_vmcb(svm); |
905 | 905 | ||
906 | fx_init(&svm->vcpu); | 906 | err = fx_init(&svm->vcpu); |
907 | if (err) | ||
908 | goto free_page4; | ||
909 | |||
907 | svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; | 910 | svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; |
908 | if (kvm_vcpu_is_bsp(&svm->vcpu)) | 911 | if (kvm_vcpu_is_bsp(&svm->vcpu)) |
909 | svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; | 912 | svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; |
910 | 913 | ||
911 | return &svm->vcpu; | 914 | return &svm->vcpu; |
912 | 915 | ||
916 | free_page4: | ||
917 | __free_page(hsave_page); | ||
913 | free_page3: | 918 | free_page3: |
914 | __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER); | 919 | __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER); |
915 | free_page2: | 920 | free_page2: |
@@ -1488,7 +1493,7 @@ static void svm_handle_mce(struct vcpu_svm *svm) | |||
1488 | */ | 1493 | */ |
1489 | pr_err("KVM: Guest triggered AMD Erratum 383\n"); | 1494 | pr_err("KVM: Guest triggered AMD Erratum 383\n"); |
1490 | 1495 | ||
1491 | set_bit(KVM_REQ_TRIPLE_FAULT, &svm->vcpu.requests); | 1496 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, &svm->vcpu); |
1492 | 1497 | ||
1493 | return; | 1498 | return; |
1494 | } | 1499 | } |
@@ -1535,7 +1540,7 @@ static int io_interception(struct vcpu_svm *svm) | |||
1535 | string = (io_info & SVM_IOIO_STR_MASK) != 0; | 1540 | string = (io_info & SVM_IOIO_STR_MASK) != 0; |
1536 | in = (io_info & SVM_IOIO_TYPE_MASK) != 0; | 1541 | in = (io_info & SVM_IOIO_TYPE_MASK) != 0; |
1537 | if (string || in) | 1542 | if (string || in) |
1538 | return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO); | 1543 | return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE; |
1539 | 1544 | ||
1540 | port = io_info >> 16; | 1545 | port = io_info >> 16; |
1541 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; | 1546 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; |
@@ -1957,7 +1962,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1957 | svm->vmcb->save.cr3 = hsave->save.cr3; | 1962 | svm->vmcb->save.cr3 = hsave->save.cr3; |
1958 | svm->vcpu.arch.cr3 = hsave->save.cr3; | 1963 | svm->vcpu.arch.cr3 = hsave->save.cr3; |
1959 | } else { | 1964 | } else { |
1960 | kvm_set_cr3(&svm->vcpu, hsave->save.cr3); | 1965 | (void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3); |
1961 | } | 1966 | } |
1962 | kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax); | 1967 | kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax); |
1963 | kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp); | 1968 | kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp); |
@@ -2080,7 +2085,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
2080 | svm->vmcb->save.cr3 = nested_vmcb->save.cr3; | 2085 | svm->vmcb->save.cr3 = nested_vmcb->save.cr3; |
2081 | svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; | 2086 | svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; |
2082 | } else | 2087 | } else |
2083 | kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); | 2088 | (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); |
2084 | 2089 | ||
2085 | /* Guest paging mode is active - reset mmu */ | 2090 | /* Guest paging mode is active - reset mmu */ |
2086 | kvm_mmu_reset_context(&svm->vcpu); | 2091 | kvm_mmu_reset_context(&svm->vcpu); |
@@ -2386,16 +2391,12 @@ static int iret_interception(struct vcpu_svm *svm) | |||
2386 | 2391 | ||
2387 | static int invlpg_interception(struct vcpu_svm *svm) | 2392 | static int invlpg_interception(struct vcpu_svm *svm) |
2388 | { | 2393 | { |
2389 | if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE) | 2394 | return emulate_instruction(&svm->vcpu, 0, 0, 0) == EMULATE_DONE; |
2390 | pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); | ||
2391 | return 1; | ||
2392 | } | 2395 | } |
2393 | 2396 | ||
2394 | static int emulate_on_interception(struct vcpu_svm *svm) | 2397 | static int emulate_on_interception(struct vcpu_svm *svm) |
2395 | { | 2398 | { |
2396 | if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE) | 2399 | return emulate_instruction(&svm->vcpu, 0, 0, 0) == EMULATE_DONE; |
2397 | pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); | ||
2398 | return 1; | ||
2399 | } | 2400 | } |
2400 | 2401 | ||
2401 | static int cr8_write_interception(struct vcpu_svm *svm) | 2402 | static int cr8_write_interception(struct vcpu_svm *svm) |
@@ -2431,7 +2432,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | |||
2431 | *data = tsc_offset + native_read_tsc(); | 2432 | *data = tsc_offset + native_read_tsc(); |
2432 | break; | 2433 | break; |
2433 | } | 2434 | } |
2434 | case MSR_K6_STAR: | 2435 | case MSR_STAR: |
2435 | *data = svm->vmcb->save.star; | 2436 | *data = svm->vmcb->save.star; |
2436 | break; | 2437 | break; |
2437 | #ifdef CONFIG_X86_64 | 2438 | #ifdef CONFIG_X86_64 |
@@ -2555,7 +2556,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | |||
2555 | 2556 | ||
2556 | break; | 2557 | break; |
2557 | } | 2558 | } |
2558 | case MSR_K6_STAR: | 2559 | case MSR_STAR: |
2559 | svm->vmcb->save.star = data; | 2560 | svm->vmcb->save.star = data; |
2560 | break; | 2561 | break; |
2561 | #ifdef CONFIG_X86_64 | 2562 | #ifdef CONFIG_X86_64 |
@@ -2726,6 +2727,99 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
2726 | [SVM_EXIT_NPF] = pf_interception, | 2727 | [SVM_EXIT_NPF] = pf_interception, |
2727 | }; | 2728 | }; |
2728 | 2729 | ||
2730 | void dump_vmcb(struct kvm_vcpu *vcpu) | ||
2731 | { | ||
2732 | struct vcpu_svm *svm = to_svm(vcpu); | ||
2733 | struct vmcb_control_area *control = &svm->vmcb->control; | ||
2734 | struct vmcb_save_area *save = &svm->vmcb->save; | ||
2735 | |||
2736 | pr_err("VMCB Control Area:\n"); | ||
2737 | pr_err("cr_read: %04x\n", control->intercept_cr_read); | ||
2738 | pr_err("cr_write: %04x\n", control->intercept_cr_write); | ||
2739 | pr_err("dr_read: %04x\n", control->intercept_dr_read); | ||
2740 | pr_err("dr_write: %04x\n", control->intercept_dr_write); | ||
2741 | pr_err("exceptions: %08x\n", control->intercept_exceptions); | ||
2742 | pr_err("intercepts: %016llx\n", control->intercept); | ||
2743 | pr_err("pause filter count: %d\n", control->pause_filter_count); | ||
2744 | pr_err("iopm_base_pa: %016llx\n", control->iopm_base_pa); | ||
2745 | pr_err("msrpm_base_pa: %016llx\n", control->msrpm_base_pa); | ||
2746 | pr_err("tsc_offset: %016llx\n", control->tsc_offset); | ||
2747 | pr_err("asid: %d\n", control->asid); | ||
2748 | pr_err("tlb_ctl: %d\n", control->tlb_ctl); | ||
2749 | pr_err("int_ctl: %08x\n", control->int_ctl); | ||
2750 | pr_err("int_vector: %08x\n", control->int_vector); | ||
2751 | pr_err("int_state: %08x\n", control->int_state); | ||
2752 | pr_err("exit_code: %08x\n", control->exit_code); | ||
2753 | pr_err("exit_info1: %016llx\n", control->exit_info_1); | ||
2754 | pr_err("exit_info2: %016llx\n", control->exit_info_2); | ||
2755 | pr_err("exit_int_info: %08x\n", control->exit_int_info); | ||
2756 | pr_err("exit_int_info_err: %08x\n", control->exit_int_info_err); | ||
2757 | pr_err("nested_ctl: %lld\n", control->nested_ctl); | ||
2758 | pr_err("nested_cr3: %016llx\n", control->nested_cr3); | ||
2759 | pr_err("event_inj: %08x\n", control->event_inj); | ||
2760 | pr_err("event_inj_err: %08x\n", control->event_inj_err); | ||
2761 | pr_err("lbr_ctl: %lld\n", control->lbr_ctl); | ||
2762 | pr_err("next_rip: %016llx\n", control->next_rip); | ||
2763 | pr_err("VMCB State Save Area:\n"); | ||
2764 | pr_err("es: s: %04x a: %04x l: %08x b: %016llx\n", | ||
2765 | save->es.selector, save->es.attrib, | ||
2766 | save->es.limit, save->es.base); | ||
2767 | pr_err("cs: s: %04x a: %04x l: %08x b: %016llx\n", | ||
2768 | save->cs.selector, save->cs.attrib, | ||
2769 | save->cs.limit, save->cs.base); | ||
2770 | pr_err("ss: s: %04x a: %04x l: %08x b: %016llx\n", | ||
2771 | save->ss.selector, save->ss.attrib, | ||
2772 | save->ss.limit, save->ss.base); | ||
2773 | pr_err("ds: s: %04x a: %04x l: %08x b: %016llx\n", | ||
2774 | save->ds.selector, save->ds.attrib, | ||
2775 | save->ds.limit, save->ds.base); | ||
2776 | pr_err("fs: s: %04x a: %04x l: %08x b: %016llx\n", | ||
2777 | save->fs.selector, save->fs.attrib, | ||
2778 | save->fs.limit, save->fs.base); | ||
2779 | pr_err("gs: s: %04x a: %04x l: %08x b: %016llx\n", | ||
2780 | save->gs.selector, save->gs.attrib, | ||
2781 | save->gs.limit, save->gs.base); | ||
2782 | pr_err("gdtr: s: %04x a: %04x l: %08x b: %016llx\n", | ||
2783 | save->gdtr.selector, save->gdtr.attrib, | ||
2784 | save->gdtr.limit, save->gdtr.base); | ||
2785 | pr_err("ldtr: s: %04x a: %04x l: %08x b: %016llx\n", | ||
2786 | save->ldtr.selector, save->ldtr.attrib, | ||
2787 | save->ldtr.limit, save->ldtr.base); | ||
2788 | pr_err("idtr: s: %04x a: %04x l: %08x b: %016llx\n", | ||
2789 | save->idtr.selector, save->idtr.attrib, | ||
2790 | save->idtr.limit, save->idtr.base); | ||
2791 | pr_err("tr: s: %04x a: %04x l: %08x b: %016llx\n", | ||
2792 | save->tr.selector, save->tr.attrib, | ||
2793 | save->tr.limit, save->tr.base); | ||
2794 | pr_err("cpl: %d efer: %016llx\n", | ||
2795 | save->cpl, save->efer); | ||
2796 | pr_err("cr0: %016llx cr2: %016llx\n", | ||
2797 | save->cr0, save->cr2); | ||
2798 | pr_err("cr3: %016llx cr4: %016llx\n", | ||
2799 | save->cr3, save->cr4); | ||
2800 | pr_err("dr6: %016llx dr7: %016llx\n", | ||
2801 | save->dr6, save->dr7); | ||
2802 | pr_err("rip: %016llx rflags: %016llx\n", | ||
2803 | save->rip, save->rflags); | ||
2804 | pr_err("rsp: %016llx rax: %016llx\n", | ||
2805 | save->rsp, save->rax); | ||
2806 | pr_err("star: %016llx lstar: %016llx\n", | ||
2807 | save->star, save->lstar); | ||
2808 | pr_err("cstar: %016llx sfmask: %016llx\n", | ||
2809 | save->cstar, save->sfmask); | ||
2810 | pr_err("kernel_gs_base: %016llx sysenter_cs: %016llx\n", | ||
2811 | save->kernel_gs_base, save->sysenter_cs); | ||
2812 | pr_err("sysenter_esp: %016llx sysenter_eip: %016llx\n", | ||
2813 | save->sysenter_esp, save->sysenter_eip); | ||
2814 | pr_err("gpat: %016llx dbgctl: %016llx\n", | ||
2815 | save->g_pat, save->dbgctl); | ||
2816 | pr_err("br_from: %016llx br_to: %016llx\n", | ||
2817 | save->br_from, save->br_to); | ||
2818 | pr_err("excp_from: %016llx excp_to: %016llx\n", | ||
2819 | save->last_excp_from, save->last_excp_to); | ||
2820 | |||
2821 | } | ||
2822 | |||
2729 | static int handle_exit(struct kvm_vcpu *vcpu) | 2823 | static int handle_exit(struct kvm_vcpu *vcpu) |
2730 | { | 2824 | { |
2731 | struct vcpu_svm *svm = to_svm(vcpu); | 2825 | struct vcpu_svm *svm = to_svm(vcpu); |
@@ -2770,6 +2864,8 @@ static int handle_exit(struct kvm_vcpu *vcpu) | |||
2770 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; | 2864 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
2771 | kvm_run->fail_entry.hardware_entry_failure_reason | 2865 | kvm_run->fail_entry.hardware_entry_failure_reason |
2772 | = svm->vmcb->control.exit_code; | 2866 | = svm->vmcb->control.exit_code; |
2867 | pr_err("KVM: FAILED VMRUN WITH VMCB:\n"); | ||
2868 | dump_vmcb(vcpu); | ||
2773 | return 0; | 2869 | return 0; |
2774 | } | 2870 | } |
2775 | 2871 | ||
@@ -2826,9 +2922,6 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq) | |||
2826 | { | 2922 | { |
2827 | struct vmcb_control_area *control; | 2923 | struct vmcb_control_area *control; |
2828 | 2924 | ||
2829 | trace_kvm_inj_virq(irq); | ||
2830 | |||
2831 | ++svm->vcpu.stat.irq_injections; | ||
2832 | control = &svm->vmcb->control; | 2925 | control = &svm->vmcb->control; |
2833 | control->int_vector = irq; | 2926 | control->int_vector = irq; |
2834 | control->int_ctl &= ~V_INTR_PRIO_MASK; | 2927 | control->int_ctl &= ~V_INTR_PRIO_MASK; |
@@ -2842,6 +2935,9 @@ static void svm_set_irq(struct kvm_vcpu *vcpu) | |||
2842 | 2935 | ||
2843 | BUG_ON(!(gif_set(svm))); | 2936 | BUG_ON(!(gif_set(svm))); |
2844 | 2937 | ||
2938 | trace_kvm_inj_virq(vcpu->arch.interrupt.nr); | ||
2939 | ++vcpu->stat.irq_injections; | ||
2940 | |||
2845 | svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr | | 2941 | svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr | |
2846 | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR; | 2942 | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR; |
2847 | } | 2943 | } |
@@ -3327,6 +3423,11 @@ static bool svm_rdtscp_supported(void) | |||
3327 | return false; | 3423 | return false; |
3328 | } | 3424 | } |
3329 | 3425 | ||
3426 | static bool svm_has_wbinvd_exit(void) | ||
3427 | { | ||
3428 | return true; | ||
3429 | } | ||
3430 | |||
3330 | static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) | 3431 | static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) |
3331 | { | 3432 | { |
3332 | struct vcpu_svm *svm = to_svm(vcpu); | 3433 | struct vcpu_svm *svm = to_svm(vcpu); |
@@ -3411,6 +3512,8 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
3411 | .rdtscp_supported = svm_rdtscp_supported, | 3512 | .rdtscp_supported = svm_rdtscp_supported, |
3412 | 3513 | ||
3413 | .set_supported_cpuid = svm_set_supported_cpuid, | 3514 | .set_supported_cpuid = svm_set_supported_cpuid, |
3515 | |||
3516 | .has_wbinvd_exit = svm_has_wbinvd_exit, | ||
3414 | }; | 3517 | }; |
3415 | 3518 | ||
3416 | static int __init svm_init(void) | 3519 | static int __init svm_init(void) |
diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c index 4ddadb1a5ffe..e16a0dbe74d8 100644 --- a/arch/x86/kvm/timer.c +++ b/arch/x86/kvm/timer.c | |||
@@ -1,3 +1,17 @@ | |||
1 | /* | ||
2 | * Kernel-based Virtual Machine driver for Linux | ||
3 | * | ||
4 | * This module enables machines with Intel VT-x extensions to run virtual | ||
5 | * machines without emulation or binary translation. | ||
6 | * | ||
7 | * timer support | ||
8 | * | ||
9 | * Copyright 2010 Red Hat, Inc. and/or its affilates. | ||
10 | * | ||
11 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
12 | * the COPYING file in the top-level directory. | ||
13 | */ | ||
14 | |||
1 | #include <linux/kvm_host.h> | 15 | #include <linux/kvm_host.h> |
2 | #include <linux/kvm.h> | 16 | #include <linux/kvm.h> |
3 | #include <linux/hrtimer.h> | 17 | #include <linux/hrtimer.h> |
@@ -18,7 +32,7 @@ static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer) | |||
18 | if (ktimer->reinject || !atomic_read(&ktimer->pending)) { | 32 | if (ktimer->reinject || !atomic_read(&ktimer->pending)) { |
19 | atomic_inc(&ktimer->pending); | 33 | atomic_inc(&ktimer->pending); |
20 | /* FIXME: this code should not know anything about vcpus */ | 34 | /* FIXME: this code should not know anything about vcpus */ |
21 | set_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests); | 35 | kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); |
22 | } | 36 | } |
23 | 37 | ||
24 | if (waitqueue_active(q)) | 38 | if (waitqueue_active(q)) |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 859a01a07dbf..49b25eee25ac 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -5,6 +5,7 @@ | |||
5 | * machines without emulation or binary translation. | 5 | * machines without emulation or binary translation. |
6 | * | 6 | * |
7 | * Copyright (C) 2006 Qumranet, Inc. | 7 | * Copyright (C) 2006 Qumranet, Inc. |
8 | * Copyright 2010 Red Hat, Inc. and/or its affilates. | ||
8 | * | 9 | * |
9 | * Authors: | 10 | * Authors: |
10 | * Avi Kivity <avi@qumranet.com> | 11 | * Avi Kivity <avi@qumranet.com> |
@@ -36,6 +37,8 @@ | |||
36 | #include <asm/vmx.h> | 37 | #include <asm/vmx.h> |
37 | #include <asm/virtext.h> | 38 | #include <asm/virtext.h> |
38 | #include <asm/mce.h> | 39 | #include <asm/mce.h> |
40 | #include <asm/i387.h> | ||
41 | #include <asm/xcr.h> | ||
39 | 42 | ||
40 | #include "trace.h" | 43 | #include "trace.h" |
41 | 44 | ||
@@ -63,6 +66,9 @@ module_param_named(unrestricted_guest, | |||
63 | static int __read_mostly emulate_invalid_guest_state = 0; | 66 | static int __read_mostly emulate_invalid_guest_state = 0; |
64 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); | 67 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); |
65 | 68 | ||
69 | static int __read_mostly vmm_exclusive = 1; | ||
70 | module_param(vmm_exclusive, bool, S_IRUGO); | ||
71 | |||
66 | #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ | 72 | #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ |
67 | (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD) | 73 | (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD) |
68 | #define KVM_GUEST_CR0_MASK \ | 74 | #define KVM_GUEST_CR0_MASK \ |
@@ -173,10 +179,13 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) | |||
173 | 179 | ||
174 | static int init_rmode(struct kvm *kvm); | 180 | static int init_rmode(struct kvm *kvm); |
175 | static u64 construct_eptp(unsigned long root_hpa); | 181 | static u64 construct_eptp(unsigned long root_hpa); |
182 | static void kvm_cpu_vmxon(u64 addr); | ||
183 | static void kvm_cpu_vmxoff(void); | ||
176 | 184 | ||
177 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); | 185 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); |
178 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); | 186 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); |
179 | static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu); | 187 | static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu); |
188 | static DEFINE_PER_CPU(struct desc_ptr, host_gdt); | ||
180 | 189 | ||
181 | static unsigned long *vmx_io_bitmap_a; | 190 | static unsigned long *vmx_io_bitmap_a; |
182 | static unsigned long *vmx_io_bitmap_b; | 191 | static unsigned long *vmx_io_bitmap_b; |
@@ -231,14 +240,14 @@ static u64 host_efer; | |||
231 | static void ept_save_pdptrs(struct kvm_vcpu *vcpu); | 240 | static void ept_save_pdptrs(struct kvm_vcpu *vcpu); |
232 | 241 | ||
233 | /* | 242 | /* |
234 | * Keep MSR_K6_STAR at the end, as setup_msrs() will try to optimize it | 243 | * Keep MSR_STAR at the end, as setup_msrs() will try to optimize it |
235 | * away by decrementing the array size. | 244 | * away by decrementing the array size. |
236 | */ | 245 | */ |
237 | static const u32 vmx_msr_index[] = { | 246 | static const u32 vmx_msr_index[] = { |
238 | #ifdef CONFIG_X86_64 | 247 | #ifdef CONFIG_X86_64 |
239 | MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, | 248 | MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, |
240 | #endif | 249 | #endif |
241 | MSR_EFER, MSR_TSC_AUX, MSR_K6_STAR, | 250 | MSR_EFER, MSR_TSC_AUX, MSR_STAR, |
242 | }; | 251 | }; |
243 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) | 252 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) |
244 | 253 | ||
@@ -334,6 +343,11 @@ static inline bool cpu_has_vmx_ept_1g_page(void) | |||
334 | return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT; | 343 | return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT; |
335 | } | 344 | } |
336 | 345 | ||
346 | static inline bool cpu_has_vmx_ept_4levels(void) | ||
347 | { | ||
348 | return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT; | ||
349 | } | ||
350 | |||
337 | static inline bool cpu_has_vmx_invept_individual_addr(void) | 351 | static inline bool cpu_has_vmx_invept_individual_addr(void) |
338 | { | 352 | { |
339 | return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT; | 353 | return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT; |
@@ -349,6 +363,16 @@ static inline bool cpu_has_vmx_invept_global(void) | |||
349 | return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT; | 363 | return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT; |
350 | } | 364 | } |
351 | 365 | ||
366 | static inline bool cpu_has_vmx_invvpid_single(void) | ||
367 | { | ||
368 | return vmx_capability.vpid & VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT; | ||
369 | } | ||
370 | |||
371 | static inline bool cpu_has_vmx_invvpid_global(void) | ||
372 | { | ||
373 | return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT; | ||
374 | } | ||
375 | |||
352 | static inline bool cpu_has_vmx_ept(void) | 376 | static inline bool cpu_has_vmx_ept(void) |
353 | { | 377 | { |
354 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 378 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
@@ -389,6 +413,12 @@ static inline bool cpu_has_virtual_nmis(void) | |||
389 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; | 413 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; |
390 | } | 414 | } |
391 | 415 | ||
416 | static inline bool cpu_has_vmx_wbinvd_exit(void) | ||
417 | { | ||
418 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
419 | SECONDARY_EXEC_WBINVD_EXITING; | ||
420 | } | ||
421 | |||
392 | static inline bool report_flexpriority(void) | 422 | static inline bool report_flexpriority(void) |
393 | { | 423 | { |
394 | return flexpriority_enabled; | 424 | return flexpriority_enabled; |
@@ -453,6 +483,19 @@ static void vmcs_clear(struct vmcs *vmcs) | |||
453 | vmcs, phys_addr); | 483 | vmcs, phys_addr); |
454 | } | 484 | } |
455 | 485 | ||
486 | static void vmcs_load(struct vmcs *vmcs) | ||
487 | { | ||
488 | u64 phys_addr = __pa(vmcs); | ||
489 | u8 error; | ||
490 | |||
491 | asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0" | ||
492 | : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) | ||
493 | : "cc", "memory"); | ||
494 | if (error) | ||
495 | printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n", | ||
496 | vmcs, phys_addr); | ||
497 | } | ||
498 | |||
456 | static void __vcpu_clear(void *arg) | 499 | static void __vcpu_clear(void *arg) |
457 | { | 500 | { |
458 | struct vcpu_vmx *vmx = arg; | 501 | struct vcpu_vmx *vmx = arg; |
@@ -475,12 +518,27 @@ static void vcpu_clear(struct vcpu_vmx *vmx) | |||
475 | smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 1); | 518 | smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 1); |
476 | } | 519 | } |
477 | 520 | ||
478 | static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx) | 521 | static inline void vpid_sync_vcpu_single(struct vcpu_vmx *vmx) |
479 | { | 522 | { |
480 | if (vmx->vpid == 0) | 523 | if (vmx->vpid == 0) |
481 | return; | 524 | return; |
482 | 525 | ||
483 | __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0); | 526 | if (cpu_has_vmx_invvpid_single()) |
527 | __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0); | ||
528 | } | ||
529 | |||
530 | static inline void vpid_sync_vcpu_global(void) | ||
531 | { | ||
532 | if (cpu_has_vmx_invvpid_global()) | ||
533 | __invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0); | ||
534 | } | ||
535 | |||
536 | static inline void vpid_sync_context(struct vcpu_vmx *vmx) | ||
537 | { | ||
538 | if (cpu_has_vmx_invvpid_single()) | ||
539 | vpid_sync_vcpu_single(vmx); | ||
540 | else | ||
541 | vpid_sync_vcpu_global(); | ||
484 | } | 542 | } |
485 | 543 | ||
486 | static inline void ept_sync_global(void) | 544 | static inline void ept_sync_global(void) |
@@ -812,6 +870,9 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) | |||
812 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); | 870 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); |
813 | } | 871 | } |
814 | #endif | 872 | #endif |
873 | if (current_thread_info()->status & TS_USEDFPU) | ||
874 | clts(); | ||
875 | load_gdt(&__get_cpu_var(host_gdt)); | ||
815 | } | 876 | } |
816 | 877 | ||
817 | static void vmx_load_host_state(struct vcpu_vmx *vmx) | 878 | static void vmx_load_host_state(struct vcpu_vmx *vmx) |
@@ -828,35 +889,30 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx) | |||
828 | static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | 889 | static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
829 | { | 890 | { |
830 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 891 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
831 | u64 phys_addr = __pa(vmx->vmcs); | ||
832 | u64 tsc_this, delta, new_offset; | 892 | u64 tsc_this, delta, new_offset; |
893 | u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); | ||
833 | 894 | ||
834 | if (vcpu->cpu != cpu) { | 895 | if (!vmm_exclusive) |
896 | kvm_cpu_vmxon(phys_addr); | ||
897 | else if (vcpu->cpu != cpu) | ||
835 | vcpu_clear(vmx); | 898 | vcpu_clear(vmx); |
836 | kvm_migrate_timers(vcpu); | ||
837 | set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests); | ||
838 | local_irq_disable(); | ||
839 | list_add(&vmx->local_vcpus_link, | ||
840 | &per_cpu(vcpus_on_cpu, cpu)); | ||
841 | local_irq_enable(); | ||
842 | } | ||
843 | 899 | ||
844 | if (per_cpu(current_vmcs, cpu) != vmx->vmcs) { | 900 | if (per_cpu(current_vmcs, cpu) != vmx->vmcs) { |
845 | u8 error; | ||
846 | |||
847 | per_cpu(current_vmcs, cpu) = vmx->vmcs; | 901 | per_cpu(current_vmcs, cpu) = vmx->vmcs; |
848 | asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0" | 902 | vmcs_load(vmx->vmcs); |
849 | : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) | ||
850 | : "cc"); | ||
851 | if (error) | ||
852 | printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n", | ||
853 | vmx->vmcs, phys_addr); | ||
854 | } | 903 | } |
855 | 904 | ||
856 | if (vcpu->cpu != cpu) { | 905 | if (vcpu->cpu != cpu) { |
857 | struct desc_ptr dt; | 906 | struct desc_ptr dt; |
858 | unsigned long sysenter_esp; | 907 | unsigned long sysenter_esp; |
859 | 908 | ||
909 | kvm_migrate_timers(vcpu); | ||
910 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); | ||
911 | local_irq_disable(); | ||
912 | list_add(&vmx->local_vcpus_link, | ||
913 | &per_cpu(vcpus_on_cpu, cpu)); | ||
914 | local_irq_enable(); | ||
915 | |||
860 | vcpu->cpu = cpu; | 916 | vcpu->cpu = cpu; |
861 | /* | 917 | /* |
862 | * Linux uses per-cpu TSS and GDT, so set these when switching | 918 | * Linux uses per-cpu TSS and GDT, so set these when switching |
@@ -884,6 +940,10 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
884 | static void vmx_vcpu_put(struct kvm_vcpu *vcpu) | 940 | static void vmx_vcpu_put(struct kvm_vcpu *vcpu) |
885 | { | 941 | { |
886 | __vmx_load_host_state(to_vmx(vcpu)); | 942 | __vmx_load_host_state(to_vmx(vcpu)); |
943 | if (!vmm_exclusive) { | ||
944 | __vcpu_clear(to_vmx(vcpu)); | ||
945 | kvm_cpu_vmxoff(); | ||
946 | } | ||
887 | } | 947 | } |
888 | 948 | ||
889 | static void vmx_fpu_activate(struct kvm_vcpu *vcpu) | 949 | static void vmx_fpu_activate(struct kvm_vcpu *vcpu) |
@@ -1057,10 +1117,10 @@ static void setup_msrs(struct vcpu_vmx *vmx) | |||
1057 | if (index >= 0 && vmx->rdtscp_enabled) | 1117 | if (index >= 0 && vmx->rdtscp_enabled) |
1058 | move_msr_up(vmx, index, save_nmsrs++); | 1118 | move_msr_up(vmx, index, save_nmsrs++); |
1059 | /* | 1119 | /* |
1060 | * MSR_K6_STAR is only needed on long mode guests, and only | 1120 | * MSR_STAR is only needed on long mode guests, and only |
1061 | * if efer.sce is enabled. | 1121 | * if efer.sce is enabled. |
1062 | */ | 1122 | */ |
1063 | index = __find_msr_index(vmx, MSR_K6_STAR); | 1123 | index = __find_msr_index(vmx, MSR_STAR); |
1064 | if ((index >= 0) && (vmx->vcpu.arch.efer & EFER_SCE)) | 1124 | if ((index >= 0) && (vmx->vcpu.arch.efer & EFER_SCE)) |
1065 | move_msr_up(vmx, index, save_nmsrs++); | 1125 | move_msr_up(vmx, index, save_nmsrs++); |
1066 | } | 1126 | } |
@@ -1286,6 +1346,13 @@ static __init int vmx_disabled_by_bios(void) | |||
1286 | /* locked but not enabled */ | 1346 | /* locked but not enabled */ |
1287 | } | 1347 | } |
1288 | 1348 | ||
1349 | static void kvm_cpu_vmxon(u64 addr) | ||
1350 | { | ||
1351 | asm volatile (ASM_VMX_VMXON_RAX | ||
1352 | : : "a"(&addr), "m"(addr) | ||
1353 | : "memory", "cc"); | ||
1354 | } | ||
1355 | |||
1289 | static int hardware_enable(void *garbage) | 1356 | static int hardware_enable(void *garbage) |
1290 | { | 1357 | { |
1291 | int cpu = raw_smp_processor_id(); | 1358 | int cpu = raw_smp_processor_id(); |
@@ -1308,11 +1375,13 @@ static int hardware_enable(void *garbage) | |||
1308 | wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits); | 1375 | wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits); |
1309 | } | 1376 | } |
1310 | write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ | 1377 | write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ |
1311 | asm volatile (ASM_VMX_VMXON_RAX | ||
1312 | : : "a"(&phys_addr), "m"(phys_addr) | ||
1313 | : "memory", "cc"); | ||
1314 | 1378 | ||
1315 | ept_sync_global(); | 1379 | if (vmm_exclusive) { |
1380 | kvm_cpu_vmxon(phys_addr); | ||
1381 | ept_sync_global(); | ||
1382 | } | ||
1383 | |||
1384 | store_gdt(&__get_cpu_var(host_gdt)); | ||
1316 | 1385 | ||
1317 | return 0; | 1386 | return 0; |
1318 | } | 1387 | } |
@@ -1334,13 +1403,15 @@ static void vmclear_local_vcpus(void) | |||
1334 | static void kvm_cpu_vmxoff(void) | 1403 | static void kvm_cpu_vmxoff(void) |
1335 | { | 1404 | { |
1336 | asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); | 1405 | asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); |
1337 | write_cr4(read_cr4() & ~X86_CR4_VMXE); | ||
1338 | } | 1406 | } |
1339 | 1407 | ||
1340 | static void hardware_disable(void *garbage) | 1408 | static void hardware_disable(void *garbage) |
1341 | { | 1409 | { |
1342 | vmclear_local_vcpus(); | 1410 | if (vmm_exclusive) { |
1343 | kvm_cpu_vmxoff(); | 1411 | vmclear_local_vcpus(); |
1412 | kvm_cpu_vmxoff(); | ||
1413 | } | ||
1414 | write_cr4(read_cr4() & ~X86_CR4_VMXE); | ||
1344 | } | 1415 | } |
1345 | 1416 | ||
1346 | static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, | 1417 | static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, |
@@ -1539,7 +1610,8 @@ static __init int hardware_setup(void) | |||
1539 | if (!cpu_has_vmx_vpid()) | 1610 | if (!cpu_has_vmx_vpid()) |
1540 | enable_vpid = 0; | 1611 | enable_vpid = 0; |
1541 | 1612 | ||
1542 | if (!cpu_has_vmx_ept()) { | 1613 | if (!cpu_has_vmx_ept() || |
1614 | !cpu_has_vmx_ept_4levels()) { | ||
1543 | enable_ept = 0; | 1615 | enable_ept = 0; |
1544 | enable_unrestricted_guest = 0; | 1616 | enable_unrestricted_guest = 0; |
1545 | } | 1617 | } |
@@ -1628,7 +1700,7 @@ static gva_t rmode_tss_base(struct kvm *kvm) | |||
1628 | gfn_t base_gfn; | 1700 | gfn_t base_gfn; |
1629 | 1701 | ||
1630 | slots = kvm_memslots(kvm); | 1702 | slots = kvm_memslots(kvm); |
1631 | base_gfn = kvm->memslots->memslots[0].base_gfn + | 1703 | base_gfn = slots->memslots[0].base_gfn + |
1632 | kvm->memslots->memslots[0].npages - 3; | 1704 | kvm->memslots->memslots[0].npages - 3; |
1633 | return base_gfn << PAGE_SHIFT; | 1705 | return base_gfn << PAGE_SHIFT; |
1634 | } | 1706 | } |
@@ -1744,27 +1816,27 @@ static void enter_lmode(struct kvm_vcpu *vcpu) | |||
1744 | (guest_tr_ar & ~AR_TYPE_MASK) | 1816 | (guest_tr_ar & ~AR_TYPE_MASK) |
1745 | | AR_TYPE_BUSY_64_TSS); | 1817 | | AR_TYPE_BUSY_64_TSS); |
1746 | } | 1818 | } |
1747 | vcpu->arch.efer |= EFER_LMA; | 1819 | vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA); |
1748 | vmx_set_efer(vcpu, vcpu->arch.efer); | ||
1749 | } | 1820 | } |
1750 | 1821 | ||
1751 | static void exit_lmode(struct kvm_vcpu *vcpu) | 1822 | static void exit_lmode(struct kvm_vcpu *vcpu) |
1752 | { | 1823 | { |
1753 | vcpu->arch.efer &= ~EFER_LMA; | ||
1754 | |||
1755 | vmcs_write32(VM_ENTRY_CONTROLS, | 1824 | vmcs_write32(VM_ENTRY_CONTROLS, |
1756 | vmcs_read32(VM_ENTRY_CONTROLS) | 1825 | vmcs_read32(VM_ENTRY_CONTROLS) |
1757 | & ~VM_ENTRY_IA32E_MODE); | 1826 | & ~VM_ENTRY_IA32E_MODE); |
1758 | vmx_set_efer(vcpu, vcpu->arch.efer); | 1827 | vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA); |
1759 | } | 1828 | } |
1760 | 1829 | ||
1761 | #endif | 1830 | #endif |
1762 | 1831 | ||
1763 | static void vmx_flush_tlb(struct kvm_vcpu *vcpu) | 1832 | static void vmx_flush_tlb(struct kvm_vcpu *vcpu) |
1764 | { | 1833 | { |
1765 | vpid_sync_vcpu_all(to_vmx(vcpu)); | 1834 | vpid_sync_context(to_vmx(vcpu)); |
1766 | if (enable_ept) | 1835 | if (enable_ept) { |
1836 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | ||
1837 | return; | ||
1767 | ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); | 1838 | ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); |
1839 | } | ||
1768 | } | 1840 | } |
1769 | 1841 | ||
1770 | static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) | 1842 | static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) |
@@ -2510,7 +2582,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2510 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf); | 2582 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf); |
2511 | vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ | 2583 | vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ |
2512 | 2584 | ||
2513 | vmcs_writel(HOST_CR0, read_cr0()); /* 22.2.3 */ | 2585 | vmcs_writel(HOST_CR0, read_cr0() | X86_CR0_TS); /* 22.2.3 */ |
2514 | vmcs_writel(HOST_CR4, read_cr4()); /* 22.2.3, 22.2.5 */ | 2586 | vmcs_writel(HOST_CR4, read_cr4()); /* 22.2.3, 22.2.5 */ |
2515 | vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ | 2587 | vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ |
2516 | 2588 | ||
@@ -2602,21 +2674,27 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2602 | 2674 | ||
2603 | static int init_rmode(struct kvm *kvm) | 2675 | static int init_rmode(struct kvm *kvm) |
2604 | { | 2676 | { |
2677 | int idx, ret = 0; | ||
2678 | |||
2679 | idx = srcu_read_lock(&kvm->srcu); | ||
2605 | if (!init_rmode_tss(kvm)) | 2680 | if (!init_rmode_tss(kvm)) |
2606 | return 0; | 2681 | goto exit; |
2607 | if (!init_rmode_identity_map(kvm)) | 2682 | if (!init_rmode_identity_map(kvm)) |
2608 | return 0; | 2683 | goto exit; |
2609 | return 1; | 2684 | |
2685 | ret = 1; | ||
2686 | exit: | ||
2687 | srcu_read_unlock(&kvm->srcu, idx); | ||
2688 | return ret; | ||
2610 | } | 2689 | } |
2611 | 2690 | ||
2612 | static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | 2691 | static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) |
2613 | { | 2692 | { |
2614 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2693 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
2615 | u64 msr; | 2694 | u64 msr; |
2616 | int ret, idx; | 2695 | int ret; |
2617 | 2696 | ||
2618 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); | 2697 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); |
2619 | idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
2620 | if (!init_rmode(vmx->vcpu.kvm)) { | 2698 | if (!init_rmode(vmx->vcpu.kvm)) { |
2621 | ret = -ENOMEM; | 2699 | ret = -ENOMEM; |
2622 | goto out; | 2700 | goto out; |
@@ -2633,7 +2711,9 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2633 | msr |= MSR_IA32_APICBASE_BSP; | 2711 | msr |= MSR_IA32_APICBASE_BSP; |
2634 | kvm_set_apic_base(&vmx->vcpu, msr); | 2712 | kvm_set_apic_base(&vmx->vcpu, msr); |
2635 | 2713 | ||
2636 | fx_init(&vmx->vcpu); | 2714 | ret = fx_init(&vmx->vcpu); |
2715 | if (ret != 0) | ||
2716 | goto out; | ||
2637 | 2717 | ||
2638 | seg_setup(VCPU_SREG_CS); | 2718 | seg_setup(VCPU_SREG_CS); |
2639 | /* | 2719 | /* |
@@ -2716,7 +2796,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2716 | vmx_fpu_activate(&vmx->vcpu); | 2796 | vmx_fpu_activate(&vmx->vcpu); |
2717 | update_exception_bitmap(&vmx->vcpu); | 2797 | update_exception_bitmap(&vmx->vcpu); |
2718 | 2798 | ||
2719 | vpid_sync_vcpu_all(vmx); | 2799 | vpid_sync_context(vmx); |
2720 | 2800 | ||
2721 | ret = 0; | 2801 | ret = 0; |
2722 | 2802 | ||
@@ -2724,7 +2804,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2724 | vmx->emulation_required = 0; | 2804 | vmx->emulation_required = 0; |
2725 | 2805 | ||
2726 | out: | 2806 | out: |
2727 | srcu_read_unlock(&vcpu->kvm->srcu, idx); | ||
2728 | return ret; | 2807 | return ret; |
2729 | } | 2808 | } |
2730 | 2809 | ||
@@ -2829,9 +2908,7 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) | |||
2829 | { | 2908 | { |
2830 | if (!cpu_has_virtual_nmis()) | 2909 | if (!cpu_has_virtual_nmis()) |
2831 | return to_vmx(vcpu)->soft_vnmi_blocked; | 2910 | return to_vmx(vcpu)->soft_vnmi_blocked; |
2832 | else | 2911 | return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; |
2833 | return !!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & | ||
2834 | GUEST_INTR_STATE_NMI); | ||
2835 | } | 2912 | } |
2836 | 2913 | ||
2837 | static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) | 2914 | static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) |
@@ -3073,7 +3150,7 @@ static int handle_io(struct kvm_vcpu *vcpu) | |||
3073 | ++vcpu->stat.io_exits; | 3150 | ++vcpu->stat.io_exits; |
3074 | 3151 | ||
3075 | if (string || in) | 3152 | if (string || in) |
3076 | return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO); | 3153 | return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE; |
3077 | 3154 | ||
3078 | port = exit_qualification >> 16; | 3155 | port = exit_qualification >> 16; |
3079 | size = (exit_qualification & 7) + 1; | 3156 | size = (exit_qualification & 7) + 1; |
@@ -3093,11 +3170,20 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) | |||
3093 | hypercall[2] = 0xc1; | 3170 | hypercall[2] = 0xc1; |
3094 | } | 3171 | } |
3095 | 3172 | ||
3173 | static void complete_insn_gp(struct kvm_vcpu *vcpu, int err) | ||
3174 | { | ||
3175 | if (err) | ||
3176 | kvm_inject_gp(vcpu, 0); | ||
3177 | else | ||
3178 | skip_emulated_instruction(vcpu); | ||
3179 | } | ||
3180 | |||
3096 | static int handle_cr(struct kvm_vcpu *vcpu) | 3181 | static int handle_cr(struct kvm_vcpu *vcpu) |
3097 | { | 3182 | { |
3098 | unsigned long exit_qualification, val; | 3183 | unsigned long exit_qualification, val; |
3099 | int cr; | 3184 | int cr; |
3100 | int reg; | 3185 | int reg; |
3186 | int err; | ||
3101 | 3187 | ||
3102 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 3188 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
3103 | cr = exit_qualification & 15; | 3189 | cr = exit_qualification & 15; |
@@ -3108,16 +3194,16 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
3108 | trace_kvm_cr_write(cr, val); | 3194 | trace_kvm_cr_write(cr, val); |
3109 | switch (cr) { | 3195 | switch (cr) { |
3110 | case 0: | 3196 | case 0: |
3111 | kvm_set_cr0(vcpu, val); | 3197 | err = kvm_set_cr0(vcpu, val); |
3112 | skip_emulated_instruction(vcpu); | 3198 | complete_insn_gp(vcpu, err); |
3113 | return 1; | 3199 | return 1; |
3114 | case 3: | 3200 | case 3: |
3115 | kvm_set_cr3(vcpu, val); | 3201 | err = kvm_set_cr3(vcpu, val); |
3116 | skip_emulated_instruction(vcpu); | 3202 | complete_insn_gp(vcpu, err); |
3117 | return 1; | 3203 | return 1; |
3118 | case 4: | 3204 | case 4: |
3119 | kvm_set_cr4(vcpu, val); | 3205 | err = kvm_set_cr4(vcpu, val); |
3120 | skip_emulated_instruction(vcpu); | 3206 | complete_insn_gp(vcpu, err); |
3121 | return 1; | 3207 | return 1; |
3122 | case 8: { | 3208 | case 8: { |
3123 | u8 cr8_prev = kvm_get_cr8(vcpu); | 3209 | u8 cr8_prev = kvm_get_cr8(vcpu); |
@@ -3324,30 +3410,25 @@ static int handle_invlpg(struct kvm_vcpu *vcpu) | |||
3324 | static int handle_wbinvd(struct kvm_vcpu *vcpu) | 3410 | static int handle_wbinvd(struct kvm_vcpu *vcpu) |
3325 | { | 3411 | { |
3326 | skip_emulated_instruction(vcpu); | 3412 | skip_emulated_instruction(vcpu); |
3327 | /* TODO: Add support for VT-d/pass-through device */ | 3413 | kvm_emulate_wbinvd(vcpu); |
3328 | return 1; | 3414 | return 1; |
3329 | } | 3415 | } |
3330 | 3416 | ||
3331 | static int handle_apic_access(struct kvm_vcpu *vcpu) | 3417 | static int handle_xsetbv(struct kvm_vcpu *vcpu) |
3332 | { | 3418 | { |
3333 | unsigned long exit_qualification; | 3419 | u64 new_bv = kvm_read_edx_eax(vcpu); |
3334 | enum emulation_result er; | 3420 | u32 index = kvm_register_read(vcpu, VCPU_REGS_RCX); |
3335 | unsigned long offset; | ||
3336 | 3421 | ||
3337 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 3422 | if (kvm_set_xcr(vcpu, index, new_bv) == 0) |
3338 | offset = exit_qualification & 0xffful; | 3423 | skip_emulated_instruction(vcpu); |
3339 | |||
3340 | er = emulate_instruction(vcpu, 0, 0, 0); | ||
3341 | |||
3342 | if (er != EMULATE_DONE) { | ||
3343 | printk(KERN_ERR | ||
3344 | "Fail to handle apic access vmexit! Offset is 0x%lx\n", | ||
3345 | offset); | ||
3346 | return -ENOEXEC; | ||
3347 | } | ||
3348 | return 1; | 3424 | return 1; |
3349 | } | 3425 | } |
3350 | 3426 | ||
3427 | static int handle_apic_access(struct kvm_vcpu *vcpu) | ||
3428 | { | ||
3429 | return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE; | ||
3430 | } | ||
3431 | |||
3351 | static int handle_task_switch(struct kvm_vcpu *vcpu) | 3432 | static int handle_task_switch(struct kvm_vcpu *vcpu) |
3352 | { | 3433 | { |
3353 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3434 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
@@ -3557,13 +3638,8 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) | |||
3557 | goto out; | 3638 | goto out; |
3558 | } | 3639 | } |
3559 | 3640 | ||
3560 | if (err != EMULATE_DONE) { | 3641 | if (err != EMULATE_DONE) |
3561 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | 3642 | return 0; |
3562 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | ||
3563 | vcpu->run->internal.ndata = 0; | ||
3564 | ret = 0; | ||
3565 | goto out; | ||
3566 | } | ||
3567 | 3643 | ||
3568 | if (signal_pending(current)) | 3644 | if (signal_pending(current)) |
3569 | goto out; | 3645 | goto out; |
@@ -3626,6 +3702,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | |||
3626 | [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, | 3702 | [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, |
3627 | [EXIT_REASON_APIC_ACCESS] = handle_apic_access, | 3703 | [EXIT_REASON_APIC_ACCESS] = handle_apic_access, |
3628 | [EXIT_REASON_WBINVD] = handle_wbinvd, | 3704 | [EXIT_REASON_WBINVD] = handle_wbinvd, |
3705 | [EXIT_REASON_XSETBV] = handle_xsetbv, | ||
3629 | [EXIT_REASON_TASK_SWITCH] = handle_task_switch, | 3706 | [EXIT_REASON_TASK_SWITCH] = handle_task_switch, |
3630 | [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, | 3707 | [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, |
3631 | [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, | 3708 | [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, |
@@ -3659,6 +3736,13 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
3659 | if (enable_ept && is_paging(vcpu)) | 3736 | if (enable_ept && is_paging(vcpu)) |
3660 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); | 3737 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); |
3661 | 3738 | ||
3739 | if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { | ||
3740 | vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; | ||
3741 | vcpu->run->fail_entry.hardware_entry_failure_reason | ||
3742 | = exit_reason; | ||
3743 | return 0; | ||
3744 | } | ||
3745 | |||
3662 | if (unlikely(vmx->fail)) { | 3746 | if (unlikely(vmx->fail)) { |
3663 | vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; | 3747 | vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
3664 | vcpu->run->fail_entry.hardware_entry_failure_reason | 3748 | vcpu->run->fail_entry.hardware_entry_failure_reason |
@@ -3864,11 +3948,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
3864 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | 3948 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) |
3865 | vmx_set_interrupt_shadow(vcpu, 0); | 3949 | vmx_set_interrupt_shadow(vcpu, 0); |
3866 | 3950 | ||
3867 | /* | ||
3868 | * Loading guest fpu may have cleared host cr0.ts | ||
3869 | */ | ||
3870 | vmcs_writel(HOST_CR0, read_cr0()); | ||
3871 | |||
3872 | asm( | 3951 | asm( |
3873 | /* Store host registers */ | 3952 | /* Store host registers */ |
3874 | "push %%"R"dx; push %%"R"bp;" | 3953 | "push %%"R"dx; push %%"R"bp;" |
@@ -4004,6 +4083,19 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) | |||
4004 | kmem_cache_free(kvm_vcpu_cache, vmx); | 4083 | kmem_cache_free(kvm_vcpu_cache, vmx); |
4005 | } | 4084 | } |
4006 | 4085 | ||
4086 | static inline void vmcs_init(struct vmcs *vmcs) | ||
4087 | { | ||
4088 | u64 phys_addr = __pa(per_cpu(vmxarea, raw_smp_processor_id())); | ||
4089 | |||
4090 | if (!vmm_exclusive) | ||
4091 | kvm_cpu_vmxon(phys_addr); | ||
4092 | |||
4093 | vmcs_clear(vmcs); | ||
4094 | |||
4095 | if (!vmm_exclusive) | ||
4096 | kvm_cpu_vmxoff(); | ||
4097 | } | ||
4098 | |||
4007 | static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | 4099 | static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) |
4008 | { | 4100 | { |
4009 | int err; | 4101 | int err; |
@@ -4029,7 +4121,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
4029 | if (!vmx->vmcs) | 4121 | if (!vmx->vmcs) |
4030 | goto free_msrs; | 4122 | goto free_msrs; |
4031 | 4123 | ||
4032 | vmcs_clear(vmx->vmcs); | 4124 | vmcs_init(vmx->vmcs); |
4033 | 4125 | ||
4034 | cpu = get_cpu(); | 4126 | cpu = get_cpu(); |
4035 | vmx_vcpu_load(&vmx->vcpu, cpu); | 4127 | vmx_vcpu_load(&vmx->vcpu, cpu); |
@@ -4268,6 +4360,8 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
4268 | .rdtscp_supported = vmx_rdtscp_supported, | 4360 | .rdtscp_supported = vmx_rdtscp_supported, |
4269 | 4361 | ||
4270 | .set_supported_cpuid = vmx_set_supported_cpuid, | 4362 | .set_supported_cpuid = vmx_set_supported_cpuid, |
4363 | |||
4364 | .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, | ||
4271 | }; | 4365 | }; |
4272 | 4366 | ||
4273 | static int __init vmx_init(void) | 4367 | static int __init vmx_init(void) |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 05d571f6f196..25f19078b321 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -6,6 +6,7 @@ | |||
6 | * Copyright (C) 2006 Qumranet, Inc. | 6 | * Copyright (C) 2006 Qumranet, Inc. |
7 | * Copyright (C) 2008 Qumranet, Inc. | 7 | * Copyright (C) 2008 Qumranet, Inc. |
8 | * Copyright IBM Corporation, 2008 | 8 | * Copyright IBM Corporation, 2008 |
9 | * Copyright 2010 Red Hat, Inc. and/or its affilates. | ||
9 | * | 10 | * |
10 | * Authors: | 11 | * Authors: |
11 | * Avi Kivity <avi@qumranet.com> | 12 | * Avi Kivity <avi@qumranet.com> |
@@ -41,17 +42,19 @@ | |||
41 | #include <linux/srcu.h> | 42 | #include <linux/srcu.h> |
42 | #include <linux/slab.h> | 43 | #include <linux/slab.h> |
43 | #include <linux/perf_event.h> | 44 | #include <linux/perf_event.h> |
45 | #include <linux/uaccess.h> | ||
44 | #include <trace/events/kvm.h> | 46 | #include <trace/events/kvm.h> |
45 | 47 | ||
46 | #define CREATE_TRACE_POINTS | 48 | #define CREATE_TRACE_POINTS |
47 | #include "trace.h" | 49 | #include "trace.h" |
48 | 50 | ||
49 | #include <asm/debugreg.h> | 51 | #include <asm/debugreg.h> |
50 | #include <asm/uaccess.h> | ||
51 | #include <asm/msr.h> | 52 | #include <asm/msr.h> |
52 | #include <asm/desc.h> | 53 | #include <asm/desc.h> |
53 | #include <asm/mtrr.h> | 54 | #include <asm/mtrr.h> |
54 | #include <asm/mce.h> | 55 | #include <asm/mce.h> |
56 | #include <asm/i387.h> | ||
57 | #include <asm/xcr.h> | ||
55 | 58 | ||
56 | #define MAX_IO_MSRS 256 | 59 | #define MAX_IO_MSRS 256 |
57 | #define CR0_RESERVED_BITS \ | 60 | #define CR0_RESERVED_BITS \ |
@@ -62,6 +65,7 @@ | |||
62 | (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | 65 | (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ |
63 | | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ | 66 | | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ |
64 | | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ | 67 | | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ |
68 | | X86_CR4_OSXSAVE \ | ||
65 | | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) | 69 | | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) |
66 | 70 | ||
67 | #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) | 71 | #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) |
@@ -147,6 +151,13 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
147 | { NULL } | 151 | { NULL } |
148 | }; | 152 | }; |
149 | 153 | ||
154 | u64 __read_mostly host_xcr0; | ||
155 | |||
156 | static inline u32 bit(int bitno) | ||
157 | { | ||
158 | return 1 << (bitno & 31); | ||
159 | } | ||
160 | |||
150 | static void kvm_on_user_return(struct user_return_notifier *urn) | 161 | static void kvm_on_user_return(struct user_return_notifier *urn) |
151 | { | 162 | { |
152 | unsigned slot; | 163 | unsigned slot; |
@@ -285,7 +296,7 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, | |||
285 | prev_nr = vcpu->arch.exception.nr; | 296 | prev_nr = vcpu->arch.exception.nr; |
286 | if (prev_nr == DF_VECTOR) { | 297 | if (prev_nr == DF_VECTOR) { |
287 | /* triple fault -> shutdown */ | 298 | /* triple fault -> shutdown */ |
288 | set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); | 299 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); |
289 | return; | 300 | return; |
290 | } | 301 | } |
291 | class1 = exception_class(prev_nr); | 302 | class1 = exception_class(prev_nr); |
@@ -414,121 +425,163 @@ out: | |||
414 | return changed; | 425 | return changed; |
415 | } | 426 | } |
416 | 427 | ||
417 | void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | 428 | int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) |
418 | { | 429 | { |
430 | unsigned long old_cr0 = kvm_read_cr0(vcpu); | ||
431 | unsigned long update_bits = X86_CR0_PG | X86_CR0_WP | | ||
432 | X86_CR0_CD | X86_CR0_NW; | ||
433 | |||
419 | cr0 |= X86_CR0_ET; | 434 | cr0 |= X86_CR0_ET; |
420 | 435 | ||
421 | #ifdef CONFIG_X86_64 | 436 | #ifdef CONFIG_X86_64 |
422 | if (cr0 & 0xffffffff00000000UL) { | 437 | if (cr0 & 0xffffffff00000000UL) |
423 | kvm_inject_gp(vcpu, 0); | 438 | return 1; |
424 | return; | ||
425 | } | ||
426 | #endif | 439 | #endif |
427 | 440 | ||
428 | cr0 &= ~CR0_RESERVED_BITS; | 441 | cr0 &= ~CR0_RESERVED_BITS; |
429 | 442 | ||
430 | if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { | 443 | if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) |
431 | kvm_inject_gp(vcpu, 0); | 444 | return 1; |
432 | return; | ||
433 | } | ||
434 | 445 | ||
435 | if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) { | 446 | if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) |
436 | kvm_inject_gp(vcpu, 0); | 447 | return 1; |
437 | return; | ||
438 | } | ||
439 | 448 | ||
440 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { | 449 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { |
441 | #ifdef CONFIG_X86_64 | 450 | #ifdef CONFIG_X86_64 |
442 | if ((vcpu->arch.efer & EFER_LME)) { | 451 | if ((vcpu->arch.efer & EFER_LME)) { |
443 | int cs_db, cs_l; | 452 | int cs_db, cs_l; |
444 | 453 | ||
445 | if (!is_pae(vcpu)) { | 454 | if (!is_pae(vcpu)) |
446 | kvm_inject_gp(vcpu, 0); | 455 | return 1; |
447 | return; | ||
448 | } | ||
449 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | 456 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
450 | if (cs_l) { | 457 | if (cs_l) |
451 | kvm_inject_gp(vcpu, 0); | 458 | return 1; |
452 | return; | ||
453 | |||
454 | } | ||
455 | } else | 459 | } else |
456 | #endif | 460 | #endif |
457 | if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) { | 461 | if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) |
458 | kvm_inject_gp(vcpu, 0); | 462 | return 1; |
459 | return; | ||
460 | } | ||
461 | |||
462 | } | 463 | } |
463 | 464 | ||
464 | kvm_x86_ops->set_cr0(vcpu, cr0); | 465 | kvm_x86_ops->set_cr0(vcpu, cr0); |
465 | 466 | ||
466 | kvm_mmu_reset_context(vcpu); | 467 | if ((cr0 ^ old_cr0) & update_bits) |
467 | return; | 468 | kvm_mmu_reset_context(vcpu); |
469 | return 0; | ||
468 | } | 470 | } |
469 | EXPORT_SYMBOL_GPL(kvm_set_cr0); | 471 | EXPORT_SYMBOL_GPL(kvm_set_cr0); |
470 | 472 | ||
471 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) | 473 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) |
472 | { | 474 | { |
473 | kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f)); | 475 | (void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f)); |
474 | } | 476 | } |
475 | EXPORT_SYMBOL_GPL(kvm_lmsw); | 477 | EXPORT_SYMBOL_GPL(kvm_lmsw); |
476 | 478 | ||
477 | void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 479 | int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) |
478 | { | 480 | { |
479 | unsigned long old_cr4 = kvm_read_cr4(vcpu); | 481 | u64 xcr0; |
480 | unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; | ||
481 | 482 | ||
482 | if (cr4 & CR4_RESERVED_BITS) { | 483 | /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */ |
484 | if (index != XCR_XFEATURE_ENABLED_MASK) | ||
485 | return 1; | ||
486 | xcr0 = xcr; | ||
487 | if (kvm_x86_ops->get_cpl(vcpu) != 0) | ||
488 | return 1; | ||
489 | if (!(xcr0 & XSTATE_FP)) | ||
490 | return 1; | ||
491 | if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) | ||
492 | return 1; | ||
493 | if (xcr0 & ~host_xcr0) | ||
494 | return 1; | ||
495 | vcpu->arch.xcr0 = xcr0; | ||
496 | vcpu->guest_xcr0_loaded = 0; | ||
497 | return 0; | ||
498 | } | ||
499 | |||
500 | int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) | ||
501 | { | ||
502 | if (__kvm_set_xcr(vcpu, index, xcr)) { | ||
483 | kvm_inject_gp(vcpu, 0); | 503 | kvm_inject_gp(vcpu, 0); |
504 | return 1; | ||
505 | } | ||
506 | return 0; | ||
507 | } | ||
508 | EXPORT_SYMBOL_GPL(kvm_set_xcr); | ||
509 | |||
510 | static bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu) | ||
511 | { | ||
512 | struct kvm_cpuid_entry2 *best; | ||
513 | |||
514 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | ||
515 | return best && (best->ecx & bit(X86_FEATURE_XSAVE)); | ||
516 | } | ||
517 | |||
518 | static void update_cpuid(struct kvm_vcpu *vcpu) | ||
519 | { | ||
520 | struct kvm_cpuid_entry2 *best; | ||
521 | |||
522 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | ||
523 | if (!best) | ||
484 | return; | 524 | return; |
525 | |||
526 | /* Update OSXSAVE bit */ | ||
527 | if (cpu_has_xsave && best->function == 0x1) { | ||
528 | best->ecx &= ~(bit(X86_FEATURE_OSXSAVE)); | ||
529 | if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) | ||
530 | best->ecx |= bit(X86_FEATURE_OSXSAVE); | ||
485 | } | 531 | } |
532 | } | ||
533 | |||
534 | int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | ||
535 | { | ||
536 | unsigned long old_cr4 = kvm_read_cr4(vcpu); | ||
537 | unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; | ||
538 | |||
539 | if (cr4 & CR4_RESERVED_BITS) | ||
540 | return 1; | ||
541 | |||
542 | if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE)) | ||
543 | return 1; | ||
486 | 544 | ||
487 | if (is_long_mode(vcpu)) { | 545 | if (is_long_mode(vcpu)) { |
488 | if (!(cr4 & X86_CR4_PAE)) { | 546 | if (!(cr4 & X86_CR4_PAE)) |
489 | kvm_inject_gp(vcpu, 0); | 547 | return 1; |
490 | return; | ||
491 | } | ||
492 | } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) | 548 | } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) |
493 | && ((cr4 ^ old_cr4) & pdptr_bits) | 549 | && ((cr4 ^ old_cr4) & pdptr_bits) |
494 | && !load_pdptrs(vcpu, vcpu->arch.cr3)) { | 550 | && !load_pdptrs(vcpu, vcpu->arch.cr3)) |
495 | kvm_inject_gp(vcpu, 0); | 551 | return 1; |
496 | return; | 552 | |
497 | } | 553 | if (cr4 & X86_CR4_VMXE) |
554 | return 1; | ||
498 | 555 | ||
499 | if (cr4 & X86_CR4_VMXE) { | ||
500 | kvm_inject_gp(vcpu, 0); | ||
501 | return; | ||
502 | } | ||
503 | kvm_x86_ops->set_cr4(vcpu, cr4); | 556 | kvm_x86_ops->set_cr4(vcpu, cr4); |
504 | vcpu->arch.cr4 = cr4; | 557 | |
505 | kvm_mmu_reset_context(vcpu); | 558 | if ((cr4 ^ old_cr4) & pdptr_bits) |
559 | kvm_mmu_reset_context(vcpu); | ||
560 | |||
561 | if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE) | ||
562 | update_cpuid(vcpu); | ||
563 | |||
564 | return 0; | ||
506 | } | 565 | } |
507 | EXPORT_SYMBOL_GPL(kvm_set_cr4); | 566 | EXPORT_SYMBOL_GPL(kvm_set_cr4); |
508 | 567 | ||
509 | void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | 568 | int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) |
510 | { | 569 | { |
511 | if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) { | 570 | if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) { |
512 | kvm_mmu_sync_roots(vcpu); | 571 | kvm_mmu_sync_roots(vcpu); |
513 | kvm_mmu_flush_tlb(vcpu); | 572 | kvm_mmu_flush_tlb(vcpu); |
514 | return; | 573 | return 0; |
515 | } | 574 | } |
516 | 575 | ||
517 | if (is_long_mode(vcpu)) { | 576 | if (is_long_mode(vcpu)) { |
518 | if (cr3 & CR3_L_MODE_RESERVED_BITS) { | 577 | if (cr3 & CR3_L_MODE_RESERVED_BITS) |
519 | kvm_inject_gp(vcpu, 0); | 578 | return 1; |
520 | return; | ||
521 | } | ||
522 | } else { | 579 | } else { |
523 | if (is_pae(vcpu)) { | 580 | if (is_pae(vcpu)) { |
524 | if (cr3 & CR3_PAE_RESERVED_BITS) { | 581 | if (cr3 & CR3_PAE_RESERVED_BITS) |
525 | kvm_inject_gp(vcpu, 0); | 582 | return 1; |
526 | return; | 583 | if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) |
527 | } | 584 | return 1; |
528 | if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) { | ||
529 | kvm_inject_gp(vcpu, 0); | ||
530 | return; | ||
531 | } | ||
532 | } | 585 | } |
533 | /* | 586 | /* |
534 | * We don't check reserved bits in nonpae mode, because | 587 | * We don't check reserved bits in nonpae mode, because |
@@ -546,24 +599,28 @@ void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
546 | * to debug) behavior on the guest side. | 599 | * to debug) behavior on the guest side. |
547 | */ | 600 | */ |
548 | if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT))) | 601 | if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT))) |
549 | kvm_inject_gp(vcpu, 0); | 602 | return 1; |
550 | else { | 603 | vcpu->arch.cr3 = cr3; |
551 | vcpu->arch.cr3 = cr3; | 604 | vcpu->arch.mmu.new_cr3(vcpu); |
552 | vcpu->arch.mmu.new_cr3(vcpu); | 605 | return 0; |
553 | } | ||
554 | } | 606 | } |
555 | EXPORT_SYMBOL_GPL(kvm_set_cr3); | 607 | EXPORT_SYMBOL_GPL(kvm_set_cr3); |
556 | 608 | ||
557 | void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) | 609 | int __kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) |
558 | { | 610 | { |
559 | if (cr8 & CR8_RESERVED_BITS) { | 611 | if (cr8 & CR8_RESERVED_BITS) |
560 | kvm_inject_gp(vcpu, 0); | 612 | return 1; |
561 | return; | ||
562 | } | ||
563 | if (irqchip_in_kernel(vcpu->kvm)) | 613 | if (irqchip_in_kernel(vcpu->kvm)) |
564 | kvm_lapic_set_tpr(vcpu, cr8); | 614 | kvm_lapic_set_tpr(vcpu, cr8); |
565 | else | 615 | else |
566 | vcpu->arch.cr8 = cr8; | 616 | vcpu->arch.cr8 = cr8; |
617 | return 0; | ||
618 | } | ||
619 | |||
620 | void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) | ||
621 | { | ||
622 | if (__kvm_set_cr8(vcpu, cr8)) | ||
623 | kvm_inject_gp(vcpu, 0); | ||
567 | } | 624 | } |
568 | EXPORT_SYMBOL_GPL(kvm_set_cr8); | 625 | EXPORT_SYMBOL_GPL(kvm_set_cr8); |
569 | 626 | ||
@@ -576,7 +633,7 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) | |||
576 | } | 633 | } |
577 | EXPORT_SYMBOL_GPL(kvm_get_cr8); | 634 | EXPORT_SYMBOL_GPL(kvm_get_cr8); |
578 | 635 | ||
579 | int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) | 636 | static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) |
580 | { | 637 | { |
581 | switch (dr) { | 638 | switch (dr) { |
582 | case 0 ... 3: | 639 | case 0 ... 3: |
@@ -585,29 +642,21 @@ int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) | |||
585 | vcpu->arch.eff_db[dr] = val; | 642 | vcpu->arch.eff_db[dr] = val; |
586 | break; | 643 | break; |
587 | case 4: | 644 | case 4: |
588 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | 645 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) |
589 | kvm_queue_exception(vcpu, UD_VECTOR); | 646 | return 1; /* #UD */ |
590 | return 1; | ||
591 | } | ||
592 | /* fall through */ | 647 | /* fall through */ |
593 | case 6: | 648 | case 6: |
594 | if (val & 0xffffffff00000000ULL) { | 649 | if (val & 0xffffffff00000000ULL) |
595 | kvm_inject_gp(vcpu, 0); | 650 | return -1; /* #GP */ |
596 | return 1; | ||
597 | } | ||
598 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; | 651 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; |
599 | break; | 652 | break; |
600 | case 5: | 653 | case 5: |
601 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | 654 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) |
602 | kvm_queue_exception(vcpu, UD_VECTOR); | 655 | return 1; /* #UD */ |
603 | return 1; | ||
604 | } | ||
605 | /* fall through */ | 656 | /* fall through */ |
606 | default: /* 7 */ | 657 | default: /* 7 */ |
607 | if (val & 0xffffffff00000000ULL) { | 658 | if (val & 0xffffffff00000000ULL) |
608 | kvm_inject_gp(vcpu, 0); | 659 | return -1; /* #GP */ |
609 | return 1; | ||
610 | } | ||
611 | vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; | 660 | vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; |
612 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { | 661 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { |
613 | kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7); | 662 | kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7); |
@@ -618,28 +667,37 @@ int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) | |||
618 | 667 | ||
619 | return 0; | 668 | return 0; |
620 | } | 669 | } |
670 | |||
671 | int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) | ||
672 | { | ||
673 | int res; | ||
674 | |||
675 | res = __kvm_set_dr(vcpu, dr, val); | ||
676 | if (res > 0) | ||
677 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
678 | else if (res < 0) | ||
679 | kvm_inject_gp(vcpu, 0); | ||
680 | |||
681 | return res; | ||
682 | } | ||
621 | EXPORT_SYMBOL_GPL(kvm_set_dr); | 683 | EXPORT_SYMBOL_GPL(kvm_set_dr); |
622 | 684 | ||
623 | int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) | 685 | static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) |
624 | { | 686 | { |
625 | switch (dr) { | 687 | switch (dr) { |
626 | case 0 ... 3: | 688 | case 0 ... 3: |
627 | *val = vcpu->arch.db[dr]; | 689 | *val = vcpu->arch.db[dr]; |
628 | break; | 690 | break; |
629 | case 4: | 691 | case 4: |
630 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | 692 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) |
631 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
632 | return 1; | 693 | return 1; |
633 | } | ||
634 | /* fall through */ | 694 | /* fall through */ |
635 | case 6: | 695 | case 6: |
636 | *val = vcpu->arch.dr6; | 696 | *val = vcpu->arch.dr6; |
637 | break; | 697 | break; |
638 | case 5: | 698 | case 5: |
639 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | 699 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) |
640 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
641 | return 1; | 700 | return 1; |
642 | } | ||
643 | /* fall through */ | 701 | /* fall through */ |
644 | default: /* 7 */ | 702 | default: /* 7 */ |
645 | *val = vcpu->arch.dr7; | 703 | *val = vcpu->arch.dr7; |
@@ -648,12 +706,16 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) | |||
648 | 706 | ||
649 | return 0; | 707 | return 0; |
650 | } | 708 | } |
651 | EXPORT_SYMBOL_GPL(kvm_get_dr); | ||
652 | 709 | ||
653 | static inline u32 bit(int bitno) | 710 | int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) |
654 | { | 711 | { |
655 | return 1 << (bitno & 31); | 712 | if (_kvm_get_dr(vcpu, dr, val)) { |
713 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
714 | return 1; | ||
715 | } | ||
716 | return 0; | ||
656 | } | 717 | } |
718 | EXPORT_SYMBOL_GPL(kvm_get_dr); | ||
657 | 719 | ||
658 | /* | 720 | /* |
659 | * List of msr numbers which we expose to userspace through KVM_GET_MSRS | 721 | * List of msr numbers which we expose to userspace through KVM_GET_MSRS |
@@ -671,7 +733,7 @@ static u32 msrs_to_save[] = { | |||
671 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, | 733 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, |
672 | HV_X64_MSR_APIC_ASSIST_PAGE, | 734 | HV_X64_MSR_APIC_ASSIST_PAGE, |
673 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, | 735 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, |
674 | MSR_K6_STAR, | 736 | MSR_STAR, |
675 | #ifdef CONFIG_X86_64 | 737 | #ifdef CONFIG_X86_64 |
676 | MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, | 738 | MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, |
677 | #endif | 739 | #endif |
@@ -682,10 +744,14 @@ static unsigned num_msrs_to_save; | |||
682 | 744 | ||
683 | static u32 emulated_msrs[] = { | 745 | static u32 emulated_msrs[] = { |
684 | MSR_IA32_MISC_ENABLE, | 746 | MSR_IA32_MISC_ENABLE, |
747 | MSR_IA32_MCG_STATUS, | ||
748 | MSR_IA32_MCG_CTL, | ||
685 | }; | 749 | }; |
686 | 750 | ||
687 | static int set_efer(struct kvm_vcpu *vcpu, u64 efer) | 751 | static int set_efer(struct kvm_vcpu *vcpu, u64 efer) |
688 | { | 752 | { |
753 | u64 old_efer = vcpu->arch.efer; | ||
754 | |||
689 | if (efer & efer_reserved_bits) | 755 | if (efer & efer_reserved_bits) |
690 | return 1; | 756 | return 1; |
691 | 757 | ||
@@ -714,11 +780,13 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
714 | 780 | ||
715 | kvm_x86_ops->set_efer(vcpu, efer); | 781 | kvm_x86_ops->set_efer(vcpu, efer); |
716 | 782 | ||
717 | vcpu->arch.efer = efer; | ||
718 | |||
719 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; | 783 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; |
720 | kvm_mmu_reset_context(vcpu); | 784 | kvm_mmu_reset_context(vcpu); |
721 | 785 | ||
786 | /* Update reserved bits */ | ||
787 | if ((efer ^ old_efer) & EFER_NX) | ||
788 | kvm_mmu_reset_context(vcpu); | ||
789 | |||
722 | return 0; | 790 | return 0; |
723 | } | 791 | } |
724 | 792 | ||
@@ -882,7 +950,7 @@ static int kvm_request_guest_time_update(struct kvm_vcpu *v) | |||
882 | 950 | ||
883 | if (!vcpu->time_page) | 951 | if (!vcpu->time_page) |
884 | return 0; | 952 | return 0; |
885 | set_bit(KVM_REQ_KVMCLOCK_UPDATE, &v->requests); | 953 | kvm_make_request(KVM_REQ_KVMCLOCK_UPDATE, v); |
886 | return 1; | 954 | return 1; |
887 | } | 955 | } |
888 | 956 | ||
@@ -1524,16 +1592,12 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs, | |||
1524 | { | 1592 | { |
1525 | int i, idx; | 1593 | int i, idx; |
1526 | 1594 | ||
1527 | vcpu_load(vcpu); | ||
1528 | |||
1529 | idx = srcu_read_lock(&vcpu->kvm->srcu); | 1595 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
1530 | for (i = 0; i < msrs->nmsrs; ++i) | 1596 | for (i = 0; i < msrs->nmsrs; ++i) |
1531 | if (do_msr(vcpu, entries[i].index, &entries[i].data)) | 1597 | if (do_msr(vcpu, entries[i].index, &entries[i].data)) |
1532 | break; | 1598 | break; |
1533 | srcu_read_unlock(&vcpu->kvm->srcu, idx); | 1599 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
1534 | 1600 | ||
1535 | vcpu_put(vcpu); | ||
1536 | |||
1537 | return i; | 1601 | return i; |
1538 | } | 1602 | } |
1539 | 1603 | ||
@@ -1562,7 +1626,7 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs, | |||
1562 | 1626 | ||
1563 | r = -ENOMEM; | 1627 | r = -ENOMEM; |
1564 | size = sizeof(struct kvm_msr_entry) * msrs.nmsrs; | 1628 | size = sizeof(struct kvm_msr_entry) * msrs.nmsrs; |
1565 | entries = vmalloc(size); | 1629 | entries = kmalloc(size, GFP_KERNEL); |
1566 | if (!entries) | 1630 | if (!entries) |
1567 | goto out; | 1631 | goto out; |
1568 | 1632 | ||
@@ -1581,7 +1645,7 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs, | |||
1581 | r = n; | 1645 | r = n; |
1582 | 1646 | ||
1583 | out_free: | 1647 | out_free: |
1584 | vfree(entries); | 1648 | kfree(entries); |
1585 | out: | 1649 | out: |
1586 | return r; | 1650 | return r; |
1587 | } | 1651 | } |
@@ -1618,6 +1682,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
1618 | case KVM_CAP_PCI_SEGMENT: | 1682 | case KVM_CAP_PCI_SEGMENT: |
1619 | case KVM_CAP_DEBUGREGS: | 1683 | case KVM_CAP_DEBUGREGS: |
1620 | case KVM_CAP_X86_ROBUST_SINGLESTEP: | 1684 | case KVM_CAP_X86_ROBUST_SINGLESTEP: |
1685 | case KVM_CAP_XSAVE: | ||
1621 | r = 1; | 1686 | r = 1; |
1622 | break; | 1687 | break; |
1623 | case KVM_CAP_COALESCED_MMIO: | 1688 | case KVM_CAP_COALESCED_MMIO: |
@@ -1641,6 +1706,9 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
1641 | case KVM_CAP_MCE: | 1706 | case KVM_CAP_MCE: |
1642 | r = KVM_MAX_MCE_BANKS; | 1707 | r = KVM_MAX_MCE_BANKS; |
1643 | break; | 1708 | break; |
1709 | case KVM_CAP_XCRS: | ||
1710 | r = cpu_has_xsave; | ||
1711 | break; | ||
1644 | default: | 1712 | default: |
1645 | r = 0; | 1713 | r = 0; |
1646 | break; | 1714 | break; |
@@ -1717,8 +1785,28 @@ out: | |||
1717 | return r; | 1785 | return r; |
1718 | } | 1786 | } |
1719 | 1787 | ||
1788 | static void wbinvd_ipi(void *garbage) | ||
1789 | { | ||
1790 | wbinvd(); | ||
1791 | } | ||
1792 | |||
1793 | static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu) | ||
1794 | { | ||
1795 | return vcpu->kvm->arch.iommu_domain && | ||
1796 | !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY); | ||
1797 | } | ||
1798 | |||
1720 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | 1799 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
1721 | { | 1800 | { |
1801 | /* Address WBINVD may be executed by guest */ | ||
1802 | if (need_emulate_wbinvd(vcpu)) { | ||
1803 | if (kvm_x86_ops->has_wbinvd_exit()) | ||
1804 | cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask); | ||
1805 | else if (vcpu->cpu != -1 && vcpu->cpu != cpu) | ||
1806 | smp_call_function_single(vcpu->cpu, | ||
1807 | wbinvd_ipi, NULL, 1); | ||
1808 | } | ||
1809 | |||
1722 | kvm_x86_ops->vcpu_load(vcpu, cpu); | 1810 | kvm_x86_ops->vcpu_load(vcpu, cpu); |
1723 | if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) { | 1811 | if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) { |
1724 | unsigned long khz = cpufreq_quick_get(cpu); | 1812 | unsigned long khz = cpufreq_quick_get(cpu); |
@@ -1731,8 +1819,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
1731 | 1819 | ||
1732 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | 1820 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) |
1733 | { | 1821 | { |
1734 | kvm_put_guest_fpu(vcpu); | ||
1735 | kvm_x86_ops->vcpu_put(vcpu); | 1822 | kvm_x86_ops->vcpu_put(vcpu); |
1823 | kvm_put_guest_fpu(vcpu); | ||
1736 | } | 1824 | } |
1737 | 1825 | ||
1738 | static int is_efer_nx(void) | 1826 | static int is_efer_nx(void) |
@@ -1781,7 +1869,6 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | |||
1781 | if (copy_from_user(cpuid_entries, entries, | 1869 | if (copy_from_user(cpuid_entries, entries, |
1782 | cpuid->nent * sizeof(struct kvm_cpuid_entry))) | 1870 | cpuid->nent * sizeof(struct kvm_cpuid_entry))) |
1783 | goto out_free; | 1871 | goto out_free; |
1784 | vcpu_load(vcpu); | ||
1785 | for (i = 0; i < cpuid->nent; i++) { | 1872 | for (i = 0; i < cpuid->nent; i++) { |
1786 | vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function; | 1873 | vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function; |
1787 | vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax; | 1874 | vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax; |
@@ -1799,7 +1886,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | |||
1799 | r = 0; | 1886 | r = 0; |
1800 | kvm_apic_set_version(vcpu); | 1887 | kvm_apic_set_version(vcpu); |
1801 | kvm_x86_ops->cpuid_update(vcpu); | 1888 | kvm_x86_ops->cpuid_update(vcpu); |
1802 | vcpu_put(vcpu); | 1889 | update_cpuid(vcpu); |
1803 | 1890 | ||
1804 | out_free: | 1891 | out_free: |
1805 | vfree(cpuid_entries); | 1892 | vfree(cpuid_entries); |
@@ -1820,11 +1907,10 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, | |||
1820 | if (copy_from_user(&vcpu->arch.cpuid_entries, entries, | 1907 | if (copy_from_user(&vcpu->arch.cpuid_entries, entries, |
1821 | cpuid->nent * sizeof(struct kvm_cpuid_entry2))) | 1908 | cpuid->nent * sizeof(struct kvm_cpuid_entry2))) |
1822 | goto out; | 1909 | goto out; |
1823 | vcpu_load(vcpu); | ||
1824 | vcpu->arch.cpuid_nent = cpuid->nent; | 1910 | vcpu->arch.cpuid_nent = cpuid->nent; |
1825 | kvm_apic_set_version(vcpu); | 1911 | kvm_apic_set_version(vcpu); |
1826 | kvm_x86_ops->cpuid_update(vcpu); | 1912 | kvm_x86_ops->cpuid_update(vcpu); |
1827 | vcpu_put(vcpu); | 1913 | update_cpuid(vcpu); |
1828 | return 0; | 1914 | return 0; |
1829 | 1915 | ||
1830 | out: | 1916 | out: |
@@ -1837,7 +1923,6 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, | |||
1837 | { | 1923 | { |
1838 | int r; | 1924 | int r; |
1839 | 1925 | ||
1840 | vcpu_load(vcpu); | ||
1841 | r = -E2BIG; | 1926 | r = -E2BIG; |
1842 | if (cpuid->nent < vcpu->arch.cpuid_nent) | 1927 | if (cpuid->nent < vcpu->arch.cpuid_nent) |
1843 | goto out; | 1928 | goto out; |
@@ -1849,7 +1934,6 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, | |||
1849 | 1934 | ||
1850 | out: | 1935 | out: |
1851 | cpuid->nent = vcpu->arch.cpuid_nent; | 1936 | cpuid->nent = vcpu->arch.cpuid_nent; |
1852 | vcpu_put(vcpu); | ||
1853 | return r; | 1937 | return r; |
1854 | } | 1938 | } |
1855 | 1939 | ||
@@ -1901,13 +1985,13 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
1901 | 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); | 1985 | 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); |
1902 | /* cpuid 1.ecx */ | 1986 | /* cpuid 1.ecx */ |
1903 | const u32 kvm_supported_word4_x86_features = | 1987 | const u32 kvm_supported_word4_x86_features = |
1904 | F(XMM3) | 0 /* Reserved, DTES64, MONITOR */ | | 1988 | F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ | |
1905 | 0 /* DS-CPL, VMX, SMX, EST */ | | 1989 | 0 /* DS-CPL, VMX, SMX, EST */ | |
1906 | 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | | 1990 | 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | |
1907 | 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ | | 1991 | 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ | |
1908 | 0 /* Reserved, DCA */ | F(XMM4_1) | | 1992 | 0 /* Reserved, DCA */ | F(XMM4_1) | |
1909 | F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | | 1993 | F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | |
1910 | 0 /* Reserved, XSAVE, OSXSAVE */; | 1994 | 0 /* Reserved, AES */ | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX); |
1911 | /* cpuid 0x80000001.ecx */ | 1995 | /* cpuid 0x80000001.ecx */ |
1912 | const u32 kvm_supported_word6_x86_features = | 1996 | const u32 kvm_supported_word6_x86_features = |
1913 | F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ | | 1997 | F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ | |
@@ -1922,7 +2006,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
1922 | 2006 | ||
1923 | switch (function) { | 2007 | switch (function) { |
1924 | case 0: | 2008 | case 0: |
1925 | entry->eax = min(entry->eax, (u32)0xb); | 2009 | entry->eax = min(entry->eax, (u32)0xd); |
1926 | break; | 2010 | break; |
1927 | case 1: | 2011 | case 1: |
1928 | entry->edx &= kvm_supported_word0_x86_features; | 2012 | entry->edx &= kvm_supported_word0_x86_features; |
@@ -1980,6 +2064,20 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
1980 | } | 2064 | } |
1981 | break; | 2065 | break; |
1982 | } | 2066 | } |
2067 | case 0xd: { | ||
2068 | int i; | ||
2069 | |||
2070 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
2071 | for (i = 1; *nent < maxnent; ++i) { | ||
2072 | if (entry[i - 1].eax == 0 && i != 2) | ||
2073 | break; | ||
2074 | do_cpuid_1_ent(&entry[i], function, i); | ||
2075 | entry[i].flags |= | ||
2076 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
2077 | ++*nent; | ||
2078 | } | ||
2079 | break; | ||
2080 | } | ||
1983 | case KVM_CPUID_SIGNATURE: { | 2081 | case KVM_CPUID_SIGNATURE: { |
1984 | char signature[12] = "KVMKVMKVM\0\0"; | 2082 | char signature[12] = "KVMKVMKVM\0\0"; |
1985 | u32 *sigptr = (u32 *)signature; | 2083 | u32 *sigptr = (u32 *)signature; |
@@ -2081,9 +2179,7 @@ out: | |||
2081 | static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, | 2179 | static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, |
2082 | struct kvm_lapic_state *s) | 2180 | struct kvm_lapic_state *s) |
2083 | { | 2181 | { |
2084 | vcpu_load(vcpu); | ||
2085 | memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s); | 2182 | memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s); |
2086 | vcpu_put(vcpu); | ||
2087 | 2183 | ||
2088 | return 0; | 2184 | return 0; |
2089 | } | 2185 | } |
@@ -2091,11 +2187,9 @@ static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, | |||
2091 | static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, | 2187 | static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, |
2092 | struct kvm_lapic_state *s) | 2188 | struct kvm_lapic_state *s) |
2093 | { | 2189 | { |
2094 | vcpu_load(vcpu); | ||
2095 | memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s); | 2190 | memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s); |
2096 | kvm_apic_post_state_restore(vcpu); | 2191 | kvm_apic_post_state_restore(vcpu); |
2097 | update_cr8_intercept(vcpu); | 2192 | update_cr8_intercept(vcpu); |
2098 | vcpu_put(vcpu); | ||
2099 | 2193 | ||
2100 | return 0; | 2194 | return 0; |
2101 | } | 2195 | } |
@@ -2107,20 +2201,15 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, | |||
2107 | return -EINVAL; | 2201 | return -EINVAL; |
2108 | if (irqchip_in_kernel(vcpu->kvm)) | 2202 | if (irqchip_in_kernel(vcpu->kvm)) |
2109 | return -ENXIO; | 2203 | return -ENXIO; |
2110 | vcpu_load(vcpu); | ||
2111 | 2204 | ||
2112 | kvm_queue_interrupt(vcpu, irq->irq, false); | 2205 | kvm_queue_interrupt(vcpu, irq->irq, false); |
2113 | 2206 | ||
2114 | vcpu_put(vcpu); | ||
2115 | |||
2116 | return 0; | 2207 | return 0; |
2117 | } | 2208 | } |
2118 | 2209 | ||
2119 | static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu) | 2210 | static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu) |
2120 | { | 2211 | { |
2121 | vcpu_load(vcpu); | ||
2122 | kvm_inject_nmi(vcpu); | 2212 | kvm_inject_nmi(vcpu); |
2123 | vcpu_put(vcpu); | ||
2124 | 2213 | ||
2125 | return 0; | 2214 | return 0; |
2126 | } | 2215 | } |
@@ -2140,7 +2229,6 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, | |||
2140 | int r; | 2229 | int r; |
2141 | unsigned bank_num = mcg_cap & 0xff, bank; | 2230 | unsigned bank_num = mcg_cap & 0xff, bank; |
2142 | 2231 | ||
2143 | vcpu_load(vcpu); | ||
2144 | r = -EINVAL; | 2232 | r = -EINVAL; |
2145 | if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) | 2233 | if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) |
2146 | goto out; | 2234 | goto out; |
@@ -2155,7 +2243,6 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, | |||
2155 | for (bank = 0; bank < bank_num; bank++) | 2243 | for (bank = 0; bank < bank_num; bank++) |
2156 | vcpu->arch.mce_banks[bank*4] = ~(u64)0; | 2244 | vcpu->arch.mce_banks[bank*4] = ~(u64)0; |
2157 | out: | 2245 | out: |
2158 | vcpu_put(vcpu); | ||
2159 | return r; | 2246 | return r; |
2160 | } | 2247 | } |
2161 | 2248 | ||
@@ -2188,7 +2275,7 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, | |||
2188 | printk(KERN_DEBUG "kvm: set_mce: " | 2275 | printk(KERN_DEBUG "kvm: set_mce: " |
2189 | "injects mce exception while " | 2276 | "injects mce exception while " |
2190 | "previous one is in progress!\n"); | 2277 | "previous one is in progress!\n"); |
2191 | set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); | 2278 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); |
2192 | return 0; | 2279 | return 0; |
2193 | } | 2280 | } |
2194 | if (banks[1] & MCI_STATUS_VAL) | 2281 | if (banks[1] & MCI_STATUS_VAL) |
@@ -2213,8 +2300,6 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, | |||
2213 | static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | 2300 | static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, |
2214 | struct kvm_vcpu_events *events) | 2301 | struct kvm_vcpu_events *events) |
2215 | { | 2302 | { |
2216 | vcpu_load(vcpu); | ||
2217 | |||
2218 | events->exception.injected = | 2303 | events->exception.injected = |
2219 | vcpu->arch.exception.pending && | 2304 | vcpu->arch.exception.pending && |
2220 | !kvm_exception_is_soft(vcpu->arch.exception.nr); | 2305 | !kvm_exception_is_soft(vcpu->arch.exception.nr); |
@@ -2239,8 +2324,6 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | |||
2239 | events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING | 2324 | events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING |
2240 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR | 2325 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR |
2241 | | KVM_VCPUEVENT_VALID_SHADOW); | 2326 | | KVM_VCPUEVENT_VALID_SHADOW); |
2242 | |||
2243 | vcpu_put(vcpu); | ||
2244 | } | 2327 | } |
2245 | 2328 | ||
2246 | static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | 2329 | static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, |
@@ -2251,8 +2334,6 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
2251 | | KVM_VCPUEVENT_VALID_SHADOW)) | 2334 | | KVM_VCPUEVENT_VALID_SHADOW)) |
2252 | return -EINVAL; | 2335 | return -EINVAL; |
2253 | 2336 | ||
2254 | vcpu_load(vcpu); | ||
2255 | |||
2256 | vcpu->arch.exception.pending = events->exception.injected; | 2337 | vcpu->arch.exception.pending = events->exception.injected; |
2257 | vcpu->arch.exception.nr = events->exception.nr; | 2338 | vcpu->arch.exception.nr = events->exception.nr; |
2258 | vcpu->arch.exception.has_error_code = events->exception.has_error_code; | 2339 | vcpu->arch.exception.has_error_code = events->exception.has_error_code; |
@@ -2275,22 +2356,16 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
2275 | if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR) | 2356 | if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR) |
2276 | vcpu->arch.sipi_vector = events->sipi_vector; | 2357 | vcpu->arch.sipi_vector = events->sipi_vector; |
2277 | 2358 | ||
2278 | vcpu_put(vcpu); | ||
2279 | |||
2280 | return 0; | 2359 | return 0; |
2281 | } | 2360 | } |
2282 | 2361 | ||
2283 | static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, | 2362 | static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, |
2284 | struct kvm_debugregs *dbgregs) | 2363 | struct kvm_debugregs *dbgregs) |
2285 | { | 2364 | { |
2286 | vcpu_load(vcpu); | ||
2287 | |||
2288 | memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); | 2365 | memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); |
2289 | dbgregs->dr6 = vcpu->arch.dr6; | 2366 | dbgregs->dr6 = vcpu->arch.dr6; |
2290 | dbgregs->dr7 = vcpu->arch.dr7; | 2367 | dbgregs->dr7 = vcpu->arch.dr7; |
2291 | dbgregs->flags = 0; | 2368 | dbgregs->flags = 0; |
2292 | |||
2293 | vcpu_put(vcpu); | ||
2294 | } | 2369 | } |
2295 | 2370 | ||
2296 | static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, | 2371 | static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, |
@@ -2299,40 +2374,113 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, | |||
2299 | if (dbgregs->flags) | 2374 | if (dbgregs->flags) |
2300 | return -EINVAL; | 2375 | return -EINVAL; |
2301 | 2376 | ||
2302 | vcpu_load(vcpu); | ||
2303 | |||
2304 | memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); | 2377 | memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); |
2305 | vcpu->arch.dr6 = dbgregs->dr6; | 2378 | vcpu->arch.dr6 = dbgregs->dr6; |
2306 | vcpu->arch.dr7 = dbgregs->dr7; | 2379 | vcpu->arch.dr7 = dbgregs->dr7; |
2307 | 2380 | ||
2308 | vcpu_put(vcpu); | 2381 | return 0; |
2382 | } | ||
2383 | |||
2384 | static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, | ||
2385 | struct kvm_xsave *guest_xsave) | ||
2386 | { | ||
2387 | if (cpu_has_xsave) | ||
2388 | memcpy(guest_xsave->region, | ||
2389 | &vcpu->arch.guest_fpu.state->xsave, | ||
2390 | sizeof(struct xsave_struct)); | ||
2391 | else { | ||
2392 | memcpy(guest_xsave->region, | ||
2393 | &vcpu->arch.guest_fpu.state->fxsave, | ||
2394 | sizeof(struct i387_fxsave_struct)); | ||
2395 | *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] = | ||
2396 | XSTATE_FPSSE; | ||
2397 | } | ||
2398 | } | ||
2399 | |||
2400 | static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, | ||
2401 | struct kvm_xsave *guest_xsave) | ||
2402 | { | ||
2403 | u64 xstate_bv = | ||
2404 | *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)]; | ||
2309 | 2405 | ||
2406 | if (cpu_has_xsave) | ||
2407 | memcpy(&vcpu->arch.guest_fpu.state->xsave, | ||
2408 | guest_xsave->region, sizeof(struct xsave_struct)); | ||
2409 | else { | ||
2410 | if (xstate_bv & ~XSTATE_FPSSE) | ||
2411 | return -EINVAL; | ||
2412 | memcpy(&vcpu->arch.guest_fpu.state->fxsave, | ||
2413 | guest_xsave->region, sizeof(struct i387_fxsave_struct)); | ||
2414 | } | ||
2310 | return 0; | 2415 | return 0; |
2311 | } | 2416 | } |
2312 | 2417 | ||
2418 | static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu, | ||
2419 | struct kvm_xcrs *guest_xcrs) | ||
2420 | { | ||
2421 | if (!cpu_has_xsave) { | ||
2422 | guest_xcrs->nr_xcrs = 0; | ||
2423 | return; | ||
2424 | } | ||
2425 | |||
2426 | guest_xcrs->nr_xcrs = 1; | ||
2427 | guest_xcrs->flags = 0; | ||
2428 | guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK; | ||
2429 | guest_xcrs->xcrs[0].value = vcpu->arch.xcr0; | ||
2430 | } | ||
2431 | |||
2432 | static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu, | ||
2433 | struct kvm_xcrs *guest_xcrs) | ||
2434 | { | ||
2435 | int i, r = 0; | ||
2436 | |||
2437 | if (!cpu_has_xsave) | ||
2438 | return -EINVAL; | ||
2439 | |||
2440 | if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags) | ||
2441 | return -EINVAL; | ||
2442 | |||
2443 | for (i = 0; i < guest_xcrs->nr_xcrs; i++) | ||
2444 | /* Only support XCR0 currently */ | ||
2445 | if (guest_xcrs->xcrs[0].xcr == XCR_XFEATURE_ENABLED_MASK) { | ||
2446 | r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK, | ||
2447 | guest_xcrs->xcrs[0].value); | ||
2448 | break; | ||
2449 | } | ||
2450 | if (r) | ||
2451 | r = -EINVAL; | ||
2452 | return r; | ||
2453 | } | ||
2454 | |||
2313 | long kvm_arch_vcpu_ioctl(struct file *filp, | 2455 | long kvm_arch_vcpu_ioctl(struct file *filp, |
2314 | unsigned int ioctl, unsigned long arg) | 2456 | unsigned int ioctl, unsigned long arg) |
2315 | { | 2457 | { |
2316 | struct kvm_vcpu *vcpu = filp->private_data; | 2458 | struct kvm_vcpu *vcpu = filp->private_data; |
2317 | void __user *argp = (void __user *)arg; | 2459 | void __user *argp = (void __user *)arg; |
2318 | int r; | 2460 | int r; |
2319 | struct kvm_lapic_state *lapic = NULL; | 2461 | union { |
2462 | struct kvm_lapic_state *lapic; | ||
2463 | struct kvm_xsave *xsave; | ||
2464 | struct kvm_xcrs *xcrs; | ||
2465 | void *buffer; | ||
2466 | } u; | ||
2320 | 2467 | ||
2468 | u.buffer = NULL; | ||
2321 | switch (ioctl) { | 2469 | switch (ioctl) { |
2322 | case KVM_GET_LAPIC: { | 2470 | case KVM_GET_LAPIC: { |
2323 | r = -EINVAL; | 2471 | r = -EINVAL; |
2324 | if (!vcpu->arch.apic) | 2472 | if (!vcpu->arch.apic) |
2325 | goto out; | 2473 | goto out; |
2326 | lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); | 2474 | u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); |
2327 | 2475 | ||
2328 | r = -ENOMEM; | 2476 | r = -ENOMEM; |
2329 | if (!lapic) | 2477 | if (!u.lapic) |
2330 | goto out; | 2478 | goto out; |
2331 | r = kvm_vcpu_ioctl_get_lapic(vcpu, lapic); | 2479 | r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic); |
2332 | if (r) | 2480 | if (r) |
2333 | goto out; | 2481 | goto out; |
2334 | r = -EFAULT; | 2482 | r = -EFAULT; |
2335 | if (copy_to_user(argp, lapic, sizeof(struct kvm_lapic_state))) | 2483 | if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state))) |
2336 | goto out; | 2484 | goto out; |
2337 | r = 0; | 2485 | r = 0; |
2338 | break; | 2486 | break; |
@@ -2341,14 +2489,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
2341 | r = -EINVAL; | 2489 | r = -EINVAL; |
2342 | if (!vcpu->arch.apic) | 2490 | if (!vcpu->arch.apic) |
2343 | goto out; | 2491 | goto out; |
2344 | lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); | 2492 | u.lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); |
2345 | r = -ENOMEM; | 2493 | r = -ENOMEM; |
2346 | if (!lapic) | 2494 | if (!u.lapic) |
2347 | goto out; | 2495 | goto out; |
2348 | r = -EFAULT; | 2496 | r = -EFAULT; |
2349 | if (copy_from_user(lapic, argp, sizeof(struct kvm_lapic_state))) | 2497 | if (copy_from_user(u.lapic, argp, sizeof(struct kvm_lapic_state))) |
2350 | goto out; | 2498 | goto out; |
2351 | r = kvm_vcpu_ioctl_set_lapic(vcpu, lapic); | 2499 | r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic); |
2352 | if (r) | 2500 | if (r) |
2353 | goto out; | 2501 | goto out; |
2354 | r = 0; | 2502 | r = 0; |
@@ -2464,9 +2612,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
2464 | r = -EFAULT; | 2612 | r = -EFAULT; |
2465 | if (copy_from_user(&mce, argp, sizeof mce)) | 2613 | if (copy_from_user(&mce, argp, sizeof mce)) |
2466 | goto out; | 2614 | goto out; |
2467 | vcpu_load(vcpu); | ||
2468 | r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); | 2615 | r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); |
2469 | vcpu_put(vcpu); | ||
2470 | break; | 2616 | break; |
2471 | } | 2617 | } |
2472 | case KVM_GET_VCPU_EVENTS: { | 2618 | case KVM_GET_VCPU_EVENTS: { |
@@ -2513,11 +2659,67 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
2513 | r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs); | 2659 | r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs); |
2514 | break; | 2660 | break; |
2515 | } | 2661 | } |
2662 | case KVM_GET_XSAVE: { | ||
2663 | u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL); | ||
2664 | r = -ENOMEM; | ||
2665 | if (!u.xsave) | ||
2666 | break; | ||
2667 | |||
2668 | kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave); | ||
2669 | |||
2670 | r = -EFAULT; | ||
2671 | if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave))) | ||
2672 | break; | ||
2673 | r = 0; | ||
2674 | break; | ||
2675 | } | ||
2676 | case KVM_SET_XSAVE: { | ||
2677 | u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL); | ||
2678 | r = -ENOMEM; | ||
2679 | if (!u.xsave) | ||
2680 | break; | ||
2681 | |||
2682 | r = -EFAULT; | ||
2683 | if (copy_from_user(u.xsave, argp, sizeof(struct kvm_xsave))) | ||
2684 | break; | ||
2685 | |||
2686 | r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave); | ||
2687 | break; | ||
2688 | } | ||
2689 | case KVM_GET_XCRS: { | ||
2690 | u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL); | ||
2691 | r = -ENOMEM; | ||
2692 | if (!u.xcrs) | ||
2693 | break; | ||
2694 | |||
2695 | kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs); | ||
2696 | |||
2697 | r = -EFAULT; | ||
2698 | if (copy_to_user(argp, u.xcrs, | ||
2699 | sizeof(struct kvm_xcrs))) | ||
2700 | break; | ||
2701 | r = 0; | ||
2702 | break; | ||
2703 | } | ||
2704 | case KVM_SET_XCRS: { | ||
2705 | u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL); | ||
2706 | r = -ENOMEM; | ||
2707 | if (!u.xcrs) | ||
2708 | break; | ||
2709 | |||
2710 | r = -EFAULT; | ||
2711 | if (copy_from_user(u.xcrs, argp, | ||
2712 | sizeof(struct kvm_xcrs))) | ||
2713 | break; | ||
2714 | |||
2715 | r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs); | ||
2716 | break; | ||
2717 | } | ||
2516 | default: | 2718 | default: |
2517 | r = -EINVAL; | 2719 | r = -EINVAL; |
2518 | } | 2720 | } |
2519 | out: | 2721 | out: |
2520 | kfree(lapic); | 2722 | kfree(u.buffer); |
2521 | return r; | 2723 | return r; |
2522 | } | 2724 | } |
2523 | 2725 | ||
@@ -2560,115 +2762,6 @@ static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm) | |||
2560 | return kvm->arch.n_alloc_mmu_pages; | 2762 | return kvm->arch.n_alloc_mmu_pages; |
2561 | } | 2763 | } |
2562 | 2764 | ||
2563 | gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn) | ||
2564 | { | ||
2565 | int i; | ||
2566 | struct kvm_mem_alias *alias; | ||
2567 | struct kvm_mem_aliases *aliases; | ||
2568 | |||
2569 | aliases = kvm_aliases(kvm); | ||
2570 | |||
2571 | for (i = 0; i < aliases->naliases; ++i) { | ||
2572 | alias = &aliases->aliases[i]; | ||
2573 | if (alias->flags & KVM_ALIAS_INVALID) | ||
2574 | continue; | ||
2575 | if (gfn >= alias->base_gfn | ||
2576 | && gfn < alias->base_gfn + alias->npages) | ||
2577 | return alias->target_gfn + gfn - alias->base_gfn; | ||
2578 | } | ||
2579 | return gfn; | ||
2580 | } | ||
2581 | |||
2582 | gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) | ||
2583 | { | ||
2584 | int i; | ||
2585 | struct kvm_mem_alias *alias; | ||
2586 | struct kvm_mem_aliases *aliases; | ||
2587 | |||
2588 | aliases = kvm_aliases(kvm); | ||
2589 | |||
2590 | for (i = 0; i < aliases->naliases; ++i) { | ||
2591 | alias = &aliases->aliases[i]; | ||
2592 | if (gfn >= alias->base_gfn | ||
2593 | && gfn < alias->base_gfn + alias->npages) | ||
2594 | return alias->target_gfn + gfn - alias->base_gfn; | ||
2595 | } | ||
2596 | return gfn; | ||
2597 | } | ||
2598 | |||
2599 | /* | ||
2600 | * Set a new alias region. Aliases map a portion of physical memory into | ||
2601 | * another portion. This is useful for memory windows, for example the PC | ||
2602 | * VGA region. | ||
2603 | */ | ||
2604 | static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, | ||
2605 | struct kvm_memory_alias *alias) | ||
2606 | { | ||
2607 | int r, n; | ||
2608 | struct kvm_mem_alias *p; | ||
2609 | struct kvm_mem_aliases *aliases, *old_aliases; | ||
2610 | |||
2611 | r = -EINVAL; | ||
2612 | /* General sanity checks */ | ||
2613 | if (alias->memory_size & (PAGE_SIZE - 1)) | ||
2614 | goto out; | ||
2615 | if (alias->guest_phys_addr & (PAGE_SIZE - 1)) | ||
2616 | goto out; | ||
2617 | if (alias->slot >= KVM_ALIAS_SLOTS) | ||
2618 | goto out; | ||
2619 | if (alias->guest_phys_addr + alias->memory_size | ||
2620 | < alias->guest_phys_addr) | ||
2621 | goto out; | ||
2622 | if (alias->target_phys_addr + alias->memory_size | ||
2623 | < alias->target_phys_addr) | ||
2624 | goto out; | ||
2625 | |||
2626 | r = -ENOMEM; | ||
2627 | aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); | ||
2628 | if (!aliases) | ||
2629 | goto out; | ||
2630 | |||
2631 | mutex_lock(&kvm->slots_lock); | ||
2632 | |||
2633 | /* invalidate any gfn reference in case of deletion/shrinking */ | ||
2634 | memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases)); | ||
2635 | aliases->aliases[alias->slot].flags |= KVM_ALIAS_INVALID; | ||
2636 | old_aliases = kvm->arch.aliases; | ||
2637 | rcu_assign_pointer(kvm->arch.aliases, aliases); | ||
2638 | synchronize_srcu_expedited(&kvm->srcu); | ||
2639 | kvm_mmu_zap_all(kvm); | ||
2640 | kfree(old_aliases); | ||
2641 | |||
2642 | r = -ENOMEM; | ||
2643 | aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); | ||
2644 | if (!aliases) | ||
2645 | goto out_unlock; | ||
2646 | |||
2647 | memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases)); | ||
2648 | |||
2649 | p = &aliases->aliases[alias->slot]; | ||
2650 | p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; | ||
2651 | p->npages = alias->memory_size >> PAGE_SHIFT; | ||
2652 | p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT; | ||
2653 | p->flags &= ~(KVM_ALIAS_INVALID); | ||
2654 | |||
2655 | for (n = KVM_ALIAS_SLOTS; n > 0; --n) | ||
2656 | if (aliases->aliases[n - 1].npages) | ||
2657 | break; | ||
2658 | aliases->naliases = n; | ||
2659 | |||
2660 | old_aliases = kvm->arch.aliases; | ||
2661 | rcu_assign_pointer(kvm->arch.aliases, aliases); | ||
2662 | synchronize_srcu_expedited(&kvm->srcu); | ||
2663 | kfree(old_aliases); | ||
2664 | r = 0; | ||
2665 | |||
2666 | out_unlock: | ||
2667 | mutex_unlock(&kvm->slots_lock); | ||
2668 | out: | ||
2669 | return r; | ||
2670 | } | ||
2671 | |||
2672 | static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) | 2765 | static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) |
2673 | { | 2766 | { |
2674 | int r; | 2767 | int r; |
@@ -2797,7 +2890,6 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
2797 | struct kvm_memory_slot *memslot; | 2890 | struct kvm_memory_slot *memslot; |
2798 | unsigned long n; | 2891 | unsigned long n; |
2799 | unsigned long is_dirty = 0; | 2892 | unsigned long is_dirty = 0; |
2800 | unsigned long *dirty_bitmap = NULL; | ||
2801 | 2893 | ||
2802 | mutex_lock(&kvm->slots_lock); | 2894 | mutex_lock(&kvm->slots_lock); |
2803 | 2895 | ||
@@ -2812,27 +2904,30 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
2812 | 2904 | ||
2813 | n = kvm_dirty_bitmap_bytes(memslot); | 2905 | n = kvm_dirty_bitmap_bytes(memslot); |
2814 | 2906 | ||
2815 | r = -ENOMEM; | ||
2816 | dirty_bitmap = vmalloc(n); | ||
2817 | if (!dirty_bitmap) | ||
2818 | goto out; | ||
2819 | memset(dirty_bitmap, 0, n); | ||
2820 | |||
2821 | for (i = 0; !is_dirty && i < n/sizeof(long); i++) | 2907 | for (i = 0; !is_dirty && i < n/sizeof(long); i++) |
2822 | is_dirty = memslot->dirty_bitmap[i]; | 2908 | is_dirty = memslot->dirty_bitmap[i]; |
2823 | 2909 | ||
2824 | /* If nothing is dirty, don't bother messing with page tables. */ | 2910 | /* If nothing is dirty, don't bother messing with page tables. */ |
2825 | if (is_dirty) { | 2911 | if (is_dirty) { |
2826 | struct kvm_memslots *slots, *old_slots; | 2912 | struct kvm_memslots *slots, *old_slots; |
2913 | unsigned long *dirty_bitmap; | ||
2827 | 2914 | ||
2828 | spin_lock(&kvm->mmu_lock); | 2915 | spin_lock(&kvm->mmu_lock); |
2829 | kvm_mmu_slot_remove_write_access(kvm, log->slot); | 2916 | kvm_mmu_slot_remove_write_access(kvm, log->slot); |
2830 | spin_unlock(&kvm->mmu_lock); | 2917 | spin_unlock(&kvm->mmu_lock); |
2831 | 2918 | ||
2832 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | 2919 | r = -ENOMEM; |
2833 | if (!slots) | 2920 | dirty_bitmap = vmalloc(n); |
2834 | goto out_free; | 2921 | if (!dirty_bitmap) |
2922 | goto out; | ||
2923 | memset(dirty_bitmap, 0, n); | ||
2835 | 2924 | ||
2925 | r = -ENOMEM; | ||
2926 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | ||
2927 | if (!slots) { | ||
2928 | vfree(dirty_bitmap); | ||
2929 | goto out; | ||
2930 | } | ||
2836 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); | 2931 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); |
2837 | slots->memslots[log->slot].dirty_bitmap = dirty_bitmap; | 2932 | slots->memslots[log->slot].dirty_bitmap = dirty_bitmap; |
2838 | 2933 | ||
@@ -2841,13 +2936,20 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
2841 | synchronize_srcu_expedited(&kvm->srcu); | 2936 | synchronize_srcu_expedited(&kvm->srcu); |
2842 | dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap; | 2937 | dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap; |
2843 | kfree(old_slots); | 2938 | kfree(old_slots); |
2939 | |||
2940 | r = -EFAULT; | ||
2941 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) { | ||
2942 | vfree(dirty_bitmap); | ||
2943 | goto out; | ||
2944 | } | ||
2945 | vfree(dirty_bitmap); | ||
2946 | } else { | ||
2947 | r = -EFAULT; | ||
2948 | if (clear_user(log->dirty_bitmap, n)) | ||
2949 | goto out; | ||
2844 | } | 2950 | } |
2845 | 2951 | ||
2846 | r = 0; | 2952 | r = 0; |
2847 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) | ||
2848 | r = -EFAULT; | ||
2849 | out_free: | ||
2850 | vfree(dirty_bitmap); | ||
2851 | out: | 2953 | out: |
2852 | mutex_unlock(&kvm->slots_lock); | 2954 | mutex_unlock(&kvm->slots_lock); |
2853 | return r; | 2955 | return r; |
@@ -2867,7 +2969,6 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2867 | union { | 2969 | union { |
2868 | struct kvm_pit_state ps; | 2970 | struct kvm_pit_state ps; |
2869 | struct kvm_pit_state2 ps2; | 2971 | struct kvm_pit_state2 ps2; |
2870 | struct kvm_memory_alias alias; | ||
2871 | struct kvm_pit_config pit_config; | 2972 | struct kvm_pit_config pit_config; |
2872 | } u; | 2973 | } u; |
2873 | 2974 | ||
@@ -2888,22 +2989,6 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2888 | goto out; | 2989 | goto out; |
2889 | break; | 2990 | break; |
2890 | } | 2991 | } |
2891 | case KVM_SET_MEMORY_REGION: { | ||
2892 | struct kvm_memory_region kvm_mem; | ||
2893 | struct kvm_userspace_memory_region kvm_userspace_mem; | ||
2894 | |||
2895 | r = -EFAULT; | ||
2896 | if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem)) | ||
2897 | goto out; | ||
2898 | kvm_userspace_mem.slot = kvm_mem.slot; | ||
2899 | kvm_userspace_mem.flags = kvm_mem.flags; | ||
2900 | kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr; | ||
2901 | kvm_userspace_mem.memory_size = kvm_mem.memory_size; | ||
2902 | r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0); | ||
2903 | if (r) | ||
2904 | goto out; | ||
2905 | break; | ||
2906 | } | ||
2907 | case KVM_SET_NR_MMU_PAGES: | 2992 | case KVM_SET_NR_MMU_PAGES: |
2908 | r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg); | 2993 | r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg); |
2909 | if (r) | 2994 | if (r) |
@@ -2912,14 +2997,6 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2912 | case KVM_GET_NR_MMU_PAGES: | 2997 | case KVM_GET_NR_MMU_PAGES: |
2913 | r = kvm_vm_ioctl_get_nr_mmu_pages(kvm); | 2998 | r = kvm_vm_ioctl_get_nr_mmu_pages(kvm); |
2914 | break; | 2999 | break; |
2915 | case KVM_SET_MEMORY_ALIAS: | ||
2916 | r = -EFAULT; | ||
2917 | if (copy_from_user(&u.alias, argp, sizeof(struct kvm_memory_alias))) | ||
2918 | goto out; | ||
2919 | r = kvm_vm_ioctl_set_memory_alias(kvm, &u.alias); | ||
2920 | if (r) | ||
2921 | goto out; | ||
2922 | break; | ||
2923 | case KVM_CREATE_IRQCHIP: { | 3000 | case KVM_CREATE_IRQCHIP: { |
2924 | struct kvm_pic *vpic; | 3001 | struct kvm_pic *vpic; |
2925 | 3002 | ||
@@ -3259,7 +3336,7 @@ static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, | |||
3259 | } | 3336 | } |
3260 | ret = kvm_read_guest(vcpu->kvm, gpa, data, toread); | 3337 | ret = kvm_read_guest(vcpu->kvm, gpa, data, toread); |
3261 | if (ret < 0) { | 3338 | if (ret < 0) { |
3262 | r = X86EMUL_UNHANDLEABLE; | 3339 | r = X86EMUL_IO_NEEDED; |
3263 | goto out; | 3340 | goto out; |
3264 | } | 3341 | } |
3265 | 3342 | ||
@@ -3315,7 +3392,7 @@ static int kvm_write_guest_virt_system(gva_t addr, void *val, | |||
3315 | } | 3392 | } |
3316 | ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite); | 3393 | ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite); |
3317 | if (ret < 0) { | 3394 | if (ret < 0) { |
3318 | r = X86EMUL_UNHANDLEABLE; | 3395 | r = X86EMUL_IO_NEEDED; |
3319 | goto out; | 3396 | goto out; |
3320 | } | 3397 | } |
3321 | 3398 | ||
@@ -3330,10 +3407,10 @@ out: | |||
3330 | static int emulator_read_emulated(unsigned long addr, | 3407 | static int emulator_read_emulated(unsigned long addr, |
3331 | void *val, | 3408 | void *val, |
3332 | unsigned int bytes, | 3409 | unsigned int bytes, |
3410 | unsigned int *error_code, | ||
3333 | struct kvm_vcpu *vcpu) | 3411 | struct kvm_vcpu *vcpu) |
3334 | { | 3412 | { |
3335 | gpa_t gpa; | 3413 | gpa_t gpa; |
3336 | u32 error_code; | ||
3337 | 3414 | ||
3338 | if (vcpu->mmio_read_completed) { | 3415 | if (vcpu->mmio_read_completed) { |
3339 | memcpy(val, vcpu->mmio_data, bytes); | 3416 | memcpy(val, vcpu->mmio_data, bytes); |
@@ -3343,12 +3420,10 @@ static int emulator_read_emulated(unsigned long addr, | |||
3343 | return X86EMUL_CONTINUE; | 3420 | return X86EMUL_CONTINUE; |
3344 | } | 3421 | } |
3345 | 3422 | ||
3346 | gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, &error_code); | 3423 | gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, error_code); |
3347 | 3424 | ||
3348 | if (gpa == UNMAPPED_GVA) { | 3425 | if (gpa == UNMAPPED_GVA) |
3349 | kvm_inject_page_fault(vcpu, addr, error_code); | ||
3350 | return X86EMUL_PROPAGATE_FAULT; | 3426 | return X86EMUL_PROPAGATE_FAULT; |
3351 | } | ||
3352 | 3427 | ||
3353 | /* For APIC access vmexit */ | 3428 | /* For APIC access vmexit */ |
3354 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | 3429 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) |
@@ -3370,11 +3445,12 @@ mmio: | |||
3370 | trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); | 3445 | trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); |
3371 | 3446 | ||
3372 | vcpu->mmio_needed = 1; | 3447 | vcpu->mmio_needed = 1; |
3373 | vcpu->mmio_phys_addr = gpa; | 3448 | vcpu->run->exit_reason = KVM_EXIT_MMIO; |
3374 | vcpu->mmio_size = bytes; | 3449 | vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; |
3375 | vcpu->mmio_is_write = 0; | 3450 | vcpu->run->mmio.len = vcpu->mmio_size = bytes; |
3451 | vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0; | ||
3376 | 3452 | ||
3377 | return X86EMUL_UNHANDLEABLE; | 3453 | return X86EMUL_IO_NEEDED; |
3378 | } | 3454 | } |
3379 | 3455 | ||
3380 | int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | 3456 | int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, |
@@ -3392,17 +3468,15 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3392 | static int emulator_write_emulated_onepage(unsigned long addr, | 3468 | static int emulator_write_emulated_onepage(unsigned long addr, |
3393 | const void *val, | 3469 | const void *val, |
3394 | unsigned int bytes, | 3470 | unsigned int bytes, |
3471 | unsigned int *error_code, | ||
3395 | struct kvm_vcpu *vcpu) | 3472 | struct kvm_vcpu *vcpu) |
3396 | { | 3473 | { |
3397 | gpa_t gpa; | 3474 | gpa_t gpa; |
3398 | u32 error_code; | ||
3399 | 3475 | ||
3400 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, &error_code); | 3476 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error_code); |
3401 | 3477 | ||
3402 | if (gpa == UNMAPPED_GVA) { | 3478 | if (gpa == UNMAPPED_GVA) |
3403 | kvm_inject_page_fault(vcpu, addr, error_code); | ||
3404 | return X86EMUL_PROPAGATE_FAULT; | 3479 | return X86EMUL_PROPAGATE_FAULT; |
3405 | } | ||
3406 | 3480 | ||
3407 | /* For APIC access vmexit */ | 3481 | /* For APIC access vmexit */ |
3408 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | 3482 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) |
@@ -3420,10 +3494,11 @@ mmio: | |||
3420 | return X86EMUL_CONTINUE; | 3494 | return X86EMUL_CONTINUE; |
3421 | 3495 | ||
3422 | vcpu->mmio_needed = 1; | 3496 | vcpu->mmio_needed = 1; |
3423 | vcpu->mmio_phys_addr = gpa; | 3497 | vcpu->run->exit_reason = KVM_EXIT_MMIO; |
3424 | vcpu->mmio_size = bytes; | 3498 | vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; |
3425 | vcpu->mmio_is_write = 1; | 3499 | vcpu->run->mmio.len = vcpu->mmio_size = bytes; |
3426 | memcpy(vcpu->mmio_data, val, bytes); | 3500 | vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1; |
3501 | memcpy(vcpu->run->mmio.data, val, bytes); | ||
3427 | 3502 | ||
3428 | return X86EMUL_CONTINUE; | 3503 | return X86EMUL_CONTINUE; |
3429 | } | 3504 | } |
@@ -3431,6 +3506,7 @@ mmio: | |||
3431 | int emulator_write_emulated(unsigned long addr, | 3506 | int emulator_write_emulated(unsigned long addr, |
3432 | const void *val, | 3507 | const void *val, |
3433 | unsigned int bytes, | 3508 | unsigned int bytes, |
3509 | unsigned int *error_code, | ||
3434 | struct kvm_vcpu *vcpu) | 3510 | struct kvm_vcpu *vcpu) |
3435 | { | 3511 | { |
3436 | /* Crossing a page boundary? */ | 3512 | /* Crossing a page boundary? */ |
@@ -3438,16 +3514,17 @@ int emulator_write_emulated(unsigned long addr, | |||
3438 | int rc, now; | 3514 | int rc, now; |
3439 | 3515 | ||
3440 | now = -addr & ~PAGE_MASK; | 3516 | now = -addr & ~PAGE_MASK; |
3441 | rc = emulator_write_emulated_onepage(addr, val, now, vcpu); | 3517 | rc = emulator_write_emulated_onepage(addr, val, now, error_code, |
3518 | vcpu); | ||
3442 | if (rc != X86EMUL_CONTINUE) | 3519 | if (rc != X86EMUL_CONTINUE) |
3443 | return rc; | 3520 | return rc; |
3444 | addr += now; | 3521 | addr += now; |
3445 | val += now; | 3522 | val += now; |
3446 | bytes -= now; | 3523 | bytes -= now; |
3447 | } | 3524 | } |
3448 | return emulator_write_emulated_onepage(addr, val, bytes, vcpu); | 3525 | return emulator_write_emulated_onepage(addr, val, bytes, error_code, |
3526 | vcpu); | ||
3449 | } | 3527 | } |
3450 | EXPORT_SYMBOL_GPL(emulator_write_emulated); | ||
3451 | 3528 | ||
3452 | #define CMPXCHG_TYPE(t, ptr, old, new) \ | 3529 | #define CMPXCHG_TYPE(t, ptr, old, new) \ |
3453 | (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old)) | 3530 | (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old)) |
@@ -3463,6 +3540,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
3463 | const void *old, | 3540 | const void *old, |
3464 | const void *new, | 3541 | const void *new, |
3465 | unsigned int bytes, | 3542 | unsigned int bytes, |
3543 | unsigned int *error_code, | ||
3466 | struct kvm_vcpu *vcpu) | 3544 | struct kvm_vcpu *vcpu) |
3467 | { | 3545 | { |
3468 | gpa_t gpa; | 3546 | gpa_t gpa; |
@@ -3484,6 +3562,10 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
3484 | goto emul_write; | 3562 | goto emul_write; |
3485 | 3563 | ||
3486 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); | 3564 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); |
3565 | if (is_error_page(page)) { | ||
3566 | kvm_release_page_clean(page); | ||
3567 | goto emul_write; | ||
3568 | } | ||
3487 | 3569 | ||
3488 | kaddr = kmap_atomic(page, KM_USER0); | 3570 | kaddr = kmap_atomic(page, KM_USER0); |
3489 | kaddr += offset_in_page(gpa); | 3571 | kaddr += offset_in_page(gpa); |
@@ -3516,7 +3598,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
3516 | emul_write: | 3598 | emul_write: |
3517 | printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); | 3599 | printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); |
3518 | 3600 | ||
3519 | return emulator_write_emulated(addr, new, bytes, vcpu); | 3601 | return emulator_write_emulated(addr, new, bytes, error_code, vcpu); |
3520 | } | 3602 | } |
3521 | 3603 | ||
3522 | static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) | 3604 | static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) |
@@ -3604,42 +3686,38 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) | |||
3604 | return X86EMUL_CONTINUE; | 3686 | return X86EMUL_CONTINUE; |
3605 | } | 3687 | } |
3606 | 3688 | ||
3607 | int emulate_clts(struct kvm_vcpu *vcpu) | 3689 | int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) |
3608 | { | 3690 | { |
3609 | kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); | 3691 | if (!need_emulate_wbinvd(vcpu)) |
3610 | kvm_x86_ops->fpu_activate(vcpu); | 3692 | return X86EMUL_CONTINUE; |
3693 | |||
3694 | if (kvm_x86_ops->has_wbinvd_exit()) { | ||
3695 | smp_call_function_many(vcpu->arch.wbinvd_dirty_mask, | ||
3696 | wbinvd_ipi, NULL, 1); | ||
3697 | cpumask_clear(vcpu->arch.wbinvd_dirty_mask); | ||
3698 | } | ||
3699 | wbinvd(); | ||
3611 | return X86EMUL_CONTINUE; | 3700 | return X86EMUL_CONTINUE; |
3612 | } | 3701 | } |
3702 | EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); | ||
3613 | 3703 | ||
3614 | int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) | 3704 | int emulate_clts(struct kvm_vcpu *vcpu) |
3615 | { | 3705 | { |
3616 | return kvm_get_dr(ctxt->vcpu, dr, dest); | 3706 | kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); |
3707 | kvm_x86_ops->fpu_activate(vcpu); | ||
3708 | return X86EMUL_CONTINUE; | ||
3617 | } | 3709 | } |
3618 | 3710 | ||
3619 | int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) | 3711 | int emulator_get_dr(int dr, unsigned long *dest, struct kvm_vcpu *vcpu) |
3620 | { | 3712 | { |
3621 | unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; | 3713 | return _kvm_get_dr(vcpu, dr, dest); |
3622 | |||
3623 | return kvm_set_dr(ctxt->vcpu, dr, value & mask); | ||
3624 | } | 3714 | } |
3625 | 3715 | ||
3626 | void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) | 3716 | int emulator_set_dr(int dr, unsigned long value, struct kvm_vcpu *vcpu) |
3627 | { | 3717 | { |
3628 | u8 opcodes[4]; | ||
3629 | unsigned long rip = kvm_rip_read(vcpu); | ||
3630 | unsigned long rip_linear; | ||
3631 | |||
3632 | if (!printk_ratelimit()) | ||
3633 | return; | ||
3634 | 3718 | ||
3635 | rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); | 3719 | return __kvm_set_dr(vcpu, dr, value); |
3636 | |||
3637 | kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu, NULL); | ||
3638 | |||
3639 | printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", | ||
3640 | context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); | ||
3641 | } | 3720 | } |
3642 | EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); | ||
3643 | 3721 | ||
3644 | static u64 mk_cr_64(u64 curr_cr, u32 new_val) | 3722 | static u64 mk_cr_64(u64 curr_cr, u32 new_val) |
3645 | { | 3723 | { |
@@ -3674,27 +3752,32 @@ static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) | |||
3674 | return value; | 3752 | return value; |
3675 | } | 3753 | } |
3676 | 3754 | ||
3677 | static void emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) | 3755 | static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) |
3678 | { | 3756 | { |
3757 | int res = 0; | ||
3758 | |||
3679 | switch (cr) { | 3759 | switch (cr) { |
3680 | case 0: | 3760 | case 0: |
3681 | kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); | 3761 | res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); |
3682 | break; | 3762 | break; |
3683 | case 2: | 3763 | case 2: |
3684 | vcpu->arch.cr2 = val; | 3764 | vcpu->arch.cr2 = val; |
3685 | break; | 3765 | break; |
3686 | case 3: | 3766 | case 3: |
3687 | kvm_set_cr3(vcpu, val); | 3767 | res = kvm_set_cr3(vcpu, val); |
3688 | break; | 3768 | break; |
3689 | case 4: | 3769 | case 4: |
3690 | kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); | 3770 | res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); |
3691 | break; | 3771 | break; |
3692 | case 8: | 3772 | case 8: |
3693 | kvm_set_cr8(vcpu, val & 0xfUL); | 3773 | res = __kvm_set_cr8(vcpu, val & 0xfUL); |
3694 | break; | 3774 | break; |
3695 | default: | 3775 | default: |
3696 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | 3776 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); |
3777 | res = -1; | ||
3697 | } | 3778 | } |
3779 | |||
3780 | return res; | ||
3698 | } | 3781 | } |
3699 | 3782 | ||
3700 | static int emulator_get_cpl(struct kvm_vcpu *vcpu) | 3783 | static int emulator_get_cpl(struct kvm_vcpu *vcpu) |
@@ -3707,6 +3790,12 @@ static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu) | |||
3707 | kvm_x86_ops->get_gdt(vcpu, dt); | 3790 | kvm_x86_ops->get_gdt(vcpu, dt); |
3708 | } | 3791 | } |
3709 | 3792 | ||
3793 | static unsigned long emulator_get_cached_segment_base(int seg, | ||
3794 | struct kvm_vcpu *vcpu) | ||
3795 | { | ||
3796 | return get_segment_base(vcpu, seg); | ||
3797 | } | ||
3798 | |||
3710 | static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg, | 3799 | static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg, |
3711 | struct kvm_vcpu *vcpu) | 3800 | struct kvm_vcpu *vcpu) |
3712 | { | 3801 | { |
@@ -3779,11 +3868,6 @@ static void emulator_set_segment_selector(u16 sel, int seg, | |||
3779 | kvm_set_segment(vcpu, &kvm_seg, seg); | 3868 | kvm_set_segment(vcpu, &kvm_seg, seg); |
3780 | } | 3869 | } |
3781 | 3870 | ||
3782 | static void emulator_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | ||
3783 | { | ||
3784 | kvm_x86_ops->set_rflags(vcpu, rflags); | ||
3785 | } | ||
3786 | |||
3787 | static struct x86_emulate_ops emulate_ops = { | 3871 | static struct x86_emulate_ops emulate_ops = { |
3788 | .read_std = kvm_read_guest_virt_system, | 3872 | .read_std = kvm_read_guest_virt_system, |
3789 | .write_std = kvm_write_guest_virt_system, | 3873 | .write_std = kvm_write_guest_virt_system, |
@@ -3797,11 +3881,15 @@ static struct x86_emulate_ops emulate_ops = { | |||
3797 | .set_cached_descriptor = emulator_set_cached_descriptor, | 3881 | .set_cached_descriptor = emulator_set_cached_descriptor, |
3798 | .get_segment_selector = emulator_get_segment_selector, | 3882 | .get_segment_selector = emulator_get_segment_selector, |
3799 | .set_segment_selector = emulator_set_segment_selector, | 3883 | .set_segment_selector = emulator_set_segment_selector, |
3884 | .get_cached_segment_base = emulator_get_cached_segment_base, | ||
3800 | .get_gdt = emulator_get_gdt, | 3885 | .get_gdt = emulator_get_gdt, |
3801 | .get_cr = emulator_get_cr, | 3886 | .get_cr = emulator_get_cr, |
3802 | .set_cr = emulator_set_cr, | 3887 | .set_cr = emulator_set_cr, |
3803 | .cpl = emulator_get_cpl, | 3888 | .cpl = emulator_get_cpl, |
3804 | .set_rflags = emulator_set_rflags, | 3889 | .get_dr = emulator_get_dr, |
3890 | .set_dr = emulator_set_dr, | ||
3891 | .set_msr = kvm_set_msr, | ||
3892 | .get_msr = kvm_get_msr, | ||
3805 | }; | 3893 | }; |
3806 | 3894 | ||
3807 | static void cache_all_regs(struct kvm_vcpu *vcpu) | 3895 | static void cache_all_regs(struct kvm_vcpu *vcpu) |
@@ -3812,14 +3900,75 @@ static void cache_all_regs(struct kvm_vcpu *vcpu) | |||
3812 | vcpu->arch.regs_dirty = ~0; | 3900 | vcpu->arch.regs_dirty = ~0; |
3813 | } | 3901 | } |
3814 | 3902 | ||
3903 | static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) | ||
3904 | { | ||
3905 | u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask); | ||
3906 | /* | ||
3907 | * an sti; sti; sequence only disable interrupts for the first | ||
3908 | * instruction. So, if the last instruction, be it emulated or | ||
3909 | * not, left the system with the INT_STI flag enabled, it | ||
3910 | * means that the last instruction is an sti. We should not | ||
3911 | * leave the flag on in this case. The same goes for mov ss | ||
3912 | */ | ||
3913 | if (!(int_shadow & mask)) | ||
3914 | kvm_x86_ops->set_interrupt_shadow(vcpu, mask); | ||
3915 | } | ||
3916 | |||
3917 | static void inject_emulated_exception(struct kvm_vcpu *vcpu) | ||
3918 | { | ||
3919 | struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; | ||
3920 | if (ctxt->exception == PF_VECTOR) | ||
3921 | kvm_inject_page_fault(vcpu, ctxt->cr2, ctxt->error_code); | ||
3922 | else if (ctxt->error_code_valid) | ||
3923 | kvm_queue_exception_e(vcpu, ctxt->exception, ctxt->error_code); | ||
3924 | else | ||
3925 | kvm_queue_exception(vcpu, ctxt->exception); | ||
3926 | } | ||
3927 | |||
3928 | static int handle_emulation_failure(struct kvm_vcpu *vcpu) | ||
3929 | { | ||
3930 | ++vcpu->stat.insn_emulation_fail; | ||
3931 | trace_kvm_emulate_insn_failed(vcpu); | ||
3932 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
3933 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | ||
3934 | vcpu->run->internal.ndata = 0; | ||
3935 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
3936 | return EMULATE_FAIL; | ||
3937 | } | ||
3938 | |||
3939 | static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) | ||
3940 | { | ||
3941 | gpa_t gpa; | ||
3942 | |||
3943 | if (tdp_enabled) | ||
3944 | return false; | ||
3945 | |||
3946 | /* | ||
3947 | * if emulation was due to access to shadowed page table | ||
3948 | * and it failed try to unshadow page and re-entetr the | ||
3949 | * guest to let CPU execute the instruction. | ||
3950 | */ | ||
3951 | if (kvm_mmu_unprotect_page_virt(vcpu, gva)) | ||
3952 | return true; | ||
3953 | |||
3954 | gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, NULL); | ||
3955 | |||
3956 | if (gpa == UNMAPPED_GVA) | ||
3957 | return true; /* let cpu generate fault */ | ||
3958 | |||
3959 | if (!kvm_is_error_hva(gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT))) | ||
3960 | return true; | ||
3961 | |||
3962 | return false; | ||
3963 | } | ||
3964 | |||
3815 | int emulate_instruction(struct kvm_vcpu *vcpu, | 3965 | int emulate_instruction(struct kvm_vcpu *vcpu, |
3816 | unsigned long cr2, | 3966 | unsigned long cr2, |
3817 | u16 error_code, | 3967 | u16 error_code, |
3818 | int emulation_type) | 3968 | int emulation_type) |
3819 | { | 3969 | { |
3820 | int r, shadow_mask; | 3970 | int r; |
3821 | struct decode_cache *c; | 3971 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; |
3822 | struct kvm_run *run = vcpu->run; | ||
3823 | 3972 | ||
3824 | kvm_clear_exception_queue(vcpu); | 3973 | kvm_clear_exception_queue(vcpu); |
3825 | vcpu->arch.mmio_fault_cr2 = cr2; | 3974 | vcpu->arch.mmio_fault_cr2 = cr2; |
@@ -3831,8 +3980,6 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
3831 | */ | 3980 | */ |
3832 | cache_all_regs(vcpu); | 3981 | cache_all_regs(vcpu); |
3833 | 3982 | ||
3834 | vcpu->mmio_is_write = 0; | ||
3835 | |||
3836 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { | 3983 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { |
3837 | int cs_db, cs_l; | 3984 | int cs_db, cs_l; |
3838 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | 3985 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
@@ -3846,13 +3993,16 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
3846 | ? X86EMUL_MODE_VM86 : cs_l | 3993 | ? X86EMUL_MODE_VM86 : cs_l |
3847 | ? X86EMUL_MODE_PROT64 : cs_db | 3994 | ? X86EMUL_MODE_PROT64 : cs_db |
3848 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; | 3995 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; |
3996 | memset(c, 0, sizeof(struct decode_cache)); | ||
3997 | memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); | ||
3998 | vcpu->arch.emulate_ctxt.interruptibility = 0; | ||
3999 | vcpu->arch.emulate_ctxt.exception = -1; | ||
3849 | 4000 | ||
3850 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); | 4001 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); |
3851 | trace_kvm_emulate_insn_start(vcpu); | 4002 | trace_kvm_emulate_insn_start(vcpu); |
3852 | 4003 | ||
3853 | /* Only allow emulation of specific instructions on #UD | 4004 | /* Only allow emulation of specific instructions on #UD |
3854 | * (namely VMMCALL, sysenter, sysexit, syscall)*/ | 4005 | * (namely VMMCALL, sysenter, sysexit, syscall)*/ |
3855 | c = &vcpu->arch.emulate_ctxt.decode; | ||
3856 | if (emulation_type & EMULTYPE_TRAP_UD) { | 4006 | if (emulation_type & EMULTYPE_TRAP_UD) { |
3857 | if (!c->twobyte) | 4007 | if (!c->twobyte) |
3858 | return EMULATE_FAIL; | 4008 | return EMULATE_FAIL; |
@@ -3880,11 +4030,11 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
3880 | 4030 | ||
3881 | ++vcpu->stat.insn_emulation; | 4031 | ++vcpu->stat.insn_emulation; |
3882 | if (r) { | 4032 | if (r) { |
3883 | ++vcpu->stat.insn_emulation_fail; | 4033 | if (reexecute_instruction(vcpu, cr2)) |
3884 | trace_kvm_emulate_insn_failed(vcpu); | ||
3885 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) | ||
3886 | return EMULATE_DONE; | 4034 | return EMULATE_DONE; |
3887 | return EMULATE_FAIL; | 4035 | if (emulation_type & EMULTYPE_SKIP) |
4036 | return EMULATE_FAIL; | ||
4037 | return handle_emulation_failure(vcpu); | ||
3888 | } | 4038 | } |
3889 | } | 4039 | } |
3890 | 4040 | ||
@@ -3893,48 +4043,42 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
3893 | return EMULATE_DONE; | 4043 | return EMULATE_DONE; |
3894 | } | 4044 | } |
3895 | 4045 | ||
4046 | /* this is needed for vmware backdor interface to work since it | ||
4047 | changes registers values during IO operation */ | ||
4048 | memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); | ||
4049 | |||
3896 | restart: | 4050 | restart: |
3897 | r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); | 4051 | r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); |
3898 | shadow_mask = vcpu->arch.emulate_ctxt.interruptibility; | ||
3899 | 4052 | ||
3900 | if (r == 0) | 4053 | if (r) { /* emulation failed */ |
3901 | kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask); | 4054 | if (reexecute_instruction(vcpu, cr2)) |
4055 | return EMULATE_DONE; | ||
3902 | 4056 | ||
3903 | if (vcpu->arch.pio.count) { | 4057 | return handle_emulation_failure(vcpu); |
3904 | if (!vcpu->arch.pio.in) | ||
3905 | vcpu->arch.pio.count = 0; | ||
3906 | return EMULATE_DO_MMIO; | ||
3907 | } | 4058 | } |
3908 | 4059 | ||
3909 | if (r || vcpu->mmio_is_write) { | 4060 | toggle_interruptibility(vcpu, vcpu->arch.emulate_ctxt.interruptibility); |
3910 | run->exit_reason = KVM_EXIT_MMIO; | 4061 | kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); |
3911 | run->mmio.phys_addr = vcpu->mmio_phys_addr; | 4062 | memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); |
3912 | memcpy(run->mmio.data, vcpu->mmio_data, 8); | 4063 | kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); |
3913 | run->mmio.len = vcpu->mmio_size; | 4064 | |
3914 | run->mmio.is_write = vcpu->mmio_is_write; | 4065 | if (vcpu->arch.emulate_ctxt.exception >= 0) { |
4066 | inject_emulated_exception(vcpu); | ||
4067 | return EMULATE_DONE; | ||
3915 | } | 4068 | } |
3916 | 4069 | ||
3917 | if (r) { | 4070 | if (vcpu->arch.pio.count) { |
3918 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) | 4071 | if (!vcpu->arch.pio.in) |
3919 | goto done; | 4072 | vcpu->arch.pio.count = 0; |
3920 | if (!vcpu->mmio_needed) { | ||
3921 | ++vcpu->stat.insn_emulation_fail; | ||
3922 | trace_kvm_emulate_insn_failed(vcpu); | ||
3923 | kvm_report_emulation_failure(vcpu, "mmio"); | ||
3924 | return EMULATE_FAIL; | ||
3925 | } | ||
3926 | return EMULATE_DO_MMIO; | 4073 | return EMULATE_DO_MMIO; |
3927 | } | 4074 | } |
3928 | 4075 | ||
3929 | if (vcpu->mmio_is_write) { | 4076 | if (vcpu->mmio_needed) { |
3930 | vcpu->mmio_needed = 0; | 4077 | if (vcpu->mmio_is_write) |
4078 | vcpu->mmio_needed = 0; | ||
3931 | return EMULATE_DO_MMIO; | 4079 | return EMULATE_DO_MMIO; |
3932 | } | 4080 | } |
3933 | 4081 | ||
3934 | done: | ||
3935 | if (vcpu->arch.exception.pending) | ||
3936 | vcpu->arch.emulate_ctxt.restart = false; | ||
3937 | |||
3938 | if (vcpu->arch.emulate_ctxt.restart) | 4082 | if (vcpu->arch.emulate_ctxt.restart) |
3939 | goto restart; | 4083 | goto restart; |
3940 | 4084 | ||
@@ -4108,6 +4252,9 @@ int kvm_arch_init(void *opaque) | |||
4108 | 4252 | ||
4109 | perf_register_guest_info_callbacks(&kvm_guest_cbs); | 4253 | perf_register_guest_info_callbacks(&kvm_guest_cbs); |
4110 | 4254 | ||
4255 | if (cpu_has_xsave) | ||
4256 | host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); | ||
4257 | |||
4111 | return 0; | 4258 | return 0; |
4112 | 4259 | ||
4113 | out: | 4260 | out: |
@@ -4270,7 +4417,7 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) | |||
4270 | 4417 | ||
4271 | kvm_x86_ops->patch_hypercall(vcpu, instruction); | 4418 | kvm_x86_ops->patch_hypercall(vcpu, instruction); |
4272 | 4419 | ||
4273 | return emulator_write_emulated(rip, instruction, 3, vcpu); | 4420 | return emulator_write_emulated(rip, instruction, 3, NULL, vcpu); |
4274 | } | 4421 | } |
4275 | 4422 | ||
4276 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) | 4423 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) |
@@ -4506,59 +4653,78 @@ static void inject_pending_event(struct kvm_vcpu *vcpu) | |||
4506 | } | 4653 | } |
4507 | } | 4654 | } |
4508 | 4655 | ||
4656 | static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu) | ||
4657 | { | ||
4658 | if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) && | ||
4659 | !vcpu->guest_xcr0_loaded) { | ||
4660 | /* kvm_set_xcr() also depends on this */ | ||
4661 | xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0); | ||
4662 | vcpu->guest_xcr0_loaded = 1; | ||
4663 | } | ||
4664 | } | ||
4665 | |||
4666 | static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) | ||
4667 | { | ||
4668 | if (vcpu->guest_xcr0_loaded) { | ||
4669 | if (vcpu->arch.xcr0 != host_xcr0) | ||
4670 | xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0); | ||
4671 | vcpu->guest_xcr0_loaded = 0; | ||
4672 | } | ||
4673 | } | ||
4674 | |||
4509 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | 4675 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) |
4510 | { | 4676 | { |
4511 | int r; | 4677 | int r; |
4512 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && | 4678 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && |
4513 | vcpu->run->request_interrupt_window; | 4679 | vcpu->run->request_interrupt_window; |
4514 | 4680 | ||
4515 | if (vcpu->requests) | ||
4516 | if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) | ||
4517 | kvm_mmu_unload(vcpu); | ||
4518 | |||
4519 | r = kvm_mmu_reload(vcpu); | ||
4520 | if (unlikely(r)) | ||
4521 | goto out; | ||
4522 | |||
4523 | if (vcpu->requests) { | 4681 | if (vcpu->requests) { |
4524 | if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests)) | 4682 | if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) |
4683 | kvm_mmu_unload(vcpu); | ||
4684 | if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu)) | ||
4525 | __kvm_migrate_timers(vcpu); | 4685 | __kvm_migrate_timers(vcpu); |
4526 | if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests)) | 4686 | if (kvm_check_request(KVM_REQ_KVMCLOCK_UPDATE, vcpu)) |
4527 | kvm_write_guest_time(vcpu); | 4687 | kvm_write_guest_time(vcpu); |
4528 | if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests)) | 4688 | if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu)) |
4529 | kvm_mmu_sync_roots(vcpu); | 4689 | kvm_mmu_sync_roots(vcpu); |
4530 | if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) | 4690 | if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) |
4531 | kvm_x86_ops->tlb_flush(vcpu); | 4691 | kvm_x86_ops->tlb_flush(vcpu); |
4532 | if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, | 4692 | if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) { |
4533 | &vcpu->requests)) { | ||
4534 | vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; | 4693 | vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; |
4535 | r = 0; | 4694 | r = 0; |
4536 | goto out; | 4695 | goto out; |
4537 | } | 4696 | } |
4538 | if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) { | 4697 | if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { |
4539 | vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; | 4698 | vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; |
4540 | r = 0; | 4699 | r = 0; |
4541 | goto out; | 4700 | goto out; |
4542 | } | 4701 | } |
4543 | if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) { | 4702 | if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) { |
4544 | vcpu->fpu_active = 0; | 4703 | vcpu->fpu_active = 0; |
4545 | kvm_x86_ops->fpu_deactivate(vcpu); | 4704 | kvm_x86_ops->fpu_deactivate(vcpu); |
4546 | } | 4705 | } |
4547 | } | 4706 | } |
4548 | 4707 | ||
4708 | r = kvm_mmu_reload(vcpu); | ||
4709 | if (unlikely(r)) | ||
4710 | goto out; | ||
4711 | |||
4549 | preempt_disable(); | 4712 | preempt_disable(); |
4550 | 4713 | ||
4551 | kvm_x86_ops->prepare_guest_switch(vcpu); | 4714 | kvm_x86_ops->prepare_guest_switch(vcpu); |
4552 | if (vcpu->fpu_active) | 4715 | if (vcpu->fpu_active) |
4553 | kvm_load_guest_fpu(vcpu); | 4716 | kvm_load_guest_fpu(vcpu); |
4717 | kvm_load_guest_xcr0(vcpu); | ||
4554 | 4718 | ||
4555 | local_irq_disable(); | 4719 | atomic_set(&vcpu->guest_mode, 1); |
4720 | smp_wmb(); | ||
4556 | 4721 | ||
4557 | clear_bit(KVM_REQ_KICK, &vcpu->requests); | 4722 | local_irq_disable(); |
4558 | smp_mb__after_clear_bit(); | ||
4559 | 4723 | ||
4560 | if (vcpu->requests || need_resched() || signal_pending(current)) { | 4724 | if (!atomic_read(&vcpu->guest_mode) || vcpu->requests |
4561 | set_bit(KVM_REQ_KICK, &vcpu->requests); | 4725 | || need_resched() || signal_pending(current)) { |
4726 | atomic_set(&vcpu->guest_mode, 0); | ||
4727 | smp_wmb(); | ||
4562 | local_irq_enable(); | 4728 | local_irq_enable(); |
4563 | preempt_enable(); | 4729 | preempt_enable(); |
4564 | r = 1; | 4730 | r = 1; |
@@ -4603,7 +4769,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
4603 | if (hw_breakpoint_active()) | 4769 | if (hw_breakpoint_active()) |
4604 | hw_breakpoint_restore(); | 4770 | hw_breakpoint_restore(); |
4605 | 4771 | ||
4606 | set_bit(KVM_REQ_KICK, &vcpu->requests); | 4772 | atomic_set(&vcpu->guest_mode, 0); |
4773 | smp_wmb(); | ||
4607 | local_irq_enable(); | 4774 | local_irq_enable(); |
4608 | 4775 | ||
4609 | ++vcpu->stat.exits; | 4776 | ++vcpu->stat.exits; |
@@ -4665,7 +4832,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
4665 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); | 4832 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
4666 | kvm_vcpu_block(vcpu); | 4833 | kvm_vcpu_block(vcpu); |
4667 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); | 4834 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); |
4668 | if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) | 4835 | if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) |
4669 | { | 4836 | { |
4670 | switch(vcpu->arch.mp_state) { | 4837 | switch(vcpu->arch.mp_state) { |
4671 | case KVM_MP_STATE_HALTED: | 4838 | case KVM_MP_STATE_HALTED: |
@@ -4717,8 +4884,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
4717 | int r; | 4884 | int r; |
4718 | sigset_t sigsaved; | 4885 | sigset_t sigsaved; |
4719 | 4886 | ||
4720 | vcpu_load(vcpu); | ||
4721 | |||
4722 | if (vcpu->sigset_active) | 4887 | if (vcpu->sigset_active) |
4723 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); | 4888 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); |
4724 | 4889 | ||
@@ -4743,7 +4908,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
4743 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | 4908 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); |
4744 | r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE); | 4909 | r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE); |
4745 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | 4910 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); |
4746 | if (r == EMULATE_DO_MMIO) { | 4911 | if (r != EMULATE_DONE) { |
4747 | r = 0; | 4912 | r = 0; |
4748 | goto out; | 4913 | goto out; |
4749 | } | 4914 | } |
@@ -4759,14 +4924,11 @@ out: | |||
4759 | if (vcpu->sigset_active) | 4924 | if (vcpu->sigset_active) |
4760 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 4925 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); |
4761 | 4926 | ||
4762 | vcpu_put(vcpu); | ||
4763 | return r; | 4927 | return r; |
4764 | } | 4928 | } |
4765 | 4929 | ||
4766 | int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | 4930 | int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) |
4767 | { | 4931 | { |
4768 | vcpu_load(vcpu); | ||
4769 | |||
4770 | regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); | 4932 | regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); |
4771 | regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX); | 4933 | regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX); |
4772 | regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX); | 4934 | regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX); |
@@ -4789,15 +4951,11 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
4789 | regs->rip = kvm_rip_read(vcpu); | 4951 | regs->rip = kvm_rip_read(vcpu); |
4790 | regs->rflags = kvm_get_rflags(vcpu); | 4952 | regs->rflags = kvm_get_rflags(vcpu); |
4791 | 4953 | ||
4792 | vcpu_put(vcpu); | ||
4793 | |||
4794 | return 0; | 4954 | return 0; |
4795 | } | 4955 | } |
4796 | 4956 | ||
4797 | int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | 4957 | int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) |
4798 | { | 4958 | { |
4799 | vcpu_load(vcpu); | ||
4800 | |||
4801 | kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax); | 4959 | kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax); |
4802 | kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx); | 4960 | kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx); |
4803 | kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx); | 4961 | kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx); |
@@ -4822,8 +4980,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
4822 | 4980 | ||
4823 | vcpu->arch.exception.pending = false; | 4981 | vcpu->arch.exception.pending = false; |
4824 | 4982 | ||
4825 | vcpu_put(vcpu); | ||
4826 | |||
4827 | return 0; | 4983 | return 0; |
4828 | } | 4984 | } |
4829 | 4985 | ||
@@ -4842,8 +4998,6 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
4842 | { | 4998 | { |
4843 | struct desc_ptr dt; | 4999 | struct desc_ptr dt; |
4844 | 5000 | ||
4845 | vcpu_load(vcpu); | ||
4846 | |||
4847 | kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); | 5001 | kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); |
4848 | kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); | 5002 | kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); |
4849 | kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES); | 5003 | kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES); |
@@ -4875,32 +5029,27 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
4875 | set_bit(vcpu->arch.interrupt.nr, | 5029 | set_bit(vcpu->arch.interrupt.nr, |
4876 | (unsigned long *)sregs->interrupt_bitmap); | 5030 | (unsigned long *)sregs->interrupt_bitmap); |
4877 | 5031 | ||
4878 | vcpu_put(vcpu); | ||
4879 | |||
4880 | return 0; | 5032 | return 0; |
4881 | } | 5033 | } |
4882 | 5034 | ||
4883 | int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, | 5035 | int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, |
4884 | struct kvm_mp_state *mp_state) | 5036 | struct kvm_mp_state *mp_state) |
4885 | { | 5037 | { |
4886 | vcpu_load(vcpu); | ||
4887 | mp_state->mp_state = vcpu->arch.mp_state; | 5038 | mp_state->mp_state = vcpu->arch.mp_state; |
4888 | vcpu_put(vcpu); | ||
4889 | return 0; | 5039 | return 0; |
4890 | } | 5040 | } |
4891 | 5041 | ||
4892 | int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, | 5042 | int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, |
4893 | struct kvm_mp_state *mp_state) | 5043 | struct kvm_mp_state *mp_state) |
4894 | { | 5044 | { |
4895 | vcpu_load(vcpu); | ||
4896 | vcpu->arch.mp_state = mp_state->mp_state; | 5045 | vcpu->arch.mp_state = mp_state->mp_state; |
4897 | vcpu_put(vcpu); | ||
4898 | return 0; | 5046 | return 0; |
4899 | } | 5047 | } |
4900 | 5048 | ||
4901 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, | 5049 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, |
4902 | bool has_error_code, u32 error_code) | 5050 | bool has_error_code, u32 error_code) |
4903 | { | 5051 | { |
5052 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; | ||
4904 | int cs_db, cs_l, ret; | 5053 | int cs_db, cs_l, ret; |
4905 | cache_all_regs(vcpu); | 5054 | cache_all_regs(vcpu); |
4906 | 5055 | ||
@@ -4915,6 +5064,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, | |||
4915 | ? X86EMUL_MODE_VM86 : cs_l | 5064 | ? X86EMUL_MODE_VM86 : cs_l |
4916 | ? X86EMUL_MODE_PROT64 : cs_db | 5065 | ? X86EMUL_MODE_PROT64 : cs_db |
4917 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; | 5066 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; |
5067 | memset(c, 0, sizeof(struct decode_cache)); | ||
5068 | memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); | ||
4918 | 5069 | ||
4919 | ret = emulator_task_switch(&vcpu->arch.emulate_ctxt, &emulate_ops, | 5070 | ret = emulator_task_switch(&vcpu->arch.emulate_ctxt, &emulate_ops, |
4920 | tss_selector, reason, has_error_code, | 5071 | tss_selector, reason, has_error_code, |
@@ -4923,6 +5074,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, | |||
4923 | if (ret) | 5074 | if (ret) |
4924 | return EMULATE_FAIL; | 5075 | return EMULATE_FAIL; |
4925 | 5076 | ||
5077 | memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); | ||
5078 | kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); | ||
4926 | kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); | 5079 | kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); |
4927 | return EMULATE_DONE; | 5080 | return EMULATE_DONE; |
4928 | } | 5081 | } |
@@ -4935,8 +5088,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
4935 | int pending_vec, max_bits; | 5088 | int pending_vec, max_bits; |
4936 | struct desc_ptr dt; | 5089 | struct desc_ptr dt; |
4937 | 5090 | ||
4938 | vcpu_load(vcpu); | ||
4939 | |||
4940 | dt.size = sregs->idt.limit; | 5091 | dt.size = sregs->idt.limit; |
4941 | dt.address = sregs->idt.base; | 5092 | dt.address = sregs->idt.base; |
4942 | kvm_x86_ops->set_idt(vcpu, &dt); | 5093 | kvm_x86_ops->set_idt(vcpu, &dt); |
@@ -4996,8 +5147,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
4996 | !is_protmode(vcpu)) | 5147 | !is_protmode(vcpu)) |
4997 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 5148 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
4998 | 5149 | ||
4999 | vcpu_put(vcpu); | ||
5000 | |||
5001 | return 0; | 5150 | return 0; |
5002 | } | 5151 | } |
5003 | 5152 | ||
@@ -5007,12 +5156,10 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | |||
5007 | unsigned long rflags; | 5156 | unsigned long rflags; |
5008 | int i, r; | 5157 | int i, r; |
5009 | 5158 | ||
5010 | vcpu_load(vcpu); | ||
5011 | |||
5012 | if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) { | 5159 | if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) { |
5013 | r = -EBUSY; | 5160 | r = -EBUSY; |
5014 | if (vcpu->arch.exception.pending) | 5161 | if (vcpu->arch.exception.pending) |
5015 | goto unlock_out; | 5162 | goto out; |
5016 | if (dbg->control & KVM_GUESTDBG_INJECT_DB) | 5163 | if (dbg->control & KVM_GUESTDBG_INJECT_DB) |
5017 | kvm_queue_exception(vcpu, DB_VECTOR); | 5164 | kvm_queue_exception(vcpu, DB_VECTOR); |
5018 | else | 5165 | else |
@@ -5054,34 +5201,12 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | |||
5054 | 5201 | ||
5055 | r = 0; | 5202 | r = 0; |
5056 | 5203 | ||
5057 | unlock_out: | 5204 | out: |
5058 | vcpu_put(vcpu); | ||
5059 | 5205 | ||
5060 | return r; | 5206 | return r; |
5061 | } | 5207 | } |
5062 | 5208 | ||
5063 | /* | 5209 | /* |
5064 | * fxsave fpu state. Taken from x86_64/processor.h. To be killed when | ||
5065 | * we have asm/x86/processor.h | ||
5066 | */ | ||
5067 | struct fxsave { | ||
5068 | u16 cwd; | ||
5069 | u16 swd; | ||
5070 | u16 twd; | ||
5071 | u16 fop; | ||
5072 | u64 rip; | ||
5073 | u64 rdp; | ||
5074 | u32 mxcsr; | ||
5075 | u32 mxcsr_mask; | ||
5076 | u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ | ||
5077 | #ifdef CONFIG_X86_64 | ||
5078 | u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */ | ||
5079 | #else | ||
5080 | u32 xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ | ||
5081 | #endif | ||
5082 | }; | ||
5083 | |||
5084 | /* | ||
5085 | * Translate a guest virtual address to a guest physical address. | 5210 | * Translate a guest virtual address to a guest physical address. |
5086 | */ | 5211 | */ |
5087 | int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, | 5212 | int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, |
@@ -5091,7 +5216,6 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, | |||
5091 | gpa_t gpa; | 5216 | gpa_t gpa; |
5092 | int idx; | 5217 | int idx; |
5093 | 5218 | ||
5094 | vcpu_load(vcpu); | ||
5095 | idx = srcu_read_lock(&vcpu->kvm->srcu); | 5219 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
5096 | gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL); | 5220 | gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL); |
5097 | srcu_read_unlock(&vcpu->kvm->srcu, idx); | 5221 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
@@ -5099,16 +5223,14 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, | |||
5099 | tr->valid = gpa != UNMAPPED_GVA; | 5223 | tr->valid = gpa != UNMAPPED_GVA; |
5100 | tr->writeable = 1; | 5224 | tr->writeable = 1; |
5101 | tr->usermode = 0; | 5225 | tr->usermode = 0; |
5102 | vcpu_put(vcpu); | ||
5103 | 5226 | ||
5104 | return 0; | 5227 | return 0; |
5105 | } | 5228 | } |
5106 | 5229 | ||
5107 | int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | 5230 | int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) |
5108 | { | 5231 | { |
5109 | struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image; | 5232 | struct i387_fxsave_struct *fxsave = |
5110 | 5233 | &vcpu->arch.guest_fpu.state->fxsave; | |
5111 | vcpu_load(vcpu); | ||
5112 | 5234 | ||
5113 | memcpy(fpu->fpr, fxsave->st_space, 128); | 5235 | memcpy(fpu->fpr, fxsave->st_space, 128); |
5114 | fpu->fcw = fxsave->cwd; | 5236 | fpu->fcw = fxsave->cwd; |
@@ -5119,16 +5241,13 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | |||
5119 | fpu->last_dp = fxsave->rdp; | 5241 | fpu->last_dp = fxsave->rdp; |
5120 | memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space); | 5242 | memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space); |
5121 | 5243 | ||
5122 | vcpu_put(vcpu); | ||
5123 | |||
5124 | return 0; | 5244 | return 0; |
5125 | } | 5245 | } |
5126 | 5246 | ||
5127 | int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | 5247 | int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) |
5128 | { | 5248 | { |
5129 | struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image; | 5249 | struct i387_fxsave_struct *fxsave = |
5130 | 5250 | &vcpu->arch.guest_fpu.state->fxsave; | |
5131 | vcpu_load(vcpu); | ||
5132 | 5251 | ||
5133 | memcpy(fxsave->st_space, fpu->fpr, 128); | 5252 | memcpy(fxsave->st_space, fpu->fpr, 128); |
5134 | fxsave->cwd = fpu->fcw; | 5253 | fxsave->cwd = fpu->fcw; |
@@ -5139,61 +5258,63 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | |||
5139 | fxsave->rdp = fpu->last_dp; | 5258 | fxsave->rdp = fpu->last_dp; |
5140 | memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space); | 5259 | memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space); |
5141 | 5260 | ||
5142 | vcpu_put(vcpu); | ||
5143 | |||
5144 | return 0; | 5261 | return 0; |
5145 | } | 5262 | } |
5146 | 5263 | ||
5147 | void fx_init(struct kvm_vcpu *vcpu) | 5264 | int fx_init(struct kvm_vcpu *vcpu) |
5148 | { | 5265 | { |
5149 | unsigned after_mxcsr_mask; | 5266 | int err; |
5267 | |||
5268 | err = fpu_alloc(&vcpu->arch.guest_fpu); | ||
5269 | if (err) | ||
5270 | return err; | ||
5271 | |||
5272 | fpu_finit(&vcpu->arch.guest_fpu); | ||
5150 | 5273 | ||
5151 | /* | 5274 | /* |
5152 | * Touch the fpu the first time in non atomic context as if | 5275 | * Ensure guest xcr0 is valid for loading |
5153 | * this is the first fpu instruction the exception handler | ||
5154 | * will fire before the instruction returns and it'll have to | ||
5155 | * allocate ram with GFP_KERNEL. | ||
5156 | */ | 5276 | */ |
5157 | if (!used_math()) | 5277 | vcpu->arch.xcr0 = XSTATE_FP; |
5158 | kvm_fx_save(&vcpu->arch.host_fx_image); | ||
5159 | |||
5160 | /* Initialize guest FPU by resetting ours and saving into guest's */ | ||
5161 | preempt_disable(); | ||
5162 | kvm_fx_save(&vcpu->arch.host_fx_image); | ||
5163 | kvm_fx_finit(); | ||
5164 | kvm_fx_save(&vcpu->arch.guest_fx_image); | ||
5165 | kvm_fx_restore(&vcpu->arch.host_fx_image); | ||
5166 | preempt_enable(); | ||
5167 | 5278 | ||
5168 | vcpu->arch.cr0 |= X86_CR0_ET; | 5279 | vcpu->arch.cr0 |= X86_CR0_ET; |
5169 | after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space); | 5280 | |
5170 | vcpu->arch.guest_fx_image.mxcsr = 0x1f80; | 5281 | return 0; |
5171 | memset((void *)&vcpu->arch.guest_fx_image + after_mxcsr_mask, | ||
5172 | 0, sizeof(struct i387_fxsave_struct) - after_mxcsr_mask); | ||
5173 | } | 5282 | } |
5174 | EXPORT_SYMBOL_GPL(fx_init); | 5283 | EXPORT_SYMBOL_GPL(fx_init); |
5175 | 5284 | ||
5285 | static void fx_free(struct kvm_vcpu *vcpu) | ||
5286 | { | ||
5287 | fpu_free(&vcpu->arch.guest_fpu); | ||
5288 | } | ||
5289 | |||
5176 | void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) | 5290 | void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) |
5177 | { | 5291 | { |
5178 | if (vcpu->guest_fpu_loaded) | 5292 | if (vcpu->guest_fpu_loaded) |
5179 | return; | 5293 | return; |
5180 | 5294 | ||
5295 | /* | ||
5296 | * Restore all possible states in the guest, | ||
5297 | * and assume host would use all available bits. | ||
5298 | * Guest xcr0 would be loaded later. | ||
5299 | */ | ||
5300 | kvm_put_guest_xcr0(vcpu); | ||
5181 | vcpu->guest_fpu_loaded = 1; | 5301 | vcpu->guest_fpu_loaded = 1; |
5182 | kvm_fx_save(&vcpu->arch.host_fx_image); | 5302 | unlazy_fpu(current); |
5183 | kvm_fx_restore(&vcpu->arch.guest_fx_image); | 5303 | fpu_restore_checking(&vcpu->arch.guest_fpu); |
5184 | trace_kvm_fpu(1); | 5304 | trace_kvm_fpu(1); |
5185 | } | 5305 | } |
5186 | 5306 | ||
5187 | void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) | 5307 | void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) |
5188 | { | 5308 | { |
5309 | kvm_put_guest_xcr0(vcpu); | ||
5310 | |||
5189 | if (!vcpu->guest_fpu_loaded) | 5311 | if (!vcpu->guest_fpu_loaded) |
5190 | return; | 5312 | return; |
5191 | 5313 | ||
5192 | vcpu->guest_fpu_loaded = 0; | 5314 | vcpu->guest_fpu_loaded = 0; |
5193 | kvm_fx_save(&vcpu->arch.guest_fx_image); | 5315 | fpu_save_init(&vcpu->arch.guest_fpu); |
5194 | kvm_fx_restore(&vcpu->arch.host_fx_image); | ||
5195 | ++vcpu->stat.fpu_reload; | 5316 | ++vcpu->stat.fpu_reload; |
5196 | set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests); | 5317 | kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); |
5197 | trace_kvm_fpu(0); | 5318 | trace_kvm_fpu(0); |
5198 | } | 5319 | } |
5199 | 5320 | ||
@@ -5204,6 +5325,8 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) | |||
5204 | vcpu->arch.time_page = NULL; | 5325 | vcpu->arch.time_page = NULL; |
5205 | } | 5326 | } |
5206 | 5327 | ||
5328 | free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); | ||
5329 | fx_free(vcpu); | ||
5207 | kvm_x86_ops->vcpu_free(vcpu); | 5330 | kvm_x86_ops->vcpu_free(vcpu); |
5208 | } | 5331 | } |
5209 | 5332 | ||
@@ -5217,9 +5340,6 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
5217 | { | 5340 | { |
5218 | int r; | 5341 | int r; |
5219 | 5342 | ||
5220 | /* We do fxsave: this must be aligned. */ | ||
5221 | BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF); | ||
5222 | |||
5223 | vcpu->arch.mtrr_state.have_fixed = 1; | 5343 | vcpu->arch.mtrr_state.have_fixed = 1; |
5224 | vcpu_load(vcpu); | 5344 | vcpu_load(vcpu); |
5225 | r = kvm_arch_vcpu_reset(vcpu); | 5345 | r = kvm_arch_vcpu_reset(vcpu); |
@@ -5241,6 +5361,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) | |||
5241 | kvm_mmu_unload(vcpu); | 5361 | kvm_mmu_unload(vcpu); |
5242 | vcpu_put(vcpu); | 5362 | vcpu_put(vcpu); |
5243 | 5363 | ||
5364 | fx_free(vcpu); | ||
5244 | kvm_x86_ops->vcpu_free(vcpu); | 5365 | kvm_x86_ops->vcpu_free(vcpu); |
5245 | } | 5366 | } |
5246 | 5367 | ||
@@ -5334,7 +5455,12 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
5334 | } | 5455 | } |
5335 | vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; | 5456 | vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; |
5336 | 5457 | ||
5458 | if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) | ||
5459 | goto fail_free_mce_banks; | ||
5460 | |||
5337 | return 0; | 5461 | return 0; |
5462 | fail_free_mce_banks: | ||
5463 | kfree(vcpu->arch.mce_banks); | ||
5338 | fail_free_lapic: | 5464 | fail_free_lapic: |
5339 | kvm_free_lapic(vcpu); | 5465 | kvm_free_lapic(vcpu); |
5340 | fail_mmu_destroy: | 5466 | fail_mmu_destroy: |
@@ -5364,12 +5490,6 @@ struct kvm *kvm_arch_create_vm(void) | |||
5364 | if (!kvm) | 5490 | if (!kvm) |
5365 | return ERR_PTR(-ENOMEM); | 5491 | return ERR_PTR(-ENOMEM); |
5366 | 5492 | ||
5367 | kvm->arch.aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); | ||
5368 | if (!kvm->arch.aliases) { | ||
5369 | kfree(kvm); | ||
5370 | return ERR_PTR(-ENOMEM); | ||
5371 | } | ||
5372 | |||
5373 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); | 5493 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); |
5374 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); | 5494 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); |
5375 | 5495 | ||
@@ -5412,12 +5532,12 @@ static void kvm_free_vcpus(struct kvm *kvm) | |||
5412 | void kvm_arch_sync_events(struct kvm *kvm) | 5532 | void kvm_arch_sync_events(struct kvm *kvm) |
5413 | { | 5533 | { |
5414 | kvm_free_all_assigned_devices(kvm); | 5534 | kvm_free_all_assigned_devices(kvm); |
5535 | kvm_free_pit(kvm); | ||
5415 | } | 5536 | } |
5416 | 5537 | ||
5417 | void kvm_arch_destroy_vm(struct kvm *kvm) | 5538 | void kvm_arch_destroy_vm(struct kvm *kvm) |
5418 | { | 5539 | { |
5419 | kvm_iommu_unmap_guest(kvm); | 5540 | kvm_iommu_unmap_guest(kvm); |
5420 | kvm_free_pit(kvm); | ||
5421 | kfree(kvm->arch.vpic); | 5541 | kfree(kvm->arch.vpic); |
5422 | kfree(kvm->arch.vioapic); | 5542 | kfree(kvm->arch.vioapic); |
5423 | kvm_free_vcpus(kvm); | 5543 | kvm_free_vcpus(kvm); |
@@ -5427,7 +5547,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
5427 | if (kvm->arch.ept_identity_pagetable) | 5547 | if (kvm->arch.ept_identity_pagetable) |
5428 | put_page(kvm->arch.ept_identity_pagetable); | 5548 | put_page(kvm->arch.ept_identity_pagetable); |
5429 | cleanup_srcu_struct(&kvm->srcu); | 5549 | cleanup_srcu_struct(&kvm->srcu); |
5430 | kfree(kvm->arch.aliases); | ||
5431 | kfree(kvm); | 5550 | kfree(kvm); |
5432 | } | 5551 | } |
5433 | 5552 | ||
@@ -5438,6 +5557,11 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, | |||
5438 | int user_alloc) | 5557 | int user_alloc) |
5439 | { | 5558 | { |
5440 | int npages = memslot->npages; | 5559 | int npages = memslot->npages; |
5560 | int map_flags = MAP_PRIVATE | MAP_ANONYMOUS; | ||
5561 | |||
5562 | /* Prevent internal slot pages from being moved by fork()/COW. */ | ||
5563 | if (memslot->id >= KVM_MEMORY_SLOTS) | ||
5564 | map_flags = MAP_SHARED | MAP_ANONYMOUS; | ||
5441 | 5565 | ||
5442 | /*To keep backward compatibility with older userspace, | 5566 | /*To keep backward compatibility with older userspace, |
5443 | *x86 needs to hanlde !user_alloc case. | 5567 | *x86 needs to hanlde !user_alloc case. |
@@ -5450,7 +5574,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, | |||
5450 | userspace_addr = do_mmap(NULL, 0, | 5574 | userspace_addr = do_mmap(NULL, 0, |
5451 | npages * PAGE_SIZE, | 5575 | npages * PAGE_SIZE, |
5452 | PROT_READ | PROT_WRITE, | 5576 | PROT_READ | PROT_WRITE, |
5453 | MAP_PRIVATE | MAP_ANONYMOUS, | 5577 | map_flags, |
5454 | 0); | 5578 | 0); |
5455 | up_write(¤t->mm->mmap_sem); | 5579 | up_write(¤t->mm->mmap_sem); |
5456 | 5580 | ||
@@ -5523,7 +5647,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) | |||
5523 | 5647 | ||
5524 | me = get_cpu(); | 5648 | me = get_cpu(); |
5525 | if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) | 5649 | if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) |
5526 | if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests)) | 5650 | if (atomic_xchg(&vcpu->guest_mode, 0)) |
5527 | smp_send_reschedule(cpu); | 5651 | smp_send_reschedule(cpu); |
5528 | put_cpu(); | 5652 | put_cpu(); |
5529 | } | 5653 | } |
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index f4b54458285b..b7a404722d2b 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -65,13 +65,6 @@ static inline int is_paging(struct kvm_vcpu *vcpu) | |||
65 | return kvm_read_cr0_bits(vcpu, X86_CR0_PG); | 65 | return kvm_read_cr0_bits(vcpu, X86_CR0_PG); |
66 | } | 66 | } |
67 | 67 | ||
68 | static inline struct kvm_mem_aliases *kvm_aliases(struct kvm *kvm) | ||
69 | { | ||
70 | return rcu_dereference_check(kvm->arch.aliases, | ||
71 | srcu_read_lock_held(&kvm->srcu) | ||
72 | || lockdep_is_held(&kvm->slots_lock)); | ||
73 | } | ||
74 | |||
75 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); | 68 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); |
76 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); | 69 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); |
77 | 70 | ||
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index f871e04b6965..e10cf070ede0 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile | |||
@@ -30,6 +30,7 @@ ifeq ($(CONFIG_X86_32),y) | |||
30 | lib-y += checksum_32.o | 30 | lib-y += checksum_32.o |
31 | lib-y += strstr_32.o | 31 | lib-y += strstr_32.o |
32 | lib-y += semaphore_32.o string_32.o | 32 | lib-y += semaphore_32.o string_32.o |
33 | lib-y += cmpxchg.o | ||
33 | ifneq ($(CONFIG_X86_CMPXCHG64),y) | 34 | ifneq ($(CONFIG_X86_CMPXCHG64),y) |
34 | lib-y += cmpxchg8b_emu.o atomic64_386_32.o | 35 | lib-y += cmpxchg8b_emu.o atomic64_386_32.o |
35 | endif | 36 | endif |
diff --git a/arch/x86/kernel/cpu/cmpxchg.c b/arch/x86/lib/cmpxchg.c index 2056ccf572cc..5d619f6df3ee 100644 --- a/arch/x86/kernel/cpu/cmpxchg.c +++ b/arch/x86/lib/cmpxchg.c | |||
@@ -52,21 +52,3 @@ unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new) | |||
52 | } | 52 | } |
53 | EXPORT_SYMBOL(cmpxchg_386_u32); | 53 | EXPORT_SYMBOL(cmpxchg_386_u32); |
54 | #endif | 54 | #endif |
55 | |||
56 | #ifndef CONFIG_X86_CMPXCHG64 | ||
57 | unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new) | ||
58 | { | ||
59 | u64 prev; | ||
60 | unsigned long flags; | ||
61 | |||
62 | /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */ | ||
63 | local_irq_save(flags); | ||
64 | prev = *(u64 *)ptr; | ||
65 | if (prev == old) | ||
66 | *(u64 *)ptr = new; | ||
67 | local_irq_restore(flags); | ||
68 | return prev; | ||
69 | } | ||
70 | EXPORT_SYMBOL(cmpxchg_486_u64); | ||
71 | #endif | ||
72 | |||
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index a725b7f760ae..0002a3a33081 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c | |||
@@ -37,6 +37,28 @@ struct addr_marker { | |||
37 | const char *name; | 37 | const char *name; |
38 | }; | 38 | }; |
39 | 39 | ||
40 | /* indices for address_markers; keep sync'd w/ address_markers below */ | ||
41 | enum address_markers_idx { | ||
42 | USER_SPACE_NR = 0, | ||
43 | #ifdef CONFIG_X86_64 | ||
44 | KERNEL_SPACE_NR, | ||
45 | LOW_KERNEL_NR, | ||
46 | VMALLOC_START_NR, | ||
47 | VMEMMAP_START_NR, | ||
48 | HIGH_KERNEL_NR, | ||
49 | MODULES_VADDR_NR, | ||
50 | MODULES_END_NR, | ||
51 | #else | ||
52 | KERNEL_SPACE_NR, | ||
53 | VMALLOC_START_NR, | ||
54 | VMALLOC_END_NR, | ||
55 | # ifdef CONFIG_HIGHMEM | ||
56 | PKMAP_BASE_NR, | ||
57 | # endif | ||
58 | FIXADDR_START_NR, | ||
59 | #endif | ||
60 | }; | ||
61 | |||
40 | /* Address space markers hints */ | 62 | /* Address space markers hints */ |
41 | static struct addr_marker address_markers[] = { | 63 | static struct addr_marker address_markers[] = { |
42 | { 0, "User Space" }, | 64 | { 0, "User Space" }, |
@@ -331,14 +353,12 @@ static int pt_dump_init(void) | |||
331 | 353 | ||
332 | #ifdef CONFIG_X86_32 | 354 | #ifdef CONFIG_X86_32 |
333 | /* Not a compile-time constant on x86-32 */ | 355 | /* Not a compile-time constant on x86-32 */ |
334 | address_markers[2].start_address = VMALLOC_START; | 356 | address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; |
335 | address_markers[3].start_address = VMALLOC_END; | 357 | address_markers[VMALLOC_END_NR].start_address = VMALLOC_END; |
336 | # ifdef CONFIG_HIGHMEM | 358 | # ifdef CONFIG_HIGHMEM |
337 | address_markers[4].start_address = PKMAP_BASE; | 359 | address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE; |
338 | address_markers[5].start_address = FIXADDR_START; | ||
339 | # else | ||
340 | address_markers[4].start_address = FIXADDR_START; | ||
341 | # endif | 360 | # endif |
361 | address_markers[FIXADDR_START_NR].start_address = FIXADDR_START; | ||
342 | #endif | 362 | #endif |
343 | 363 | ||
344 | pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL, | 364 | pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL, |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index ee41bba315d1..9a6674689a20 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -2,7 +2,7 @@ | |||
2 | * linux/arch/x86_64/mm/init.c | 2 | * linux/arch/x86_64/mm/init.c |
3 | * | 3 | * |
4 | * Copyright (C) 1995 Linus Torvalds | 4 | * Copyright (C) 1995 Linus Torvalds |
5 | * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> | 5 | * Copyright (C) 2000 Pavel Machek <pavel@ucw.cz> |
6 | * Copyright (C) 2002,2003 Andi Kleen <ak@suse.de> | 6 | * Copyright (C) 2002,2003 Andi Kleen <ak@suse.de> |
7 | */ | 7 | */ |
8 | 8 | ||
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 12e4d2d3c110..3ba6e0608c55 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
@@ -62,8 +62,8 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size, | |||
62 | static void __iomem *__ioremap_caller(resource_size_t phys_addr, | 62 | static void __iomem *__ioremap_caller(resource_size_t phys_addr, |
63 | unsigned long size, unsigned long prot_val, void *caller) | 63 | unsigned long size, unsigned long prot_val, void *caller) |
64 | { | 64 | { |
65 | unsigned long pfn, offset, vaddr; | 65 | unsigned long offset, vaddr; |
66 | resource_size_t last_addr; | 66 | resource_size_t pfn, last_pfn, last_addr; |
67 | const resource_size_t unaligned_phys_addr = phys_addr; | 67 | const resource_size_t unaligned_phys_addr = phys_addr; |
68 | const unsigned long unaligned_size = size; | 68 | const unsigned long unaligned_size = size; |
69 | struct vm_struct *area; | 69 | struct vm_struct *area; |
@@ -100,10 +100,8 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, | |||
100 | /* | 100 | /* |
101 | * Don't allow anybody to remap normal RAM that we're using.. | 101 | * Don't allow anybody to remap normal RAM that we're using.. |
102 | */ | 102 | */ |
103 | for (pfn = phys_addr >> PAGE_SHIFT; | 103 | last_pfn = last_addr >> PAGE_SHIFT; |
104 | (pfn << PAGE_SHIFT) < (last_addr & PAGE_MASK); | 104 | for (pfn = phys_addr >> PAGE_SHIFT; pfn <= last_pfn; pfn++) { |
105 | pfn++) { | ||
106 | |||
107 | int is_ram = page_is_ram(pfn); | 105 | int is_ram = page_is_ram(pfn); |
108 | 106 | ||
109 | if (is_ram && pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn))) | 107 | if (is_ram && pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn))) |
@@ -115,7 +113,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, | |||
115 | * Mappings have to be page-aligned | 113 | * Mappings have to be page-aligned |
116 | */ | 114 | */ |
117 | offset = phys_addr & ~PAGE_MASK; | 115 | offset = phys_addr & ~PAGE_MASK; |
118 | phys_addr &= PAGE_MASK; | 116 | phys_addr &= PHYSICAL_PAGE_MASK; |
119 | size = PAGE_ALIGN(last_addr+1) - phys_addr; | 117 | size = PAGE_ALIGN(last_addr+1) - phys_addr; |
120 | 118 | ||
121 | retval = reserve_memtype(phys_addr, (u64)phys_addr + size, | 119 | retval = reserve_memtype(phys_addr, (u64)phys_addr + size, |
@@ -613,7 +611,7 @@ void __init early_iounmap(void __iomem *addr, unsigned long size) | |||
613 | return; | 611 | return; |
614 | } | 612 | } |
615 | offset = virt_addr & ~PAGE_MASK; | 613 | offset = virt_addr & ~PAGE_MASK; |
616 | nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT; | 614 | nrpages = PAGE_ALIGN(offset + size) >> PAGE_SHIFT; |
617 | 615 | ||
618 | idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; | 616 | idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; |
619 | while (nrpages > 0) { | 617 | while (nrpages > 0) { |
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 5d0e67fff1a6..e5d5e2ce9f77 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c | |||
@@ -45,6 +45,8 @@ struct kmmio_fault_page { | |||
45 | * Protected by kmmio_lock, when linked into kmmio_page_table. | 45 | * Protected by kmmio_lock, when linked into kmmio_page_table. |
46 | */ | 46 | */ |
47 | int count; | 47 | int count; |
48 | |||
49 | bool scheduled_for_release; | ||
48 | }; | 50 | }; |
49 | 51 | ||
50 | struct kmmio_delayed_release { | 52 | struct kmmio_delayed_release { |
@@ -398,8 +400,11 @@ static void release_kmmio_fault_page(unsigned long page, | |||
398 | BUG_ON(f->count < 0); | 400 | BUG_ON(f->count < 0); |
399 | if (!f->count) { | 401 | if (!f->count) { |
400 | disarm_kmmio_fault_page(f); | 402 | disarm_kmmio_fault_page(f); |
401 | f->release_next = *release_list; | 403 | if (!f->scheduled_for_release) { |
402 | *release_list = f; | 404 | f->release_next = *release_list; |
405 | *release_list = f; | ||
406 | f->scheduled_for_release = true; | ||
407 | } | ||
403 | } | 408 | } |
404 | } | 409 | } |
405 | 410 | ||
@@ -471,8 +476,10 @@ static void remove_kmmio_fault_pages(struct rcu_head *head) | |||
471 | prevp = &f->release_next; | 476 | prevp = &f->release_next; |
472 | } else { | 477 | } else { |
473 | *prevp = f->release_next; | 478 | *prevp = f->release_next; |
479 | f->release_next = NULL; | ||
480 | f->scheduled_for_release = false; | ||
474 | } | 481 | } |
475 | f = f->release_next; | 482 | f = *prevp; |
476 | } | 483 | } |
477 | spin_unlock_irqrestore(&kmmio_lock, flags); | 484 | spin_unlock_irqrestore(&kmmio_lock, flags); |
478 | 485 | ||
@@ -510,6 +517,9 @@ void unregister_kmmio_probe(struct kmmio_probe *p) | |||
510 | kmmio_count--; | 517 | kmmio_count--; |
511 | spin_unlock_irqrestore(&kmmio_lock, flags); | 518 | spin_unlock_irqrestore(&kmmio_lock, flags); |
512 | 519 | ||
520 | if (!release_list) | ||
521 | return; | ||
522 | |||
513 | drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC); | 523 | drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC); |
514 | if (!drelease) { | 524 | if (!drelease) { |
515 | pr_crit("leaking kmmio_fault_page objects.\n"); | 525 | pr_crit("leaking kmmio_fault_page objects.\n"); |
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 64121a18b8cb..f6ff57b7efa5 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c | |||
@@ -158,7 +158,7 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type) | |||
158 | return req_type; | 158 | return req_type; |
159 | } | 159 | } |
160 | 160 | ||
161 | static int pat_pagerange_is_ram(unsigned long start, unsigned long end) | 161 | static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end) |
162 | { | 162 | { |
163 | int ram_page = 0, not_rampage = 0; | 163 | int ram_page = 0, not_rampage = 0; |
164 | unsigned long page_nr; | 164 | unsigned long page_nr; |
diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c index f20eeec85a86..8acaddd0fb21 100644 --- a/arch/x86/mm/pat_rbtree.c +++ b/arch/x86/mm/pat_rbtree.c | |||
@@ -34,8 +34,7 @@ | |||
34 | * memtype_lock protects the rbtree. | 34 | * memtype_lock protects the rbtree. |
35 | */ | 35 | */ |
36 | 36 | ||
37 | static void memtype_rb_augment_cb(struct rb_node *node); | 37 | static struct rb_root memtype_rbroot = RB_ROOT; |
38 | static struct rb_root memtype_rbroot = RB_AUGMENT_ROOT(&memtype_rb_augment_cb); | ||
39 | 38 | ||
40 | static int is_node_overlap(struct memtype *node, u64 start, u64 end) | 39 | static int is_node_overlap(struct memtype *node, u64 start, u64 end) |
41 | { | 40 | { |
@@ -56,7 +55,7 @@ static u64 get_subtree_max_end(struct rb_node *node) | |||
56 | } | 55 | } |
57 | 56 | ||
58 | /* Update 'subtree_max_end' for a node, based on node and its children */ | 57 | /* Update 'subtree_max_end' for a node, based on node and its children */ |
59 | static void update_node_max_end(struct rb_node *node) | 58 | static void memtype_rb_augment_cb(struct rb_node *node, void *__unused) |
60 | { | 59 | { |
61 | struct memtype *data; | 60 | struct memtype *data; |
62 | u64 max_end, child_max_end; | 61 | u64 max_end, child_max_end; |
@@ -78,25 +77,6 @@ static void update_node_max_end(struct rb_node *node) | |||
78 | data->subtree_max_end = max_end; | 77 | data->subtree_max_end = max_end; |
79 | } | 78 | } |
80 | 79 | ||
81 | /* Update 'subtree_max_end' for a node and all its ancestors */ | ||
82 | static void update_path_max_end(struct rb_node *node) | ||
83 | { | ||
84 | u64 old_max_end, new_max_end; | ||
85 | |||
86 | while (node) { | ||
87 | struct memtype *data = container_of(node, struct memtype, rb); | ||
88 | |||
89 | old_max_end = data->subtree_max_end; | ||
90 | update_node_max_end(node); | ||
91 | new_max_end = data->subtree_max_end; | ||
92 | |||
93 | if (new_max_end == old_max_end) | ||
94 | break; | ||
95 | |||
96 | node = rb_parent(node); | ||
97 | } | ||
98 | } | ||
99 | |||
100 | /* Find the first (lowest start addr) overlapping range from rb tree */ | 80 | /* Find the first (lowest start addr) overlapping range from rb tree */ |
101 | static struct memtype *memtype_rb_lowest_match(struct rb_root *root, | 81 | static struct memtype *memtype_rb_lowest_match(struct rb_root *root, |
102 | u64 start, u64 end) | 82 | u64 start, u64 end) |
@@ -190,12 +170,6 @@ failure: | |||
190 | return -EBUSY; | 170 | return -EBUSY; |
191 | } | 171 | } |
192 | 172 | ||
193 | static void memtype_rb_augment_cb(struct rb_node *node) | ||
194 | { | ||
195 | if (node) | ||
196 | update_path_max_end(node); | ||
197 | } | ||
198 | |||
199 | static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata) | 173 | static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata) |
200 | { | 174 | { |
201 | struct rb_node **node = &(root->rb_node); | 175 | struct rb_node **node = &(root->rb_node); |
@@ -213,6 +187,7 @@ static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata) | |||
213 | 187 | ||
214 | rb_link_node(&newdata->rb, parent, node); | 188 | rb_link_node(&newdata->rb, parent, node); |
215 | rb_insert_color(&newdata->rb, root); | 189 | rb_insert_color(&newdata->rb, root); |
190 | rb_augment_insert(&newdata->rb, memtype_rb_augment_cb, NULL); | ||
216 | } | 191 | } |
217 | 192 | ||
218 | int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type) | 193 | int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type) |
@@ -234,13 +209,16 @@ int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type) | |||
234 | 209 | ||
235 | struct memtype *rbt_memtype_erase(u64 start, u64 end) | 210 | struct memtype *rbt_memtype_erase(u64 start, u64 end) |
236 | { | 211 | { |
212 | struct rb_node *deepest; | ||
237 | struct memtype *data; | 213 | struct memtype *data; |
238 | 214 | ||
239 | data = memtype_rb_exact_match(&memtype_rbroot, start, end); | 215 | data = memtype_rb_exact_match(&memtype_rbroot, start, end); |
240 | if (!data) | 216 | if (!data) |
241 | goto out; | 217 | goto out; |
242 | 218 | ||
219 | deepest = rb_augment_erase_begin(&data->rb); | ||
243 | rb_erase(&data->rb, &memtype_rbroot); | 220 | rb_erase(&data->rb, &memtype_rbroot); |
221 | rb_augment_erase_end(deepest, memtype_rb_augment_cb, NULL); | ||
244 | out: | 222 | out: |
245 | return data; | 223 | return data; |
246 | } | 224 | } |
diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c index 308e32570d84..38e6d174c497 100644 --- a/arch/x86/mm/pf_in.c +++ b/arch/x86/mm/pf_in.c | |||
@@ -40,16 +40,16 @@ static unsigned char prefix_codes[] = { | |||
40 | static unsigned int reg_rop[] = { | 40 | static unsigned int reg_rop[] = { |
41 | 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F | 41 | 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F |
42 | }; | 42 | }; |
43 | static unsigned int reg_wop[] = { 0x88, 0x89 }; | 43 | static unsigned int reg_wop[] = { 0x88, 0x89, 0xAA, 0xAB }; |
44 | static unsigned int imm_wop[] = { 0xC6, 0xC7 }; | 44 | static unsigned int imm_wop[] = { 0xC6, 0xC7 }; |
45 | /* IA32 Manual 3, 3-432*/ | 45 | /* IA32 Manual 3, 3-432*/ |
46 | static unsigned int rw8[] = { 0x88, 0x8A, 0xC6 }; | 46 | static unsigned int rw8[] = { 0x88, 0x8A, 0xC6, 0xAA }; |
47 | static unsigned int rw32[] = { | 47 | static unsigned int rw32[] = { |
48 | 0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F | 48 | 0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F, 0xAB |
49 | }; | 49 | }; |
50 | static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F }; | 50 | static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F, 0xAA }; |
51 | static unsigned int mw16[] = { 0xB70F, 0xBF0F }; | 51 | static unsigned int mw16[] = { 0xB70F, 0xBF0F }; |
52 | static unsigned int mw32[] = { 0x89, 0x8B, 0xC7 }; | 52 | static unsigned int mw32[] = { 0x89, 0x8B, 0xC7, 0xAB }; |
53 | static unsigned int mw64[] = {}; | 53 | static unsigned int mw64[] = {}; |
54 | #else /* not __i386__ */ | 54 | #else /* not __i386__ */ |
55 | static unsigned char prefix_codes[] = { | 55 | static unsigned char prefix_codes[] = { |
@@ -63,20 +63,20 @@ static unsigned char prefix_codes[] = { | |||
63 | static unsigned int reg_rop[] = { | 63 | static unsigned int reg_rop[] = { |
64 | 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F | 64 | 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F |
65 | }; | 65 | }; |
66 | static unsigned int reg_wop[] = { 0x88, 0x89 }; | 66 | static unsigned int reg_wop[] = { 0x88, 0x89, 0xAA, 0xAB }; |
67 | static unsigned int imm_wop[] = { 0xC6, 0xC7 }; | 67 | static unsigned int imm_wop[] = { 0xC6, 0xC7 }; |
68 | static unsigned int rw8[] = { 0xC6, 0x88, 0x8A }; | 68 | static unsigned int rw8[] = { 0xC6, 0x88, 0x8A, 0xAA }; |
69 | static unsigned int rw32[] = { | 69 | static unsigned int rw32[] = { |
70 | 0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F | 70 | 0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F, 0xAB |
71 | }; | 71 | }; |
72 | /* 8 bit only */ | 72 | /* 8 bit only */ |
73 | static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F }; | 73 | static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F, 0xAA }; |
74 | /* 16 bit only */ | 74 | /* 16 bit only */ |
75 | static unsigned int mw16[] = { 0xB70F, 0xBF0F }; | 75 | static unsigned int mw16[] = { 0xB70F, 0xBF0F }; |
76 | /* 16 or 32 bit */ | 76 | /* 16 or 32 bit */ |
77 | static unsigned int mw32[] = { 0xC7 }; | 77 | static unsigned int mw32[] = { 0xC7 }; |
78 | /* 16, 32 or 64 bit */ | 78 | /* 16, 32 or 64 bit */ |
79 | static unsigned int mw64[] = { 0x89, 0x8B }; | 79 | static unsigned int mw64[] = { 0x89, 0x8B, 0xAB }; |
80 | #endif /* not __i386__ */ | 80 | #endif /* not __i386__ */ |
81 | 81 | ||
82 | struct prefix_bits { | 82 | struct prefix_bits { |
@@ -410,7 +410,6 @@ static unsigned long *get_reg_w32(int no, struct pt_regs *regs) | |||
410 | unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs) | 410 | unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs) |
411 | { | 411 | { |
412 | unsigned int opcode; | 412 | unsigned int opcode; |
413 | unsigned char mod_rm; | ||
414 | int reg; | 413 | int reg; |
415 | unsigned char *p; | 414 | unsigned char *p; |
416 | struct prefix_bits prf; | 415 | struct prefix_bits prf; |
@@ -437,8 +436,13 @@ unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs) | |||
437 | goto err; | 436 | goto err; |
438 | 437 | ||
439 | do_work: | 438 | do_work: |
440 | mod_rm = *p; | 439 | /* for STOS, source register is fixed */ |
441 | reg = ((mod_rm >> 3) & 0x7) | (prf.rexr << 3); | 440 | if (opcode == 0xAA || opcode == 0xAB) { |
441 | reg = arg_AX; | ||
442 | } else { | ||
443 | unsigned char mod_rm = *p; | ||
444 | reg = ((mod_rm >> 3) & 0x7) | (prf.rexr << 3); | ||
445 | } | ||
442 | switch (get_ins_reg_width(ins_addr)) { | 446 | switch (get_ins_reg_width(ins_addr)) { |
443 | case 1: | 447 | case 1: |
444 | return *get_reg_w8(reg, prf.rex, regs); | 448 | return *get_reg_w8(reg, prf.rex, regs); |
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c index 8565d944f7cf..38868adf07ea 100644 --- a/arch/x86/mm/testmmiotrace.c +++ b/arch/x86/mm/testmmiotrace.c | |||
@@ -90,6 +90,27 @@ static void do_test(unsigned long size) | |||
90 | iounmap(p); | 90 | iounmap(p); |
91 | } | 91 | } |
92 | 92 | ||
93 | /* | ||
94 | * Tests how mmiotrace behaves in face of multiple ioremap / iounmaps in | ||
95 | * a short time. We had a bug in deferred freeing procedure which tried | ||
96 | * to free this region multiple times (ioremap can reuse the same address | ||
97 | * for many mappings). | ||
98 | */ | ||
99 | static void do_test_bulk_ioremapping(void) | ||
100 | { | ||
101 | void __iomem *p; | ||
102 | int i; | ||
103 | |||
104 | for (i = 0; i < 10; ++i) { | ||
105 | p = ioremap_nocache(mmio_address, PAGE_SIZE); | ||
106 | if (p) | ||
107 | iounmap(p); | ||
108 | } | ||
109 | |||
110 | /* Force freeing. If it will crash we will know why. */ | ||
111 | synchronize_rcu(); | ||
112 | } | ||
113 | |||
93 | static int __init init(void) | 114 | static int __init init(void) |
94 | { | 115 | { |
95 | unsigned long size = (read_far) ? (8 << 20) : (16 << 10); | 116 | unsigned long size = (read_far) ? (8 << 20) : (16 << 10); |
@@ -104,6 +125,7 @@ static int __init init(void) | |||
104 | "and writing 16 kB of rubbish in there.\n", | 125 | "and writing 16 kB of rubbish in there.\n", |
105 | size >> 10, mmio_address); | 126 | size >> 10, mmio_address); |
106 | do_test(size); | 127 | do_test(size); |
128 | do_test_bulk_ioremapping(); | ||
107 | pr_info("All done.\n"); | 129 | pr_info("All done.\n"); |
108 | return 0; | 130 | return 0; |
109 | } | 131 | } |
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 426f3a1a64d3..c03f14ab6667 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c | |||
@@ -278,11 +278,9 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) | |||
278 | 278 | ||
279 | static void do_flush_tlb_all(void *info) | 279 | static void do_flush_tlb_all(void *info) |
280 | { | 280 | { |
281 | unsigned long cpu = smp_processor_id(); | ||
282 | |||
283 | __flush_tlb_all(); | 281 | __flush_tlb_all(); |
284 | if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) | 282 | if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) |
285 | leave_mm(cpu); | 283 | leave_mm(smp_processor_id()); |
286 | } | 284 | } |
287 | 285 | ||
288 | void flush_tlb_all(void) | 286 | void flush_tlb_all(void) |
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index b28d2f1253bb..1ba67dc8006a 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c | |||
@@ -634,6 +634,18 @@ static int __init ppro_init(char **cpu_type) | |||
634 | if (force_arch_perfmon && cpu_has_arch_perfmon) | 634 | if (force_arch_perfmon && cpu_has_arch_perfmon) |
635 | return 0; | 635 | return 0; |
636 | 636 | ||
637 | /* | ||
638 | * Documentation on identifying Intel processors by CPU family | ||
639 | * and model can be found in the Intel Software Developer's | ||
640 | * Manuals (SDM): | ||
641 | * | ||
642 | * http://www.intel.com/products/processor/manuals/ | ||
643 | * | ||
644 | * As of May 2010 the documentation for this was in the: | ||
645 | * "Intel 64 and IA-32 Architectures Software Developer's | ||
646 | * Manual Volume 3B: System Programming Guide", "Table B-1 | ||
647 | * CPUID Signature Values of DisplayFamily_DisplayModel". | ||
648 | */ | ||
637 | switch (cpu_model) { | 649 | switch (cpu_model) { |
638 | case 0 ... 2: | 650 | case 0 ... 2: |
639 | *cpu_type = "i386/ppro"; | 651 | *cpu_type = "i386/ppro"; |
@@ -655,12 +667,12 @@ static int __init ppro_init(char **cpu_type) | |||
655 | case 15: case 23: | 667 | case 15: case 23: |
656 | *cpu_type = "i386/core_2"; | 668 | *cpu_type = "i386/core_2"; |
657 | break; | 669 | break; |
670 | case 0x1a: | ||
658 | case 0x2e: | 671 | case 0x2e: |
659 | case 26: | ||
660 | spec = &op_arch_perfmon_spec; | 672 | spec = &op_arch_perfmon_spec; |
661 | *cpu_type = "i386/core_i7"; | 673 | *cpu_type = "i386/core_i7"; |
662 | break; | 674 | break; |
663 | case 28: | 675 | case 0x1c: |
664 | *cpu_type = "i386/atom"; | 676 | *cpu_type = "i386/atom"; |
665 | break; | 677 | break; |
666 | default: | 678 | default: |
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 2ec04c424a62..15466c096ba5 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c | |||
@@ -34,6 +34,15 @@ static const struct dmi_system_id pci_use_crs_table[] __initconst = { | |||
34 | DMI_MATCH(DMI_PRODUCT_NAME, "x3800"), | 34 | DMI_MATCH(DMI_PRODUCT_NAME, "x3800"), |
35 | }, | 35 | }, |
36 | }, | 36 | }, |
37 | /* https://bugzilla.kernel.org/show_bug.cgi?id=16007 */ | ||
38 | /* 2006 AMD HT/VIA system with two host bridges */ | ||
39 | { | ||
40 | .callback = set_use_crs, | ||
41 | .ident = "ASRock ALiveSATA2-GLAN", | ||
42 | .matches = { | ||
43 | DMI_MATCH(DMI_PRODUCT_NAME, "ALiveSATA2-GLAN"), | ||
44 | }, | ||
45 | }, | ||
37 | {} | 46 | {} |
38 | }; | 47 | }; |
39 | 48 | ||
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 215a27ae050d..a0772af64efb 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c | |||
@@ -125,6 +125,23 @@ void __init dmi_check_skip_isa_align(void) | |||
125 | static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev) | 125 | static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev) |
126 | { | 126 | { |
127 | struct resource *rom_r = &dev->resource[PCI_ROM_RESOURCE]; | 127 | struct resource *rom_r = &dev->resource[PCI_ROM_RESOURCE]; |
128 | struct resource *bar_r; | ||
129 | int bar; | ||
130 | |||
131 | if (pci_probe & PCI_NOASSIGN_BARS) { | ||
132 | /* | ||
133 | * If the BIOS did not assign the BAR, zero out the | ||
134 | * resource so the kernel doesn't attmept to assign | ||
135 | * it later on in pci_assign_unassigned_resources | ||
136 | */ | ||
137 | for (bar = 0; bar <= PCI_STD_RESOURCE_END; bar++) { | ||
138 | bar_r = &dev->resource[bar]; | ||
139 | if (bar_r->start == 0 && bar_r->end != 0) { | ||
140 | bar_r->flags = 0; | ||
141 | bar_r->end = 0; | ||
142 | } | ||
143 | } | ||
144 | } | ||
128 | 145 | ||
129 | if (pci_probe & PCI_NOASSIGN_ROMS) { | 146 | if (pci_probe & PCI_NOASSIGN_ROMS) { |
130 | if (rom_r->parent) | 147 | if (rom_r->parent) |
@@ -509,6 +526,9 @@ char * __devinit pcibios_setup(char *str) | |||
509 | } else if (!strcmp(str, "norom")) { | 526 | } else if (!strcmp(str, "norom")) { |
510 | pci_probe |= PCI_NOASSIGN_ROMS; | 527 | pci_probe |= PCI_NOASSIGN_ROMS; |
511 | return NULL; | 528 | return NULL; |
529 | } else if (!strcmp(str, "nobar")) { | ||
530 | pci_probe |= PCI_NOASSIGN_BARS; | ||
531 | return NULL; | ||
512 | } else if (!strcmp(str, "assign-busses")) { | 532 | } else if (!strcmp(str, "assign-busses")) { |
513 | pci_probe |= PCI_ASSIGN_ALL_BUSSES; | 533 | pci_probe |= PCI_ASSIGN_ALL_BUSSES; |
514 | return NULL; | 534 | return NULL; |
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 6fdb3ec30c31..55253095be84 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c | |||
@@ -184,6 +184,7 @@ static void __init pcibios_allocate_resources(int pass) | |||
184 | idx, r, disabled, pass); | 184 | idx, r, disabled, pass); |
185 | if (pci_claim_resource(dev, idx) < 0) { | 185 | if (pci_claim_resource(dev, idx) < 0) { |
186 | /* We'll assign a new address later */ | 186 | /* We'll assign a new address later */ |
187 | dev->fw_addr[idx] = r->start; | ||
187 | r->end -= r->start; | 188 | r->end -= r->start; |
188 | r->start = 0; | 189 | r->start = 0; |
189 | } | 190 | } |
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 9810a0f76c91..f547ee05f715 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c | |||
@@ -989,7 +989,7 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
989 | dev_info(&dev->dev, "%s PCI INT %c -> IRQ %d\n", msg, 'A' + pin - 1, irq); | 989 | dev_info(&dev->dev, "%s PCI INT %c -> IRQ %d\n", msg, 'A' + pin - 1, irq); |
990 | 990 | ||
991 | /* Update IRQ for all devices with the same pirq value */ | 991 | /* Update IRQ for all devices with the same pirq value */ |
992 | while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) { | 992 | for_each_pci_dev(dev2) { |
993 | pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin); | 993 | pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin); |
994 | if (!pin) | 994 | if (!pin) |
995 | continue; | 995 | continue; |
@@ -1028,7 +1028,7 @@ void __init pcibios_fixup_irqs(void) | |||
1028 | u8 pin; | 1028 | u8 pin; |
1029 | 1029 | ||
1030 | DBG(KERN_DEBUG "PCI: IRQ fixup\n"); | 1030 | DBG(KERN_DEBUG "PCI: IRQ fixup\n"); |
1031 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | 1031 | for_each_pci_dev(dev) { |
1032 | /* | 1032 | /* |
1033 | * If the BIOS has set an out of range IRQ number, just | 1033 | * If the BIOS has set an out of range IRQ number, just |
1034 | * ignore it. Also keep track of which IRQ's are | 1034 | * ignore it. Also keep track of which IRQ's are |
@@ -1052,7 +1052,7 @@ void __init pcibios_fixup_irqs(void) | |||
1052 | return; | 1052 | return; |
1053 | 1053 | ||
1054 | dev = NULL; | 1054 | dev = NULL; |
1055 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | 1055 | for_each_pci_dev(dev) { |
1056 | pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); | 1056 | pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); |
1057 | if (!pin) | 1057 | if (!pin) |
1058 | continue; | 1058 | continue; |
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index 8d460eaf524f..c89266be6048 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c | |||
@@ -36,7 +36,7 @@ int __init pci_legacy_init(void) | |||
36 | return 0; | 36 | return 0; |
37 | } | 37 | } |
38 | 38 | ||
39 | void pcibios_scan_specific_bus(int busn) | 39 | void __devinit pcibios_scan_specific_bus(int busn) |
40 | { | 40 | { |
41 | int devfn; | 41 | int devfn; |
42 | long node; | 42 | long node; |
diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c index 7ef3a2735df3..cb29191cee58 100644 --- a/arch/x86/pci/mrst.c +++ b/arch/x86/pci/mrst.c | |||
@@ -66,8 +66,9 @@ static int fixed_bar_cap(struct pci_bus *bus, unsigned int devfn) | |||
66 | devfn, pos, 4, &pcie_cap)) | 66 | devfn, pos, 4, &pcie_cap)) |
67 | return 0; | 67 | return 0; |
68 | 68 | ||
69 | if (pcie_cap == 0xffffffff) | 69 | if (PCI_EXT_CAP_ID(pcie_cap) == 0x0000 || |
70 | return 0; | 70 | PCI_EXT_CAP_ID(pcie_cap) == 0xffff) |
71 | break; | ||
71 | 72 | ||
72 | if (PCI_EXT_CAP_ID(pcie_cap) == PCI_EXT_CAP_ID_VNDR) { | 73 | if (PCI_EXT_CAP_ID(pcie_cap) == PCI_EXT_CAP_ID_VNDR) { |
73 | raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number, | 74 | raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number, |
@@ -76,7 +77,7 @@ static int fixed_bar_cap(struct pci_bus *bus, unsigned int devfn) | |||
76 | return pos; | 77 | return pos; |
77 | } | 78 | } |
78 | 79 | ||
79 | pos = pcie_cap >> 20; | 80 | pos = PCI_EXT_CAP_NEXT(pcie_cap); |
80 | } | 81 | } |
81 | 82 | ||
82 | return 0; | 83 | return 0; |
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 1290ba54b350..e7e8c5f54956 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c | |||
@@ -4,7 +4,7 @@ | |||
4 | * Distribute under GPLv2 | 4 | * Distribute under GPLv2 |
5 | * | 5 | * |
6 | * Copyright (c) 2007 Rafael J. Wysocki <rjw@sisk.pl> | 6 | * Copyright (c) 2007 Rafael J. Wysocki <rjw@sisk.pl> |
7 | * Copyright (c) 2002 Pavel Machek <pavel@suse.cz> | 7 | * Copyright (c) 2002 Pavel Machek <pavel@ucw.cz> |
8 | * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org> | 8 | * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org> |
9 | */ | 9 | */ |
10 | 10 | ||
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index d24f983ba1e5..460f314d13e5 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c | |||
@@ -4,7 +4,7 @@ | |||
4 | * Distribute under GPLv2 | 4 | * Distribute under GPLv2 |
5 | * | 5 | * |
6 | * Copyright (c) 2007 Rafael J. Wysocki <rjw@sisk.pl> | 6 | * Copyright (c) 2007 Rafael J. Wysocki <rjw@sisk.pl> |
7 | * Copyright (c) 2002 Pavel Machek <pavel@suse.cz> | 7 | * Copyright (c) 2002 Pavel Machek <pavel@ucw.cz> |
8 | * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org> | 8 | * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org> |
9 | */ | 9 | */ |
10 | 10 | ||
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 6b4ffedb93c9..4a2afa1bac51 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile | |||
@@ -120,7 +120,8 @@ $(obj)/vdso32-syms.lds: $(vdso32.so-y:%=$(obj)/vdso32-%-syms.lds) FORCE | |||
120 | quiet_cmd_vdso = VDSO $@ | 120 | quiet_cmd_vdso = VDSO $@ |
121 | cmd_vdso = $(CC) -nostdlib -o $@ \ | 121 | cmd_vdso = $(CC) -nostdlib -o $@ \ |
122 | $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \ | 122 | $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \ |
123 | -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) | 123 | -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \ |
124 | sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@' | ||
124 | 125 | ||
125 | VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) | 126 | VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) |
126 | GCOV_PROFILE := n | 127 | GCOV_PROFILE := n |
diff --git a/arch/x86/vdso/checkundef.sh b/arch/x86/vdso/checkundef.sh new file mode 100755 index 000000000000..7ee90a9b549d --- /dev/null +++ b/arch/x86/vdso/checkundef.sh | |||
@@ -0,0 +1,10 @@ | |||
1 | #!/bin/sh | ||
2 | nm="$1" | ||
3 | file="$2" | ||
4 | $nm "$file" | grep '^ *U' > /dev/null 2>&1 | ||
5 | if [ $? -eq 1 ]; then | ||
6 | exit 0 | ||
7 | else | ||
8 | echo "$file: undefined symbols found" >&2 | ||
9 | exit 1 | ||
10 | fi | ||
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 02b442e92007..36df991985b2 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c | |||
@@ -374,7 +374,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) | |||
374 | 374 | ||
375 | #ifdef CONFIG_X86_64 | 375 | #ifdef CONFIG_X86_64 |
376 | 376 | ||
377 | __initcall(sysenter_setup); | 377 | subsys_initcall(sysenter_setup); |
378 | 378 | ||
379 | #ifdef CONFIG_SYSCTL | 379 | #ifdef CONFIG_SYSCTL |
380 | /* Register vsyscall32 into the ABI table */ | 380 | /* Register vsyscall32 into the ABI table */ |
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index ac74869b8140..4b5d26f108bb 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c | |||
@@ -67,6 +67,7 @@ static int __init init_vdso_vars(void) | |||
67 | *(typeof(__ ## x) **) var_ref(VDSO64_SYMBOL(vbase, x), #x) = &__ ## x; | 67 | *(typeof(__ ## x) **) var_ref(VDSO64_SYMBOL(vbase, x), #x) = &__ ## x; |
68 | #include "vextern.h" | 68 | #include "vextern.h" |
69 | #undef VEXTERN | 69 | #undef VEXTERN |
70 | vunmap(vbase); | ||
70 | return 0; | 71 | return 0; |
71 | 72 | ||
72 | oom: | 73 | oom: |
@@ -74,7 +75,7 @@ static int __init init_vdso_vars(void) | |||
74 | vdso_enabled = 0; | 75 | vdso_enabled = 0; |
75 | return -ENOMEM; | 76 | return -ENOMEM; |
76 | } | 77 | } |
77 | __initcall(init_vdso_vars); | 78 | subsys_initcall(init_vdso_vars); |
78 | 79 | ||
79 | struct linux_binprm; | 80 | struct linux_binprm; |
80 | 81 | ||
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index b83e119fbeb0..68128a1b401a 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig | |||
@@ -13,6 +13,11 @@ config XEN | |||
13 | kernel to boot in a paravirtualized environment under the | 13 | kernel to boot in a paravirtualized environment under the |
14 | Xen hypervisor. | 14 | Xen hypervisor. |
15 | 15 | ||
16 | config XEN_PVHVM | ||
17 | def_bool y | ||
18 | depends on XEN | ||
19 | depends on X86_LOCAL_APIC | ||
20 | |||
16 | config XEN_MAX_DOMAIN_MEMORY | 21 | config XEN_MAX_DOMAIN_MEMORY |
17 | int "Maximum allowed size of a domain in gigabytes" | 22 | int "Maximum allowed size of a domain in gigabytes" |
18 | default 8 if X86_32 | 23 | default 8 if X86_32 |
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 3bb4fc21f4f2..930954685980 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile | |||
@@ -12,7 +12,7 @@ CFLAGS_mmu.o := $(nostackp) | |||
12 | 12 | ||
13 | obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ | 13 | obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ |
14 | time.o xen-asm.o xen-asm_$(BITS).o \ | 14 | time.o xen-asm.o xen-asm_$(BITS).o \ |
15 | grant-table.o suspend.o | 15 | grant-table.o suspend.o platform-pci-unplug.o |
16 | 16 | ||
17 | obj-$(CONFIG_SMP) += smp.o | 17 | obj-$(CONFIG_SMP) += smp.o |
18 | obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o | 18 | obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 65d8d79b46a8..d4ff5e83621d 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -11,6 +11,7 @@ | |||
11 | * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 | 11 | * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 |
12 | */ | 12 | */ |
13 | 13 | ||
14 | #include <linux/cpu.h> | ||
14 | #include <linux/kernel.h> | 15 | #include <linux/kernel.h> |
15 | #include <linux/init.h> | 16 | #include <linux/init.h> |
16 | #include <linux/smp.h> | 17 | #include <linux/smp.h> |
@@ -35,8 +36,10 @@ | |||
35 | #include <xen/interface/version.h> | 36 | #include <xen/interface/version.h> |
36 | #include <xen/interface/physdev.h> | 37 | #include <xen/interface/physdev.h> |
37 | #include <xen/interface/vcpu.h> | 38 | #include <xen/interface/vcpu.h> |
39 | #include <xen/interface/memory.h> | ||
38 | #include <xen/features.h> | 40 | #include <xen/features.h> |
39 | #include <xen/page.h> | 41 | #include <xen/page.h> |
42 | #include <xen/hvm.h> | ||
40 | #include <xen/hvc-console.h> | 43 | #include <xen/hvc-console.h> |
41 | 44 | ||
42 | #include <asm/paravirt.h> | 45 | #include <asm/paravirt.h> |
@@ -55,7 +58,9 @@ | |||
55 | #include <asm/pgtable.h> | 58 | #include <asm/pgtable.h> |
56 | #include <asm/tlbflush.h> | 59 | #include <asm/tlbflush.h> |
57 | #include <asm/reboot.h> | 60 | #include <asm/reboot.h> |
61 | #include <asm/setup.h> | ||
58 | #include <asm/stackprotector.h> | 62 | #include <asm/stackprotector.h> |
63 | #include <asm/hypervisor.h> | ||
59 | 64 | ||
60 | #include "xen-ops.h" | 65 | #include "xen-ops.h" |
61 | #include "mmu.h" | 66 | #include "mmu.h" |
@@ -76,6 +81,10 @@ struct shared_info xen_dummy_shared_info; | |||
76 | 81 | ||
77 | void *xen_initial_gdt; | 82 | void *xen_initial_gdt; |
78 | 83 | ||
84 | RESERVE_BRK(shared_info_page_brk, PAGE_SIZE); | ||
85 | __read_mostly int xen_have_vector_callback; | ||
86 | EXPORT_SYMBOL_GPL(xen_have_vector_callback); | ||
87 | |||
79 | /* | 88 | /* |
80 | * Point at some empty memory to start with. We map the real shared_info | 89 | * Point at some empty memory to start with. We map the real shared_info |
81 | * page as soon as fixmap is up and running. | 90 | * page as soon as fixmap is up and running. |
@@ -97,6 +106,14 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; | |||
97 | */ | 106 | */ |
98 | static int have_vcpu_info_placement = 1; | 107 | static int have_vcpu_info_placement = 1; |
99 | 108 | ||
109 | static void clamp_max_cpus(void) | ||
110 | { | ||
111 | #ifdef CONFIG_SMP | ||
112 | if (setup_max_cpus > MAX_VIRT_CPUS) | ||
113 | setup_max_cpus = MAX_VIRT_CPUS; | ||
114 | #endif | ||
115 | } | ||
116 | |||
100 | static void xen_vcpu_setup(int cpu) | 117 | static void xen_vcpu_setup(int cpu) |
101 | { | 118 | { |
102 | struct vcpu_register_vcpu_info info; | 119 | struct vcpu_register_vcpu_info info; |
@@ -104,13 +121,17 @@ static void xen_vcpu_setup(int cpu) | |||
104 | struct vcpu_info *vcpup; | 121 | struct vcpu_info *vcpup; |
105 | 122 | ||
106 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); | 123 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); |
107 | per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; | ||
108 | 124 | ||
109 | if (!have_vcpu_info_placement) | 125 | if (cpu < MAX_VIRT_CPUS) |
110 | return; /* already tested, not available */ | 126 | per_cpu(xen_vcpu,cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; |
111 | 127 | ||
112 | vcpup = &per_cpu(xen_vcpu_info, cpu); | 128 | if (!have_vcpu_info_placement) { |
129 | if (cpu >= MAX_VIRT_CPUS) | ||
130 | clamp_max_cpus(); | ||
131 | return; | ||
132 | } | ||
113 | 133 | ||
134 | vcpup = &per_cpu(xen_vcpu_info, cpu); | ||
114 | info.mfn = arbitrary_virt_to_mfn(vcpup); | 135 | info.mfn = arbitrary_virt_to_mfn(vcpup); |
115 | info.offset = offset_in_page(vcpup); | 136 | info.offset = offset_in_page(vcpup); |
116 | 137 | ||
@@ -125,6 +146,7 @@ static void xen_vcpu_setup(int cpu) | |||
125 | if (err) { | 146 | if (err) { |
126 | printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err); | 147 | printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err); |
127 | have_vcpu_info_placement = 0; | 148 | have_vcpu_info_placement = 0; |
149 | clamp_max_cpus(); | ||
128 | } else { | 150 | } else { |
129 | /* This cpu is using the registered vcpu info, even if | 151 | /* This cpu is using the registered vcpu info, even if |
130 | later ones fail to. */ | 152 | later ones fail to. */ |
@@ -731,7 +753,6 @@ static void set_xen_basic_apic_ops(void) | |||
731 | 753 | ||
732 | #endif | 754 | #endif |
733 | 755 | ||
734 | |||
735 | static void xen_clts(void) | 756 | static void xen_clts(void) |
736 | { | 757 | { |
737 | struct multicall_space mcs; | 758 | struct multicall_space mcs; |
@@ -926,10 +947,6 @@ static const struct pv_init_ops xen_init_ops __initdata = { | |||
926 | .patch = xen_patch, | 947 | .patch = xen_patch, |
927 | }; | 948 | }; |
928 | 949 | ||
929 | static const struct pv_time_ops xen_time_ops __initdata = { | ||
930 | .sched_clock = xen_sched_clock, | ||
931 | }; | ||
932 | |||
933 | static const struct pv_cpu_ops xen_cpu_ops __initdata = { | 950 | static const struct pv_cpu_ops xen_cpu_ops __initdata = { |
934 | .cpuid = xen_cpuid, | 951 | .cpuid = xen_cpuid, |
935 | 952 | ||
@@ -1028,6 +1045,23 @@ static void xen_crash_shutdown(struct pt_regs *regs) | |||
1028 | xen_reboot(SHUTDOWN_crash); | 1045 | xen_reboot(SHUTDOWN_crash); |
1029 | } | 1046 | } |
1030 | 1047 | ||
1048 | static int | ||
1049 | xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr) | ||
1050 | { | ||
1051 | xen_reboot(SHUTDOWN_crash); | ||
1052 | return NOTIFY_DONE; | ||
1053 | } | ||
1054 | |||
1055 | static struct notifier_block xen_panic_block = { | ||
1056 | .notifier_call= xen_panic_event, | ||
1057 | }; | ||
1058 | |||
1059 | int xen_panic_handler_init(void) | ||
1060 | { | ||
1061 | atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block); | ||
1062 | return 0; | ||
1063 | } | ||
1064 | |||
1031 | static const struct machine_ops __initdata xen_machine_ops = { | 1065 | static const struct machine_ops __initdata xen_machine_ops = { |
1032 | .restart = xen_restart, | 1066 | .restart = xen_restart, |
1033 | .halt = xen_machine_halt, | 1067 | .halt = xen_machine_halt, |
@@ -1067,7 +1101,6 @@ asmlinkage void __init xen_start_kernel(void) | |||
1067 | /* Install Xen paravirt ops */ | 1101 | /* Install Xen paravirt ops */ |
1068 | pv_info = xen_info; | 1102 | pv_info = xen_info; |
1069 | pv_init_ops = xen_init_ops; | 1103 | pv_init_ops = xen_init_ops; |
1070 | pv_time_ops = xen_time_ops; | ||
1071 | pv_cpu_ops = xen_cpu_ops; | 1104 | pv_cpu_ops = xen_cpu_ops; |
1072 | pv_apic_ops = xen_apic_ops; | 1105 | pv_apic_ops = xen_apic_ops; |
1073 | 1106 | ||
@@ -1075,13 +1108,7 @@ asmlinkage void __init xen_start_kernel(void) | |||
1075 | x86_init.oem.arch_setup = xen_arch_setup; | 1108 | x86_init.oem.arch_setup = xen_arch_setup; |
1076 | x86_init.oem.banner = xen_banner; | 1109 | x86_init.oem.banner = xen_banner; |
1077 | 1110 | ||
1078 | x86_init.timers.timer_init = xen_time_init; | 1111 | xen_init_time_ops(); |
1079 | x86_init.timers.setup_percpu_clockev = x86_init_noop; | ||
1080 | x86_cpuinit.setup_percpu_clockev = x86_init_noop; | ||
1081 | |||
1082 | x86_platform.calibrate_tsc = xen_tsc_khz; | ||
1083 | x86_platform.get_wallclock = xen_get_wallclock; | ||
1084 | x86_platform.set_wallclock = xen_set_wallclock; | ||
1085 | 1112 | ||
1086 | /* | 1113 | /* |
1087 | * Set up some pagetable state before starting to set any ptes. | 1114 | * Set up some pagetable state before starting to set any ptes. |
@@ -1206,3 +1233,139 @@ asmlinkage void __init xen_start_kernel(void) | |||
1206 | x86_64_start_reservations((char *)__pa_symbol(&boot_params)); | 1233 | x86_64_start_reservations((char *)__pa_symbol(&boot_params)); |
1207 | #endif | 1234 | #endif |
1208 | } | 1235 | } |
1236 | |||
1237 | static uint32_t xen_cpuid_base(void) | ||
1238 | { | ||
1239 | uint32_t base, eax, ebx, ecx, edx; | ||
1240 | char signature[13]; | ||
1241 | |||
1242 | for (base = 0x40000000; base < 0x40010000; base += 0x100) { | ||
1243 | cpuid(base, &eax, &ebx, &ecx, &edx); | ||
1244 | *(uint32_t *)(signature + 0) = ebx; | ||
1245 | *(uint32_t *)(signature + 4) = ecx; | ||
1246 | *(uint32_t *)(signature + 8) = edx; | ||
1247 | signature[12] = 0; | ||
1248 | |||
1249 | if (!strcmp("XenVMMXenVMM", signature) && ((eax - base) >= 2)) | ||
1250 | return base; | ||
1251 | } | ||
1252 | |||
1253 | return 0; | ||
1254 | } | ||
1255 | |||
1256 | static int init_hvm_pv_info(int *major, int *minor) | ||
1257 | { | ||
1258 | uint32_t eax, ebx, ecx, edx, pages, msr, base; | ||
1259 | u64 pfn; | ||
1260 | |||
1261 | base = xen_cpuid_base(); | ||
1262 | cpuid(base + 1, &eax, &ebx, &ecx, &edx); | ||
1263 | |||
1264 | *major = eax >> 16; | ||
1265 | *minor = eax & 0xffff; | ||
1266 | printk(KERN_INFO "Xen version %d.%d.\n", *major, *minor); | ||
1267 | |||
1268 | cpuid(base + 2, &pages, &msr, &ecx, &edx); | ||
1269 | |||
1270 | pfn = __pa(hypercall_page); | ||
1271 | wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); | ||
1272 | |||
1273 | xen_setup_features(); | ||
1274 | |||
1275 | pv_info = xen_info; | ||
1276 | pv_info.kernel_rpl = 0; | ||
1277 | |||
1278 | xen_domain_type = XEN_HVM_DOMAIN; | ||
1279 | |||
1280 | return 0; | ||
1281 | } | ||
1282 | |||
1283 | void xen_hvm_init_shared_info(void) | ||
1284 | { | ||
1285 | int cpu; | ||
1286 | struct xen_add_to_physmap xatp; | ||
1287 | static struct shared_info *shared_info_page = 0; | ||
1288 | |||
1289 | if (!shared_info_page) | ||
1290 | shared_info_page = (struct shared_info *) | ||
1291 | extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
1292 | xatp.domid = DOMID_SELF; | ||
1293 | xatp.idx = 0; | ||
1294 | xatp.space = XENMAPSPACE_shared_info; | ||
1295 | xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT; | ||
1296 | if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) | ||
1297 | BUG(); | ||
1298 | |||
1299 | HYPERVISOR_shared_info = (struct shared_info *)shared_info_page; | ||
1300 | |||
1301 | /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info | ||
1302 | * page, we use it in the event channel upcall and in some pvclock | ||
1303 | * related functions. We don't need the vcpu_info placement | ||
1304 | * optimizations because we don't use any pv_mmu or pv_irq op on | ||
1305 | * HVM. | ||
1306 | * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is | ||
1307 | * online but xen_hvm_init_shared_info is run at resume time too and | ||
1308 | * in that case multiple vcpus might be online. */ | ||
1309 | for_each_online_cpu(cpu) { | ||
1310 | per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; | ||
1311 | } | ||
1312 | } | ||
1313 | |||
1314 | #ifdef CONFIG_XEN_PVHVM | ||
1315 | static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self, | ||
1316 | unsigned long action, void *hcpu) | ||
1317 | { | ||
1318 | int cpu = (long)hcpu; | ||
1319 | switch (action) { | ||
1320 | case CPU_UP_PREPARE: | ||
1321 | per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; | ||
1322 | break; | ||
1323 | default: | ||
1324 | break; | ||
1325 | } | ||
1326 | return NOTIFY_OK; | ||
1327 | } | ||
1328 | |||
1329 | static struct notifier_block __cpuinitdata xen_hvm_cpu_notifier = { | ||
1330 | .notifier_call = xen_hvm_cpu_notify, | ||
1331 | }; | ||
1332 | |||
1333 | static void __init xen_hvm_guest_init(void) | ||
1334 | { | ||
1335 | int r; | ||
1336 | int major, minor; | ||
1337 | |||
1338 | r = init_hvm_pv_info(&major, &minor); | ||
1339 | if (r < 0) | ||
1340 | return; | ||
1341 | |||
1342 | xen_hvm_init_shared_info(); | ||
1343 | |||
1344 | if (xen_feature(XENFEAT_hvm_callback_vector)) | ||
1345 | xen_have_vector_callback = 1; | ||
1346 | register_cpu_notifier(&xen_hvm_cpu_notifier); | ||
1347 | xen_unplug_emulated_devices(); | ||
1348 | have_vcpu_info_placement = 0; | ||
1349 | x86_init.irqs.intr_init = xen_init_IRQ; | ||
1350 | xen_hvm_init_time_ops(); | ||
1351 | xen_hvm_init_mmu_ops(); | ||
1352 | } | ||
1353 | |||
1354 | static bool __init xen_hvm_platform(void) | ||
1355 | { | ||
1356 | if (xen_pv_domain()) | ||
1357 | return false; | ||
1358 | |||
1359 | if (!xen_cpuid_base()) | ||
1360 | return false; | ||
1361 | |||
1362 | return true; | ||
1363 | } | ||
1364 | |||
1365 | const __refconst struct hypervisor_x86 x86_hyper_xen_hvm = { | ||
1366 | .name = "Xen HVM", | ||
1367 | .detect = xen_hvm_platform, | ||
1368 | .init_platform = xen_hvm_guest_init, | ||
1369 | }; | ||
1370 | EXPORT_SYMBOL(x86_hyper_xen_hvm); | ||
1371 | #endif | ||
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 914f04695ce5..413b19b3d0fe 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -58,6 +58,7 @@ | |||
58 | 58 | ||
59 | #include <xen/page.h> | 59 | #include <xen/page.h> |
60 | #include <xen/interface/xen.h> | 60 | #include <xen/interface/xen.h> |
61 | #include <xen/interface/hvm/hvm_op.h> | ||
61 | #include <xen/interface/version.h> | 62 | #include <xen/interface/version.h> |
62 | #include <xen/hvc-console.h> | 63 | #include <xen/hvc-console.h> |
63 | 64 | ||
@@ -1941,6 +1942,40 @@ void __init xen_init_mmu_ops(void) | |||
1941 | pv_mmu_ops = xen_mmu_ops; | 1942 | pv_mmu_ops = xen_mmu_ops; |
1942 | } | 1943 | } |
1943 | 1944 | ||
1945 | #ifdef CONFIG_XEN_PVHVM | ||
1946 | static void xen_hvm_exit_mmap(struct mm_struct *mm) | ||
1947 | { | ||
1948 | struct xen_hvm_pagetable_dying a; | ||
1949 | int rc; | ||
1950 | |||
1951 | a.domid = DOMID_SELF; | ||
1952 | a.gpa = __pa(mm->pgd); | ||
1953 | rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a); | ||
1954 | WARN_ON_ONCE(rc < 0); | ||
1955 | } | ||
1956 | |||
1957 | static int is_pagetable_dying_supported(void) | ||
1958 | { | ||
1959 | struct xen_hvm_pagetable_dying a; | ||
1960 | int rc = 0; | ||
1961 | |||
1962 | a.domid = DOMID_SELF; | ||
1963 | a.gpa = 0x00; | ||
1964 | rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a); | ||
1965 | if (rc < 0) { | ||
1966 | printk(KERN_DEBUG "HVMOP_pagetable_dying not supported\n"); | ||
1967 | return 0; | ||
1968 | } | ||
1969 | return 1; | ||
1970 | } | ||
1971 | |||
1972 | void __init xen_hvm_init_mmu_ops(void) | ||
1973 | { | ||
1974 | if (is_pagetable_dying_supported()) | ||
1975 | pv_mmu_ops.exit_mmap = xen_hvm_exit_mmap; | ||
1976 | } | ||
1977 | #endif | ||
1978 | |||
1944 | #ifdef CONFIG_XEN_DEBUG_FS | 1979 | #ifdef CONFIG_XEN_DEBUG_FS |
1945 | 1980 | ||
1946 | static struct dentry *d_mmu_debug; | 1981 | static struct dentry *d_mmu_debug; |
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index 5fe6bc7f5ecf..fa938c4aa2f7 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h | |||
@@ -60,4 +60,5 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, | |||
60 | unsigned long xen_read_cr2_direct(void); | 60 | unsigned long xen_read_cr2_direct(void); |
61 | 61 | ||
62 | extern void xen_init_mmu_ops(void); | 62 | extern void xen_init_mmu_ops(void); |
63 | extern void xen_hvm_init_mmu_ops(void); | ||
63 | #endif /* _XEN_MMU_H */ | 64 | #endif /* _XEN_MMU_H */ |
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c new file mode 100644 index 000000000000..554c002a1e1a --- /dev/null +++ b/arch/x86/xen/platform-pci-unplug.c | |||
@@ -0,0 +1,137 @@ | |||
1 | /****************************************************************************** | ||
2 | * platform-pci-unplug.c | ||
3 | * | ||
4 | * Xen platform PCI device driver | ||
5 | * Copyright (c) 2010, Citrix | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify it | ||
8 | * under the terms and conditions of the GNU General Public License, | ||
9 | * version 2, as published by the Free Software Foundation. | ||
10 | * | ||
11 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
14 | * more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License along with | ||
17 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
18 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
19 | * | ||
20 | */ | ||
21 | |||
22 | #include <linux/init.h> | ||
23 | #include <linux/io.h> | ||
24 | #include <linux/module.h> | ||
25 | |||
26 | #include <xen/platform_pci.h> | ||
27 | |||
28 | #define XEN_PLATFORM_ERR_MAGIC -1 | ||
29 | #define XEN_PLATFORM_ERR_PROTOCOL -2 | ||
30 | #define XEN_PLATFORM_ERR_BLACKLIST -3 | ||
31 | |||
32 | /* store the value of xen_emul_unplug after the unplug is done */ | ||
33 | int xen_platform_pci_unplug; | ||
34 | EXPORT_SYMBOL_GPL(xen_platform_pci_unplug); | ||
35 | #ifdef CONFIG_XEN_PVHVM | ||
36 | static int xen_emul_unplug; | ||
37 | |||
38 | static int __init check_platform_magic(void) | ||
39 | { | ||
40 | short magic; | ||
41 | char protocol; | ||
42 | |||
43 | magic = inw(XEN_IOPORT_MAGIC); | ||
44 | if (magic != XEN_IOPORT_MAGIC_VAL) { | ||
45 | printk(KERN_ERR "Xen Platform PCI: unrecognised magic value\n"); | ||
46 | return XEN_PLATFORM_ERR_MAGIC; | ||
47 | } | ||
48 | |||
49 | protocol = inb(XEN_IOPORT_PROTOVER); | ||
50 | |||
51 | printk(KERN_DEBUG "Xen Platform PCI: I/O protocol version %d\n", | ||
52 | protocol); | ||
53 | |||
54 | switch (protocol) { | ||
55 | case 1: | ||
56 | outw(XEN_IOPORT_LINUX_PRODNUM, XEN_IOPORT_PRODNUM); | ||
57 | outl(XEN_IOPORT_LINUX_DRVVER, XEN_IOPORT_DRVVER); | ||
58 | if (inw(XEN_IOPORT_MAGIC) != XEN_IOPORT_MAGIC_VAL) { | ||
59 | printk(KERN_ERR "Xen Platform: blacklisted by host\n"); | ||
60 | return XEN_PLATFORM_ERR_BLACKLIST; | ||
61 | } | ||
62 | break; | ||
63 | default: | ||
64 | printk(KERN_WARNING "Xen Platform PCI: unknown I/O protocol version"); | ||
65 | return XEN_PLATFORM_ERR_PROTOCOL; | ||
66 | } | ||
67 | |||
68 | return 0; | ||
69 | } | ||
70 | |||
71 | void __init xen_unplug_emulated_devices(void) | ||
72 | { | ||
73 | int r; | ||
74 | |||
75 | /* check the version of the xen platform PCI device */ | ||
76 | r = check_platform_magic(); | ||
77 | /* If the version matches enable the Xen platform PCI driver. | ||
78 | * Also enable the Xen platform PCI driver if the version is really old | ||
79 | * and the user told us to ignore it. */ | ||
80 | if (r && !(r == XEN_PLATFORM_ERR_MAGIC && | ||
81 | (xen_emul_unplug & XEN_UNPLUG_IGNORE))) | ||
82 | return; | ||
83 | /* Set the default value of xen_emul_unplug depending on whether or | ||
84 | * not the Xen PV frontends and the Xen platform PCI driver have | ||
85 | * been compiled for this kernel (modules or built-in are both OK). */ | ||
86 | if (!xen_emul_unplug) { | ||
87 | if (xen_must_unplug_nics()) { | ||
88 | printk(KERN_INFO "Netfront and the Xen platform PCI driver have " | ||
89 | "been compiled for this kernel: unplug emulated NICs.\n"); | ||
90 | xen_emul_unplug |= XEN_UNPLUG_ALL_NICS; | ||
91 | } | ||
92 | if (xen_must_unplug_disks()) { | ||
93 | printk(KERN_INFO "Blkfront and the Xen platform PCI driver have " | ||
94 | "been compiled for this kernel: unplug emulated disks.\n" | ||
95 | "You might have to change the root device\n" | ||
96 | "from /dev/hd[a-d] to /dev/xvd[a-d]\n" | ||
97 | "in your root= kernel command line option\n"); | ||
98 | xen_emul_unplug |= XEN_UNPLUG_ALL_IDE_DISKS; | ||
99 | } | ||
100 | } | ||
101 | /* Now unplug the emulated devices */ | ||
102 | if (!(xen_emul_unplug & XEN_UNPLUG_IGNORE)) | ||
103 | outw(xen_emul_unplug, XEN_IOPORT_UNPLUG); | ||
104 | xen_platform_pci_unplug = xen_emul_unplug; | ||
105 | } | ||
106 | |||
107 | static int __init parse_xen_emul_unplug(char *arg) | ||
108 | { | ||
109 | char *p, *q; | ||
110 | int l; | ||
111 | |||
112 | for (p = arg; p; p = q) { | ||
113 | q = strchr(p, ','); | ||
114 | if (q) { | ||
115 | l = q - p; | ||
116 | q++; | ||
117 | } else { | ||
118 | l = strlen(p); | ||
119 | } | ||
120 | if (!strncmp(p, "all", l)) | ||
121 | xen_emul_unplug |= XEN_UNPLUG_ALL; | ||
122 | else if (!strncmp(p, "ide-disks", l)) | ||
123 | xen_emul_unplug |= XEN_UNPLUG_ALL_IDE_DISKS; | ||
124 | else if (!strncmp(p, "aux-ide-disks", l)) | ||
125 | xen_emul_unplug |= XEN_UNPLUG_AUX_IDE_DISKS; | ||
126 | else if (!strncmp(p, "nics", l)) | ||
127 | xen_emul_unplug |= XEN_UNPLUG_ALL_NICS; | ||
128 | else if (!strncmp(p, "ignore", l)) | ||
129 | xen_emul_unplug |= XEN_UNPLUG_IGNORE; | ||
130 | else | ||
131 | printk(KERN_WARNING "unrecognised option '%s' " | ||
132 | "in parameter 'xen_emul_unplug'\n", p); | ||
133 | } | ||
134 | return 0; | ||
135 | } | ||
136 | early_param("xen_emul_unplug", parse_xen_emul_unplug); | ||
137 | #endif | ||
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index ad0047f47cd4..328b00305426 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <xen/page.h> | 20 | #include <xen/page.h> |
21 | #include <xen/interface/callback.h> | 21 | #include <xen/interface/callback.h> |
22 | #include <xen/interface/physdev.h> | 22 | #include <xen/interface/physdev.h> |
23 | #include <xen/interface/memory.h> | ||
23 | #include <xen/features.h> | 24 | #include <xen/features.h> |
24 | 25 | ||
25 | #include "xen-ops.h" | 26 | #include "xen-ops.h" |
@@ -32,6 +33,73 @@ extern void xen_sysenter_target(void); | |||
32 | extern void xen_syscall_target(void); | 33 | extern void xen_syscall_target(void); |
33 | extern void xen_syscall32_target(void); | 34 | extern void xen_syscall32_target(void); |
34 | 35 | ||
36 | static unsigned long __init xen_release_chunk(phys_addr_t start_addr, | ||
37 | phys_addr_t end_addr) | ||
38 | { | ||
39 | struct xen_memory_reservation reservation = { | ||
40 | .address_bits = 0, | ||
41 | .extent_order = 0, | ||
42 | .domid = DOMID_SELF | ||
43 | }; | ||
44 | unsigned long start, end; | ||
45 | unsigned long len = 0; | ||
46 | unsigned long pfn; | ||
47 | int ret; | ||
48 | |||
49 | start = PFN_UP(start_addr); | ||
50 | end = PFN_DOWN(end_addr); | ||
51 | |||
52 | if (end <= start) | ||
53 | return 0; | ||
54 | |||
55 | printk(KERN_INFO "xen_release_chunk: looking at area pfn %lx-%lx: ", | ||
56 | start, end); | ||
57 | for(pfn = start; pfn < end; pfn++) { | ||
58 | unsigned long mfn = pfn_to_mfn(pfn); | ||
59 | |||
60 | /* Make sure pfn exists to start with */ | ||
61 | if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn) | ||
62 | continue; | ||
63 | |||
64 | set_xen_guest_handle(reservation.extent_start, &mfn); | ||
65 | reservation.nr_extents = 1; | ||
66 | |||
67 | ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, | ||
68 | &reservation); | ||
69 | WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n", | ||
70 | start, end, ret); | ||
71 | if (ret == 1) { | ||
72 | set_phys_to_machine(pfn, INVALID_P2M_ENTRY); | ||
73 | len++; | ||
74 | } | ||
75 | } | ||
76 | printk(KERN_CONT "%ld pages freed\n", len); | ||
77 | |||
78 | return len; | ||
79 | } | ||
80 | |||
81 | static unsigned long __init xen_return_unused_memory(unsigned long max_pfn, | ||
82 | const struct e820map *e820) | ||
83 | { | ||
84 | phys_addr_t max_addr = PFN_PHYS(max_pfn); | ||
85 | phys_addr_t last_end = 0; | ||
86 | unsigned long released = 0; | ||
87 | int i; | ||
88 | |||
89 | for (i = 0; i < e820->nr_map && last_end < max_addr; i++) { | ||
90 | phys_addr_t end = e820->map[i].addr; | ||
91 | end = min(max_addr, end); | ||
92 | |||
93 | released += xen_release_chunk(last_end, end); | ||
94 | last_end = e820->map[i].addr + e820->map[i].size; | ||
95 | } | ||
96 | |||
97 | if (last_end < max_addr) | ||
98 | released += xen_release_chunk(last_end, max_addr); | ||
99 | |||
100 | printk(KERN_INFO "released %ld pages of unused memory\n", released); | ||
101 | return released; | ||
102 | } | ||
35 | 103 | ||
36 | /** | 104 | /** |
37 | * machine_specific_memory_setup - Hook for machine specific memory setup. | 105 | * machine_specific_memory_setup - Hook for machine specific memory setup. |
@@ -67,6 +135,8 @@ char * __init xen_memory_setup(void) | |||
67 | 135 | ||
68 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | 136 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); |
69 | 137 | ||
138 | xen_return_unused_memory(xen_start_info->nr_pages, &e820); | ||
139 | |||
70 | return "Xen"; | 140 | return "Xen"; |
71 | } | 141 | } |
72 | 142 | ||
@@ -156,6 +226,8 @@ void __init xen_arch_setup(void) | |||
156 | struct physdev_set_iopl set_iopl; | 226 | struct physdev_set_iopl set_iopl; |
157 | int rc; | 227 | int rc; |
158 | 228 | ||
229 | xen_panic_handler_init(); | ||
230 | |||
159 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); | 231 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); |
160 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); | 232 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); |
161 | 233 | ||
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index a29693fd3138..25f232b18a82 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -394,6 +394,8 @@ static void stop_self(void *v) | |||
394 | load_cr3(swapper_pg_dir); | 394 | load_cr3(swapper_pg_dir); |
395 | /* should set up a minimal gdt */ | 395 | /* should set up a minimal gdt */ |
396 | 396 | ||
397 | set_cpu_online(cpu, false); | ||
398 | |||
397 | HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL); | 399 | HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL); |
398 | BUG(); | 400 | BUG(); |
399 | } | 401 | } |
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index a9c661108034..1d789d56877c 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c | |||
@@ -26,6 +26,18 @@ void xen_pre_suspend(void) | |||
26 | BUG(); | 26 | BUG(); |
27 | } | 27 | } |
28 | 28 | ||
29 | void xen_hvm_post_suspend(int suspend_cancelled) | ||
30 | { | ||
31 | int cpu; | ||
32 | xen_hvm_init_shared_info(); | ||
33 | xen_callback_vector(); | ||
34 | if (xen_feature(XENFEAT_hvm_safe_pvclock)) { | ||
35 | for_each_online_cpu(cpu) { | ||
36 | xen_setup_runstate_info(cpu); | ||
37 | } | ||
38 | } | ||
39 | } | ||
40 | |||
29 | void xen_post_suspend(int suspend_cancelled) | 41 | void xen_post_suspend(int suspend_cancelled) |
30 | { | 42 | { |
31 | xen_build_mfn_list_list(); | 43 | xen_build_mfn_list_list(); |
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index b3c6c59ed302..1a5353a753fc 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <asm/xen/hypercall.h> | 20 | #include <asm/xen/hypercall.h> |
21 | 21 | ||
22 | #include <xen/events.h> | 22 | #include <xen/events.h> |
23 | #include <xen/features.h> | ||
23 | #include <xen/interface/xen.h> | 24 | #include <xen/interface/xen.h> |
24 | #include <xen/interface/vcpu.h> | 25 | #include <xen/interface/vcpu.h> |
25 | 26 | ||
@@ -155,47 +156,8 @@ static void do_stolen_accounting(void) | |||
155 | account_idle_ticks(ticks); | 156 | account_idle_ticks(ticks); |
156 | } | 157 | } |
157 | 158 | ||
158 | /* | ||
159 | * Xen sched_clock implementation. Returns the number of unstolen | ||
160 | * nanoseconds, which is nanoseconds the VCPU spent in RUNNING+BLOCKED | ||
161 | * states. | ||
162 | */ | ||
163 | unsigned long long xen_sched_clock(void) | ||
164 | { | ||
165 | struct vcpu_runstate_info state; | ||
166 | cycle_t now; | ||
167 | u64 ret; | ||
168 | s64 offset; | ||
169 | |||
170 | /* | ||
171 | * Ideally sched_clock should be called on a per-cpu basis | ||
172 | * anyway, so preempt should already be disabled, but that's | ||
173 | * not current practice at the moment. | ||
174 | */ | ||
175 | preempt_disable(); | ||
176 | |||
177 | now = xen_clocksource_read(); | ||
178 | |||
179 | get_runstate_snapshot(&state); | ||
180 | |||
181 | WARN_ON(state.state != RUNSTATE_running); | ||
182 | |||
183 | offset = now - state.state_entry_time; | ||
184 | if (offset < 0) | ||
185 | offset = 0; | ||
186 | |||
187 | ret = state.time[RUNSTATE_blocked] + | ||
188 | state.time[RUNSTATE_running] + | ||
189 | offset; | ||
190 | |||
191 | preempt_enable(); | ||
192 | |||
193 | return ret; | ||
194 | } | ||
195 | |||
196 | |||
197 | /* Get the TSC speed from Xen */ | 159 | /* Get the TSC speed from Xen */ |
198 | unsigned long xen_tsc_khz(void) | 160 | static unsigned long xen_tsc_khz(void) |
199 | { | 161 | { |
200 | struct pvclock_vcpu_time_info *info = | 162 | struct pvclock_vcpu_time_info *info = |
201 | &HYPERVISOR_shared_info->vcpu_info[0].time; | 163 | &HYPERVISOR_shared_info->vcpu_info[0].time; |
@@ -230,7 +192,7 @@ static void xen_read_wallclock(struct timespec *ts) | |||
230 | put_cpu_var(xen_vcpu); | 192 | put_cpu_var(xen_vcpu); |
231 | } | 193 | } |
232 | 194 | ||
233 | unsigned long xen_get_wallclock(void) | 195 | static unsigned long xen_get_wallclock(void) |
234 | { | 196 | { |
235 | struct timespec ts; | 197 | struct timespec ts; |
236 | 198 | ||
@@ -238,7 +200,7 @@ unsigned long xen_get_wallclock(void) | |||
238 | return ts.tv_sec; | 200 | return ts.tv_sec; |
239 | } | 201 | } |
240 | 202 | ||
241 | int xen_set_wallclock(unsigned long now) | 203 | static int xen_set_wallclock(unsigned long now) |
242 | { | 204 | { |
243 | /* do nothing for domU */ | 205 | /* do nothing for domU */ |
244 | return -1; | 206 | return -1; |
@@ -473,7 +435,11 @@ void xen_timer_resume(void) | |||
473 | } | 435 | } |
474 | } | 436 | } |
475 | 437 | ||
476 | __init void xen_time_init(void) | 438 | static const struct pv_time_ops xen_time_ops __initdata = { |
439 | .sched_clock = xen_clocksource_read, | ||
440 | }; | ||
441 | |||
442 | static __init void xen_time_init(void) | ||
477 | { | 443 | { |
478 | int cpu = smp_processor_id(); | 444 | int cpu = smp_processor_id(); |
479 | struct timespec tp; | 445 | struct timespec tp; |
@@ -497,3 +463,47 @@ __init void xen_time_init(void) | |||
497 | xen_setup_timer(cpu); | 463 | xen_setup_timer(cpu); |
498 | xen_setup_cpu_clockevents(); | 464 | xen_setup_cpu_clockevents(); |
499 | } | 465 | } |
466 | |||
467 | __init void xen_init_time_ops(void) | ||
468 | { | ||
469 | pv_time_ops = xen_time_ops; | ||
470 | |||
471 | x86_init.timers.timer_init = xen_time_init; | ||
472 | x86_init.timers.setup_percpu_clockev = x86_init_noop; | ||
473 | x86_cpuinit.setup_percpu_clockev = x86_init_noop; | ||
474 | |||
475 | x86_platform.calibrate_tsc = xen_tsc_khz; | ||
476 | x86_platform.get_wallclock = xen_get_wallclock; | ||
477 | x86_platform.set_wallclock = xen_set_wallclock; | ||
478 | } | ||
479 | |||
480 | #ifdef CONFIG_XEN_PVHVM | ||
481 | static void xen_hvm_setup_cpu_clockevents(void) | ||
482 | { | ||
483 | int cpu = smp_processor_id(); | ||
484 | xen_setup_runstate_info(cpu); | ||
485 | xen_setup_timer(cpu); | ||
486 | xen_setup_cpu_clockevents(); | ||
487 | } | ||
488 | |||
489 | __init void xen_hvm_init_time_ops(void) | ||
490 | { | ||
491 | /* vector callback is needed otherwise we cannot receive interrupts | ||
492 | * on cpu > 0 */ | ||
493 | if (!xen_have_vector_callback && num_present_cpus() > 1) | ||
494 | return; | ||
495 | if (!xen_feature(XENFEAT_hvm_safe_pvclock)) { | ||
496 | printk(KERN_INFO "Xen doesn't support pvclock on HVM," | ||
497 | "disable pv timer\n"); | ||
498 | return; | ||
499 | } | ||
500 | |||
501 | pv_time_ops = xen_time_ops; | ||
502 | x86_init.timers.setup_percpu_clockev = xen_time_init; | ||
503 | x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents; | ||
504 | |||
505 | x86_platform.calibrate_tsc = xen_tsc_khz; | ||
506 | x86_platform.get_wallclock = xen_get_wallclock; | ||
507 | x86_platform.set_wallclock = xen_set_wallclock; | ||
508 | } | ||
509 | #endif | ||
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index f9153a300bce..7c8ab86163e9 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -38,6 +38,10 @@ void xen_enable_sysenter(void); | |||
38 | void xen_enable_syscall(void); | 38 | void xen_enable_syscall(void); |
39 | void xen_vcpu_restore(void); | 39 | void xen_vcpu_restore(void); |
40 | 40 | ||
41 | void xen_callback_vector(void); | ||
42 | void xen_hvm_init_shared_info(void); | ||
43 | void __init xen_unplug_emulated_devices(void); | ||
44 | |||
41 | void __init xen_build_dynamic_phys_to_machine(void); | 45 | void __init xen_build_dynamic_phys_to_machine(void); |
42 | 46 | ||
43 | void xen_init_irq_ops(void); | 47 | void xen_init_irq_ops(void); |
@@ -46,11 +50,8 @@ void xen_setup_runstate_info(int cpu); | |||
46 | void xen_teardown_timer(int cpu); | 50 | void xen_teardown_timer(int cpu); |
47 | cycle_t xen_clocksource_read(void); | 51 | cycle_t xen_clocksource_read(void); |
48 | void xen_setup_cpu_clockevents(void); | 52 | void xen_setup_cpu_clockevents(void); |
49 | unsigned long xen_tsc_khz(void); | 53 | void __init xen_init_time_ops(void); |
50 | void __init xen_time_init(void); | 54 | void __init xen_hvm_init_time_ops(void); |
51 | unsigned long xen_get_wallclock(void); | ||
52 | int xen_set_wallclock(unsigned long time); | ||
53 | unsigned long long xen_sched_clock(void); | ||
54 | 55 | ||
55 | irqreturn_t xen_debug_interrupt(int irq, void *dev_id); | 56 | irqreturn_t xen_debug_interrupt(int irq, void *dev_id); |
56 | 57 | ||
@@ -101,4 +102,6 @@ void xen_sysret32(void); | |||
101 | void xen_sysret64(void); | 102 | void xen_sysret64(void); |
102 | void xen_adjust_exception_frame(void); | 103 | void xen_adjust_exception_frame(void); |
103 | 104 | ||
105 | extern int xen_panic_handler_init(void); | ||
106 | |||
104 | #endif /* XEN_OPS_H */ | 107 | #endif /* XEN_OPS_H */ |