27 files changed, 1203 insertions, 786 deletions
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index dbea4f95fc85..1c058b552e93 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -121,6 +121,7 @@ Code Seq# Include File Comments | |||
121 | 'c' 00-7F linux/comstats.h conflict! | 121 | 'c' 00-7F linux/comstats.h conflict! |
122 | 'c' 00-7F linux/coda.h conflict! | 122 | 'c' 00-7F linux/coda.h conflict! |
123 | 'c' 80-9F arch/s390/include/asm/chsc.h | 123 | 'c' 80-9F arch/s390/include/asm/chsc.h |
124 | 'c' A0-AF arch/x86/include/asm/msr.h | ||
124 | 'd' 00-FF linux/char/drm/drm/h conflict! | 125 | 'd' 00-FF linux/char/drm/drm/h conflict! |
125 | 'd' F0-FF linux/digi1.h | 126 | 'd' F0-FF linux/digi1.h |
126 | 'e' all linux/digi1.h conflict! | 127 | 'e' all linux/digi1.h conflict! |
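The newly reserved 'c' A0-AF range covers the X86_IOC_RDMSR_REGS and X86_IOC_WRMSR_REGS ioctls introduced in the msr.h hunk below (_IOWR('c', 0xA0, __u32[8]) and _IOWR('c', 0xA1, __u32[8])). A rough userspace sketch of the read side follows; it assumes the /dev/cpu/N/msr character device actually wires up these ioctls (its driver change belongs to this series but is not shown here) and that the __u32[8] image follows the eax, ecx, edx, ..., edi ordering implied by rdmsrl_amd_safe() further down.

/* Hedged sketch, not part of the patch: read TSC (MSR 0x10) on CPU 0
 * through the new register-image ioctl.  Assumes the msr driver
 * implements X86_IOC_RDMSR_REGS and that regs[] is ordered
 * eax, ecx, edx, ebx, esp, ebp, esi, edi. */
#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <asm/msr.h>		/* X86_IOC_RDMSR_REGS, exported below */

int main(void)
{
	uint32_t regs[8] = { 0 };
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0)
		return 1;

	regs[1] = 0x10;		/* %ecx: MSR number, here IA32_TIME_STAMP_COUNTER */
	if (ioctl(fd, X86_IOC_RDMSR_REGS, regs) == 0)
		printf("MSR 0x10 = 0x%08x%08x\n", regs[2], regs[0]);

	close(fd);
	return 0;
}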
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 8130334329c0..527519b8a9f9 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -262,6 +262,15 @@ config MCORE2 | |||
262 | family in /proc/cpuinfo. Newer ones have 6 and older ones 15 | 262 | family in /proc/cpuinfo. Newer ones have 6 and older ones 15 |
263 | (not a typo) | 263 | (not a typo) |
264 | 264 | ||
265 | config MATOM | ||
266 | bool "Intel Atom" | ||
267 | ---help--- | ||
268 | |||
269 | Select this for the Intel Atom platform. Intel Atom CPUs have an | ||
270 | in-order pipelining architecture and thus can benefit from | ||
271 | accordingly optimized code. Use a recent GCC with specific Atom | ||
272 | support in order to fully benefit from selecting this option. | ||
273 | |||
265 | config GENERIC_CPU | 274 | config GENERIC_CPU |
266 | bool "Generic-x86-64" | 275 | bool "Generic-x86-64" |
267 | depends on X86_64 | 276 | depends on X86_64 |
@@ -295,7 +304,7 @@ config X86_CPU | |||
295 | config X86_L1_CACHE_BYTES | 304 | config X86_L1_CACHE_BYTES |
296 | int | 305 | int |
297 | default "128" if MPSC | 306 | default "128" if MPSC |
298 | default "64" if GENERIC_CPU || MK8 || MCORE2 || X86_32 | 307 | default "64" if GENERIC_CPU || MK8 || MCORE2 || MATOM || X86_32 |
299 | 308 | ||
300 | config X86_INTERNODE_CACHE_BYTES | 309 | config X86_INTERNODE_CACHE_BYTES |
301 | int | 310 | int |
@@ -310,7 +319,7 @@ config X86_L1_CACHE_SHIFT | |||
310 | default "7" if MPENTIUM4 || MPSC | 319 | default "7" if MPENTIUM4 || MPSC |
311 | default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 | 320 | default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 |
312 | default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX | 321 | default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX |
313 | default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 || X86_GENERIC || GENERIC_CPU | 322 | default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU |
314 | 323 | ||
315 | config X86_XADD | 324 | config X86_XADD |
316 | def_bool y | 325 | def_bool y |
@@ -359,7 +368,7 @@ config X86_INTEL_USERCOPY | |||
359 | 368 | ||
360 | config X86_USE_PPRO_CHECKSUM | 369 | config X86_USE_PPRO_CHECKSUM |
361 | def_bool y | 370 | def_bool y |
362 | depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2 | 371 | depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM |
363 | 372 | ||
364 | config X86_USE_3DNOW | 373 | config X86_USE_3DNOW |
365 | def_bool y | 374 | def_bool y |
@@ -387,7 +396,7 @@ config X86_P6_NOP | |||
387 | 396 | ||
388 | config X86_TSC | 397 | config X86_TSC |
389 | def_bool y | 398 | def_bool y |
390 | depends on ((MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64 | 399 | depends on ((MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) && !X86_NUMAQ) || X86_64 |
391 | 400 | ||
392 | config X86_CMPXCHG64 | 401 | config X86_CMPXCHG64 |
393 | def_bool y | 402 | def_bool y |
@@ -397,7 +406,7 @@ config X86_CMPXCHG64 | |||
397 | # generates cmov. | 406 | # generates cmov. |
398 | config X86_CMOV | 407 | config X86_CMOV |
399 | def_bool y | 408 | def_bool y |
400 | depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64) | 409 | depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM) |
401 | 410 | ||
402 | config X86_MINIMUM_CPU_FAMILY | 411 | config X86_MINIMUM_CPU_FAMILY |
403 | int | 412 | int |
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 5e7db44d709e..5128b178529f 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -55,6 +55,8 @@ else | |||
55 | 55 | ||
56 | cflags-$(CONFIG_MCORE2) += \ | 56 | cflags-$(CONFIG_MCORE2) += \ |
57 | $(call cc-option,-march=core2,$(call cc-option,-mtune=generic)) | 57 | $(call cc-option,-march=core2,$(call cc-option,-mtune=generic)) |
58 | cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \ | ||
59 | $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) | ||
58 | cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic) | 60 | cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic) |
59 | KBUILD_CFLAGS += $(cflags-y) | 61 | KBUILD_CFLAGS += $(cflags-y) |
60 | 62 | ||
diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
index 80177ec052f0..30e9a264f69d 100644
--- a/arch/x86/Makefile_32.cpu
+++ b/arch/x86/Makefile_32.cpu
@@ -33,6 +33,8 @@ cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align)-f | |||
33 | cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) | 33 | cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) |
34 | cflags-$(CONFIG_MVIAC7) += -march=i686 | 34 | cflags-$(CONFIG_MVIAC7) += -march=i686 |
35 | cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2) | 35 | cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2) |
36 | cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \ | ||
37 | $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) | ||
36 | 38 | ||
37 | # AMD Elan support | 39 | # AMD Elan support |
38 | cflags-$(CONFIG_X86_ELAN) += -march=i486 | 40 | cflags-$(CONFIG_X86_ELAN) += -march=i486 |
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index d3ec8d588d4b..585edebe12cf 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -59,13 +59,6 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out, | |||
59 | asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, | 59 | asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, |
60 | const u8 *in, unsigned int len, u8 *iv); | 60 | const u8 *in, unsigned int len, u8 *iv); |
61 | 61 | ||
62 | static inline int kernel_fpu_using(void) | ||
63 | { | ||
64 | if (in_interrupt() && !(read_cr0() & X86_CR0_TS)) | ||
65 | return 1; | ||
66 | return 0; | ||
67 | } | ||
68 | |||
69 | static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) | 62 | static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) |
70 | { | 63 | { |
71 | unsigned long addr = (unsigned long)raw_ctx; | 64 | unsigned long addr = (unsigned long)raw_ctx; |
@@ -89,7 +82,7 @@ static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx, | |||
89 | return -EINVAL; | 82 | return -EINVAL; |
90 | } | 83 | } |
91 | 84 | ||
92 | if (kernel_fpu_using()) | 85 | if (irq_fpu_usable()) |
93 | err = crypto_aes_expand_key(ctx, in_key, key_len); | 86 | err = crypto_aes_expand_key(ctx, in_key, key_len); |
94 | else { | 87 | else { |
95 | kernel_fpu_begin(); | 88 | kernel_fpu_begin(); |
@@ -110,7 +103,7 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | |||
110 | { | 103 | { |
111 | struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); | 104 | struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); |
112 | 105 | ||
113 | if (kernel_fpu_using()) | 106 | if (irq_fpu_usable()) |
114 | crypto_aes_encrypt_x86(ctx, dst, src); | 107 | crypto_aes_encrypt_x86(ctx, dst, src); |
115 | else { | 108 | else { |
116 | kernel_fpu_begin(); | 109 | kernel_fpu_begin(); |
@@ -123,7 +116,7 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | |||
123 | { | 116 | { |
124 | struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); | 117 | struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); |
125 | 118 | ||
126 | if (kernel_fpu_using()) | 119 | if (irq_fpu_usable()) |
127 | crypto_aes_decrypt_x86(ctx, dst, src); | 120 | crypto_aes_decrypt_x86(ctx, dst, src); |
128 | else { | 121 | else { |
129 | kernel_fpu_begin(); | 122 | kernel_fpu_begin(); |
@@ -349,7 +342,7 @@ static int ablk_encrypt(struct ablkcipher_request *req) | |||
349 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); | 342 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); |
350 | struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); | 343 | struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); |
351 | 344 | ||
352 | if (kernel_fpu_using()) { | 345 | if (irq_fpu_usable()) { |
353 | struct ablkcipher_request *cryptd_req = | 346 | struct ablkcipher_request *cryptd_req = |
354 | ablkcipher_request_ctx(req); | 347 | ablkcipher_request_ctx(req); |
355 | memcpy(cryptd_req, req, sizeof(*req)); | 348 | memcpy(cryptd_req, req, sizeof(*req)); |
@@ -370,7 +363,7 @@ static int ablk_decrypt(struct ablkcipher_request *req) | |||
370 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); | 363 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); |
371 | struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); | 364 | struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); |
372 | 365 | ||
373 | if (kernel_fpu_using()) { | 366 | if (irq_fpu_usable()) { |
374 | struct ablkcipher_request *cryptd_req = | 367 | struct ablkcipher_request *cryptd_req = |
375 | ablkcipher_request_ctx(req); | 368 | ablkcipher_request_ctx(req); |
376 | memcpy(cryptd_req, req, sizeof(*req)); | 369 | memcpy(cryptd_req, req, sizeof(*req)); |
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 56be78f582f0..b3ed1e1460ff 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -3,7 +3,7 @@ | |||
3 | 3 | ||
4 | #ifdef __ASSEMBLY__ | 4 | #ifdef __ASSEMBLY__ |
5 | # define __ASM_FORM(x) x | 5 | # define __ASM_FORM(x) x |
6 | # define __ASM_EX_SEC .section __ex_table | 6 | # define __ASM_EX_SEC .section __ex_table, "a" |
7 | #else | 7 | #else |
8 | # define __ASM_FORM(x) " " #x " " | 8 | # define __ASM_FORM(x) " " #x " " |
9 | # define __ASM_EX_SEC " .section __ex_table,\"a\"\n" | 9 | # define __ASM_EX_SEC " .section __ex_table,\"a\"\n" |
@@ -38,10 +38,18 @@ | |||
38 | #define _ASM_DI __ASM_REG(di) | 38 | #define _ASM_DI __ASM_REG(di) |
39 | 39 | ||
40 | /* Exception table entry */ | 40 | /* Exception table entry */ |
41 | #ifdef __ASSEMBLY__ | ||
42 | # define _ASM_EXTABLE(from,to) \ | ||
43 | __ASM_EX_SEC ; \ | ||
44 | _ASM_ALIGN ; \ | ||
45 | _ASM_PTR from , to ; \ | ||
46 | .previous | ||
47 | #else | ||
41 | # define _ASM_EXTABLE(from,to) \ | 48 | # define _ASM_EXTABLE(from,to) \ |
42 | __ASM_EX_SEC \ | 49 | __ASM_EX_SEC \ |
43 | _ASM_ALIGN "\n" \ | 50 | _ASM_ALIGN "\n" \ |
44 | _ASM_PTR #from "," #to "\n" \ | 51 | _ASM_PTR #from "," #to "\n" \ |
45 | " .previous\n" | 52 | " .previous\n" |
53 | #endif | ||
46 | 54 | ||
47 | #endif /* _ASM_X86_ASM_H */ | 55 | #endif /* _ASM_X86_ASM_H */ |
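Besides the quoted C-string form, the hunk adds a bare-assembler variant of _ASM_EXTABLE so .S files can emit exception-table entries with the same macro, and gives the section an explicit "a" (allocatable) attribute. For orientation, the C-side usage pattern that native_read_msr_safe() below relies on looks roughly like this hypothetical fault-tolerant load:

/* Hypothetical helper, not in the patch: a load that survives a fault by
 * branching to a .fixup stub registered via _ASM_EXTABLE. */
#include <linux/types.h>
#include <asm/asm.h>
#include <asm/errno.h>

static inline int load_byte_safe(const u8 *addr, u8 *val)
{
	int err = 0;

	asm volatile("1:	movb (%[addr]), %[val]\n"
		     "2:\n"
		     ".section .fixup,\"ax\"\n"
		     "3:	mov %[fault], %[err] ; jmp 2b\n"
		     ".previous\n"
		     _ASM_EXTABLE(1b, 3b)
		     : [err] "+r" (err), [val] "=q" (*val)
		     : [addr] "r" (addr), [fault] "i" (-EFAULT)
		     : "memory");
	return err;
}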
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 4a28d22d4793..847fee6493a2 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -95,6 +95,7 @@ | |||
95 | #define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */ | 95 | #define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */ |
96 | #define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */ | 96 | #define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */ |
97 | #define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */ | 97 | #define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */ |
98 | #define X86_FEATURE_AMD_DCM (3*32+27) /* multi-node processor */ | ||
98 | 99 | ||
99 | /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ | 100 | /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ |
100 | #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ | 101 | #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ |
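X86_FEATURE_AMD_DCM lives in word 3, the Linux-defined (synthetic) word, so it is set by CPU setup code rather than read straight from CPUID; elsewhere in this series it presumably marks multi-node AMD processors. Callers test it like any other flag, as in this minimal sketch:

/* Minimal usage sketch, not part of the patch: synthetic bits are
 * queried exactly like CPUID-derived ones. */
#include <linux/kernel.h>
#include <asm/cpufeature.h>

static void report_dcm(void)
{
	if (boot_cpu_has(X86_FEATURE_AMD_DCM))
		pr_info("multi-node AMD processor detected\n");
}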
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
index 3afc5e87cfdd..ae6253ab9029 100644
--- a/arch/x86/include/asm/dwarf2.h
+++ b/arch/x86/include/asm/dwarf2.h
@@ -87,9 +87,25 @@ | |||
87 | CFI_RESTORE \reg | 87 | CFI_RESTORE \reg |
88 | .endm | 88 | .endm |
89 | #else /*!CONFIG_X86_64*/ | 89 | #else /*!CONFIG_X86_64*/ |
90 | .macro pushl_cfi reg | ||
91 | pushl \reg | ||
92 | CFI_ADJUST_CFA_OFFSET 4 | ||
93 | .endm | ||
90 | 94 | ||
91 | /* 32bit defenitions are missed yet */ | 95 | .macro popl_cfi reg |
96 | popl \reg | ||
97 | CFI_ADJUST_CFA_OFFSET -4 | ||
98 | .endm | ||
92 | 99 | ||
100 | .macro movl_cfi reg offset=0 | ||
101 | movl %\reg, \offset(%esp) | ||
102 | CFI_REL_OFFSET \reg, \offset | ||
103 | .endm | ||
104 | |||
105 | .macro movl_cfi_restore offset reg | ||
106 | movl \offset(%esp), %\reg | ||
107 | CFI_RESTORE \reg | ||
108 | .endm | ||
93 | #endif /*!CONFIG_X86_64*/ | 109 | #endif /*!CONFIG_X86_64*/ |
94 | #endif /*__ASSEMBLY__*/ | 110 | #endif /*__ASSEMBLY__*/ |
95 | 111 | ||
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 175adf58dd4f..fb7f0d64e14f 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -301,6 +301,14 @@ static inline void kernel_fpu_end(void) | |||
301 | preempt_enable(); | 301 | preempt_enable(); |
302 | } | 302 | } |
303 | 303 | ||
304 | static inline bool irq_fpu_usable(void) | ||
305 | { | ||
306 | struct pt_regs *regs; | ||
307 | |||
308 | return !in_interrupt() || !(regs = get_irq_regs()) || \ | ||
309 | user_mode(regs) || (read_cr0() & X86_CR0_TS); | ||
310 | } | ||
311 | |||
304 | /* | 312 | /* |
305 | * Some instructions like VIA's padlock instructions generate a spurious | 313 | * Some instructions like VIA's padlock instructions generate a spurious |
306 | * DNA fault but don't modify SSE registers. And these instructions | 314 | * DNA fault but don't modify SSE registers. And these instructions |
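irq_fpu_usable() folds the test that the aesni glue used to open-code (kernel_fpu_using(), removed above) into a generic helper: it returns true when the current context may bracket FPU/SSE code with kernel_fpu_begin()/kernel_fpu_end(), i.e. process context, an interrupt that landed in user mode, or one that landed in kernel mode with CR0.TS set. A minimal caller sketch, with do_sse_fastpath() and do_generic() standing in for hypothetical routines:

/* Hedged sketch, not in the patch: guard an SSE fast path with the new
 * helper and fall back to plain C when the FPU cannot be touched. */
#include <linux/types.h>
#include <asm/i387.h>

void do_sse_fastpath(void *buf, size_t len);	/* hypothetical */
void do_generic(void *buf, size_t len);		/* hypothetical */

static void do_work(void *buf, size_t len)
{
	if (irq_fpu_usable()) {
		kernel_fpu_begin();
		do_sse_fastpath(buf, len);
		kernel_fpu_end();
	} else {
		do_generic(buf, len);
	}
}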
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index 555bc12bdcd6..3e2ce58a31a3 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -17,6 +17,8 @@ | |||
17 | #define MODULE_PROC_FAMILY "586MMX " | 17 | #define MODULE_PROC_FAMILY "586MMX " |
18 | #elif defined CONFIG_MCORE2 | 18 | #elif defined CONFIG_MCORE2 |
19 | #define MODULE_PROC_FAMILY "CORE2 " | 19 | #define MODULE_PROC_FAMILY "CORE2 " |
20 | #elif defined CONFIG_MATOM | ||
21 | #define MODULE_PROC_FAMILY "ATOM " | ||
20 | #elif defined CONFIG_M686 | 22 | #elif defined CONFIG_M686 |
21 | #define MODULE_PROC_FAMILY "686 " | 23 | #define MODULE_PROC_FAMILY "686 " |
22 | #elif defined CONFIG_MPENTIUMII | 24 | #elif defined CONFIG_MPENTIUMII |
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 48ad9d29484a..7e2b6ba962ff 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -3,10 +3,16 @@ | |||
3 | 3 | ||
4 | #include <asm/msr-index.h> | 4 | #include <asm/msr-index.h> |
5 | 5 | ||
6 | #ifdef __KERNEL__ | ||
7 | #ifndef __ASSEMBLY__ | 6 | #ifndef __ASSEMBLY__ |
8 | 7 | ||
9 | #include <linux/types.h> | 8 | #include <linux/types.h> |
9 | #include <linux/ioctl.h> | ||
10 | |||
11 | #define X86_IOC_RDMSR_REGS _IOWR('c', 0xA0, __u32[8]) | ||
12 | #define X86_IOC_WRMSR_REGS _IOWR('c', 0xA1, __u32[8]) | ||
13 | |||
14 | #ifdef __KERNEL__ | ||
15 | |||
10 | #include <asm/asm.h> | 16 | #include <asm/asm.h> |
11 | #include <asm/errno.h> | 17 | #include <asm/errno.h> |
12 | #include <asm/cpumask.h> | 18 | #include <asm/cpumask.h> |
@@ -67,23 +73,7 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr, | |||
67 | ".previous\n\t" | 73 | ".previous\n\t" |
68 | _ASM_EXTABLE(2b, 3b) | 74 | _ASM_EXTABLE(2b, 3b) |
69 | : [err] "=r" (*err), EAX_EDX_RET(val, low, high) | 75 | : [err] "=r" (*err), EAX_EDX_RET(val, low, high) |
70 | : "c" (msr), [fault] "i" (-EFAULT)); | 76 | : "c" (msr), [fault] "i" (-EIO)); |
71 | return EAX_EDX_VAL(val, low, high); | ||
72 | } | ||
73 | |||
74 | static inline unsigned long long native_read_msr_amd_safe(unsigned int msr, | ||
75 | int *err) | ||
76 | { | ||
77 | DECLARE_ARGS(val, low, high); | ||
78 | |||
79 | asm volatile("2: rdmsr ; xor %0,%0\n" | ||
80 | "1:\n\t" | ||
81 | ".section .fixup,\"ax\"\n\t" | ||
82 | "3: mov %3,%0 ; jmp 1b\n\t" | ||
83 | ".previous\n\t" | ||
84 | _ASM_EXTABLE(2b, 3b) | ||
85 | : "=r" (*err), EAX_EDX_RET(val, low, high) | ||
86 | : "c" (msr), "D" (0x9c5a203a), "i" (-EFAULT)); | ||
87 | return EAX_EDX_VAL(val, low, high); | 77 | return EAX_EDX_VAL(val, low, high); |
88 | } | 78 | } |
89 | 79 | ||
@@ -106,13 +96,16 @@ notrace static inline int native_write_msr_safe(unsigned int msr, | |||
106 | _ASM_EXTABLE(2b, 3b) | 96 | _ASM_EXTABLE(2b, 3b) |
107 | : [err] "=a" (err) | 97 | : [err] "=a" (err) |
108 | : "c" (msr), "0" (low), "d" (high), | 98 | : "c" (msr), "0" (low), "d" (high), |
109 | [fault] "i" (-EFAULT) | 99 | [fault] "i" (-EIO) |
110 | : "memory"); | 100 | : "memory"); |
111 | return err; | 101 | return err; |
112 | } | 102 | } |
113 | 103 | ||
114 | extern unsigned long long native_read_tsc(void); | 104 | extern unsigned long long native_read_tsc(void); |
115 | 105 | ||
106 | extern int native_rdmsr_safe_regs(u32 regs[8]); | ||
107 | extern int native_wrmsr_safe_regs(u32 regs[8]); | ||
108 | |||
116 | static __always_inline unsigned long long __native_read_tsc(void) | 109 | static __always_inline unsigned long long __native_read_tsc(void) |
117 | { | 110 | { |
118 | DECLARE_ARGS(val, low, high); | 111 | DECLARE_ARGS(val, low, high); |
@@ -181,14 +174,44 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) | |||
181 | *p = native_read_msr_safe(msr, &err); | 174 | *p = native_read_msr_safe(msr, &err); |
182 | return err; | 175 | return err; |
183 | } | 176 | } |
177 | |||
184 | static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) | 178 | static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) |
185 | { | 179 | { |
180 | u32 gprs[8] = { 0 }; | ||
186 | int err; | 181 | int err; |
187 | 182 | ||
188 | *p = native_read_msr_amd_safe(msr, &err); | 183 | gprs[1] = msr; |
184 | gprs[7] = 0x9c5a203a; | ||
185 | |||
186 | err = native_rdmsr_safe_regs(gprs); | ||
187 | |||
188 | *p = gprs[0] | ((u64)gprs[2] << 32); | ||
189 | |||
189 | return err; | 190 | return err; |
190 | } | 191 | } |
191 | 192 | ||
193 | static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) | ||
194 | { | ||
195 | u32 gprs[8] = { 0 }; | ||
196 | |||
197 | gprs[0] = (u32)val; | ||
198 | gprs[1] = msr; | ||
199 | gprs[2] = val >> 32; | ||
200 | gprs[7] = 0x9c5a203a; | ||
201 | |||
202 | return native_wrmsr_safe_regs(gprs); | ||
203 | } | ||
204 | |||
205 | static inline int rdmsr_safe_regs(u32 regs[8]) | ||
206 | { | ||
207 | return native_rdmsr_safe_regs(regs); | ||
208 | } | ||
209 | |||
210 | static inline int wrmsr_safe_regs(u32 regs[8]) | ||
211 | { | ||
212 | return native_wrmsr_safe_regs(regs); | ||
213 | } | ||
214 | |||
192 | #define rdtscl(low) \ | 215 | #define rdtscl(low) \ |
193 | ((low) = (u32)__native_read_tsc()) | 216 | ((low) = (u32)__native_read_tsc()) |
194 | 217 | ||
@@ -228,6 +251,8 @@ void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs); | |||
228 | void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs); | 251 | void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs); |
229 | int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); | 252 | int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); |
230 | int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); | 253 | int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); |
254 | int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]); | ||
255 | int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]); | ||
231 | #else /* CONFIG_SMP */ | 256 | #else /* CONFIG_SMP */ |
232 | static inline int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) | 257 | static inline int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) |
233 | { | 258 | { |
@@ -258,7 +283,15 @@ static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) | |||
258 | { | 283 | { |
259 | return wrmsr_safe(msr_no, l, h); | 284 | return wrmsr_safe(msr_no, l, h); |
260 | } | 285 | } |
286 | static inline int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]) | ||
287 | { | ||
288 | return rdmsr_safe_regs(regs); | ||
289 | } | ||
290 | static inline int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]) | ||
291 | { | ||
292 | return wrmsr_safe_regs(regs); | ||
293 | } | ||
261 | #endif /* CONFIG_SMP */ | 294 | #endif /* CONFIG_SMP */ |
262 | #endif /* __ASSEMBLY__ */ | ||
263 | #endif /* __KERNEL__ */ | 295 | #endif /* __KERNEL__ */ |
296 | #endif /* __ASSEMBLY__ */ | ||
264 | #endif /* _ASM_X86_MSR_H */ | 297 | #endif /* _ASM_X86_MSR_H */ |
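The *_safe_regs() interface replaces the AMD-only native_read_msr_amd_safe() with a general one that passes a whole register image, which is how rdmsrl_amd_safe()/wrmsrl_amd_safe() can plant the 0x9c5a203a password in %edi. Below is a kernel-side sketch of a plain read through the same interface, using the index convention visible in rdmsrl_amd_safe() above (0 = eax, 1 = ecx, 2 = edx, 7 = edi) as an assumption rather than documented ABI.

/* Sketch only: read an MSR via the register-image path instead of
 * rdmsrl_safe().  Hypothetical helper, not part of the patch. */
#include <asm/msr.h>

static int read_msr_via_regs(u32 msr, u64 *val)
{
	u32 regs[8] = { 0 };
	int err;

	regs[1] = msr;				/* %ecx: MSR number */
	err = rdmsr_safe_regs(regs);
	if (!err)
		*val = regs[0] | ((u64)regs[2] << 32);
	return err;
}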
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 4fb37c8a0832..40d6586af25b 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -7,689 +7,11 @@ | |||
7 | #include <asm/pgtable_types.h> | 7 | #include <asm/pgtable_types.h> |
8 | #include <asm/asm.h> | 8 | #include <asm/asm.h> |
9 | 9 | ||
10 | /* Bitmask of what can be clobbered: usually at least eax. */ | 10 | #include <asm/paravirt_types.h> |
11 | #define CLBR_NONE 0 | ||
12 | #define CLBR_EAX (1 << 0) | ||
13 | #define CLBR_ECX (1 << 1) | ||
14 | #define CLBR_EDX (1 << 2) | ||
15 | #define CLBR_EDI (1 << 3) | ||
16 | |||
17 | #ifdef CONFIG_X86_32 | ||
18 | /* CLBR_ANY should match all regs platform has. For i386, that's just it */ | ||
19 | #define CLBR_ANY ((1 << 4) - 1) | ||
20 | |||
21 | #define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX) | ||
22 | #define CLBR_RET_REG (CLBR_EAX | CLBR_EDX) | ||
23 | #define CLBR_SCRATCH (0) | ||
24 | #else | ||
25 | #define CLBR_RAX CLBR_EAX | ||
26 | #define CLBR_RCX CLBR_ECX | ||
27 | #define CLBR_RDX CLBR_EDX | ||
28 | #define CLBR_RDI CLBR_EDI | ||
29 | #define CLBR_RSI (1 << 4) | ||
30 | #define CLBR_R8 (1 << 5) | ||
31 | #define CLBR_R9 (1 << 6) | ||
32 | #define CLBR_R10 (1 << 7) | ||
33 | #define CLBR_R11 (1 << 8) | ||
34 | |||
35 | #define CLBR_ANY ((1 << 9) - 1) | ||
36 | |||
37 | #define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \ | ||
38 | CLBR_RCX | CLBR_R8 | CLBR_R9) | ||
39 | #define CLBR_RET_REG (CLBR_RAX) | ||
40 | #define CLBR_SCRATCH (CLBR_R10 | CLBR_R11) | ||
41 | |||
42 | #include <asm/desc_defs.h> | ||
43 | #endif /* X86_64 */ | ||
44 | |||
45 | #define CLBR_CALLEE_SAVE ((CLBR_ARG_REGS | CLBR_SCRATCH) & ~CLBR_RET_REG) | ||
46 | 11 | ||
47 | #ifndef __ASSEMBLY__ | 12 | #ifndef __ASSEMBLY__ |
48 | #include <linux/types.h> | 13 | #include <linux/types.h> |
49 | #include <linux/cpumask.h> | 14 | #include <linux/cpumask.h> |
50 | #include <asm/kmap_types.h> | ||
51 | #include <asm/desc_defs.h> | ||
52 | |||
53 | struct page; | ||
54 | struct thread_struct; | ||
55 | struct desc_ptr; | ||
56 | struct tss_struct; | ||
57 | struct mm_struct; | ||
58 | struct desc_struct; | ||
59 | struct task_struct; | ||
60 | |||
61 | /* | ||
62 | * Wrapper type for pointers to code which uses the non-standard | ||
63 | * calling convention. See PV_CALL_SAVE_REGS_THUNK below. | ||
64 | */ | ||
65 | struct paravirt_callee_save { | ||
66 | void *func; | ||
67 | }; | ||
68 | |||
69 | /* general info */ | ||
70 | struct pv_info { | ||
71 | unsigned int kernel_rpl; | ||
72 | int shared_kernel_pmd; | ||
73 | int paravirt_enabled; | ||
74 | const char *name; | ||
75 | }; | ||
76 | |||
77 | struct pv_init_ops { | ||
78 | /* | ||
79 | * Patch may replace one of the defined code sequences with | ||
80 | * arbitrary code, subject to the same register constraints. | ||
81 | * This generally means the code is not free to clobber any | ||
82 | * registers other than EAX. The patch function should return | ||
83 | * the number of bytes of code generated, as we nop pad the | ||
84 | * rest in generic code. | ||
85 | */ | ||
86 | unsigned (*patch)(u8 type, u16 clobber, void *insnbuf, | ||
87 | unsigned long addr, unsigned len); | ||
88 | |||
89 | /* Basic arch-specific setup */ | ||
90 | void (*arch_setup)(void); | ||
91 | char *(*memory_setup)(void); | ||
92 | void (*post_allocator_init)(void); | ||
93 | |||
94 | /* Print a banner to identify the environment */ | ||
95 | void (*banner)(void); | ||
96 | }; | ||
97 | |||
98 | |||
99 | struct pv_lazy_ops { | ||
100 | /* Set deferred update mode, used for batching operations. */ | ||
101 | void (*enter)(void); | ||
102 | void (*leave)(void); | ||
103 | }; | ||
104 | |||
105 | struct pv_time_ops { | ||
106 | void (*time_init)(void); | ||
107 | |||
108 | /* Set and set time of day */ | ||
109 | unsigned long (*get_wallclock)(void); | ||
110 | int (*set_wallclock)(unsigned long); | ||
111 | |||
112 | unsigned long long (*sched_clock)(void); | ||
113 | unsigned long (*get_tsc_khz)(void); | ||
114 | }; | ||
115 | |||
116 | struct pv_cpu_ops { | ||
117 | /* hooks for various privileged instructions */ | ||
118 | unsigned long (*get_debugreg)(int regno); | ||
119 | void (*set_debugreg)(int regno, unsigned long value); | ||
120 | |||
121 | void (*clts)(void); | ||
122 | |||
123 | unsigned long (*read_cr0)(void); | ||
124 | void (*write_cr0)(unsigned long); | ||
125 | |||
126 | unsigned long (*read_cr4_safe)(void); | ||
127 | unsigned long (*read_cr4)(void); | ||
128 | void (*write_cr4)(unsigned long); | ||
129 | |||
130 | #ifdef CONFIG_X86_64 | ||
131 | unsigned long (*read_cr8)(void); | ||
132 | void (*write_cr8)(unsigned long); | ||
133 | #endif | ||
134 | |||
135 | /* Segment descriptor handling */ | ||
136 | void (*load_tr_desc)(void); | ||
137 | void (*load_gdt)(const struct desc_ptr *); | ||
138 | void (*load_idt)(const struct desc_ptr *); | ||
139 | void (*store_gdt)(struct desc_ptr *); | ||
140 | void (*store_idt)(struct desc_ptr *); | ||
141 | void (*set_ldt)(const void *desc, unsigned entries); | ||
142 | unsigned long (*store_tr)(void); | ||
143 | void (*load_tls)(struct thread_struct *t, unsigned int cpu); | ||
144 | #ifdef CONFIG_X86_64 | ||
145 | void (*load_gs_index)(unsigned int idx); | ||
146 | #endif | ||
147 | void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum, | ||
148 | const void *desc); | ||
149 | void (*write_gdt_entry)(struct desc_struct *, | ||
150 | int entrynum, const void *desc, int size); | ||
151 | void (*write_idt_entry)(gate_desc *, | ||
152 | int entrynum, const gate_desc *gate); | ||
153 | void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries); | ||
154 | void (*free_ldt)(struct desc_struct *ldt, unsigned entries); | ||
155 | |||
156 | void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); | ||
157 | |||
158 | void (*set_iopl_mask)(unsigned mask); | ||
159 | |||
160 | void (*wbinvd)(void); | ||
161 | void (*io_delay)(void); | ||
162 | |||
163 | /* cpuid emulation, mostly so that caps bits can be disabled */ | ||
164 | void (*cpuid)(unsigned int *eax, unsigned int *ebx, | ||
165 | unsigned int *ecx, unsigned int *edx); | ||
166 | |||
167 | /* MSR, PMC and TSR operations. | ||
168 | err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ | ||
169 | u64 (*read_msr_amd)(unsigned int msr, int *err); | ||
170 | u64 (*read_msr)(unsigned int msr, int *err); | ||
171 | int (*write_msr)(unsigned int msr, unsigned low, unsigned high); | ||
172 | |||
173 | u64 (*read_tsc)(void); | ||
174 | u64 (*read_pmc)(int counter); | ||
175 | unsigned long long (*read_tscp)(unsigned int *aux); | ||
176 | |||
177 | /* | ||
178 | * Atomically enable interrupts and return to userspace. This | ||
179 | * is only ever used to return to 32-bit processes; in a | ||
180 | * 64-bit kernel, it's used for 32-on-64 compat processes, but | ||
181 | * never native 64-bit processes. (Jump, not call.) | ||
182 | */ | ||
183 | void (*irq_enable_sysexit)(void); | ||
184 | |||
185 | /* | ||
186 | * Switch to usermode gs and return to 64-bit usermode using | ||
187 | * sysret. Only used in 64-bit kernels to return to 64-bit | ||
188 | * processes. Usermode register state, including %rsp, must | ||
189 | * already be restored. | ||
190 | */ | ||
191 | void (*usergs_sysret64)(void); | ||
192 | |||
193 | /* | ||
194 | * Switch to usermode gs and return to 32-bit usermode using | ||
195 | * sysret. Used to return to 32-on-64 compat processes. | ||
196 | * Other usermode register state, including %esp, must already | ||
197 | * be restored. | ||
198 | */ | ||
199 | void (*usergs_sysret32)(void); | ||
200 | |||
201 | /* Normal iret. Jump to this with the standard iret stack | ||
202 | frame set up. */ | ||
203 | void (*iret)(void); | ||
204 | |||
205 | void (*swapgs)(void); | ||
206 | |||
207 | void (*start_context_switch)(struct task_struct *prev); | ||
208 | void (*end_context_switch)(struct task_struct *next); | ||
209 | }; | ||
210 | |||
211 | struct pv_irq_ops { | ||
212 | void (*init_IRQ)(void); | ||
213 | |||
214 | /* | ||
215 | * Get/set interrupt state. save_fl and restore_fl are only | ||
216 | * expected to use X86_EFLAGS_IF; all other bits | ||
217 | * returned from save_fl are undefined, and may be ignored by | ||
218 | * restore_fl. | ||
219 | * | ||
220 | * NOTE: These functions callers expect the callee to preserve | ||
221 | * more registers than the standard C calling convention. | ||
222 | */ | ||
223 | struct paravirt_callee_save save_fl; | ||
224 | struct paravirt_callee_save restore_fl; | ||
225 | struct paravirt_callee_save irq_disable; | ||
226 | struct paravirt_callee_save irq_enable; | ||
227 | |||
228 | void (*safe_halt)(void); | ||
229 | void (*halt)(void); | ||
230 | |||
231 | #ifdef CONFIG_X86_64 | ||
232 | void (*adjust_exception_frame)(void); | ||
233 | #endif | ||
234 | }; | ||
235 | |||
236 | struct pv_apic_ops { | ||
237 | #ifdef CONFIG_X86_LOCAL_APIC | ||
238 | void (*setup_boot_clock)(void); | ||
239 | void (*setup_secondary_clock)(void); | ||
240 | |||
241 | void (*startup_ipi_hook)(int phys_apicid, | ||
242 | unsigned long start_eip, | ||
243 | unsigned long start_esp); | ||
244 | #endif | ||
245 | }; | ||
246 | |||
247 | struct pv_mmu_ops { | ||
248 | /* | ||
249 | * Called before/after init_mm pagetable setup. setup_start | ||
250 | * may reset %cr3, and may pre-install parts of the pagetable; | ||
251 | * pagetable setup is expected to preserve any existing | ||
252 | * mapping. | ||
253 | */ | ||
254 | void (*pagetable_setup_start)(pgd_t *pgd_base); | ||
255 | void (*pagetable_setup_done)(pgd_t *pgd_base); | ||
256 | |||
257 | unsigned long (*read_cr2)(void); | ||
258 | void (*write_cr2)(unsigned long); | ||
259 | |||
260 | unsigned long (*read_cr3)(void); | ||
261 | void (*write_cr3)(unsigned long); | ||
262 | |||
263 | /* | ||
264 | * Hooks for intercepting the creation/use/destruction of an | ||
265 | * mm_struct. | ||
266 | */ | ||
267 | void (*activate_mm)(struct mm_struct *prev, | ||
268 | struct mm_struct *next); | ||
269 | void (*dup_mmap)(struct mm_struct *oldmm, | ||
270 | struct mm_struct *mm); | ||
271 | void (*exit_mmap)(struct mm_struct *mm); | ||
272 | |||
273 | |||
274 | /* TLB operations */ | ||
275 | void (*flush_tlb_user)(void); | ||
276 | void (*flush_tlb_kernel)(void); | ||
277 | void (*flush_tlb_single)(unsigned long addr); | ||
278 | void (*flush_tlb_others)(const struct cpumask *cpus, | ||
279 | struct mm_struct *mm, | ||
280 | unsigned long va); | ||
281 | |||
282 | /* Hooks for allocating and freeing a pagetable top-level */ | ||
283 | int (*pgd_alloc)(struct mm_struct *mm); | ||
284 | void (*pgd_free)(struct mm_struct *mm, pgd_t *pgd); | ||
285 | |||
286 | /* | ||
287 | * Hooks for allocating/releasing pagetable pages when they're | ||
288 | * attached to a pagetable | ||
289 | */ | ||
290 | void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn); | ||
291 | void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn); | ||
292 | void (*alloc_pmd_clone)(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count); | ||
293 | void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn); | ||
294 | void (*release_pte)(unsigned long pfn); | ||
295 | void (*release_pmd)(unsigned long pfn); | ||
296 | void (*release_pud)(unsigned long pfn); | ||
297 | |||
298 | /* Pagetable manipulation functions */ | ||
299 | void (*set_pte)(pte_t *ptep, pte_t pteval); | ||
300 | void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, | ||
301 | pte_t *ptep, pte_t pteval); | ||
302 | void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); | ||
303 | void (*pte_update)(struct mm_struct *mm, unsigned long addr, | ||
304 | pte_t *ptep); | ||
305 | void (*pte_update_defer)(struct mm_struct *mm, | ||
306 | unsigned long addr, pte_t *ptep); | ||
307 | |||
308 | pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr, | ||
309 | pte_t *ptep); | ||
310 | void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr, | ||
311 | pte_t *ptep, pte_t pte); | ||
312 | |||
313 | struct paravirt_callee_save pte_val; | ||
314 | struct paravirt_callee_save make_pte; | ||
315 | |||
316 | struct paravirt_callee_save pgd_val; | ||
317 | struct paravirt_callee_save make_pgd; | ||
318 | |||
319 | #if PAGETABLE_LEVELS >= 3 | ||
320 | #ifdef CONFIG_X86_PAE | ||
321 | void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); | ||
322 | void (*pte_clear)(struct mm_struct *mm, unsigned long addr, | ||
323 | pte_t *ptep); | ||
324 | void (*pmd_clear)(pmd_t *pmdp); | ||
325 | |||
326 | #endif /* CONFIG_X86_PAE */ | ||
327 | |||
328 | void (*set_pud)(pud_t *pudp, pud_t pudval); | ||
329 | |||
330 | struct paravirt_callee_save pmd_val; | ||
331 | struct paravirt_callee_save make_pmd; | ||
332 | |||
333 | #if PAGETABLE_LEVELS == 4 | ||
334 | struct paravirt_callee_save pud_val; | ||
335 | struct paravirt_callee_save make_pud; | ||
336 | |||
337 | void (*set_pgd)(pgd_t *pudp, pgd_t pgdval); | ||
338 | #endif /* PAGETABLE_LEVELS == 4 */ | ||
339 | #endif /* PAGETABLE_LEVELS >= 3 */ | ||
340 | |||
341 | #ifdef CONFIG_HIGHPTE | ||
342 | void *(*kmap_atomic_pte)(struct page *page, enum km_type type); | ||
343 | #endif | ||
344 | |||
345 | struct pv_lazy_ops lazy_mode; | ||
346 | |||
347 | /* dom0 ops */ | ||
348 | |||
349 | /* Sometimes the physical address is a pfn, and sometimes its | ||
350 | an mfn. We can tell which is which from the index. */ | ||
351 | void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx, | ||
352 | phys_addr_t phys, pgprot_t flags); | ||
353 | }; | ||
354 | |||
355 | struct raw_spinlock; | ||
356 | struct pv_lock_ops { | ||
357 | int (*spin_is_locked)(struct raw_spinlock *lock); | ||
358 | int (*spin_is_contended)(struct raw_spinlock *lock); | ||
359 | void (*spin_lock)(struct raw_spinlock *lock); | ||
360 | void (*spin_lock_flags)(struct raw_spinlock *lock, unsigned long flags); | ||
361 | int (*spin_trylock)(struct raw_spinlock *lock); | ||
362 | void (*spin_unlock)(struct raw_spinlock *lock); | ||
363 | }; | ||
364 | |||
365 | /* This contains all the paravirt structures: we get a convenient | ||
366 | * number for each function using the offset which we use to indicate | ||
367 | * what to patch. */ | ||
368 | struct paravirt_patch_template { | ||
369 | struct pv_init_ops pv_init_ops; | ||
370 | struct pv_time_ops pv_time_ops; | ||
371 | struct pv_cpu_ops pv_cpu_ops; | ||
372 | struct pv_irq_ops pv_irq_ops; | ||
373 | struct pv_apic_ops pv_apic_ops; | ||
374 | struct pv_mmu_ops pv_mmu_ops; | ||
375 | struct pv_lock_ops pv_lock_ops; | ||
376 | }; | ||
377 | |||
378 | extern struct pv_info pv_info; | ||
379 | extern struct pv_init_ops pv_init_ops; | ||
380 | extern struct pv_time_ops pv_time_ops; | ||
381 | extern struct pv_cpu_ops pv_cpu_ops; | ||
382 | extern struct pv_irq_ops pv_irq_ops; | ||
383 | extern struct pv_apic_ops pv_apic_ops; | ||
384 | extern struct pv_mmu_ops pv_mmu_ops; | ||
385 | extern struct pv_lock_ops pv_lock_ops; | ||
386 | |||
387 | #define PARAVIRT_PATCH(x) \ | ||
388 | (offsetof(struct paravirt_patch_template, x) / sizeof(void *)) | ||
389 | |||
390 | #define paravirt_type(op) \ | ||
391 | [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \ | ||
392 | [paravirt_opptr] "i" (&(op)) | ||
393 | #define paravirt_clobber(clobber) \ | ||
394 | [paravirt_clobber] "i" (clobber) | ||
395 | |||
396 | /* | ||
397 | * Generate some code, and mark it as patchable by the | ||
398 | * apply_paravirt() alternate instruction patcher. | ||
399 | */ | ||
400 | #define _paravirt_alt(insn_string, type, clobber) \ | ||
401 | "771:\n\t" insn_string "\n" "772:\n" \ | ||
402 | ".pushsection .parainstructions,\"a\"\n" \ | ||
403 | _ASM_ALIGN "\n" \ | ||
404 | _ASM_PTR " 771b\n" \ | ||
405 | " .byte " type "\n" \ | ||
406 | " .byte 772b-771b\n" \ | ||
407 | " .short " clobber "\n" \ | ||
408 | ".popsection\n" | ||
409 | |||
410 | /* Generate patchable code, with the default asm parameters. */ | ||
411 | #define paravirt_alt(insn_string) \ | ||
412 | _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") | ||
413 | |||
414 | /* Simple instruction patching code. */ | ||
415 | #define DEF_NATIVE(ops, name, code) \ | ||
416 | extern const char start_##ops##_##name[], end_##ops##_##name[]; \ | ||
417 | asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") | ||
418 | |||
419 | unsigned paravirt_patch_nop(void); | ||
420 | unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len); | ||
421 | unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len); | ||
422 | unsigned paravirt_patch_ignore(unsigned len); | ||
423 | unsigned paravirt_patch_call(void *insnbuf, | ||
424 | const void *target, u16 tgt_clobbers, | ||
425 | unsigned long addr, u16 site_clobbers, | ||
426 | unsigned len); | ||
427 | unsigned paravirt_patch_jmp(void *insnbuf, const void *target, | ||
428 | unsigned long addr, unsigned len); | ||
429 | unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, | ||
430 | unsigned long addr, unsigned len); | ||
431 | |||
432 | unsigned paravirt_patch_insns(void *insnbuf, unsigned len, | ||
433 | const char *start, const char *end); | ||
434 | |||
435 | unsigned native_patch(u8 type, u16 clobbers, void *ibuf, | ||
436 | unsigned long addr, unsigned len); | ||
437 | |||
438 | int paravirt_disable_iospace(void); | ||
439 | |||
440 | /* | ||
441 | * This generates an indirect call based on the operation type number. | ||
442 | * The type number, computed in PARAVIRT_PATCH, is derived from the | ||
443 | * offset into the paravirt_patch_template structure, and can therefore be | ||
444 | * freely converted back into a structure offset. | ||
445 | */ | ||
446 | #define PARAVIRT_CALL "call *%c[paravirt_opptr];" | ||
447 | |||
448 | /* | ||
449 | * These macros are intended to wrap calls through one of the paravirt | ||
450 | * ops structs, so that they can be later identified and patched at | ||
451 | * runtime. | ||
452 | * | ||
453 | * Normally, a call to a pv_op function is a simple indirect call: | ||
454 | * (pv_op_struct.operations)(args...). | ||
455 | * | ||
456 | * Unfortunately, this is a relatively slow operation for modern CPUs, | ||
457 | * because it cannot necessarily determine what the destination | ||
458 | * address is. In this case, the address is a runtime constant, so at | ||
459 | * the very least we can patch the call to e a simple direct call, or | ||
460 | * ideally, patch an inline implementation into the callsite. (Direct | ||
461 | * calls are essentially free, because the call and return addresses | ||
462 | * are completely predictable.) | ||
463 | * | ||
464 | * For i386, these macros rely on the standard gcc "regparm(3)" calling | ||
465 | * convention, in which the first three arguments are placed in %eax, | ||
466 | * %edx, %ecx (in that order), and the remaining arguments are placed | ||
467 | * on the stack. All caller-save registers (eax,edx,ecx) are expected | ||
468 | * to be modified (either clobbered or used for return values). | ||
469 | * X86_64, on the other hand, already specifies a register-based calling | ||
470 | * conventions, returning at %rax, with parameteres going on %rdi, %rsi, | ||
471 | * %rdx, and %rcx. Note that for this reason, x86_64 does not need any | ||
472 | * special handling for dealing with 4 arguments, unlike i386. | ||
473 | * However, x86_64 also have to clobber all caller saved registers, which | ||
474 | * unfortunately, are quite a bit (r8 - r11) | ||
475 | * | ||
476 | * The call instruction itself is marked by placing its start address | ||
477 | * and size into the .parainstructions section, so that | ||
478 | * apply_paravirt() in arch/i386/kernel/alternative.c can do the | ||
479 | * appropriate patching under the control of the backend pv_init_ops | ||
480 | * implementation. | ||
481 | * | ||
482 | * Unfortunately there's no way to get gcc to generate the args setup | ||
483 | * for the call, and then allow the call itself to be generated by an | ||
484 | * inline asm. Because of this, we must do the complete arg setup and | ||
485 | * return value handling from within these macros. This is fairly | ||
486 | * cumbersome. | ||
487 | * | ||
488 | * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments. | ||
489 | * It could be extended to more arguments, but there would be little | ||
490 | * to be gained from that. For each number of arguments, there are | ||
491 | * the two VCALL and CALL variants for void and non-void functions. | ||
492 | * | ||
493 | * When there is a return value, the invoker of the macro must specify | ||
494 | * the return type. The macro then uses sizeof() on that type to | ||
495 | * determine whether its a 32 or 64 bit value, and places the return | ||
496 | * in the right register(s) (just %eax for 32-bit, and %edx:%eax for | ||
497 | * 64-bit). For x86_64 machines, it just returns at %rax regardless of | ||
498 | * the return value size. | ||
499 | * | ||
500 | * 64-bit arguments are passed as a pair of adjacent 32-bit arguments | ||
501 | * i386 also passes 64-bit arguments as a pair of adjacent 32-bit arguments | ||
502 | * in low,high order | ||
503 | * | ||
504 | * Small structures are passed and returned in registers. The macro | ||
505 | * calling convention can't directly deal with this, so the wrapper | ||
506 | * functions must do this. | ||
507 | * | ||
508 | * These PVOP_* macros are only defined within this header. This | ||
509 | * means that all uses must be wrapped in inline functions. This also | ||
510 | * makes sure the incoming and outgoing types are always correct. | ||
511 | */ | ||
512 | #ifdef CONFIG_X86_32 | ||
513 | #define PVOP_VCALL_ARGS \ | ||
514 | unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx | ||
515 | #define PVOP_CALL_ARGS PVOP_VCALL_ARGS | ||
516 | |||
517 | #define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x)) | ||
518 | #define PVOP_CALL_ARG2(x) "d" ((unsigned long)(x)) | ||
519 | #define PVOP_CALL_ARG3(x) "c" ((unsigned long)(x)) | ||
520 | |||
521 | #define PVOP_VCALL_CLOBBERS "=a" (__eax), "=d" (__edx), \ | ||
522 | "=c" (__ecx) | ||
523 | #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS | ||
524 | |||
525 | #define PVOP_VCALLEE_CLOBBERS "=a" (__eax), "=d" (__edx) | ||
526 | #define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS | ||
527 | |||
528 | #define EXTRA_CLOBBERS | ||
529 | #define VEXTRA_CLOBBERS | ||
530 | #else /* CONFIG_X86_64 */ | ||
531 | #define PVOP_VCALL_ARGS \ | ||
532 | unsigned long __edi = __edi, __esi = __esi, \ | ||
533 | __edx = __edx, __ecx = __ecx | ||
534 | #define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax | ||
535 | |||
536 | #define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x)) | ||
537 | #define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x)) | ||
538 | #define PVOP_CALL_ARG3(x) "d" ((unsigned long)(x)) | ||
539 | #define PVOP_CALL_ARG4(x) "c" ((unsigned long)(x)) | ||
540 | |||
541 | #define PVOP_VCALL_CLOBBERS "=D" (__edi), \ | ||
542 | "=S" (__esi), "=d" (__edx), \ | ||
543 | "=c" (__ecx) | ||
544 | #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) | ||
545 | |||
546 | #define PVOP_VCALLEE_CLOBBERS "=a" (__eax) | ||
547 | #define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS | ||
548 | |||
549 | #define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11" | ||
550 | #define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11" | ||
551 | #endif /* CONFIG_X86_32 */ | ||
552 | |||
553 | #ifdef CONFIG_PARAVIRT_DEBUG | ||
554 | #define PVOP_TEST_NULL(op) BUG_ON(op == NULL) | ||
555 | #else | ||
556 | #define PVOP_TEST_NULL(op) ((void)op) | ||
557 | #endif | ||
558 | |||
559 | #define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, \ | ||
560 | pre, post, ...) \ | ||
561 | ({ \ | ||
562 | rettype __ret; \ | ||
563 | PVOP_CALL_ARGS; \ | ||
564 | PVOP_TEST_NULL(op); \ | ||
565 | /* This is 32-bit specific, but is okay in 64-bit */ \ | ||
566 | /* since this condition will never hold */ \ | ||
567 | if (sizeof(rettype) > sizeof(unsigned long)) { \ | ||
568 | asm volatile(pre \ | ||
569 | paravirt_alt(PARAVIRT_CALL) \ | ||
570 | post \ | ||
571 | : call_clbr \ | ||
572 | : paravirt_type(op), \ | ||
573 | paravirt_clobber(clbr), \ | ||
574 | ##__VA_ARGS__ \ | ||
575 | : "memory", "cc" extra_clbr); \ | ||
576 | __ret = (rettype)((((u64)__edx) << 32) | __eax); \ | ||
577 | } else { \ | ||
578 | asm volatile(pre \ | ||
579 | paravirt_alt(PARAVIRT_CALL) \ | ||
580 | post \ | ||
581 | : call_clbr \ | ||
582 | : paravirt_type(op), \ | ||
583 | paravirt_clobber(clbr), \ | ||
584 | ##__VA_ARGS__ \ | ||
585 | : "memory", "cc" extra_clbr); \ | ||
586 | __ret = (rettype)__eax; \ | ||
587 | } \ | ||
588 | __ret; \ | ||
589 | }) | ||
590 | |||
591 | #define __PVOP_CALL(rettype, op, pre, post, ...) \ | ||
592 | ____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS, \ | ||
593 | EXTRA_CLOBBERS, pre, post, ##__VA_ARGS__) | ||
594 | |||
595 | #define __PVOP_CALLEESAVE(rettype, op, pre, post, ...) \ | ||
596 | ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ | ||
597 | PVOP_CALLEE_CLOBBERS, , \ | ||
598 | pre, post, ##__VA_ARGS__) | ||
599 | |||
600 | |||
601 | #define ____PVOP_VCALL(op, clbr, call_clbr, extra_clbr, pre, post, ...) \ | ||
602 | ({ \ | ||
603 | PVOP_VCALL_ARGS; \ | ||
604 | PVOP_TEST_NULL(op); \ | ||
605 | asm volatile(pre \ | ||
606 | paravirt_alt(PARAVIRT_CALL) \ | ||
607 | post \ | ||
608 | : call_clbr \ | ||
609 | : paravirt_type(op), \ | ||
610 | paravirt_clobber(clbr), \ | ||
611 | ##__VA_ARGS__ \ | ||
612 | : "memory", "cc" extra_clbr); \ | ||
613 | }) | ||
614 | |||
615 | #define __PVOP_VCALL(op, pre, post, ...) \ | ||
616 | ____PVOP_VCALL(op, CLBR_ANY, PVOP_VCALL_CLOBBERS, \ | ||
617 | VEXTRA_CLOBBERS, \ | ||
618 | pre, post, ##__VA_ARGS__) | ||
619 | |||
620 | #define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...) \ | ||
621 | ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ | ||
622 | PVOP_VCALLEE_CLOBBERS, , \ | ||
623 | pre, post, ##__VA_ARGS__) | ||
624 | |||
625 | |||
626 | |||
627 | #define PVOP_CALL0(rettype, op) \ | ||
628 | __PVOP_CALL(rettype, op, "", "") | ||
629 | #define PVOP_VCALL0(op) \ | ||
630 | __PVOP_VCALL(op, "", "") | ||
631 | |||
632 | #define PVOP_CALLEE0(rettype, op) \ | ||
633 | __PVOP_CALLEESAVE(rettype, op, "", "") | ||
634 | #define PVOP_VCALLEE0(op) \ | ||
635 | __PVOP_VCALLEESAVE(op, "", "") | ||
636 | |||
637 | |||
638 | #define PVOP_CALL1(rettype, op, arg1) \ | ||
639 | __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1)) | ||
640 | #define PVOP_VCALL1(op, arg1) \ | ||
641 | __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1)) | ||
642 | |||
643 | #define PVOP_CALLEE1(rettype, op, arg1) \ | ||
644 | __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1)) | ||
645 | #define PVOP_VCALLEE1(op, arg1) \ | ||
646 | __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1)) | ||
647 | |||
648 | |||
649 | #define PVOP_CALL2(rettype, op, arg1, arg2) \ | ||
650 | __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ | ||
651 | PVOP_CALL_ARG2(arg2)) | ||
652 | #define PVOP_VCALL2(op, arg1, arg2) \ | ||
653 | __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \ | ||
654 | PVOP_CALL_ARG2(arg2)) | ||
655 | |||
656 | #define PVOP_CALLEE2(rettype, op, arg1, arg2) \ | ||
657 | __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ | ||
658 | PVOP_CALL_ARG2(arg2)) | ||
659 | #define PVOP_VCALLEE2(op, arg1, arg2) \ | ||
660 | __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1), \ | ||
661 | PVOP_CALL_ARG2(arg2)) | ||
662 | |||
663 | |||
664 | #define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \ | ||
665 | __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ | ||
666 | PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) | ||
667 | #define PVOP_VCALL3(op, arg1, arg2, arg3) \ | ||
668 | __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \ | ||
669 | PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) | ||
670 | |||
671 | /* This is the only difference in x86_64. We can make it much simpler */ | ||
672 | #ifdef CONFIG_X86_32 | ||
673 | #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ | ||
674 | __PVOP_CALL(rettype, op, \ | ||
675 | "push %[_arg4];", "lea 4(%%esp),%%esp;", \ | ||
676 | PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ | ||
677 | PVOP_CALL_ARG3(arg3), [_arg4] "mr" ((u32)(arg4))) | ||
678 | #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ | ||
679 | __PVOP_VCALL(op, \ | ||
680 | "push %[_arg4];", "lea 4(%%esp),%%esp;", \ | ||
681 | "0" ((u32)(arg1)), "1" ((u32)(arg2)), \ | ||
682 | "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) | ||
683 | #else | ||
684 | #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ | ||
685 | __PVOP_CALL(rettype, op, "", "", \ | ||
686 | PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ | ||
687 | PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) | ||
688 | #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ | ||
689 | __PVOP_VCALL(op, "", "", \ | ||
690 | PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ | ||
691 | PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) | ||
692 | #endif | ||
693 | 15 | ||
694 | static inline int paravirt_enabled(void) | 16 | static inline int paravirt_enabled(void) |
695 | { | 17 | { |
@@ -820,15 +142,22 @@ static inline u64 paravirt_read_msr(unsigned msr, int *err) | |||
820 | { | 142 | { |
821 | return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err); | 143 | return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err); |
822 | } | 144 | } |
823 | static inline u64 paravirt_read_msr_amd(unsigned msr, int *err) | 145 | |
146 | static inline int paravirt_rdmsr_regs(u32 *regs) | ||
824 | { | 147 | { |
825 | return PVOP_CALL2(u64, pv_cpu_ops.read_msr_amd, msr, err); | 148 | return PVOP_CALL1(int, pv_cpu_ops.rdmsr_regs, regs); |
826 | } | 149 | } |
150 | |||
827 | static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high) | 151 | static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high) |
828 | { | 152 | { |
829 | return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high); | 153 | return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high); |
830 | } | 154 | } |
831 | 155 | ||
156 | static inline int paravirt_wrmsr_regs(u32 *regs) | ||
157 | { | ||
158 | return PVOP_CALL1(int, pv_cpu_ops.wrmsr_regs, regs); | ||
159 | } | ||
160 | |||
832 | /* These should all do BUG_ON(_err), but our headers are too tangled. */ | 161 | /* These should all do BUG_ON(_err), but our headers are too tangled. */ |
833 | #define rdmsr(msr, val1, val2) \ | 162 | #define rdmsr(msr, val1, val2) \ |
834 | do { \ | 163 | do { \ |
@@ -862,6 +191,9 @@ do { \ | |||
862 | _err; \ | 191 | _err; \ |
863 | }) | 192 | }) |
864 | 193 | ||
194 | #define rdmsr_safe_regs(regs) paravirt_rdmsr_regs(regs) | ||
195 | #define wrmsr_safe_regs(regs) paravirt_wrmsr_regs(regs) | ||
196 | |||
865 | static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) | 197 | static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) |
866 | { | 198 | { |
867 | int err; | 199 | int err; |
@@ -871,12 +203,31 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) | |||
871 | } | 203 | } |
872 | static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) | 204 | static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) |
873 | { | 205 | { |
206 | u32 gprs[8] = { 0 }; | ||
874 | int err; | 207 | int err; |
875 | 208 | ||
876 | *p = paravirt_read_msr_amd(msr, &err); | 209 | gprs[1] = msr; |
210 | gprs[7] = 0x9c5a203a; | ||
211 | |||
212 | err = paravirt_rdmsr_regs(gprs); | ||
213 | |||
214 | *p = gprs[0] | ((u64)gprs[2] << 32); | ||
215 | |||
877 | return err; | 216 | return err; |
878 | } | 217 | } |
879 | 218 | ||
219 | static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) | ||
220 | { | ||
221 | u32 gprs[8] = { 0 }; | ||
222 | |||
223 | gprs[0] = (u32)val; | ||
224 | gprs[1] = msr; | ||
225 | gprs[2] = val >> 32; | ||
226 | gprs[7] = 0x9c5a203a; | ||
227 | |||
228 | return paravirt_wrmsr_regs(gprs); | ||
229 | } | ||
230 | |||
880 | static inline u64 paravirt_read_tsc(void) | 231 | static inline u64 paravirt_read_tsc(void) |
881 | { | 232 | { |
882 | return PVOP_CALL0(u64, pv_cpu_ops.read_tsc); | 233 | return PVOP_CALL0(u64, pv_cpu_ops.read_tsc); |
@@ -1393,20 +744,6 @@ static inline void pmd_clear(pmd_t *pmdp) | |||
1393 | } | 744 | } |
1394 | #endif /* CONFIG_X86_PAE */ | 745 | #endif /* CONFIG_X86_PAE */ |
1395 | 746 | ||
1396 | /* Lazy mode for batching updates / context switch */ | ||
1397 | enum paravirt_lazy_mode { | ||
1398 | PARAVIRT_LAZY_NONE, | ||
1399 | PARAVIRT_LAZY_MMU, | ||
1400 | PARAVIRT_LAZY_CPU, | ||
1401 | }; | ||
1402 | |||
1403 | enum paravirt_lazy_mode paravirt_get_lazy_mode(void); | ||
1404 | void paravirt_start_context_switch(struct task_struct *prev); | ||
1405 | void paravirt_end_context_switch(struct task_struct *next); | ||
1406 | |||
1407 | void paravirt_enter_lazy_mmu(void); | ||
1408 | void paravirt_leave_lazy_mmu(void); | ||
1409 | |||
1410 | #define __HAVE_ARCH_START_CONTEXT_SWITCH | 747 | #define __HAVE_ARCH_START_CONTEXT_SWITCH |
1411 | static inline void arch_start_context_switch(struct task_struct *prev) | 748 | static inline void arch_start_context_switch(struct task_struct *prev) |
1412 | { | 749 | { |
@@ -1437,12 +774,6 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, | |||
1437 | pv_mmu_ops.set_fixmap(idx, phys, flags); | 774 | pv_mmu_ops.set_fixmap(idx, phys, flags); |
1438 | } | 775 | } |
1439 | 776 | ||
1440 | void _paravirt_nop(void); | ||
1441 | u32 _paravirt_ident_32(u32); | ||
1442 | u64 _paravirt_ident_64(u64); | ||
1443 | |||
1444 | #define paravirt_nop ((void *)_paravirt_nop) | ||
1445 | |||
1446 | #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) | 777 | #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) |
1447 | 778 | ||
1448 | static inline int __raw_spin_is_locked(struct raw_spinlock *lock) | 779 | static inline int __raw_spin_is_locked(struct raw_spinlock *lock) |
@@ -1479,17 +810,6 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock) | |||
1479 | 810 | ||
1480 | #endif | 811 | #endif |
1481 | 812 | ||
1482 | /* These all sit in the .parainstructions section to tell us what to patch. */ | ||
1483 | struct paravirt_patch_site { | ||
1484 | u8 *instr; /* original instructions */ | ||
1485 | u8 instrtype; /* type of this instruction */ | ||
1486 | u8 len; /* length of original instruction */ | ||
1487 | u16 clobbers; /* what registers you may clobber */ | ||
1488 | }; | ||
1489 | |||
1490 | extern struct paravirt_patch_site __parainstructions[], | ||
1491 | __parainstructions_end[]; | ||
1492 | |||
1493 | #ifdef CONFIG_X86_32 | 813 | #ifdef CONFIG_X86_32 |
1494 | #define PV_SAVE_REGS "pushl %ecx; pushl %edx;" | 814 | #define PV_SAVE_REGS "pushl %ecx; pushl %edx;" |
1495 | #define PV_RESTORE_REGS "popl %edx; popl %ecx;" | 815 | #define PV_RESTORE_REGS "popl %edx; popl %ecx;" |
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h new file mode 100644 index 000000000000..25402d0006e7 --- /dev/null +++ b/arch/x86/include/asm/paravirt_types.h | |||
@@ -0,0 +1,721 @@ | |||
1 | #ifndef _ASM_X86_PARAVIRT_TYPES_H | ||
2 | #define _ASM_X86_PARAVIRT_TYPES_H | ||
3 | |||
4 | /* Bitmask of what can be clobbered: usually at least eax. */ | ||
5 | #define CLBR_NONE 0 | ||
6 | #define CLBR_EAX (1 << 0) | ||
7 | #define CLBR_ECX (1 << 1) | ||
8 | #define CLBR_EDX (1 << 2) | ||
9 | #define CLBR_EDI (1 << 3) | ||
10 | |||
11 | #ifdef CONFIG_X86_32 | ||
12 | /* CLBR_ANY should match all regs the platform has. For i386, that's just these four. */ | ||
13 | #define CLBR_ANY ((1 << 4) - 1) | ||
14 | |||
15 | #define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX) | ||
16 | #define CLBR_RET_REG (CLBR_EAX | CLBR_EDX) | ||
17 | #define CLBR_SCRATCH (0) | ||
18 | #else | ||
19 | #define CLBR_RAX CLBR_EAX | ||
20 | #define CLBR_RCX CLBR_ECX | ||
21 | #define CLBR_RDX CLBR_EDX | ||
22 | #define CLBR_RDI CLBR_EDI | ||
23 | #define CLBR_RSI (1 << 4) | ||
24 | #define CLBR_R8 (1 << 5) | ||
25 | #define CLBR_R9 (1 << 6) | ||
26 | #define CLBR_R10 (1 << 7) | ||
27 | #define CLBR_R11 (1 << 8) | ||
28 | |||
29 | #define CLBR_ANY ((1 << 9) - 1) | ||
30 | |||
31 | #define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \ | ||
32 | CLBR_RCX | CLBR_R8 | CLBR_R9) | ||
33 | #define CLBR_RET_REG (CLBR_RAX) | ||
34 | #define CLBR_SCRATCH (CLBR_R10 | CLBR_R11) | ||
35 | |||
36 | #endif /* X86_64 */ | ||
37 | |||
38 | #define CLBR_CALLEE_SAVE ((CLBR_ARG_REGS | CLBR_SCRATCH) & ~CLBR_RET_REG) | ||
39 | |||
40 | #ifndef __ASSEMBLY__ | ||
41 | |||
42 | #include <asm/desc_defs.h> | ||
43 | #include <asm/kmap_types.h> | ||
44 | |||
45 | struct page; | ||
46 | struct thread_struct; | ||
47 | struct desc_ptr; | ||
48 | struct tss_struct; | ||
49 | struct mm_struct; | ||
50 | struct desc_struct; | ||
51 | struct task_struct; | ||
52 | struct cpumask; | ||
53 | |||
54 | /* | ||
55 | * Wrapper type for pointers to code which uses the non-standard | ||
56 | * calling convention. See PV_CALLEE_SAVE_REGS_THUNK in asm/paravirt.h. | ||
57 | */ | ||
58 | struct paravirt_callee_save { | ||
59 | void *func; | ||
60 | }; | ||
61 | |||
62 | /* general info */ | ||
63 | struct pv_info { | ||
64 | unsigned int kernel_rpl; | ||
65 | int shared_kernel_pmd; | ||
66 | int paravirt_enabled; | ||
67 | const char *name; | ||
68 | }; | ||
69 | |||
70 | struct pv_init_ops { | ||
71 | /* | ||
72 | * Patch may replace one of the defined code sequences with | ||
73 | * arbitrary code, subject to the same register constraints. | ||
74 | * This generally means the code is not free to clobber any | ||
75 | * registers other than EAX. The patch function should return | ||
76 | * the number of bytes of code generated, as we nop pad the | ||
77 | * rest in generic code. | ||
78 | */ | ||
79 | unsigned (*patch)(u8 type, u16 clobber, void *insnbuf, | ||
80 | unsigned long addr, unsigned len); | ||
81 | |||
82 | /* Basic arch-specific setup */ | ||
83 | void (*arch_setup)(void); | ||
84 | char *(*memory_setup)(void); | ||
85 | void (*post_allocator_init)(void); | ||
86 | |||
87 | /* Print a banner to identify the environment */ | ||
88 | void (*banner)(void); | ||
89 | }; | ||
90 | |||
91 | |||
92 | struct pv_lazy_ops { | ||
93 | /* Set deferred update mode, used for batching operations. */ | ||
94 | void (*enter)(void); | ||
95 | void (*leave)(void); | ||
96 | }; | ||
97 | |||
98 | struct pv_time_ops { | ||
99 | void (*time_init)(void); | ||
100 | |||
101 | /* Get and set the time of day */ | ||
102 | unsigned long (*get_wallclock)(void); | ||
103 | int (*set_wallclock)(unsigned long); | ||
104 | |||
105 | unsigned long long (*sched_clock)(void); | ||
106 | unsigned long (*get_tsc_khz)(void); | ||
107 | }; | ||
108 | |||
109 | struct pv_cpu_ops { | ||
110 | /* hooks for various privileged instructions */ | ||
111 | unsigned long (*get_debugreg)(int regno); | ||
112 | void (*set_debugreg)(int regno, unsigned long value); | ||
113 | |||
114 | void (*clts)(void); | ||
115 | |||
116 | unsigned long (*read_cr0)(void); | ||
117 | void (*write_cr0)(unsigned long); | ||
118 | |||
119 | unsigned long (*read_cr4_safe)(void); | ||
120 | unsigned long (*read_cr4)(void); | ||
121 | void (*write_cr4)(unsigned long); | ||
122 | |||
123 | #ifdef CONFIG_X86_64 | ||
124 | unsigned long (*read_cr8)(void); | ||
125 | void (*write_cr8)(unsigned long); | ||
126 | #endif | ||
127 | |||
128 | /* Segment descriptor handling */ | ||
129 | void (*load_tr_desc)(void); | ||
130 | void (*load_gdt)(const struct desc_ptr *); | ||
131 | void (*load_idt)(const struct desc_ptr *); | ||
132 | void (*store_gdt)(struct desc_ptr *); | ||
133 | void (*store_idt)(struct desc_ptr *); | ||
134 | void (*set_ldt)(const void *desc, unsigned entries); | ||
135 | unsigned long (*store_tr)(void); | ||
136 | void (*load_tls)(struct thread_struct *t, unsigned int cpu); | ||
137 | #ifdef CONFIG_X86_64 | ||
138 | void (*load_gs_index)(unsigned int idx); | ||
139 | #endif | ||
140 | void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum, | ||
141 | const void *desc); | ||
142 | void (*write_gdt_entry)(struct desc_struct *, | ||
143 | int entrynum, const void *desc, int size); | ||
144 | void (*write_idt_entry)(gate_desc *, | ||
145 | int entrynum, const gate_desc *gate); | ||
146 | void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries); | ||
147 | void (*free_ldt)(struct desc_struct *ldt, unsigned entries); | ||
148 | |||
149 | void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); | ||
150 | |||
151 | void (*set_iopl_mask)(unsigned mask); | ||
152 | |||
153 | void (*wbinvd)(void); | ||
154 | void (*io_delay)(void); | ||
155 | |||
156 | /* cpuid emulation, mostly so that caps bits can be disabled */ | ||
157 | void (*cpuid)(unsigned int *eax, unsigned int *ebx, | ||
158 | unsigned int *ecx, unsigned int *edx); | ||
159 | |||
160 | /* MSR, PMC and TSC operations. | ||
161 | err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ | ||
162 | u64 (*read_msr)(unsigned int msr, int *err); | ||
163 | int (*rdmsr_regs)(u32 *regs); | ||
164 | int (*write_msr)(unsigned int msr, unsigned low, unsigned high); | ||
165 | int (*wrmsr_regs)(u32 *regs); | ||
166 | |||
167 | u64 (*read_tsc)(void); | ||
168 | u64 (*read_pmc)(int counter); | ||
169 | unsigned long long (*read_tscp)(unsigned int *aux); | ||
170 | |||
171 | /* | ||
172 | * Atomically enable interrupts and return to userspace. This | ||
173 | * is only ever used to return to 32-bit processes; in a | ||
174 | * 64-bit kernel, it's used for 32-on-64 compat processes, but | ||
175 | * never native 64-bit processes. (Jump, not call.) | ||
176 | */ | ||
177 | void (*irq_enable_sysexit)(void); | ||
178 | |||
179 | /* | ||
180 | * Switch to usermode gs and return to 64-bit usermode using | ||
181 | * sysret. Only used in 64-bit kernels to return to 64-bit | ||
182 | * processes. Usermode register state, including %rsp, must | ||
183 | * already be restored. | ||
184 | */ | ||
185 | void (*usergs_sysret64)(void); | ||
186 | |||
187 | /* | ||
188 | * Switch to usermode gs and return to 32-bit usermode using | ||
189 | * sysret. Used to return to 32-on-64 compat processes. | ||
190 | * Other usermode register state, including %esp, must already | ||
191 | * be restored. | ||
192 | */ | ||
193 | void (*usergs_sysret32)(void); | ||
194 | |||
195 | /* Normal iret. Jump to this with the standard iret stack | ||
196 | frame set up. */ | ||
197 | void (*iret)(void); | ||
198 | |||
199 | void (*swapgs)(void); | ||
200 | |||
201 | void (*start_context_switch)(struct task_struct *prev); | ||
202 | void (*end_context_switch)(struct task_struct *next); | ||
203 | }; | ||
204 | |||
205 | struct pv_irq_ops { | ||
206 | void (*init_IRQ)(void); | ||
207 | |||
208 | /* | ||
209 | * Get/set interrupt state. save_fl and restore_fl are only | ||
210 | * expected to use X86_EFLAGS_IF; all other bits | ||
211 | * returned from save_fl are undefined, and may be ignored by | ||
212 | * restore_fl. | ||
213 | * | ||
214 | * NOTE: Callers of these functions expect the callee to preserve | ||
215 | * more registers than the standard C calling convention requires. | ||
216 | */ | ||
217 | struct paravirt_callee_save save_fl; | ||
218 | struct paravirt_callee_save restore_fl; | ||
219 | struct paravirt_callee_save irq_disable; | ||
220 | struct paravirt_callee_save irq_enable; | ||
221 | |||
222 | void (*safe_halt)(void); | ||
223 | void (*halt)(void); | ||
224 | |||
225 | #ifdef CONFIG_X86_64 | ||
226 | void (*adjust_exception_frame)(void); | ||
227 | #endif | ||
228 | }; | ||
229 | |||
230 | struct pv_apic_ops { | ||
231 | #ifdef CONFIG_X86_LOCAL_APIC | ||
232 | void (*setup_boot_clock)(void); | ||
233 | void (*setup_secondary_clock)(void); | ||
234 | |||
235 | void (*startup_ipi_hook)(int phys_apicid, | ||
236 | unsigned long start_eip, | ||
237 | unsigned long start_esp); | ||
238 | #endif | ||
239 | }; | ||
240 | |||
241 | struct pv_mmu_ops { | ||
242 | /* | ||
243 | * Called before/after init_mm pagetable setup. setup_start | ||
244 | * may reset %cr3, and may pre-install parts of the pagetable; | ||
245 | * pagetable setup is expected to preserve any existing | ||
246 | * mapping. | ||
247 | */ | ||
248 | void (*pagetable_setup_start)(pgd_t *pgd_base); | ||
249 | void (*pagetable_setup_done)(pgd_t *pgd_base); | ||
250 | |||
251 | unsigned long (*read_cr2)(void); | ||
252 | void (*write_cr2)(unsigned long); | ||
253 | |||
254 | unsigned long (*read_cr3)(void); | ||
255 | void (*write_cr3)(unsigned long); | ||
256 | |||
257 | /* | ||
258 | * Hooks for intercepting the creation/use/destruction of an | ||
259 | * mm_struct. | ||
260 | */ | ||
261 | void (*activate_mm)(struct mm_struct *prev, | ||
262 | struct mm_struct *next); | ||
263 | void (*dup_mmap)(struct mm_struct *oldmm, | ||
264 | struct mm_struct *mm); | ||
265 | void (*exit_mmap)(struct mm_struct *mm); | ||
266 | |||
267 | |||
268 | /* TLB operations */ | ||
269 | void (*flush_tlb_user)(void); | ||
270 | void (*flush_tlb_kernel)(void); | ||
271 | void (*flush_tlb_single)(unsigned long addr); | ||
272 | void (*flush_tlb_others)(const struct cpumask *cpus, | ||
273 | struct mm_struct *mm, | ||
274 | unsigned long va); | ||
275 | |||
276 | /* Hooks for allocating and freeing a pagetable top-level */ | ||
277 | int (*pgd_alloc)(struct mm_struct *mm); | ||
278 | void (*pgd_free)(struct mm_struct *mm, pgd_t *pgd); | ||
279 | |||
280 | /* | ||
281 | * Hooks for allocating/releasing pagetable pages when they're | ||
282 | * attached to a pagetable | ||
283 | */ | ||
284 | void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn); | ||
285 | void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn); | ||
286 | void (*alloc_pmd_clone)(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count); | ||
287 | void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn); | ||
288 | void (*release_pte)(unsigned long pfn); | ||
289 | void (*release_pmd)(unsigned long pfn); | ||
290 | void (*release_pud)(unsigned long pfn); | ||
291 | |||
292 | /* Pagetable manipulation functions */ | ||
293 | void (*set_pte)(pte_t *ptep, pte_t pteval); | ||
294 | void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, | ||
295 | pte_t *ptep, pte_t pteval); | ||
296 | void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); | ||
297 | void (*pte_update)(struct mm_struct *mm, unsigned long addr, | ||
298 | pte_t *ptep); | ||
299 | void (*pte_update_defer)(struct mm_struct *mm, | ||
300 | unsigned long addr, pte_t *ptep); | ||
301 | |||
302 | pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr, | ||
303 | pte_t *ptep); | ||
304 | void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr, | ||
305 | pte_t *ptep, pte_t pte); | ||
306 | |||
307 | struct paravirt_callee_save pte_val; | ||
308 | struct paravirt_callee_save make_pte; | ||
309 | |||
310 | struct paravirt_callee_save pgd_val; | ||
311 | struct paravirt_callee_save make_pgd; | ||
312 | |||
313 | #if PAGETABLE_LEVELS >= 3 | ||
314 | #ifdef CONFIG_X86_PAE | ||
315 | void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); | ||
316 | void (*pte_clear)(struct mm_struct *mm, unsigned long addr, | ||
317 | pte_t *ptep); | ||
318 | void (*pmd_clear)(pmd_t *pmdp); | ||
319 | |||
320 | #endif /* CONFIG_X86_PAE */ | ||
321 | |||
322 | void (*set_pud)(pud_t *pudp, pud_t pudval); | ||
323 | |||
324 | struct paravirt_callee_save pmd_val; | ||
325 | struct paravirt_callee_save make_pmd; | ||
326 | |||
327 | #if PAGETABLE_LEVELS == 4 | ||
328 | struct paravirt_callee_save pud_val; | ||
329 | struct paravirt_callee_save make_pud; | ||
330 | |||
331 | void (*set_pgd)(pgd_t *pudp, pgd_t pgdval); | ||
332 | #endif /* PAGETABLE_LEVELS == 4 */ | ||
333 | #endif /* PAGETABLE_LEVELS >= 3 */ | ||
334 | |||
335 | #ifdef CONFIG_HIGHPTE | ||
336 | void *(*kmap_atomic_pte)(struct page *page, enum km_type type); | ||
337 | #endif | ||
338 | |||
339 | struct pv_lazy_ops lazy_mode; | ||
340 | |||
341 | /* dom0 ops */ | ||
342 | |||
343 | /* Sometimes the physical address is a pfn, and sometimes it's | ||
344 | an mfn. We can tell which is which from the index. */ | ||
345 | void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx, | ||
346 | phys_addr_t phys, pgprot_t flags); | ||
347 | }; | ||
348 | |||
349 | struct raw_spinlock; | ||
350 | struct pv_lock_ops { | ||
351 | int (*spin_is_locked)(struct raw_spinlock *lock); | ||
352 | int (*spin_is_contended)(struct raw_spinlock *lock); | ||
353 | void (*spin_lock)(struct raw_spinlock *lock); | ||
354 | void (*spin_lock_flags)(struct raw_spinlock *lock, unsigned long flags); | ||
355 | int (*spin_trylock)(struct raw_spinlock *lock); | ||
356 | void (*spin_unlock)(struct raw_spinlock *lock); | ||
357 | }; | ||
358 | |||
359 | /* This contains all the paravirt structures: the offset of each | ||
360 | * function within it gives us a convenient number, which we use to | ||
361 | * indicate what to patch. */ | ||
362 | struct paravirt_patch_template { | ||
363 | struct pv_init_ops pv_init_ops; | ||
364 | struct pv_time_ops pv_time_ops; | ||
365 | struct pv_cpu_ops pv_cpu_ops; | ||
366 | struct pv_irq_ops pv_irq_ops; | ||
367 | struct pv_apic_ops pv_apic_ops; | ||
368 | struct pv_mmu_ops pv_mmu_ops; | ||
369 | struct pv_lock_ops pv_lock_ops; | ||
370 | }; | ||
371 | |||
372 | extern struct pv_info pv_info; | ||
373 | extern struct pv_init_ops pv_init_ops; | ||
374 | extern struct pv_time_ops pv_time_ops; | ||
375 | extern struct pv_cpu_ops pv_cpu_ops; | ||
376 | extern struct pv_irq_ops pv_irq_ops; | ||
377 | extern struct pv_apic_ops pv_apic_ops; | ||
378 | extern struct pv_mmu_ops pv_mmu_ops; | ||
379 | extern struct pv_lock_ops pv_lock_ops; | ||
380 | |||
381 | #define PARAVIRT_PATCH(x) \ | ||
382 | (offsetof(struct paravirt_patch_template, x) / sizeof(void *)) | ||
383 | |||
384 | #define paravirt_type(op) \ | ||
385 | [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \ | ||
386 | [paravirt_opptr] "i" (&(op)) | ||
387 | #define paravirt_clobber(clobber) \ | ||
388 | [paravirt_clobber] "i" (clobber) | ||
389 | |||
390 | /* | ||
391 | * Generate some code, and mark it as patchable by the | ||
392 | * apply_paravirt() alternate instruction patcher. | ||
393 | */ | ||
394 | #define _paravirt_alt(insn_string, type, clobber) \ | ||
395 | "771:\n\t" insn_string "\n" "772:\n" \ | ||
396 | ".pushsection .parainstructions,\"a\"\n" \ | ||
397 | _ASM_ALIGN "\n" \ | ||
398 | _ASM_PTR " 771b\n" \ | ||
399 | " .byte " type "\n" \ | ||
400 | " .byte 772b-771b\n" \ | ||
401 | " .short " clobber "\n" \ | ||
402 | ".popsection\n" | ||
403 | |||
404 | /* Generate patchable code, with the default asm parameters. */ | ||
405 | #define paravirt_alt(insn_string) \ | ||
406 | _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") | ||
407 | |||
408 | /* Simple instruction patching code. */ | ||
409 | #define DEF_NATIVE(ops, name, code) \ | ||
410 | extern const char start_##ops##_##name[], end_##ops##_##name[]; \ | ||
411 | asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") | ||
412 | |||
413 | unsigned paravirt_patch_nop(void); | ||
414 | unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len); | ||
415 | unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len); | ||
416 | unsigned paravirt_patch_ignore(unsigned len); | ||
417 | unsigned paravirt_patch_call(void *insnbuf, | ||
418 | const void *target, u16 tgt_clobbers, | ||
419 | unsigned long addr, u16 site_clobbers, | ||
420 | unsigned len); | ||
421 | unsigned paravirt_patch_jmp(void *insnbuf, const void *target, | ||
422 | unsigned long addr, unsigned len); | ||
423 | unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, | ||
424 | unsigned long addr, unsigned len); | ||
425 | |||
426 | unsigned paravirt_patch_insns(void *insnbuf, unsigned len, | ||
427 | const char *start, const char *end); | ||
428 | |||
429 | unsigned native_patch(u8 type, u16 clobbers, void *ibuf, | ||
430 | unsigned long addr, unsigned len); | ||
431 | |||
432 | int paravirt_disable_iospace(void); | ||
433 | |||
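As a rough sketch of how these pieces fit together (modelled on, not copied from, the native patchers in arch/x86/kernel/paravirt_patch_*.c; the function name is illustrative): DEF_NATIVE emits a labelled native instruction sequence, PARAVIRT_PATCH identifies the slot, and paravirt_patch_insns() copies the sequence over the call site, falling back to the default patcher for everything else.

    /* Declare the native replacement for one op... */
    DEF_NATIVE(pv_irq_ops, irq_enable, "sti");

    /* ...and dispatch on the slot number when asked to patch a site. */
    static unsigned example_patch(u8 type, u16 clobbers, void *ibuf,
                                  unsigned long addr, unsigned len)
    {
            if (type == PARAVIRT_PATCH(pv_irq_ops.irq_enable))
                    return paravirt_patch_insns(ibuf, len,
                                                start_pv_irq_ops_irq_enable,
                                                end_pv_irq_ops_irq_enable);
            return paravirt_patch_default(type, clobbers, ibuf, addr, len);
    }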
434 | /* | ||
435 | * This generates an indirect call based on the operation type number. | ||
436 | * The type number, computed in PARAVIRT_PATCH, is derived from the | ||
437 | * offset into the paravirt_patch_template structure, and can therefore be | ||
438 | * freely converted back into a structure offset. | ||
439 | */ | ||
440 | #define PARAVIRT_CALL "call *%c[paravirt_opptr];" | ||
441 | |||
442 | /* | ||
443 | * These macros are intended to wrap calls through one of the paravirt | ||
444 | * ops structs, so that they can be later identified and patched at | ||
445 | * runtime. | ||
446 | * | ||
447 | * Normally, a call to a pv_op function is a simple indirect call: | ||
448 | * (pv_op_struct.operations)(args...). | ||
449 | * | ||
450 | * Unfortunately, this is a relatively slow operation for modern CPUs, | ||
451 | * because it cannot necessarily determine what the destination | ||
452 | * address is. In this case, the address is a runtime constant, so at | ||
453 | * the very least we can patch the call to be a simple direct call, or | ||
454 | * ideally, patch an inline implementation into the callsite. (Direct | ||
455 | * calls are essentially free, because the call and return addresses | ||
456 | * are completely predictable.) | ||
457 | * | ||
458 | * For i386, these macros rely on the standard gcc "regparm(3)" calling | ||
459 | * convention, in which the first three arguments are placed in %eax, | ||
460 | * %edx, %ecx (in that order), and the remaining arguments are placed | ||
461 | * on the stack. All caller-save registers (eax,edx,ecx) are expected | ||
462 | * to be modified (either clobbered or used for return values). | ||
463 | * x86_64, on the other hand, already specifies a register-based calling | ||
464 | * convention, returning in %rax, with parameters going in %rdi, %rsi, | ||
465 | * %rdx, and %rcx. Note that for this reason, x86_64 does not need any | ||
466 | * special handling for dealing with 4 arguments, unlike i386. | ||
467 | * However, x86_64 also has to clobber all caller-saved registers, which | ||
468 | * unfortunately means quite a few of them (r8 - r11). | ||
469 | * | ||
470 | * The call instruction itself is marked by placing its start address | ||
471 | * and size into the .parainstructions section, so that | ||
472 | * apply_paravirt() in arch/i386/kernel/alternative.c can do the | ||
473 | * appropriate patching under the control of the backend pv_init_ops | ||
474 | * implementation. | ||
475 | * | ||
476 | * Unfortunately there's no way to get gcc to generate the args setup | ||
477 | * for the call, and then allow the call itself to be generated by an | ||
478 | * inline asm. Because of this, we must do the complete arg setup and | ||
479 | * return value handling from within these macros. This is fairly | ||
480 | * cumbersome. | ||
481 | * | ||
482 | * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments. | ||
483 | * It could be extended to more arguments, but there would be little | ||
484 | * to be gained from that. For each number of arguments, there are | ||
485 | * the two VCALL and CALL variants for void and non-void functions. | ||
486 | * | ||
487 | * When there is a return value, the invoker of the macro must specify | ||
488 | * the return type. The macro then uses sizeof() on that type to | ||
489 | * determine whether it's a 32 or 64 bit value, and places the return | ||
490 | * in the right register(s) (just %eax for 32-bit, and %edx:%eax for | ||
491 | * 64-bit). For x86_64 machines, it just returns in %rax regardless of | ||
492 | * the return value size. | ||
493 | * | ||
494 | * 64-bit arguments are passed as a pair of adjacent 32-bit arguments; | ||
495 | * the i386 calling convention also passes 64-bit arguments this way, | ||
496 | * in low,high order. | ||
497 | * | ||
498 | * Small structures are passed and returned in registers. The macro | ||
499 | * calling convention can't directly deal with this, so the wrapper | ||
500 | * functions must do this. | ||
501 | * | ||
502 | * These PVOP_* macros are only defined within this header. This | ||
503 | * means that all uses must be wrapped in inline functions. This also | ||
504 | * makes sure the incoming and outgoing types are always correct. | ||
505 | */ | ||
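For example, the wrappers that asm/paravirt.h builds on top of these macros look roughly like this (a sketch; the function names here are illustrative):

    static inline unsigned long example_read_cr2(void)
    {
            return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr2);
    }

    static inline void example_write_cr2(unsigned long x)
    {
            PVOP_VCALL1(pv_mmu_ops.write_cr2, x);
    }

Each such wrapper expands to an indirect call through the named pv_op slot and records the site in .parainstructions, so it can later be patched into a direct call or an inline native sequence.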
506 | #ifdef CONFIG_X86_32 | ||
507 | #define PVOP_VCALL_ARGS \ | ||
508 | unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx | ||
509 | #define PVOP_CALL_ARGS PVOP_VCALL_ARGS | ||
510 | |||
511 | #define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x)) | ||
512 | #define PVOP_CALL_ARG2(x) "d" ((unsigned long)(x)) | ||
513 | #define PVOP_CALL_ARG3(x) "c" ((unsigned long)(x)) | ||
514 | |||
515 | #define PVOP_VCALL_CLOBBERS "=a" (__eax), "=d" (__edx), \ | ||
516 | "=c" (__ecx) | ||
517 | #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS | ||
518 | |||
519 | #define PVOP_VCALLEE_CLOBBERS "=a" (__eax), "=d" (__edx) | ||
520 | #define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS | ||
521 | |||
522 | #define EXTRA_CLOBBERS | ||
523 | #define VEXTRA_CLOBBERS | ||
524 | #else /* CONFIG_X86_64 */ | ||
525 | #define PVOP_VCALL_ARGS \ | ||
526 | unsigned long __edi = __edi, __esi = __esi, \ | ||
527 | __edx = __edx, __ecx = __ecx | ||
528 | #define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax | ||
529 | |||
530 | #define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x)) | ||
531 | #define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x)) | ||
532 | #define PVOP_CALL_ARG3(x) "d" ((unsigned long)(x)) | ||
533 | #define PVOP_CALL_ARG4(x) "c" ((unsigned long)(x)) | ||
534 | |||
535 | #define PVOP_VCALL_CLOBBERS "=D" (__edi), \ | ||
536 | "=S" (__esi), "=d" (__edx), \ | ||
537 | "=c" (__ecx) | ||
538 | #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) | ||
539 | |||
540 | #define PVOP_VCALLEE_CLOBBERS "=a" (__eax) | ||
541 | #define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS | ||
542 | |||
543 | #define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11" | ||
544 | #define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11" | ||
545 | #endif /* CONFIG_X86_32 */ | ||
546 | |||
547 | #ifdef CONFIG_PARAVIRT_DEBUG | ||
548 | #define PVOP_TEST_NULL(op) BUG_ON(op == NULL) | ||
549 | #else | ||
550 | #define PVOP_TEST_NULL(op) ((void)op) | ||
551 | #endif | ||
552 | |||
553 | #define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, \ | ||
554 | pre, post, ...) \ | ||
555 | ({ \ | ||
556 | rettype __ret; \ | ||
557 | PVOP_CALL_ARGS; \ | ||
558 | PVOP_TEST_NULL(op); \ | ||
559 | /* This is 32-bit specific, but is okay in 64-bit */ \ | ||
560 | /* since this condition will never hold */ \ | ||
561 | if (sizeof(rettype) > sizeof(unsigned long)) { \ | ||
562 | asm volatile(pre \ | ||
563 | paravirt_alt(PARAVIRT_CALL) \ | ||
564 | post \ | ||
565 | : call_clbr \ | ||
566 | : paravirt_type(op), \ | ||
567 | paravirt_clobber(clbr), \ | ||
568 | ##__VA_ARGS__ \ | ||
569 | : "memory", "cc" extra_clbr); \ | ||
570 | __ret = (rettype)((((u64)__edx) << 32) | __eax); \ | ||
571 | } else { \ | ||
572 | asm volatile(pre \ | ||
573 | paravirt_alt(PARAVIRT_CALL) \ | ||
574 | post \ | ||
575 | : call_clbr \ | ||
576 | : paravirt_type(op), \ | ||
577 | paravirt_clobber(clbr), \ | ||
578 | ##__VA_ARGS__ \ | ||
579 | : "memory", "cc" extra_clbr); \ | ||
580 | __ret = (rettype)__eax; \ | ||
581 | } \ | ||
582 | __ret; \ | ||
583 | }) | ||
584 | |||
585 | #define __PVOP_CALL(rettype, op, pre, post, ...) \ | ||
586 | ____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS, \ | ||
587 | EXTRA_CLOBBERS, pre, post, ##__VA_ARGS__) | ||
588 | |||
589 | #define __PVOP_CALLEESAVE(rettype, op, pre, post, ...) \ | ||
590 | ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ | ||
591 | PVOP_CALLEE_CLOBBERS, , \ | ||
592 | pre, post, ##__VA_ARGS__) | ||
593 | |||
594 | |||
595 | #define ____PVOP_VCALL(op, clbr, call_clbr, extra_clbr, pre, post, ...) \ | ||
596 | ({ \ | ||
597 | PVOP_VCALL_ARGS; \ | ||
598 | PVOP_TEST_NULL(op); \ | ||
599 | asm volatile(pre \ | ||
600 | paravirt_alt(PARAVIRT_CALL) \ | ||
601 | post \ | ||
602 | : call_clbr \ | ||
603 | : paravirt_type(op), \ | ||
604 | paravirt_clobber(clbr), \ | ||
605 | ##__VA_ARGS__ \ | ||
606 | : "memory", "cc" extra_clbr); \ | ||
607 | }) | ||
608 | |||
609 | #define __PVOP_VCALL(op, pre, post, ...) \ | ||
610 | ____PVOP_VCALL(op, CLBR_ANY, PVOP_VCALL_CLOBBERS, \ | ||
611 | VEXTRA_CLOBBERS, \ | ||
612 | pre, post, ##__VA_ARGS__) | ||
613 | |||
614 | #define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...) \ | ||
615 | ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ | ||
616 | PVOP_VCALLEE_CLOBBERS, , \ | ||
617 | pre, post, ##__VA_ARGS__) | ||
618 | |||
619 | |||
620 | |||
621 | #define PVOP_CALL0(rettype, op) \ | ||
622 | __PVOP_CALL(rettype, op, "", "") | ||
623 | #define PVOP_VCALL0(op) \ | ||
624 | __PVOP_VCALL(op, "", "") | ||
625 | |||
626 | #define PVOP_CALLEE0(rettype, op) \ | ||
627 | __PVOP_CALLEESAVE(rettype, op, "", "") | ||
628 | #define PVOP_VCALLEE0(op) \ | ||
629 | __PVOP_VCALLEESAVE(op, "", "") | ||
630 | |||
631 | |||
632 | #define PVOP_CALL1(rettype, op, arg1) \ | ||
633 | __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1)) | ||
634 | #define PVOP_VCALL1(op, arg1) \ | ||
635 | __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1)) | ||
636 | |||
637 | #define PVOP_CALLEE1(rettype, op, arg1) \ | ||
638 | __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1)) | ||
639 | #define PVOP_VCALLEE1(op, arg1) \ | ||
640 | __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1)) | ||
641 | |||
642 | |||
643 | #define PVOP_CALL2(rettype, op, arg1, arg2) \ | ||
644 | __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ | ||
645 | PVOP_CALL_ARG2(arg2)) | ||
646 | #define PVOP_VCALL2(op, arg1, arg2) \ | ||
647 | __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \ | ||
648 | PVOP_CALL_ARG2(arg2)) | ||
649 | |||
650 | #define PVOP_CALLEE2(rettype, op, arg1, arg2) \ | ||
651 | __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ | ||
652 | PVOP_CALL_ARG2(arg2)) | ||
653 | #define PVOP_VCALLEE2(op, arg1, arg2) \ | ||
654 | __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1), \ | ||
655 | PVOP_CALL_ARG2(arg2)) | ||
656 | |||
657 | |||
658 | #define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \ | ||
659 | __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ | ||
660 | PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) | ||
661 | #define PVOP_VCALL3(op, arg1, arg2, arg3) \ | ||
662 | __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \ | ||
663 | PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) | ||
664 | |||
665 | /* PVOP_CALL4 is the only case that differs on x86_64, where register argument passing makes it much simpler. */ | ||
666 | #ifdef CONFIG_X86_32 | ||
667 | #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ | ||
668 | __PVOP_CALL(rettype, op, \ | ||
669 | "push %[_arg4];", "lea 4(%%esp),%%esp;", \ | ||
670 | PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ | ||
671 | PVOP_CALL_ARG3(arg3), [_arg4] "mr" ((u32)(arg4))) | ||
672 | #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ | ||
673 | __PVOP_VCALL(op, \ | ||
674 | "push %[_arg4];", "lea 4(%%esp),%%esp;", \ | ||
675 | "0" ((u32)(arg1)), "1" ((u32)(arg2)), \ | ||
676 | "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) | ||
677 | #else | ||
678 | #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ | ||
679 | __PVOP_CALL(rettype, op, "", "", \ | ||
680 | PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ | ||
681 | PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) | ||
682 | #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ | ||
683 | __PVOP_VCALL(op, "", "", \ | ||
684 | PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ | ||
685 | PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) | ||
686 | #endif | ||
687 | |||
688 | /* Lazy mode for batching updates / context switch */ | ||
689 | enum paravirt_lazy_mode { | ||
690 | PARAVIRT_LAZY_NONE, | ||
691 | PARAVIRT_LAZY_MMU, | ||
692 | PARAVIRT_LAZY_CPU, | ||
693 | }; | ||
694 | |||
695 | enum paravirt_lazy_mode paravirt_get_lazy_mode(void); | ||
696 | void paravirt_start_context_switch(struct task_struct *prev); | ||
697 | void paravirt_end_context_switch(struct task_struct *next); | ||
698 | |||
699 | void paravirt_enter_lazy_mmu(void); | ||
700 | void paravirt_leave_lazy_mmu(void); | ||
701 | |||
702 | void _paravirt_nop(void); | ||
703 | u32 _paravirt_ident_32(u32); | ||
704 | u64 _paravirt_ident_64(u64); | ||
705 | |||
706 | #define paravirt_nop ((void *)_paravirt_nop) | ||
707 | |||
708 | /* These all sit in the .parainstructions section to tell us what to patch. */ | ||
709 | struct paravirt_patch_site { | ||
710 | u8 *instr; /* original instructions */ | ||
711 | u8 instrtype; /* type of this instruction */ | ||
712 | u8 len; /* length of original instruction */ | ||
713 | u16 clobbers; /* what registers you may clobber */ | ||
714 | }; | ||
715 | |||
716 | extern struct paravirt_patch_site __parainstructions[], | ||
717 | __parainstructions_end[]; | ||
718 | |||
719 | #endif /* __ASSEMBLY__ */ | ||
720 | |||
721 | #endif /* _ASM_X86_PARAVIRT_TYPES_H */ | ||
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index ac7e79654f3a..e08ea043e085 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -713,13 +713,23 @@ static inline void cpu_relax(void) | |||
713 | rep_nop(); | 713 | rep_nop(); |
714 | } | 714 | } |
715 | 715 | ||
716 | /* Stop speculative execution: */ | 716 | /* Stop speculative execution and prefetching of modified code. */ |
717 | static inline void sync_core(void) | 717 | static inline void sync_core(void) |
718 | { | 718 | { |
719 | int tmp; | 719 | int tmp; |
720 | 720 | ||
721 | asm volatile("cpuid" : "=a" (tmp) : "0" (1) | 721 | #if defined(CONFIG_M386) || defined(CONFIG_M486) |
722 | : "ebx", "ecx", "edx", "memory"); | 722 | if (boot_cpu_data.x86 < 5) |
723 | /* There is no speculative execution. | ||
724 | * jmp is a barrier to prefetching. */ | ||
725 | asm volatile("jmp 1f\n1:\n" ::: "memory"); | ||
726 | else | ||
727 | #endif | ||
728 | /* cpuid is a barrier to speculative execution. | ||
729 | * Prefetched instructions are automatically | ||
730 | * invalidated when modified. */ | ||
731 | asm volatile("cpuid" : "=a" (tmp) : "0" (1) | ||
732 | : "ebx", "ecx", "edx", "memory"); | ||
723 | } | 733 | } |
724 | 734 | ||
725 | static inline void __monitor(const void *eax, unsigned long ecx, | 735 | static inline void __monitor(const void *eax, unsigned long ecx, |
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 486935143e02..de7353c0ce9c 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
@@ -498,8 +498,8 @@ static void *__init_or_module text_poke_early(void *addr, const void *opcode, | |||
498 | unsigned long flags; | 498 | unsigned long flags; |
499 | local_irq_save(flags); | 499 | local_irq_save(flags); |
500 | memcpy(addr, opcode, len); | 500 | memcpy(addr, opcode, len); |
501 | local_irq_restore(flags); | ||
502 | sync_core(); | 501 | sync_core(); |
502 | local_irq_restore(flags); | ||
503 | /* Could also do a CLFLUSH here to speed up CPU recovery; but | 503 | /* Could also do a CLFLUSH here to speed up CPU recovery; but |
504 | that causes hangs on some VIA CPUs. */ | 504 | that causes hangs on some VIA CPUs. */ |
505 | return addr; | 505 | return addr; |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 83b217c7225f..22a47c82f3c0 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -253,6 +253,64 @@ static int __cpuinit nearby_node(int apicid) | |||
253 | #endif | 253 | #endif |
254 | 254 | ||
255 | /* | 255 | /* |
256 | * Fixup core topology information for AMD multi-node processors. | ||
257 | * Assumption 1: Number of cores in each internal node is the same. | ||
258 | * Assumption 2: Mixed systems with both single-node and dual-node | ||
259 | * processors are not supported. | ||
260 | */ | ||
261 | #ifdef CONFIG_X86_HT | ||
262 | static void __cpuinit amd_fixup_dcm(struct cpuinfo_x86 *c) | ||
263 | { | ||
264 | #ifdef CONFIG_PCI | ||
265 | u32 t, cpn; | ||
266 | u8 n, n_id; | ||
267 | int cpu = smp_processor_id(); | ||
268 | |||
269 | /* fixup topology information only once for a core */ | ||
270 | if (cpu_has(c, X86_FEATURE_AMD_DCM)) | ||
271 | return; | ||
272 | |||
273 | /* check for multi-node processor on boot cpu */ | ||
274 | t = read_pci_config(0, 24, 3, 0xe8); | ||
275 | if (!(t & (1 << 29))) | ||
276 | return; | ||
277 | |||
278 | set_cpu_cap(c, X86_FEATURE_AMD_DCM); | ||
279 | |||
280 | /* cores per node: each internal node has half the number of cores */ | ||
281 | cpn = c->x86_max_cores >> 1; | ||
282 | |||
283 | /* even-numbered NB_id of this dual-node processor */ | ||
284 | n = c->phys_proc_id << 1; | ||
285 | |||
286 | /* | ||
287 | * determine internal node id and assign cores fifty-fifty to | ||
288 | * each node of the dual-node processor | ||
289 | */ | ||
290 | t = read_pci_config(0, 24 + n, 3, 0xe8); | ||
291 | n = (t>>30) & 0x3; | ||
292 | if (n == 0) { | ||
293 | if (c->cpu_core_id < cpn) | ||
294 | n_id = 0; | ||
295 | else | ||
296 | n_id = 1; | ||
297 | } else { | ||
298 | if (c->cpu_core_id < cpn) | ||
299 | n_id = 1; | ||
300 | else | ||
301 | n_id = 0; | ||
302 | } | ||
303 | |||
304 | /* compute entire NodeID, use llc_shared_map to store sibling info */ | ||
305 | per_cpu(cpu_llc_id, cpu) = (c->phys_proc_id << 1) + n_id; | ||
306 | |||
307 | /* fixup core id to be in range from 0 to (cpn - 1) */ | ||
308 | c->cpu_core_id = c->cpu_core_id % cpn; | ||
309 | #endif | ||
310 | } | ||
311 | #endif | ||
312 | |||
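To make the arithmetic concrete with a hypothetical dual-node part reporting x86_max_cores = 12 and phys_proc_id = 1: cpn = 6 and the even NB id is n = 2; if the northbridge register for that node reports internal node 0, a core with cpu_core_id = 8 falls in the upper half and gets n_id = 1, so its cpu_llc_id becomes (1 << 1) + 1 = 3 and its core id is folded down to 8 % 6 = 2.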
313 | /* | ||
256 | * On an AMD dual core setup the lower bits of the APIC id distinguish the cores. | 314 | * On an AMD dual core setup the lower bits of the APIC id distinguish the cores. |
257 | * Assumes number of cores is a power of two. | 315 | * Assumes number of cores is a power of two. |
258 | */ | 316 | */ |
@@ -269,6 +327,9 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) | |||
269 | c->phys_proc_id = c->initial_apicid >> bits; | 327 | c->phys_proc_id = c->initial_apicid >> bits; |
270 | /* use socket ID also for last level cache */ | 328 | /* use socket ID also for last level cache */ |
271 | per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; | 329 | per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; |
330 | /* fixup topology information on multi-node processors */ | ||
331 | if ((c->x86 == 0x10) && (c->x86_model == 9)) | ||
332 | amd_fixup_dcm(c); | ||
272 | #endif | 333 | #endif |
273 | } | 334 | } |
274 | 335 | ||
@@ -277,9 +338,10 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) | |||
277 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) | 338 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) |
278 | int cpu = smp_processor_id(); | 339 | int cpu = smp_processor_id(); |
279 | int node; | 340 | int node; |
280 | unsigned apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid; | 341 | unsigned apicid = c->apicid; |
342 | |||
343 | node = per_cpu(cpu_llc_id, cpu); | ||
281 | 344 | ||
282 | node = c->phys_proc_id; | ||
283 | if (apicid_to_node[apicid] != NUMA_NO_NODE) | 345 | if (apicid_to_node[apicid] != NUMA_NO_NODE) |
284 | node = apicid_to_node[apicid]; | 346 | node = apicid_to_node[apicid]; |
285 | if (!node_online(node)) { | 347 | if (!node_online(node)) { |
@@ -406,12 +468,24 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
406 | /* | 468 | /* |
407 | * Some BIOSes incorrectly force this feature, but only K8 | 469 | * Some BIOSes incorrectly force this feature, but only K8 |
408 | * revision D (model = 0x14) and later actually support it. | 470 | * revision D (model = 0x14) and later actually support it. |
471 | * (AMD Erratum #110, docId: 25759). | ||
409 | */ | 472 | */ |
410 | if (c->x86_model < 0x14) | 473 | if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) { |
474 | u64 val; | ||
475 | |||
411 | clear_cpu_cap(c, X86_FEATURE_LAHF_LM); | 476 | clear_cpu_cap(c, X86_FEATURE_LAHF_LM); |
477 | if (!rdmsrl_amd_safe(0xc001100d, &val)) { | ||
478 | val &= ~(1ULL << 32); | ||
479 | wrmsrl_amd_safe(0xc001100d, val); | ||
480 | } | ||
481 | } | ||
482 | |||
412 | } | 483 | } |
413 | if (c->x86 == 0x10 || c->x86 == 0x11) | 484 | if (c->x86 == 0x10 || c->x86 == 0x11) |
414 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | 485 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); |
486 | |||
487 | /* get apicid instead of initial apic id from cpuid */ | ||
488 | c->apicid = hard_smp_processor_id(); | ||
415 | #else | 489 | #else |
416 | 490 | ||
417 | /* | 491 | /* |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 306bf0dca061..804c40e2bc3e 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -241,7 +241,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, | |||
241 | case 0: | 241 | case 0: |
242 | if (!l1->val) | 242 | if (!l1->val) |
243 | return; | 243 | return; |
244 | assoc = l1->assoc; | 244 | assoc = assocs[l1->assoc]; |
245 | line_size = l1->line_size; | 245 | line_size = l1->line_size; |
246 | lines_per_tag = l1->lines_per_tag; | 246 | lines_per_tag = l1->lines_per_tag; |
247 | size_in_kb = l1->size_in_kb; | 247 | size_in_kb = l1->size_in_kb; |
@@ -249,7 +249,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, | |||
249 | case 2: | 249 | case 2: |
250 | if (!l2.val) | 250 | if (!l2.val) |
251 | return; | 251 | return; |
252 | assoc = l2.assoc; | 252 | assoc = assocs[l2.assoc]; |
253 | line_size = l2.line_size; | 253 | line_size = l2.line_size; |
254 | lines_per_tag = l2.lines_per_tag; | 254 | lines_per_tag = l2.lines_per_tag; |
255 | /* cpu_data has errata corrections for K7 applied */ | 255 | /* cpu_data has errata corrections for K7 applied */ |
@@ -258,10 +258,14 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, | |||
258 | case 3: | 258 | case 3: |
259 | if (!l3.val) | 259 | if (!l3.val) |
260 | return; | 260 | return; |
261 | assoc = l3.assoc; | 261 | assoc = assocs[l3.assoc]; |
262 | line_size = l3.line_size; | 262 | line_size = l3.line_size; |
263 | lines_per_tag = l3.lines_per_tag; | 263 | lines_per_tag = l3.lines_per_tag; |
264 | size_in_kb = l3.size_encoded * 512; | 264 | size_in_kb = l3.size_encoded * 512; |
265 | if (boot_cpu_has(X86_FEATURE_AMD_DCM)) { | ||
266 | size_in_kb = size_in_kb >> 1; | ||
267 | assoc = assoc >> 1; | ||
268 | } | ||
265 | break; | 269 | break; |
266 | default: | 270 | default: |
267 | return; | 271 | return; |
@@ -270,18 +274,14 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, | |||
270 | eax->split.is_self_initializing = 1; | 274 | eax->split.is_self_initializing = 1; |
271 | eax->split.type = types[leaf]; | 275 | eax->split.type = types[leaf]; |
272 | eax->split.level = levels[leaf]; | 276 | eax->split.level = levels[leaf]; |
273 | if (leaf == 3) | 277 | eax->split.num_threads_sharing = 0; |
274 | eax->split.num_threads_sharing = | ||
275 | current_cpu_data.x86_max_cores - 1; | ||
276 | else | ||
277 | eax->split.num_threads_sharing = 0; | ||
278 | eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1; | 278 | eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1; |
279 | 279 | ||
280 | 280 | ||
281 | if (assoc == 0xf) | 281 | if (assoc == 0xffff) |
282 | eax->split.is_fully_associative = 1; | 282 | eax->split.is_fully_associative = 1; |
283 | ebx->split.coherency_line_size = line_size - 1; | 283 | ebx->split.coherency_line_size = line_size - 1; |
284 | ebx->split.ways_of_associativity = assocs[assoc] - 1; | 284 | ebx->split.ways_of_associativity = assoc - 1; |
285 | ebx->split.physical_line_partition = lines_per_tag - 1; | 285 | ebx->split.physical_line_partition = lines_per_tag - 1; |
286 | ecx->split.number_of_sets = (size_in_kb * 1024) / line_size / | 286 | ecx->split.number_of_sets = (size_in_kb * 1024) / line_size / |
287 | (ebx->split.ways_of_associativity + 1) - 1; | 287 | (ebx->split.ways_of_associativity + 1) - 1; |
@@ -523,6 +523,18 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) | |||
523 | int index_msb, i; | 523 | int index_msb, i; |
524 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 524 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
525 | 525 | ||
526 | if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) { | ||
527 | struct cpuinfo_x86 *d; | ||
528 | for_each_online_cpu(i) { | ||
529 | if (!per_cpu(cpuid4_info, i)) | ||
530 | continue; | ||
531 | d = &cpu_data(i); | ||
532 | this_leaf = CPUID4_INFO_IDX(i, index); | ||
533 | cpumask_copy(to_cpumask(this_leaf->shared_cpu_map), | ||
534 | d->llc_shared_map); | ||
535 | } | ||
536 | return; | ||
537 | } | ||
526 | this_leaf = CPUID4_INFO_IDX(cpu, index); | 538 | this_leaf = CPUID4_INFO_IDX(cpu, index); |
527 | num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; | 539 | num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; |
528 | 540 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index ddae21620bda..1fecba404fd8 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
@@ -489,12 +489,14 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
489 | int i, err = 0; | 489 | int i, err = 0; |
490 | struct threshold_bank *b = NULL; | 490 | struct threshold_bank *b = NULL; |
491 | char name[32]; | 491 | char name[32]; |
492 | struct cpuinfo_x86 *c = &cpu_data(cpu); | ||
493 | |||
492 | 494 | ||
493 | sprintf(name, "threshold_bank%i", bank); | 495 | sprintf(name, "threshold_bank%i", bank); |
494 | 496 | ||
495 | #ifdef CONFIG_SMP | 497 | #ifdef CONFIG_SMP |
496 | if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ | 498 | if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ |
497 | i = cpumask_first(cpu_core_mask(cpu)); | 499 | i = cpumask_first(c->llc_shared_map); |
498 | 500 | ||
499 | /* first core not up yet */ | 501 | /* first core not up yet */ |
500 | if (cpu_data(i).cpu_core_id) | 502 | if (cpu_data(i).cpu_core_id) |
@@ -514,7 +516,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
514 | if (err) | 516 | if (err) |
515 | goto out; | 517 | goto out; |
516 | 518 | ||
517 | cpumask_copy(b->cpus, cpu_core_mask(cpu)); | 519 | cpumask_copy(b->cpus, c->llc_shared_map); |
518 | per_cpu(threshold_banks, cpu)[bank] = b; | 520 | per_cpu(threshold_banks, cpu)[bank] = b; |
519 | 521 | ||
520 | goto out; | 522 | goto out; |
@@ -539,7 +541,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
539 | #ifndef CONFIG_SMP | 541 | #ifndef CONFIG_SMP |
540 | cpumask_setall(b->cpus); | 542 | cpumask_setall(b->cpus); |
541 | #else | 543 | #else |
542 | cpumask_copy(b->cpus, cpu_core_mask(cpu)); | 544 | cpumask_copy(b->cpus, c->llc_shared_map); |
543 | #endif | 545 | #endif |
544 | 546 | ||
545 | per_cpu(threshold_banks, cpu)[bank] = b; | 547 | per_cpu(threshold_banks, cpu)[bank] = b; |
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 1e904346bbf4..62ac8cb6ba27 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c | |||
@@ -116,11 +116,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
116 | seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize); | 116 | seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize); |
117 | #endif | 117 | #endif |
118 | seq_printf(m, "clflush size\t: %u\n", c->x86_clflush_size); | 118 | seq_printf(m, "clflush size\t: %u\n", c->x86_clflush_size); |
119 | #ifdef CONFIG_X86_64 | ||
120 | seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment); | 119 | seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment); |
121 | seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n", | 120 | seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n", |
122 | c->x86_phys_bits, c->x86_virt_bits); | 121 | c->x86_phys_bits, c->x86_virt_bits); |
123 | #endif | ||
124 | 122 | ||
125 | seq_printf(m, "power management:"); | 123 | seq_printf(m, "power management:"); |
126 | for (i = 0; i < 32; i++) { | 124 | for (i = 0; i < 32; i++) { |
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 98fd6cd4e3a4..7dd950094178 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c | |||
@@ -1,6 +1,7 @@ | |||
1 | /* ----------------------------------------------------------------------- * | 1 | /* ----------------------------------------------------------------------- * |
2 | * | 2 | * |
3 | * Copyright 2000-2008 H. Peter Anvin - All Rights Reserved | 3 | * Copyright 2000-2008 H. Peter Anvin - All Rights Reserved |
4 | * Copyright 2009 Intel Corporation; author: H. Peter Anvin | ||
4 | * | 5 | * |
5 | * This program is free software; you can redistribute it and/or modify | 6 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 7 | * it under the terms of the GNU General Public License as published by |
@@ -80,11 +81,8 @@ static ssize_t msr_read(struct file *file, char __user *buf, | |||
80 | 81 | ||
81 | for (; count; count -= 8) { | 82 | for (; count; count -= 8) { |
82 | err = rdmsr_safe_on_cpu(cpu, reg, &data[0], &data[1]); | 83 | err = rdmsr_safe_on_cpu(cpu, reg, &data[0], &data[1]); |
83 | if (err) { | 84 | if (err) |
84 | if (err == -EFAULT) /* Fix idiotic error code */ | ||
85 | err = -EIO; | ||
86 | break; | 85 | break; |
87 | } | ||
88 | if (copy_to_user(tmp, &data, 8)) { | 86 | if (copy_to_user(tmp, &data, 8)) { |
89 | err = -EFAULT; | 87 | err = -EFAULT; |
90 | break; | 88 | break; |
@@ -115,11 +113,8 @@ static ssize_t msr_write(struct file *file, const char __user *buf, | |||
115 | break; | 113 | break; |
116 | } | 114 | } |
117 | err = wrmsr_safe_on_cpu(cpu, reg, data[0], data[1]); | 115 | err = wrmsr_safe_on_cpu(cpu, reg, data[0], data[1]); |
118 | if (err) { | 116 | if (err) |
119 | if (err == -EFAULT) /* Fix idiotic error code */ | ||
120 | err = -EIO; | ||
121 | break; | 117 | break; |
122 | } | ||
123 | tmp += 2; | 118 | tmp += 2; |
124 | bytes += 8; | 119 | bytes += 8; |
125 | } | 120 | } |
@@ -127,6 +122,54 @@ static ssize_t msr_write(struct file *file, const char __user *buf, | |||
127 | return bytes ? bytes : err; | 122 | return bytes ? bytes : err; |
128 | } | 123 | } |
129 | 124 | ||
125 | static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg) | ||
126 | { | ||
127 | u32 __user *uregs = (u32 __user *)arg; | ||
128 | u32 regs[8]; | ||
129 | int cpu = iminor(file->f_path.dentry->d_inode); | ||
130 | int err; | ||
131 | |||
132 | switch (ioc) { | ||
133 | case X86_IOC_RDMSR_REGS: | ||
134 | if (!(file->f_mode & FMODE_READ)) { | ||
135 | err = -EBADF; | ||
136 | break; | ||
137 | } | ||
138 | if (copy_from_user(&regs, uregs, sizeof regs)) { | ||
139 | err = -EFAULT; | ||
140 | break; | ||
141 | } | ||
142 | err = rdmsr_safe_regs_on_cpu(cpu, regs); | ||
143 | if (err) | ||
144 | break; | ||
145 | if (copy_to_user(uregs, &regs, sizeof regs)) | ||
146 | err = -EFAULT; | ||
147 | break; | ||
148 | |||
149 | case X86_IOC_WRMSR_REGS: | ||
150 | if (!(file->f_mode & FMODE_WRITE)) { | ||
151 | err = -EBADF; | ||
152 | break; | ||
153 | } | ||
154 | if (copy_from_user(&regs, uregs, sizeof regs)) { | ||
155 | err = -EFAULT; | ||
156 | break; | ||
157 | } | ||
158 | err = wrmsr_safe_regs_on_cpu(cpu, regs); | ||
159 | if (err) | ||
160 | break; | ||
161 | if (copy_to_user(uregs, &regs, sizeof regs)) | ||
162 | err = -EFAULT; | ||
163 | break; | ||
164 | |||
165 | default: | ||
166 | err = -ENOTTY; | ||
167 | break; | ||
168 | } | ||
169 | |||
170 | return err; | ||
171 | } | ||
172 | |||
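For orientation, a minimal userspace sketch of driving the new interface (assuming the X86_IOC_RDMSR_REGS constant is visible via <asm/msr.h>, per the ioctl-number registration above; the MSR number 0xc0010015 is only an example). The register-image layout eax, ecx, edx, ebx, esp, ebp, esi, edi follows arch/x86/lib/msr-reg.S below, with the MSR number in the ecx slot and the result in the edx:eax slots.

    #include <stdio.h>
    #include <stdint.h>
    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <asm/msr.h>

    int main(void)
    {
            uint32_t regs[8] = { 0 };
            int fd = open("/dev/cpu/0/msr", O_RDONLY);

            if (fd < 0)
                    return 1;
            regs[1] = 0xc0010015;   /* ecx slot: MSR number (example only) */
            if (ioctl(fd, X86_IOC_RDMSR_REGS, regs) == 0)
                    printf("msr = %#llx\n",
                           regs[0] | ((unsigned long long)regs[2] << 32));
            return 0;
    }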
130 | static int msr_open(struct inode *inode, struct file *file) | 173 | static int msr_open(struct inode *inode, struct file *file) |
131 | { | 174 | { |
132 | unsigned int cpu = iminor(file->f_path.dentry->d_inode); | 175 | unsigned int cpu = iminor(file->f_path.dentry->d_inode); |
@@ -157,6 +200,8 @@ static const struct file_operations msr_fops = { | |||
157 | .read = msr_read, | 200 | .read = msr_read, |
158 | .write = msr_write, | 201 | .write = msr_write, |
159 | .open = msr_open, | 202 | .open = msr_open, |
203 | .unlocked_ioctl = msr_ioctl, | ||
204 | .compat_ioctl = msr_ioctl, | ||
160 | }; | 205 | }; |
161 | 206 | ||
162 | static int __cpuinit msr_device_create(int cpu) | 207 | static int __cpuinit msr_device_create(int cpu) |
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 70ec9b951d76..f5b0b4a01fb2 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -362,8 +362,9 @@ struct pv_cpu_ops pv_cpu_ops = { | |||
362 | #endif | 362 | #endif |
363 | .wbinvd = native_wbinvd, | 363 | .wbinvd = native_wbinvd, |
364 | .read_msr = native_read_msr_safe, | 364 | .read_msr = native_read_msr_safe, |
365 | .read_msr_amd = native_read_msr_amd_safe, | 365 | .rdmsr_regs = native_rdmsr_safe_regs, |
366 | .write_msr = native_write_msr_safe, | 366 | .write_msr = native_write_msr_safe, |
367 | .wrmsr_regs = native_wrmsr_safe_regs, | ||
367 | .read_tsc = native_read_tsc, | 368 | .read_tsc = native_read_tsc, |
368 | .read_pmc = native_read_pmc, | 369 | .read_pmc = native_read_pmc, |
369 | .read_tscp = native_read_tscp, | 370 | .read_tscp = native_read_tscp, |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2fecda69ee64..c36cc1452cdc 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -434,7 +434,8 @@ const struct cpumask *cpu_coregroup_mask(int cpu) | |||
434 | * For perf, we return last level cache shared map. | 434 | * For perf, we return last level cache shared map. |
435 | * And for power savings, we return cpu_core_map | 435 | * And for power savings, we return cpu_core_map |
436 | */ | 436 | */ |
437 | if (sched_mc_power_savings || sched_smt_power_savings) | 437 | if ((sched_mc_power_savings || sched_smt_power_savings) && |
438 | !(cpu_has(c, X86_FEATURE_AMD_DCM))) | ||
438 | return cpu_core_mask(cpu); | 439 | return cpu_core_mask(cpu); |
439 | else | 440 | else |
440 | return c->llc_shared_map; | 441 | return c->llc_shared_map; |
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 07c31899c9c2..9e609206fac9 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile | |||
@@ -9,6 +9,8 @@ lib-y += thunk_$(BITS).o | |||
9 | lib-y += usercopy_$(BITS).o getuser.o putuser.o | 9 | lib-y += usercopy_$(BITS).o getuser.o putuser.o |
10 | lib-y += memcpy_$(BITS).o | 10 | lib-y += memcpy_$(BITS).o |
11 | 11 | ||
12 | obj-y += msr-reg.o msr-reg-export.o | ||
13 | |||
12 | ifeq ($(CONFIG_X86_32),y) | 14 | ifeq ($(CONFIG_X86_32),y) |
13 | obj-y += atomic64_32.o | 15 | obj-y += atomic64_32.o |
14 | lib-y += checksum_32.o | 16 | lib-y += checksum_32.o |
diff --git a/arch/x86/lib/msr-reg-export.c b/arch/x86/lib/msr-reg-export.c new file mode 100644 index 000000000000..a311cc59b65d --- /dev/null +++ b/arch/x86/lib/msr-reg-export.c | |||
@@ -0,0 +1,5 @@ | |||
1 | #include <linux/module.h> | ||
2 | #include <asm/msr.h> | ||
3 | |||
4 | EXPORT_SYMBOL(native_rdmsr_safe_regs); | ||
5 | EXPORT_SYMBOL(native_wrmsr_safe_regs); | ||
diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S new file mode 100644 index 000000000000..69fa10623f21 --- /dev/null +++ b/arch/x86/lib/msr-reg.S | |||
@@ -0,0 +1,102 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | #include <linux/errno.h> | ||
3 | #include <asm/dwarf2.h> | ||
4 | #include <asm/asm.h> | ||
5 | #include <asm/msr.h> | ||
6 | |||
7 | #ifdef CONFIG_X86_64 | ||
8 | /* | ||
9 | * int native_{rdmsr,wrmsr}_safe_regs(u32 gprs[8]); | ||
10 | * | ||
11 | * reg layout: u32 gprs[eax, ecx, edx, ebx, esp, ebp, esi, edi] | ||
12 | * | ||
13 | */ | ||
14 | .macro op_safe_regs op | ||
15 | ENTRY(native_\op\()_safe_regs) | ||
16 | CFI_STARTPROC | ||
17 | pushq_cfi %rbx | ||
18 | pushq_cfi %rbp | ||
19 | movq %rdi, %r10 /* Save pointer */ | ||
20 | xorl %r11d, %r11d /* Return value */ | ||
21 | movl (%rdi), %eax | ||
22 | movl 4(%rdi), %ecx | ||
23 | movl 8(%rdi), %edx | ||
24 | movl 12(%rdi), %ebx | ||
25 | movl 20(%rdi), %ebp | ||
26 | movl 24(%rdi), %esi | ||
27 | movl 28(%rdi), %edi | ||
28 | CFI_REMEMBER_STATE | ||
29 | 1: \op | ||
30 | 2: movl %eax, (%r10) | ||
31 | movl %r11d, %eax /* Return value */ | ||
32 | movl %ecx, 4(%r10) | ||
33 | movl %edx, 8(%r10) | ||
34 | movl %ebx, 12(%r10) | ||
35 | movl %ebp, 20(%r10) | ||
36 | movl %esi, 24(%r10) | ||
37 | movl %edi, 28(%r10) | ||
38 | popq_cfi %rbp | ||
39 | popq_cfi %rbx | ||
40 | ret | ||
41 | 3: | ||
42 | CFI_RESTORE_STATE | ||
43 | movl $-EIO, %r11d | ||
44 | jmp 2b | ||
45 | |||
46 | _ASM_EXTABLE(1b, 3b) | ||
47 | CFI_ENDPROC | ||
48 | ENDPROC(native_\op\()_safe_regs) | ||
49 | .endm | ||
50 | |||
51 | #else /* X86_32 */ | ||
52 | |||
53 | .macro op_safe_regs op | ||
54 | ENTRY(native_\op\()_safe_regs) | ||
55 | CFI_STARTPROC | ||
56 | pushl_cfi %ebx | ||
57 | pushl_cfi %ebp | ||
58 | pushl_cfi %esi | ||
59 | pushl_cfi %edi | ||
60 | pushl_cfi $0 /* Return value */ | ||
61 | pushl_cfi %eax | ||
62 | movl 4(%eax), %ecx | ||
63 | movl 8(%eax), %edx | ||
64 | movl 12(%eax), %ebx | ||
65 | movl 20(%eax), %ebp | ||
66 | movl 24(%eax), %esi | ||
67 | movl 28(%eax), %edi | ||
68 | movl (%eax), %eax | ||
69 | CFI_REMEMBER_STATE | ||
70 | 1: \op | ||
71 | 2: pushl_cfi %eax | ||
72 | movl 4(%esp), %eax | ||
73 | popl_cfi (%eax) | ||
74 | addl $4, %esp | ||
75 | CFI_ADJUST_CFA_OFFSET -4 | ||
76 | movl %ecx, 4(%eax) | ||
77 | movl %edx, 8(%eax) | ||
78 | movl %ebx, 12(%eax) | ||
79 | movl %ebp, 20(%eax) | ||
80 | movl %esi, 24(%eax) | ||
81 | movl %edi, 28(%eax) | ||
82 | popl_cfi %eax | ||
83 | popl_cfi %edi | ||
84 | popl_cfi %esi | ||
85 | popl_cfi %ebp | ||
86 | popl_cfi %ebx | ||
87 | ret | ||
88 | 3: | ||
89 | CFI_RESTORE_STATE | ||
90 | movl $-EIO, 4(%esp) | ||
91 | jmp 2b | ||
92 | |||
93 | _ASM_EXTABLE(1b, 3b) | ||
94 | CFI_ENDPROC | ||
95 | ENDPROC(native_\op\()_safe_regs) | ||
96 | .endm | ||
97 | |||
98 | #endif | ||
99 | |||
100 | op_safe_regs rdmsr | ||
101 | op_safe_regs wrmsr | ||
102 | |||
diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c index caa24aca8115..33a1e3ca22d8 100644 --- a/arch/x86/lib/msr.c +++ b/arch/x86/lib/msr.c | |||
@@ -175,3 +175,52 @@ int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) | |||
175 | return err ? err : rv.err; | 175 | return err ? err : rv.err; |
176 | } | 176 | } |
177 | EXPORT_SYMBOL(wrmsr_safe_on_cpu); | 177 | EXPORT_SYMBOL(wrmsr_safe_on_cpu); |
178 | |||
179 | /* | ||
180 | * These variants are significantly slower, but allow control over | ||
181 | * the entire 32-bit GPR set. | ||
182 | */ | ||
183 | struct msr_regs_info { | ||
184 | u32 *regs; | ||
185 | int err; | ||
186 | }; | ||
187 | |||
188 | static void __rdmsr_safe_regs_on_cpu(void *info) | ||
189 | { | ||
190 | struct msr_regs_info *rv = info; | ||
191 | |||
192 | rv->err = rdmsr_safe_regs(rv->regs); | ||
193 | } | ||
194 | |||
195 | static void __wrmsr_safe_regs_on_cpu(void *info) | ||
196 | { | ||
197 | struct msr_regs_info *rv = info; | ||
198 | |||
199 | rv->err = wrmsr_safe_regs(rv->regs); | ||
200 | } | ||
201 | |||
202 | int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs) | ||
203 | { | ||
204 | int err; | ||
205 | struct msr_regs_info rv; | ||
206 | |||
207 | rv.regs = regs; | ||
208 | rv.err = -EIO; | ||
209 | err = smp_call_function_single(cpu, __rdmsr_safe_regs_on_cpu, &rv, 1); | ||
210 | |||
211 | return err ? err : rv.err; | ||
212 | } | ||
213 | EXPORT_SYMBOL(rdmsr_safe_regs_on_cpu); | ||
214 | |||
215 | int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs) | ||
216 | { | ||
217 | int err; | ||
218 | struct msr_regs_info rv; | ||
219 | |||
220 | rv.regs = regs; | ||
221 | rv.err = -EIO; | ||
222 | err = smp_call_function_single(cpu, __wrmsr_safe_regs_on_cpu, &rv, 1); | ||
223 | |||
224 | return err ? err : rv.err; | ||
225 | } | ||
226 | EXPORT_SYMBOL(wrmsr_safe_regs_on_cpu); | ||
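A kernel-side sketch of the kind of caller these cross-CPU variants are meant for (the helper name is hypothetical; the passcode in the edi slot is the value used by the AMD-only MSR accessors earlier in this patch):

    static int example_rdmsr_amd_on(unsigned int cpu, u32 msr, u64 *val)
    {
            u32 regs[8] = { 0 };
            int err;

            regs[1] = msr;              /* ecx: MSR number */
            regs[7] = 0x9c5a203a;       /* edi: AMD passcode */
            err = rdmsr_safe_regs_on_cpu(cpu, regs);
            if (!err)
                    *val = regs[0] | ((u64)regs[2] << 32);
            return err;
    }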
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index eb33aaa8415d..b62ccb840cfb 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -714,7 +714,7 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) | |||
714 | set: | 714 | set: |
715 | base = ((u64)high << 32) | low; | 715 | base = ((u64)high << 32) | low; |
716 | if (HYPERVISOR_set_segment_base(which, base) != 0) | 716 | if (HYPERVISOR_set_segment_base(which, base) != 0) |
717 | ret = -EFAULT; | 717 | ret = -EIO; |
718 | break; | 718 | break; |
719 | #endif | 719 | #endif |
720 | 720 | ||