diff options
Diffstat (limited to 'arch/x86')
304 files changed, 11856 insertions, 6536 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8da93745c087..55298e891571 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -49,7 +49,11 @@ config X86 | |||
49 | select HAVE_KERNEL_GZIP | 49 | select HAVE_KERNEL_GZIP |
50 | select HAVE_KERNEL_BZIP2 | 50 | select HAVE_KERNEL_BZIP2 |
51 | select HAVE_KERNEL_LZMA | 51 | select HAVE_KERNEL_LZMA |
52 | select HAVE_HW_BREAKPOINT | ||
53 | select PERF_EVENTS | ||
54 | select ANON_INODES | ||
52 | select HAVE_ARCH_KMEMCHECK | 55 | select HAVE_ARCH_KMEMCHECK |
56 | select HAVE_USER_RETURN_NOTIFIER | ||
53 | 57 | ||
54 | config OUTPUT_FORMAT | 58 | config OUTPUT_FORMAT |
55 | string | 59 | string |
@@ -86,10 +90,6 @@ config STACKTRACE_SUPPORT | |||
86 | config HAVE_LATENCYTOP_SUPPORT | 90 | config HAVE_LATENCYTOP_SUPPORT |
87 | def_bool y | 91 | def_bool y |
88 | 92 | ||
89 | config FAST_CMPXCHG_LOCAL | ||
90 | bool | ||
91 | default y | ||
92 | |||
93 | config MMU | 93 | config MMU |
94 | def_bool y | 94 | def_bool y |
95 | 95 | ||
@@ -495,7 +495,7 @@ if PARAVIRT_GUEST | |||
495 | source "arch/x86/xen/Kconfig" | 495 | source "arch/x86/xen/Kconfig" |
496 | 496 | ||
497 | config VMI | 497 | config VMI |
498 | bool "VMI Guest support" | 498 | bool "VMI Guest support (DEPRECATED)" |
499 | select PARAVIRT | 499 | select PARAVIRT |
500 | depends on X86_32 | 500 | depends on X86_32 |
501 | ---help--- | 501 | ---help--- |
@@ -504,6 +504,15 @@ config VMI | |||
504 | at the moment), by linking the kernel to a GPL-ed ROM module | 504 | at the moment), by linking the kernel to a GPL-ed ROM module |
505 | provided by the hypervisor. | 505 | provided by the hypervisor. |
506 | 506 | ||
507 | As of September 2009, VMware has started a phased retirement | ||
508 | of this feature from VMware's products. Please see | ||
509 | feature-removal-schedule.txt for details. If you are | ||
510 | planning to enable this option, please note that you cannot | ||
511 | live migrate a VMI enabled VM to a future VMware product, | ||
512 | which doesn't support VMI. So if you expect your kernel to | ||
513 | seamlessly migrate to newer VMware products, keep this | ||
514 | disabled. | ||
515 | |||
507 | config KVM_CLOCK | 516 | config KVM_CLOCK |
508 | bool "KVM paravirtualized clock" | 517 | bool "KVM paravirtualized clock" |
509 | select PARAVIRT | 518 | select PARAVIRT |
@@ -1325,7 +1334,9 @@ config MATH_EMULATION | |||
1325 | kernel, it won't hurt. | 1334 | kernel, it won't hurt. |
1326 | 1335 | ||
1327 | config MTRR | 1336 | config MTRR |
1328 | bool "MTRR (Memory Type Range Register) support" | 1337 | bool |
1338 | default y | ||
1339 | prompt "MTRR (Memory Type Range Register) support" if EMBEDDED | ||
1329 | ---help--- | 1340 | ---help--- |
1330 | On Intel P6 family processors (Pentium Pro, Pentium II and later) | 1341 | On Intel P6 family processors (Pentium Pro, Pentium II and later) |
1331 | the Memory Type Range Registers (MTRRs) may be used to control | 1342 | the Memory Type Range Registers (MTRRs) may be used to control |
@@ -1391,7 +1402,8 @@ config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT | |||
1391 | 1402 | ||
1392 | config X86_PAT | 1403 | config X86_PAT |
1393 | bool | 1404 | bool |
1394 | prompt "x86 PAT support" | 1405 | default y |
1406 | prompt "x86 PAT support" if EMBEDDED | ||
1395 | depends on MTRR | 1407 | depends on MTRR |
1396 | ---help--- | 1408 | ---help--- |
1397 | Use PAT attributes to setup page level cache control. | 1409 | Use PAT attributes to setup page level cache control. |
@@ -1438,12 +1450,8 @@ config SECCOMP | |||
1438 | 1450 | ||
1439 | If unsure, say Y. Only embedded should say N here. | 1451 | If unsure, say Y. Only embedded should say N here. |
1440 | 1452 | ||
1441 | config CC_STACKPROTECTOR_ALL | ||
1442 | bool | ||
1443 | |||
1444 | config CC_STACKPROTECTOR | 1453 | config CC_STACKPROTECTOR |
1445 | bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" | 1454 | bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" |
1446 | select CC_STACKPROTECTOR_ALL | ||
1447 | ---help--- | 1455 | ---help--- |
1448 | This option turns on the -fstack-protector GCC feature. This | 1456 | This option turns on the -fstack-protector GCC feature. This |
1449 | feature puts, at the beginning of functions, a canary value on | 1457 | feature puts, at the beginning of functions, a canary value on |
@@ -1601,7 +1609,7 @@ config COMPAT_VDSO | |||
1601 | depends on X86_32 || IA32_EMULATION | 1609 | depends on X86_32 || IA32_EMULATION |
1602 | ---help--- | 1610 | ---help--- |
1603 | Map the 32-bit VDSO to the predictable old-style address too. | 1611 | Map the 32-bit VDSO to the predictable old-style address too. |
1604 | ---help--- | 1612 | |
1605 | Say N here if you are running a sufficiently recent glibc | 1613 | Say N here if you are running a sufficiently recent glibc |
1606 | version (2.3.3 or later), to remove the high-mapped | 1614 | version (2.3.3 or later), to remove the high-mapped |
1607 | VDSO mapping and to exclusively use the randomized VDSO. | 1615 | VDSO mapping and to exclusively use the randomized VDSO. |
@@ -2006,18 +2014,9 @@ config SCx200HR_TIMER | |||
2006 | processor goes idle (as is done by the scheduler). The | 2014 | processor goes idle (as is done by the scheduler). The |
2007 | other workaround is idle=poll boot option. | 2015 | other workaround is idle=poll boot option. |
2008 | 2016 | ||
2009 | config GEODE_MFGPT_TIMER | ||
2010 | def_bool y | ||
2011 | prompt "Geode Multi-Function General Purpose Timer (MFGPT) events" | ||
2012 | depends on MGEODE_LX && GENERIC_TIME && GENERIC_CLOCKEVENTS | ||
2013 | ---help--- | ||
2014 | This driver provides a clock event source based on the MFGPT | ||
2015 | timer(s) in the CS5535 and CS5536 companion chip for the geode. | ||
2016 | MFGPTs have a better resolution and max interval than the | ||
2017 | generic PIT, and are suitable for use as high-res timers. | ||
2018 | |||
2019 | config OLPC | 2017 | config OLPC |
2020 | bool "One Laptop Per Child support" | 2018 | bool "One Laptop Per Child support" |
2019 | select GPIOLIB | ||
2021 | default n | 2020 | default n |
2022 | ---help--- | 2021 | ---help--- |
2023 | Add support for detecting the unique features of the OLPC | 2022 | Add support for detecting the unique features of the OLPC |
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 527519b8a9f9..08e442bc3ab9 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu | |||
@@ -301,15 +301,11 @@ config X86_CPU | |||
301 | 301 | ||
302 | # | 302 | # |
303 | # Define implied options from the CPU selection here | 303 | # Define implied options from the CPU selection here |
304 | config X86_L1_CACHE_BYTES | 304 | config X86_INTERNODE_CACHE_SHIFT |
305 | int | 305 | int |
306 | default "128" if MPSC | 306 | default "12" if X86_VSMP |
307 | default "64" if GENERIC_CPU || MK8 || MCORE2 || MATOM || X86_32 | 307 | default "7" if NUMA |
308 | 308 | default X86_L1_CACHE_SHIFT | |
309 | config X86_INTERNODE_CACHE_BYTES | ||
310 | int | ||
311 | default "4096" if X86_VSMP | ||
312 | default X86_L1_CACHE_BYTES if !X86_VSMP | ||
313 | 309 | ||
314 | config X86_CMPXCHG | 310 | config X86_CMPXCHG |
315 | def_bool X86_64 || (X86_32 && !M386) | 311 | def_bool X86_64 || (X86_32 && !M386) |
@@ -317,9 +313,9 @@ config X86_CMPXCHG | |||
317 | config X86_L1_CACHE_SHIFT | 313 | config X86_L1_CACHE_SHIFT |
318 | int | 314 | int |
319 | default "7" if MPENTIUM4 || MPSC | 315 | default "7" if MPENTIUM4 || MPSC |
316 | default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU | ||
320 | default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 | 317 | default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 |
321 | default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX | 318 | default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX |
322 | default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU | ||
323 | 319 | ||
324 | config X86_XADD | 320 | config X86_XADD |
325 | def_bool y | 321 | def_bool y |
@@ -400,18 +396,19 @@ config X86_TSC | |||
400 | 396 | ||
401 | config X86_CMPXCHG64 | 397 | config X86_CMPXCHG64 |
402 | def_bool y | 398 | def_bool y |
403 | depends on X86_PAE || X86_64 | 399 | depends on !M386 && !M486 |
404 | 400 | ||
405 | # this should be set for all -march=.. options where the compiler | 401 | # this should be set for all -march=.. options where the compiler |
406 | # generates cmov. | 402 | # generates cmov. |
407 | config X86_CMOV | 403 | config X86_CMOV |
408 | def_bool y | 404 | def_bool y |
409 | depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM) | 405 | depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX) |
410 | 406 | ||
411 | config X86_MINIMUM_CPU_FAMILY | 407 | config X86_MINIMUM_CPU_FAMILY |
412 | int | 408 | int |
413 | default "64" if X86_64 | 409 | default "64" if X86_64 |
414 | default "6" if X86_32 && X86_P6_NOP | 410 | default "6" if X86_32 && X86_P6_NOP |
411 | default "5" if X86_32 && X86_CMPXCHG64 | ||
415 | default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK) | 412 | default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK) |
416 | default "3" | 413 | default "3" |
417 | 414 | ||
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index d105f29bb6bb..bc01e3ebfeb2 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug | |||
@@ -186,6 +186,15 @@ config X86_DS_SELFTEST | |||
186 | config HAVE_MMIOTRACE_SUPPORT | 186 | config HAVE_MMIOTRACE_SUPPORT |
187 | def_bool y | 187 | def_bool y |
188 | 188 | ||
189 | config X86_DECODER_SELFTEST | ||
190 | bool "x86 instruction decoder selftest" | ||
191 | depends on DEBUG_KERNEL && KPROBES | ||
192 | ---help--- | ||
193 | Perform x86 instruction decoder selftests at build time. | ||
194 | This option is useful for checking the sanity of x86 instruction | ||
195 | decoder code. | ||
196 | If unsure, say "N". | ||
197 | |||
189 | # | 198 | # |
190 | # IO delay types: | 199 | # IO delay types: |
191 | # | 200 | # |
@@ -287,4 +296,18 @@ config OPTIMIZE_INLINING | |||
287 | 296 | ||
288 | If unsure, say N. | 297 | If unsure, say N. |
289 | 298 | ||
299 | config DEBUG_STRICT_USER_COPY_CHECKS | ||
300 | bool "Strict copy size checks" | ||
301 | depends on DEBUG_KERNEL && !TRACE_BRANCH_PROFILING | ||
302 | ---help--- | ||
303 | Enabling this option turns a certain set of sanity checks for user | ||
304 | copy operations into compile time failures. | ||
305 | |||
306 | The copy_from_user() etc checks are there to help test if there | ||
307 | are sufficient security checks on the length argument of | ||
308 | the copy operation, by having gcc prove that the argument is | ||
309 | within bounds. | ||
310 | |||
311 | If unsure, or if you run an older (pre 4.4) gcc, say N. | ||
312 | |||
290 | endmenu | 313 | endmenu |
diff --git a/arch/x86/Makefile b/arch/x86/Makefile index a012ee8ef803..78b32be55e9e 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile | |||
@@ -76,7 +76,6 @@ ifdef CONFIG_CC_STACKPROTECTOR | |||
76 | cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh | 76 | cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh |
77 | ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(biarch)),y) | 77 | ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(biarch)),y) |
78 | stackp-y := -fstack-protector | 78 | stackp-y := -fstack-protector |
79 | stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += -fstack-protector-all | ||
80 | KBUILD_CFLAGS += $(stackp-y) | 79 | KBUILD_CFLAGS += $(stackp-y) |
81 | else | 80 | else |
82 | $(warning stack protector enabled but no compiler support) | 81 | $(warning stack protector enabled but no compiler support) |
@@ -156,6 +155,9 @@ all: bzImage | |||
156 | KBUILD_IMAGE := $(boot)/bzImage | 155 | KBUILD_IMAGE := $(boot)/bzImage |
157 | 156 | ||
158 | bzImage: vmlinux | 157 | bzImage: vmlinux |
158 | ifeq ($(CONFIG_X86_DECODER_SELFTEST),y) | ||
159 | $(Q)$(MAKE) $(build)=arch/x86/tools posttest | ||
160 | endif | ||
159 | $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE) | 161 | $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE) |
160 | $(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot | 162 | $(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot |
161 | $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@ | 163 | $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@ |
diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index 30e9a264f69d..1255d953c65d 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu | |||
@@ -41,11 +41,18 @@ cflags-$(CONFIG_X86_ELAN) += -march=i486 | |||
41 | 41 | ||
42 | # Geode GX1 support | 42 | # Geode GX1 support |
43 | cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx | 43 | cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx |
44 | 44 | cflags-$(CONFIG_MGEODE_LX) += $(call cc-option,-march=geode,-march=pentium-mmx) | |
45 | # add at the end to overwrite eventual tuning options from earlier | 45 | # add at the end to overwrite eventual tuning options from earlier |
46 | # cpu entries | 46 | # cpu entries |
47 | cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686)) | 47 | cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686)) |
48 | 48 | ||
49 | # Work around the pentium-mmx code generator madness of gcc4.4.x which | ||
50 | # does stack alignment by generating horrible code _before_ the mcount | ||
51 | # prologue (push %ebp, mov %esp, %ebp) which breaks the function graph | ||
52 | # tracer assumptions. For i686, generic, core2 this is set by the | ||
53 | # compiler anyway | ||
54 | cflags-$(CONFIG_FUNCTION_GRAPH_TRACER) += $(call cc-option,-maccumulate-outgoing-args) | ||
55 | |||
49 | # Bug fix for binutils: this option is required in order to keep | 56 | # Bug fix for binutils: this option is required in order to keep |
50 | # binutils from generating NOPL instructions against our will. | 57 | # binutils from generating NOPL instructions against our will. |
51 | ifneq ($(CONFIG_X86_P6_NOP),y) | 58 | ifneq ($(CONFIG_X86_P6_NOP),y) |
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index f8ed0658404c..f25bbd37765a 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile | |||
@@ -9,6 +9,7 @@ targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinu | |||
9 | KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 | 9 | KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 |
10 | KBUILD_CFLAGS += -fno-strict-aliasing -fPIC | 10 | KBUILD_CFLAGS += -fno-strict-aliasing -fPIC |
11 | KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING | 11 | KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING |
12 | cflags-$(CONFIG_X86_32) := -march=i386 | ||
12 | cflags-$(CONFIG_X86_64) := -mcmodel=small | 13 | cflags-$(CONFIG_X86_64) := -mcmodel=small |
13 | KBUILD_CFLAGS += $(cflags-y) | 14 | KBUILD_CFLAGS += $(cflags-y) |
14 | KBUILD_CFLAGS += $(call cc-option,-ffreestanding) | 15 | KBUILD_CFLAGS += $(call cc-option,-ffreestanding) |
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 077e1b69198e..faff0dc9c06a 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S | |||
@@ -107,8 +107,7 @@ ENTRY(startup_32) | |||
107 | lgdt gdt(%ebp) | 107 | lgdt gdt(%ebp) |
108 | 108 | ||
109 | /* Enable PAE mode */ | 109 | /* Enable PAE mode */ |
110 | xorl %eax, %eax | 110 | movl $(X86_CR4_PAE), %eax |
111 | orl $(X86_CR4_PAE), %eax | ||
112 | movl %eax, %cr4 | 111 | movl %eax, %cr4 |
113 | 112 | ||
114 | /* | 113 | /* |
diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c index bbeb0c3fbd90..89bbf4e4d05d 100644 --- a/arch/x86/boot/compressed/relocs.c +++ b/arch/x86/boot/compressed/relocs.c | |||
@@ -9,6 +9,9 @@ | |||
9 | #include <byteswap.h> | 9 | #include <byteswap.h> |
10 | #define USE_BSD | 10 | #define USE_BSD |
11 | #include <endian.h> | 11 | #include <endian.h> |
12 | #include <regex.h> | ||
13 | |||
14 | static void die(char *fmt, ...); | ||
12 | 15 | ||
13 | #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) | 16 | #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) |
14 | static Elf32_Ehdr ehdr; | 17 | static Elf32_Ehdr ehdr; |
@@ -30,25 +33,47 @@ static struct section *secs; | |||
30 | * the address for which it has been compiled. Don't warn user about | 33 | * the address for which it has been compiled. Don't warn user about |
31 | * absolute relocations present w.r.t these symbols. | 34 | * absolute relocations present w.r.t these symbols. |
32 | */ | 35 | */ |
33 | static const char* safe_abs_relocs[] = { | 36 | static const char abs_sym_regex[] = |
34 | "xen_irq_disable_direct_reloc", | 37 | "^(xen_irq_disable_direct_reloc$|" |
35 | "xen_save_fl_direct_reloc", | 38 | "xen_save_fl_direct_reloc$|" |
36 | }; | 39 | "VDSO|" |
40 | "__crc_)"; | ||
41 | static regex_t abs_sym_regex_c; | ||
42 | static int is_abs_reloc(const char *sym_name) | ||
43 | { | ||
44 | return !regexec(&abs_sym_regex_c, sym_name, 0, NULL, 0); | ||
45 | } | ||
37 | 46 | ||
38 | static int is_safe_abs_reloc(const char* sym_name) | 47 | /* |
48 | * These symbols are known to be relative, even if the linker marks them | ||
49 | * as absolute (typically defined outside any section in the linker script.) | ||
50 | */ | ||
51 | static const char rel_sym_regex[] = | ||
52 | "^_end$"; | ||
53 | static regex_t rel_sym_regex_c; | ||
54 | static int is_rel_reloc(const char *sym_name) | ||
39 | { | 55 | { |
40 | int i; | 56 | return !regexec(&rel_sym_regex_c, sym_name, 0, NULL, 0); |
57 | } | ||
41 | 58 | ||
42 | for (i = 0; i < ARRAY_SIZE(safe_abs_relocs); i++) { | 59 | static void regex_init(void) |
43 | if (!strcmp(sym_name, safe_abs_relocs[i])) | 60 | { |
44 | /* Match found */ | 61 | char errbuf[128]; |
45 | return 1; | 62 | int err; |
46 | } | 63 | |
47 | if (strncmp(sym_name, "VDSO", 4) == 0) | 64 | err = regcomp(&abs_sym_regex_c, abs_sym_regex, |
48 | return 1; | 65 | REG_EXTENDED|REG_NOSUB); |
49 | if (strncmp(sym_name, "__crc_", 6) == 0) | 66 | if (err) { |
50 | return 1; | 67 | regerror(err, &abs_sym_regex_c, errbuf, sizeof errbuf); |
51 | return 0; | 68 | die("%s", errbuf); |
69 | } | ||
70 | |||
71 | err = regcomp(&rel_sym_regex_c, rel_sym_regex, | ||
72 | REG_EXTENDED|REG_NOSUB); | ||
73 | if (err) { | ||
74 | regerror(err, &rel_sym_regex_c, errbuf, sizeof errbuf); | ||
75 | die("%s", errbuf); | ||
76 | } | ||
52 | } | 77 | } |
53 | 78 | ||
54 | static void die(char *fmt, ...) | 79 | static void die(char *fmt, ...) |
@@ -131,7 +156,7 @@ static const char *rel_type(unsigned type) | |||
131 | #undef REL_TYPE | 156 | #undef REL_TYPE |
132 | }; | 157 | }; |
133 | const char *name = "unknown type rel type name"; | 158 | const char *name = "unknown type rel type name"; |
134 | if (type < ARRAY_SIZE(type_name)) { | 159 | if (type < ARRAY_SIZE(type_name) && type_name[type]) { |
135 | name = type_name[type]; | 160 | name = type_name[type]; |
136 | } | 161 | } |
137 | return name; | 162 | return name; |
@@ -448,7 +473,7 @@ static void print_absolute_relocs(void) | |||
448 | * Before warning check if this absolute symbol | 473 | * Before warning check if this absolute symbol |
449 | * relocation is harmless. | 474 | * relocation is harmless. |
450 | */ | 475 | */ |
451 | if (is_safe_abs_reloc(name)) | 476 | if (is_abs_reloc(name) || is_rel_reloc(name)) |
452 | continue; | 477 | continue; |
453 | 478 | ||
454 | if (!printed) { | 479 | if (!printed) { |
@@ -501,21 +526,26 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym)) | |||
501 | sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; | 526 | sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; |
502 | r_type = ELF32_R_TYPE(rel->r_info); | 527 | r_type = ELF32_R_TYPE(rel->r_info); |
503 | /* Don't visit relocations to absolute symbols */ | 528 | /* Don't visit relocations to absolute symbols */ |
504 | if (sym->st_shndx == SHN_ABS) { | 529 | if (sym->st_shndx == SHN_ABS && |
530 | !is_rel_reloc(sym_name(sym_strtab, sym))) { | ||
505 | continue; | 531 | continue; |
506 | } | 532 | } |
507 | if (r_type == R_386_NONE || r_type == R_386_PC32) { | 533 | switch (r_type) { |
534 | case R_386_NONE: | ||
535 | case R_386_PC32: | ||
508 | /* | 536 | /* |
509 | * NONE can be ignored and and PC relative | 537 | * NONE can be ignored and and PC relative |
510 | * relocations don't need to be adjusted. | 538 | * relocations don't need to be adjusted. |
511 | */ | 539 | */ |
512 | } | 540 | break; |
513 | else if (r_type == R_386_32) { | 541 | case R_386_32: |
514 | /* Visit relocations that need to be adjusted */ | 542 | /* Visit relocations that need to be adjusted */ |
515 | visit(rel, sym); | 543 | visit(rel, sym); |
516 | } | 544 | break; |
517 | else { | 545 | default: |
518 | die("Unsupported relocation type: %d\n", r_type); | 546 | die("Unsupported relocation type: %s (%d)\n", |
547 | rel_type(r_type), r_type); | ||
548 | break; | ||
519 | } | 549 | } |
520 | } | 550 | } |
521 | } | 551 | } |
@@ -571,16 +601,15 @@ static void emit_relocs(int as_text) | |||
571 | } | 601 | } |
572 | else { | 602 | else { |
573 | unsigned char buf[4]; | 603 | unsigned char buf[4]; |
574 | buf[0] = buf[1] = buf[2] = buf[3] = 0; | ||
575 | /* Print a stop */ | 604 | /* Print a stop */ |
576 | printf("%c%c%c%c", buf[0], buf[1], buf[2], buf[3]); | 605 | fwrite("\0\0\0\0", 4, 1, stdout); |
577 | /* Now print each relocation */ | 606 | /* Now print each relocation */ |
578 | for (i = 0; i < reloc_count; i++) { | 607 | for (i = 0; i < reloc_count; i++) { |
579 | buf[0] = (relocs[i] >> 0) & 0xff; | 608 | buf[0] = (relocs[i] >> 0) & 0xff; |
580 | buf[1] = (relocs[i] >> 8) & 0xff; | 609 | buf[1] = (relocs[i] >> 8) & 0xff; |
581 | buf[2] = (relocs[i] >> 16) & 0xff; | 610 | buf[2] = (relocs[i] >> 16) & 0xff; |
582 | buf[3] = (relocs[i] >> 24) & 0xff; | 611 | buf[3] = (relocs[i] >> 24) & 0xff; |
583 | printf("%c%c%c%c", buf[0], buf[1], buf[2], buf[3]); | 612 | fwrite(buf, 4, 1, stdout); |
584 | } | 613 | } |
585 | } | 614 | } |
586 | } | 615 | } |
@@ -598,6 +627,8 @@ int main(int argc, char **argv) | |||
598 | FILE *fp; | 627 | FILE *fp; |
599 | int i; | 628 | int i; |
600 | 629 | ||
630 | regex_init(); | ||
631 | |||
601 | show_absolute_syms = 0; | 632 | show_absolute_syms = 0; |
602 | show_absolute_relocs = 0; | 633 | show_absolute_relocs = 0; |
603 | as_text = 0; | 634 | as_text = 0; |
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S index f4193bb48782..a6f1a59a5b0c 100644 --- a/arch/x86/boot/compressed/vmlinux.lds.S +++ b/arch/x86/boot/compressed/vmlinux.lds.S | |||
@@ -4,6 +4,7 @@ OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT) | |||
4 | 4 | ||
5 | #undef i386 | 5 | #undef i386 |
6 | 6 | ||
7 | #include <asm/cache.h> | ||
7 | #include <asm/page_types.h> | 8 | #include <asm/page_types.h> |
8 | 9 | ||
9 | #ifdef CONFIG_X86_64 | 10 | #ifdef CONFIG_X86_64 |
@@ -46,7 +47,7 @@ SECTIONS | |||
46 | *(.data.*) | 47 | *(.data.*) |
47 | _edata = . ; | 48 | _edata = . ; |
48 | } | 49 | } |
49 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | 50 | . = ALIGN(L1_CACHE_BYTES); |
50 | .bss : { | 51 | .bss : { |
51 | _bss = . ; | 52 | _bss = . ; |
52 | *(.bss) | 53 | *(.bss) |
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index b31cc54b4641..93e689f4bd86 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S | |||
@@ -16,7 +16,7 @@ | |||
16 | */ | 16 | */ |
17 | 17 | ||
18 | #include <asm/segment.h> | 18 | #include <asm/segment.h> |
19 | #include <linux/utsrelease.h> | 19 | #include <generated/utsrelease.h> |
20 | #include <asm/boot.h> | 20 | #include <asm/boot.h> |
21 | #include <asm/e820.h> | 21 | #include <asm/e820.h> |
22 | #include <asm/page_types.h> | 22 | #include <asm/page_types.h> |
diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld index 0f6ec455a2b1..03c0683636b6 100644 --- a/arch/x86/boot/setup.ld +++ b/arch/x86/boot/setup.ld | |||
@@ -53,6 +53,9 @@ SECTIONS | |||
53 | 53 | ||
54 | /DISCARD/ : { *(.note*) } | 54 | /DISCARD/ : { *(.note*) } |
55 | 55 | ||
56 | /* | ||
57 | * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility: | ||
58 | */ | ||
56 | . = ASSERT(_end <= 0x8000, "Setup too big!"); | 59 | . = ASSERT(_end <= 0x8000, "Setup too big!"); |
57 | . = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!"); | 60 | . = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!"); |
58 | /* Necessary for the very-old-loader check to work... */ | 61 | /* Necessary for the very-old-loader check to work... */ |
diff --git a/arch/x86/boot/version.c b/arch/x86/boot/version.c index 2723d9b5ce43..2b15aa488ffb 100644 --- a/arch/x86/boot/version.c +++ b/arch/x86/boot/version.c | |||
@@ -13,8 +13,8 @@ | |||
13 | */ | 13 | */ |
14 | 14 | ||
15 | #include "boot.h" | 15 | #include "boot.h" |
16 | #include <linux/utsrelease.h> | 16 | #include <generated/utsrelease.h> |
17 | #include <linux/compile.h> | 17 | #include <generated/compile.h> |
18 | 18 | ||
19 | const char kernel_version[] = | 19 | const char kernel_version[] = |
20 | UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") " | 20 | UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") " |
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c index d42da3802499..f767164cd5df 100644 --- a/arch/x86/boot/video.c +++ b/arch/x86/boot/video.c | |||
@@ -27,6 +27,12 @@ static void store_cursor_position(void) | |||
27 | 27 | ||
28 | boot_params.screen_info.orig_x = oreg.dl; | 28 | boot_params.screen_info.orig_x = oreg.dl; |
29 | boot_params.screen_info.orig_y = oreg.dh; | 29 | boot_params.screen_info.orig_y = oreg.dh; |
30 | |||
31 | if (oreg.ch & 0x20) | ||
32 | boot_params.screen_info.flags |= VIDEO_FLAGS_NOCURSOR; | ||
33 | |||
34 | if ((oreg.ch & 0x1f) > (oreg.cl & 0x1f)) | ||
35 | boot_params.screen_info.flags |= VIDEO_FLAGS_NOCURSOR; | ||
30 | } | 36 | } |
31 | 37 | ||
32 | static void store_video_mode(void) | 38 | static void store_video_mode(void) |
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index cfb0010fa940..1a58ad89fdf7 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile | |||
@@ -12,6 +12,7 @@ obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o | |||
12 | obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o | 12 | obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o |
13 | obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o | 13 | obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o |
14 | obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o | 14 | obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o |
15 | obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o | ||
15 | 16 | ||
16 | obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o | 17 | obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o |
17 | 18 | ||
@@ -24,3 +25,5 @@ twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o | |||
24 | salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o | 25 | salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o |
25 | 26 | ||
26 | aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o | 27 | aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o |
28 | |||
29 | ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o | ||
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index eb0566e83319..20bb0e1ac681 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S | |||
@@ -16,6 +16,7 @@ | |||
16 | */ | 16 | */ |
17 | 17 | ||
18 | #include <linux/linkage.h> | 18 | #include <linux/linkage.h> |
19 | #include <asm/inst.h> | ||
19 | 20 | ||
20 | .text | 21 | .text |
21 | 22 | ||
@@ -122,103 +123,72 @@ ENTRY(aesni_set_key) | |||
122 | movups 0x10(%rsi), %xmm2 # other user key | 123 | movups 0x10(%rsi), %xmm2 # other user key |
123 | movaps %xmm2, (%rcx) | 124 | movaps %xmm2, (%rcx) |
124 | add $0x10, %rcx | 125 | add $0x10, %rcx |
125 | # aeskeygenassist $0x1, %xmm2, %xmm1 # round 1 | 126 | AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 |
126 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01 | ||
127 | call _key_expansion_256a | 127 | call _key_expansion_256a |
128 | # aeskeygenassist $0x1, %xmm0, %xmm1 | 128 | AESKEYGENASSIST 0x1 %xmm0 %xmm1 |
129 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01 | ||
130 | call _key_expansion_256b | 129 | call _key_expansion_256b |
131 | # aeskeygenassist $0x2, %xmm2, %xmm1 # round 2 | 130 | AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2 |
132 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02 | ||
133 | call _key_expansion_256a | 131 | call _key_expansion_256a |
134 | # aeskeygenassist $0x2, %xmm0, %xmm1 | 132 | AESKEYGENASSIST 0x2 %xmm0 %xmm1 |
135 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02 | ||
136 | call _key_expansion_256b | 133 | call _key_expansion_256b |
137 | # aeskeygenassist $0x4, %xmm2, %xmm1 # round 3 | 134 | AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3 |
138 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04 | ||
139 | call _key_expansion_256a | 135 | call _key_expansion_256a |
140 | # aeskeygenassist $0x4, %xmm0, %xmm1 | 136 | AESKEYGENASSIST 0x4 %xmm0 %xmm1 |
141 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04 | ||
142 | call _key_expansion_256b | 137 | call _key_expansion_256b |
143 | # aeskeygenassist $0x8, %xmm2, %xmm1 # round 4 | 138 | AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4 |
144 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08 | ||
145 | call _key_expansion_256a | 139 | call _key_expansion_256a |
146 | # aeskeygenassist $0x8, %xmm0, %xmm1 | 140 | AESKEYGENASSIST 0x8 %xmm0 %xmm1 |
147 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08 | ||
148 | call _key_expansion_256b | 141 | call _key_expansion_256b |
149 | # aeskeygenassist $0x10, %xmm2, %xmm1 # round 5 | 142 | AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5 |
150 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10 | ||
151 | call _key_expansion_256a | 143 | call _key_expansion_256a |
152 | # aeskeygenassist $0x10, %xmm0, %xmm1 | 144 | AESKEYGENASSIST 0x10 %xmm0 %xmm1 |
153 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10 | ||
154 | call _key_expansion_256b | 145 | call _key_expansion_256b |
155 | # aeskeygenassist $0x20, %xmm2, %xmm1 # round 6 | 146 | AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6 |
156 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20 | ||
157 | call _key_expansion_256a | 147 | call _key_expansion_256a |
158 | # aeskeygenassist $0x20, %xmm0, %xmm1 | 148 | AESKEYGENASSIST 0x20 %xmm0 %xmm1 |
159 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20 | ||
160 | call _key_expansion_256b | 149 | call _key_expansion_256b |
161 | # aeskeygenassist $0x40, %xmm2, %xmm1 # round 7 | 150 | AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7 |
162 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40 | ||
163 | call _key_expansion_256a | 151 | call _key_expansion_256a |
164 | jmp .Ldec_key | 152 | jmp .Ldec_key |
165 | .Lenc_key192: | 153 | .Lenc_key192: |
166 | movq 0x10(%rsi), %xmm2 # other user key | 154 | movq 0x10(%rsi), %xmm2 # other user key |
167 | # aeskeygenassist $0x1, %xmm2, %xmm1 # round 1 | 155 | AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 |
168 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01 | ||
169 | call _key_expansion_192a | 156 | call _key_expansion_192a |
170 | # aeskeygenassist $0x2, %xmm2, %xmm1 # round 2 | 157 | AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2 |
171 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02 | ||
172 | call _key_expansion_192b | 158 | call _key_expansion_192b |
173 | # aeskeygenassist $0x4, %xmm2, %xmm1 # round 3 | 159 | AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3 |
174 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04 | ||
175 | call _key_expansion_192a | 160 | call _key_expansion_192a |
176 | # aeskeygenassist $0x8, %xmm2, %xmm1 # round 4 | 161 | AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4 |
177 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08 | ||
178 | call _key_expansion_192b | 162 | call _key_expansion_192b |
179 | # aeskeygenassist $0x10, %xmm2, %xmm1 # round 5 | 163 | AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5 |
180 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10 | ||
181 | call _key_expansion_192a | 164 | call _key_expansion_192a |
182 | # aeskeygenassist $0x20, %xmm2, %xmm1 # round 6 | 165 | AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6 |
183 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20 | ||
184 | call _key_expansion_192b | 166 | call _key_expansion_192b |
185 | # aeskeygenassist $0x40, %xmm2, %xmm1 # round 7 | 167 | AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7 |
186 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40 | ||
187 | call _key_expansion_192a | 168 | call _key_expansion_192a |
188 | # aeskeygenassist $0x80, %xmm2, %xmm1 # round 8 | 169 | AESKEYGENASSIST 0x80 %xmm2 %xmm1 # round 8 |
189 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x80 | ||
190 | call _key_expansion_192b | 170 | call _key_expansion_192b |
191 | jmp .Ldec_key | 171 | jmp .Ldec_key |
192 | .Lenc_key128: | 172 | .Lenc_key128: |
193 | # aeskeygenassist $0x1, %xmm0, %xmm1 # round 1 | 173 | AESKEYGENASSIST 0x1 %xmm0 %xmm1 # round 1 |
194 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01 | ||
195 | call _key_expansion_128 | 174 | call _key_expansion_128 |
196 | # aeskeygenassist $0x2, %xmm0, %xmm1 # round 2 | 175 | AESKEYGENASSIST 0x2 %xmm0 %xmm1 # round 2 |
197 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02 | ||
198 | call _key_expansion_128 | 176 | call _key_expansion_128 |
199 | # aeskeygenassist $0x4, %xmm0, %xmm1 # round 3 | 177 | AESKEYGENASSIST 0x4 %xmm0 %xmm1 # round 3 |
200 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04 | ||
201 | call _key_expansion_128 | 178 | call _key_expansion_128 |
202 | # aeskeygenassist $0x8, %xmm0, %xmm1 # round 4 | 179 | AESKEYGENASSIST 0x8 %xmm0 %xmm1 # round 4 |
203 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08 | ||
204 | call _key_expansion_128 | 180 | call _key_expansion_128 |
205 | # aeskeygenassist $0x10, %xmm0, %xmm1 # round 5 | 181 | AESKEYGENASSIST 0x10 %xmm0 %xmm1 # round 5 |
206 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10 | ||
207 | call _key_expansion_128 | 182 | call _key_expansion_128 |
208 | # aeskeygenassist $0x20, %xmm0, %xmm1 # round 6 | 183 | AESKEYGENASSIST 0x20 %xmm0 %xmm1 # round 6 |
209 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20 | ||
210 | call _key_expansion_128 | 184 | call _key_expansion_128 |
211 | # aeskeygenassist $0x40, %xmm0, %xmm1 # round 7 | 185 | AESKEYGENASSIST 0x40 %xmm0 %xmm1 # round 7 |
212 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x40 | ||
213 | call _key_expansion_128 | 186 | call _key_expansion_128 |
214 | # aeskeygenassist $0x80, %xmm0, %xmm1 # round 8 | 187 | AESKEYGENASSIST 0x80 %xmm0 %xmm1 # round 8 |
215 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x80 | ||
216 | call _key_expansion_128 | 188 | call _key_expansion_128 |
217 | # aeskeygenassist $0x1b, %xmm0, %xmm1 # round 9 | 189 | AESKEYGENASSIST 0x1b %xmm0 %xmm1 # round 9 |
218 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x1b | ||
219 | call _key_expansion_128 | 190 | call _key_expansion_128 |
220 | # aeskeygenassist $0x36, %xmm0, %xmm1 # round 10 | 191 | AESKEYGENASSIST 0x36 %xmm0 %xmm1 # round 10 |
221 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x36 | ||
222 | call _key_expansion_128 | 192 | call _key_expansion_128 |
223 | .Ldec_key: | 193 | .Ldec_key: |
224 | sub $0x10, %rcx | 194 | sub $0x10, %rcx |
@@ -231,8 +201,7 @@ ENTRY(aesni_set_key) | |||
231 | .align 4 | 201 | .align 4 |
232 | .Ldec_key_loop: | 202 | .Ldec_key_loop: |
233 | movaps (%rdi), %xmm0 | 203 | movaps (%rdi), %xmm0 |
234 | # aesimc %xmm0, %xmm1 | 204 | AESIMC %xmm0 %xmm1 |
235 | .byte 0x66, 0x0f, 0x38, 0xdb, 0xc8 | ||
236 | movaps %xmm1, (%rsi) | 205 | movaps %xmm1, (%rsi) |
237 | add $0x10, %rdi | 206 | add $0x10, %rdi |
238 | sub $0x10, %rsi | 207 | sub $0x10, %rsi |
@@ -274,51 +243,37 @@ _aesni_enc1: | |||
274 | je .Lenc192 | 243 | je .Lenc192 |
275 | add $0x20, TKEYP | 244 | add $0x20, TKEYP |
276 | movaps -0x60(TKEYP), KEY | 245 | movaps -0x60(TKEYP), KEY |
277 | # aesenc KEY, STATE | 246 | AESENC KEY STATE |
278 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | ||
279 | movaps -0x50(TKEYP), KEY | 247 | movaps -0x50(TKEYP), KEY |
280 | # aesenc KEY, STATE | 248 | AESENC KEY STATE |
281 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | ||
282 | .align 4 | 249 | .align 4 |
283 | .Lenc192: | 250 | .Lenc192: |
284 | movaps -0x40(TKEYP), KEY | 251 | movaps -0x40(TKEYP), KEY |
285 | # aesenc KEY, STATE | 252 | AESENC KEY STATE |
286 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | ||
287 | movaps -0x30(TKEYP), KEY | 253 | movaps -0x30(TKEYP), KEY |
288 | # aesenc KEY, STATE | 254 | AESENC KEY STATE |
289 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | ||
290 | .align 4 | 255 | .align 4 |
291 | .Lenc128: | 256 | .Lenc128: |
292 | movaps -0x20(TKEYP), KEY | 257 | movaps -0x20(TKEYP), KEY |
293 | # aesenc KEY, STATE | 258 | AESENC KEY STATE |
294 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | ||
295 | movaps -0x10(TKEYP), KEY | 259 | movaps -0x10(TKEYP), KEY |
296 | # aesenc KEY, STATE | 260 | AESENC KEY STATE |
297 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | ||
298 | movaps (TKEYP), KEY | 261 | movaps (TKEYP), KEY |
299 | # aesenc KEY, STATE | 262 | AESENC KEY STATE |
300 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | ||
301 | movaps 0x10(TKEYP), KEY | 263 | movaps 0x10(TKEYP), KEY |
302 | # aesenc KEY, STATE | 264 | AESENC KEY STATE |
303 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | ||
304 | movaps 0x20(TKEYP), KEY | 265 | movaps 0x20(TKEYP), KEY |
305 | # aesenc KEY, STATE | 266 | AESENC KEY STATE |
306 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | ||
307 | movaps 0x30(TKEYP), KEY | 267 | movaps 0x30(TKEYP), KEY |
308 | # aesenc KEY, STATE | 268 | AESENC KEY STATE |
309 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | ||
310 | movaps 0x40(TKEYP), KEY | 269 | movaps 0x40(TKEYP), KEY |
311 | # aesenc KEY, STATE | 270 | AESENC KEY STATE |
312 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | ||
313 | movaps 0x50(TKEYP), KEY | 271 | movaps 0x50(TKEYP), KEY |
314 | # aesenc KEY, STATE | 272 | AESENC KEY STATE |
315 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | ||
316 | movaps 0x60(TKEYP), KEY | 273 | movaps 0x60(TKEYP), KEY |
317 | # aesenc KEY, STATE | 274 | AESENC KEY STATE |
318 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | ||
319 | movaps 0x70(TKEYP), KEY | 275 | movaps 0x70(TKEYP), KEY |
320 | # aesenclast KEY, STATE # last round | 276 | AESENCLAST KEY STATE |
321 | .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2 | ||
322 | ret | 277 | ret |
323 | 278 | ||
324 | /* | 279 | /* |
@@ -353,135 +308,79 @@ _aesni_enc4: | |||
353 | je .L4enc192 | 308 | je .L4enc192 |
354 | add $0x20, TKEYP | 309 | add $0x20, TKEYP |
355 | movaps -0x60(TKEYP), KEY | 310 | movaps -0x60(TKEYP), KEY |
356 | # aesenc KEY, STATE1 | 311 | AESENC KEY STATE1 |
357 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 312 | AESENC KEY STATE2 |
358 | # aesenc KEY, STATE2 | 313 | AESENC KEY STATE3 |
359 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 314 | AESENC KEY STATE4 |
360 | # aesenc KEY, STATE3 | ||
361 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | ||
362 | # aesenc KEY, STATE4 | ||
363 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | ||
364 | movaps -0x50(TKEYP), KEY | 315 | movaps -0x50(TKEYP), KEY |
365 | # aesenc KEY, STATE1 | 316 | AESENC KEY STATE1 |
366 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 317 | AESENC KEY STATE2 |
367 | # aesenc KEY, STATE2 | 318 | AESENC KEY STATE3 |
368 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 319 | AESENC KEY STATE4 |
369 | # aesenc KEY, STATE3 | ||
370 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | ||
371 | # aesenc KEY, STATE4 | ||
372 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | ||
373 | #.align 4 | 320 | #.align 4 |
374 | .L4enc192: | 321 | .L4enc192: |
375 | movaps -0x40(TKEYP), KEY | 322 | movaps -0x40(TKEYP), KEY |
376 | # aesenc KEY, STATE1 | 323 | AESENC KEY STATE1 |
377 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 324 | AESENC KEY STATE2 |
378 | # aesenc KEY, STATE2 | 325 | AESENC KEY STATE3 |
379 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 326 | AESENC KEY STATE4 |
380 | # aesenc KEY, STATE3 | ||
381 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | ||
382 | # aesenc KEY, STATE4 | ||
383 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | ||
384 | movaps -0x30(TKEYP), KEY | 327 | movaps -0x30(TKEYP), KEY |
385 | # aesenc KEY, STATE1 | 328 | AESENC KEY STATE1 |
386 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 329 | AESENC KEY STATE2 |
387 | # aesenc KEY, STATE2 | 330 | AESENC KEY STATE3 |
388 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 331 | AESENC KEY STATE4 |
389 | # aesenc KEY, STATE3 | ||
390 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | ||
391 | # aesenc KEY, STATE4 | ||
392 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | ||
393 | #.align 4 | 332 | #.align 4 |
394 | .L4enc128: | 333 | .L4enc128: |
395 | movaps -0x20(TKEYP), KEY | 334 | movaps -0x20(TKEYP), KEY |
396 | # aesenc KEY, STATE1 | 335 | AESENC KEY STATE1 |
397 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 336 | AESENC KEY STATE2 |
398 | # aesenc KEY, STATE2 | 337 | AESENC KEY STATE3 |
399 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 338 | AESENC KEY STATE4 |
400 | # aesenc KEY, STATE3 | ||
401 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | ||
402 | # aesenc KEY, STATE4 | ||
403 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | ||
404 | movaps -0x10(TKEYP), KEY | 339 | movaps -0x10(TKEYP), KEY |
405 | # aesenc KEY, STATE1 | 340 | AESENC KEY STATE1 |
406 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 341 | AESENC KEY STATE2 |
407 | # aesenc KEY, STATE2 | 342 | AESENC KEY STATE3 |
408 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 343 | AESENC KEY STATE4 |
409 | # aesenc KEY, STATE3 | ||
410 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | ||
411 | # aesenc KEY, STATE4 | ||
412 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | ||
413 | movaps (TKEYP), KEY | 344 | movaps (TKEYP), KEY |
414 | # aesenc KEY, STATE1 | 345 | AESENC KEY STATE1 |
415 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 346 | AESENC KEY STATE2 |
416 | # aesenc KEY, STATE2 | 347 | AESENC KEY STATE3 |
417 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 348 | AESENC KEY STATE4 |
418 | # aesenc KEY, STATE3 | ||
419 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | ||
420 | # aesenc KEY, STATE4 | ||
421 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | ||
422 | movaps 0x10(TKEYP), KEY | 349 | movaps 0x10(TKEYP), KEY |
423 | # aesenc KEY, STATE1 | 350 | AESENC KEY STATE1 |
424 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 351 | AESENC KEY STATE2 |
425 | # aesenc KEY, STATE2 | 352 | AESENC KEY STATE3 |
426 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 353 | AESENC KEY STATE4 |
427 | # aesenc KEY, STATE3 | ||
428 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | ||
429 | # aesenc KEY, STATE4 | ||
430 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | ||
431 | movaps 0x20(TKEYP), KEY | 354 | movaps 0x20(TKEYP), KEY |
432 | # aesenc KEY, STATE1 | 355 | AESENC KEY STATE1 |
433 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 356 | AESENC KEY STATE2 |
434 | # aesenc KEY, STATE2 | 357 | AESENC KEY STATE3 |
435 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 358 | AESENC KEY STATE4 |
436 | # aesenc KEY, STATE3 | ||
437 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | ||
438 | # aesenc KEY, STATE4 | ||
439 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | ||
440 | movaps 0x30(TKEYP), KEY | 359 | movaps 0x30(TKEYP), KEY |
441 | # aesenc KEY, STATE1 | 360 | AESENC KEY STATE1 |
442 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 361 | AESENC KEY STATE2 |
443 | # aesenc KEY, STATE2 | 362 | AESENC KEY STATE3 |
444 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 363 | AESENC KEY STATE4 |
445 | # aesenc KEY, STATE3 | ||
446 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | ||
447 | # aesenc KEY, STATE4 | ||
448 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | ||
449 | movaps 0x40(TKEYP), KEY | 364 | movaps 0x40(TKEYP), KEY |
450 | # aesenc KEY, STATE1 | 365 | AESENC KEY STATE1 |
451 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 366 | AESENC KEY STATE2 |
452 | # aesenc KEY, STATE2 | 367 | AESENC KEY STATE3 |
453 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 368 | AESENC KEY STATE4 |
454 | # aesenc KEY, STATE3 | ||
455 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | ||
456 | # aesenc KEY, STATE4 | ||
457 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | ||
458 | movaps 0x50(TKEYP), KEY | 369 | movaps 0x50(TKEYP), KEY |
459 | # aesenc KEY, STATE1 | 370 | AESENC KEY STATE1 |
460 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 371 | AESENC KEY STATE2 |
461 | # aesenc KEY, STATE2 | 372 | AESENC KEY STATE3 |
462 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 373 | AESENC KEY STATE4 |
463 | # aesenc KEY, STATE3 | ||
464 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | ||
465 | # aesenc KEY, STATE4 | ||
466 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | ||
467 | movaps 0x60(TKEYP), KEY | 374 | movaps 0x60(TKEYP), KEY |
468 | # aesenc KEY, STATE1 | 375 | AESENC KEY STATE1 |
469 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 376 | AESENC KEY STATE2 |
470 | # aesenc KEY, STATE2 | 377 | AESENC KEY STATE3 |
471 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 378 | AESENC KEY STATE4 |
472 | # aesenc KEY, STATE3 | ||
473 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | ||
474 | # aesenc KEY, STATE4 | ||
475 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | ||
476 | movaps 0x70(TKEYP), KEY | 379 | movaps 0x70(TKEYP), KEY |
477 | # aesenclast KEY, STATE1 # last round | 380 | AESENCLAST KEY STATE1 # last round |
478 | .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2 | 381 | AESENCLAST KEY STATE2 |
479 | # aesenclast KEY, STATE2 | 382 | AESENCLAST KEY STATE3 |
480 | .byte 0x66, 0x0f, 0x38, 0xdd, 0xe2 | 383 | AESENCLAST KEY STATE4 |
481 | # aesenclast KEY, STATE3 | ||
482 | .byte 0x66, 0x0f, 0x38, 0xdd, 0xea | ||
483 | # aesenclast KEY, STATE4 | ||
484 | .byte 0x66, 0x0f, 0x38, 0xdd, 0xf2 | ||
485 | ret | 384 | ret |
486 | 385 | ||
487 | /* | 386 | /* |
@@ -518,51 +417,37 @@ _aesni_dec1: | |||
518 | je .Ldec192 | 417 | je .Ldec192 |
519 | add $0x20, TKEYP | 418 | add $0x20, TKEYP |
520 | movaps -0x60(TKEYP), KEY | 419 | movaps -0x60(TKEYP), KEY |
521 | # aesdec KEY, STATE | 420 | AESDEC KEY STATE |
522 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | ||
523 | movaps -0x50(TKEYP), KEY | 421 | movaps -0x50(TKEYP), KEY |
524 | # aesdec KEY, STATE | 422 | AESDEC KEY STATE |
525 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | ||
526 | .align 4 | 423 | .align 4 |
527 | .Ldec192: | 424 | .Ldec192: |
528 | movaps -0x40(TKEYP), KEY | 425 | movaps -0x40(TKEYP), KEY |
529 | # aesdec KEY, STATE | 426 | AESDEC KEY STATE |
530 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | ||
531 | movaps -0x30(TKEYP), KEY | 427 | movaps -0x30(TKEYP), KEY |
532 | # aesdec KEY, STATE | 428 | AESDEC KEY STATE |
533 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | ||
534 | .align 4 | 429 | .align 4 |
535 | .Ldec128: | 430 | .Ldec128: |
536 | movaps -0x20(TKEYP), KEY | 431 | movaps -0x20(TKEYP), KEY |
537 | # aesdec KEY, STATE | 432 | AESDEC KEY STATE |
538 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | ||
539 | movaps -0x10(TKEYP), KEY | 433 | movaps -0x10(TKEYP), KEY |
540 | # aesdec KEY, STATE | 434 | AESDEC KEY STATE |
541 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | ||
542 | movaps (TKEYP), KEY | 435 | movaps (TKEYP), KEY |
543 | # aesdec KEY, STATE | 436 | AESDEC KEY STATE |
544 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | ||
545 | movaps 0x10(TKEYP), KEY | 437 | movaps 0x10(TKEYP), KEY |
546 | # aesdec KEY, STATE | 438 | AESDEC KEY STATE |
547 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | ||
548 | movaps 0x20(TKEYP), KEY | 439 | movaps 0x20(TKEYP), KEY |
549 | # aesdec KEY, STATE | 440 | AESDEC KEY STATE |
550 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | ||
551 | movaps 0x30(TKEYP), KEY | 441 | movaps 0x30(TKEYP), KEY |
552 | # aesdec KEY, STATE | 442 | AESDEC KEY STATE |
553 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | ||
554 | movaps 0x40(TKEYP), KEY | 443 | movaps 0x40(TKEYP), KEY |
555 | # aesdec KEY, STATE | 444 | AESDEC KEY STATE |
556 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | ||
557 | movaps 0x50(TKEYP), KEY | 445 | movaps 0x50(TKEYP), KEY |
558 | # aesdec KEY, STATE | 446 | AESDEC KEY STATE |
559 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | ||
560 | movaps 0x60(TKEYP), KEY | 447 | movaps 0x60(TKEYP), KEY |
561 | # aesdec KEY, STATE | 448 | AESDEC KEY STATE |
562 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | ||
563 | movaps 0x70(TKEYP), KEY | 449 | movaps 0x70(TKEYP), KEY |
564 | # aesdeclast KEY, STATE # last round | 450 | AESDECLAST KEY STATE |
565 | .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2 | ||
566 | ret | 451 | ret |
567 | 452 | ||
568 | /* | 453 | /* |
@@ -597,135 +482,79 @@ _aesni_dec4: | |||
597 | je .L4dec192 | 482 | je .L4dec192 |
598 | add $0x20, TKEYP | 483 | add $0x20, TKEYP |
599 | movaps -0x60(TKEYP), KEY | 484 | movaps -0x60(TKEYP), KEY |
600 | # aesdec KEY, STATE1 | 485 | AESDEC KEY STATE1 |
601 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 486 | AESDEC KEY STATE2 |
602 | # aesdec KEY, STATE2 | 487 | AESDEC KEY STATE3 |
603 | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 488 | AESDEC KEY STATE4 |
604 | # aesdec KEY, STATE3 | ||
605 | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | ||
606 | # aesdec KEY, STATE4 | ||
607 | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | ||
608 | movaps -0x50(TKEYP), KEY | 489 | movaps -0x50(TKEYP), KEY |
609 | # aesdec KEY, STATE1 | 490 | AESDEC KEY STATE1 |
610 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 491 | AESDEC KEY STATE2 |
611 | # aesdec KEY, STATE2 | 492 | AESDEC KEY STATE3 |
612 | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 493 | AESDEC KEY STATE4 |
613 | # aesdec KEY, STATE3 | ||
614 | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | ||
615 | # aesdec KEY, STATE4 | ||
616 | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | ||
617 | .align 4 | 494 | .align 4 |
618 | .L4dec192: | 495 | .L4dec192: |
619 | movaps -0x40(TKEYP), KEY | 496 | movaps -0x40(TKEYP), KEY |
620 | # aesdec KEY, STATE1 | 497 | AESDEC KEY STATE1 |
621 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 498 | AESDEC KEY STATE2 |
622 | # aesdec KEY, STATE2 | 499 | AESDEC KEY STATE3 |
623 | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 500 | AESDEC KEY STATE4 |
624 | # aesdec KEY, STATE3 | ||
625 | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | ||
626 | # aesdec KEY, STATE4 | ||
627 | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | ||
628 | movaps -0x30(TKEYP), KEY | 501 | movaps -0x30(TKEYP), KEY |
629 | # aesdec KEY, STATE1 | 502 | AESDEC KEY STATE1 |
630 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 503 | AESDEC KEY STATE2 |
631 | # aesdec KEY, STATE2 | 504 | AESDEC KEY STATE3 |
632 | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 505 | AESDEC KEY STATE4 |
633 | # aesdec KEY, STATE3 | ||
634 | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | ||
635 | # aesdec KEY, STATE4 | ||
636 | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | ||
637 | .align 4 | 506 | .align 4 |
638 | .L4dec128: | 507 | .L4dec128: |
639 | movaps -0x20(TKEYP), KEY | 508 | movaps -0x20(TKEYP), KEY |
640 | # aesdec KEY, STATE1 | 509 | AESDEC KEY STATE1 |
641 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 510 | AESDEC KEY STATE2 |
642 | # aesdec KEY, STATE2 | 511 | AESDEC KEY STATE3 |
643 | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 512 | AESDEC KEY STATE4 |
644 | # aesdec KEY, STATE3 | ||
645 | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | ||
646 | # aesdec KEY, STATE4 | ||
647 | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | ||
648 | movaps -0x10(TKEYP), KEY | 513 | movaps -0x10(TKEYP), KEY |
649 | # aesdec KEY, STATE1 | 514 | AESDEC KEY STATE1 |
650 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 515 | AESDEC KEY STATE2 |
651 | # aesdec KEY, STATE2 | 516 | AESDEC KEY STATE3 |
652 | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 517 | AESDEC KEY STATE4 |
653 | # aesdec KEY, STATE3 | ||
654 | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | ||
655 | # aesdec KEY, STATE4 | ||
656 | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | ||
657 | movaps (TKEYP), KEY | 518 | movaps (TKEYP), KEY |
658 | # aesdec KEY, STATE1 | 519 | AESDEC KEY STATE1 |
659 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 520 | AESDEC KEY STATE2 |
660 | # aesdec KEY, STATE2 | 521 | AESDEC KEY STATE3 |
661 | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 522 | AESDEC KEY STATE4 |
662 | # aesdec KEY, STATE3 | ||
663 | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | ||
664 | # aesdec KEY, STATE4 | ||
665 | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | ||
666 | movaps 0x10(TKEYP), KEY | 523 | movaps 0x10(TKEYP), KEY |
667 | # aesdec KEY, STATE1 | 524 | AESDEC KEY STATE1 |
668 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 525 | AESDEC KEY STATE2 |
669 | # aesdec KEY, STATE2 | 526 | AESDEC KEY STATE3 |
670 | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 527 | AESDEC KEY STATE4 |
671 | # aesdec KEY, STATE3 | ||
672 | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | ||
673 | # aesdec KEY, STATE4 | ||
674 | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | ||
675 | movaps 0x20(TKEYP), KEY | 528 | movaps 0x20(TKEYP), KEY |
676 | # aesdec KEY, STATE1 | 529 | AESDEC KEY STATE1 |
677 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 530 | AESDEC KEY STATE2 |
678 | # aesdec KEY, STATE2 | 531 | AESDEC KEY STATE3 |
679 | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 532 | AESDEC KEY STATE4 |
680 | # aesdec KEY, STATE3 | ||
681 | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | ||
682 | # aesdec KEY, STATE4 | ||
683 | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | ||
684 | movaps 0x30(TKEYP), KEY | 533 | movaps 0x30(TKEYP), KEY |
685 | # aesdec KEY, STATE1 | 534 | AESDEC KEY STATE1 |
686 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 535 | AESDEC KEY STATE2 |
687 | # aesdec KEY, STATE2 | 536 | AESDEC KEY STATE3 |
688 | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 537 | AESDEC KEY STATE4 |
689 | # aesdec KEY, STATE3 | ||
690 | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | ||
691 | # aesdec KEY, STATE4 | ||
692 | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | ||
693 | movaps 0x40(TKEYP), KEY | 538 | movaps 0x40(TKEYP), KEY |
694 | # aesdec KEY, STATE1 | 539 | AESDEC KEY STATE1 |
695 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 540 | AESDEC KEY STATE2 |
696 | # aesdec KEY, STATE2 | 541 | AESDEC KEY STATE3 |
697 | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 542 | AESDEC KEY STATE4 |
698 | # aesdec KEY, STATE3 | ||
699 | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | ||
700 | # aesdec KEY, STATE4 | ||
701 | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | ||
702 | movaps 0x50(TKEYP), KEY | 543 | movaps 0x50(TKEYP), KEY |
703 | # aesdec KEY, STATE1 | 544 | AESDEC KEY STATE1 |
704 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 545 | AESDEC KEY STATE2 |
705 | # aesdec KEY, STATE2 | 546 | AESDEC KEY STATE3 |
706 | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 547 | AESDEC KEY STATE4 |
707 | # aesdec KEY, STATE3 | ||
708 | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | ||
709 | # aesdec KEY, STATE4 | ||
710 | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | ||
711 | movaps 0x60(TKEYP), KEY | 548 | movaps 0x60(TKEYP), KEY |
712 | # aesdec KEY, STATE1 | 549 | AESDEC KEY STATE1 |
713 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 550 | AESDEC KEY STATE2 |
714 | # aesdec KEY, STATE2 | 551 | AESDEC KEY STATE3 |
715 | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 552 | AESDEC KEY STATE4 |
716 | # aesdec KEY, STATE3 | ||
717 | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | ||
718 | # aesdec KEY, STATE4 | ||
719 | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | ||
720 | movaps 0x70(TKEYP), KEY | 553 | movaps 0x70(TKEYP), KEY |
721 | # aesdeclast KEY, STATE1 # last round | 554 | AESDECLAST KEY STATE1 # last round |
722 | .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2 | 555 | AESDECLAST KEY STATE2 |
723 | # aesdeclast KEY, STATE2 | 556 | AESDECLAST KEY STATE3 |
724 | .byte 0x66, 0x0f, 0x38, 0xdf, 0xe2 | 557 | AESDECLAST KEY STATE4 |
725 | # aesdeclast KEY, STATE3 | ||
726 | .byte 0x66, 0x0f, 0x38, 0xdf, 0xea | ||
727 | # aesdeclast KEY, STATE4 | ||
728 | .byte 0x66, 0x0f, 0x38, 0xdf, 0xf2 | ||
729 | ret | 558 | ret |
730 | 559 | ||
731 | /* | 560 | /* |
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 585edebe12cf..49c552c060e9 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c | |||
@@ -82,7 +82,7 @@ static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx, | |||
82 | return -EINVAL; | 82 | return -EINVAL; |
83 | } | 83 | } |
84 | 84 | ||
85 | if (irq_fpu_usable()) | 85 | if (!irq_fpu_usable()) |
86 | err = crypto_aes_expand_key(ctx, in_key, key_len); | 86 | err = crypto_aes_expand_key(ctx, in_key, key_len); |
87 | else { | 87 | else { |
88 | kernel_fpu_begin(); | 88 | kernel_fpu_begin(); |
@@ -103,7 +103,7 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | |||
103 | { | 103 | { |
104 | struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); | 104 | struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); |
105 | 105 | ||
106 | if (irq_fpu_usable()) | 106 | if (!irq_fpu_usable()) |
107 | crypto_aes_encrypt_x86(ctx, dst, src); | 107 | crypto_aes_encrypt_x86(ctx, dst, src); |
108 | else { | 108 | else { |
109 | kernel_fpu_begin(); | 109 | kernel_fpu_begin(); |
@@ -116,7 +116,7 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | |||
116 | { | 116 | { |
117 | struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); | 117 | struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); |
118 | 118 | ||
119 | if (irq_fpu_usable()) | 119 | if (!irq_fpu_usable()) |
120 | crypto_aes_decrypt_x86(ctx, dst, src); | 120 | crypto_aes_decrypt_x86(ctx, dst, src); |
121 | else { | 121 | else { |
122 | kernel_fpu_begin(); | 122 | kernel_fpu_begin(); |
@@ -342,7 +342,7 @@ static int ablk_encrypt(struct ablkcipher_request *req) | |||
342 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); | 342 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); |
343 | struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); | 343 | struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); |
344 | 344 | ||
345 | if (irq_fpu_usable()) { | 345 | if (!irq_fpu_usable()) { |
346 | struct ablkcipher_request *cryptd_req = | 346 | struct ablkcipher_request *cryptd_req = |
347 | ablkcipher_request_ctx(req); | 347 | ablkcipher_request_ctx(req); |
348 | memcpy(cryptd_req, req, sizeof(*req)); | 348 | memcpy(cryptd_req, req, sizeof(*req)); |
@@ -363,7 +363,7 @@ static int ablk_decrypt(struct ablkcipher_request *req) | |||
363 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); | 363 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); |
364 | struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); | 364 | struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); |
365 | 365 | ||
366 | if (irq_fpu_usable()) { | 366 | if (!irq_fpu_usable()) { |
367 | struct ablkcipher_request *cryptd_req = | 367 | struct ablkcipher_request *cryptd_req = |
368 | ablkcipher_request_ctx(req); | 368 | ablkcipher_request_ctx(req); |
369 | memcpy(cryptd_req, req, sizeof(*req)); | 369 | memcpy(cryptd_req, req, sizeof(*req)); |
diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S new file mode 100644 index 000000000000..1eb7f90cb7b9 --- /dev/null +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S | |||
@@ -0,0 +1,157 @@ | |||
1 | /* | ||
2 | * Accelerated GHASH implementation with Intel PCLMULQDQ-NI | ||
3 | * instructions. This file contains accelerated part of ghash | ||
4 | * implementation. More information about PCLMULQDQ can be found at: | ||
5 | * | ||
6 | * http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/ | ||
7 | * | ||
8 | * Copyright (c) 2009 Intel Corp. | ||
9 | * Author: Huang Ying <ying.huang@intel.com> | ||
10 | * Vinodh Gopal | ||
11 | * Erdinc Ozturk | ||
12 | * Deniz Karakoyunlu | ||
13 | * | ||
14 | * This program is free software; you can redistribute it and/or modify it | ||
15 | * under the terms of the GNU General Public License version 2 as published | ||
16 | * by the Free Software Foundation. | ||
17 | */ | ||
18 | |||
19 | #include <linux/linkage.h> | ||
20 | #include <asm/inst.h> | ||
21 | |||
22 | .data | ||
23 | |||
24 | .align 16 | ||
25 | .Lbswap_mask: | ||
26 | .octa 0x000102030405060708090a0b0c0d0e0f | ||
27 | .Lpoly: | ||
28 | .octa 0xc2000000000000000000000000000001 | ||
29 | .Ltwo_one: | ||
30 | .octa 0x00000001000000000000000000000001 | ||
31 | |||
32 | #define DATA %xmm0 | ||
33 | #define SHASH %xmm1 | ||
34 | #define T1 %xmm2 | ||
35 | #define T2 %xmm3 | ||
36 | #define T3 %xmm4 | ||
37 | #define BSWAP %xmm5 | ||
38 | #define IN1 %xmm6 | ||
39 | |||
40 | .text | ||
41 | |||
42 | /* | ||
43 | * __clmul_gf128mul_ble: internal ABI | ||
44 | * input: | ||
45 | * DATA: operand1 | ||
46 | * SHASH: operand2, hash_key << 1 mod poly | ||
47 | * output: | ||
48 | * DATA: operand1 * operand2 mod poly | ||
49 | * changed: | ||
50 | * T1 | ||
51 | * T2 | ||
52 | * T3 | ||
53 | */ | ||
54 | __clmul_gf128mul_ble: | ||
55 | movaps DATA, T1 | ||
56 | pshufd $0b01001110, DATA, T2 | ||
57 | pshufd $0b01001110, SHASH, T3 | ||
58 | pxor DATA, T2 | ||
59 | pxor SHASH, T3 | ||
60 | |||
61 | PCLMULQDQ 0x00 SHASH DATA # DATA = a0 * b0 | ||
62 | PCLMULQDQ 0x11 SHASH T1 # T1 = a1 * b1 | ||
63 | PCLMULQDQ 0x00 T3 T2 # T2 = (a1 + a0) * (b1 + b0) | ||
64 | pxor DATA, T2 | ||
65 | pxor T1, T2 # T2 = a0 * b1 + a1 * b0 | ||
66 | |||
67 | movaps T2, T3 | ||
68 | pslldq $8, T3 | ||
69 | psrldq $8, T2 | ||
70 | pxor T3, DATA | ||
71 | pxor T2, T1 # <T1:DATA> is result of | ||
72 | # carry-less multiplication | ||
73 | |||
74 | # first phase of the reduction | ||
75 | movaps DATA, T3 | ||
76 | psllq $1, T3 | ||
77 | pxor DATA, T3 | ||
78 | psllq $5, T3 | ||
79 | pxor DATA, T3 | ||
80 | psllq $57, T3 | ||
81 | movaps T3, T2 | ||
82 | pslldq $8, T2 | ||
83 | psrldq $8, T3 | ||
84 | pxor T2, DATA | ||
85 | pxor T3, T1 | ||
86 | |||
87 | # second phase of the reduction | ||
88 | movaps DATA, T2 | ||
89 | psrlq $5, T2 | ||
90 | pxor DATA, T2 | ||
91 | psrlq $1, T2 | ||
92 | pxor DATA, T2 | ||
93 | psrlq $1, T2 | ||
94 | pxor T2, T1 | ||
95 | pxor T1, DATA | ||
96 | ret | ||
97 | |||
98 | /* void clmul_ghash_mul(char *dst, const be128 *shash) */ | ||
99 | ENTRY(clmul_ghash_mul) | ||
100 | movups (%rdi), DATA | ||
101 | movups (%rsi), SHASH | ||
102 | movaps .Lbswap_mask, BSWAP | ||
103 | PSHUFB_XMM BSWAP DATA | ||
104 | call __clmul_gf128mul_ble | ||
105 | PSHUFB_XMM BSWAP DATA | ||
106 | movups DATA, (%rdi) | ||
107 | ret | ||
108 | |||
109 | /* | ||
110 | * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, | ||
111 | * const be128 *shash); | ||
112 | */ | ||
113 | ENTRY(clmul_ghash_update) | ||
114 | cmp $16, %rdx | ||
115 | jb .Lupdate_just_ret # check length | ||
116 | movaps .Lbswap_mask, BSWAP | ||
117 | movups (%rdi), DATA | ||
118 | movups (%rcx), SHASH | ||
119 | PSHUFB_XMM BSWAP DATA | ||
120 | .align 4 | ||
121 | .Lupdate_loop: | ||
122 | movups (%rsi), IN1 | ||
123 | PSHUFB_XMM BSWAP IN1 | ||
124 | pxor IN1, DATA | ||
125 | call __clmul_gf128mul_ble | ||
126 | sub $16, %rdx | ||
127 | add $16, %rsi | ||
128 | cmp $16, %rdx | ||
129 | jge .Lupdate_loop | ||
130 | PSHUFB_XMM BSWAP DATA | ||
131 | movups DATA, (%rdi) | ||
132 | .Lupdate_just_ret: | ||
133 | ret | ||
134 | |||
135 | /* | ||
136 | * void clmul_ghash_setkey(be128 *shash, const u8 *key); | ||
137 | * | ||
138 | * Calculate hash_key << 1 mod poly | ||
139 | */ | ||
140 | ENTRY(clmul_ghash_setkey) | ||
141 | movaps .Lbswap_mask, BSWAP | ||
142 | movups (%rsi), %xmm0 | ||
143 | PSHUFB_XMM BSWAP %xmm0 | ||
144 | movaps %xmm0, %xmm1 | ||
145 | psllq $1, %xmm0 | ||
146 | psrlq $63, %xmm1 | ||
147 | movaps %xmm1, %xmm2 | ||
148 | pslldq $8, %xmm1 | ||
149 | psrldq $8, %xmm2 | ||
150 | por %xmm1, %xmm0 | ||
151 | # reduction | ||
152 | pshufd $0b00100100, %xmm2, %xmm1 | ||
153 | pcmpeqd .Ltwo_one, %xmm1 | ||
154 | pand .Lpoly, %xmm1 | ||
155 | pxor %xmm1, %xmm0 | ||
156 | movups %xmm0, (%rdi) | ||
157 | ret | ||
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c new file mode 100644 index 000000000000..cbcc8d8ea93a --- /dev/null +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c | |||
@@ -0,0 +1,333 @@ | |||
1 | /* | ||
2 | * Accelerated GHASH implementation with Intel PCLMULQDQ-NI | ||
3 | * instructions. This file contains glue code. | ||
4 | * | ||
5 | * Copyright (c) 2009 Intel Corp. | ||
6 | * Author: Huang Ying <ying.huang@intel.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms of the GNU General Public License version 2 as published | ||
10 | * by the Free Software Foundation. | ||
11 | */ | ||
12 | |||
13 | #include <linux/module.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <linux/kernel.h> | ||
16 | #include <linux/crypto.h> | ||
17 | #include <crypto/algapi.h> | ||
18 | #include <crypto/cryptd.h> | ||
19 | #include <crypto/gf128mul.h> | ||
20 | #include <crypto/internal/hash.h> | ||
21 | #include <asm/i387.h> | ||
22 | |||
23 | #define GHASH_BLOCK_SIZE 16 | ||
24 | #define GHASH_DIGEST_SIZE 16 | ||
25 | |||
26 | void clmul_ghash_mul(char *dst, const be128 *shash); | ||
27 | |||
28 | void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, | ||
29 | const be128 *shash); | ||
30 | |||
31 | void clmul_ghash_setkey(be128 *shash, const u8 *key); | ||
32 | |||
33 | struct ghash_async_ctx { | ||
34 | struct cryptd_ahash *cryptd_tfm; | ||
35 | }; | ||
36 | |||
37 | struct ghash_ctx { | ||
38 | be128 shash; | ||
39 | }; | ||
40 | |||
41 | struct ghash_desc_ctx { | ||
42 | u8 buffer[GHASH_BLOCK_SIZE]; | ||
43 | u32 bytes; | ||
44 | }; | ||
45 | |||
46 | static int ghash_init(struct shash_desc *desc) | ||
47 | { | ||
48 | struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); | ||
49 | |||
50 | memset(dctx, 0, sizeof(*dctx)); | ||
51 | |||
52 | return 0; | ||
53 | } | ||
54 | |||
55 | static int ghash_setkey(struct crypto_shash *tfm, | ||
56 | const u8 *key, unsigned int keylen) | ||
57 | { | ||
58 | struct ghash_ctx *ctx = crypto_shash_ctx(tfm); | ||
59 | |||
60 | if (keylen != GHASH_BLOCK_SIZE) { | ||
61 | crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); | ||
62 | return -EINVAL; | ||
63 | } | ||
64 | |||
65 | clmul_ghash_setkey(&ctx->shash, key); | ||
66 | |||
67 | return 0; | ||
68 | } | ||
69 | |||
70 | static int ghash_update(struct shash_desc *desc, | ||
71 | const u8 *src, unsigned int srclen) | ||
72 | { | ||
73 | struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); | ||
74 | struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); | ||
75 | u8 *dst = dctx->buffer; | ||
76 | |||
77 | kernel_fpu_begin(); | ||
78 | if (dctx->bytes) { | ||
79 | int n = min(srclen, dctx->bytes); | ||
80 | u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes); | ||
81 | |||
82 | dctx->bytes -= n; | ||
83 | srclen -= n; | ||
84 | |||
85 | while (n--) | ||
86 | *pos++ ^= *src++; | ||
87 | |||
88 | if (!dctx->bytes) | ||
89 | clmul_ghash_mul(dst, &ctx->shash); | ||
90 | } | ||
91 | |||
92 | clmul_ghash_update(dst, src, srclen, &ctx->shash); | ||
93 | kernel_fpu_end(); | ||
94 | |||
95 | if (srclen & 0xf) { | ||
96 | src += srclen - (srclen & 0xf); | ||
97 | srclen &= 0xf; | ||
98 | dctx->bytes = GHASH_BLOCK_SIZE - srclen; | ||
99 | while (srclen--) | ||
100 | *dst++ ^= *src++; | ||
101 | } | ||
102 | |||
103 | return 0; | ||
104 | } | ||
105 | |||
106 | static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx) | ||
107 | { | ||
108 | u8 *dst = dctx->buffer; | ||
109 | |||
110 | if (dctx->bytes) { | ||
111 | u8 *tmp = dst + (GHASH_BLOCK_SIZE - dctx->bytes); | ||
112 | |||
113 | while (dctx->bytes--) | ||
114 | *tmp++ ^= 0; | ||
115 | |||
116 | kernel_fpu_begin(); | ||
117 | clmul_ghash_mul(dst, &ctx->shash); | ||
118 | kernel_fpu_end(); | ||
119 | } | ||
120 | |||
121 | dctx->bytes = 0; | ||
122 | } | ||
123 | |||
124 | static int ghash_final(struct shash_desc *desc, u8 *dst) | ||
125 | { | ||
126 | struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); | ||
127 | struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); | ||
128 | u8 *buf = dctx->buffer; | ||
129 | |||
130 | ghash_flush(ctx, dctx); | ||
131 | memcpy(dst, buf, GHASH_BLOCK_SIZE); | ||
132 | |||
133 | return 0; | ||
134 | } | ||
135 | |||
136 | static struct shash_alg ghash_alg = { | ||
137 | .digestsize = GHASH_DIGEST_SIZE, | ||
138 | .init = ghash_init, | ||
139 | .update = ghash_update, | ||
140 | .final = ghash_final, | ||
141 | .setkey = ghash_setkey, | ||
142 | .descsize = sizeof(struct ghash_desc_ctx), | ||
143 | .base = { | ||
144 | .cra_name = "__ghash", | ||
145 | .cra_driver_name = "__ghash-pclmulqdqni", | ||
146 | .cra_priority = 0, | ||
147 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
148 | .cra_blocksize = GHASH_BLOCK_SIZE, | ||
149 | .cra_ctxsize = sizeof(struct ghash_ctx), | ||
150 | .cra_module = THIS_MODULE, | ||
151 | .cra_list = LIST_HEAD_INIT(ghash_alg.base.cra_list), | ||
152 | }, | ||
153 | }; | ||
154 | |||
155 | static int ghash_async_init(struct ahash_request *req) | ||
156 | { | ||
157 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
158 | struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); | ||
159 | struct ahash_request *cryptd_req = ahash_request_ctx(req); | ||
160 | struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; | ||
161 | |||
162 | if (!irq_fpu_usable()) { | ||
163 | memcpy(cryptd_req, req, sizeof(*req)); | ||
164 | ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); | ||
165 | return crypto_ahash_init(cryptd_req); | ||
166 | } else { | ||
167 | struct shash_desc *desc = cryptd_shash_desc(cryptd_req); | ||
168 | struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); | ||
169 | |||
170 | desc->tfm = child; | ||
171 | desc->flags = req->base.flags; | ||
172 | return crypto_shash_init(desc); | ||
173 | } | ||
174 | } | ||
175 | |||
176 | static int ghash_async_update(struct ahash_request *req) | ||
177 | { | ||
178 | struct ahash_request *cryptd_req = ahash_request_ctx(req); | ||
179 | |||
180 | if (!irq_fpu_usable()) { | ||
181 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
182 | struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); | ||
183 | struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; | ||
184 | |||
185 | memcpy(cryptd_req, req, sizeof(*req)); | ||
186 | ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); | ||
187 | return crypto_ahash_update(cryptd_req); | ||
188 | } else { | ||
189 | struct shash_desc *desc = cryptd_shash_desc(cryptd_req); | ||
190 | return shash_ahash_update(req, desc); | ||
191 | } | ||
192 | } | ||
193 | |||
194 | static int ghash_async_final(struct ahash_request *req) | ||
195 | { | ||
196 | struct ahash_request *cryptd_req = ahash_request_ctx(req); | ||
197 | |||
198 | if (!irq_fpu_usable()) { | ||
199 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
200 | struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); | ||
201 | struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; | ||
202 | |||
203 | memcpy(cryptd_req, req, sizeof(*req)); | ||
204 | ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); | ||
205 | return crypto_ahash_final(cryptd_req); | ||
206 | } else { | ||
207 | struct shash_desc *desc = cryptd_shash_desc(cryptd_req); | ||
208 | return crypto_shash_final(desc, req->result); | ||
209 | } | ||
210 | } | ||
211 | |||
212 | static int ghash_async_digest(struct ahash_request *req) | ||
213 | { | ||
214 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
215 | struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); | ||
216 | struct ahash_request *cryptd_req = ahash_request_ctx(req); | ||
217 | struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; | ||
218 | |||
219 | if (!irq_fpu_usable()) { | ||
220 | memcpy(cryptd_req, req, sizeof(*req)); | ||
221 | ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); | ||
222 | return crypto_ahash_digest(cryptd_req); | ||
223 | } else { | ||
224 | struct shash_desc *desc = cryptd_shash_desc(cryptd_req); | ||
225 | struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); | ||
226 | |||
227 | desc->tfm = child; | ||
228 | desc->flags = req->base.flags; | ||
229 | return shash_ahash_digest(req, desc); | ||
230 | } | ||
231 | } | ||
232 | |||
233 | static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key, | ||
234 | unsigned int keylen) | ||
235 | { | ||
236 | struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); | ||
237 | struct crypto_ahash *child = &ctx->cryptd_tfm->base; | ||
238 | int err; | ||
239 | |||
240 | crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK); | ||
241 | crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm) | ||
242 | & CRYPTO_TFM_REQ_MASK); | ||
243 | err = crypto_ahash_setkey(child, key, keylen); | ||
244 | crypto_ahash_set_flags(tfm, crypto_ahash_get_flags(child) | ||
245 | & CRYPTO_TFM_RES_MASK); | ||
246 | |||
247 | return 0; | ||
248 | } | ||
249 | |||
250 | static int ghash_async_init_tfm(struct crypto_tfm *tfm) | ||
251 | { | ||
252 | struct cryptd_ahash *cryptd_tfm; | ||
253 | struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); | ||
254 | |||
255 | cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni", 0, 0); | ||
256 | if (IS_ERR(cryptd_tfm)) | ||
257 | return PTR_ERR(cryptd_tfm); | ||
258 | ctx->cryptd_tfm = cryptd_tfm; | ||
259 | crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), | ||
260 | sizeof(struct ahash_request) + | ||
261 | crypto_ahash_reqsize(&cryptd_tfm->base)); | ||
262 | |||
263 | return 0; | ||
264 | } | ||
265 | |||
266 | static void ghash_async_exit_tfm(struct crypto_tfm *tfm) | ||
267 | { | ||
268 | struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); | ||
269 | |||
270 | cryptd_free_ahash(ctx->cryptd_tfm); | ||
271 | } | ||
272 | |||
273 | static struct ahash_alg ghash_async_alg = { | ||
274 | .init = ghash_async_init, | ||
275 | .update = ghash_async_update, | ||
276 | .final = ghash_async_final, | ||
277 | .setkey = ghash_async_setkey, | ||
278 | .digest = ghash_async_digest, | ||
279 | .halg = { | ||
280 | .digestsize = GHASH_DIGEST_SIZE, | ||
281 | .base = { | ||
282 | .cra_name = "ghash", | ||
283 | .cra_driver_name = "ghash-clmulni", | ||
284 | .cra_priority = 400, | ||
285 | .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC, | ||
286 | .cra_blocksize = GHASH_BLOCK_SIZE, | ||
287 | .cra_type = &crypto_ahash_type, | ||
288 | .cra_module = THIS_MODULE, | ||
289 | .cra_list = LIST_HEAD_INIT(ghash_async_alg.halg.base.cra_list), | ||
290 | .cra_init = ghash_async_init_tfm, | ||
291 | .cra_exit = ghash_async_exit_tfm, | ||
292 | }, | ||
293 | }, | ||
294 | }; | ||
295 | |||
296 | static int __init ghash_pclmulqdqni_mod_init(void) | ||
297 | { | ||
298 | int err; | ||
299 | |||
300 | if (!cpu_has_pclmulqdq) { | ||
301 | printk(KERN_INFO "Intel PCLMULQDQ-NI instructions are not" | ||
302 | " detected.\n"); | ||
303 | return -ENODEV; | ||
304 | } | ||
305 | |||
306 | err = crypto_register_shash(&ghash_alg); | ||
307 | if (err) | ||
308 | goto err_out; | ||
309 | err = crypto_register_ahash(&ghash_async_alg); | ||
310 | if (err) | ||
311 | goto err_shash; | ||
312 | |||
313 | return 0; | ||
314 | |||
315 | err_shash: | ||
316 | crypto_unregister_shash(&ghash_alg); | ||
317 | err_out: | ||
318 | return err; | ||
319 | } | ||
320 | |||
321 | static void __exit ghash_pclmulqdqni_mod_exit(void) | ||
322 | { | ||
323 | crypto_unregister_ahash(&ghash_async_alg); | ||
324 | crypto_unregister_shash(&ghash_alg); | ||
325 | } | ||
326 | |||
327 | module_init(ghash_pclmulqdqni_mod_init); | ||
328 | module_exit(ghash_pclmulqdqni_mod_exit); | ||
329 | |||
330 | MODULE_LICENSE("GPL"); | ||
331 | MODULE_DESCRIPTION("GHASH Message Digest Algorithm, " | ||
332 | "acclerated by PCLMULQDQ-NI"); | ||
333 | MODULE_ALIAS("ghash"); | ||
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 74619c4f9fda..53147ad85b96 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S | |||
@@ -21,8 +21,8 @@ | |||
21 | #define __AUDIT_ARCH_LE 0x40000000 | 21 | #define __AUDIT_ARCH_LE 0x40000000 |
22 | 22 | ||
23 | #ifndef CONFIG_AUDITSYSCALL | 23 | #ifndef CONFIG_AUDITSYSCALL |
24 | #define sysexit_audit int_ret_from_sys_call | 24 | #define sysexit_audit ia32_ret_from_sys_call |
25 | #define sysretl_audit int_ret_from_sys_call | 25 | #define sysretl_audit ia32_ret_from_sys_call |
26 | #endif | 26 | #endif |
27 | 27 | ||
28 | #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) | 28 | #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) |
@@ -39,12 +39,12 @@ | |||
39 | .endm | 39 | .endm |
40 | 40 | ||
41 | /* clobbers %eax */ | 41 | /* clobbers %eax */ |
42 | .macro CLEAR_RREGS _r9=rax | 42 | .macro CLEAR_RREGS offset=0, _r9=rax |
43 | xorl %eax,%eax | 43 | xorl %eax,%eax |
44 | movq %rax,R11(%rsp) | 44 | movq %rax,\offset+R11(%rsp) |
45 | movq %rax,R10(%rsp) | 45 | movq %rax,\offset+R10(%rsp) |
46 | movq %\_r9,R9(%rsp) | 46 | movq %\_r9,\offset+R9(%rsp) |
47 | movq %rax,R8(%rsp) | 47 | movq %rax,\offset+R8(%rsp) |
48 | .endm | 48 | .endm |
49 | 49 | ||
50 | /* | 50 | /* |
@@ -172,6 +172,10 @@ sysexit_from_sys_call: | |||
172 | movl RIP-R11(%rsp),%edx /* User %eip */ | 172 | movl RIP-R11(%rsp),%edx /* User %eip */ |
173 | CFI_REGISTER rip,rdx | 173 | CFI_REGISTER rip,rdx |
174 | RESTORE_ARGS 1,24,1,1,1,1 | 174 | RESTORE_ARGS 1,24,1,1,1,1 |
175 | xorq %r8,%r8 | ||
176 | xorq %r9,%r9 | ||
177 | xorq %r10,%r10 | ||
178 | xorq %r11,%r11 | ||
175 | popfq | 179 | popfq |
176 | CFI_ADJUST_CFA_OFFSET -8 | 180 | CFI_ADJUST_CFA_OFFSET -8 |
177 | /*CFI_RESTORE rflags*/ | 181 | /*CFI_RESTORE rflags*/ |
@@ -200,9 +204,9 @@ sysexit_from_sys_call: | |||
200 | movl RDI-ARGOFFSET(%rsp),%r8d /* reload 5th syscall arg */ | 204 | movl RDI-ARGOFFSET(%rsp),%r8d /* reload 5th syscall arg */ |
201 | .endm | 205 | .endm |
202 | 206 | ||
203 | .macro auditsys_exit exit,ebpsave=RBP | 207 | .macro auditsys_exit exit |
204 | testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) | 208 | testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) |
205 | jnz int_ret_from_sys_call | 209 | jnz ia32_ret_from_sys_call |
206 | TRACE_IRQS_ON | 210 | TRACE_IRQS_ON |
207 | sti | 211 | sti |
208 | movl %eax,%esi /* second arg, syscall return value */ | 212 | movl %eax,%esi /* second arg, syscall return value */ |
@@ -213,13 +217,13 @@ sysexit_from_sys_call: | |||
213 | call audit_syscall_exit | 217 | call audit_syscall_exit |
214 | GET_THREAD_INFO(%r10) | 218 | GET_THREAD_INFO(%r10) |
215 | movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall return value */ | 219 | movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall return value */ |
216 | movl \ebpsave-ARGOFFSET(%rsp),%ebp /* reload user register value */ | ||
217 | movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi | 220 | movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi |
218 | cli | 221 | cli |
219 | TRACE_IRQS_OFF | 222 | TRACE_IRQS_OFF |
220 | testl %edi,TI_flags(%r10) | 223 | testl %edi,TI_flags(%r10) |
221 | jnz int_with_check | 224 | jz \exit |
222 | jmp \exit | 225 | CLEAR_RREGS -ARGOFFSET |
226 | jmp int_with_check | ||
223 | .endm | 227 | .endm |
224 | 228 | ||
225 | sysenter_auditsys: | 229 | sysenter_auditsys: |
@@ -329,6 +333,9 @@ sysretl_from_sys_call: | |||
329 | CFI_REGISTER rip,rcx | 333 | CFI_REGISTER rip,rcx |
330 | movl EFLAGS-ARGOFFSET(%rsp),%r11d | 334 | movl EFLAGS-ARGOFFSET(%rsp),%r11d |
331 | /*CFI_REGISTER rflags,r11*/ | 335 | /*CFI_REGISTER rflags,r11*/ |
336 | xorq %r10,%r10 | ||
337 | xorq %r9,%r9 | ||
338 | xorq %r8,%r8 | ||
332 | TRACE_IRQS_ON | 339 | TRACE_IRQS_ON |
333 | movl RSP-ARGOFFSET(%rsp),%esp | 340 | movl RSP-ARGOFFSET(%rsp),%esp |
334 | CFI_RESTORE rsp | 341 | CFI_RESTORE rsp |
@@ -343,7 +350,7 @@ cstar_auditsys: | |||
343 | jmp cstar_dispatch | 350 | jmp cstar_dispatch |
344 | 351 | ||
345 | sysretl_audit: | 352 | sysretl_audit: |
346 | auditsys_exit sysretl_from_sys_call, RCX /* user %ebp in RCX slot */ | 353 | auditsys_exit sysretl_from_sys_call |
347 | #endif | 354 | #endif |
348 | 355 | ||
349 | cstar_tracesys: | 356 | cstar_tracesys: |
@@ -353,7 +360,7 @@ cstar_tracesys: | |||
353 | #endif | 360 | #endif |
354 | xchgl %r9d,%ebp | 361 | xchgl %r9d,%ebp |
355 | SAVE_REST | 362 | SAVE_REST |
356 | CLEAR_RREGS r9 | 363 | CLEAR_RREGS 0, r9 |
357 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ | 364 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ |
358 | movq %rsp,%rdi /* &pt_regs -> arg1 */ | 365 | movq %rsp,%rdi /* &pt_regs -> arg1 */ |
359 | call syscall_trace_enter | 366 | call syscall_trace_enter |
@@ -425,6 +432,8 @@ ia32_do_call: | |||
425 | call *ia32_sys_call_table(,%rax,8) # xxx: rip relative | 432 | call *ia32_sys_call_table(,%rax,8) # xxx: rip relative |
426 | ia32_sysret: | 433 | ia32_sysret: |
427 | movq %rax,RAX-ARGOFFSET(%rsp) | 434 | movq %rax,RAX-ARGOFFSET(%rsp) |
435 | ia32_ret_from_sys_call: | ||
436 | CLEAR_RREGS -ARGOFFSET | ||
428 | jmp int_ret_from_sys_call | 437 | jmp int_ret_from_sys_call |
429 | 438 | ||
430 | ia32_tracesys: | 439 | ia32_tracesys: |
@@ -442,8 +451,8 @@ END(ia32_syscall) | |||
442 | 451 | ||
443 | ia32_badsys: | 452 | ia32_badsys: |
444 | movq $0,ORIG_RAX-ARGOFFSET(%rsp) | 453 | movq $0,ORIG_RAX-ARGOFFSET(%rsp) |
445 | movq $-ENOSYS,RAX-ARGOFFSET(%rsp) | 454 | movq $-ENOSYS,%rax |
446 | jmp int_ret_from_sys_call | 455 | jmp ia32_sysret |
447 | 456 | ||
448 | quiet_ni_syscall: | 457 | quiet_ni_syscall: |
449 | movq $-ENOSYS,%rax | 458 | movq $-ENOSYS,%rax |
@@ -644,7 +653,7 @@ ia32_sys_call_table: | |||
644 | .quad compat_sys_writev | 653 | .quad compat_sys_writev |
645 | .quad sys_getsid | 654 | .quad sys_getsid |
646 | .quad sys_fdatasync | 655 | .quad sys_fdatasync |
647 | .quad sys32_sysctl /* sysctl */ | 656 | .quad compat_sys_sysctl /* sysctl */ |
648 | .quad sys_mlock /* 150 */ | 657 | .quad sys_mlock /* 150 */ |
649 | .quad sys_munlock | 658 | .quad sys_munlock |
650 | .quad sys_mlockall | 659 | .quad sys_mlockall |
@@ -687,7 +696,7 @@ ia32_sys_call_table: | |||
687 | .quad quiet_ni_syscall /* streams2 */ | 696 | .quad quiet_ni_syscall /* streams2 */ |
688 | .quad stub32_vfork /* 190 */ | 697 | .quad stub32_vfork /* 190 */ |
689 | .quad compat_sys_getrlimit | 698 | .quad compat_sys_getrlimit |
690 | .quad sys32_mmap2 | 699 | .quad sys_mmap_pgoff |
691 | .quad sys32_truncate64 | 700 | .quad sys32_truncate64 |
692 | .quad sys32_ftruncate64 | 701 | .quad sys32_ftruncate64 |
693 | .quad sys32_stat64 /* 195 */ | 702 | .quad sys32_stat64 /* 195 */ |
@@ -832,4 +841,5 @@ ia32_sys_call_table: | |||
832 | .quad compat_sys_pwritev | 841 | .quad compat_sys_pwritev |
833 | .quad compat_sys_rt_tgsigqueueinfo /* 335 */ | 842 | .quad compat_sys_rt_tgsigqueueinfo /* 335 */ |
834 | .quad sys_perf_event_open | 843 | .quad sys_perf_event_open |
844 | .quad compat_sys_recvmmsg | ||
835 | ia32_syscall_end: | 845 | ia32_syscall_end: |
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 9f5527198825..422572c77923 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c | |||
@@ -155,9 +155,6 @@ struct mmap_arg_struct { | |||
155 | asmlinkage long sys32_mmap(struct mmap_arg_struct __user *arg) | 155 | asmlinkage long sys32_mmap(struct mmap_arg_struct __user *arg) |
156 | { | 156 | { |
157 | struct mmap_arg_struct a; | 157 | struct mmap_arg_struct a; |
158 | struct file *file = NULL; | ||
159 | unsigned long retval; | ||
160 | struct mm_struct *mm ; | ||
161 | 158 | ||
162 | if (copy_from_user(&a, arg, sizeof(a))) | 159 | if (copy_from_user(&a, arg, sizeof(a))) |
163 | return -EFAULT; | 160 | return -EFAULT; |
@@ -165,22 +162,8 @@ asmlinkage long sys32_mmap(struct mmap_arg_struct __user *arg) | |||
165 | if (a.offset & ~PAGE_MASK) | 162 | if (a.offset & ~PAGE_MASK) |
166 | return -EINVAL; | 163 | return -EINVAL; |
167 | 164 | ||
168 | if (!(a.flags & MAP_ANONYMOUS)) { | 165 | return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, |
169 | file = fget(a.fd); | ||
170 | if (!file) | ||
171 | return -EBADF; | ||
172 | } | ||
173 | |||
174 | mm = current->mm; | ||
175 | down_write(&mm->mmap_sem); | ||
176 | retval = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags, | ||
177 | a.offset>>PAGE_SHIFT); | 166 | a.offset>>PAGE_SHIFT); |
178 | if (file) | ||
179 | fput(file); | ||
180 | |||
181 | up_write(&mm->mmap_sem); | ||
182 | |||
183 | return retval; | ||
184 | } | 167 | } |
185 | 168 | ||
186 | asmlinkage long sys32_mprotect(unsigned long start, size_t len, | 169 | asmlinkage long sys32_mprotect(unsigned long start, size_t len, |
@@ -434,62 +417,6 @@ asmlinkage long sys32_rt_sigqueueinfo(int pid, int sig, | |||
434 | return ret; | 417 | return ret; |
435 | } | 418 | } |
436 | 419 | ||
437 | #ifdef CONFIG_SYSCTL_SYSCALL | ||
438 | struct sysctl_ia32 { | ||
439 | unsigned int name; | ||
440 | int nlen; | ||
441 | unsigned int oldval; | ||
442 | unsigned int oldlenp; | ||
443 | unsigned int newval; | ||
444 | unsigned int newlen; | ||
445 | unsigned int __unused[4]; | ||
446 | }; | ||
447 | |||
448 | |||
449 | asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *args32) | ||
450 | { | ||
451 | struct sysctl_ia32 a32; | ||
452 | mm_segment_t old_fs = get_fs(); | ||
453 | void __user *oldvalp, *newvalp; | ||
454 | size_t oldlen; | ||
455 | int __user *namep; | ||
456 | long ret; | ||
457 | |||
458 | if (copy_from_user(&a32, args32, sizeof(a32))) | ||
459 | return -EFAULT; | ||
460 | |||
461 | /* | ||
462 | * We need to pre-validate these because we have to disable | ||
463 | * address checking before calling do_sysctl() because of | ||
464 | * OLDLEN but we can't run the risk of the user specifying bad | ||
465 | * addresses here. Well, since we're dealing with 32 bit | ||
466 | * addresses, we KNOW that access_ok() will always succeed, so | ||
467 | * this is an expensive NOP, but so what... | ||
468 | */ | ||
469 | namep = compat_ptr(a32.name); | ||
470 | oldvalp = compat_ptr(a32.oldval); | ||
471 | newvalp = compat_ptr(a32.newval); | ||
472 | |||
473 | if ((oldvalp && get_user(oldlen, (int __user *)compat_ptr(a32.oldlenp))) | ||
474 | || !access_ok(VERIFY_WRITE, namep, 0) | ||
475 | || !access_ok(VERIFY_WRITE, oldvalp, 0) | ||
476 | || !access_ok(VERIFY_WRITE, newvalp, 0)) | ||
477 | return -EFAULT; | ||
478 | |||
479 | set_fs(KERNEL_DS); | ||
480 | lock_kernel(); | ||
481 | ret = do_sysctl(namep, a32.nlen, oldvalp, (size_t __user *)&oldlen, | ||
482 | newvalp, (size_t) a32.newlen); | ||
483 | unlock_kernel(); | ||
484 | set_fs(old_fs); | ||
485 | |||
486 | if (oldvalp && put_user(oldlen, (int __user *)compat_ptr(a32.oldlenp))) | ||
487 | return -EFAULT; | ||
488 | |||
489 | return ret; | ||
490 | } | ||
491 | #endif | ||
492 | |||
493 | /* warning: next two assume little endian */ | 420 | /* warning: next two assume little endian */ |
494 | asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count, | 421 | asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count, |
495 | u32 poslo, u32 poshi) | 422 | u32 poslo, u32 poshi) |
@@ -539,30 +466,6 @@ asmlinkage long sys32_sendfile(int out_fd, int in_fd, | |||
539 | return ret; | 466 | return ret; |
540 | } | 467 | } |
541 | 468 | ||
542 | asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len, | ||
543 | unsigned long prot, unsigned long flags, | ||
544 | unsigned long fd, unsigned long pgoff) | ||
545 | { | ||
546 | struct mm_struct *mm = current->mm; | ||
547 | unsigned long error; | ||
548 | struct file *file = NULL; | ||
549 | |||
550 | flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); | ||
551 | if (!(flags & MAP_ANONYMOUS)) { | ||
552 | file = fget(fd); | ||
553 | if (!file) | ||
554 | return -EBADF; | ||
555 | } | ||
556 | |||
557 | down_write(&mm->mmap_sem); | ||
558 | error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); | ||
559 | up_write(&mm->mmap_sem); | ||
560 | |||
561 | if (file) | ||
562 | fput(file); | ||
563 | return error; | ||
564 | } | ||
565 | |||
566 | asmlinkage long sys32_olduname(struct oldold_utsname __user *name) | 469 | asmlinkage long sys32_olduname(struct oldold_utsname __user *name) |
567 | { | 470 | { |
568 | char *arch = "x86_64"; | 471 | char *arch = "x86_64"; |
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 4a8e80cdcfa5..9f828f87ca35 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild | |||
@@ -10,6 +10,7 @@ header-y += ptrace-abi.h | |||
10 | header-y += sigcontext32.h | 10 | header-y += sigcontext32.h |
11 | header-y += ucontext.h | 11 | header-y += ucontext.h |
12 | header-y += processor-flags.h | 12 | header-y += processor-flags.h |
13 | header-y += hw_breakpoint.h | ||
13 | 14 | ||
14 | unifdef-y += e820.h | 15 | unifdef-y += e820.h |
15 | unifdef-y += ist.h | 16 | unifdef-y += ist.h |
diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h index bb70e397aa84..7a15588e45d4 100644 --- a/arch/x86/include/asm/a.out-core.h +++ b/arch/x86/include/asm/a.out-core.h | |||
@@ -17,6 +17,7 @@ | |||
17 | 17 | ||
18 | #include <linux/user.h> | 18 | #include <linux/user.h> |
19 | #include <linux/elfcore.h> | 19 | #include <linux/elfcore.h> |
20 | #include <asm/debugreg.h> | ||
20 | 21 | ||
21 | /* | 22 | /* |
22 | * fill in the user structure for an a.out core dump | 23 | * fill in the user structure for an a.out core dump |
@@ -32,14 +33,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) | |||
32 | >> PAGE_SHIFT; | 33 | >> PAGE_SHIFT; |
33 | dump->u_dsize -= dump->u_tsize; | 34 | dump->u_dsize -= dump->u_tsize; |
34 | dump->u_ssize = 0; | 35 | dump->u_ssize = 0; |
35 | dump->u_debugreg[0] = current->thread.debugreg0; | 36 | aout_dump_debugregs(dump); |
36 | dump->u_debugreg[1] = current->thread.debugreg1; | ||
37 | dump->u_debugreg[2] = current->thread.debugreg2; | ||
38 | dump->u_debugreg[3] = current->thread.debugreg3; | ||
39 | dump->u_debugreg[4] = 0; | ||
40 | dump->u_debugreg[5] = 0; | ||
41 | dump->u_debugreg[6] = current->thread.debugreg6; | ||
42 | dump->u_debugreg[7] = current->thread.debugreg7; | ||
43 | 37 | ||
44 | if (dump->start_stack < TASK_SIZE) | 38 | if (dump->start_stack < TASK_SIZE) |
45 | dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack)) | 39 | dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack)) |
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 4518dc500903..56f462cf22d2 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h | |||
@@ -118,7 +118,7 @@ extern void acpi_restore_state_mem(void); | |||
118 | extern unsigned long acpi_wakeup_address; | 118 | extern unsigned long acpi_wakeup_address; |
119 | 119 | ||
120 | /* early initialization routine */ | 120 | /* early initialization routine */ |
121 | extern void acpi_reserve_bootmem(void); | 121 | extern void acpi_reserve_wakeup_memory(void); |
122 | 122 | ||
123 | /* | 123 | /* |
124 | * Check if the CPU can handle C2 and deeper | 124 | * Check if the CPU can handle C2 and deeper |
@@ -142,6 +142,32 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) | |||
142 | return max_cstate; | 142 | return max_cstate; |
143 | } | 143 | } |
144 | 144 | ||
145 | static inline bool arch_has_acpi_pdc(void) | ||
146 | { | ||
147 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
148 | return (c->x86_vendor == X86_VENDOR_INTEL || | ||
149 | c->x86_vendor == X86_VENDOR_CENTAUR); | ||
150 | } | ||
151 | |||
152 | static inline void arch_acpi_set_pdc_bits(u32 *buf) | ||
153 | { | ||
154 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
155 | |||
156 | buf[2] |= ACPI_PDC_C_CAPABILITY_SMP; | ||
157 | |||
158 | if (cpu_has(c, X86_FEATURE_EST)) | ||
159 | buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP; | ||
160 | |||
161 | if (cpu_has(c, X86_FEATURE_ACPI)) | ||
162 | buf[2] |= ACPI_PDC_T_FFH; | ||
163 | |||
164 | /* | ||
165 | * If mwait/monitor is unsupported, C2/C3_FFH will be disabled | ||
166 | */ | ||
167 | if (!cpu_has(c, X86_FEATURE_MWAIT)) | ||
168 | buf[2] &= ~(ACPI_PDC_C_C2C3_FFH); | ||
169 | } | ||
170 | |||
145 | #else /* !CONFIG_ACPI */ | 171 | #else /* !CONFIG_ACPI */ |
146 | 172 | ||
147 | #define acpi_lapic 0 | 173 | #define acpi_lapic 0 |
@@ -158,6 +184,7 @@ struct bootnode; | |||
158 | 184 | ||
159 | #ifdef CONFIG_ACPI_NUMA | 185 | #ifdef CONFIG_ACPI_NUMA |
160 | extern int acpi_numa; | 186 | extern int acpi_numa; |
187 | extern int acpi_get_nodes(struct bootnode *physnodes); | ||
161 | extern int acpi_scan_nodes(unsigned long start, unsigned long end); | 188 | extern int acpi_scan_nodes(unsigned long start, unsigned long end); |
162 | #define NR_NODE_MEMBLKS (MAX_NUMNODES*2) | 189 | #define NR_NODE_MEMBLKS (MAX_NUMNODES*2) |
163 | extern void acpi_fake_nodes(const struct bootnode *fake_nodes, | 190 | extern void acpi_fake_nodes(const struct bootnode *fake_nodes, |
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index e2077d343c33..b97f786a48d5 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h | |||
@@ -1,17 +1,13 @@ | |||
1 | #ifdef __ASSEMBLY__ | 1 | #ifdef __ASSEMBLY__ |
2 | 2 | ||
3 | #ifdef CONFIG_X86_32 | 3 | #include <asm/asm.h> |
4 | # define X86_ALIGN .long | ||
5 | #else | ||
6 | # define X86_ALIGN .quad | ||
7 | #endif | ||
8 | 4 | ||
9 | #ifdef CONFIG_SMP | 5 | #ifdef CONFIG_SMP |
10 | .macro LOCK_PREFIX | 6 | .macro LOCK_PREFIX |
11 | 1: lock | 7 | 1: lock |
12 | .section .smp_locks,"a" | 8 | .section .smp_locks,"a" |
13 | .align 4 | 9 | _ASM_ALIGN |
14 | X86_ALIGN 1b | 10 | _ASM_PTR 1b |
15 | .previous | 11 | .previous |
16 | .endm | 12 | .endm |
17 | #else | 13 | #else |
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index c240efc74e00..69b74a7b877f 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h | |||
@@ -84,6 +84,7 @@ static inline void alternatives_smp_switch(int smp) {} | |||
84 | " .byte " __stringify(feature) "\n" /* feature bit */ \ | 84 | " .byte " __stringify(feature) "\n" /* feature bit */ \ |
85 | " .byte 662b-661b\n" /* sourcelen */ \ | 85 | " .byte 662b-661b\n" /* sourcelen */ \ |
86 | " .byte 664f-663f\n" /* replacementlen */ \ | 86 | " .byte 664f-663f\n" /* replacementlen */ \ |
87 | " .byte 0xff + (664f-663f) - (662b-661b)\n" /* rlen <= slen */ \ | ||
87 | ".previous\n" \ | 88 | ".previous\n" \ |
88 | ".section .altinstr_replacement, \"ax\"\n" \ | 89 | ".section .altinstr_replacement, \"ax\"\n" \ |
89 | "663:\n\t" newinstr "\n664:\n" /* replacement */ \ | 90 | "663:\n\t" newinstr "\n664:\n" /* replacement */ \ |
diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index ac95995b7bad..5af2982133b5 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. | 2 | * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. |
3 | * Author: Joerg Roedel <joerg.roedel@amd.com> | 3 | * Author: Joerg Roedel <joerg.roedel@amd.com> |
4 | * Leo Duran <leo.duran@amd.com> | 4 | * Leo Duran <leo.duran@amd.com> |
5 | * | 5 | * |
@@ -23,18 +23,13 @@ | |||
23 | #include <linux/irqreturn.h> | 23 | #include <linux/irqreturn.h> |
24 | 24 | ||
25 | #ifdef CONFIG_AMD_IOMMU | 25 | #ifdef CONFIG_AMD_IOMMU |
26 | extern int amd_iommu_init(void); | 26 | |
27 | extern int amd_iommu_init_dma_ops(void); | ||
28 | extern int amd_iommu_init_passthrough(void); | ||
29 | extern void amd_iommu_detect(void); | 27 | extern void amd_iommu_detect(void); |
30 | extern irqreturn_t amd_iommu_int_handler(int irq, void *data); | 28 | |
31 | extern void amd_iommu_flush_all_domains(void); | ||
32 | extern void amd_iommu_flush_all_devices(void); | ||
33 | extern void amd_iommu_shutdown(void); | ||
34 | #else | 29 | #else |
35 | static inline int amd_iommu_init(void) { return -ENODEV; } | 30 | |
36 | static inline void amd_iommu_detect(void) { } | 31 | static inline void amd_iommu_detect(void) { } |
37 | static inline void amd_iommu_shutdown(void) { } | 32 | |
38 | #endif | 33 | #endif |
39 | 34 | ||
40 | #endif /* _ASM_X86_AMD_IOMMU_H */ | 35 | #endif /* _ASM_X86_AMD_IOMMU_H */ |
diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h new file mode 100644 index 000000000000..4d817f9e6e77 --- /dev/null +++ b/arch/x86/include/asm/amd_iommu_proto.h | |||
@@ -0,0 +1,40 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2009 Advanced Micro Devices, Inc. | ||
3 | * Author: Joerg Roedel <joerg.roedel@amd.com> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms of the GNU General Public License version 2 as published | ||
7 | * by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | */ | ||
18 | |||
19 | #ifndef _ASM_X86_AMD_IOMMU_PROTO_H | ||
20 | #define _ASM_X86_AMD_IOMMU_PROTO_H | ||
21 | |||
22 | struct amd_iommu; | ||
23 | |||
24 | extern int amd_iommu_init_dma_ops(void); | ||
25 | extern int amd_iommu_init_passthrough(void); | ||
26 | extern irqreturn_t amd_iommu_int_handler(int irq, void *data); | ||
27 | extern void amd_iommu_flush_all_domains(void); | ||
28 | extern void amd_iommu_flush_all_devices(void); | ||
29 | extern void amd_iommu_apply_erratum_63(u16 devid); | ||
30 | extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); | ||
31 | extern int amd_iommu_init_devices(void); | ||
32 | extern void amd_iommu_uninit_devices(void); | ||
33 | extern void amd_iommu_init_notifier(void); | ||
34 | #ifndef CONFIG_AMD_IOMMU_STATS | ||
35 | |||
36 | static inline void amd_iommu_stats_init(void) { } | ||
37 | |||
38 | #endif /* !CONFIG_AMD_IOMMU_STATS */ | ||
39 | |||
40 | #endif /* _ASM_X86_AMD_IOMMU_PROTO_H */ | ||
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 2a2cc7a78a81..ba19ad4c47d0 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. | 2 | * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. |
3 | * Author: Joerg Roedel <joerg.roedel@amd.com> | 3 | * Author: Joerg Roedel <joerg.roedel@amd.com> |
4 | * Leo Duran <leo.duran@amd.com> | 4 | * Leo Duran <leo.duran@amd.com> |
5 | * | 5 | * |
@@ -25,6 +25,11 @@ | |||
25 | #include <linux/spinlock.h> | 25 | #include <linux/spinlock.h> |
26 | 26 | ||
27 | /* | 27 | /* |
28 | * Maximum number of IOMMUs supported | ||
29 | */ | ||
30 | #define MAX_IOMMUS 32 | ||
31 | |||
32 | /* | ||
28 | * some size calculation constants | 33 | * some size calculation constants |
29 | */ | 34 | */ |
30 | #define DEV_TABLE_ENTRY_SIZE 32 | 35 | #define DEV_TABLE_ENTRY_SIZE 32 |
@@ -206,6 +211,9 @@ extern bool amd_iommu_dump; | |||
206 | printk(KERN_INFO "AMD-Vi: " format, ## arg); \ | 211 | printk(KERN_INFO "AMD-Vi: " format, ## arg); \ |
207 | } while(0); | 212 | } while(0); |
208 | 213 | ||
214 | /* global flag if IOMMUs cache non-present entries */ | ||
215 | extern bool amd_iommu_np_cache; | ||
216 | |||
209 | /* | 217 | /* |
210 | * Make iterating over all IOMMUs easier | 218 | * Make iterating over all IOMMUs easier |
211 | */ | 219 | */ |
@@ -226,6 +234,8 @@ extern bool amd_iommu_dump; | |||
226 | * independent of their use. | 234 | * independent of their use. |
227 | */ | 235 | */ |
228 | struct protection_domain { | 236 | struct protection_domain { |
237 | struct list_head list; /* for list of all protection domains */ | ||
238 | struct list_head dev_list; /* List of all devices in this domain */ | ||
229 | spinlock_t lock; /* mostly used to lock the page table*/ | 239 | spinlock_t lock; /* mostly used to lock the page table*/ |
230 | u16 id; /* the domain id written to the device table */ | 240 | u16 id; /* the domain id written to the device table */ |
231 | int mode; /* paging mode (0-6 levels) */ | 241 | int mode; /* paging mode (0-6 levels) */ |
@@ -233,7 +243,20 @@ struct protection_domain { | |||
233 | unsigned long flags; /* flags to find out type of domain */ | 243 | unsigned long flags; /* flags to find out type of domain */ |
234 | bool updated; /* complete domain flush required */ | 244 | bool updated; /* complete domain flush required */ |
235 | unsigned dev_cnt; /* devices assigned to this domain */ | 245 | unsigned dev_cnt; /* devices assigned to this domain */ |
246 | unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ | ||
236 | void *priv; /* private data */ | 247 | void *priv; /* private data */ |
248 | |||
249 | }; | ||
250 | |||
251 | /* | ||
252 | * This struct contains device specific data for the IOMMU | ||
253 | */ | ||
254 | struct iommu_dev_data { | ||
255 | struct list_head list; /* For domain->dev_list */ | ||
256 | struct device *dev; /* Device this data belong to */ | ||
257 | struct device *alias; /* The Alias Device */ | ||
258 | struct protection_domain *domain; /* Domain the device is bound to */ | ||
259 | atomic_t bind; /* Domain attach reverent count */ | ||
237 | }; | 260 | }; |
238 | 261 | ||
239 | /* | 262 | /* |
@@ -291,6 +314,9 @@ struct dma_ops_domain { | |||
291 | struct amd_iommu { | 314 | struct amd_iommu { |
292 | struct list_head list; | 315 | struct list_head list; |
293 | 316 | ||
317 | /* Index within the IOMMU array */ | ||
318 | int index; | ||
319 | |||
294 | /* locks the accesses to the hardware */ | 320 | /* locks the accesses to the hardware */ |
295 | spinlock_t lock; | 321 | spinlock_t lock; |
296 | 322 | ||
@@ -357,6 +383,21 @@ struct amd_iommu { | |||
357 | extern struct list_head amd_iommu_list; | 383 | extern struct list_head amd_iommu_list; |
358 | 384 | ||
359 | /* | 385 | /* |
386 | * Array with pointers to each IOMMU struct | ||
387 | * The indices are referenced in the protection domains | ||
388 | */ | ||
389 | extern struct amd_iommu *amd_iommus[MAX_IOMMUS]; | ||
390 | |||
391 | /* Number of IOMMUs present in the system */ | ||
392 | extern int amd_iommus_present; | ||
393 | |||
394 | /* | ||
395 | * Declarations for the global list of all protection domains | ||
396 | */ | ||
397 | extern spinlock_t amd_iommu_pd_lock; | ||
398 | extern struct list_head amd_iommu_pd_list; | ||
399 | |||
400 | /* | ||
360 | * Structure defining one entry in the device table | 401 | * Structure defining one entry in the device table |
361 | */ | 402 | */ |
362 | struct dev_table_entry { | 403 | struct dev_table_entry { |
@@ -416,15 +457,9 @@ extern unsigned amd_iommu_aperture_order; | |||
416 | /* largest PCI device id we expect translation requests for */ | 457 | /* largest PCI device id we expect translation requests for */ |
417 | extern u16 amd_iommu_last_bdf; | 458 | extern u16 amd_iommu_last_bdf; |
418 | 459 | ||
419 | /* data structures for protection domain handling */ | ||
420 | extern struct protection_domain **amd_iommu_pd_table; | ||
421 | |||
422 | /* allocation bitmap for domain ids */ | 460 | /* allocation bitmap for domain ids */ |
423 | extern unsigned long *amd_iommu_pd_alloc_bitmap; | 461 | extern unsigned long *amd_iommu_pd_alloc_bitmap; |
424 | 462 | ||
425 | /* will be 1 if device isolation is enabled */ | ||
426 | extern bool amd_iommu_isolate; | ||
427 | |||
428 | /* | 463 | /* |
429 | * If true, the addresses will be flushed on unmap time, not when | 464 | * If true, the addresses will be flushed on unmap time, not when |
430 | * they are reused | 465 | * they are reused |
@@ -462,11 +497,6 @@ struct __iommu_counter { | |||
462 | #define ADD_STATS_COUNTER(name, x) | 497 | #define ADD_STATS_COUNTER(name, x) |
463 | #define SUB_STATS_COUNTER(name, x) | 498 | #define SUB_STATS_COUNTER(name, x) |
464 | 499 | ||
465 | static inline void amd_iommu_stats_init(void) { } | ||
466 | |||
467 | #endif /* CONFIG_AMD_IOMMU_STATS */ | 500 | #endif /* CONFIG_AMD_IOMMU_STATS */ |
468 | 501 | ||
469 | /* some function prototypes */ | ||
470 | extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); | ||
471 | |||
472 | #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */ | 502 | #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */ |
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 474d80d3e6cc..b4ac2cdcb64f 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h | |||
@@ -297,20 +297,20 @@ struct apic { | |||
297 | int disable_esr; | 297 | int disable_esr; |
298 | 298 | ||
299 | int dest_logical; | 299 | int dest_logical; |
300 | unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); | 300 | unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid); |
301 | unsigned long (*check_apicid_present)(int apicid); | 301 | unsigned long (*check_apicid_present)(int apicid); |
302 | 302 | ||
303 | void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); | 303 | void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); |
304 | void (*init_apic_ldr)(void); | 304 | void (*init_apic_ldr)(void); |
305 | 305 | ||
306 | physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map); | 306 | void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); |
307 | 307 | ||
308 | void (*setup_apic_routing)(void); | 308 | void (*setup_apic_routing)(void); |
309 | int (*multi_timer_check)(int apic, int irq); | 309 | int (*multi_timer_check)(int apic, int irq); |
310 | int (*apicid_to_node)(int logical_apicid); | 310 | int (*apicid_to_node)(int logical_apicid); |
311 | int (*cpu_to_logical_apicid)(int cpu); | 311 | int (*cpu_to_logical_apicid)(int cpu); |
312 | int (*cpu_present_to_apicid)(int mps_cpu); | 312 | int (*cpu_present_to_apicid)(int mps_cpu); |
313 | physid_mask_t (*apicid_to_cpu_present)(int phys_apicid); | 313 | void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap); |
314 | void (*setup_portio_remap)(void); | 314 | void (*setup_portio_remap)(void); |
315 | int (*check_phys_apicid_present)(int phys_apicid); | 315 | int (*check_phys_apicid_present)(int phys_apicid); |
316 | void (*enable_apic_mode)(void); | 316 | void (*enable_apic_mode)(void); |
@@ -488,6 +488,8 @@ static inline unsigned int read_apic_id(void) | |||
488 | 488 | ||
489 | extern void default_setup_apic_routing(void); | 489 | extern void default_setup_apic_routing(void); |
490 | 490 | ||
491 | extern struct apic apic_noop; | ||
492 | |||
491 | #ifdef CONFIG_X86_32 | 493 | #ifdef CONFIG_X86_32 |
492 | 494 | ||
493 | extern struct apic apic_default; | 495 | extern struct apic apic_default; |
@@ -532,9 +534,9 @@ default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | |||
532 | return (unsigned int)(mask1 & mask2 & mask3); | 534 | return (unsigned int)(mask1 & mask2 & mask3); |
533 | } | 535 | } |
534 | 536 | ||
535 | static inline unsigned long default_check_apicid_used(physid_mask_t bitmap, int apicid) | 537 | static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid) |
536 | { | 538 | { |
537 | return physid_isset(apicid, bitmap); | 539 | return physid_isset(apicid, *map); |
538 | } | 540 | } |
539 | 541 | ||
540 | static inline unsigned long default_check_apicid_present(int bit) | 542 | static inline unsigned long default_check_apicid_present(int bit) |
@@ -542,9 +544,9 @@ static inline unsigned long default_check_apicid_present(int bit) | |||
542 | return physid_isset(bit, phys_cpu_present_map); | 544 | return physid_isset(bit, phys_cpu_present_map); |
543 | } | 545 | } |
544 | 546 | ||
545 | static inline physid_mask_t default_ioapic_phys_id_map(physid_mask_t phys_map) | 547 | static inline void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) |
546 | { | 548 | { |
547 | return phys_map; | 549 | *retmap = *phys_map; |
548 | } | 550 | } |
549 | 551 | ||
550 | /* Mapping from cpu number to logical apicid */ | 552 | /* Mapping from cpu number to logical apicid */ |
@@ -583,11 +585,6 @@ extern int default_cpu_present_to_apicid(int mps_cpu); | |||
583 | extern int default_check_phys_apicid_present(int phys_apicid); | 585 | extern int default_check_phys_apicid_present(int phys_apicid); |
584 | #endif | 586 | #endif |
585 | 587 | ||
586 | static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid) | ||
587 | { | ||
588 | return physid_mask_of_physid(phys_apicid); | ||
589 | } | ||
590 | |||
591 | #endif /* CONFIG_X86_LOCAL_APIC */ | 588 | #endif /* CONFIG_X86_LOCAL_APIC */ |
592 | 589 | ||
593 | #ifdef CONFIG_X86_32 | 590 | #ifdef CONFIG_X86_32 |
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 3b62da926de9..7fe3b3060f08 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h | |||
@@ -11,6 +11,12 @@ | |||
11 | #define IO_APIC_DEFAULT_PHYS_BASE 0xfec00000 | 11 | #define IO_APIC_DEFAULT_PHYS_BASE 0xfec00000 |
12 | #define APIC_DEFAULT_PHYS_BASE 0xfee00000 | 12 | #define APIC_DEFAULT_PHYS_BASE 0xfee00000 |
13 | 13 | ||
14 | /* | ||
15 | * This is the IO-APIC register space as specified | ||
16 | * by Intel docs: | ||
17 | */ | ||
18 | #define IO_APIC_SLOT_SIZE 1024 | ||
19 | |||
14 | #define APIC_ID 0x20 | 20 | #define APIC_ID 0x20 |
15 | 21 | ||
16 | #define APIC_LVR 0x30 | 22 | #define APIC_LVR 0x30 |
diff --git a/arch/x86/include/asm/apicnum.h b/arch/x86/include/asm/apicnum.h deleted file mode 100644 index 82f613c607ce..000000000000 --- a/arch/x86/include/asm/apicnum.h +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | #ifndef _ASM_X86_APICNUM_H | ||
2 | #define _ASM_X86_APICNUM_H | ||
3 | |||
4 | /* define MAX_IO_APICS */ | ||
5 | #ifdef CONFIG_X86_32 | ||
6 | # define MAX_IO_APICS 64 | ||
7 | #else | ||
8 | # define MAX_IO_APICS 128 | ||
9 | # define MAX_LOCAL_APIC 32768 | ||
10 | #endif | ||
11 | |||
12 | #endif /* _ASM_X86_APICNUM_H */ | ||
diff --git a/arch/x86/include/asm/asm-offsets.h b/arch/x86/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/x86/include/asm/asm-offsets.h | |||
@@ -0,0 +1 @@ | |||
#include <generated/asm-offsets.h> | |||
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index d9cf1cd156d2..f654d1bb17fb 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h | |||
@@ -22,14 +22,14 @@ do { \ | |||
22 | ".popsection" \ | 22 | ".popsection" \ |
23 | : : "i" (__FILE__), "i" (__LINE__), \ | 23 | : : "i" (__FILE__), "i" (__LINE__), \ |
24 | "i" (sizeof(struct bug_entry))); \ | 24 | "i" (sizeof(struct bug_entry))); \ |
25 | for (;;) ; \ | 25 | unreachable(); \ |
26 | } while (0) | 26 | } while (0) |
27 | 27 | ||
28 | #else | 28 | #else |
29 | #define BUG() \ | 29 | #define BUG() \ |
30 | do { \ | 30 | do { \ |
31 | asm volatile("ud2"); \ | 31 | asm volatile("ud2"); \ |
32 | for (;;) ; \ | 32 | unreachable(); \ |
33 | } while (0) | 33 | } while (0) |
34 | #endif | 34 | #endif |
35 | 35 | ||
diff --git a/arch/x86/include/asm/cache.h b/arch/x86/include/asm/cache.h index 549860d3be8f..2f9047cfaaca 100644 --- a/arch/x86/include/asm/cache.h +++ b/arch/x86/include/asm/cache.h | |||
@@ -9,12 +9,13 @@ | |||
9 | 9 | ||
10 | #define __read_mostly __attribute__((__section__(".data.read_mostly"))) | 10 | #define __read_mostly __attribute__((__section__(".data.read_mostly"))) |
11 | 11 | ||
12 | #define INTERNODE_CACHE_SHIFT CONFIG_X86_INTERNODE_CACHE_SHIFT | ||
13 | #define INTERNODE_CACHE_BYTES (1 << INTERNODE_CACHE_SHIFT) | ||
14 | |||
12 | #ifdef CONFIG_X86_VSMP | 15 | #ifdef CONFIG_X86_VSMP |
13 | /* vSMP Internode cacheline shift */ | ||
14 | #define INTERNODE_CACHE_SHIFT (12) | ||
15 | #ifdef CONFIG_SMP | 16 | #ifdef CONFIG_SMP |
16 | #define __cacheline_aligned_in_smp \ | 17 | #define __cacheline_aligned_in_smp \ |
17 | __attribute__((__aligned__(1 << (INTERNODE_CACHE_SHIFT)))) \ | 18 | __attribute__((__aligned__(INTERNODE_CACHE_BYTES))) \ |
18 | __page_aligned_data | 19 | __page_aligned_data |
19 | #endif | 20 | #endif |
20 | #endif | 21 | #endif |
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index b54f6afe7ec4..634c40a739a6 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h | |||
@@ -12,6 +12,7 @@ static inline void flush_cache_range(struct vm_area_struct *vma, | |||
12 | unsigned long start, unsigned long end) { } | 12 | unsigned long start, unsigned long end) { } |
13 | static inline void flush_cache_page(struct vm_area_struct *vma, | 13 | static inline void flush_cache_page(struct vm_area_struct *vma, |
14 | unsigned long vmaddr, unsigned long pfn) { } | 14 | unsigned long vmaddr, unsigned long pfn) { } |
15 | #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 | ||
15 | static inline void flush_dcache_page(struct page *page) { } | 16 | static inline void flush_dcache_page(struct page *page) { } |
16 | static inline void flush_dcache_mmap_lock(struct address_space *mapping) { } | 17 | static inline void flush_dcache_mmap_lock(struct address_space *mapping) { } |
17 | static inline void flush_dcache_mmap_unlock(struct address_space *mapping) { } | 18 | static inline void flush_dcache_mmap_unlock(struct address_space *mapping) { } |
@@ -176,6 +177,7 @@ void clflush_cache_range(void *addr, unsigned int size); | |||
176 | #ifdef CONFIG_DEBUG_RODATA | 177 | #ifdef CONFIG_DEBUG_RODATA |
177 | void mark_rodata_ro(void); | 178 | void mark_rodata_ro(void); |
178 | extern const int rodata_test_data; | 179 | extern const int rodata_test_data; |
180 | extern int kernel_set_to_readonly; | ||
179 | void set_kernel_text_rw(void); | 181 | void set_kernel_text_rw(void); |
180 | void set_kernel_text_ro(void); | 182 | void set_kernel_text_ro(void); |
181 | #else | 183 | #else |
diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h index b03bedb62aa7..0918654305af 100644 --- a/arch/x86/include/asm/calgary.h +++ b/arch/x86/include/asm/calgary.h | |||
@@ -62,10 +62,8 @@ struct cal_chipset_ops { | |||
62 | extern int use_calgary; | 62 | extern int use_calgary; |
63 | 63 | ||
64 | #ifdef CONFIG_CALGARY_IOMMU | 64 | #ifdef CONFIG_CALGARY_IOMMU |
65 | extern int calgary_iommu_init(void); | ||
66 | extern void detect_calgary(void); | 65 | extern void detect_calgary(void); |
67 | #else | 66 | #else |
68 | static inline int calgary_iommu_init(void) { return 1; } | ||
69 | static inline void detect_calgary(void) { return; } | 67 | static inline void detect_calgary(void) { return; } |
70 | #endif | 68 | #endif |
71 | 69 | ||
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index ee1931be6593..ffb9bb6b6c37 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h | |||
@@ -8,14 +8,50 @@ | |||
8 | * you need to test for the feature in boot_cpu_data. | 8 | * you need to test for the feature in boot_cpu_data. |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #define xchg(ptr, v) \ | 11 | extern void __xchg_wrong_size(void); |
12 | ((__typeof__(*(ptr)))__xchg((unsigned long)(v), (ptr), sizeof(*(ptr)))) | 12 | |
13 | /* | ||
14 | * Note: no "lock" prefix even on SMP: xchg always implies lock anyway | ||
15 | * Note 2: xchg has side effect, so that attribute volatile is necessary, | ||
16 | * but generally the primitive is invalid, *ptr is output argument. --ANK | ||
17 | */ | ||
13 | 18 | ||
14 | struct __xchg_dummy { | 19 | struct __xchg_dummy { |
15 | unsigned long a[100]; | 20 | unsigned long a[100]; |
16 | }; | 21 | }; |
17 | #define __xg(x) ((struct __xchg_dummy *)(x)) | 22 | #define __xg(x) ((struct __xchg_dummy *)(x)) |
18 | 23 | ||
24 | #define __xchg(x, ptr, size) \ | ||
25 | ({ \ | ||
26 | __typeof(*(ptr)) __x = (x); \ | ||
27 | switch (size) { \ | ||
28 | case 1: \ | ||
29 | asm volatile("xchgb %b0,%1" \ | ||
30 | : "=q" (__x) \ | ||
31 | : "m" (*__xg(ptr)), "0" (__x) \ | ||
32 | : "memory"); \ | ||
33 | break; \ | ||
34 | case 2: \ | ||
35 | asm volatile("xchgw %w0,%1" \ | ||
36 | : "=r" (__x) \ | ||
37 | : "m" (*__xg(ptr)), "0" (__x) \ | ||
38 | : "memory"); \ | ||
39 | break; \ | ||
40 | case 4: \ | ||
41 | asm volatile("xchgl %0,%1" \ | ||
42 | : "=r" (__x) \ | ||
43 | : "m" (*__xg(ptr)), "0" (__x) \ | ||
44 | : "memory"); \ | ||
45 | break; \ | ||
46 | default: \ | ||
47 | __xchg_wrong_size(); \ | ||
48 | } \ | ||
49 | __x; \ | ||
50 | }) | ||
51 | |||
52 | #define xchg(ptr, v) \ | ||
53 | __xchg((v), (ptr), sizeof(*ptr)) | ||
54 | |||
19 | /* | 55 | /* |
20 | * The semantics of XCHGCMP8B are a bit strange, this is why | 56 | * The semantics of XCHGCMP8B are a bit strange, this is why |
21 | * there is a loop and the loading of %%eax and %%edx has to | 57 | * there is a loop and the loading of %%eax and %%edx has to |
@@ -71,57 +107,63 @@ static inline void __set_64bit_var(unsigned long long *ptr, | |||
71 | (unsigned int)((value) >> 32)) \ | 107 | (unsigned int)((value) >> 32)) \ |
72 | : __set_64bit(ptr, ll_low((value)), ll_high((value)))) | 108 | : __set_64bit(ptr, ll_low((value)), ll_high((value)))) |
73 | 109 | ||
74 | /* | 110 | extern void __cmpxchg_wrong_size(void); |
75 | * Note: no "lock" prefix even on SMP: xchg always implies lock anyway | ||
76 | * Note 2: xchg has side effect, so that attribute volatile is necessary, | ||
77 | * but generally the primitive is invalid, *ptr is output argument. --ANK | ||
78 | */ | ||
79 | static inline unsigned long __xchg(unsigned long x, volatile void *ptr, | ||
80 | int size) | ||
81 | { | ||
82 | switch (size) { | ||
83 | case 1: | ||
84 | asm volatile("xchgb %b0,%1" | ||
85 | : "=q" (x) | ||
86 | : "m" (*__xg(ptr)), "0" (x) | ||
87 | : "memory"); | ||
88 | break; | ||
89 | case 2: | ||
90 | asm volatile("xchgw %w0,%1" | ||
91 | : "=r" (x) | ||
92 | : "m" (*__xg(ptr)), "0" (x) | ||
93 | : "memory"); | ||
94 | break; | ||
95 | case 4: | ||
96 | asm volatile("xchgl %0,%1" | ||
97 | : "=r" (x) | ||
98 | : "m" (*__xg(ptr)), "0" (x) | ||
99 | : "memory"); | ||
100 | break; | ||
101 | } | ||
102 | return x; | ||
103 | } | ||
104 | 111 | ||
105 | /* | 112 | /* |
106 | * Atomic compare and exchange. Compare OLD with MEM, if identical, | 113 | * Atomic compare and exchange. Compare OLD with MEM, if identical, |
107 | * store NEW in MEM. Return the initial value in MEM. Success is | 114 | * store NEW in MEM. Return the initial value in MEM. Success is |
108 | * indicated by comparing RETURN with OLD. | 115 | * indicated by comparing RETURN with OLD. |
109 | */ | 116 | */ |
117 | #define __raw_cmpxchg(ptr, old, new, size, lock) \ | ||
118 | ({ \ | ||
119 | __typeof__(*(ptr)) __ret; \ | ||
120 | __typeof__(*(ptr)) __old = (old); \ | ||
121 | __typeof__(*(ptr)) __new = (new); \ | ||
122 | switch (size) { \ | ||
123 | case 1: \ | ||
124 | asm volatile(lock "cmpxchgb %b1,%2" \ | ||
125 | : "=a"(__ret) \ | ||
126 | : "q"(__new), "m"(*__xg(ptr)), "0"(__old) \ | ||
127 | : "memory"); \ | ||
128 | break; \ | ||
129 | case 2: \ | ||
130 | asm volatile(lock "cmpxchgw %w1,%2" \ | ||
131 | : "=a"(__ret) \ | ||
132 | : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ | ||
133 | : "memory"); \ | ||
134 | break; \ | ||
135 | case 4: \ | ||
136 | asm volatile(lock "cmpxchgl %1,%2" \ | ||
137 | : "=a"(__ret) \ | ||
138 | : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ | ||
139 | : "memory"); \ | ||
140 | break; \ | ||
141 | default: \ | ||
142 | __cmpxchg_wrong_size(); \ | ||
143 | } \ | ||
144 | __ret; \ | ||
145 | }) | ||
146 | |||
147 | #define __cmpxchg(ptr, old, new, size) \ | ||
148 | __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX) | ||
149 | |||
150 | #define __sync_cmpxchg(ptr, old, new, size) \ | ||
151 | __raw_cmpxchg((ptr), (old), (new), (size), "lock; ") | ||
152 | |||
153 | #define __cmpxchg_local(ptr, old, new, size) \ | ||
154 | __raw_cmpxchg((ptr), (old), (new), (size), "") | ||
110 | 155 | ||
111 | #ifdef CONFIG_X86_CMPXCHG | 156 | #ifdef CONFIG_X86_CMPXCHG |
112 | #define __HAVE_ARCH_CMPXCHG 1 | 157 | #define __HAVE_ARCH_CMPXCHG 1 |
113 | #define cmpxchg(ptr, o, n) \ | 158 | |
114 | ((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o), \ | 159 | #define cmpxchg(ptr, old, new) \ |
115 | (unsigned long)(n), \ | 160 | __cmpxchg((ptr), (old), (new), sizeof(*ptr)) |
116 | sizeof(*(ptr)))) | 161 | |
117 | #define sync_cmpxchg(ptr, o, n) \ | 162 | #define sync_cmpxchg(ptr, old, new) \ |
118 | ((__typeof__(*(ptr)))__sync_cmpxchg((ptr), (unsigned long)(o), \ | 163 | __sync_cmpxchg((ptr), (old), (new), sizeof(*ptr)) |
119 | (unsigned long)(n), \ | 164 | |
120 | sizeof(*(ptr)))) | 165 | #define cmpxchg_local(ptr, old, new) \ |
121 | #define cmpxchg_local(ptr, o, n) \ | 166 | __cmpxchg_local((ptr), (old), (new), sizeof(*ptr)) |
122 | ((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o), \ | ||
123 | (unsigned long)(n), \ | ||
124 | sizeof(*(ptr)))) | ||
125 | #endif | 167 | #endif |
126 | 168 | ||
127 | #ifdef CONFIG_X86_CMPXCHG64 | 169 | #ifdef CONFIG_X86_CMPXCHG64 |
@@ -133,94 +175,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, | |||
133 | (unsigned long long)(n))) | 175 | (unsigned long long)(n))) |
134 | #endif | 176 | #endif |
135 | 177 | ||
136 | static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, | ||
137 | unsigned long new, int size) | ||
138 | { | ||
139 | unsigned long prev; | ||
140 | switch (size) { | ||
141 | case 1: | ||
142 | asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2" | ||
143 | : "=a"(prev) | ||
144 | : "q"(new), "m"(*__xg(ptr)), "0"(old) | ||
145 | : "memory"); | ||
146 | return prev; | ||
147 | case 2: | ||
148 | asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2" | ||
149 | : "=a"(prev) | ||
150 | : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
151 | : "memory"); | ||
152 | return prev; | ||
153 | case 4: | ||
154 | asm volatile(LOCK_PREFIX "cmpxchgl %1,%2" | ||
155 | : "=a"(prev) | ||
156 | : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
157 | : "memory"); | ||
158 | return prev; | ||
159 | } | ||
160 | return old; | ||
161 | } | ||
162 | |||
163 | /* | ||
164 | * Always use locked operations when touching memory shared with a | ||
165 | * hypervisor, since the system may be SMP even if the guest kernel | ||
166 | * isn't. | ||
167 | */ | ||
168 | static inline unsigned long __sync_cmpxchg(volatile void *ptr, | ||
169 | unsigned long old, | ||
170 | unsigned long new, int size) | ||
171 | { | ||
172 | unsigned long prev; | ||
173 | switch (size) { | ||
174 | case 1: | ||
175 | asm volatile("lock; cmpxchgb %b1,%2" | ||
176 | : "=a"(prev) | ||
177 | : "q"(new), "m"(*__xg(ptr)), "0"(old) | ||
178 | : "memory"); | ||
179 | return prev; | ||
180 | case 2: | ||
181 | asm volatile("lock; cmpxchgw %w1,%2" | ||
182 | : "=a"(prev) | ||
183 | : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
184 | : "memory"); | ||
185 | return prev; | ||
186 | case 4: | ||
187 | asm volatile("lock; cmpxchgl %1,%2" | ||
188 | : "=a"(prev) | ||
189 | : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
190 | : "memory"); | ||
191 | return prev; | ||
192 | } | ||
193 | return old; | ||
194 | } | ||
195 | |||
196 | static inline unsigned long __cmpxchg_local(volatile void *ptr, | ||
197 | unsigned long old, | ||
198 | unsigned long new, int size) | ||
199 | { | ||
200 | unsigned long prev; | ||
201 | switch (size) { | ||
202 | case 1: | ||
203 | asm volatile("cmpxchgb %b1,%2" | ||
204 | : "=a"(prev) | ||
205 | : "q"(new), "m"(*__xg(ptr)), "0"(old) | ||
206 | : "memory"); | ||
207 | return prev; | ||
208 | case 2: | ||
209 | asm volatile("cmpxchgw %w1,%2" | ||
210 | : "=a"(prev) | ||
211 | : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
212 | : "memory"); | ||
213 | return prev; | ||
214 | case 4: | ||
215 | asm volatile("cmpxchgl %1,%2" | ||
216 | : "=a"(prev) | ||
217 | : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
218 | : "memory"); | ||
219 | return prev; | ||
220 | } | ||
221 | return old; | ||
222 | } | ||
223 | |||
224 | static inline unsigned long long __cmpxchg64(volatile void *ptr, | 178 | static inline unsigned long long __cmpxchg64(volatile void *ptr, |
225 | unsigned long long old, | 179 | unsigned long long old, |
226 | unsigned long long new) | 180 | unsigned long long new) |
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 52de72e0de8c..485ae415faec 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h | |||
@@ -3,9 +3,6 @@ | |||
3 | 3 | ||
4 | #include <asm/alternative.h> /* Provides LOCK_PREFIX */ | 4 | #include <asm/alternative.h> /* Provides LOCK_PREFIX */ |
5 | 5 | ||
6 | #define xchg(ptr, v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v), \ | ||
7 | (ptr), sizeof(*(ptr)))) | ||
8 | |||
9 | #define __xg(x) ((volatile long *)(x)) | 6 | #define __xg(x) ((volatile long *)(x)) |
10 | 7 | ||
11 | static inline void set_64bit(volatile unsigned long *ptr, unsigned long val) | 8 | static inline void set_64bit(volatile unsigned long *ptr, unsigned long val) |
@@ -15,167 +12,118 @@ static inline void set_64bit(volatile unsigned long *ptr, unsigned long val) | |||
15 | 12 | ||
16 | #define _set_64bit set_64bit | 13 | #define _set_64bit set_64bit |
17 | 14 | ||
15 | extern void __xchg_wrong_size(void); | ||
16 | extern void __cmpxchg_wrong_size(void); | ||
17 | |||
18 | /* | 18 | /* |
19 | * Note: no "lock" prefix even on SMP: xchg always implies lock anyway | 19 | * Note: no "lock" prefix even on SMP: xchg always implies lock anyway |
20 | * Note 2: xchg has side effect, so that attribute volatile is necessary, | 20 | * Note 2: xchg has side effect, so that attribute volatile is necessary, |
21 | * but generally the primitive is invalid, *ptr is output argument. --ANK | 21 | * but generally the primitive is invalid, *ptr is output argument. --ANK |
22 | */ | 22 | */ |
23 | static inline unsigned long __xchg(unsigned long x, volatile void *ptr, | 23 | #define __xchg(x, ptr, size) \ |
24 | int size) | 24 | ({ \ |
25 | { | 25 | __typeof(*(ptr)) __x = (x); \ |
26 | switch (size) { | 26 | switch (size) { \ |
27 | case 1: | 27 | case 1: \ |
28 | asm volatile("xchgb %b0,%1" | 28 | asm volatile("xchgb %b0,%1" \ |
29 | : "=q" (x) | 29 | : "=q" (__x) \ |
30 | : "m" (*__xg(ptr)), "0" (x) | 30 | : "m" (*__xg(ptr)), "0" (__x) \ |
31 | : "memory"); | 31 | : "memory"); \ |
32 | break; | 32 | break; \ |
33 | case 2: | 33 | case 2: \ |
34 | asm volatile("xchgw %w0,%1" | 34 | asm volatile("xchgw %w0,%1" \ |
35 | : "=r" (x) | 35 | : "=r" (__x) \ |
36 | : "m" (*__xg(ptr)), "0" (x) | 36 | : "m" (*__xg(ptr)), "0" (__x) \ |
37 | : "memory"); | 37 | : "memory"); \ |
38 | break; | 38 | break; \ |
39 | case 4: | 39 | case 4: \ |
40 | asm volatile("xchgl %k0,%1" | 40 | asm volatile("xchgl %k0,%1" \ |
41 | : "=r" (x) | 41 | : "=r" (__x) \ |
42 | : "m" (*__xg(ptr)), "0" (x) | 42 | : "m" (*__xg(ptr)), "0" (__x) \ |
43 | : "memory"); | 43 | : "memory"); \ |
44 | break; | 44 | break; \ |
45 | case 8: | 45 | case 8: \ |
46 | asm volatile("xchgq %0,%1" | 46 | asm volatile("xchgq %0,%1" \ |
47 | : "=r" (x) | 47 | : "=r" (__x) \ |
48 | : "m" (*__xg(ptr)), "0" (x) | 48 | : "m" (*__xg(ptr)), "0" (__x) \ |
49 | : "memory"); | 49 | : "memory"); \ |
50 | break; | 50 | break; \ |
51 | } | 51 | default: \ |
52 | return x; | 52 | __xchg_wrong_size(); \ |
53 | } | 53 | } \ |
54 | __x; \ | ||
55 | }) | ||
56 | |||
57 | #define xchg(ptr, v) \ | ||
58 | __xchg((v), (ptr), sizeof(*ptr)) | ||
59 | |||
60 | #define __HAVE_ARCH_CMPXCHG 1 | ||
54 | 61 | ||
55 | /* | 62 | /* |
56 | * Atomic compare and exchange. Compare OLD with MEM, if identical, | 63 | * Atomic compare and exchange. Compare OLD with MEM, if identical, |
57 | * store NEW in MEM. Return the initial value in MEM. Success is | 64 | * store NEW in MEM. Return the initial value in MEM. Success is |
58 | * indicated by comparing RETURN with OLD. | 65 | * indicated by comparing RETURN with OLD. |
59 | */ | 66 | */ |
67 | #define __raw_cmpxchg(ptr, old, new, size, lock) \ | ||
68 | ({ \ | ||
69 | __typeof__(*(ptr)) __ret; \ | ||
70 | __typeof__(*(ptr)) __old = (old); \ | ||
71 | __typeof__(*(ptr)) __new = (new); \ | ||
72 | switch (size) { \ | ||
73 | case 1: \ | ||
74 | asm volatile(lock "cmpxchgb %b1,%2" \ | ||
75 | : "=a"(__ret) \ | ||
76 | : "q"(__new), "m"(*__xg(ptr)), "0"(__old) \ | ||
77 | : "memory"); \ | ||
78 | break; \ | ||
79 | case 2: \ | ||
80 | asm volatile(lock "cmpxchgw %w1,%2" \ | ||
81 | : "=a"(__ret) \ | ||
82 | : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ | ||
83 | : "memory"); \ | ||
84 | break; \ | ||
85 | case 4: \ | ||
86 | asm volatile(lock "cmpxchgl %k1,%2" \ | ||
87 | : "=a"(__ret) \ | ||
88 | : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ | ||
89 | : "memory"); \ | ||
90 | break; \ | ||
91 | case 8: \ | ||
92 | asm volatile(lock "cmpxchgq %1,%2" \ | ||
93 | : "=a"(__ret) \ | ||
94 | : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ | ||
95 | : "memory"); \ | ||
96 | break; \ | ||
97 | default: \ | ||
98 | __cmpxchg_wrong_size(); \ | ||
99 | } \ | ||
100 | __ret; \ | ||
101 | }) | ||
60 | 102 | ||
61 | #define __HAVE_ARCH_CMPXCHG 1 | 103 | #define __cmpxchg(ptr, old, new, size) \ |
104 | __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX) | ||
62 | 105 | ||
63 | static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, | 106 | #define __sync_cmpxchg(ptr, old, new, size) \ |
64 | unsigned long new, int size) | 107 | __raw_cmpxchg((ptr), (old), (new), (size), "lock; ") |
65 | { | ||
66 | unsigned long prev; | ||
67 | switch (size) { | ||
68 | case 1: | ||
69 | asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2" | ||
70 | : "=a"(prev) | ||
71 | : "q"(new), "m"(*__xg(ptr)), "0"(old) | ||
72 | : "memory"); | ||
73 | return prev; | ||
74 | case 2: | ||
75 | asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2" | ||
76 | : "=a"(prev) | ||
77 | : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
78 | : "memory"); | ||
79 | return prev; | ||
80 | case 4: | ||
81 | asm volatile(LOCK_PREFIX "cmpxchgl %k1,%2" | ||
82 | : "=a"(prev) | ||
83 | : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
84 | : "memory"); | ||
85 | return prev; | ||
86 | case 8: | ||
87 | asm volatile(LOCK_PREFIX "cmpxchgq %1,%2" | ||
88 | : "=a"(prev) | ||
89 | : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
90 | : "memory"); | ||
91 | return prev; | ||
92 | } | ||
93 | return old; | ||
94 | } | ||
95 | 108 | ||
96 | /* | 109 | #define __cmpxchg_local(ptr, old, new, size) \ |
97 | * Always use locked operations when touching memory shared with a | 110 | __raw_cmpxchg((ptr), (old), (new), (size), "") |
98 | * hypervisor, since the system may be SMP even if the guest kernel | ||
99 | * isn't. | ||
100 | */ | ||
101 | static inline unsigned long __sync_cmpxchg(volatile void *ptr, | ||
102 | unsigned long old, | ||
103 | unsigned long new, int size) | ||
104 | { | ||
105 | unsigned long prev; | ||
106 | switch (size) { | ||
107 | case 1: | ||
108 | asm volatile("lock; cmpxchgb %b1,%2" | ||
109 | : "=a"(prev) | ||
110 | : "q"(new), "m"(*__xg(ptr)), "0"(old) | ||
111 | : "memory"); | ||
112 | return prev; | ||
113 | case 2: | ||
114 | asm volatile("lock; cmpxchgw %w1,%2" | ||
115 | : "=a"(prev) | ||
116 | : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
117 | : "memory"); | ||
118 | return prev; | ||
119 | case 4: | ||
120 | asm volatile("lock; cmpxchgl %1,%2" | ||
121 | : "=a"(prev) | ||
122 | : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
123 | : "memory"); | ||
124 | return prev; | ||
125 | } | ||
126 | return old; | ||
127 | } | ||
128 | 111 | ||
129 | static inline unsigned long __cmpxchg_local(volatile void *ptr, | 112 | #define cmpxchg(ptr, old, new) \ |
130 | unsigned long old, | 113 | __cmpxchg((ptr), (old), (new), sizeof(*ptr)) |
131 | unsigned long new, int size) | 114 | |
132 | { | 115 | #define sync_cmpxchg(ptr, old, new) \ |
133 | unsigned long prev; | 116 | __sync_cmpxchg((ptr), (old), (new), sizeof(*ptr)) |
134 | switch (size) { | 117 | |
135 | case 1: | 118 | #define cmpxchg_local(ptr, old, new) \ |
136 | asm volatile("cmpxchgb %b1,%2" | 119 | __cmpxchg_local((ptr), (old), (new), sizeof(*ptr)) |
137 | : "=a"(prev) | ||
138 | : "q"(new), "m"(*__xg(ptr)), "0"(old) | ||
139 | : "memory"); | ||
140 | return prev; | ||
141 | case 2: | ||
142 | asm volatile("cmpxchgw %w1,%2" | ||
143 | : "=a"(prev) | ||
144 | : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
145 | : "memory"); | ||
146 | return prev; | ||
147 | case 4: | ||
148 | asm volatile("cmpxchgl %k1,%2" | ||
149 | : "=a"(prev) | ||
150 | : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
151 | : "memory"); | ||
152 | return prev; | ||
153 | case 8: | ||
154 | asm volatile("cmpxchgq %1,%2" | ||
155 | : "=a"(prev) | ||
156 | : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
157 | : "memory"); | ||
158 | return prev; | ||
159 | } | ||
160 | return old; | ||
161 | } | ||
162 | 120 | ||
163 | #define cmpxchg(ptr, o, n) \ | ||
164 | ((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o), \ | ||
165 | (unsigned long)(n), sizeof(*(ptr)))) | ||
166 | #define cmpxchg64(ptr, o, n) \ | 121 | #define cmpxchg64(ptr, o, n) \ |
167 | ({ \ | 122 | ({ \ |
168 | BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ | 123 | BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ |
169 | cmpxchg((ptr), (o), (n)); \ | 124 | cmpxchg((ptr), (o), (n)); \ |
170 | }) | 125 | }) |
171 | #define cmpxchg_local(ptr, o, n) \ | 126 | |
172 | ((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o), \ | ||
173 | (unsigned long)(n), \ | ||
174 | sizeof(*(ptr)))) | ||
175 | #define sync_cmpxchg(ptr, o, n) \ | ||
176 | ((__typeof__(*(ptr)))__sync_cmpxchg((ptr), (unsigned long)(o), \ | ||
177 | (unsigned long)(n), \ | ||
178 | sizeof(*(ptr)))) | ||
179 | #define cmpxchg64_local(ptr, o, n) \ | 127 | #define cmpxchg64_local(ptr, o, n) \ |
180 | ({ \ | 128 | ({ \ |
181 | BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ | 129 | BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ |
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 9cfc88b97742..637e1ec963c3 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
@@ -153,6 +153,7 @@ | |||
153 | #define X86_FEATURE_SSE5 (6*32+11) /* SSE-5 */ | 153 | #define X86_FEATURE_SSE5 (6*32+11) /* SSE-5 */ |
154 | #define X86_FEATURE_SKINIT (6*32+12) /* SKINIT/STGI instructions */ | 154 | #define X86_FEATURE_SKINIT (6*32+12) /* SKINIT/STGI instructions */ |
155 | #define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */ | 155 | #define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */ |
156 | #define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */ | ||
156 | 157 | ||
157 | /* | 158 | /* |
158 | * Auxiliary flags: Linux defined - For features scattered in various | 159 | * Auxiliary flags: Linux defined - For features scattered in various |
@@ -248,6 +249,7 @@ extern const char * const x86_power_flags[32]; | |||
248 | #define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) | 249 | #define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) |
249 | #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) | 250 | #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) |
250 | #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) | 251 | #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) |
252 | #define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) | ||
251 | 253 | ||
252 | #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) | 254 | #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) |
253 | # define cpu_has_invlpg 1 | 255 | # define cpu_has_invlpg 1 |
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 3ea6f37be9e2..8240f76b531e 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h | |||
@@ -18,6 +18,7 @@ | |||
18 | #define DR_TRAP1 (0x2) /* db1 */ | 18 | #define DR_TRAP1 (0x2) /* db1 */ |
19 | #define DR_TRAP2 (0x4) /* db2 */ | 19 | #define DR_TRAP2 (0x4) /* db2 */ |
20 | #define DR_TRAP3 (0x8) /* db3 */ | 20 | #define DR_TRAP3 (0x8) /* db3 */ |
21 | #define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3) | ||
21 | 22 | ||
22 | #define DR_STEP (0x4000) /* single-step */ | 23 | #define DR_STEP (0x4000) /* single-step */ |
23 | #define DR_SWITCH (0x8000) /* task switch */ | 24 | #define DR_SWITCH (0x8000) /* task switch */ |
@@ -49,6 +50,8 @@ | |||
49 | 50 | ||
50 | #define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */ | 51 | #define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */ |
51 | #define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */ | 52 | #define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */ |
53 | #define DR_LOCAL_ENABLE (0x1) /* Local enable for reg 0 */ | ||
54 | #define DR_GLOBAL_ENABLE (0x2) /* Global enable for reg 0 */ | ||
52 | #define DR_ENABLE_SIZE 2 /* 2 enable bits per register */ | 55 | #define DR_ENABLE_SIZE 2 /* 2 enable bits per register */ |
53 | 56 | ||
54 | #define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */ | 57 | #define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */ |
@@ -67,4 +70,34 @@ | |||
67 | #define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ | 70 | #define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ |
68 | #define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ | 71 | #define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ |
69 | 72 | ||
73 | /* | ||
74 | * HW breakpoint additions | ||
75 | */ | ||
76 | #ifdef __KERNEL__ | ||
77 | |||
78 | DECLARE_PER_CPU(unsigned long, cpu_dr7); | ||
79 | |||
80 | static inline void hw_breakpoint_disable(void) | ||
81 | { | ||
82 | /* Zero the control register for HW Breakpoint */ | ||
83 | set_debugreg(0UL, 7); | ||
84 | |||
85 | /* Zero-out the individual HW breakpoint address registers */ | ||
86 | set_debugreg(0UL, 0); | ||
87 | set_debugreg(0UL, 1); | ||
88 | set_debugreg(0UL, 2); | ||
89 | set_debugreg(0UL, 3); | ||
90 | } | ||
91 | |||
92 | static inline int hw_breakpoint_active(void) | ||
93 | { | ||
94 | return __get_cpu_var(cpu_dr7) & DR_GLOBAL_ENABLE_MASK; | ||
95 | } | ||
96 | |||
97 | extern void aout_dump_debugregs(struct user *dump); | ||
98 | |||
99 | extern void hw_breakpoint_restore(void); | ||
100 | |||
101 | #endif /* __KERNEL__ */ | ||
102 | |||
70 | #endif /* _ASM_X86_DEBUGREG_H */ | 103 | #endif /* _ASM_X86_DEBUGREG_H */ |
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index e8de2f6f5ca5..617bd56b3070 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h | |||
@@ -288,7 +288,7 @@ static inline void load_LDT(mm_context_t *pc) | |||
288 | 288 | ||
289 | static inline unsigned long get_desc_base(const struct desc_struct *desc) | 289 | static inline unsigned long get_desc_base(const struct desc_struct *desc) |
290 | { | 290 | { |
291 | return desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24); | 291 | return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24)); |
292 | } | 292 | } |
293 | 293 | ||
294 | static inline void set_desc_base(struct desc_struct *desc, unsigned long base) | 294 | static inline void set_desc_base(struct desc_struct *desc, unsigned long base) |
diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h index 9d6684849fd9..278441f39856 100644 --- a/arch/x86/include/asm/desc_defs.h +++ b/arch/x86/include/asm/desc_defs.h | |||
@@ -12,9 +12,9 @@ | |||
12 | #include <linux/types.h> | 12 | #include <linux/types.h> |
13 | 13 | ||
14 | /* | 14 | /* |
15 | * FIXME: Acessing the desc_struct through its fields is more elegant, | 15 | * FIXME: Accessing the desc_struct through its fields is more elegant, |
16 | * and should be the one valid thing to do. However, a lot of open code | 16 | * and should be the one valid thing to do. However, a lot of open code |
17 | * still touches the a and b acessors, and doing this allow us to do it | 17 | * still touches the a and b accessors, and doing this allow us to do it |
18 | * incrementally. We keep the signature as a struct, rather than an union, | 18 | * incrementally. We keep the signature as a struct, rather than an union, |
19 | * so we can get rid of it transparently in the future -- glommer | 19 | * so we can get rid of it transparently in the future -- glommer |
20 | */ | 20 | */ |
diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h index cee34e9ca45b..029f230ab637 100644 --- a/arch/x86/include/asm/device.h +++ b/arch/x86/include/asm/device.h | |||
@@ -8,7 +8,7 @@ struct dev_archdata { | |||
8 | #ifdef CONFIG_X86_64 | 8 | #ifdef CONFIG_X86_64 |
9 | struct dma_map_ops *dma_ops; | 9 | struct dma_map_ops *dma_ops; |
10 | #endif | 10 | #endif |
11 | #ifdef CONFIG_DMAR | 11 | #if defined(CONFIG_DMAR) || defined(CONFIG_AMD_IOMMU) |
12 | void *iommu; /* hook for IOMMU specific extension */ | 12 | void *iommu; /* hook for IOMMU specific extension */ |
13 | #endif | 13 | #endif |
14 | }; | 14 | }; |
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 0ee770d23d0e..ac91eed21061 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h | |||
@@ -14,7 +14,14 @@ | |||
14 | #include <asm/swiotlb.h> | 14 | #include <asm/swiotlb.h> |
15 | #include <asm-generic/dma-coherent.h> | 15 | #include <asm-generic/dma-coherent.h> |
16 | 16 | ||
17 | extern dma_addr_t bad_dma_address; | 17 | #ifdef CONFIG_ISA |
18 | # define ISA_DMA_BIT_MASK DMA_BIT_MASK(24) | ||
19 | #else | ||
20 | # define ISA_DMA_BIT_MASK DMA_BIT_MASK(32) | ||
21 | #endif | ||
22 | |||
23 | #define DMA_ERROR_CODE 0 | ||
24 | |||
18 | extern int iommu_merge; | 25 | extern int iommu_merge; |
19 | extern struct device x86_dma_fallback_dev; | 26 | extern struct device x86_dma_fallback_dev; |
20 | extern int panic_on_overflow; | 27 | extern int panic_on_overflow; |
@@ -42,7 +49,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) | |||
42 | if (ops->mapping_error) | 49 | if (ops->mapping_error) |
43 | return ops->mapping_error(dev, dma_addr); | 50 | return ops->mapping_error(dev, dma_addr); |
44 | 51 | ||
45 | return (dma_addr == bad_dma_address); | 52 | return (dma_addr == DMA_ERROR_CODE); |
46 | } | 53 | } |
47 | 54 | ||
48 | #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) | 55 | #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) |
@@ -60,7 +67,7 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) | |||
60 | if (!dev->dma_mask) | 67 | if (!dev->dma_mask) |
61 | return 0; | 68 | return 0; |
62 | 69 | ||
63 | return addr + size <= *dev->dma_mask; | 70 | return addr + size - 1 <= *dev->dma_mask; |
64 | } | 71 | } |
65 | 72 | ||
66 | static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) | 73 | static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) |
@@ -124,10 +131,8 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | |||
124 | if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) | 131 | if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) |
125 | return memory; | 132 | return memory; |
126 | 133 | ||
127 | if (!dev) { | 134 | if (!dev) |
128 | dev = &x86_dma_fallback_dev; | 135 | dev = &x86_dma_fallback_dev; |
129 | gfp |= GFP_DMA; | ||
130 | } | ||
131 | 136 | ||
132 | if (!is_device_dma_capable(dev)) | 137 | if (!is_device_dma_capable(dev)) |
133 | return NULL; | 138 | return NULL; |
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 40b4e614fe71..761249e396fe 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h | |||
@@ -61,6 +61,12 @@ struct e820map { | |||
61 | struct e820entry map[E820_X_MAX]; | 61 | struct e820entry map[E820_X_MAX]; |
62 | }; | 62 | }; |
63 | 63 | ||
64 | #define ISA_START_ADDRESS 0xa0000 | ||
65 | #define ISA_END_ADDRESS 0x100000 | ||
66 | |||
67 | #define BIOS_BEGIN 0x000a0000 | ||
68 | #define BIOS_END 0x00100000 | ||
69 | |||
64 | #ifdef __KERNEL__ | 70 | #ifdef __KERNEL__ |
65 | /* see comment in arch/x86/kernel/e820.c */ | 71 | /* see comment in arch/x86/kernel/e820.c */ |
66 | extern struct e820map e820; | 72 | extern struct e820map e820; |
@@ -126,15 +132,18 @@ extern void e820_reserve_resources(void); | |||
126 | extern void e820_reserve_resources_late(void); | 132 | extern void e820_reserve_resources_late(void); |
127 | extern void setup_memory_map(void); | 133 | extern void setup_memory_map(void); |
128 | extern char *default_machine_specific_memory_setup(void); | 134 | extern char *default_machine_specific_memory_setup(void); |
129 | #endif /* __KERNEL__ */ | ||
130 | #endif /* __ASSEMBLY__ */ | ||
131 | 135 | ||
132 | #define ISA_START_ADDRESS 0xa0000 | 136 | /* |
133 | #define ISA_END_ADDRESS 0x100000 | 137 | * Returns true iff the specified range [s,e) is completely contained inside |
134 | #define is_ISA_range(s, e) ((s) >= ISA_START_ADDRESS && (e) < ISA_END_ADDRESS) | 138 | * the ISA region. |
139 | */ | ||
140 | static inline bool is_ISA_range(u64 s, u64 e) | ||
141 | { | ||
142 | return s >= ISA_START_ADDRESS && e <= ISA_END_ADDRESS; | ||
143 | } | ||
135 | 144 | ||
136 | #define BIOS_BEGIN 0x000a0000 | 145 | #endif /* __KERNEL__ */ |
137 | #define BIOS_END 0x00100000 | 146 | #endif /* __ASSEMBLY__ */ |
138 | 147 | ||
139 | #ifdef __KERNEL__ | 148 | #ifdef __KERNEL__ |
140 | #include <linux/ioport.h> | 149 | #include <linux/ioport.h> |
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 456a304b8172..b4501ee223ad 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h | |||
@@ -157,19 +157,6 @@ do { \ | |||
157 | 157 | ||
158 | #define compat_elf_check_arch(x) elf_check_arch_ia32(x) | 158 | #define compat_elf_check_arch(x) elf_check_arch_ia32(x) |
159 | 159 | ||
160 | static inline void start_ia32_thread(struct pt_regs *regs, u32 ip, u32 sp) | ||
161 | { | ||
162 | loadsegment(fs, 0); | ||
163 | loadsegment(ds, __USER32_DS); | ||
164 | loadsegment(es, __USER32_DS); | ||
165 | load_gs_index(0); | ||
166 | regs->ip = ip; | ||
167 | regs->sp = sp; | ||
168 | regs->flags = X86_EFLAGS_IF; | ||
169 | regs->cs = __USER32_CS; | ||
170 | regs->ss = __USER32_DS; | ||
171 | } | ||
172 | |||
173 | static inline void elf_common_init(struct thread_struct *t, | 160 | static inline void elf_common_init(struct thread_struct *t, |
174 | struct pt_regs *regs, const u16 ds) | 161 | struct pt_regs *regs, const u16 ds) |
175 | { | 162 | { |
@@ -191,11 +178,8 @@ do { \ | |||
191 | #define COMPAT_ELF_PLAT_INIT(regs, load_addr) \ | 178 | #define COMPAT_ELF_PLAT_INIT(regs, load_addr) \ |
192 | elf_common_init(¤t->thread, regs, __USER_DS) | 179 | elf_common_init(¤t->thread, regs, __USER_DS) |
193 | 180 | ||
194 | #define compat_start_thread(regs, ip, sp) \ | 181 | void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp); |
195 | do { \ | 182 | #define compat_start_thread start_thread_ia32 |
196 | start_ia32_thread(regs, ip, sp); \ | ||
197 | set_fs(USER_DS); \ | ||
198 | } while (0) | ||
199 | 183 | ||
200 | #define COMPAT_SET_PERSONALITY(ex) \ | 184 | #define COMPAT_SET_PERSONALITY(ex) \ |
201 | do { \ | 185 | do { \ |
@@ -255,7 +239,6 @@ extern int force_personality32; | |||
255 | #endif /* !CONFIG_X86_32 */ | 239 | #endif /* !CONFIG_X86_32 */ |
256 | 240 | ||
257 | #define CORE_DUMP_USE_REGSET | 241 | #define CORE_DUMP_USE_REGSET |
258 | #define USE_ELF_CORE_DUMP | ||
259 | #define ELF_EXEC_PAGESIZE 4096 | 242 | #define ELF_EXEC_PAGESIZE 4096 |
260 | 243 | ||
261 | /* This is the location that an ET_DYN program is loaded if exec'ed. Typical | 244 | /* This is the location that an ET_DYN program is loaded if exec'ed. Typical |
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index f5693c81a1db..8e8ec663a98f 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h | |||
@@ -34,7 +34,7 @@ BUILD_INTERRUPT3(invalidate_interrupt7,INVALIDATE_TLB_VECTOR_START+7, | |||
34 | smp_invalidate_interrupt) | 34 | smp_invalidate_interrupt) |
35 | #endif | 35 | #endif |
36 | 36 | ||
37 | BUILD_INTERRUPT(generic_interrupt, GENERIC_INTERRUPT_VECTOR) | 37 | BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) |
38 | 38 | ||
39 | /* | 39 | /* |
40 | * every pentium local APIC has two 'local interrupts', with a | 40 | * every pentium local APIC has two 'local interrupts', with a |
diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h index 6cfdafa409d8..4ac5b0f33fc1 100644 --- a/arch/x86/include/asm/gart.h +++ b/arch/x86/include/asm/gart.h | |||
@@ -35,8 +35,7 @@ extern int gart_iommu_aperture_allowed; | |||
35 | extern int gart_iommu_aperture_disabled; | 35 | extern int gart_iommu_aperture_disabled; |
36 | 36 | ||
37 | extern void early_gart_iommu_check(void); | 37 | extern void early_gart_iommu_check(void); |
38 | extern void gart_iommu_init(void); | 38 | extern int gart_iommu_init(void); |
39 | extern void gart_iommu_shutdown(void); | ||
40 | extern void __init gart_parse_options(char *); | 39 | extern void __init gart_parse_options(char *); |
41 | extern void gart_iommu_hole_init(void); | 40 | extern void gart_iommu_hole_init(void); |
42 | 41 | ||
@@ -48,12 +47,6 @@ extern void gart_iommu_hole_init(void); | |||
48 | static inline void early_gart_iommu_check(void) | 47 | static inline void early_gart_iommu_check(void) |
49 | { | 48 | { |
50 | } | 49 | } |
51 | static inline void gart_iommu_init(void) | ||
52 | { | ||
53 | } | ||
54 | static inline void gart_iommu_shutdown(void) | ||
55 | { | ||
56 | } | ||
57 | static inline void gart_parse_options(char *options) | 50 | static inline void gart_parse_options(char *options) |
58 | { | 51 | { |
59 | } | 52 | } |
diff --git a/arch/x86/include/asm/geode.h b/arch/x86/include/asm/geode.h index ad3c2ed75481..7cd73552a4e8 100644 --- a/arch/x86/include/asm/geode.h +++ b/arch/x86/include/asm/geode.h | |||
@@ -12,160 +12,7 @@ | |||
12 | 12 | ||
13 | #include <asm/processor.h> | 13 | #include <asm/processor.h> |
14 | #include <linux/io.h> | 14 | #include <linux/io.h> |
15 | 15 | #include <linux/cs5535.h> | |
16 | /* Generic southbridge functions */ | ||
17 | |||
18 | #define GEODE_DEV_PMS 0 | ||
19 | #define GEODE_DEV_ACPI 1 | ||
20 | #define GEODE_DEV_GPIO 2 | ||
21 | #define GEODE_DEV_MFGPT 3 | ||
22 | |||
23 | extern int geode_get_dev_base(unsigned int dev); | ||
24 | |||
25 | /* Useful macros */ | ||
26 | #define geode_pms_base() geode_get_dev_base(GEODE_DEV_PMS) | ||
27 | #define geode_acpi_base() geode_get_dev_base(GEODE_DEV_ACPI) | ||
28 | #define geode_gpio_base() geode_get_dev_base(GEODE_DEV_GPIO) | ||
29 | #define geode_mfgpt_base() geode_get_dev_base(GEODE_DEV_MFGPT) | ||
30 | |||
31 | /* MSRS */ | ||
32 | |||
33 | #define MSR_GLIU_P2D_RO0 0x10000029 | ||
34 | |||
35 | #define MSR_LX_GLD_MSR_CONFIG 0x48002001 | ||
36 | #define MSR_LX_MSR_PADSEL 0x48002011 /* NOT 0x48000011; the data | ||
37 | * sheet has the wrong value */ | ||
38 | #define MSR_GLCP_SYS_RSTPLL 0x4C000014 | ||
39 | #define MSR_GLCP_DOTPLL 0x4C000015 | ||
40 | |||
41 | #define MSR_LBAR_SMB 0x5140000B | ||
42 | #define MSR_LBAR_GPIO 0x5140000C | ||
43 | #define MSR_LBAR_MFGPT 0x5140000D | ||
44 | #define MSR_LBAR_ACPI 0x5140000E | ||
45 | #define MSR_LBAR_PMS 0x5140000F | ||
46 | |||
47 | #define MSR_DIVIL_SOFT_RESET 0x51400017 | ||
48 | |||
49 | #define MSR_PIC_YSEL_LOW 0x51400020 | ||
50 | #define MSR_PIC_YSEL_HIGH 0x51400021 | ||
51 | #define MSR_PIC_ZSEL_LOW 0x51400022 | ||
52 | #define MSR_PIC_ZSEL_HIGH 0x51400023 | ||
53 | #define MSR_PIC_IRQM_LPC 0x51400025 | ||
54 | |||
55 | #define MSR_MFGPT_IRQ 0x51400028 | ||
56 | #define MSR_MFGPT_NR 0x51400029 | ||
57 | #define MSR_MFGPT_SETUP 0x5140002B | ||
58 | |||
59 | #define MSR_LX_SPARE_MSR 0x80000011 /* DC-specific */ | ||
60 | |||
61 | #define MSR_GX_GLD_MSR_CONFIG 0xC0002001 | ||
62 | #define MSR_GX_MSR_PADSEL 0xC0002011 | ||
63 | |||
64 | /* Resource Sizes */ | ||
65 | |||
66 | #define LBAR_GPIO_SIZE 0xFF | ||
67 | #define LBAR_MFGPT_SIZE 0x40 | ||
68 | #define LBAR_ACPI_SIZE 0x40 | ||
69 | #define LBAR_PMS_SIZE 0x80 | ||
70 | |||
71 | /* ACPI registers (PMS block) */ | ||
72 | |||
73 | /* | ||
74 | * PM1_EN is only valid when VSA is enabled for 16 bit reads. | ||
75 | * When VSA is not enabled, *always* read both PM1_STS and PM1_EN | ||
76 | * with a 32 bit read at offset 0x0 | ||
77 | */ | ||
78 | |||
79 | #define PM1_STS 0x00 | ||
80 | #define PM1_EN 0x02 | ||
81 | #define PM1_CNT 0x08 | ||
82 | #define PM2_CNT 0x0C | ||
83 | #define PM_TMR 0x10 | ||
84 | #define PM_GPE0_STS 0x18 | ||
85 | #define PM_GPE0_EN 0x1C | ||
86 | |||
87 | /* PMC registers (PMS block) */ | ||
88 | |||
89 | #define PM_SSD 0x00 | ||
90 | #define PM_SCXA 0x04 | ||
91 | #define PM_SCYA 0x08 | ||
92 | #define PM_OUT_SLPCTL 0x0C | ||
93 | #define PM_SCLK 0x10 | ||
94 | #define PM_SED 0x1 | ||
95 | #define PM_SCXD 0x18 | ||
96 | #define PM_SCYD 0x1C | ||
97 | #define PM_IN_SLPCTL 0x20 | ||
98 | #define PM_WKD 0x30 | ||
99 | #define PM_WKXD 0x34 | ||
100 | #define PM_RD 0x38 | ||
101 | #define PM_WKXA 0x3C | ||
102 | #define PM_FSD 0x40 | ||
103 | #define PM_TSD 0x44 | ||
104 | #define PM_PSD 0x48 | ||
105 | #define PM_NWKD 0x4C | ||
106 | #define PM_AWKD 0x50 | ||
107 | #define PM_SSC 0x54 | ||
108 | |||
109 | /* VSA2 magic values */ | ||
110 | |||
111 | #define VSA_VRC_INDEX 0xAC1C | ||
112 | #define VSA_VRC_DATA 0xAC1E | ||
113 | #define VSA_VR_UNLOCK 0xFC53 /* unlock virtual register */ | ||
114 | #define VSA_VR_SIGNATURE 0x0003 | ||
115 | #define VSA_VR_MEM_SIZE 0x0200 | ||
116 | #define AMD_VSA_SIG 0x4132 /* signature is ascii 'VSA2' */ | ||
117 | #define GSW_VSA_SIG 0x534d /* General Software signature */ | ||
118 | /* GPIO */ | ||
119 | |||
120 | #define GPIO_OUTPUT_VAL 0x00 | ||
121 | #define GPIO_OUTPUT_ENABLE 0x04 | ||
122 | #define GPIO_OUTPUT_OPEN_DRAIN 0x08 | ||
123 | #define GPIO_OUTPUT_INVERT 0x0C | ||
124 | #define GPIO_OUTPUT_AUX1 0x10 | ||
125 | #define GPIO_OUTPUT_AUX2 0x14 | ||
126 | #define GPIO_PULL_UP 0x18 | ||
127 | #define GPIO_PULL_DOWN 0x1C | ||
128 | #define GPIO_INPUT_ENABLE 0x20 | ||
129 | #define GPIO_INPUT_INVERT 0x24 | ||
130 | #define GPIO_INPUT_FILTER 0x28 | ||
131 | #define GPIO_INPUT_EVENT_COUNT 0x2C | ||
132 | #define GPIO_READ_BACK 0x30 | ||
133 | #define GPIO_INPUT_AUX1 0x34 | ||
134 | #define GPIO_EVENTS_ENABLE 0x38 | ||
135 | #define GPIO_LOCK_ENABLE 0x3C | ||
136 | #define GPIO_POSITIVE_EDGE_EN 0x40 | ||
137 | #define GPIO_NEGATIVE_EDGE_EN 0x44 | ||
138 | #define GPIO_POSITIVE_EDGE_STS 0x48 | ||
139 | #define GPIO_NEGATIVE_EDGE_STS 0x4C | ||
140 | |||
141 | #define GPIO_MAP_X 0xE0 | ||
142 | #define GPIO_MAP_Y 0xE4 | ||
143 | #define GPIO_MAP_Z 0xE8 | ||
144 | #define GPIO_MAP_W 0xEC | ||
145 | |||
146 | static inline u32 geode_gpio(unsigned int nr) | ||
147 | { | ||
148 | BUG_ON(nr > 28); | ||
149 | return 1 << nr; | ||
150 | } | ||
151 | |||
152 | extern void geode_gpio_set(u32, unsigned int); | ||
153 | extern void geode_gpio_clear(u32, unsigned int); | ||
154 | extern int geode_gpio_isset(u32, unsigned int); | ||
155 | extern void geode_gpio_setup_event(unsigned int, int, int); | ||
156 | extern void geode_gpio_set_irq(unsigned int, unsigned int); | ||
157 | |||
158 | static inline void geode_gpio_event_irq(unsigned int gpio, int pair) | ||
159 | { | ||
160 | geode_gpio_setup_event(gpio, pair, 0); | ||
161 | } | ||
162 | |||
163 | static inline void geode_gpio_event_pme(unsigned int gpio, int pair) | ||
164 | { | ||
165 | geode_gpio_setup_event(gpio, pair, 1); | ||
166 | } | ||
167 | |||
168 | /* Specific geode tests */ | ||
169 | 16 | ||
170 | static inline int is_geode_gx(void) | 17 | static inline int is_geode_gx(void) |
171 | { | 18 | { |
@@ -186,68 +33,4 @@ static inline int is_geode(void) | |||
186 | return (is_geode_gx() || is_geode_lx()); | 33 | return (is_geode_gx() || is_geode_lx()); |
187 | } | 34 | } |
188 | 35 | ||
189 | #ifdef CONFIG_MGEODE_LX | ||
190 | extern int geode_has_vsa2(void); | ||
191 | #else | ||
192 | static inline int geode_has_vsa2(void) | ||
193 | { | ||
194 | return 0; | ||
195 | } | ||
196 | #endif | ||
197 | |||
198 | /* MFGPTs */ | ||
199 | |||
200 | #define MFGPT_MAX_TIMERS 8 | ||
201 | #define MFGPT_TIMER_ANY (-1) | ||
202 | |||
203 | #define MFGPT_DOMAIN_WORKING 1 | ||
204 | #define MFGPT_DOMAIN_STANDBY 2 | ||
205 | #define MFGPT_DOMAIN_ANY (MFGPT_DOMAIN_WORKING | MFGPT_DOMAIN_STANDBY) | ||
206 | |||
207 | #define MFGPT_CMP1 0 | ||
208 | #define MFGPT_CMP2 1 | ||
209 | |||
210 | #define MFGPT_EVENT_IRQ 0 | ||
211 | #define MFGPT_EVENT_NMI 1 | ||
212 | #define MFGPT_EVENT_RESET 3 | ||
213 | |||
214 | #define MFGPT_REG_CMP1 0 | ||
215 | #define MFGPT_REG_CMP2 2 | ||
216 | #define MFGPT_REG_COUNTER 4 | ||
217 | #define MFGPT_REG_SETUP 6 | ||
218 | |||
219 | #define MFGPT_SETUP_CNTEN (1 << 15) | ||
220 | #define MFGPT_SETUP_CMP2 (1 << 14) | ||
221 | #define MFGPT_SETUP_CMP1 (1 << 13) | ||
222 | #define MFGPT_SETUP_SETUP (1 << 12) | ||
223 | #define MFGPT_SETUP_STOPEN (1 << 11) | ||
224 | #define MFGPT_SETUP_EXTEN (1 << 10) | ||
225 | #define MFGPT_SETUP_REVEN (1 << 5) | ||
226 | #define MFGPT_SETUP_CLKSEL (1 << 4) | ||
227 | |||
228 | static inline void geode_mfgpt_write(int timer, u16 reg, u16 value) | ||
229 | { | ||
230 | u32 base = geode_get_dev_base(GEODE_DEV_MFGPT); | ||
231 | outw(value, base + reg + (timer * 8)); | ||
232 | } | ||
233 | |||
234 | static inline u16 geode_mfgpt_read(int timer, u16 reg) | ||
235 | { | ||
236 | u32 base = geode_get_dev_base(GEODE_DEV_MFGPT); | ||
237 | return inw(base + reg + (timer * 8)); | ||
238 | } | ||
239 | |||
240 | extern int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable); | ||
241 | extern int geode_mfgpt_set_irq(int timer, int cmp, int *irq, int enable); | ||
242 | extern int geode_mfgpt_alloc_timer(int timer, int domain); | ||
243 | |||
244 | #define geode_mfgpt_setup_irq(t, c, i) geode_mfgpt_set_irq((t), (c), (i), 1) | ||
245 | #define geode_mfgpt_release_irq(t, c, i) geode_mfgpt_set_irq((t), (c), (i), 0) | ||
246 | |||
247 | #ifdef CONFIG_GEODE_MFGPT_TIMER | ||
248 | extern int __init mfgpt_timer_setup(void); | ||
249 | #else | ||
250 | static inline int mfgpt_timer_setup(void) { return 0; } | ||
251 | #endif | ||
252 | |||
253 | #endif /* _ASM_X86_GEODE_H */ | 36 | #endif /* _ASM_X86_GEODE_H */ |
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 82e3e8f01043..0f8576427cfe 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h | |||
@@ -12,7 +12,7 @@ typedef struct { | |||
12 | unsigned int apic_timer_irqs; /* arch dependent */ | 12 | unsigned int apic_timer_irqs; /* arch dependent */ |
13 | unsigned int irq_spurious_count; | 13 | unsigned int irq_spurious_count; |
14 | #endif | 14 | #endif |
15 | unsigned int generic_irqs; /* arch dependent */ | 15 | unsigned int x86_platform_ipis; /* arch dependent */ |
16 | unsigned int apic_perf_irqs; | 16 | unsigned int apic_perf_irqs; |
17 | unsigned int apic_pending_irqs; | 17 | unsigned int apic_pending_irqs; |
18 | #ifdef CONFIG_SMP | 18 | #ifdef CONFIG_SMP |
@@ -20,11 +20,11 @@ typedef struct { | |||
20 | unsigned int irq_call_count; | 20 | unsigned int irq_call_count; |
21 | unsigned int irq_tlb_count; | 21 | unsigned int irq_tlb_count; |
22 | #endif | 22 | #endif |
23 | #ifdef CONFIG_X86_MCE | 23 | #ifdef CONFIG_X86_THERMAL_VECTOR |
24 | unsigned int irq_thermal_count; | 24 | unsigned int irq_thermal_count; |
25 | # ifdef CONFIG_X86_MCE_THRESHOLD | 25 | #endif |
26 | #ifdef CONFIG_X86_MCE_THRESHOLD | ||
26 | unsigned int irq_threshold_count; | 27 | unsigned int irq_threshold_count; |
27 | # endif | ||
28 | #endif | 28 | #endif |
29 | } ____cacheline_aligned irq_cpustat_t; | 29 | } ____cacheline_aligned irq_cpustat_t; |
30 | 30 | ||
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 1c22cb05ad6a..5d89fd2a3690 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h | |||
@@ -65,11 +65,12 @@ | |||
65 | /* hpet memory map physical address */ | 65 | /* hpet memory map physical address */ |
66 | extern unsigned long hpet_address; | 66 | extern unsigned long hpet_address; |
67 | extern unsigned long force_hpet_address; | 67 | extern unsigned long force_hpet_address; |
68 | extern u8 hpet_blockid; | ||
68 | extern int hpet_force_user; | 69 | extern int hpet_force_user; |
69 | extern int is_hpet_enabled(void); | 70 | extern int is_hpet_enabled(void); |
70 | extern int hpet_enable(void); | 71 | extern int hpet_enable(void); |
71 | extern void hpet_disable(void); | 72 | extern void hpet_disable(void); |
72 | extern unsigned long hpet_readl(unsigned long a); | 73 | extern unsigned int hpet_readl(unsigned int a); |
73 | extern void force_hpet_resume(void); | 74 | extern void force_hpet_resume(void); |
74 | 75 | ||
75 | extern void hpet_msi_unmask(unsigned int irq); | 76 | extern void hpet_msi_unmask(unsigned int irq); |
@@ -78,9 +79,9 @@ extern void hpet_msi_write(unsigned int irq, struct msi_msg *msg); | |||
78 | extern void hpet_msi_read(unsigned int irq, struct msi_msg *msg); | 79 | extern void hpet_msi_read(unsigned int irq, struct msi_msg *msg); |
79 | 80 | ||
80 | #ifdef CONFIG_PCI_MSI | 81 | #ifdef CONFIG_PCI_MSI |
81 | extern int arch_setup_hpet_msi(unsigned int irq); | 82 | extern int arch_setup_hpet_msi(unsigned int irq, unsigned int id); |
82 | #else | 83 | #else |
83 | static inline int arch_setup_hpet_msi(unsigned int irq) | 84 | static inline int arch_setup_hpet_msi(unsigned int irq, unsigned int id) |
84 | { | 85 | { |
85 | return -EINVAL; | 86 | return -EINVAL; |
86 | } | 87 | } |
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h new file mode 100644 index 000000000000..0675a7c4c20e --- /dev/null +++ b/arch/x86/include/asm/hw_breakpoint.h | |||
@@ -0,0 +1,73 @@ | |||
1 | #ifndef _I386_HW_BREAKPOINT_H | ||
2 | #define _I386_HW_BREAKPOINT_H | ||
3 | |||
4 | #ifdef __KERNEL__ | ||
5 | #define __ARCH_HW_BREAKPOINT_H | ||
6 | |||
7 | /* | ||
8 | * The name should probably be something dealt in | ||
9 | * a higher level. While dealing with the user | ||
10 | * (display/resolving) | ||
11 | */ | ||
12 | struct arch_hw_breakpoint { | ||
13 | char *name; /* Contains name of the symbol to set bkpt */ | ||
14 | unsigned long address; | ||
15 | u8 len; | ||
16 | u8 type; | ||
17 | }; | ||
18 | |||
19 | #include <linux/kdebug.h> | ||
20 | #include <linux/percpu.h> | ||
21 | #include <linux/list.h> | ||
22 | |||
23 | /* Available HW breakpoint length encodings */ | ||
24 | #define X86_BREAKPOINT_LEN_1 0x40 | ||
25 | #define X86_BREAKPOINT_LEN_2 0x44 | ||
26 | #define X86_BREAKPOINT_LEN_4 0x4c | ||
27 | #define X86_BREAKPOINT_LEN_EXECUTE 0x40 | ||
28 | |||
29 | #ifdef CONFIG_X86_64 | ||
30 | #define X86_BREAKPOINT_LEN_8 0x48 | ||
31 | #endif | ||
32 | |||
33 | /* Available HW breakpoint type encodings */ | ||
34 | |||
35 | /* trigger on instruction execute */ | ||
36 | #define X86_BREAKPOINT_EXECUTE 0x80 | ||
37 | /* trigger on memory write */ | ||
38 | #define X86_BREAKPOINT_WRITE 0x81 | ||
39 | /* trigger on memory read or write */ | ||
40 | #define X86_BREAKPOINT_RW 0x83 | ||
41 | |||
42 | /* Total number of available HW breakpoint registers */ | ||
43 | #define HBP_NUM 4 | ||
44 | |||
45 | struct perf_event; | ||
46 | struct pmu; | ||
47 | |||
48 | extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len); | ||
49 | extern int arch_validate_hwbkpt_settings(struct perf_event *bp, | ||
50 | struct task_struct *tsk); | ||
51 | extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, | ||
52 | unsigned long val, void *data); | ||
53 | |||
54 | |||
55 | int arch_install_hw_breakpoint(struct perf_event *bp); | ||
56 | void arch_uninstall_hw_breakpoint(struct perf_event *bp); | ||
57 | void hw_breakpoint_pmu_read(struct perf_event *bp); | ||
58 | void hw_breakpoint_pmu_unthrottle(struct perf_event *bp); | ||
59 | |||
60 | extern void | ||
61 | arch_fill_perf_breakpoint(struct perf_event *bp); | ||
62 | |||
63 | unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type); | ||
64 | int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type); | ||
65 | |||
66 | extern int arch_bp_generic_fields(int x86_len, int x86_type, | ||
67 | int *gen_len, int *gen_type); | ||
68 | |||
69 | extern struct pmu perf_ops_bp; | ||
70 | |||
71 | #endif /* __KERNEL__ */ | ||
72 | #endif /* _I386_HW_BREAKPOINT_H */ | ||
73 | |||
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index ba180d93b08c..eeac829a0f44 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h | |||
@@ -27,7 +27,7 @@ | |||
27 | 27 | ||
28 | /* Interrupt handlers registered during init_IRQ */ | 28 | /* Interrupt handlers registered during init_IRQ */ |
29 | extern void apic_timer_interrupt(void); | 29 | extern void apic_timer_interrupt(void); |
30 | extern void generic_interrupt(void); | 30 | extern void x86_platform_ipi(void); |
31 | extern void error_interrupt(void); | 31 | extern void error_interrupt(void); |
32 | extern void perf_pending_interrupt(void); | 32 | extern void perf_pending_interrupt(void); |
33 | 33 | ||
@@ -79,14 +79,33 @@ static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr, | |||
79 | int ioapic, int ioapic_pin, | 79 | int ioapic, int ioapic_pin, |
80 | int trigger, int polarity) | 80 | int trigger, int polarity) |
81 | { | 81 | { |
82 | irq_attr->ioapic = ioapic; | 82 | irq_attr->ioapic = ioapic; |
83 | irq_attr->ioapic_pin = ioapic_pin; | 83 | irq_attr->ioapic_pin = ioapic_pin; |
84 | irq_attr->trigger = trigger; | 84 | irq_attr->trigger = trigger; |
85 | irq_attr->polarity = polarity; | 85 | irq_attr->polarity = polarity; |
86 | } | 86 | } |
87 | 87 | ||
88 | extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, | 88 | /* |
89 | struct io_apic_irq_attr *irq_attr); | 89 | * This is performance-critical, we want to do it O(1) |
90 | * | ||
91 | * Most irqs are mapped 1:1 with pins. | ||
92 | */ | ||
93 | struct irq_cfg { | ||
94 | struct irq_pin_list *irq_2_pin; | ||
95 | cpumask_var_t domain; | ||
96 | cpumask_var_t old_domain; | ||
97 | u8 vector; | ||
98 | u8 move_in_progress : 1; | ||
99 | }; | ||
100 | |||
101 | extern struct irq_cfg *irq_cfg(unsigned int); | ||
102 | extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *); | ||
103 | extern void send_cleanup_vector(struct irq_cfg *); | ||
104 | |||
105 | struct irq_desc; | ||
106 | extern unsigned int set_desc_affinity(struct irq_desc *, const struct cpumask *, | ||
107 | unsigned int *dest_id); | ||
108 | extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr); | ||
90 | extern void setup_ioapic_dest(void); | 109 | extern void setup_ioapic_dest(void); |
91 | 110 | ||
92 | extern void enable_IO_APIC(void); | 111 | extern void enable_IO_APIC(void); |
@@ -101,7 +120,7 @@ extern void eisa_set_level_irq(unsigned int irq); | |||
101 | /* SMP */ | 120 | /* SMP */ |
102 | extern void smp_apic_timer_interrupt(struct pt_regs *); | 121 | extern void smp_apic_timer_interrupt(struct pt_regs *); |
103 | extern void smp_spurious_interrupt(struct pt_regs *); | 122 | extern void smp_spurious_interrupt(struct pt_regs *); |
104 | extern void smp_generic_interrupt(struct pt_regs *); | 123 | extern void smp_x86_platform_ipi(struct pt_regs *); |
105 | extern void smp_error_interrupt(struct pt_regs *); | 124 | extern void smp_error_interrupt(struct pt_regs *); |
106 | #ifdef CONFIG_X86_IO_APIC | 125 | #ifdef CONFIG_X86_IO_APIC |
107 | extern asmlinkage void smp_irq_move_cleanup_interrupt(void); | 126 | extern asmlinkage void smp_irq_move_cleanup_interrupt(void); |
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 0b20bbb758f2..ebfb8a9e11f7 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h | |||
@@ -10,6 +10,8 @@ | |||
10 | #ifndef _ASM_X86_I387_H | 10 | #ifndef _ASM_X86_I387_H |
11 | #define _ASM_X86_I387_H | 11 | #define _ASM_X86_I387_H |
12 | 12 | ||
13 | #ifndef __ASSEMBLY__ | ||
14 | |||
13 | #include <linux/sched.h> | 15 | #include <linux/sched.h> |
14 | #include <linux/kernel_stat.h> | 16 | #include <linux/kernel_stat.h> |
15 | #include <linux/regset.h> | 17 | #include <linux/regset.h> |
@@ -411,4 +413,9 @@ static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) | |||
411 | } | 413 | } |
412 | } | 414 | } |
413 | 415 | ||
416 | #endif /* __ASSEMBLY__ */ | ||
417 | |||
418 | #define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 | ||
419 | #define PSHUFB_XMM5_XMM6 .byte 0x66, 0x0f, 0x38, 0x00, 0xf5 | ||
420 | |||
414 | #endif /* _ASM_X86_I387_H */ | 421 | #endif /* _ASM_X86_I387_H */ |
diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h new file mode 100644 index 000000000000..205b063e3e32 --- /dev/null +++ b/arch/x86/include/asm/inat.h | |||
@@ -0,0 +1,220 @@ | |||
1 | #ifndef _ASM_X86_INAT_H | ||
2 | #define _ASM_X86_INAT_H | ||
3 | /* | ||
4 | * x86 instruction attributes | ||
5 | * | ||
6 | * Written by Masami Hiramatsu <mhiramat@redhat.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software | ||
20 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
21 | * | ||
22 | */ | ||
23 | #include <asm/inat_types.h> | ||
24 | |||
25 | /* | ||
26 | * Internal bits. Don't use bitmasks directly, because these bits are | ||
27 | * unstable. You should use checking functions. | ||
28 | */ | ||
29 | |||
30 | #define INAT_OPCODE_TABLE_SIZE 256 | ||
31 | #define INAT_GROUP_TABLE_SIZE 8 | ||
32 | |||
33 | /* Legacy last prefixes */ | ||
34 | #define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */ | ||
35 | #define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */ | ||
36 | #define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */ | ||
37 | /* Other Legacy prefixes */ | ||
38 | #define INAT_PFX_LOCK 4 /* 0xF0 */ | ||
39 | #define INAT_PFX_CS 5 /* 0x2E */ | ||
40 | #define INAT_PFX_DS 6 /* 0x3E */ | ||
41 | #define INAT_PFX_ES 7 /* 0x26 */ | ||
42 | #define INAT_PFX_FS 8 /* 0x64 */ | ||
43 | #define INAT_PFX_GS 9 /* 0x65 */ | ||
44 | #define INAT_PFX_SS 10 /* 0x36 */ | ||
45 | #define INAT_PFX_ADDRSZ 11 /* 0x67 */ | ||
46 | /* x86-64 REX prefix */ | ||
47 | #define INAT_PFX_REX 12 /* 0x4X */ | ||
48 | /* AVX VEX prefixes */ | ||
49 | #define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ | ||
50 | #define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ | ||
51 | |||
52 | #define INAT_LSTPFX_MAX 3 | ||
53 | #define INAT_LGCPFX_MAX 11 | ||
54 | |||
55 | /* Immediate size */ | ||
56 | #define INAT_IMM_BYTE 1 | ||
57 | #define INAT_IMM_WORD 2 | ||
58 | #define INAT_IMM_DWORD 3 | ||
59 | #define INAT_IMM_QWORD 4 | ||
60 | #define INAT_IMM_PTR 5 | ||
61 | #define INAT_IMM_VWORD32 6 | ||
62 | #define INAT_IMM_VWORD 7 | ||
63 | |||
64 | /* Legacy prefix */ | ||
65 | #define INAT_PFX_OFFS 0 | ||
66 | #define INAT_PFX_BITS 4 | ||
67 | #define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1) | ||
68 | #define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS) | ||
69 | /* Escape opcodes */ | ||
70 | #define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS) | ||
71 | #define INAT_ESC_BITS 2 | ||
72 | #define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1) | ||
73 | #define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS) | ||
74 | /* Group opcodes (1-16) */ | ||
75 | #define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS) | ||
76 | #define INAT_GRP_BITS 5 | ||
77 | #define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1) | ||
78 | #define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS) | ||
79 | /* Immediates */ | ||
80 | #define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS) | ||
81 | #define INAT_IMM_BITS 3 | ||
82 | #define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS) | ||
83 | /* Flags */ | ||
84 | #define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS) | ||
85 | #define INAT_MODRM (1 << (INAT_FLAG_OFFS)) | ||
86 | #define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1)) | ||
87 | #define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2)) | ||
88 | #define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3)) | ||
89 | #define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) | ||
90 | #define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) | ||
91 | #define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) | ||
92 | /* Attribute making macros for attribute tables */ | ||
93 | #define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) | ||
94 | #define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) | ||
95 | #define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM) | ||
96 | #define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS) | ||
97 | |||
98 | /* Attribute search APIs */ | ||
99 | extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); | ||
100 | extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, | ||
101 | insn_byte_t last_pfx, | ||
102 | insn_attr_t esc_attr); | ||
103 | extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, | ||
104 | insn_byte_t last_pfx, | ||
105 | insn_attr_t esc_attr); | ||
106 | extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, | ||
107 | insn_byte_t vex_m, | ||
108 | insn_byte_t vex_pp); | ||
109 | |||
110 | /* Attribute checking functions */ | ||
111 | static inline int inat_is_legacy_prefix(insn_attr_t attr) | ||
112 | { | ||
113 | attr &= INAT_PFX_MASK; | ||
114 | return attr && attr <= INAT_LGCPFX_MAX; | ||
115 | } | ||
116 | |||
117 | static inline int inat_is_address_size_prefix(insn_attr_t attr) | ||
118 | { | ||
119 | return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ; | ||
120 | } | ||
121 | |||
122 | static inline int inat_is_operand_size_prefix(insn_attr_t attr) | ||
123 | { | ||
124 | return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ; | ||
125 | } | ||
126 | |||
127 | static inline int inat_is_rex_prefix(insn_attr_t attr) | ||
128 | { | ||
129 | return (attr & INAT_PFX_MASK) == INAT_PFX_REX; | ||
130 | } | ||
131 | |||
132 | static inline int inat_last_prefix_id(insn_attr_t attr) | ||
133 | { | ||
134 | if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX) | ||
135 | return 0; | ||
136 | else | ||
137 | return attr & INAT_PFX_MASK; | ||
138 | } | ||
139 | |||
140 | static inline int inat_is_vex_prefix(insn_attr_t attr) | ||
141 | { | ||
142 | attr &= INAT_PFX_MASK; | ||
143 | return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3; | ||
144 | } | ||
145 | |||
146 | static inline int inat_is_vex3_prefix(insn_attr_t attr) | ||
147 | { | ||
148 | return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3; | ||
149 | } | ||
150 | |||
151 | static inline int inat_is_escape(insn_attr_t attr) | ||
152 | { | ||
153 | return attr & INAT_ESC_MASK; | ||
154 | } | ||
155 | |||
156 | static inline int inat_escape_id(insn_attr_t attr) | ||
157 | { | ||
158 | return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS; | ||
159 | } | ||
160 | |||
161 | static inline int inat_is_group(insn_attr_t attr) | ||
162 | { | ||
163 | return attr & INAT_GRP_MASK; | ||
164 | } | ||
165 | |||
166 | static inline int inat_group_id(insn_attr_t attr) | ||
167 | { | ||
168 | return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS; | ||
169 | } | ||
170 | |||
171 | static inline int inat_group_common_attribute(insn_attr_t attr) | ||
172 | { | ||
173 | return attr & ~INAT_GRP_MASK; | ||
174 | } | ||
175 | |||
176 | static inline int inat_has_immediate(insn_attr_t attr) | ||
177 | { | ||
178 | return attr & INAT_IMM_MASK; | ||
179 | } | ||
180 | |||
181 | static inline int inat_immediate_size(insn_attr_t attr) | ||
182 | { | ||
183 | return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS; | ||
184 | } | ||
185 | |||
186 | static inline int inat_has_modrm(insn_attr_t attr) | ||
187 | { | ||
188 | return attr & INAT_MODRM; | ||
189 | } | ||
190 | |||
191 | static inline int inat_is_force64(insn_attr_t attr) | ||
192 | { | ||
193 | return attr & INAT_FORCE64; | ||
194 | } | ||
195 | |||
196 | static inline int inat_has_second_immediate(insn_attr_t attr) | ||
197 | { | ||
198 | return attr & INAT_SCNDIMM; | ||
199 | } | ||
200 | |||
201 | static inline int inat_has_moffset(insn_attr_t attr) | ||
202 | { | ||
203 | return attr & INAT_MOFFSET; | ||
204 | } | ||
205 | |||
206 | static inline int inat_has_variant(insn_attr_t attr) | ||
207 | { | ||
208 | return attr & INAT_VARIANT; | ||
209 | } | ||
210 | |||
211 | static inline int inat_accept_vex(insn_attr_t attr) | ||
212 | { | ||
213 | return attr & INAT_VEXOK; | ||
214 | } | ||
215 | |||
216 | static inline int inat_must_vex(insn_attr_t attr) | ||
217 | { | ||
218 | return attr & INAT_VEXONLY; | ||
219 | } | ||
220 | #endif | ||
diff --git a/arch/x86/include/asm/inat_types.h b/arch/x86/include/asm/inat_types.h new file mode 100644 index 000000000000..cb3c20ce39cf --- /dev/null +++ b/arch/x86/include/asm/inat_types.h | |||
@@ -0,0 +1,29 @@ | |||
1 | #ifndef _ASM_X86_INAT_TYPES_H | ||
2 | #define _ASM_X86_INAT_TYPES_H | ||
3 | /* | ||
4 | * x86 instruction attributes | ||
5 | * | ||
6 | * Written by Masami Hiramatsu <mhiramat@redhat.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software | ||
20 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | /* Instruction attributes */ | ||
25 | typedef unsigned int insn_attr_t; | ||
26 | typedef unsigned char insn_byte_t; | ||
27 | typedef signed int insn_value_t; | ||
28 | |||
29 | #endif | ||
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h new file mode 100644 index 000000000000..96c2e0ad04ca --- /dev/null +++ b/arch/x86/include/asm/insn.h | |||
@@ -0,0 +1,184 @@ | |||
1 | #ifndef _ASM_X86_INSN_H | ||
2 | #define _ASM_X86_INSN_H | ||
3 | /* | ||
4 | * x86 instruction analysis | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
19 | * | ||
20 | * Copyright (C) IBM Corporation, 2009 | ||
21 | */ | ||
22 | |||
23 | /* insn_attr_t is defined in inat.h */ | ||
24 | #include <asm/inat.h> | ||
25 | |||
26 | struct insn_field { | ||
27 | union { | ||
28 | insn_value_t value; | ||
29 | insn_byte_t bytes[4]; | ||
30 | }; | ||
31 | /* !0 if we've run insn_get_xxx() for this field */ | ||
32 | unsigned char got; | ||
33 | unsigned char nbytes; | ||
34 | }; | ||
35 | |||
36 | struct insn { | ||
37 | struct insn_field prefixes; /* | ||
38 | * Prefixes | ||
39 | * prefixes.bytes[3]: last prefix | ||
40 | */ | ||
41 | struct insn_field rex_prefix; /* REX prefix */ | ||
42 | struct insn_field vex_prefix; /* VEX prefix */ | ||
43 | struct insn_field opcode; /* | ||
44 | * opcode.bytes[0]: opcode1 | ||
45 | * opcode.bytes[1]: opcode2 | ||
46 | * opcode.bytes[2]: opcode3 | ||
47 | */ | ||
48 | struct insn_field modrm; | ||
49 | struct insn_field sib; | ||
50 | struct insn_field displacement; | ||
51 | union { | ||
52 | struct insn_field immediate; | ||
53 | struct insn_field moffset1; /* for 64bit MOV */ | ||
54 | struct insn_field immediate1; /* for 64bit imm or off16/32 */ | ||
55 | }; | ||
56 | union { | ||
57 | struct insn_field moffset2; /* for 64bit MOV */ | ||
58 | struct insn_field immediate2; /* for 64bit imm or seg16 */ | ||
59 | }; | ||
60 | |||
61 | insn_attr_t attr; | ||
62 | unsigned char opnd_bytes; | ||
63 | unsigned char addr_bytes; | ||
64 | unsigned char length; | ||
65 | unsigned char x86_64; | ||
66 | |||
67 | const insn_byte_t *kaddr; /* kernel address of insn to analyze */ | ||
68 | const insn_byte_t *next_byte; | ||
69 | }; | ||
70 | |||
71 | #define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) | ||
72 | #define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) | ||
73 | #define X86_MODRM_RM(modrm) ((modrm) & 0x07) | ||
74 | |||
75 | #define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6) | ||
76 | #define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3) | ||
77 | #define X86_SIB_BASE(sib) ((sib) & 0x07) | ||
78 | |||
79 | #define X86_REX_W(rex) ((rex) & 8) | ||
80 | #define X86_REX_R(rex) ((rex) & 4) | ||
81 | #define X86_REX_X(rex) ((rex) & 2) | ||
82 | #define X86_REX_B(rex) ((rex) & 1) | ||
83 | |||
84 | /* VEX bit flags */ | ||
85 | #define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */ | ||
86 | #define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */ | ||
87 | #define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */ | ||
88 | #define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ | ||
89 | #define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ | ||
90 | /* VEX bit fields */ | ||
91 | #define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ | ||
92 | #define X86_VEX2_M 1 /* VEX2.M always 1 */ | ||
93 | #define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ | ||
94 | #define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ | ||
95 | #define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ | ||
96 | |||
97 | /* The last prefix is needed for two-byte and three-byte opcodes */ | ||
98 | static inline insn_byte_t insn_last_prefix(struct insn *insn) | ||
99 | { | ||
100 | return insn->prefixes.bytes[3]; | ||
101 | } | ||
102 | |||
103 | extern void insn_init(struct insn *insn, const void *kaddr, int x86_64); | ||
104 | extern void insn_get_prefixes(struct insn *insn); | ||
105 | extern void insn_get_opcode(struct insn *insn); | ||
106 | extern void insn_get_modrm(struct insn *insn); | ||
107 | extern void insn_get_sib(struct insn *insn); | ||
108 | extern void insn_get_displacement(struct insn *insn); | ||
109 | extern void insn_get_immediate(struct insn *insn); | ||
110 | extern void insn_get_length(struct insn *insn); | ||
111 | |||
112 | /* Attribute will be determined after getting ModRM (for opcode groups) */ | ||
113 | static inline void insn_get_attribute(struct insn *insn) | ||
114 | { | ||
115 | insn_get_modrm(insn); | ||
116 | } | ||
117 | |||
118 | /* Instruction uses RIP-relative addressing */ | ||
119 | extern int insn_rip_relative(struct insn *insn); | ||
120 | |||
121 | /* Init insn for kernel text */ | ||
122 | static inline void kernel_insn_init(struct insn *insn, const void *kaddr) | ||
123 | { | ||
124 | #ifdef CONFIG_X86_64 | ||
125 | insn_init(insn, kaddr, 1); | ||
126 | #else /* CONFIG_X86_32 */ | ||
127 | insn_init(insn, kaddr, 0); | ||
128 | #endif | ||
129 | } | ||
130 | |||
131 | static inline int insn_is_avx(struct insn *insn) | ||
132 | { | ||
133 | if (!insn->prefixes.got) | ||
134 | insn_get_prefixes(insn); | ||
135 | return (insn->vex_prefix.value != 0); | ||
136 | } | ||
137 | |||
138 | static inline insn_byte_t insn_vex_m_bits(struct insn *insn) | ||
139 | { | ||
140 | if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ | ||
141 | return X86_VEX2_M; | ||
142 | else | ||
143 | return X86_VEX3_M(insn->vex_prefix.bytes[1]); | ||
144 | } | ||
145 | |||
146 | static inline insn_byte_t insn_vex_p_bits(struct insn *insn) | ||
147 | { | ||
148 | if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ | ||
149 | return X86_VEX_P(insn->vex_prefix.bytes[1]); | ||
150 | else | ||
151 | return X86_VEX_P(insn->vex_prefix.bytes[2]); | ||
152 | } | ||
153 | |||
154 | /* Offset of each field from kaddr */ | ||
155 | static inline int insn_offset_rex_prefix(struct insn *insn) | ||
156 | { | ||
157 | return insn->prefixes.nbytes; | ||
158 | } | ||
159 | static inline int insn_offset_vex_prefix(struct insn *insn) | ||
160 | { | ||
161 | return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes; | ||
162 | } | ||
163 | static inline int insn_offset_opcode(struct insn *insn) | ||
164 | { | ||
165 | return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes; | ||
166 | } | ||
167 | static inline int insn_offset_modrm(struct insn *insn) | ||
168 | { | ||
169 | return insn_offset_opcode(insn) + insn->opcode.nbytes; | ||
170 | } | ||
171 | static inline int insn_offset_sib(struct insn *insn) | ||
172 | { | ||
173 | return insn_offset_modrm(insn) + insn->modrm.nbytes; | ||
174 | } | ||
175 | static inline int insn_offset_displacement(struct insn *insn) | ||
176 | { | ||
177 | return insn_offset_sib(insn) + insn->sib.nbytes; | ||
178 | } | ||
179 | static inline int insn_offset_immediate(struct insn *insn) | ||
180 | { | ||
181 | return insn_offset_displacement(insn) + insn->displacement.nbytes; | ||
182 | } | ||
183 | |||
184 | #endif /* _ASM_X86_INSN_H */ | ||
diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h new file mode 100644 index 000000000000..14cf526091f9 --- /dev/null +++ b/arch/x86/include/asm/inst.h | |||
@@ -0,0 +1,150 @@ | |||
1 | /* | ||
2 | * Generate .byte code for some instructions not supported by old | ||
3 | * binutils. | ||
4 | */ | ||
5 | #ifndef X86_ASM_INST_H | ||
6 | #define X86_ASM_INST_H | ||
7 | |||
8 | #ifdef __ASSEMBLY__ | ||
9 | |||
10 | .macro XMM_NUM opd xmm | ||
11 | .ifc \xmm,%xmm0 | ||
12 | \opd = 0 | ||
13 | .endif | ||
14 | .ifc \xmm,%xmm1 | ||
15 | \opd = 1 | ||
16 | .endif | ||
17 | .ifc \xmm,%xmm2 | ||
18 | \opd = 2 | ||
19 | .endif | ||
20 | .ifc \xmm,%xmm3 | ||
21 | \opd = 3 | ||
22 | .endif | ||
23 | .ifc \xmm,%xmm4 | ||
24 | \opd = 4 | ||
25 | .endif | ||
26 | .ifc \xmm,%xmm5 | ||
27 | \opd = 5 | ||
28 | .endif | ||
29 | .ifc \xmm,%xmm6 | ||
30 | \opd = 6 | ||
31 | .endif | ||
32 | .ifc \xmm,%xmm7 | ||
33 | \opd = 7 | ||
34 | .endif | ||
35 | .ifc \xmm,%xmm8 | ||
36 | \opd = 8 | ||
37 | .endif | ||
38 | .ifc \xmm,%xmm9 | ||
39 | \opd = 9 | ||
40 | .endif | ||
41 | .ifc \xmm,%xmm10 | ||
42 | \opd = 10 | ||
43 | .endif | ||
44 | .ifc \xmm,%xmm11 | ||
45 | \opd = 11 | ||
46 | .endif | ||
47 | .ifc \xmm,%xmm12 | ||
48 | \opd = 12 | ||
49 | .endif | ||
50 | .ifc \xmm,%xmm13 | ||
51 | \opd = 13 | ||
52 | .endif | ||
53 | .ifc \xmm,%xmm14 | ||
54 | \opd = 14 | ||
55 | .endif | ||
56 | .ifc \xmm,%xmm15 | ||
57 | \opd = 15 | ||
58 | .endif | ||
59 | .endm | ||
60 | |||
61 | .macro PFX_OPD_SIZE | ||
62 | .byte 0x66 | ||
63 | .endm | ||
64 | |||
65 | .macro PFX_REX opd1 opd2 | ||
66 | .if (\opd1 | \opd2) & 8 | ||
67 | .byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) | ||
68 | .endif | ||
69 | .endm | ||
70 | |||
71 | .macro MODRM mod opd1 opd2 | ||
72 | .byte \mod | (\opd1 & 7) | ((\opd2 & 7) << 3) | ||
73 | .endm | ||
74 | |||
75 | .macro PSHUFB_XMM xmm1 xmm2 | ||
76 | XMM_NUM pshufb_opd1 \xmm1 | ||
77 | XMM_NUM pshufb_opd2 \xmm2 | ||
78 | PFX_OPD_SIZE | ||
79 | PFX_REX pshufb_opd1 pshufb_opd2 | ||
80 | .byte 0x0f, 0x38, 0x00 | ||
81 | MODRM 0xc0 pshufb_opd1 pshufb_opd2 | ||
82 | .endm | ||
83 | |||
84 | .macro PCLMULQDQ imm8 xmm1 xmm2 | ||
85 | XMM_NUM clmul_opd1 \xmm1 | ||
86 | XMM_NUM clmul_opd2 \xmm2 | ||
87 | PFX_OPD_SIZE | ||
88 | PFX_REX clmul_opd1 clmul_opd2 | ||
89 | .byte 0x0f, 0x3a, 0x44 | ||
90 | MODRM 0xc0 clmul_opd1 clmul_opd2 | ||
91 | .byte \imm8 | ||
92 | .endm | ||
93 | |||
94 | .macro AESKEYGENASSIST rcon xmm1 xmm2 | ||
95 | XMM_NUM aeskeygen_opd1 \xmm1 | ||
96 | XMM_NUM aeskeygen_opd2 \xmm2 | ||
97 | PFX_OPD_SIZE | ||
98 | PFX_REX aeskeygen_opd1 aeskeygen_opd2 | ||
99 | .byte 0x0f, 0x3a, 0xdf | ||
100 | MODRM 0xc0 aeskeygen_opd1 aeskeygen_opd2 | ||
101 | .byte \rcon | ||
102 | .endm | ||
103 | |||
104 | .macro AESIMC xmm1 xmm2 | ||
105 | XMM_NUM aesimc_opd1 \xmm1 | ||
106 | XMM_NUM aesimc_opd2 \xmm2 | ||
107 | PFX_OPD_SIZE | ||
108 | PFX_REX aesimc_opd1 aesimc_opd2 | ||
109 | .byte 0x0f, 0x38, 0xdb | ||
110 | MODRM 0xc0 aesimc_opd1 aesimc_opd2 | ||
111 | .endm | ||
112 | |||
113 | .macro AESENC xmm1 xmm2 | ||
114 | XMM_NUM aesenc_opd1 \xmm1 | ||
115 | XMM_NUM aesenc_opd2 \xmm2 | ||
116 | PFX_OPD_SIZE | ||
117 | PFX_REX aesenc_opd1 aesenc_opd2 | ||
118 | .byte 0x0f, 0x38, 0xdc | ||
119 | MODRM 0xc0 aesenc_opd1 aesenc_opd2 | ||
120 | .endm | ||
121 | |||
122 | .macro AESENCLAST xmm1 xmm2 | ||
123 | XMM_NUM aesenclast_opd1 \xmm1 | ||
124 | XMM_NUM aesenclast_opd2 \xmm2 | ||
125 | PFX_OPD_SIZE | ||
126 | PFX_REX aesenclast_opd1 aesenclast_opd2 | ||
127 | .byte 0x0f, 0x38, 0xdd | ||
128 | MODRM 0xc0 aesenclast_opd1 aesenclast_opd2 | ||
129 | .endm | ||
130 | |||
131 | .macro AESDEC xmm1 xmm2 | ||
132 | XMM_NUM aesdec_opd1 \xmm1 | ||
133 | XMM_NUM aesdec_opd2 \xmm2 | ||
134 | PFX_OPD_SIZE | ||
135 | PFX_REX aesdec_opd1 aesdec_opd2 | ||
136 | .byte 0x0f, 0x38, 0xde | ||
137 | MODRM 0xc0 aesdec_opd1 aesdec_opd2 | ||
138 | .endm | ||
139 | |||
140 | .macro AESDECLAST xmm1 xmm2 | ||
141 | XMM_NUM aesdeclast_opd1 \xmm1 | ||
142 | XMM_NUM aesdeclast_opd2 \xmm2 | ||
143 | PFX_OPD_SIZE | ||
144 | PFX_REX aesdeclast_opd1 aesdeclast_opd2 | ||
145 | .byte 0x0f, 0x38, 0xdf | ||
146 | MODRM 0xc0 aesdeclast_opd1 aesdeclast_opd2 | ||
147 | .endm | ||
148 | #endif | ||
149 | |||
150 | #endif | ||
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index fd6d21bbee6c..345c99cef152 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h | |||
@@ -1,8 +1,6 @@ | |||
1 | #ifndef _ASM_X86_IOMMU_H | 1 | #ifndef _ASM_X86_IOMMU_H |
2 | #define _ASM_X86_IOMMU_H | 2 | #define _ASM_X86_IOMMU_H |
3 | 3 | ||
4 | extern void pci_iommu_shutdown(void); | ||
5 | extern void no_iommu_init(void); | ||
6 | extern struct dma_map_ops nommu_dma_ops; | 4 | extern struct dma_map_ops nommu_dma_ops; |
7 | extern int force_iommu, no_iommu; | 5 | extern int force_iommu, no_iommu; |
8 | extern int iommu_detected; | 6 | extern int iommu_detected; |
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index ddda6cbed6f4..5458380b6ef8 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h | |||
@@ -34,9 +34,10 @@ static inline int irq_canonicalize(int irq) | |||
34 | #ifdef CONFIG_HOTPLUG_CPU | 34 | #ifdef CONFIG_HOTPLUG_CPU |
35 | #include <linux/cpumask.h> | 35 | #include <linux/cpumask.h> |
36 | extern void fixup_irqs(void); | 36 | extern void fixup_irqs(void); |
37 | extern void irq_force_complete_move(int); | ||
37 | #endif | 38 | #endif |
38 | 39 | ||
39 | extern void (*generic_interrupt_extension)(void); | 40 | extern void (*x86_platform_ipi_callback)(void); |
40 | extern void native_init_IRQ(void); | 41 | extern void native_init_IRQ(void); |
41 | extern bool handle_irq(unsigned irq, struct pt_regs *regs); | 42 | extern bool handle_irq(unsigned irq, struct pt_regs *regs); |
42 | 43 | ||
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 5b21f0ec3df2..4611f085cd43 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h | |||
@@ -106,14 +106,14 @@ | |||
106 | /* | 106 | /* |
107 | * Generic system vector for platform specific use | 107 | * Generic system vector for platform specific use |
108 | */ | 108 | */ |
109 | #define GENERIC_INTERRUPT_VECTOR 0xed | 109 | #define X86_PLATFORM_IPI_VECTOR 0xed |
110 | 110 | ||
111 | /* | 111 | /* |
112 | * Performance monitoring pending work vector: | 112 | * Performance monitoring pending work vector: |
113 | */ | 113 | */ |
114 | #define LOCAL_PENDING_VECTOR 0xec | 114 | #define LOCAL_PENDING_VECTOR 0xec |
115 | 115 | ||
116 | #define UV_BAU_MESSAGE 0xec | 116 | #define UV_BAU_MESSAGE 0xea |
117 | 117 | ||
118 | /* | 118 | /* |
119 | * Self IPI vector for machine checks | 119 | * Self IPI vector for machine checks |
diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/k8.h index c2d1f3b58e5f..f70e60071fe8 100644 --- a/arch/x86/include/asm/k8.h +++ b/arch/x86/include/asm/k8.h | |||
@@ -4,13 +4,16 @@ | |||
4 | #include <linux/pci.h> | 4 | #include <linux/pci.h> |
5 | 5 | ||
6 | extern struct pci_device_id k8_nb_ids[]; | 6 | extern struct pci_device_id k8_nb_ids[]; |
7 | struct bootnode; | ||
7 | 8 | ||
8 | extern int early_is_k8_nb(u32 value); | 9 | extern int early_is_k8_nb(u32 value); |
9 | extern struct pci_dev **k8_northbridges; | 10 | extern struct pci_dev **k8_northbridges; |
10 | extern int num_k8_northbridges; | 11 | extern int num_k8_northbridges; |
11 | extern int cache_k8_northbridges(void); | 12 | extern int cache_k8_northbridges(void); |
12 | extern void k8_flush_garts(void); | 13 | extern void k8_flush_garts(void); |
13 | extern int k8_scan_nodes(unsigned long start, unsigned long end); | 14 | extern int k8_get_nodes(struct bootnode *nodes); |
15 | extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn); | ||
16 | extern int k8_scan_nodes(void); | ||
14 | 17 | ||
15 | #ifdef CONFIG_K8_NB | 18 | #ifdef CONFIG_K8_NB |
16 | static inline struct pci_dev *node_to_k8_nb_misc(int node) | 19 | static inline struct pci_dev *node_to_k8_nb_misc(int node) |
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index 4a5fe914dc59..f46b79f6c16c 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h | |||
@@ -19,6 +19,8 @@ | |||
19 | #define __KVM_HAVE_MSIX | 19 | #define __KVM_HAVE_MSIX |
20 | #define __KVM_HAVE_MCE | 20 | #define __KVM_HAVE_MCE |
21 | #define __KVM_HAVE_PIT_STATE2 | 21 | #define __KVM_HAVE_PIT_STATE2 |
22 | #define __KVM_HAVE_XEN_HVM | ||
23 | #define __KVM_HAVE_VCPU_EVENTS | ||
22 | 24 | ||
23 | /* Architectural interrupt line count. */ | 25 | /* Architectural interrupt line count. */ |
24 | #define KVM_NR_INTERRUPTS 256 | 26 | #define KVM_NR_INTERRUPTS 256 |
@@ -79,6 +81,7 @@ struct kvm_ioapic_state { | |||
79 | #define KVM_IRQCHIP_PIC_MASTER 0 | 81 | #define KVM_IRQCHIP_PIC_MASTER 0 |
80 | #define KVM_IRQCHIP_PIC_SLAVE 1 | 82 | #define KVM_IRQCHIP_PIC_SLAVE 1 |
81 | #define KVM_IRQCHIP_IOAPIC 2 | 83 | #define KVM_IRQCHIP_IOAPIC 2 |
84 | #define KVM_NR_IRQCHIPS 3 | ||
82 | 85 | ||
83 | /* for KVM_GET_REGS and KVM_SET_REGS */ | 86 | /* for KVM_GET_REGS and KVM_SET_REGS */ |
84 | struct kvm_regs { | 87 | struct kvm_regs { |
@@ -250,4 +253,35 @@ struct kvm_reinject_control { | |||
250 | __u8 pit_reinject; | 253 | __u8 pit_reinject; |
251 | __u8 reserved[31]; | 254 | __u8 reserved[31]; |
252 | }; | 255 | }; |
256 | |||
257 | /* When set in flags, include corresponding fields on KVM_SET_VCPU_EVENTS */ | ||
258 | #define KVM_VCPUEVENT_VALID_NMI_PENDING 0x00000001 | ||
259 | #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 | ||
260 | |||
261 | /* for KVM_GET/SET_VCPU_EVENTS */ | ||
262 | struct kvm_vcpu_events { | ||
263 | struct { | ||
264 | __u8 injected; | ||
265 | __u8 nr; | ||
266 | __u8 has_error_code; | ||
267 | __u8 pad; | ||
268 | __u32 error_code; | ||
269 | } exception; | ||
270 | struct { | ||
271 | __u8 injected; | ||
272 | __u8 nr; | ||
273 | __u8 soft; | ||
274 | __u8 pad; | ||
275 | } interrupt; | ||
276 | struct { | ||
277 | __u8 injected; | ||
278 | __u8 pending; | ||
279 | __u8 masked; | ||
280 | __u8 pad; | ||
281 | } nmi; | ||
282 | __u32 sipi_vector; | ||
283 | __u32 flags; | ||
284 | __u32 reserved[10]; | ||
285 | }; | ||
286 | |||
253 | #endif /* _ASM_X86_KVM_H */ | 287 | #endif /* _ASM_X86_KVM_H */ |
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index b7ed2c423116..7c18e1230f54 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h | |||
@@ -129,7 +129,7 @@ struct decode_cache { | |||
129 | u8 seg_override; | 129 | u8 seg_override; |
130 | unsigned int d; | 130 | unsigned int d; |
131 | unsigned long regs[NR_VCPU_REGS]; | 131 | unsigned long regs[NR_VCPU_REGS]; |
132 | unsigned long eip; | 132 | unsigned long eip, eip_orig; |
133 | /* modrm */ | 133 | /* modrm */ |
134 | u8 modrm; | 134 | u8 modrm; |
135 | u8 modrm_mod; | 135 | u8 modrm_mod; |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3be000435fad..4f865e8b8540 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -354,7 +354,6 @@ struct kvm_vcpu_arch { | |||
354 | unsigned int time_offset; | 354 | unsigned int time_offset; |
355 | struct page *time_page; | 355 | struct page *time_page; |
356 | 356 | ||
357 | bool singlestep; /* guest is single stepped by KVM */ | ||
358 | bool nmi_pending; | 357 | bool nmi_pending; |
359 | bool nmi_injected; | 358 | bool nmi_injected; |
360 | 359 | ||
@@ -371,6 +370,10 @@ struct kvm_vcpu_arch { | |||
371 | u64 mcg_status; | 370 | u64 mcg_status; |
372 | u64 mcg_ctl; | 371 | u64 mcg_ctl; |
373 | u64 *mce_banks; | 372 | u64 *mce_banks; |
373 | |||
374 | /* used for guest single stepping over the given code position */ | ||
375 | u16 singlestep_cs; | ||
376 | unsigned long singlestep_rip; | ||
374 | }; | 377 | }; |
375 | 378 | ||
376 | struct kvm_mem_alias { | 379 | struct kvm_mem_alias { |
@@ -397,7 +400,6 @@ struct kvm_arch{ | |||
397 | struct kvm_pic *vpic; | 400 | struct kvm_pic *vpic; |
398 | struct kvm_ioapic *vioapic; | 401 | struct kvm_ioapic *vioapic; |
399 | struct kvm_pit *vpit; | 402 | struct kvm_pit *vpit; |
400 | struct hlist_head irq_ack_notifier_list; | ||
401 | int vapics_in_nmi_mode; | 403 | int vapics_in_nmi_mode; |
402 | 404 | ||
403 | unsigned int tss_addr; | 405 | unsigned int tss_addr; |
@@ -410,8 +412,10 @@ struct kvm_arch{ | |||
410 | gpa_t ept_identity_map_addr; | 412 | gpa_t ept_identity_map_addr; |
411 | 413 | ||
412 | unsigned long irq_sources_bitmap; | 414 | unsigned long irq_sources_bitmap; |
413 | unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; | ||
414 | u64 vm_init_tsc; | 415 | u64 vm_init_tsc; |
416 | s64 kvmclock_offset; | ||
417 | |||
418 | struct kvm_xen_hvm_config xen_hvm_config; | ||
415 | }; | 419 | }; |
416 | 420 | ||
417 | struct kvm_vm_stat { | 421 | struct kvm_vm_stat { |
@@ -461,7 +465,7 @@ struct descriptor_table { | |||
461 | struct kvm_x86_ops { | 465 | struct kvm_x86_ops { |
462 | int (*cpu_has_kvm_support)(void); /* __init */ | 466 | int (*cpu_has_kvm_support)(void); /* __init */ |
463 | int (*disabled_by_bios)(void); /* __init */ | 467 | int (*disabled_by_bios)(void); /* __init */ |
464 | void (*hardware_enable)(void *dummy); /* __init */ | 468 | int (*hardware_enable)(void *dummy); |
465 | void (*hardware_disable)(void *dummy); | 469 | void (*hardware_disable)(void *dummy); |
466 | void (*check_processor_compatibility)(void *rtn); | 470 | void (*check_processor_compatibility)(void *rtn); |
467 | int (*hardware_setup)(void); /* __init */ | 471 | int (*hardware_setup)(void); /* __init */ |
@@ -477,8 +481,8 @@ struct kvm_x86_ops { | |||
477 | void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); | 481 | void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); |
478 | void (*vcpu_put)(struct kvm_vcpu *vcpu); | 482 | void (*vcpu_put)(struct kvm_vcpu *vcpu); |
479 | 483 | ||
480 | int (*set_guest_debug)(struct kvm_vcpu *vcpu, | 484 | void (*set_guest_debug)(struct kvm_vcpu *vcpu, |
481 | struct kvm_guest_debug *dbg); | 485 | struct kvm_guest_debug *dbg); |
482 | int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); | 486 | int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); |
483 | int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); | 487 | int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); |
484 | u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); | 488 | u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); |
@@ -506,8 +510,8 @@ struct kvm_x86_ops { | |||
506 | 510 | ||
507 | void (*tlb_flush)(struct kvm_vcpu *vcpu); | 511 | void (*tlb_flush)(struct kvm_vcpu *vcpu); |
508 | 512 | ||
509 | void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run); | 513 | void (*run)(struct kvm_vcpu *vcpu); |
510 | int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu); | 514 | int (*handle_exit)(struct kvm_vcpu *vcpu); |
511 | void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); | 515 | void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); |
512 | void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); | 516 | void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); |
513 | u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); | 517 | u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); |
@@ -519,6 +523,8 @@ struct kvm_x86_ops { | |||
519 | bool has_error_code, u32 error_code); | 523 | bool has_error_code, u32 error_code); |
520 | int (*interrupt_allowed)(struct kvm_vcpu *vcpu); | 524 | int (*interrupt_allowed)(struct kvm_vcpu *vcpu); |
521 | int (*nmi_allowed)(struct kvm_vcpu *vcpu); | 525 | int (*nmi_allowed)(struct kvm_vcpu *vcpu); |
526 | bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); | ||
527 | void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); | ||
522 | void (*enable_nmi_window)(struct kvm_vcpu *vcpu); | 528 | void (*enable_nmi_window)(struct kvm_vcpu *vcpu); |
523 | void (*enable_irq_window)(struct kvm_vcpu *vcpu); | 529 | void (*enable_irq_window)(struct kvm_vcpu *vcpu); |
524 | void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); | 530 | void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); |
@@ -568,7 +574,7 @@ enum emulation_result { | |||
568 | #define EMULTYPE_NO_DECODE (1 << 0) | 574 | #define EMULTYPE_NO_DECODE (1 << 0) |
569 | #define EMULTYPE_TRAP_UD (1 << 1) | 575 | #define EMULTYPE_TRAP_UD (1 << 1) |
570 | #define EMULTYPE_SKIP (1 << 2) | 576 | #define EMULTYPE_SKIP (1 << 2) |
571 | int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run, | 577 | int emulate_instruction(struct kvm_vcpu *vcpu, |
572 | unsigned long cr2, u16 error_code, int emulation_type); | 578 | unsigned long cr2, u16 error_code, int emulation_type); |
573 | void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); | 579 | void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); |
574 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); | 580 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); |
@@ -585,9 +591,9 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); | |||
585 | 591 | ||
586 | struct x86_emulate_ctxt; | 592 | struct x86_emulate_ctxt; |
587 | 593 | ||
588 | int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | 594 | int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, |
589 | int size, unsigned port); | 595 | int size, unsigned port); |
590 | int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | 596 | int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, |
591 | int size, unsigned long count, int down, | 597 | int size, unsigned long count, int down, |
592 | gva_t address, int rep, unsigned port); | 598 | gva_t address, int rep, unsigned port); |
593 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); | 599 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); |
@@ -616,6 +622,9 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); | |||
616 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); | 622 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); |
617 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); | 623 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); |
618 | 624 | ||
625 | unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu); | ||
626 | void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); | ||
627 | |||
619 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); | 628 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); |
620 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); | 629 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); |
621 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, | 630 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, |
@@ -796,9 +805,13 @@ asmlinkage void kvm_handle_fault_on_reboot(void); | |||
796 | #define KVM_ARCH_WANT_MMU_NOTIFIER | 805 | #define KVM_ARCH_WANT_MMU_NOTIFIER |
797 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); | 806 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); |
798 | int kvm_age_hva(struct kvm *kvm, unsigned long hva); | 807 | int kvm_age_hva(struct kvm *kvm, unsigned long hva); |
808 | void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); | ||
799 | int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); | 809 | int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); |
800 | int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); | 810 | int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); |
801 | int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); | 811 | int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); |
802 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v); | 812 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v); |
803 | 813 | ||
814 | void kvm_define_shared_msr(unsigned index, u32 msr); | ||
815 | void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); | ||
816 | |||
804 | #endif /* _ASM_X86_KVM_HOST_H */ | 817 | #endif /* _ASM_X86_KVM_HOST_H */ |
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index b608a64c5814..858baa061cfc 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
@@ -108,6 +108,8 @@ struct mce_log { | |||
108 | #define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) | 108 | #define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) |
109 | #define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) | 109 | #define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) |
110 | 110 | ||
111 | extern struct atomic_notifier_head x86_mce_decoder_chain; | ||
112 | |||
111 | #ifdef __KERNEL__ | 113 | #ifdef __KERNEL__ |
112 | 114 | ||
113 | #include <linux/percpu.h> | 115 | #include <linux/percpu.h> |
@@ -118,9 +120,11 @@ extern int mce_disabled; | |||
118 | extern int mce_p5_enabled; | 120 | extern int mce_p5_enabled; |
119 | 121 | ||
120 | #ifdef CONFIG_X86_MCE | 122 | #ifdef CONFIG_X86_MCE |
121 | void mcheck_init(struct cpuinfo_x86 *c); | 123 | int mcheck_init(void); |
124 | void mcheck_cpu_init(struct cpuinfo_x86 *c); | ||
122 | #else | 125 | #else |
123 | static inline void mcheck_init(struct cpuinfo_x86 *c) {} | 126 | static inline int mcheck_init(void) { return 0; } |
127 | static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {} | ||
124 | #endif | 128 | #endif |
125 | 129 | ||
126 | #ifdef CONFIG_X86_ANCIENT_MCE | 130 | #ifdef CONFIG_X86_ANCIENT_MCE |
@@ -133,6 +137,8 @@ static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} | |||
133 | static inline void enable_p5_mce(void) {} | 137 | static inline void enable_p5_mce(void) {} |
134 | #endif | 138 | #endif |
135 | 139 | ||
140 | extern void (*x86_mce_decode_callback)(struct mce *m); | ||
141 | |||
136 | void mce_setup(struct mce *m); | 142 | void mce_setup(struct mce *m); |
137 | void mce_log(struct mce *m); | 143 | void mce_log(struct mce *m); |
138 | DECLARE_PER_CPU(struct sys_device, mce_dev); | 144 | DECLARE_PER_CPU(struct sys_device, mce_dev); |
@@ -212,5 +218,11 @@ void intel_init_thermal(struct cpuinfo_x86 *c); | |||
212 | 218 | ||
213 | void mce_log_therm_throt_event(__u64 status); | 219 | void mce_log_therm_throt_event(__u64 status); |
214 | 220 | ||
221 | #ifdef CONFIG_X86_THERMAL_VECTOR | ||
222 | extern void mcheck_intel_therm_init(void); | ||
223 | #else | ||
224 | static inline void mcheck_intel_therm_init(void) { } | ||
225 | #endif | ||
226 | |||
215 | #endif /* __KERNEL__ */ | 227 | #endif /* __KERNEL__ */ |
216 | #endif /* _ASM_X86_MCE_H */ | 228 | #endif /* _ASM_X86_MCE_H */ |
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index ef51b501e22a..c24ca9a56458 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h | |||
@@ -12,6 +12,8 @@ struct device; | |||
12 | enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND }; | 12 | enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND }; |
13 | 13 | ||
14 | struct microcode_ops { | 14 | struct microcode_ops { |
15 | void (*init)(struct device *device); | ||
16 | void (*fini)(void); | ||
15 | enum ucode_state (*request_microcode_user) (int cpu, | 17 | enum ucode_state (*request_microcode_user) (int cpu, |
16 | const void __user *buf, size_t size); | 18 | const void __user *buf, size_t size); |
17 | 19 | ||
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index ede6998bd92c..91df7c51806c 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h | |||
@@ -47,7 +47,7 @@ static inline void resume_map_numa_kva(pgd_t *pgd) {} | |||
47 | /* | 47 | /* |
48 | * generic node memory support, the following assumptions apply: | 48 | * generic node memory support, the following assumptions apply: |
49 | * | 49 | * |
50 | * 1) memory comes in 64Mb contigious chunks which are either present or not | 50 | * 1) memory comes in 64Mb contiguous chunks which are either present or not |
51 | * 2) we will not have more than 64Gb in total | 51 | * 2) we will not have more than 64Gb in total |
52 | * | 52 | * |
53 | * for now assume that 64Gb is max amount of RAM for whole system | 53 | * for now assume that 64Gb is max amount of RAM for whole system |
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 79c94500c0bb..d8bf23a88d05 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h | |||
@@ -71,12 +71,7 @@ static inline void early_get_smp_config(void) | |||
71 | 71 | ||
72 | static inline void find_smp_config(void) | 72 | static inline void find_smp_config(void) |
73 | { | 73 | { |
74 | x86_init.mpparse.find_smp_config(1); | 74 | x86_init.mpparse.find_smp_config(); |
75 | } | ||
76 | |||
77 | static inline void early_find_smp_config(void) | ||
78 | { | ||
79 | x86_init.mpparse.find_smp_config(0); | ||
80 | } | 75 | } |
81 | 76 | ||
82 | #ifdef CONFIG_X86_MPPARSE | 77 | #ifdef CONFIG_X86_MPPARSE |
@@ -89,7 +84,7 @@ extern void default_mpc_oem_bus_info(struct mpc_bus *m, char *str); | |||
89 | # else | 84 | # else |
90 | # define default_mpc_oem_bus_info NULL | 85 | # define default_mpc_oem_bus_info NULL |
91 | # endif | 86 | # endif |
92 | extern void default_find_smp_config(unsigned int reserve); | 87 | extern void default_find_smp_config(void); |
93 | extern void default_get_smp_config(unsigned int early); | 88 | extern void default_get_smp_config(unsigned int early); |
94 | #else | 89 | #else |
95 | static inline void early_reserve_e820_mpc_new(void) { } | 90 | static inline void early_reserve_e820_mpc_new(void) { } |
@@ -97,7 +92,7 @@ static inline void early_reserve_e820_mpc_new(void) { } | |||
97 | #define default_mpc_apic_id NULL | 92 | #define default_mpc_apic_id NULL |
98 | #define default_smp_read_mpc_oem NULL | 93 | #define default_smp_read_mpc_oem NULL |
99 | #define default_mpc_oem_bus_info NULL | 94 | #define default_mpc_oem_bus_info NULL |
100 | #define default_find_smp_config x86_init_uint_noop | 95 | #define default_find_smp_config x86_init_noop |
101 | #define default_get_smp_config x86_init_uint_noop | 96 | #define default_get_smp_config x86_init_uint_noop |
102 | #endif | 97 | #endif |
103 | 98 | ||
@@ -163,14 +158,16 @@ typedef struct physid_mask physid_mask_t; | |||
163 | #define physids_shift_left(d, s, n) \ | 158 | #define physids_shift_left(d, s, n) \ |
164 | bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS) | 159 | bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS) |
165 | 160 | ||
166 | #define physids_coerce(map) ((map).mask[0]) | 161 | static inline unsigned long physids_coerce(physid_mask_t *map) |
162 | { | ||
163 | return map->mask[0]; | ||
164 | } | ||
167 | 165 | ||
168 | #define physids_promote(physids) \ | 166 | static inline void physids_promote(unsigned long physids, physid_mask_t *map) |
169 | ({ \ | 167 | { |
170 | physid_mask_t __physid_mask = PHYSID_MASK_NONE; \ | 168 | physids_clear(*map); |
171 | __physid_mask.mask[0] = physids; \ | 169 | map->mask[0] = physids; |
172 | __physid_mask; \ | 170 | } |
173 | }) | ||
174 | 171 | ||
175 | /* Note: will create very large stack frames if physid_mask_t is big */ | 172 | /* Note: will create very large stack frames if physid_mask_t is big */ |
176 | #define physid_mask_of_physid(physid) \ | 173 | #define physid_mask_of_physid(physid) \ |
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 4ffe09b2ad75..1cd58cdbc03f 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -12,6 +12,7 @@ | |||
12 | #define MSR_FS_BASE 0xc0000100 /* 64bit FS base */ | 12 | #define MSR_FS_BASE 0xc0000100 /* 64bit FS base */ |
13 | #define MSR_GS_BASE 0xc0000101 /* 64bit GS base */ | 13 | #define MSR_GS_BASE 0xc0000101 /* 64bit GS base */ |
14 | #define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */ | 14 | #define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */ |
15 | #define MSR_TSC_AUX 0xc0000103 /* Auxiliary TSC */ | ||
15 | 16 | ||
16 | /* EFER bits: */ | 17 | /* EFER bits: */ |
17 | #define _EFER_SCE 0 /* SYSCALL/SYSRET */ | 18 | #define _EFER_SCE 0 /* SYSCALL/SYSRET */ |
@@ -123,6 +124,7 @@ | |||
123 | #define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2 | 124 | #define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2 |
124 | #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffff | 125 | #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffff |
125 | #define FAM10H_MMIO_CONF_BASE_SHIFT 20 | 126 | #define FAM10H_MMIO_CONF_BASE_SHIFT 20 |
127 | #define MSR_FAM10H_NODE_ID 0xc001100c | ||
126 | 128 | ||
127 | /* K8 MSRs */ | 129 | /* K8 MSRs */ |
128 | #define MSR_K8_TOP_MEM1 0xc001001a | 130 | #define MSR_K8_TOP_MEM1 0xc001001a |
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 7e2b6ba962ff..c5bc4c2d33f5 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h | |||
@@ -27,6 +27,18 @@ struct msr { | |||
27 | }; | 27 | }; |
28 | }; | 28 | }; |
29 | 29 | ||
30 | struct msr_info { | ||
31 | u32 msr_no; | ||
32 | struct msr reg; | ||
33 | struct msr *msrs; | ||
34 | int err; | ||
35 | }; | ||
36 | |||
37 | struct msr_regs_info { | ||
38 | u32 *regs; | ||
39 | int err; | ||
40 | }; | ||
41 | |||
30 | static inline unsigned long long native_read_tscp(unsigned int *aux) | 42 | static inline unsigned long long native_read_tscp(unsigned int *aux) |
31 | { | 43 | { |
32 | unsigned long low, high; | 44 | unsigned long low, high; |
@@ -240,15 +252,18 @@ do { \ | |||
240 | #define checking_wrmsrl(msr, val) wrmsr_safe((msr), (u32)(val), \ | 252 | #define checking_wrmsrl(msr, val) wrmsr_safe((msr), (u32)(val), \ |
241 | (u32)((val) >> 32)) | 253 | (u32)((val) >> 32)) |
242 | 254 | ||
243 | #define write_tsc(val1, val2) wrmsr(0x10, (val1), (val2)) | 255 | #define write_tsc(val1, val2) wrmsr(MSR_IA32_TSC, (val1), (val2)) |
256 | |||
257 | #define write_rdtscp_aux(val) wrmsr(MSR_TSC_AUX, (val), 0) | ||
244 | 258 | ||
245 | #define write_rdtscp_aux(val) wrmsr(0xc0000103, (val), 0) | 259 | struct msr *msrs_alloc(void); |
260 | void msrs_free(struct msr *msrs); | ||
246 | 261 | ||
247 | #ifdef CONFIG_SMP | 262 | #ifdef CONFIG_SMP |
248 | int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); | 263 | int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); |
249 | int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); | 264 | int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); |
250 | void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs); | 265 | void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs); |
251 | void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs); | 266 | void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs); |
252 | int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); | 267 | int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); |
253 | int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); | 268 | int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); |
254 | int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]); | 269 | int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]); |
@@ -264,12 +279,12 @@ static inline int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) | |||
264 | wrmsr(msr_no, l, h); | 279 | wrmsr(msr_no, l, h); |
265 | return 0; | 280 | return 0; |
266 | } | 281 | } |
267 | static inline void rdmsr_on_cpus(const cpumask_t *m, u32 msr_no, | 282 | static inline void rdmsr_on_cpus(const struct cpumask *m, u32 msr_no, |
268 | struct msr *msrs) | 283 | struct msr *msrs) |
269 | { | 284 | { |
270 | rdmsr_on_cpu(0, msr_no, &(msrs[0].l), &(msrs[0].h)); | 285 | rdmsr_on_cpu(0, msr_no, &(msrs[0].l), &(msrs[0].h)); |
271 | } | 286 | } |
272 | static inline void wrmsr_on_cpus(const cpumask_t *m, u32 msr_no, | 287 | static inline void wrmsr_on_cpus(const struct cpumask *m, u32 msr_no, |
273 | struct msr *msrs) | 288 | struct msr *msrs) |
274 | { | 289 | { |
275 | wrmsr_on_cpu(0, msr_no, msrs[0].l, msrs[0].h); | 290 | wrmsr_on_cpu(0, msr_no, msrs[0].l, msrs[0].h); |
diff --git a/arch/x86/include/asm/olpc.h b/arch/x86/include/asm/olpc.h index 834a30295fab..3a57385d9fa7 100644 --- a/arch/x86/include/asm/olpc.h +++ b/arch/x86/include/asm/olpc.h | |||
@@ -120,7 +120,7 @@ extern int olpc_ec_mask_unset(uint8_t bits); | |||
120 | 120 | ||
121 | /* GPIO assignments */ | 121 | /* GPIO assignments */ |
122 | 122 | ||
123 | #define OLPC_GPIO_MIC_AC geode_gpio(1) | 123 | #define OLPC_GPIO_MIC_AC 1 |
124 | #define OLPC_GPIO_DCON_IRQ geode_gpio(7) | 124 | #define OLPC_GPIO_DCON_IRQ geode_gpio(7) |
125 | #define OLPC_GPIO_THRM_ALRM geode_gpio(10) | 125 | #define OLPC_GPIO_THRM_ALRM geode_gpio(10) |
126 | #define OLPC_GPIO_SMB_CLK geode_gpio(14) | 126 | #define OLPC_GPIO_SMB_CLK geode_gpio(14) |
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 6473f5ccff85..642fe34b36a2 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h | |||
@@ -49,7 +49,8 @@ extern unsigned long max_pfn_mapped; | |||
49 | extern unsigned long init_memory_mapping(unsigned long start, | 49 | extern unsigned long init_memory_mapping(unsigned long start, |
50 | unsigned long end); | 50 | unsigned long end); |
51 | 51 | ||
52 | extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn); | 52 | extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn, |
53 | int acpi, int k8); | ||
53 | extern void free_initmem(void); | 54 | extern void free_initmem(void); |
54 | 55 | ||
55 | #endif /* !__ASSEMBLY__ */ | 56 | #endif /* !__ASSEMBLY__ */ |
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 8aebcc41041d..dd59a85a918f 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h | |||
@@ -731,34 +731,34 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, | |||
731 | 731 | ||
732 | #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) | 732 | #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) |
733 | 733 | ||
734 | static inline int __raw_spin_is_locked(struct raw_spinlock *lock) | 734 | static inline int arch_spin_is_locked(struct arch_spinlock *lock) |
735 | { | 735 | { |
736 | return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock); | 736 | return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock); |
737 | } | 737 | } |
738 | 738 | ||
739 | static inline int __raw_spin_is_contended(struct raw_spinlock *lock) | 739 | static inline int arch_spin_is_contended(struct arch_spinlock *lock) |
740 | { | 740 | { |
741 | return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock); | 741 | return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock); |
742 | } | 742 | } |
743 | #define __raw_spin_is_contended __raw_spin_is_contended | 743 | #define arch_spin_is_contended arch_spin_is_contended |
744 | 744 | ||
745 | static __always_inline void __raw_spin_lock(struct raw_spinlock *lock) | 745 | static __always_inline void arch_spin_lock(struct arch_spinlock *lock) |
746 | { | 746 | { |
747 | PVOP_VCALL1(pv_lock_ops.spin_lock, lock); | 747 | PVOP_VCALL1(pv_lock_ops.spin_lock, lock); |
748 | } | 748 | } |
749 | 749 | ||
750 | static __always_inline void __raw_spin_lock_flags(struct raw_spinlock *lock, | 750 | static __always_inline void arch_spin_lock_flags(struct arch_spinlock *lock, |
751 | unsigned long flags) | 751 | unsigned long flags) |
752 | { | 752 | { |
753 | PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags); | 753 | PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags); |
754 | } | 754 | } |
755 | 755 | ||
756 | static __always_inline int __raw_spin_trylock(struct raw_spinlock *lock) | 756 | static __always_inline int arch_spin_trylock(struct arch_spinlock *lock) |
757 | { | 757 | { |
758 | return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock); | 758 | return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock); |
759 | } | 759 | } |
760 | 760 | ||
761 | static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock) | 761 | static __always_inline void arch_spin_unlock(struct arch_spinlock *lock) |
762 | { | 762 | { |
763 | PVOP_VCALL1(pv_lock_ops.spin_unlock, lock); | 763 | PVOP_VCALL1(pv_lock_ops.spin_unlock, lock); |
764 | } | 764 | } |
@@ -840,42 +840,22 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock) | |||
840 | 840 | ||
841 | static inline unsigned long __raw_local_save_flags(void) | 841 | static inline unsigned long __raw_local_save_flags(void) |
842 | { | 842 | { |
843 | unsigned long f; | 843 | return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl); |
844 | |||
845 | asm volatile(paravirt_alt(PARAVIRT_CALL) | ||
846 | : "=a"(f) | ||
847 | : paravirt_type(pv_irq_ops.save_fl), | ||
848 | paravirt_clobber(CLBR_EAX) | ||
849 | : "memory", "cc"); | ||
850 | return f; | ||
851 | } | 844 | } |
852 | 845 | ||
853 | static inline void raw_local_irq_restore(unsigned long f) | 846 | static inline void raw_local_irq_restore(unsigned long f) |
854 | { | 847 | { |
855 | asm volatile(paravirt_alt(PARAVIRT_CALL) | 848 | PVOP_VCALLEE1(pv_irq_ops.restore_fl, f); |
856 | : "=a"(f) | ||
857 | : PV_FLAGS_ARG(f), | ||
858 | paravirt_type(pv_irq_ops.restore_fl), | ||
859 | paravirt_clobber(CLBR_EAX) | ||
860 | : "memory", "cc"); | ||
861 | } | 849 | } |
862 | 850 | ||
863 | static inline void raw_local_irq_disable(void) | 851 | static inline void raw_local_irq_disable(void) |
864 | { | 852 | { |
865 | asm volatile(paravirt_alt(PARAVIRT_CALL) | 853 | PVOP_VCALLEE0(pv_irq_ops.irq_disable); |
866 | : | ||
867 | : paravirt_type(pv_irq_ops.irq_disable), | ||
868 | paravirt_clobber(CLBR_EAX) | ||
869 | : "memory", "eax", "cc"); | ||
870 | } | 854 | } |
871 | 855 | ||
872 | static inline void raw_local_irq_enable(void) | 856 | static inline void raw_local_irq_enable(void) |
873 | { | 857 | { |
874 | asm volatile(paravirt_alt(PARAVIRT_CALL) | 858 | PVOP_VCALLEE0(pv_irq_ops.irq_enable); |
875 | : | ||
876 | : paravirt_type(pv_irq_ops.irq_enable), | ||
877 | paravirt_clobber(CLBR_EAX) | ||
878 | : "memory", "eax", "cc"); | ||
879 | } | 859 | } |
880 | 860 | ||
881 | static inline unsigned long __raw_local_irq_save(void) | 861 | static inline unsigned long __raw_local_irq_save(void) |
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index dd0f5b32489d..b1e70d51e40c 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h | |||
@@ -318,14 +318,14 @@ struct pv_mmu_ops { | |||
318 | phys_addr_t phys, pgprot_t flags); | 318 | phys_addr_t phys, pgprot_t flags); |
319 | }; | 319 | }; |
320 | 320 | ||
321 | struct raw_spinlock; | 321 | struct arch_spinlock; |
322 | struct pv_lock_ops { | 322 | struct pv_lock_ops { |
323 | int (*spin_is_locked)(struct raw_spinlock *lock); | 323 | int (*spin_is_locked)(struct arch_spinlock *lock); |
324 | int (*spin_is_contended)(struct raw_spinlock *lock); | 324 | int (*spin_is_contended)(struct arch_spinlock *lock); |
325 | void (*spin_lock)(struct raw_spinlock *lock); | 325 | void (*spin_lock)(struct arch_spinlock *lock); |
326 | void (*spin_lock_flags)(struct raw_spinlock *lock, unsigned long flags); | 326 | void (*spin_lock_flags)(struct arch_spinlock *lock, unsigned long flags); |
327 | int (*spin_trylock)(struct raw_spinlock *lock); | 327 | int (*spin_trylock)(struct arch_spinlock *lock); |
328 | void (*spin_unlock)(struct raw_spinlock *lock); | 328 | void (*spin_unlock)(struct arch_spinlock *lock); |
329 | }; | 329 | }; |
330 | 330 | ||
331 | /* This contains all the paravirt structures: we get a convenient | 331 | /* This contains all the paravirt structures: we get a convenient |
@@ -494,10 +494,11 @@ int paravirt_disable_iospace(void); | |||
494 | #define EXTRA_CLOBBERS | 494 | #define EXTRA_CLOBBERS |
495 | #define VEXTRA_CLOBBERS | 495 | #define VEXTRA_CLOBBERS |
496 | #else /* CONFIG_X86_64 */ | 496 | #else /* CONFIG_X86_64 */ |
497 | /* [re]ax isn't an arg, but the return val */ | ||
497 | #define PVOP_VCALL_ARGS \ | 498 | #define PVOP_VCALL_ARGS \ |
498 | unsigned long __edi = __edi, __esi = __esi, \ | 499 | unsigned long __edi = __edi, __esi = __esi, \ |
499 | __edx = __edx, __ecx = __ecx | 500 | __edx = __edx, __ecx = __ecx, __eax = __eax |
500 | #define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax | 501 | #define PVOP_CALL_ARGS PVOP_VCALL_ARGS |
501 | 502 | ||
502 | #define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x)) | 503 | #define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x)) |
503 | #define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x)) | 504 | #define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x)) |
@@ -509,6 +510,7 @@ int paravirt_disable_iospace(void); | |||
509 | "=c" (__ecx) | 510 | "=c" (__ecx) |
510 | #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) | 511 | #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) |
511 | 512 | ||
513 | /* void functions are still allowed [re]ax for scratch */ | ||
512 | #define PVOP_VCALLEE_CLOBBERS "=a" (__eax) | 514 | #define PVOP_VCALLEE_CLOBBERS "=a" (__eax) |
513 | #define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS | 515 | #define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS |
514 | 516 | ||
@@ -583,8 +585,8 @@ int paravirt_disable_iospace(void); | |||
583 | VEXTRA_CLOBBERS, \ | 585 | VEXTRA_CLOBBERS, \ |
584 | pre, post, ##__VA_ARGS__) | 586 | pre, post, ##__VA_ARGS__) |
585 | 587 | ||
586 | #define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...) \ | 588 | #define __PVOP_VCALLEESAVE(op, pre, post, ...) \ |
587 | ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ | 589 | ____PVOP_VCALL(op.func, CLBR_RET_REG, \ |
588 | PVOP_VCALLEE_CLOBBERS, , \ | 590 | PVOP_VCALLEE_CLOBBERS, , \ |
589 | pre, post, ##__VA_ARGS__) | 591 | pre, post, ##__VA_ARGS__) |
590 | 592 | ||
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index b399988eee3a..b4bf9a942ed0 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h | |||
@@ -118,11 +118,27 @@ extern int __init pcibios_init(void); | |||
118 | 118 | ||
119 | /* pci-mmconfig.c */ | 119 | /* pci-mmconfig.c */ |
120 | 120 | ||
121 | /* "PCI MMCONFIG %04x [bus %02x-%02x]" */ | ||
122 | #define PCI_MMCFG_RESOURCE_NAME_LEN (22 + 4 + 2 + 2) | ||
123 | |||
124 | struct pci_mmcfg_region { | ||
125 | struct list_head list; | ||
126 | struct resource res; | ||
127 | u64 address; | ||
128 | char __iomem *virt; | ||
129 | u16 segment; | ||
130 | u8 start_bus; | ||
131 | u8 end_bus; | ||
132 | char name[PCI_MMCFG_RESOURCE_NAME_LEN]; | ||
133 | }; | ||
134 | |||
121 | extern int __init pci_mmcfg_arch_init(void); | 135 | extern int __init pci_mmcfg_arch_init(void); |
122 | extern void __init pci_mmcfg_arch_free(void); | 136 | extern void __init pci_mmcfg_arch_free(void); |
137 | extern struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus); | ||
138 | |||
139 | extern struct list_head pci_mmcfg_list; | ||
123 | 140 | ||
124 | extern struct acpi_mcfg_allocation *pci_mmcfg_config; | 141 | #define PCI_MMCFG_BUS_OFFSET(bus) ((bus) << 20) |
125 | extern int pci_mmcfg_config_num; | ||
126 | 142 | ||
127 | /* | 143 | /* |
128 | * AMD Fam10h CPUs are buggy, and cannot access MMIO config space | 144 | * AMD Fam10h CPUs are buggy, and cannot access MMIO config space |
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index ad7ce3fd5065..8d9f8548a870 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h | |||
@@ -28,9 +28,20 @@ | |||
28 | */ | 28 | */ |
29 | #define ARCH_PERFMON_EVENT_MASK 0xffff | 29 | #define ARCH_PERFMON_EVENT_MASK 0xffff |
30 | 30 | ||
31 | /* | ||
32 | * filter mask to validate fixed counter events. | ||
33 | * the following filters disqualify for fixed counters: | ||
34 | * - inv | ||
35 | * - edge | ||
36 | * - cnt-mask | ||
37 | * The other filters are supported by fixed counters. | ||
38 | * The any-thread option is supported starting with v3. | ||
39 | */ | ||
40 | #define ARCH_PERFMON_EVENT_FILTER_MASK 0xff840000 | ||
41 | |||
31 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c | 42 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c |
32 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) | 43 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) |
33 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 | 44 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 |
34 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ | 45 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ |
35 | (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) | 46 | (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) |
36 | 47 | ||
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index af6fd360ab35..a34c785c5a63 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h | |||
@@ -16,6 +16,8 @@ | |||
16 | 16 | ||
17 | #ifndef __ASSEMBLY__ | 17 | #ifndef __ASSEMBLY__ |
18 | 18 | ||
19 | #include <asm/x86_init.h> | ||
20 | |||
19 | /* | 21 | /* |
20 | * ZERO_PAGE is a global shared page that is always zero: used | 22 | * ZERO_PAGE is a global shared page that is always zero: used |
21 | * for zero-mapped memory areas etc.. | 23 | * for zero-mapped memory areas etc.. |
@@ -270,9 +272,9 @@ static inline int is_new_memtype_allowed(u64 paddr, unsigned long size, | |||
270 | unsigned long new_flags) | 272 | unsigned long new_flags) |
271 | { | 273 | { |
272 | /* | 274 | /* |
273 | * PAT type is always WB for ISA. So no need to check. | 275 | * PAT type is always WB for untracked ranges, so no need to check. |
274 | */ | 276 | */ |
275 | if (is_ISA_range(paddr, paddr + size - 1)) | 277 | if (x86_platform.is_untracked_pat_range(paddr, paddr + size)) |
276 | return 1; | 278 | return 1; |
277 | 279 | ||
278 | /* | 280 | /* |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c3429e8b2424..fc801bab1b3b 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -30,6 +30,7 @@ struct mm_struct; | |||
30 | #include <linux/math64.h> | 30 | #include <linux/math64.h> |
31 | #include <linux/init.h> | 31 | #include <linux/init.h> |
32 | 32 | ||
33 | #define HBP_NUM 4 | ||
33 | /* | 34 | /* |
34 | * Default implementation of macro that returns current | 35 | * Default implementation of macro that returns current |
35 | * instruction pointer ("program counter"). | 36 | * instruction pointer ("program counter"). |
@@ -180,7 +181,7 @@ static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, | |||
180 | unsigned int *ecx, unsigned int *edx) | 181 | unsigned int *ecx, unsigned int *edx) |
181 | { | 182 | { |
182 | /* ecx is often an input as well as an output. */ | 183 | /* ecx is often an input as well as an output. */ |
183 | asm("cpuid" | 184 | asm volatile("cpuid" |
184 | : "=a" (*eax), | 185 | : "=a" (*eax), |
185 | "=b" (*ebx), | 186 | "=b" (*ebx), |
186 | "=c" (*ecx), | 187 | "=c" (*ecx), |
@@ -422,6 +423,8 @@ extern unsigned int xstate_size; | |||
422 | extern void free_thread_xstate(struct task_struct *); | 423 | extern void free_thread_xstate(struct task_struct *); |
423 | extern struct kmem_cache *task_xstate_cachep; | 424 | extern struct kmem_cache *task_xstate_cachep; |
424 | 425 | ||
426 | struct perf_event; | ||
427 | |||
425 | struct thread_struct { | 428 | struct thread_struct { |
426 | /* Cached TLS descriptors: */ | 429 | /* Cached TLS descriptors: */ |
427 | struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; | 430 | struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; |
@@ -443,13 +446,10 @@ struct thread_struct { | |||
443 | unsigned long fs; | 446 | unsigned long fs; |
444 | #endif | 447 | #endif |
445 | unsigned long gs; | 448 | unsigned long gs; |
446 | /* Hardware debugging registers: */ | 449 | /* Save middle states of ptrace breakpoints */ |
447 | unsigned long debugreg0; | 450 | struct perf_event *ptrace_bps[HBP_NUM]; |
448 | unsigned long debugreg1; | 451 | /* Debug status used for traps, single steps, etc... */ |
449 | unsigned long debugreg2; | 452 | unsigned long debugreg6; |
450 | unsigned long debugreg3; | ||
451 | unsigned long debugreg6; | ||
452 | unsigned long debugreg7; | ||
453 | /* Fault info: */ | 453 | /* Fault info: */ |
454 | unsigned long cr2; | 454 | unsigned long cr2; |
455 | unsigned long trap_no; | 455 | unsigned long trap_no; |
@@ -1000,7 +1000,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); | |||
1000 | #define thread_saved_pc(t) (*(unsigned long *)((t)->thread.sp - 8)) | 1000 | #define thread_saved_pc(t) (*(unsigned long *)((t)->thread.sp - 8)) |
1001 | 1001 | ||
1002 | #define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) | 1002 | #define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) |
1003 | #define KSTK_ESP(tsk) -1 /* sorry. doesn't work for syscall. */ | 1003 | extern unsigned long KSTK_ESP(struct task_struct *task); |
1004 | #endif /* CONFIG_X86_64 */ | 1004 | #endif /* CONFIG_X86_64 */ |
1005 | 1005 | ||
1006 | extern void start_thread(struct pt_regs *regs, unsigned long new_ip, | 1006 | extern void start_thread(struct pt_regs *regs, unsigned long new_ip, |
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 621f56d73121..4009f6534f52 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h | |||
@@ -5,18 +5,19 @@ | |||
5 | 5 | ||
6 | /* misc architecture specific prototypes */ | 6 | /* misc architecture specific prototypes */ |
7 | 7 | ||
8 | extern void early_idt_handler(void); | 8 | void early_idt_handler(void); |
9 | 9 | ||
10 | extern void system_call(void); | 10 | void system_call(void); |
11 | extern void syscall_init(void); | 11 | void syscall_init(void); |
12 | 12 | ||
13 | extern void ia32_syscall(void); | 13 | void ia32_syscall(void); |
14 | extern void ia32_cstar_target(void); | 14 | void ia32_cstar_target(void); |
15 | extern void ia32_sysenter_target(void); | 15 | void ia32_sysenter_target(void); |
16 | 16 | ||
17 | extern void syscall32_cpu_init(void); | 17 | void syscall32_cpu_init(void); |
18 | 18 | ||
19 | extern void check_efer(void); | 19 | void x86_configure_nx(void); |
20 | void x86_report_nx(void); | ||
20 | 21 | ||
21 | extern int reboot_force; | 22 | extern int reboot_force; |
22 | 23 | ||
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 0f0d908349aa..9d369f680321 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h | |||
@@ -7,6 +7,7 @@ | |||
7 | 7 | ||
8 | #ifdef __KERNEL__ | 8 | #ifdef __KERNEL__ |
9 | #include <asm/segment.h> | 9 | #include <asm/segment.h> |
10 | #include <asm/page_types.h> | ||
10 | #endif | 11 | #endif |
11 | 12 | ||
12 | #ifndef __ASSEMBLY__ | 13 | #ifndef __ASSEMBLY__ |
@@ -216,6 +217,67 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs) | |||
216 | return regs->sp; | 217 | return regs->sp; |
217 | } | 218 | } |
218 | 219 | ||
220 | /* Query offset/name of register from its name/offset */ | ||
221 | extern int regs_query_register_offset(const char *name); | ||
222 | extern const char *regs_query_register_name(unsigned int offset); | ||
223 | #define MAX_REG_OFFSET (offsetof(struct pt_regs, ss)) | ||
224 | |||
225 | /** | ||
226 | * regs_get_register() - get register value from its offset | ||
227 | * @regs: pt_regs from which register value is gotten. | ||
228 | * @offset: offset number of the register. | ||
229 | * | ||
230 | * regs_get_register returns the value of a register. The @offset is the | ||
231 | * offset of the register in struct pt_regs address which specified by @regs. | ||
232 | * If @offset is bigger than MAX_REG_OFFSET, this returns 0. | ||
233 | */ | ||
234 | static inline unsigned long regs_get_register(struct pt_regs *regs, | ||
235 | unsigned int offset) | ||
236 | { | ||
237 | if (unlikely(offset > MAX_REG_OFFSET)) | ||
238 | return 0; | ||
239 | return *(unsigned long *)((unsigned long)regs + offset); | ||
240 | } | ||
241 | |||
242 | /** | ||
243 | * regs_within_kernel_stack() - check the address in the stack | ||
244 | * @regs: pt_regs which contains kernel stack pointer. | ||
245 | * @addr: address which is checked. | ||
246 | * | ||
247 | * regs_within_kernel_stack() checks @addr is within the kernel stack page(s). | ||
248 | * If @addr is within the kernel stack, it returns true. If not, returns false. | ||
249 | */ | ||
250 | static inline int regs_within_kernel_stack(struct pt_regs *regs, | ||
251 | unsigned long addr) | ||
252 | { | ||
253 | return ((addr & ~(THREAD_SIZE - 1)) == | ||
254 | (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))); | ||
255 | } | ||
256 | |||
257 | /** | ||
258 | * regs_get_kernel_stack_nth() - get Nth entry of the stack | ||
259 | * @regs: pt_regs which contains kernel stack pointer. | ||
260 | * @n: stack entry number. | ||
261 | * | ||
262 | * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which | ||
263 | * is specified by @regs. If the @n th entry is NOT in the kernel stack, | ||
264 | * this returns 0. | ||
265 | */ | ||
266 | static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, | ||
267 | unsigned int n) | ||
268 | { | ||
269 | unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); | ||
270 | addr += n; | ||
271 | if (regs_within_kernel_stack(regs, (unsigned long)addr)) | ||
272 | return *addr; | ||
273 | else | ||
274 | return 0; | ||
275 | } | ||
276 | |||
277 | /* Get Nth argument at function call */ | ||
278 | extern unsigned long regs_get_argument_nth(struct pt_regs *regs, | ||
279 | unsigned int n); | ||
280 | |||
219 | /* | 281 | /* |
220 | * These are defined as per linux/ptrace.h, which see. | 282 | * These are defined as per linux/ptrace.h, which see. |
221 | */ | 283 | */ |
@@ -230,6 +292,8 @@ extern void user_enable_block_step(struct task_struct *); | |||
230 | #define arch_has_block_step() (boot_cpu_data.x86 >= 6) | 292 | #define arch_has_block_step() (boot_cpu_data.x86 >= 6) |
231 | #endif | 293 | #endif |
232 | 294 | ||
295 | #define ARCH_HAS_USER_SINGLE_STEP_INFO | ||
296 | |||
233 | struct user_desc; | 297 | struct user_desc; |
234 | extern int do_get_thread_area(struct task_struct *p, int idx, | 298 | extern int do_get_thread_area(struct task_struct *p, int idx, |
235 | struct user_desc __user *info); | 299 | struct user_desc __user *info); |
diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index 1b7ee5d673c2..0a5242428659 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h | |||
@@ -2,7 +2,13 @@ | |||
2 | #define _ASM_X86_SECTIONS_H | 2 | #define _ASM_X86_SECTIONS_H |
3 | 3 | ||
4 | #include <asm-generic/sections.h> | 4 | #include <asm-generic/sections.h> |
5 | #include <asm/uaccess.h> | ||
5 | 6 | ||
6 | extern char __brk_base[], __brk_limit[]; | 7 | extern char __brk_base[], __brk_limit[]; |
8 | extern struct exception_table_entry __stop___ex_table[]; | ||
9 | |||
10 | #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) | ||
11 | extern char __end_rodata_hpage_align[]; | ||
12 | #endif | ||
7 | 13 | ||
8 | #endif /* _ASM_X86_SECTIONS_H */ | 14 | #endif /* _ASM_X86_SECTIONS_H */ |
diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h index 72e5a4491661..04459d25e66e 100644 --- a/arch/x86/include/asm/sigcontext.h +++ b/arch/x86/include/asm/sigcontext.h | |||
@@ -124,7 +124,7 @@ struct sigcontext { | |||
124 | * fpstate is really (struct _fpstate *) or (struct _xstate *) | 124 | * fpstate is really (struct _fpstate *) or (struct _xstate *) |
125 | * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved | 125 | * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved |
126 | * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end | 126 | * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end |
127 | * of extended memory layout. See comments at the defintion of | 127 | * of extended memory layout. See comments at the definition of |
128 | * (struct _fpx_sw_bytes) | 128 | * (struct _fpx_sw_bytes) |
129 | */ | 129 | */ |
130 | void __user *fpstate; /* zero when no FPU/extended context */ | 130 | void __user *fpstate; /* zero when no FPU/extended context */ |
@@ -219,7 +219,7 @@ struct sigcontext { | |||
219 | * fpstate is really (struct _fpstate *) or (struct _xstate *) | 219 | * fpstate is really (struct _fpstate *) or (struct _xstate *) |
220 | * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved | 220 | * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved |
221 | * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end | 221 | * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end |
222 | * of extended memory layout. See comments at the defintion of | 222 | * of extended memory layout. See comments at the definition of |
223 | * (struct _fpx_sw_bytes) | 223 | * (struct _fpx_sw_bytes) |
224 | */ | 224 | */ |
225 | void __user *fpstate; /* zero when no FPU/extended context */ | 225 | void __user *fpstate; /* zero when no FPU/extended context */ |
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 4e77853321db..3089f70c0c52 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h | |||
@@ -58,7 +58,7 @@ | |||
58 | #if (NR_CPUS < 256) | 58 | #if (NR_CPUS < 256) |
59 | #define TICKET_SHIFT 8 | 59 | #define TICKET_SHIFT 8 |
60 | 60 | ||
61 | static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock) | 61 | static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) |
62 | { | 62 | { |
63 | short inc = 0x0100; | 63 | short inc = 0x0100; |
64 | 64 | ||
@@ -77,7 +77,7 @@ static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock) | |||
77 | : "memory", "cc"); | 77 | : "memory", "cc"); |
78 | } | 78 | } |
79 | 79 | ||
80 | static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock) | 80 | static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) |
81 | { | 81 | { |
82 | int tmp, new; | 82 | int tmp, new; |
83 | 83 | ||
@@ -96,7 +96,7 @@ static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock) | |||
96 | return tmp; | 96 | return tmp; |
97 | } | 97 | } |
98 | 98 | ||
99 | static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock) | 99 | static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) |
100 | { | 100 | { |
101 | asm volatile(UNLOCK_LOCK_PREFIX "incb %0" | 101 | asm volatile(UNLOCK_LOCK_PREFIX "incb %0" |
102 | : "+m" (lock->slock) | 102 | : "+m" (lock->slock) |
@@ -106,7 +106,7 @@ static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock) | |||
106 | #else | 106 | #else |
107 | #define TICKET_SHIFT 16 | 107 | #define TICKET_SHIFT 16 |
108 | 108 | ||
109 | static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock) | 109 | static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) |
110 | { | 110 | { |
111 | int inc = 0x00010000; | 111 | int inc = 0x00010000; |
112 | int tmp; | 112 | int tmp; |
@@ -127,7 +127,7 @@ static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock) | |||
127 | : "memory", "cc"); | 127 | : "memory", "cc"); |
128 | } | 128 | } |
129 | 129 | ||
130 | static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock) | 130 | static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) |
131 | { | 131 | { |
132 | int tmp; | 132 | int tmp; |
133 | int new; | 133 | int new; |
@@ -149,7 +149,7 @@ static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock) | |||
149 | return tmp; | 149 | return tmp; |
150 | } | 150 | } |
151 | 151 | ||
152 | static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock) | 152 | static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) |
153 | { | 153 | { |
154 | asm volatile(UNLOCK_LOCK_PREFIX "incw %0" | 154 | asm volatile(UNLOCK_LOCK_PREFIX "incw %0" |
155 | : "+m" (lock->slock) | 155 | : "+m" (lock->slock) |
@@ -158,14 +158,14 @@ static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock) | |||
158 | } | 158 | } |
159 | #endif | 159 | #endif |
160 | 160 | ||
161 | static inline int __ticket_spin_is_locked(raw_spinlock_t *lock) | 161 | static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) |
162 | { | 162 | { |
163 | int tmp = ACCESS_ONCE(lock->slock); | 163 | int tmp = ACCESS_ONCE(lock->slock); |
164 | 164 | ||
165 | return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1 << TICKET_SHIFT) - 1)); | 165 | return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1 << TICKET_SHIFT) - 1)); |
166 | } | 166 | } |
167 | 167 | ||
168 | static inline int __ticket_spin_is_contended(raw_spinlock_t *lock) | 168 | static inline int __ticket_spin_is_contended(arch_spinlock_t *lock) |
169 | { | 169 | { |
170 | int tmp = ACCESS_ONCE(lock->slock); | 170 | int tmp = ACCESS_ONCE(lock->slock); |
171 | 171 | ||
@@ -174,43 +174,43 @@ static inline int __ticket_spin_is_contended(raw_spinlock_t *lock) | |||
174 | 174 | ||
175 | #ifndef CONFIG_PARAVIRT_SPINLOCKS | 175 | #ifndef CONFIG_PARAVIRT_SPINLOCKS |
176 | 176 | ||
177 | static inline int __raw_spin_is_locked(raw_spinlock_t *lock) | 177 | static inline int arch_spin_is_locked(arch_spinlock_t *lock) |
178 | { | 178 | { |
179 | return __ticket_spin_is_locked(lock); | 179 | return __ticket_spin_is_locked(lock); |
180 | } | 180 | } |
181 | 181 | ||
182 | static inline int __raw_spin_is_contended(raw_spinlock_t *lock) | 182 | static inline int arch_spin_is_contended(arch_spinlock_t *lock) |
183 | { | 183 | { |
184 | return __ticket_spin_is_contended(lock); | 184 | return __ticket_spin_is_contended(lock); |
185 | } | 185 | } |
186 | #define __raw_spin_is_contended __raw_spin_is_contended | 186 | #define arch_spin_is_contended arch_spin_is_contended |
187 | 187 | ||
188 | static __always_inline void __raw_spin_lock(raw_spinlock_t *lock) | 188 | static __always_inline void arch_spin_lock(arch_spinlock_t *lock) |
189 | { | 189 | { |
190 | __ticket_spin_lock(lock); | 190 | __ticket_spin_lock(lock); |
191 | } | 191 | } |
192 | 192 | ||
193 | static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock) | 193 | static __always_inline int arch_spin_trylock(arch_spinlock_t *lock) |
194 | { | 194 | { |
195 | return __ticket_spin_trylock(lock); | 195 | return __ticket_spin_trylock(lock); |
196 | } | 196 | } |
197 | 197 | ||
198 | static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock) | 198 | static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) |
199 | { | 199 | { |
200 | __ticket_spin_unlock(lock); | 200 | __ticket_spin_unlock(lock); |
201 | } | 201 | } |
202 | 202 | ||
203 | static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock, | 203 | static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock, |
204 | unsigned long flags) | 204 | unsigned long flags) |
205 | { | 205 | { |
206 | __raw_spin_lock(lock); | 206 | arch_spin_lock(lock); |
207 | } | 207 | } |
208 | 208 | ||
209 | #endif /* CONFIG_PARAVIRT_SPINLOCKS */ | 209 | #endif /* CONFIG_PARAVIRT_SPINLOCKS */ |
210 | 210 | ||
211 | static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock) | 211 | static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) |
212 | { | 212 | { |
213 | while (__raw_spin_is_locked(lock)) | 213 | while (arch_spin_is_locked(lock)) |
214 | cpu_relax(); | 214 | cpu_relax(); |
215 | } | 215 | } |
216 | 216 | ||
@@ -232,7 +232,7 @@ static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock) | |||
232 | * read_can_lock - would read_trylock() succeed? | 232 | * read_can_lock - would read_trylock() succeed? |
233 | * @lock: the rwlock in question. | 233 | * @lock: the rwlock in question. |
234 | */ | 234 | */ |
235 | static inline int __raw_read_can_lock(raw_rwlock_t *lock) | 235 | static inline int arch_read_can_lock(arch_rwlock_t *lock) |
236 | { | 236 | { |
237 | return (int)(lock)->lock > 0; | 237 | return (int)(lock)->lock > 0; |
238 | } | 238 | } |
@@ -241,12 +241,12 @@ static inline int __raw_read_can_lock(raw_rwlock_t *lock) | |||
241 | * write_can_lock - would write_trylock() succeed? | 241 | * write_can_lock - would write_trylock() succeed? |
242 | * @lock: the rwlock in question. | 242 | * @lock: the rwlock in question. |
243 | */ | 243 | */ |
244 | static inline int __raw_write_can_lock(raw_rwlock_t *lock) | 244 | static inline int arch_write_can_lock(arch_rwlock_t *lock) |
245 | { | 245 | { |
246 | return (lock)->lock == RW_LOCK_BIAS; | 246 | return (lock)->lock == RW_LOCK_BIAS; |
247 | } | 247 | } |
248 | 248 | ||
249 | static inline void __raw_read_lock(raw_rwlock_t *rw) | 249 | static inline void arch_read_lock(arch_rwlock_t *rw) |
250 | { | 250 | { |
251 | asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t" | 251 | asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t" |
252 | "jns 1f\n" | 252 | "jns 1f\n" |
@@ -255,7 +255,7 @@ static inline void __raw_read_lock(raw_rwlock_t *rw) | |||
255 | ::LOCK_PTR_REG (rw) : "memory"); | 255 | ::LOCK_PTR_REG (rw) : "memory"); |
256 | } | 256 | } |
257 | 257 | ||
258 | static inline void __raw_write_lock(raw_rwlock_t *rw) | 258 | static inline void arch_write_lock(arch_rwlock_t *rw) |
259 | { | 259 | { |
260 | asm volatile(LOCK_PREFIX " subl %1,(%0)\n\t" | 260 | asm volatile(LOCK_PREFIX " subl %1,(%0)\n\t" |
261 | "jz 1f\n" | 261 | "jz 1f\n" |
@@ -264,7 +264,7 @@ static inline void __raw_write_lock(raw_rwlock_t *rw) | |||
264 | ::LOCK_PTR_REG (rw), "i" (RW_LOCK_BIAS) : "memory"); | 264 | ::LOCK_PTR_REG (rw), "i" (RW_LOCK_BIAS) : "memory"); |
265 | } | 265 | } |
266 | 266 | ||
267 | static inline int __raw_read_trylock(raw_rwlock_t *lock) | 267 | static inline int arch_read_trylock(arch_rwlock_t *lock) |
268 | { | 268 | { |
269 | atomic_t *count = (atomic_t *)lock; | 269 | atomic_t *count = (atomic_t *)lock; |
270 | 270 | ||
@@ -274,7 +274,7 @@ static inline int __raw_read_trylock(raw_rwlock_t *lock) | |||
274 | return 0; | 274 | return 0; |
275 | } | 275 | } |
276 | 276 | ||
277 | static inline int __raw_write_trylock(raw_rwlock_t *lock) | 277 | static inline int arch_write_trylock(arch_rwlock_t *lock) |
278 | { | 278 | { |
279 | atomic_t *count = (atomic_t *)lock; | 279 | atomic_t *count = (atomic_t *)lock; |
280 | 280 | ||
@@ -284,23 +284,23 @@ static inline int __raw_write_trylock(raw_rwlock_t *lock) | |||
284 | return 0; | 284 | return 0; |
285 | } | 285 | } |
286 | 286 | ||
287 | static inline void __raw_read_unlock(raw_rwlock_t *rw) | 287 | static inline void arch_read_unlock(arch_rwlock_t *rw) |
288 | { | 288 | { |
289 | asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory"); | 289 | asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory"); |
290 | } | 290 | } |
291 | 291 | ||
292 | static inline void __raw_write_unlock(raw_rwlock_t *rw) | 292 | static inline void arch_write_unlock(arch_rwlock_t *rw) |
293 | { | 293 | { |
294 | asm volatile(LOCK_PREFIX "addl %1, %0" | 294 | asm volatile(LOCK_PREFIX "addl %1, %0" |
295 | : "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory"); | 295 | : "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory"); |
296 | } | 296 | } |
297 | 297 | ||
298 | #define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock) | 298 | #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) |
299 | #define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock) | 299 | #define arch_write_lock_flags(lock, flags) arch_write_lock(lock) |
300 | 300 | ||
301 | #define _raw_spin_relax(lock) cpu_relax() | 301 | #define arch_spin_relax(lock) cpu_relax() |
302 | #define _raw_read_relax(lock) cpu_relax() | 302 | #define arch_read_relax(lock) cpu_relax() |
303 | #define _raw_write_relax(lock) cpu_relax() | 303 | #define arch_write_relax(lock) cpu_relax() |
304 | 304 | ||
305 | /* The {read|write|spin}_lock() on x86 are full memory barriers. */ | 305 | /* The {read|write|spin}_lock() on x86 are full memory barriers. */ |
306 | static inline void smp_mb__after_lock(void) { } | 306 | static inline void smp_mb__after_lock(void) { } |
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h index 845f81c87091..dcb48b2edc11 100644 --- a/arch/x86/include/asm/spinlock_types.h +++ b/arch/x86/include/asm/spinlock_types.h | |||
@@ -5,16 +5,16 @@ | |||
5 | # error "please don't include this file directly" | 5 | # error "please don't include this file directly" |
6 | #endif | 6 | #endif |
7 | 7 | ||
8 | typedef struct raw_spinlock { | 8 | typedef struct arch_spinlock { |
9 | unsigned int slock; | 9 | unsigned int slock; |
10 | } raw_spinlock_t; | 10 | } arch_spinlock_t; |
11 | 11 | ||
12 | #define __RAW_SPIN_LOCK_UNLOCKED { 0 } | 12 | #define __ARCH_SPIN_LOCK_UNLOCKED { 0 } |
13 | 13 | ||
14 | typedef struct { | 14 | typedef struct { |
15 | unsigned int lock; | 15 | unsigned int lock; |
16 | } raw_rwlock_t; | 16 | } arch_rwlock_t; |
17 | 17 | ||
18 | #define __RAW_RW_LOCK_UNLOCKED { RW_LOCK_BIAS } | 18 | #define __ARCH_RW_LOCK_UNLOCKED { RW_LOCK_BIAS } |
19 | 19 | ||
20 | #endif /* _ASM_X86_SPINLOCK_TYPES_H */ | 20 | #endif /* _ASM_X86_SPINLOCK_TYPES_H */ |
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index cf86a5e73815..35e89122a42f 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h | |||
@@ -5,6 +5,29 @@ extern int kstack_depth_to_print; | |||
5 | 5 | ||
6 | int x86_is_stack_id(int id, char *name); | 6 | int x86_is_stack_id(int id, char *name); |
7 | 7 | ||
8 | struct thread_info; | ||
9 | struct stacktrace_ops; | ||
10 | |||
11 | typedef unsigned long (*walk_stack_t)(struct thread_info *tinfo, | ||
12 | unsigned long *stack, | ||
13 | unsigned long bp, | ||
14 | const struct stacktrace_ops *ops, | ||
15 | void *data, | ||
16 | unsigned long *end, | ||
17 | int *graph); | ||
18 | |||
19 | extern unsigned long | ||
20 | print_context_stack(struct thread_info *tinfo, | ||
21 | unsigned long *stack, unsigned long bp, | ||
22 | const struct stacktrace_ops *ops, void *data, | ||
23 | unsigned long *end, int *graph); | ||
24 | |||
25 | extern unsigned long | ||
26 | print_context_stack_bp(struct thread_info *tinfo, | ||
27 | unsigned long *stack, unsigned long bp, | ||
28 | const struct stacktrace_ops *ops, void *data, | ||
29 | unsigned long *end, int *graph); | ||
30 | |||
8 | /* Generic stack tracer with callbacks */ | 31 | /* Generic stack tracer with callbacks */ |
9 | 32 | ||
10 | struct stacktrace_ops { | 33 | struct stacktrace_ops { |
@@ -14,6 +37,7 @@ struct stacktrace_ops { | |||
14 | void (*address)(void *data, unsigned long address, int reliable); | 37 | void (*address)(void *data, unsigned long address, int reliable); |
15 | /* On negative return stop dumping */ | 38 | /* On negative return stop dumping */ |
16 | int (*stack)(void *data, char *name); | 39 | int (*stack)(void *data, char *name); |
40 | walk_stack_t walk_stack; | ||
17 | }; | 41 | }; |
18 | 42 | ||
19 | void dump_trace(struct task_struct *tsk, struct pt_regs *regs, | 43 | void dump_trace(struct task_struct *tsk, struct pt_regs *regs, |
diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h index ae907e617181..3d3e8353ee5c 100644 --- a/arch/x86/include/asm/string_32.h +++ b/arch/x86/include/asm/string_32.h | |||
@@ -177,10 +177,15 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len) | |||
177 | */ | 177 | */ |
178 | 178 | ||
179 | #ifndef CONFIG_KMEMCHECK | 179 | #ifndef CONFIG_KMEMCHECK |
180 | |||
181 | #if (__GNUC__ >= 4) | ||
182 | #define memcpy(t, f, n) __builtin_memcpy(t, f, n) | ||
183 | #else | ||
180 | #define memcpy(t, f, n) \ | 184 | #define memcpy(t, f, n) \ |
181 | (__builtin_constant_p((n)) \ | 185 | (__builtin_constant_p((n)) \ |
182 | ? __constant_memcpy((t), (f), (n)) \ | 186 | ? __constant_memcpy((t), (f), (n)) \ |
183 | : __memcpy((t), (f), (n))) | 187 | : __memcpy((t), (f), (n))) |
188 | #endif | ||
184 | #else | 189 | #else |
185 | /* | 190 | /* |
186 | * kmemcheck becomes very happy if we use the REP instructions unconditionally, | 191 | * kmemcheck becomes very happy if we use the REP instructions unconditionally, |
@@ -316,11 +321,15 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern, | |||
316 | : __memset_generic((s), (c), (count))) | 321 | : __memset_generic((s), (c), (count))) |
317 | 322 | ||
318 | #define __HAVE_ARCH_MEMSET | 323 | #define __HAVE_ARCH_MEMSET |
324 | #if (__GNUC__ >= 4) | ||
325 | #define memset(s, c, count) __builtin_memset(s, c, count) | ||
326 | #else | ||
319 | #define memset(s, c, count) \ | 327 | #define memset(s, c, count) \ |
320 | (__builtin_constant_p(c) \ | 328 | (__builtin_constant_p(c) \ |
321 | ? __constant_c_x_memset((s), (0x01010101UL * (unsigned char)(c)), \ | 329 | ? __constant_c_x_memset((s), (0x01010101UL * (unsigned char)(c)), \ |
322 | (count)) \ | 330 | (count)) \ |
323 | : __memset((s), (c), (count))) | 331 | : __memset((s), (c), (count))) |
332 | #endif | ||
324 | 333 | ||
325 | /* | 334 | /* |
326 | * find the first occurrence of byte 'c', or 1 past the area if none | 335 | * find the first occurrence of byte 'c', or 1 past the area if none |
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 85574b7c1bc1..1fecb7e61130 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h | |||
@@ -57,7 +57,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area { | |||
57 | u16 intercept_dr_write; | 57 | u16 intercept_dr_write; |
58 | u32 intercept_exceptions; | 58 | u32 intercept_exceptions; |
59 | u64 intercept; | 59 | u64 intercept; |
60 | u8 reserved_1[44]; | 60 | u8 reserved_1[42]; |
61 | u16 pause_filter_count; | ||
61 | u64 iopm_base_pa; | 62 | u64 iopm_base_pa; |
62 | u64 msrpm_base_pa; | 63 | u64 msrpm_base_pa; |
63 | u64 tsc_offset; | 64 | u64 tsc_offset; |
diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h index b9e4e20174fb..8085277e1b8b 100644 --- a/arch/x86/include/asm/swiotlb.h +++ b/arch/x86/include/asm/swiotlb.h | |||
@@ -3,15 +3,16 @@ | |||
3 | 3 | ||
4 | #include <linux/swiotlb.h> | 4 | #include <linux/swiotlb.h> |
5 | 5 | ||
6 | /* SWIOTLB interface */ | ||
7 | |||
8 | extern int swiotlb_force; | ||
9 | |||
10 | #ifdef CONFIG_SWIOTLB | 6 | #ifdef CONFIG_SWIOTLB |
11 | extern int swiotlb; | 7 | extern int swiotlb; |
12 | extern void pci_swiotlb_init(void); | 8 | extern int __init pci_swiotlb_detect(void); |
9 | extern void __init pci_swiotlb_init(void); | ||
13 | #else | 10 | #else |
14 | #define swiotlb 0 | 11 | #define swiotlb 0 |
12 | static inline int pci_swiotlb_detect(void) | ||
13 | { | ||
14 | return 0; | ||
15 | } | ||
15 | static inline void pci_swiotlb_init(void) | 16 | static inline void pci_swiotlb_init(void) |
16 | { | 17 | { |
17 | } | 18 | } |
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 72a6dcd1299b..d5f69045c100 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h | |||
@@ -30,7 +30,6 @@ struct mmap_arg_struct; | |||
30 | asmlinkage long sys32_mmap(struct mmap_arg_struct __user *); | 30 | asmlinkage long sys32_mmap(struct mmap_arg_struct __user *); |
31 | asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long); | 31 | asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long); |
32 | 32 | ||
33 | asmlinkage long sys32_pipe(int __user *); | ||
34 | struct sigaction32; | 33 | struct sigaction32; |
35 | struct old_sigaction32; | 34 | struct old_sigaction32; |
36 | asmlinkage long sys32_rt_sigaction(int, struct sigaction32 __user *, | 35 | asmlinkage long sys32_rt_sigaction(int, struct sigaction32 __user *, |
@@ -51,20 +50,12 @@ asmlinkage long sys32_sched_rr_get_interval(compat_pid_t, | |||
51 | asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *, compat_size_t); | 50 | asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *, compat_size_t); |
52 | asmlinkage long sys32_rt_sigqueueinfo(int, int, compat_siginfo_t __user *); | 51 | asmlinkage long sys32_rt_sigqueueinfo(int, int, compat_siginfo_t __user *); |
53 | 52 | ||
54 | #ifdef CONFIG_SYSCTL_SYSCALL | ||
55 | struct sysctl_ia32; | ||
56 | asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *); | ||
57 | #endif | ||
58 | |||
59 | asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32); | 53 | asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32); |
60 | asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32); | 54 | asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32); |
61 | 55 | ||
62 | asmlinkage long sys32_personality(unsigned long); | 56 | asmlinkage long sys32_personality(unsigned long); |
63 | asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32); | 57 | asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32); |
64 | 58 | ||
65 | asmlinkage long sys32_mmap2(unsigned long, unsigned long, unsigned long, | ||
66 | unsigned long, unsigned long, unsigned long); | ||
67 | |||
68 | struct oldold_utsname; | 59 | struct oldold_utsname; |
69 | struct old_utsname; | 60 | struct old_utsname; |
70 | asmlinkage long sys32_olduname(struct oldold_utsname __user *); | 61 | asmlinkage long sys32_olduname(struct oldold_utsname __user *); |
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 372b76edd63f..8868b9420b0e 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h | |||
@@ -18,16 +18,24 @@ | |||
18 | /* Common in X86_32 and X86_64 */ | 18 | /* Common in X86_32 and X86_64 */ |
19 | /* kernel/ioport.c */ | 19 | /* kernel/ioport.c */ |
20 | asmlinkage long sys_ioperm(unsigned long, unsigned long, int); | 20 | asmlinkage long sys_ioperm(unsigned long, unsigned long, int); |
21 | long sys_iopl(unsigned int, struct pt_regs *); | ||
21 | 22 | ||
22 | /* kernel/process.c */ | 23 | /* kernel/process.c */ |
23 | int sys_fork(struct pt_regs *); | 24 | int sys_fork(struct pt_regs *); |
24 | int sys_vfork(struct pt_regs *); | 25 | int sys_vfork(struct pt_regs *); |
26 | long sys_execve(char __user *, char __user * __user *, | ||
27 | char __user * __user *, struct pt_regs *); | ||
28 | long sys_clone(unsigned long, unsigned long, void __user *, | ||
29 | void __user *, struct pt_regs *); | ||
25 | 30 | ||
26 | /* kernel/ldt.c */ | 31 | /* kernel/ldt.c */ |
27 | asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); | 32 | asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); |
28 | 33 | ||
29 | /* kernel/signal.c */ | 34 | /* kernel/signal.c */ |
30 | long sys_rt_sigreturn(struct pt_regs *); | 35 | long sys_rt_sigreturn(struct pt_regs *); |
36 | long sys_sigaltstack(const stack_t __user *, stack_t __user *, | ||
37 | struct pt_regs *); | ||
38 | |||
31 | 39 | ||
32 | /* kernel/tls.c */ | 40 | /* kernel/tls.c */ |
33 | asmlinkage int sys_set_thread_area(struct user_desc __user *); | 41 | asmlinkage int sys_set_thread_area(struct user_desc __user *); |
@@ -35,18 +43,11 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *); | |||
35 | 43 | ||
36 | /* X86_32 only */ | 44 | /* X86_32 only */ |
37 | #ifdef CONFIG_X86_32 | 45 | #ifdef CONFIG_X86_32 |
38 | /* kernel/ioport.c */ | ||
39 | long sys_iopl(struct pt_regs *); | ||
40 | |||
41 | /* kernel/process_32.c */ | ||
42 | int sys_clone(struct pt_regs *); | ||
43 | int sys_execve(struct pt_regs *); | ||
44 | 46 | ||
45 | /* kernel/signal.c */ | 47 | /* kernel/signal.c */ |
46 | asmlinkage int sys_sigsuspend(int, int, old_sigset_t); | 48 | asmlinkage int sys_sigsuspend(int, int, old_sigset_t); |
47 | asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, | 49 | asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, |
48 | struct old_sigaction __user *); | 50 | struct old_sigaction __user *); |
49 | int sys_sigaltstack(struct pt_regs *); | ||
50 | unsigned long sys_sigreturn(struct pt_regs *); | 51 | unsigned long sys_sigreturn(struct pt_regs *); |
51 | 52 | ||
52 | /* kernel/sys_i386_32.c */ | 53 | /* kernel/sys_i386_32.c */ |
@@ -55,8 +56,6 @@ struct sel_arg_struct; | |||
55 | struct oldold_utsname; | 56 | struct oldold_utsname; |
56 | struct old_utsname; | 57 | struct old_utsname; |
57 | 58 | ||
58 | asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long, | ||
59 | unsigned long, unsigned long, unsigned long); | ||
60 | asmlinkage int old_mmap(struct mmap_arg_struct __user *); | 59 | asmlinkage int old_mmap(struct mmap_arg_struct __user *); |
61 | asmlinkage int old_select(struct sel_arg_struct __user *); | 60 | asmlinkage int old_select(struct sel_arg_struct __user *); |
62 | asmlinkage int sys_ipc(uint, int, int, int, void __user *, long); | 61 | asmlinkage int sys_ipc(uint, int, int, int, void __user *, long); |
@@ -64,28 +63,15 @@ asmlinkage int sys_uname(struct old_utsname __user *); | |||
64 | asmlinkage int sys_olduname(struct oldold_utsname __user *); | 63 | asmlinkage int sys_olduname(struct oldold_utsname __user *); |
65 | 64 | ||
66 | /* kernel/vm86_32.c */ | 65 | /* kernel/vm86_32.c */ |
67 | int sys_vm86old(struct pt_regs *); | 66 | int sys_vm86old(struct vm86_struct __user *, struct pt_regs *); |
68 | int sys_vm86(struct pt_regs *); | 67 | int sys_vm86(unsigned long, unsigned long, struct pt_regs *); |
69 | 68 | ||
70 | #else /* CONFIG_X86_32 */ | 69 | #else /* CONFIG_X86_32 */ |
71 | 70 | ||
72 | /* X86_64 only */ | 71 | /* X86_64 only */ |
73 | /* kernel/ioport.c */ | ||
74 | asmlinkage long sys_iopl(unsigned int, struct pt_regs *); | ||
75 | |||
76 | /* kernel/process_64.c */ | 72 | /* kernel/process_64.c */ |
77 | asmlinkage long sys_clone(unsigned long, unsigned long, | ||
78 | void __user *, void __user *, | ||
79 | struct pt_regs *); | ||
80 | asmlinkage long sys_execve(char __user *, char __user * __user *, | ||
81 | char __user * __user *, | ||
82 | struct pt_regs *); | ||
83 | long sys_arch_prctl(int, unsigned long); | 73 | long sys_arch_prctl(int, unsigned long); |
84 | 74 | ||
85 | /* kernel/signal.c */ | ||
86 | asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *, | ||
87 | struct pt_regs *); | ||
88 | |||
89 | /* kernel/sys_x86_64.c */ | 75 | /* kernel/sys_x86_64.c */ |
90 | struct new_utsname; | 76 | struct new_utsname; |
91 | 77 | ||
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index de10c19d9558..e529f26c3292 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h | |||
@@ -23,6 +23,7 @@ struct task_struct *__switch_to(struct task_struct *prev, | |||
23 | struct tss_struct; | 23 | struct tss_struct; |
24 | void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | 24 | void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, |
25 | struct tss_struct *tss); | 25 | struct tss_struct *tss); |
26 | extern void show_regs_common(void); | ||
26 | 27 | ||
27 | #ifdef CONFIG_X86_32 | 28 | #ifdef CONFIG_X86_32 |
28 | 29 | ||
@@ -128,8 +129,6 @@ do { \ | |||
128 | "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ | 129 | "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ |
129 | "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ | 130 | "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ |
130 | "call __switch_to\n\t" \ | 131 | "call __switch_to\n\t" \ |
131 | ".globl thread_return\n" \ | ||
132 | "thread_return:\n\t" \ | ||
133 | "movq "__percpu_arg([current_task])",%%rsi\n\t" \ | 132 | "movq "__percpu_arg([current_task])",%%rsi\n\t" \ |
134 | __switch_canary \ | 133 | __switch_canary \ |
135 | "movq %P[thread_info](%%rsi),%%r8\n\t" \ | 134 | "movq %P[thread_info](%%rsi),%%r8\n\t" \ |
@@ -157,19 +156,22 @@ extern void native_load_gs_index(unsigned); | |||
157 | * Load a segment. Fall back on loading the zero | 156 | * Load a segment. Fall back on loading the zero |
158 | * segment if something goes wrong.. | 157 | * segment if something goes wrong.. |
159 | */ | 158 | */ |
160 | #define loadsegment(seg, value) \ | 159 | #define loadsegment(seg, value) \ |
161 | asm volatile("\n" \ | 160 | do { \ |
162 | "1:\t" \ | 161 | unsigned short __val = (value); \ |
163 | "movl %k0,%%" #seg "\n" \ | 162 | \ |
164 | "2:\n" \ | 163 | asm volatile(" \n" \ |
165 | ".section .fixup,\"ax\"\n" \ | 164 | "1: movl %k0,%%" #seg " \n" \ |
166 | "3:\t" \ | 165 | \ |
167 | "movl %k1, %%" #seg "\n\t" \ | 166 | ".section .fixup,\"ax\" \n" \ |
168 | "jmp 2b\n" \ | 167 | "2: xorl %k0,%k0 \n" \ |
169 | ".previous\n" \ | 168 | " jmp 1b \n" \ |
170 | _ASM_EXTABLE(1b,3b) \ | 169 | ".previous \n" \ |
171 | : :"r" (value), "r" (0) : "memory") | 170 | \ |
172 | 171 | _ASM_EXTABLE(1b, 2b) \ | |
172 | \ | ||
173 | : "+r" (__val) : : "memory"); \ | ||
174 | } while (0) | ||
173 | 175 | ||
174 | /* | 176 | /* |
175 | * Save a segment register away | 177 | * Save a segment register away |
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index d27d0a2fec4c..375c917c37d2 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h | |||
@@ -83,6 +83,7 @@ struct thread_info { | |||
83 | #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ | 83 | #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ |
84 | #define TIF_SECCOMP 8 /* secure computing */ | 84 | #define TIF_SECCOMP 8 /* secure computing */ |
85 | #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ | 85 | #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ |
86 | #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ | ||
86 | #define TIF_NOTSC 16 /* TSC is not accessible in userland */ | 87 | #define TIF_NOTSC 16 /* TSC is not accessible in userland */ |
87 | #define TIF_IA32 17 /* 32bit process */ | 88 | #define TIF_IA32 17 /* 32bit process */ |
88 | #define TIF_FORK 18 /* ret_from_fork */ | 89 | #define TIF_FORK 18 /* ret_from_fork */ |
@@ -107,6 +108,7 @@ struct thread_info { | |||
107 | #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) | 108 | #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) |
108 | #define _TIF_SECCOMP (1 << TIF_SECCOMP) | 109 | #define _TIF_SECCOMP (1 << TIF_SECCOMP) |
109 | #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) | 110 | #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) |
111 | #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) | ||
110 | #define _TIF_NOTSC (1 << TIF_NOTSC) | 112 | #define _TIF_NOTSC (1 << TIF_NOTSC) |
111 | #define _TIF_IA32 (1 << TIF_IA32) | 113 | #define _TIF_IA32 (1 << TIF_IA32) |
112 | #define _TIF_FORK (1 << TIF_FORK) | 114 | #define _TIF_FORK (1 << TIF_FORK) |
@@ -142,13 +144,14 @@ struct thread_info { | |||
142 | 144 | ||
143 | /* Only used for 64 bit */ | 145 | /* Only used for 64 bit */ |
144 | #define _TIF_DO_NOTIFY_MASK \ | 146 | #define _TIF_DO_NOTIFY_MASK \ |
145 | (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME) | 147 | (_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME | \ |
148 | _TIF_USER_RETURN_NOTIFY) | ||
146 | 149 | ||
147 | /* flags to check in __switch_to() */ | 150 | /* flags to check in __switch_to() */ |
148 | #define _TIF_WORK_CTXSW \ | 151 | #define _TIF_WORK_CTXSW \ |
149 | (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC) | 152 | (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC) |
150 | 153 | ||
151 | #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW | 154 | #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) |
152 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) | 155 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) |
153 | 156 | ||
154 | #define PREEMPT_ACTIVE 0x10000000 | 157 | #define PREEMPT_ACTIVE 0x10000000 |
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 25a92842dd99..c5087d796587 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h | |||
@@ -35,11 +35,16 @@ | |||
35 | # endif | 35 | # endif |
36 | #endif | 36 | #endif |
37 | 37 | ||
38 | /* Node not present */ | 38 | /* |
39 | #define NUMA_NO_NODE (-1) | 39 | * to preserve the visibility of NUMA_NO_NODE definition, |
40 | * moved to there from here. May be used independent of | ||
41 | * CONFIG_NUMA. | ||
42 | */ | ||
43 | #include <linux/numa.h> | ||
40 | 44 | ||
41 | #ifdef CONFIG_NUMA | 45 | #ifdef CONFIG_NUMA |
42 | #include <linux/cpumask.h> | 46 | #include <linux/cpumask.h> |
47 | |||
43 | #include <asm/mpspec.h> | 48 | #include <asm/mpspec.h> |
44 | 49 | ||
45 | #ifdef CONFIG_X86_32 | 50 | #ifdef CONFIG_X86_32 |
@@ -143,6 +148,7 @@ extern unsigned long node_remap_size[]; | |||
143 | | 1*SD_BALANCE_FORK \ | 148 | | 1*SD_BALANCE_FORK \ |
144 | | 0*SD_BALANCE_WAKE \ | 149 | | 0*SD_BALANCE_WAKE \ |
145 | | 1*SD_WAKE_AFFINE \ | 150 | | 1*SD_WAKE_AFFINE \ |
151 | | 0*SD_PREFER_LOCAL \ | ||
146 | | 0*SD_SHARE_CPUPOWER \ | 152 | | 0*SD_SHARE_CPUPOWER \ |
147 | | 0*SD_POWERSAVINGS_BALANCE \ | 153 | | 0*SD_POWERSAVINGS_BALANCE \ |
148 | | 0*SD_SHARE_PKG_RESOURCES \ | 154 | | 0*SD_SHARE_PKG_RESOURCES \ |
diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h index 90f06c25221d..cb507bb05d79 100644 --- a/arch/x86/include/asm/trampoline.h +++ b/arch/x86/include/asm/trampoline.h | |||
@@ -16,7 +16,6 @@ extern unsigned long initial_code; | |||
16 | extern unsigned long initial_gs; | 16 | extern unsigned long initial_gs; |
17 | 17 | ||
18 | #define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE) | 18 | #define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE) |
19 | #define TRAMPOLINE_BASE 0x6000 | ||
20 | 19 | ||
21 | extern unsigned long setup_trampoline(void); | 20 | extern unsigned long setup_trampoline(void); |
22 | extern void __init reserve_trampoline_memory(void); | 21 | extern void __init reserve_trampoline_memory(void); |
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index d2c6c930b491..abd3e0ea762a 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h | |||
@@ -570,7 +570,6 @@ extern struct movsl_mask { | |||
570 | #ifdef CONFIG_X86_32 | 570 | #ifdef CONFIG_X86_32 |
571 | # include "uaccess_32.h" | 571 | # include "uaccess_32.h" |
572 | #else | 572 | #else |
573 | # define ARCH_HAS_SEARCH_EXTABLE | ||
574 | # include "uaccess_64.h" | 573 | # include "uaccess_64.h" |
575 | #endif | 574 | #endif |
576 | 575 | ||
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 632fb44b4cb5..0c9825e97f36 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h | |||
@@ -187,9 +187,34 @@ __copy_from_user_inatomic_nocache(void *to, const void __user *from, | |||
187 | 187 | ||
188 | unsigned long __must_check copy_to_user(void __user *to, | 188 | unsigned long __must_check copy_to_user(void __user *to, |
189 | const void *from, unsigned long n); | 189 | const void *from, unsigned long n); |
190 | unsigned long __must_check copy_from_user(void *to, | 190 | unsigned long __must_check _copy_from_user(void *to, |
191 | const void __user *from, | 191 | const void __user *from, |
192 | unsigned long n); | 192 | unsigned long n); |
193 | |||
194 | |||
195 | extern void copy_from_user_overflow(void) | ||
196 | #ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS | ||
197 | __compiletime_error("copy_from_user() buffer size is not provably correct") | ||
198 | #else | ||
199 | __compiletime_warning("copy_from_user() buffer size is not provably correct") | ||
200 | #endif | ||
201 | ; | ||
202 | |||
203 | static inline unsigned long __must_check copy_from_user(void *to, | ||
204 | const void __user *from, | ||
205 | unsigned long n) | ||
206 | { | ||
207 | int sz = __compiletime_object_size(to); | ||
208 | int ret = -EFAULT; | ||
209 | |||
210 | if (likely(sz == -1 || sz >= n)) | ||
211 | ret = _copy_from_user(to, from, n); | ||
212 | else | ||
213 | copy_from_user_overflow(); | ||
214 | |||
215 | return ret; | ||
216 | } | ||
217 | |||
193 | long __must_check strncpy_from_user(char *dst, const char __user *src, | 218 | long __must_check strncpy_from_user(char *dst, const char __user *src, |
194 | long count); | 219 | long count); |
195 | long __must_check __strncpy_from_user(char *dst, | 220 | long __must_check __strncpy_from_user(char *dst, |
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index db24b215fc50..46324c6a4f6e 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h | |||
@@ -19,12 +19,37 @@ __must_check unsigned long | |||
19 | copy_user_generic(void *to, const void *from, unsigned len); | 19 | copy_user_generic(void *to, const void *from, unsigned len); |
20 | 20 | ||
21 | __must_check unsigned long | 21 | __must_check unsigned long |
22 | copy_to_user(void __user *to, const void *from, unsigned len); | 22 | _copy_to_user(void __user *to, const void *from, unsigned len); |
23 | __must_check unsigned long | 23 | __must_check unsigned long |
24 | copy_from_user(void *to, const void __user *from, unsigned len); | 24 | _copy_from_user(void *to, const void __user *from, unsigned len); |
25 | __must_check unsigned long | 25 | __must_check unsigned long |
26 | copy_in_user(void __user *to, const void __user *from, unsigned len); | 26 | copy_in_user(void __user *to, const void __user *from, unsigned len); |
27 | 27 | ||
28 | static inline unsigned long __must_check copy_from_user(void *to, | ||
29 | const void __user *from, | ||
30 | unsigned long n) | ||
31 | { | ||
32 | int sz = __compiletime_object_size(to); | ||
33 | int ret = -EFAULT; | ||
34 | |||
35 | might_fault(); | ||
36 | if (likely(sz == -1 || sz >= n)) | ||
37 | ret = _copy_from_user(to, from, n); | ||
38 | #ifdef CONFIG_DEBUG_VM | ||
39 | else | ||
40 | WARN(1, "Buffer overflow detected!\n"); | ||
41 | #endif | ||
42 | return ret; | ||
43 | } | ||
44 | |||
45 | static __always_inline __must_check | ||
46 | int copy_to_user(void __user *dst, const void *src, unsigned size) | ||
47 | { | ||
48 | might_fault(); | ||
49 | |||
50 | return _copy_to_user(dst, src, size); | ||
51 | } | ||
52 | |||
28 | static __always_inline __must_check | 53 | static __always_inline __must_check |
29 | int __copy_from_user(void *dst, const void __user *src, unsigned size) | 54 | int __copy_from_user(void *dst, const void __user *src, unsigned size) |
30 | { | 55 | { |
@@ -176,8 +201,11 @@ __must_check long strlen_user(const char __user *str); | |||
176 | __must_check unsigned long clear_user(void __user *mem, unsigned long len); | 201 | __must_check unsigned long clear_user(void __user *mem, unsigned long len); |
177 | __must_check unsigned long __clear_user(void __user *mem, unsigned long len); | 202 | __must_check unsigned long __clear_user(void __user *mem, unsigned long len); |
178 | 203 | ||
179 | __must_check long __copy_from_user_inatomic(void *dst, const void __user *src, | 204 | static __must_check __always_inline int |
180 | unsigned size); | 205 | __copy_from_user_inatomic(void *dst, const void __user *src, unsigned size) |
206 | { | ||
207 | return copy_user_generic(dst, (__force const void *)src, size); | ||
208 | } | ||
181 | 209 | ||
182 | static __must_check __always_inline int | 210 | static __must_check __always_inline int |
183 | __copy_to_user_inatomic(void __user *dst, const void *src, unsigned size) | 211 | __copy_to_user_inatomic(void __user *dst, const void *src, unsigned size) |
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index 6fb3c209a7e3..3baf379fa840 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h | |||
@@ -342,10 +342,11 @@ | |||
342 | #define __NR_pwritev 334 | 342 | #define __NR_pwritev 334 |
343 | #define __NR_rt_tgsigqueueinfo 335 | 343 | #define __NR_rt_tgsigqueueinfo 335 |
344 | #define __NR_perf_event_open 336 | 344 | #define __NR_perf_event_open 336 |
345 | #define __NR_recvmmsg 337 | ||
345 | 346 | ||
346 | #ifdef __KERNEL__ | 347 | #ifdef __KERNEL__ |
347 | 348 | ||
348 | #define NR_syscalls 337 | 349 | #define NR_syscalls 338 |
349 | 350 | ||
350 | #define __ARCH_WANT_IPC_PARSE_VERSION | 351 | #define __ARCH_WANT_IPC_PARSE_VERSION |
351 | #define __ARCH_WANT_OLD_READDIR | 352 | #define __ARCH_WANT_OLD_READDIR |
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 8d3ad0adbc68..4843f7ba754a 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h | |||
@@ -661,6 +661,8 @@ __SYSCALL(__NR_pwritev, sys_pwritev) | |||
661 | __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) | 661 | __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) |
662 | #define __NR_perf_event_open 298 | 662 | #define __NR_perf_event_open 298 |
663 | __SYSCALL(__NR_perf_event_open, sys_perf_event_open) | 663 | __SYSCALL(__NR_perf_event_open, sys_perf_event_open) |
664 | #define __NR_recvmmsg 299 | ||
665 | __SYSCALL(__NR_recvmmsg, sys_recvmmsg) | ||
664 | 666 | ||
665 | #ifndef __NO_STUBS | 667 | #ifndef __NO_STUBS |
666 | #define __ARCH_WANT_OLD_READDIR | 668 | #define __ARCH_WANT_OLD_READDIR |
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h index 7ed17ff502b9..2751f3075d8b 100644 --- a/arch/x86/include/asm/uv/bios.h +++ b/arch/x86/include/asm/uv/bios.h | |||
@@ -76,15 +76,6 @@ union partition_info_u { | |||
76 | }; | 76 | }; |
77 | }; | 77 | }; |
78 | 78 | ||
79 | union uv_watchlist_u { | ||
80 | u64 val; | ||
81 | struct { | ||
82 | u64 blade : 16, | ||
83 | size : 32, | ||
84 | filler : 16; | ||
85 | }; | ||
86 | }; | ||
87 | |||
88 | enum uv_memprotect { | 79 | enum uv_memprotect { |
89 | UV_MEMPROT_RESTRICT_ACCESS, | 80 | UV_MEMPROT_RESTRICT_ACCESS, |
90 | UV_MEMPROT_ALLOW_AMO, | 81 | UV_MEMPROT_ALLOW_AMO, |
@@ -100,7 +91,7 @@ extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64); | |||
100 | 91 | ||
101 | extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *); | 92 | extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *); |
102 | extern s64 uv_bios_freq_base(u64, u64 *); | 93 | extern s64 uv_bios_freq_base(u64, u64 *); |
103 | extern int uv_bios_mq_watchlist_alloc(int, unsigned long, unsigned int, | 94 | extern int uv_bios_mq_watchlist_alloc(unsigned long, unsigned int, |
104 | unsigned long *); | 95 | unsigned long *); |
105 | extern int uv_bios_mq_watchlist_free(int, int); | 96 | extern int uv_bios_mq_watchlist_free(int, int); |
106 | extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect); | 97 | extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect); |
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 80e2984f521c..b414d2b401f6 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h | |||
@@ -55,7 +55,7 @@ | |||
55 | #define DESC_STATUS_SOURCE_TIMEOUT 3 | 55 | #define DESC_STATUS_SOURCE_TIMEOUT 3 |
56 | 56 | ||
57 | /* | 57 | /* |
58 | * source side threshholds at which message retries print a warning | 58 | * source side thresholds at which message retries print a warning |
59 | */ | 59 | */ |
60 | #define SOURCE_TIMEOUT_LIMIT 20 | 60 | #define SOURCE_TIMEOUT_LIMIT 20 |
61 | #define DESTINATION_TIMEOUT_LIMIT 20 | 61 | #define DESTINATION_TIMEOUT_LIMIT 20 |
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 04eb6c958b9d..bc54fa965af3 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h | |||
@@ -19,6 +19,8 @@ | |||
19 | #include <asm/types.h> | 19 | #include <asm/types.h> |
20 | #include <asm/percpu.h> | 20 | #include <asm/percpu.h> |
21 | #include <asm/uv/uv_mmrs.h> | 21 | #include <asm/uv/uv_mmrs.h> |
22 | #include <asm/irq_vectors.h> | ||
23 | #include <asm/io_apic.h> | ||
22 | 24 | ||
23 | 25 | ||
24 | /* | 26 | /* |
@@ -29,20 +31,20 @@ | |||
29 | * contiguous (although various IO spaces may punch holes in | 31 | * contiguous (although various IO spaces may punch holes in |
30 | * it).. | 32 | * it).. |
31 | * | 33 | * |
32 | * N - Number of bits in the node portion of a socket physical | 34 | * N - Number of bits in the node portion of a socket physical |
33 | * address. | 35 | * address. |
34 | * | 36 | * |
35 | * NASID - network ID of a router, Mbrick or Cbrick. Nasid values of | 37 | * NASID - network ID of a router, Mbrick or Cbrick. Nasid values of |
36 | * routers always have low bit of 1, C/MBricks have low bit | 38 | * routers always have low bit of 1, C/MBricks have low bit |
37 | * equal to 0. Most addressing macros that target UV hub chips | 39 | * equal to 0. Most addressing macros that target UV hub chips |
38 | * right shift the NASID by 1 to exclude the always-zero bit. | 40 | * right shift the NASID by 1 to exclude the always-zero bit. |
39 | * NASIDs contain up to 15 bits. | 41 | * NASIDs contain up to 15 bits. |
40 | * | 42 | * |
41 | * GNODE - NASID right shifted by 1 bit. Most mmrs contain gnodes instead | 43 | * GNODE - NASID right shifted by 1 bit. Most mmrs contain gnodes instead |
42 | * of nasids. | 44 | * of nasids. |
43 | * | 45 | * |
44 | * PNODE - the low N bits of the GNODE. The PNODE is the most useful variant | 46 | * PNODE - the low N bits of the GNODE. The PNODE is the most useful variant |
45 | * of the nasid for socket usage. | 47 | * of the nasid for socket usage. |
46 | * | 48 | * |
47 | * | 49 | * |
48 | * NumaLink Global Physical Address Format: | 50 | * NumaLink Global Physical Address Format: |
@@ -69,12 +71,12 @@ | |||
69 | * | 71 | * |
70 | * | 72 | * |
71 | * APICID format | 73 | * APICID format |
72 | * NOTE!!!!!! This is the current format of the APICID. However, code | 74 | * NOTE!!!!!! This is the current format of the APICID. However, code |
73 | * should assume that this will change in the future. Use functions | 75 | * should assume that this will change in the future. Use functions |
74 | * in this file for all APICID bit manipulations and conversion. | 76 | * in this file for all APICID bit manipulations and conversion. |
75 | * | 77 | * |
76 | * 1111110000000000 | 78 | * 1111110000000000 |
77 | * 5432109876543210 | 79 | * 5432109876543210 |
78 | * pppppppppplc0cch | 80 | * pppppppppplc0cch |
79 | * sssssssssss | 81 | * sssssssssss |
80 | * | 82 | * |
@@ -87,9 +89,9 @@ | |||
87 | * Note: Processor only supports 12 bits in the APICID register. The ACPI | 89 | * Note: Processor only supports 12 bits in the APICID register. The ACPI |
88 | * tables hold all 16 bits. Software needs to be aware of this. | 90 | * tables hold all 16 bits. Software needs to be aware of this. |
89 | * | 91 | * |
90 | * Unless otherwise specified, all references to APICID refer to | 92 | * Unless otherwise specified, all references to APICID refer to |
91 | * the FULL value contained in ACPI tables, not the subset in the | 93 | * the FULL value contained in ACPI tables, not the subset in the |
92 | * processor APICID register. | 94 | * processor APICID register. |
93 | */ | 95 | */ |
94 | 96 | ||
95 | 97 | ||
@@ -114,7 +116,7 @@ | |||
114 | /* | 116 | /* |
115 | * The largest possible NASID of a C or M brick (+ 2) | 117 | * The largest possible NASID of a C or M brick (+ 2) |
116 | */ | 118 | */ |
117 | #define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_NODES * 2) | 119 | #define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_BLADES * 2) |
118 | 120 | ||
119 | struct uv_scir_s { | 121 | struct uv_scir_s { |
120 | struct timer_list timer; | 122 | struct timer_list timer; |
@@ -149,16 +151,16 @@ struct uv_hub_info_s { | |||
149 | }; | 151 | }; |
150 | 152 | ||
151 | DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); | 153 | DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); |
152 | #define uv_hub_info (&__get_cpu_var(__uv_hub_info)) | 154 | #define uv_hub_info (&__get_cpu_var(__uv_hub_info)) |
153 | #define uv_cpu_hub_info(cpu) (&per_cpu(__uv_hub_info, cpu)) | 155 | #define uv_cpu_hub_info(cpu) (&per_cpu(__uv_hub_info, cpu)) |
154 | 156 | ||
155 | /* | 157 | /* |
156 | * Local & Global MMR space macros. | 158 | * Local & Global MMR space macros. |
157 | * Note: macros are intended to be used ONLY by inline functions | 159 | * Note: macros are intended to be used ONLY by inline functions |
158 | * in this file - not by other kernel code. | 160 | * in this file - not by other kernel code. |
159 | * n - NASID (full 15-bit global nasid) | 161 | * n - NASID (full 15-bit global nasid) |
160 | * g - GNODE (full 15-bit global nasid, right shifted 1) | 162 | * g - GNODE (full 15-bit global nasid, right shifted 1) |
161 | * p - PNODE (local part of nsids, right shifted 1) | 163 | * p - PNODE (local part of nsids, right shifted 1) |
162 | */ | 164 | */ |
163 | #define UV_NASID_TO_PNODE(n) (((n) >> 1) & uv_hub_info->pnode_mask) | 165 | #define UV_NASID_TO_PNODE(n) (((n) >> 1) & uv_hub_info->pnode_mask) |
164 | #define UV_PNODE_TO_GNODE(p) ((p) |uv_hub_info->gnode_extra) | 166 | #define UV_PNODE_TO_GNODE(p) ((p) |uv_hub_info->gnode_extra) |
@@ -170,6 +172,8 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); | |||
170 | #define UV_LOCAL_MMR_SIZE (64UL * 1024 * 1024) | 172 | #define UV_LOCAL_MMR_SIZE (64UL * 1024 * 1024) |
171 | #define UV_GLOBAL_MMR32_SIZE (64UL * 1024 * 1024) | 173 | #define UV_GLOBAL_MMR32_SIZE (64UL * 1024 * 1024) |
172 | 174 | ||
175 | #define UV_GLOBAL_GRU_MMR_BASE 0x4000000 | ||
176 | |||
173 | #define UV_GLOBAL_MMR32_PNODE_SHIFT 15 | 177 | #define UV_GLOBAL_MMR32_PNODE_SHIFT 15 |
174 | #define UV_GLOBAL_MMR64_PNODE_SHIFT 26 | 178 | #define UV_GLOBAL_MMR64_PNODE_SHIFT 26 |
175 | 179 | ||
@@ -211,8 +215,8 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); | |||
211 | /* | 215 | /* |
212 | * Macros for converting between kernel virtual addresses, socket local physical | 216 | * Macros for converting between kernel virtual addresses, socket local physical |
213 | * addresses, and UV global physical addresses. | 217 | * addresses, and UV global physical addresses. |
214 | * Note: use the standard __pa() & __va() macros for converting | 218 | * Note: use the standard __pa() & __va() macros for converting |
215 | * between socket virtual and socket physical addresses. | 219 | * between socket virtual and socket physical addresses. |
216 | */ | 220 | */ |
217 | 221 | ||
218 | /* socket phys RAM --> UV global physical address */ | 222 | /* socket phys RAM --> UV global physical address */ |
@@ -230,6 +234,40 @@ static inline unsigned long uv_gpa(void *v) | |||
230 | return uv_soc_phys_ram_to_gpa(__pa(v)); | 234 | return uv_soc_phys_ram_to_gpa(__pa(v)); |
231 | } | 235 | } |
232 | 236 | ||
237 | /* Top two bits indicate the requested address is in MMR space. */ | ||
238 | static inline int | ||
239 | uv_gpa_in_mmr_space(unsigned long gpa) | ||
240 | { | ||
241 | return (gpa >> 62) == 0x3UL; | ||
242 | } | ||
243 | |||
244 | /* UV global physical address --> socket phys RAM */ | ||
245 | static inline unsigned long uv_gpa_to_soc_phys_ram(unsigned long gpa) | ||
246 | { | ||
247 | unsigned long paddr = gpa & uv_hub_info->gpa_mask; | ||
248 | unsigned long remap_base = uv_hub_info->lowmem_remap_base; | ||
249 | unsigned long remap_top = uv_hub_info->lowmem_remap_top; | ||
250 | |||
251 | if (paddr >= remap_base && paddr < remap_base + remap_top) | ||
252 | paddr -= remap_base; | ||
253 | return paddr; | ||
254 | } | ||
255 | |||
256 | |||
257 | /* gnode -> pnode */ | ||
258 | static inline unsigned long uv_gpa_to_gnode(unsigned long gpa) | ||
259 | { | ||
260 | return gpa >> uv_hub_info->m_val; | ||
261 | } | ||
262 | |||
263 | /* gpa -> pnode */ | ||
264 | static inline int uv_gpa_to_pnode(unsigned long gpa) | ||
265 | { | ||
266 | unsigned long n_mask = (1UL << uv_hub_info->n_val) - 1; | ||
267 | |||
268 | return uv_gpa_to_gnode(gpa) & n_mask; | ||
269 | } | ||
270 | |||
233 | /* pnode, offset --> socket virtual */ | 271 | /* pnode, offset --> socket virtual */ |
234 | static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset) | 272 | static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset) |
235 | { | 273 | { |
@@ -249,21 +287,18 @@ static inline int uv_apicid_to_pnode(int apicid) | |||
249 | * Access global MMRs using the low memory MMR32 space. This region supports | 287 | * Access global MMRs using the low memory MMR32 space. This region supports |
250 | * faster MMR access but not all MMRs are accessible in this space. | 288 | * faster MMR access but not all MMRs are accessible in this space. |
251 | */ | 289 | */ |
252 | static inline unsigned long *uv_global_mmr32_address(int pnode, | 290 | static inline unsigned long *uv_global_mmr32_address(int pnode, unsigned long offset) |
253 | unsigned long offset) | ||
254 | { | 291 | { |
255 | return __va(UV_GLOBAL_MMR32_BASE | | 292 | return __va(UV_GLOBAL_MMR32_BASE | |
256 | UV_GLOBAL_MMR32_PNODE_BITS(pnode) | offset); | 293 | UV_GLOBAL_MMR32_PNODE_BITS(pnode) | offset); |
257 | } | 294 | } |
258 | 295 | ||
259 | static inline void uv_write_global_mmr32(int pnode, unsigned long offset, | 296 | static inline void uv_write_global_mmr32(int pnode, unsigned long offset, unsigned long val) |
260 | unsigned long val) | ||
261 | { | 297 | { |
262 | writeq(val, uv_global_mmr32_address(pnode, offset)); | 298 | writeq(val, uv_global_mmr32_address(pnode, offset)); |
263 | } | 299 | } |
264 | 300 | ||
265 | static inline unsigned long uv_read_global_mmr32(int pnode, | 301 | static inline unsigned long uv_read_global_mmr32(int pnode, unsigned long offset) |
266 | unsigned long offset) | ||
267 | { | 302 | { |
268 | return readq(uv_global_mmr32_address(pnode, offset)); | 303 | return readq(uv_global_mmr32_address(pnode, offset)); |
269 | } | 304 | } |
@@ -272,26 +307,42 @@ static inline unsigned long uv_read_global_mmr32(int pnode, | |||
272 | * Access Global MMR space using the MMR space located at the top of physical | 307 | * Access Global MMR space using the MMR space located at the top of physical |
273 | * memory. | 308 | * memory. |
274 | */ | 309 | */ |
275 | static inline unsigned long *uv_global_mmr64_address(int pnode, | 310 | static inline unsigned long *uv_global_mmr64_address(int pnode, unsigned long offset) |
276 | unsigned long offset) | ||
277 | { | 311 | { |
278 | return __va(UV_GLOBAL_MMR64_BASE | | 312 | return __va(UV_GLOBAL_MMR64_BASE | |
279 | UV_GLOBAL_MMR64_PNODE_BITS(pnode) | offset); | 313 | UV_GLOBAL_MMR64_PNODE_BITS(pnode) | offset); |
280 | } | 314 | } |
281 | 315 | ||
282 | static inline void uv_write_global_mmr64(int pnode, unsigned long offset, | 316 | static inline void uv_write_global_mmr64(int pnode, unsigned long offset, unsigned long val) |
283 | unsigned long val) | ||
284 | { | 317 | { |
285 | writeq(val, uv_global_mmr64_address(pnode, offset)); | 318 | writeq(val, uv_global_mmr64_address(pnode, offset)); |
286 | } | 319 | } |
287 | 320 | ||
288 | static inline unsigned long uv_read_global_mmr64(int pnode, | 321 | static inline unsigned long uv_read_global_mmr64(int pnode, unsigned long offset) |
289 | unsigned long offset) | ||
290 | { | 322 | { |
291 | return readq(uv_global_mmr64_address(pnode, offset)); | 323 | return readq(uv_global_mmr64_address(pnode, offset)); |
292 | } | 324 | } |
293 | 325 | ||
294 | /* | 326 | /* |
327 | * Global MMR space addresses when referenced by the GRU. (GRU does | ||
328 | * NOT use socket addressing). | ||
329 | */ | ||
330 | static inline unsigned long uv_global_gru_mmr_address(int pnode, unsigned long offset) | ||
331 | { | ||
332 | return UV_GLOBAL_GRU_MMR_BASE | offset | (pnode << uv_hub_info->m_val); | ||
333 | } | ||
334 | |||
335 | static inline void uv_write_global_mmr8(int pnode, unsigned long offset, unsigned char val) | ||
336 | { | ||
337 | writeb(val, uv_global_mmr64_address(pnode, offset)); | ||
338 | } | ||
339 | |||
340 | static inline unsigned char uv_read_global_mmr8(int pnode, unsigned long offset) | ||
341 | { | ||
342 | return readb(uv_global_mmr64_address(pnode, offset)); | ||
343 | } | ||
344 | |||
345 | /* | ||
295 | * Access hub local MMRs. Faster than using global space but only local MMRs | 346 | * Access hub local MMRs. Faster than using global space but only local MMRs |
296 | * are accessible. | 347 | * are accessible. |
297 | */ | 348 | */ |
@@ -410,21 +461,37 @@ static inline void uv_set_scir_bits(unsigned char value) | |||
410 | } | 461 | } |
411 | } | 462 | } |
412 | 463 | ||
464 | static inline unsigned long uv_scir_offset(int apicid) | ||
465 | { | ||
466 | return SCIR_LOCAL_MMR_BASE | (apicid & 0x3f); | ||
467 | } | ||
468 | |||
413 | static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value) | 469 | static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value) |
414 | { | 470 | { |
415 | if (uv_cpu_hub_info(cpu)->scir.state != value) { | 471 | if (uv_cpu_hub_info(cpu)->scir.state != value) { |
472 | uv_write_global_mmr8(uv_cpu_to_pnode(cpu), | ||
473 | uv_cpu_hub_info(cpu)->scir.offset, value); | ||
416 | uv_cpu_hub_info(cpu)->scir.state = value; | 474 | uv_cpu_hub_info(cpu)->scir.state = value; |
417 | uv_write_local_mmr8(uv_cpu_hub_info(cpu)->scir.offset, value); | ||
418 | } | 475 | } |
419 | } | 476 | } |
420 | 477 | ||
478 | static unsigned long uv_hub_ipi_value(int apicid, int vector, int mode) | ||
479 | { | ||
480 | return (1UL << UVH_IPI_INT_SEND_SHFT) | | ||
481 | ((apicid) << UVH_IPI_INT_APIC_ID_SHFT) | | ||
482 | (mode << UVH_IPI_INT_DELIVERY_MODE_SHFT) | | ||
483 | (vector << UVH_IPI_INT_VECTOR_SHFT); | ||
484 | } | ||
485 | |||
421 | static inline void uv_hub_send_ipi(int pnode, int apicid, int vector) | 486 | static inline void uv_hub_send_ipi(int pnode, int apicid, int vector) |
422 | { | 487 | { |
423 | unsigned long val; | 488 | unsigned long val; |
489 | unsigned long dmode = dest_Fixed; | ||
424 | 490 | ||
425 | val = (1UL << UVH_IPI_INT_SEND_SHFT) | | 491 | if (vector == NMI_VECTOR) |
426 | ((apicid) << UVH_IPI_INT_APIC_ID_SHFT) | | 492 | dmode = dest_NMI; |
427 | (vector << UVH_IPI_INT_VECTOR_SHFT); | 493 | |
494 | val = uv_hub_ipi_value(apicid, vector, dmode); | ||
428 | uv_write_global_mmr64(pnode, UVH_IPI_INT, val); | 495 | uv_write_global_mmr64(pnode, UVH_IPI_INT, val); |
429 | } | 496 | } |
430 | 497 | ||
diff --git a/arch/x86/include/asm/uv/uv_irq.h b/arch/x86/include/asm/uv/uv_irq.h index 9613c8c0b647..d6b17c760622 100644 --- a/arch/x86/include/asm/uv/uv_irq.h +++ b/arch/x86/include/asm/uv/uv_irq.h | |||
@@ -25,12 +25,14 @@ struct uv_IO_APIC_route_entry { | |||
25 | dest : 32; | 25 | dest : 32; |
26 | }; | 26 | }; |
27 | 27 | ||
28 | extern struct irq_chip uv_irq_chip; | 28 | enum { |
29 | 29 | UV_AFFINITY_ALL, | |
30 | extern int arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long); | 30 | UV_AFFINITY_NODE, |
31 | extern void arch_disable_uv_irq(int, unsigned long); | 31 | UV_AFFINITY_CPU |
32 | }; | ||
32 | 33 | ||
33 | extern int uv_setup_irq(char *, int, int, unsigned long); | 34 | extern int uv_irq_2_mmr_info(int, unsigned long *, int *); |
34 | extern void uv_teardown_irq(unsigned int, int, unsigned long); | 35 | extern int uv_setup_irq(char *, int, int, unsigned long, int); |
36 | extern void uv_teardown_irq(unsigned int); | ||
35 | 37 | ||
36 | #endif /* _ASM_X86_UV_UV_IRQ_H */ | 38 | #endif /* _ASM_X86_UV_UV_IRQ_H */ |
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 272514c2d456..2b4945419a84 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
@@ -56,6 +56,7 @@ | |||
56 | #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 | 56 | #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 |
57 | #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 | 57 | #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 |
58 | #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 | 58 | #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 |
59 | #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 | ||
59 | 60 | ||
60 | 61 | ||
61 | #define PIN_BASED_EXT_INTR_MASK 0x00000001 | 62 | #define PIN_BASED_EXT_INTR_MASK 0x00000001 |
@@ -144,6 +145,8 @@ enum vmcs_field { | |||
144 | VM_ENTRY_INSTRUCTION_LEN = 0x0000401a, | 145 | VM_ENTRY_INSTRUCTION_LEN = 0x0000401a, |
145 | TPR_THRESHOLD = 0x0000401c, | 146 | TPR_THRESHOLD = 0x0000401c, |
146 | SECONDARY_VM_EXEC_CONTROL = 0x0000401e, | 147 | SECONDARY_VM_EXEC_CONTROL = 0x0000401e, |
148 | PLE_GAP = 0x00004020, | ||
149 | PLE_WINDOW = 0x00004022, | ||
147 | VM_INSTRUCTION_ERROR = 0x00004400, | 150 | VM_INSTRUCTION_ERROR = 0x00004400, |
148 | VM_EXIT_REASON = 0x00004402, | 151 | VM_EXIT_REASON = 0x00004402, |
149 | VM_EXIT_INTR_INFO = 0x00004404, | 152 | VM_EXIT_INTR_INFO = 0x00004404, |
@@ -248,6 +251,7 @@ enum vmcs_field { | |||
248 | #define EXIT_REASON_MSR_READ 31 | 251 | #define EXIT_REASON_MSR_READ 31 |
249 | #define EXIT_REASON_MSR_WRITE 32 | 252 | #define EXIT_REASON_MSR_WRITE 32 |
250 | #define EXIT_REASON_MWAIT_INSTRUCTION 36 | 253 | #define EXIT_REASON_MWAIT_INSTRUCTION 36 |
254 | #define EXIT_REASON_PAUSE_INSTRUCTION 40 | ||
251 | #define EXIT_REASON_MCE_DURING_VMENTRY 41 | 255 | #define EXIT_REASON_MCE_DURING_VMENTRY 41 |
252 | #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 | 256 | #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 |
253 | #define EXIT_REASON_APIC_ACCESS 44 | 257 | #define EXIT_REASON_APIC_ACCESS 44 |
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 2c756fd4ab0e..ea0e8ea15e15 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h | |||
@@ -26,7 +26,7 @@ struct x86_init_mpparse { | |||
26 | void (*smp_read_mpc_oem)(struct mpc_table *mpc); | 26 | void (*smp_read_mpc_oem)(struct mpc_table *mpc); |
27 | void (*mpc_oem_pci_bus)(struct mpc_bus *m); | 27 | void (*mpc_oem_pci_bus)(struct mpc_bus *m); |
28 | void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name); | 28 | void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name); |
29 | void (*find_smp_config)(unsigned int reserve); | 29 | void (*find_smp_config)(void); |
30 | void (*get_smp_config)(unsigned int early); | 30 | void (*get_smp_config)(unsigned int early); |
31 | }; | 31 | }; |
32 | 32 | ||
@@ -91,6 +91,14 @@ struct x86_init_timers { | |||
91 | }; | 91 | }; |
92 | 92 | ||
93 | /** | 93 | /** |
94 | * struct x86_init_iommu - platform specific iommu setup | ||
95 | * @iommu_init: platform specific iommu setup | ||
96 | */ | ||
97 | struct x86_init_iommu { | ||
98 | int (*iommu_init)(void); | ||
99 | }; | ||
100 | |||
101 | /** | ||
94 | * struct x86_init_ops - functions for platform specific setup | 102 | * struct x86_init_ops - functions for platform specific setup |
95 | * | 103 | * |
96 | */ | 104 | */ |
@@ -101,6 +109,7 @@ struct x86_init_ops { | |||
101 | struct x86_init_oem oem; | 109 | struct x86_init_oem oem; |
102 | struct x86_init_paging paging; | 110 | struct x86_init_paging paging; |
103 | struct x86_init_timers timers; | 111 | struct x86_init_timers timers; |
112 | struct x86_init_iommu iommu; | ||
104 | }; | 113 | }; |
105 | 114 | ||
106 | /** | 115 | /** |
@@ -116,11 +125,14 @@ struct x86_cpuinit_ops { | |||
116 | * @calibrate_tsc: calibrate TSC | 125 | * @calibrate_tsc: calibrate TSC |
117 | * @get_wallclock: get time from HW clock like RTC etc. | 126 | * @get_wallclock: get time from HW clock like RTC etc. |
118 | * @set_wallclock: set time back to HW clock | 127 | * @set_wallclock: set time back to HW clock |
128 | * @is_untracked_pat_range exclude from PAT logic | ||
119 | */ | 129 | */ |
120 | struct x86_platform_ops { | 130 | struct x86_platform_ops { |
121 | unsigned long (*calibrate_tsc)(void); | 131 | unsigned long (*calibrate_tsc)(void); |
122 | unsigned long (*get_wallclock)(void); | 132 | unsigned long (*get_wallclock)(void); |
123 | int (*set_wallclock)(unsigned long nowtime); | 133 | int (*set_wallclock)(unsigned long nowtime); |
134 | void (*iommu_shutdown)(void); | ||
135 | bool (*is_untracked_pat_range)(u64 start, u64 end); | ||
124 | }; | 136 | }; |
125 | 137 | ||
126 | extern struct x86_init_ops x86_init; | 138 | extern struct x86_init_ops x86_init; |
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index d5b7e90c0edf..396ff4cc8ed4 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h | |||
@@ -37,31 +37,4 @@ | |||
37 | extern struct shared_info *HYPERVISOR_shared_info; | 37 | extern struct shared_info *HYPERVISOR_shared_info; |
38 | extern struct start_info *xen_start_info; | 38 | extern struct start_info *xen_start_info; |
39 | 39 | ||
40 | enum xen_domain_type { | ||
41 | XEN_NATIVE, /* running on bare hardware */ | ||
42 | XEN_PV_DOMAIN, /* running in a PV domain */ | ||
43 | XEN_HVM_DOMAIN, /* running in a Xen hvm domain */ | ||
44 | }; | ||
45 | |||
46 | #ifdef CONFIG_XEN | ||
47 | extern enum xen_domain_type xen_domain_type; | ||
48 | #else | ||
49 | #define xen_domain_type XEN_NATIVE | ||
50 | #endif | ||
51 | |||
52 | #define xen_domain() (xen_domain_type != XEN_NATIVE) | ||
53 | #define xen_pv_domain() (xen_domain() && \ | ||
54 | xen_domain_type == XEN_PV_DOMAIN) | ||
55 | #define xen_hvm_domain() (xen_domain() && \ | ||
56 | xen_domain_type == XEN_HVM_DOMAIN) | ||
57 | |||
58 | #ifdef CONFIG_XEN_DOM0 | ||
59 | #include <xen/interface/xen.h> | ||
60 | |||
61 | #define xen_initial_domain() (xen_pv_domain() && \ | ||
62 | xen_start_info->flags & SIF_INITDOMAIN) | ||
63 | #else /* !CONFIG_XEN_DOM0 */ | ||
64 | #define xen_initial_domain() (0) | ||
65 | #endif /* CONFIG_XEN_DOM0 */ | ||
66 | |||
67 | #endif /* _ASM_X86_XEN_HYPERVISOR_H */ | 40 | #endif /* _ASM_X86_XEN_HYPERVISOR_H */ |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index d8e5d0cdd678..d87f09bc5a52 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -40,7 +40,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o | |||
40 | obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o | 40 | obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o |
41 | obj-y += bootflag.o e820.o | 41 | obj-y += bootflag.o e820.o |
42 | obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o | 42 | obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o |
43 | obj-y += alternative.o i8253.o pci-nommu.o | 43 | obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o |
44 | obj-y += tsc.o io_delay.o rtc.o | 44 | obj-y += tsc.o io_delay.o rtc.o |
45 | 45 | ||
46 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o | 46 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o |
@@ -89,7 +89,6 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o | |||
89 | obj-$(CONFIG_HPET_TIMER) += hpet.o | 89 | obj-$(CONFIG_HPET_TIMER) += hpet.o |
90 | 90 | ||
91 | obj-$(CONFIG_K8_NB) += k8.o | 91 | obj-$(CONFIG_K8_NB) += k8.o |
92 | obj-$(CONFIG_MGEODE_LX) += geode_32.o mfgpt_32.o | ||
93 | obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o | 92 | obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o |
94 | obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o | 93 | obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o |
95 | 94 | ||
diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile index fd5ca97a2ad5..6f35260bb3ef 100644 --- a/arch/x86/kernel/acpi/Makefile +++ b/arch/x86/kernel/acpi/Makefile | |||
@@ -4,7 +4,7 @@ obj-$(CONFIG_ACPI) += boot.o | |||
4 | obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_rm.o wakeup_$(BITS).o | 4 | obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_rm.o wakeup_$(BITS).o |
5 | 5 | ||
6 | ifneq ($(CONFIG_ACPI_PROCESSOR),) | 6 | ifneq ($(CONFIG_ACPI_PROCESSOR),) |
7 | obj-y += cstate.o processor.o | 7 | obj-y += cstate.o |
8 | endif | 8 | endif |
9 | 9 | ||
10 | $(obj)/wakeup_rm.o: $(obj)/realmode/wakeup.bin | 10 | $(obj)/wakeup_rm.o: $(obj)/realmode/wakeup.bin |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 67e929b89875..fb1035cd9a6a 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -624,6 +624,7 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table) | |||
624 | } | 624 | } |
625 | 625 | ||
626 | hpet_address = hpet_tbl->address.address; | 626 | hpet_address = hpet_tbl->address.address; |
627 | hpet_blockid = hpet_tbl->sequence; | ||
627 | 628 | ||
628 | /* | 629 | /* |
629 | * Some broken BIOSes advertise HPET at 0x0. We really do not | 630 | * Some broken BIOSes advertise HPET at 0x0. We really do not |
@@ -1122,7 +1123,7 @@ static int __init acpi_parse_madt_ioapic_entries(void) | |||
1122 | if (!acpi_sci_override_gsi) | 1123 | if (!acpi_sci_override_gsi) |
1123 | acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0); | 1124 | acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0); |
1124 | 1125 | ||
1125 | /* Fill in identity legacy mapings where no override */ | 1126 | /* Fill in identity legacy mappings where no override */ |
1126 | mp_config_acpi_legacy_irqs(); | 1127 | mp_config_acpi_legacy_irqs(); |
1127 | 1128 | ||
1128 | count = | 1129 | count = |
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 59cdfa4686b2..2e837f5080fe 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c | |||
@@ -48,7 +48,7 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, | |||
48 | * P4, Core and beyond CPUs | 48 | * P4, Core and beyond CPUs |
49 | */ | 49 | */ |
50 | if (c->x86_vendor == X86_VENDOR_INTEL && | 50 | if (c->x86_vendor == X86_VENDOR_INTEL && |
51 | (c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 14))) | 51 | (c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 0x0f))) |
52 | flags->bm_control = 0; | 52 | flags->bm_control = 0; |
53 | } | 53 | } |
54 | EXPORT_SYMBOL(acpi_processor_power_init_bm_check); | 54 | EXPORT_SYMBOL(acpi_processor_power_init_bm_check); |
diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c deleted file mode 100644 index d296f4a195c9..000000000000 --- a/arch/x86/kernel/acpi/processor.c +++ /dev/null | |||
@@ -1,100 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2005 Intel Corporation | ||
3 | * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> | ||
4 | * - Added _PDC for platforms with Intel CPUs | ||
5 | */ | ||
6 | |||
7 | #include <linux/kernel.h> | ||
8 | #include <linux/module.h> | ||
9 | #include <linux/init.h> | ||
10 | #include <linux/acpi.h> | ||
11 | |||
12 | #include <acpi/processor.h> | ||
13 | #include <asm/acpi.h> | ||
14 | |||
15 | static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c) | ||
16 | { | ||
17 | struct acpi_object_list *obj_list; | ||
18 | union acpi_object *obj; | ||
19 | u32 *buf; | ||
20 | |||
21 | /* allocate and initialize pdc. It will be used later. */ | ||
22 | obj_list = kmalloc(sizeof(struct acpi_object_list), GFP_KERNEL); | ||
23 | if (!obj_list) { | ||
24 | printk(KERN_ERR "Memory allocation error\n"); | ||
25 | return; | ||
26 | } | ||
27 | |||
28 | obj = kmalloc(sizeof(union acpi_object), GFP_KERNEL); | ||
29 | if (!obj) { | ||
30 | printk(KERN_ERR "Memory allocation error\n"); | ||
31 | kfree(obj_list); | ||
32 | return; | ||
33 | } | ||
34 | |||
35 | buf = kmalloc(12, GFP_KERNEL); | ||
36 | if (!buf) { | ||
37 | printk(KERN_ERR "Memory allocation error\n"); | ||
38 | kfree(obj); | ||
39 | kfree(obj_list); | ||
40 | return; | ||
41 | } | ||
42 | |||
43 | buf[0] = ACPI_PDC_REVISION_ID; | ||
44 | buf[1] = 1; | ||
45 | buf[2] = ACPI_PDC_C_CAPABILITY_SMP; | ||
46 | |||
47 | /* | ||
48 | * The default of PDC_SMP_T_SWCOORD bit is set for intel x86 cpu so | ||
49 | * that OSPM is capable of native ACPI throttling software | ||
50 | * coordination using BIOS supplied _TSD info. | ||
51 | */ | ||
52 | buf[2] |= ACPI_PDC_SMP_T_SWCOORD; | ||
53 | if (cpu_has(c, X86_FEATURE_EST)) | ||
54 | buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP; | ||
55 | |||
56 | if (cpu_has(c, X86_FEATURE_ACPI)) | ||
57 | buf[2] |= ACPI_PDC_T_FFH; | ||
58 | |||
59 | /* | ||
60 | * If mwait/monitor is unsupported, C2/C3_FFH will be disabled | ||
61 | */ | ||
62 | if (!cpu_has(c, X86_FEATURE_MWAIT)) | ||
63 | buf[2] &= ~(ACPI_PDC_C_C2C3_FFH); | ||
64 | |||
65 | obj->type = ACPI_TYPE_BUFFER; | ||
66 | obj->buffer.length = 12; | ||
67 | obj->buffer.pointer = (u8 *) buf; | ||
68 | obj_list->count = 1; | ||
69 | obj_list->pointer = obj; | ||
70 | pr->pdc = obj_list; | ||
71 | |||
72 | return; | ||
73 | } | ||
74 | |||
75 | |||
76 | /* Initialize _PDC data based on the CPU vendor */ | ||
77 | void arch_acpi_processor_init_pdc(struct acpi_processor *pr) | ||
78 | { | ||
79 | struct cpuinfo_x86 *c = &cpu_data(pr->id); | ||
80 | |||
81 | pr->pdc = NULL; | ||
82 | if (c->x86_vendor == X86_VENDOR_INTEL) | ||
83 | init_intel_pdc(pr, c); | ||
84 | |||
85 | return; | ||
86 | } | ||
87 | |||
88 | EXPORT_SYMBOL(arch_acpi_processor_init_pdc); | ||
89 | |||
90 | void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr) | ||
91 | { | ||
92 | if (pr->pdc) { | ||
93 | kfree(pr->pdc->pointer->buffer.pointer); | ||
94 | kfree(pr->pdc->pointer); | ||
95 | kfree(pr->pdc); | ||
96 | pr->pdc = NULL; | ||
97 | } | ||
98 | } | ||
99 | |||
100 | EXPORT_SYMBOL(arch_acpi_processor_cleanup_pdc); | ||
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.lds.S b/arch/x86/kernel/acpi/realmode/wakeup.lds.S index 7da00b799cda..060fff8f5c5b 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.lds.S +++ b/arch/x86/kernel/acpi/realmode/wakeup.lds.S | |||
@@ -57,5 +57,8 @@ SECTIONS | |||
57 | *(.note*) | 57 | *(.note*) |
58 | } | 58 | } |
59 | 59 | ||
60 | /* | ||
61 | * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility: | ||
62 | */ | ||
60 | . = ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!"); | 63 | . = ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!"); |
61 | } | 64 | } |
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index ca93638ba430..f9961034e557 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c | |||
@@ -78,12 +78,9 @@ int acpi_save_state_mem(void) | |||
78 | #ifndef CONFIG_64BIT | 78 | #ifndef CONFIG_64BIT |
79 | store_gdt((struct desc_ptr *)&header->pmode_gdt); | 79 | store_gdt((struct desc_ptr *)&header->pmode_gdt); |
80 | 80 | ||
81 | header->pmode_efer_low = nx_enabled; | 81 | if (rdmsr_safe(MSR_EFER, &header->pmode_efer_low, |
82 | if (header->pmode_efer_low & 1) { | 82 | &header->pmode_efer_high)) |
83 | /* This is strange, why not save efer, always? */ | 83 | header->pmode_efer_low = header->pmode_efer_high = 0; |
84 | rdmsr(MSR_EFER, header->pmode_efer_low, | ||
85 | header->pmode_efer_high); | ||
86 | } | ||
87 | #endif /* !CONFIG_64BIT */ | 84 | #endif /* !CONFIG_64BIT */ |
88 | 85 | ||
89 | header->pmode_cr0 = read_cr0(); | 86 | header->pmode_cr0 = read_cr0(); |
@@ -119,29 +116,32 @@ void acpi_restore_state_mem(void) | |||
119 | 116 | ||
120 | 117 | ||
121 | /** | 118 | /** |
122 | * acpi_reserve_bootmem - do _very_ early ACPI initialisation | 119 | * acpi_reserve_wakeup_memory - do _very_ early ACPI initialisation |
123 | * | 120 | * |
124 | * We allocate a page from the first 1MB of memory for the wakeup | 121 | * We allocate a page from the first 1MB of memory for the wakeup |
125 | * routine for when we come back from a sleep state. The | 122 | * routine for when we come back from a sleep state. The |
126 | * runtime allocator allows specification of <16MB pages, but not | 123 | * runtime allocator allows specification of <16MB pages, but not |
127 | * <1MB pages. | 124 | * <1MB pages. |
128 | */ | 125 | */ |
129 | void __init acpi_reserve_bootmem(void) | 126 | void __init acpi_reserve_wakeup_memory(void) |
130 | { | 127 | { |
128 | unsigned long mem; | ||
129 | |||
131 | if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) { | 130 | if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) { |
132 | printk(KERN_ERR | 131 | printk(KERN_ERR |
133 | "ACPI: Wakeup code way too big, S3 disabled.\n"); | 132 | "ACPI: Wakeup code way too big, S3 disabled.\n"); |
134 | return; | 133 | return; |
135 | } | 134 | } |
136 | 135 | ||
137 | acpi_realmode = (unsigned long)alloc_bootmem_low(WAKEUP_SIZE); | 136 | mem = find_e820_area(0, 1<<20, WAKEUP_SIZE, PAGE_SIZE); |
138 | 137 | ||
139 | if (!acpi_realmode) { | 138 | if (mem == -1L) { |
140 | printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n"); | 139 | printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n"); |
141 | return; | 140 | return; |
142 | } | 141 | } |
143 | 142 | acpi_realmode = (unsigned long) phys_to_virt(mem); | |
144 | acpi_wakeup_address = virt_to_phys((void *)acpi_realmode); | 143 | acpi_wakeup_address = mem; |
144 | reserve_early(mem, mem + WAKEUP_SIZE, "ACPI WAKEUP"); | ||
145 | } | 145 | } |
146 | 146 | ||
147 | 147 | ||
@@ -162,6 +162,8 @@ static int __init acpi_sleep_setup(char *str) | |||
162 | #endif | 162 | #endif |
163 | if (strncmp(str, "old_ordering", 12) == 0) | 163 | if (strncmp(str, "old_ordering", 12) == 0) |
164 | acpi_old_suspend_ordering(); | 164 | acpi_old_suspend_ordering(); |
165 | if (strncmp(str, "sci_force_enable", 16) == 0) | ||
166 | acpi_set_sci_en_on_resume(); | ||
165 | str = strchr(str, ','); | 167 | str = strchr(str, ','); |
166 | if (str != NULL) | 168 | if (str != NULL) |
167 | str += strspn(str, ", \t"); | 169 | str += strspn(str, ", \t"); |
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 98f230f6a28d..23824fef789c 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. | 2 | * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. |
3 | * Author: Joerg Roedel <joerg.roedel@amd.com> | 3 | * Author: Joerg Roedel <joerg.roedel@amd.com> |
4 | * Leo Duran <leo.duran@amd.com> | 4 | * Leo Duran <leo.duran@amd.com> |
5 | * | 5 | * |
@@ -19,7 +19,7 @@ | |||
19 | 19 | ||
20 | #include <linux/pci.h> | 20 | #include <linux/pci.h> |
21 | #include <linux/gfp.h> | 21 | #include <linux/gfp.h> |
22 | #include <linux/bitops.h> | 22 | #include <linux/bitmap.h> |
23 | #include <linux/debugfs.h> | 23 | #include <linux/debugfs.h> |
24 | #include <linux/scatterlist.h> | 24 | #include <linux/scatterlist.h> |
25 | #include <linux/dma-mapping.h> | 25 | #include <linux/dma-mapping.h> |
@@ -28,6 +28,7 @@ | |||
28 | #include <asm/proto.h> | 28 | #include <asm/proto.h> |
29 | #include <asm/iommu.h> | 29 | #include <asm/iommu.h> |
30 | #include <asm/gart.h> | 30 | #include <asm/gart.h> |
31 | #include <asm/amd_iommu_proto.h> | ||
31 | #include <asm/amd_iommu_types.h> | 32 | #include <asm/amd_iommu_types.h> |
32 | #include <asm/amd_iommu.h> | 33 | #include <asm/amd_iommu.h> |
33 | 34 | ||
@@ -56,20 +57,152 @@ struct iommu_cmd { | |||
56 | u32 data[4]; | 57 | u32 data[4]; |
57 | }; | 58 | }; |
58 | 59 | ||
59 | static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | ||
60 | struct unity_map_entry *e); | ||
61 | static struct dma_ops_domain *find_protection_domain(u16 devid); | ||
62 | static u64 *alloc_pte(struct protection_domain *domain, | ||
63 | unsigned long address, int end_lvl, | ||
64 | u64 **pte_page, gfp_t gfp); | ||
65 | static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, | ||
66 | unsigned long start_page, | ||
67 | unsigned int pages); | ||
68 | static void reset_iommu_command_buffer(struct amd_iommu *iommu); | 60 | static void reset_iommu_command_buffer(struct amd_iommu *iommu); |
69 | static u64 *fetch_pte(struct protection_domain *domain, | ||
70 | unsigned long address, int map_size); | ||
71 | static void update_domain(struct protection_domain *domain); | 61 | static void update_domain(struct protection_domain *domain); |
72 | 62 | ||
63 | /**************************************************************************** | ||
64 | * | ||
65 | * Helper functions | ||
66 | * | ||
67 | ****************************************************************************/ | ||
68 | |||
69 | static inline u16 get_device_id(struct device *dev) | ||
70 | { | ||
71 | struct pci_dev *pdev = to_pci_dev(dev); | ||
72 | |||
73 | return calc_devid(pdev->bus->number, pdev->devfn); | ||
74 | } | ||
75 | |||
76 | static struct iommu_dev_data *get_dev_data(struct device *dev) | ||
77 | { | ||
78 | return dev->archdata.iommu; | ||
79 | } | ||
80 | |||
81 | /* | ||
82 | * In this function the list of preallocated protection domains is traversed to | ||
83 | * find the domain for a specific device | ||
84 | */ | ||
85 | static struct dma_ops_domain *find_protection_domain(u16 devid) | ||
86 | { | ||
87 | struct dma_ops_domain *entry, *ret = NULL; | ||
88 | unsigned long flags; | ||
89 | u16 alias = amd_iommu_alias_table[devid]; | ||
90 | |||
91 | if (list_empty(&iommu_pd_list)) | ||
92 | return NULL; | ||
93 | |||
94 | spin_lock_irqsave(&iommu_pd_list_lock, flags); | ||
95 | |||
96 | list_for_each_entry(entry, &iommu_pd_list, list) { | ||
97 | if (entry->target_dev == devid || | ||
98 | entry->target_dev == alias) { | ||
99 | ret = entry; | ||
100 | break; | ||
101 | } | ||
102 | } | ||
103 | |||
104 | spin_unlock_irqrestore(&iommu_pd_list_lock, flags); | ||
105 | |||
106 | return ret; | ||
107 | } | ||
108 | |||
109 | /* | ||
110 | * This function checks if the driver got a valid device from the caller to | ||
111 | * avoid dereferencing invalid pointers. | ||
112 | */ | ||
113 | static bool check_device(struct device *dev) | ||
114 | { | ||
115 | u16 devid; | ||
116 | |||
117 | if (!dev || !dev->dma_mask) | ||
118 | return false; | ||
119 | |||
120 | /* No device or no PCI device */ | ||
121 | if (!dev || dev->bus != &pci_bus_type) | ||
122 | return false; | ||
123 | |||
124 | devid = get_device_id(dev); | ||
125 | |||
126 | /* Out of our scope? */ | ||
127 | if (devid > amd_iommu_last_bdf) | ||
128 | return false; | ||
129 | |||
130 | if (amd_iommu_rlookup_table[devid] == NULL) | ||
131 | return false; | ||
132 | |||
133 | return true; | ||
134 | } | ||
135 | |||
136 | static int iommu_init_device(struct device *dev) | ||
137 | { | ||
138 | struct iommu_dev_data *dev_data; | ||
139 | struct pci_dev *pdev; | ||
140 | u16 devid, alias; | ||
141 | |||
142 | if (dev->archdata.iommu) | ||
143 | return 0; | ||
144 | |||
145 | dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL); | ||
146 | if (!dev_data) | ||
147 | return -ENOMEM; | ||
148 | |||
149 | dev_data->dev = dev; | ||
150 | |||
151 | devid = get_device_id(dev); | ||
152 | alias = amd_iommu_alias_table[devid]; | ||
153 | pdev = pci_get_bus_and_slot(PCI_BUS(alias), alias & 0xff); | ||
154 | if (pdev) | ||
155 | dev_data->alias = &pdev->dev; | ||
156 | |||
157 | atomic_set(&dev_data->bind, 0); | ||
158 | |||
159 | dev->archdata.iommu = dev_data; | ||
160 | |||
161 | |||
162 | return 0; | ||
163 | } | ||
164 | |||
165 | static void iommu_uninit_device(struct device *dev) | ||
166 | { | ||
167 | kfree(dev->archdata.iommu); | ||
168 | } | ||
169 | |||
170 | void __init amd_iommu_uninit_devices(void) | ||
171 | { | ||
172 | struct pci_dev *pdev = NULL; | ||
173 | |||
174 | for_each_pci_dev(pdev) { | ||
175 | |||
176 | if (!check_device(&pdev->dev)) | ||
177 | continue; | ||
178 | |||
179 | iommu_uninit_device(&pdev->dev); | ||
180 | } | ||
181 | } | ||
182 | |||
183 | int __init amd_iommu_init_devices(void) | ||
184 | { | ||
185 | struct pci_dev *pdev = NULL; | ||
186 | int ret = 0; | ||
187 | |||
188 | for_each_pci_dev(pdev) { | ||
189 | |||
190 | if (!check_device(&pdev->dev)) | ||
191 | continue; | ||
192 | |||
193 | ret = iommu_init_device(&pdev->dev); | ||
194 | if (ret) | ||
195 | goto out_free; | ||
196 | } | ||
197 | |||
198 | return 0; | ||
199 | |||
200 | out_free: | ||
201 | |||
202 | amd_iommu_uninit_devices(); | ||
203 | |||
204 | return ret; | ||
205 | } | ||
73 | #ifdef CONFIG_AMD_IOMMU_STATS | 206 | #ifdef CONFIG_AMD_IOMMU_STATS |
74 | 207 | ||
75 | /* | 208 | /* |
@@ -90,7 +223,6 @@ DECLARE_STATS_COUNTER(alloced_io_mem); | |||
90 | DECLARE_STATS_COUNTER(total_map_requests); | 223 | DECLARE_STATS_COUNTER(total_map_requests); |
91 | 224 | ||
92 | static struct dentry *stats_dir; | 225 | static struct dentry *stats_dir; |
93 | static struct dentry *de_isolate; | ||
94 | static struct dentry *de_fflush; | 226 | static struct dentry *de_fflush; |
95 | 227 | ||
96 | static void amd_iommu_stats_add(struct __iommu_counter *cnt) | 228 | static void amd_iommu_stats_add(struct __iommu_counter *cnt) |
@@ -108,9 +240,6 @@ static void amd_iommu_stats_init(void) | |||
108 | if (stats_dir == NULL) | 240 | if (stats_dir == NULL) |
109 | return; | 241 | return; |
110 | 242 | ||
111 | de_isolate = debugfs_create_bool("isolation", 0444, stats_dir, | ||
112 | (u32 *)&amd_iommu_isolate); | ||
113 | |||
114 | de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir, | 243 | de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir, |
115 | (u32 *)&amd_iommu_unmap_flush); | 244 | (u32 *)&amd_iommu_unmap_flush); |
116 | 245 | ||
@@ -130,12 +259,6 @@ static void amd_iommu_stats_init(void) | |||
130 | 259 | ||
131 | #endif | 260 | #endif |
132 | 261 | ||
133 | /* returns !0 if the IOMMU is caching non-present entries in its TLB */ | ||
134 | static int iommu_has_npcache(struct amd_iommu *iommu) | ||
135 | { | ||
136 | return iommu->cap & (1UL << IOMMU_CAP_NPCACHE); | ||
137 | } | ||
138 | |||
139 | /**************************************************************************** | 262 | /**************************************************************************** |
140 | * | 263 | * |
141 | * Interrupt handling functions | 264 | * Interrupt handling functions |
@@ -199,6 +322,7 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt) | |||
199 | break; | 322 | break; |
200 | case EVENT_TYPE_ILL_CMD: | 323 | case EVENT_TYPE_ILL_CMD: |
201 | printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); | 324 | printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); |
325 | iommu->reset_in_progress = true; | ||
202 | reset_iommu_command_buffer(iommu); | 326 | reset_iommu_command_buffer(iommu); |
203 | dump_command(address); | 327 | dump_command(address); |
204 | break; | 328 | break; |
@@ -321,11 +445,8 @@ static void __iommu_wait_for_completion(struct amd_iommu *iommu) | |||
321 | status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; | 445 | status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; |
322 | writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); | 446 | writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); |
323 | 447 | ||
324 | if (unlikely(i == EXIT_LOOP_COUNT)) { | 448 | if (unlikely(i == EXIT_LOOP_COUNT)) |
325 | spin_unlock(&iommu->lock); | 449 | iommu->reset_in_progress = true; |
326 | reset_iommu_command_buffer(iommu); | ||
327 | spin_lock(&iommu->lock); | ||
328 | } | ||
329 | } | 450 | } |
330 | 451 | ||
331 | /* | 452 | /* |
@@ -372,26 +493,46 @@ static int iommu_completion_wait(struct amd_iommu *iommu) | |||
372 | out: | 493 | out: |
373 | spin_unlock_irqrestore(&iommu->lock, flags); | 494 | spin_unlock_irqrestore(&iommu->lock, flags); |
374 | 495 | ||
496 | if (iommu->reset_in_progress) | ||
497 | reset_iommu_command_buffer(iommu); | ||
498 | |||
375 | return 0; | 499 | return 0; |
376 | } | 500 | } |
377 | 501 | ||
502 | static void iommu_flush_complete(struct protection_domain *domain) | ||
503 | { | ||
504 | int i; | ||
505 | |||
506 | for (i = 0; i < amd_iommus_present; ++i) { | ||
507 | if (!domain->dev_iommu[i]) | ||
508 | continue; | ||
509 | |||
510 | /* | ||
511 | * Devices of this domain are behind this IOMMU | ||
512 | * We need to wait for completion of all commands. | ||
513 | */ | ||
514 | iommu_completion_wait(amd_iommus[i]); | ||
515 | } | ||
516 | } | ||
517 | |||
378 | /* | 518 | /* |
379 | * Command send function for invalidating a device table entry | 519 | * Command send function for invalidating a device table entry |
380 | */ | 520 | */ |
381 | static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) | 521 | static int iommu_flush_device(struct device *dev) |
382 | { | 522 | { |
523 | struct amd_iommu *iommu; | ||
383 | struct iommu_cmd cmd; | 524 | struct iommu_cmd cmd; |
384 | int ret; | 525 | u16 devid; |
385 | 526 | ||
386 | BUG_ON(iommu == NULL); | 527 | devid = get_device_id(dev); |
528 | iommu = amd_iommu_rlookup_table[devid]; | ||
387 | 529 | ||
530 | /* Build command */ | ||
388 | memset(&cmd, 0, sizeof(cmd)); | 531 | memset(&cmd, 0, sizeof(cmd)); |
389 | CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); | 532 | CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); |
390 | cmd.data[0] = devid; | 533 | cmd.data[0] = devid; |
391 | 534 | ||
392 | ret = iommu_queue_command(iommu, &cmd); | 535 | return iommu_queue_command(iommu, &cmd); |
393 | |||
394 | return ret; | ||
395 | } | 536 | } |
396 | 537 | ||
397 | static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, | 538 | static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, |
@@ -430,11 +571,11 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, | |||
430 | * It invalidates a single PTE if the range to flush is within a single | 571 | * It invalidates a single PTE if the range to flush is within a single |
431 | * page. Otherwise it flushes the whole TLB of the IOMMU. | 572 | * page. Otherwise it flushes the whole TLB of the IOMMU. |
432 | */ | 573 | */ |
433 | static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, | 574 | static void __iommu_flush_pages(struct protection_domain *domain, |
434 | u64 address, size_t size) | 575 | u64 address, size_t size, int pde) |
435 | { | 576 | { |
436 | int s = 0; | 577 | int s = 0, i; |
437 | unsigned pages = iommu_num_pages(address, size, PAGE_SIZE); | 578 | unsigned long pages = iommu_num_pages(address, size, PAGE_SIZE); |
438 | 579 | ||
439 | address &= PAGE_MASK; | 580 | address &= PAGE_MASK; |
440 | 581 | ||
@@ -447,142 +588,212 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, | |||
447 | s = 1; | 588 | s = 1; |
448 | } | 589 | } |
449 | 590 | ||
450 | iommu_queue_inv_iommu_pages(iommu, address, domid, 0, s); | ||
451 | 591 | ||
452 | return 0; | 592 | for (i = 0; i < amd_iommus_present; ++i) { |
593 | if (!domain->dev_iommu[i]) | ||
594 | continue; | ||
595 | |||
596 | /* | ||
597 | * Devices of this domain are behind this IOMMU | ||
598 | * We need a TLB flush | ||
599 | */ | ||
600 | iommu_queue_inv_iommu_pages(amd_iommus[i], address, | ||
601 | domain->id, pde, s); | ||
602 | } | ||
603 | |||
604 | return; | ||
453 | } | 605 | } |
454 | 606 | ||
455 | /* Flush the whole IO/TLB for a given protection domain */ | 607 | static void iommu_flush_pages(struct protection_domain *domain, |
456 | static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid) | 608 | u64 address, size_t size) |
457 | { | 609 | { |
458 | u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; | 610 | __iommu_flush_pages(domain, address, size, 0); |
459 | 611 | } | |
460 | INC_STATS_COUNTER(domain_flush_single); | ||
461 | 612 | ||
462 | iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); | 613 | /* Flush the whole IO/TLB for a given protection domain */ |
614 | static void iommu_flush_tlb(struct protection_domain *domain) | ||
615 | { | ||
616 | __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0); | ||
463 | } | 617 | } |
464 | 618 | ||
465 | /* Flush the whole IO/TLB for a given protection domain - including PDE */ | 619 | /* Flush the whole IO/TLB for a given protection domain - including PDE */ |
466 | static void iommu_flush_tlb_pde(struct amd_iommu *iommu, u16 domid) | 620 | static void iommu_flush_tlb_pde(struct protection_domain *domain) |
467 | { | 621 | { |
468 | u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; | 622 | __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); |
469 | |||
470 | INC_STATS_COUNTER(domain_flush_single); | ||
471 | |||
472 | iommu_queue_inv_iommu_pages(iommu, address, domid, 1, 1); | ||
473 | } | 623 | } |
474 | 624 | ||
625 | |||
475 | /* | 626 | /* |
476 | * This function flushes one domain on one IOMMU | 627 | * This function flushes the DTEs for all devices in domain |
477 | */ | 628 | */ |
478 | static void flush_domain_on_iommu(struct amd_iommu *iommu, u16 domid) | 629 | static void iommu_flush_domain_devices(struct protection_domain *domain) |
479 | { | 630 | { |
480 | struct iommu_cmd cmd; | 631 | struct iommu_dev_data *dev_data; |
481 | unsigned long flags; | 632 | unsigned long flags; |
482 | 633 | ||
483 | __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, | 634 | spin_lock_irqsave(&domain->lock, flags); |
484 | domid, 1, 1); | ||
485 | 635 | ||
486 | spin_lock_irqsave(&iommu->lock, flags); | 636 | list_for_each_entry(dev_data, &domain->dev_list, list) |
487 | __iommu_queue_command(iommu, &cmd); | 637 | iommu_flush_device(dev_data->dev); |
488 | __iommu_completion_wait(iommu); | 638 | |
489 | __iommu_wait_for_completion(iommu); | 639 | spin_unlock_irqrestore(&domain->lock, flags); |
490 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
491 | } | 640 | } |
492 | 641 | ||
493 | static void flush_all_domains_on_iommu(struct amd_iommu *iommu) | 642 | static void iommu_flush_all_domain_devices(void) |
494 | { | 643 | { |
495 | int i; | 644 | struct protection_domain *domain; |
645 | unsigned long flags; | ||
496 | 646 | ||
497 | for (i = 1; i < MAX_DOMAIN_ID; ++i) { | 647 | spin_lock_irqsave(&amd_iommu_pd_lock, flags); |
498 | if (!test_bit(i, amd_iommu_pd_alloc_bitmap)) | 648 | |
499 | continue; | 649 | list_for_each_entry(domain, &amd_iommu_pd_list, list) { |
500 | flush_domain_on_iommu(iommu, i); | 650 | iommu_flush_domain_devices(domain); |
651 | iommu_flush_complete(domain); | ||
501 | } | 652 | } |
502 | 653 | ||
654 | spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); | ||
655 | } | ||
656 | |||
657 | void amd_iommu_flush_all_devices(void) | ||
658 | { | ||
659 | iommu_flush_all_domain_devices(); | ||
503 | } | 660 | } |
504 | 661 | ||
505 | /* | 662 | /* |
506 | * This function is used to flush the IO/TLB for a given protection domain | 663 | * This function uses heavy locking and may disable irqs for some time. But |
507 | * on every IOMMU in the system | 664 | * this is no issue because it is only called during resume. |
508 | */ | 665 | */ |
509 | static void iommu_flush_domain(u16 domid) | 666 | void amd_iommu_flush_all_domains(void) |
510 | { | 667 | { |
511 | struct amd_iommu *iommu; | 668 | struct protection_domain *domain; |
669 | unsigned long flags; | ||
512 | 670 | ||
513 | INC_STATS_COUNTER(domain_flush_all); | 671 | spin_lock_irqsave(&amd_iommu_pd_lock, flags); |
514 | 672 | ||
515 | for_each_iommu(iommu) | 673 | list_for_each_entry(domain, &amd_iommu_pd_list, list) { |
516 | flush_domain_on_iommu(iommu, domid); | 674 | spin_lock(&domain->lock); |
675 | iommu_flush_tlb_pde(domain); | ||
676 | iommu_flush_complete(domain); | ||
677 | spin_unlock(&domain->lock); | ||
678 | } | ||
679 | |||
680 | spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); | ||
517 | } | 681 | } |
518 | 682 | ||
519 | void amd_iommu_flush_all_domains(void) | 683 | static void reset_iommu_command_buffer(struct amd_iommu *iommu) |
520 | { | 684 | { |
521 | struct amd_iommu *iommu; | 685 | pr_err("AMD-Vi: Resetting IOMMU command buffer\n"); |
522 | 686 | ||
523 | for_each_iommu(iommu) | 687 | if (iommu->reset_in_progress) |
524 | flush_all_domains_on_iommu(iommu); | 688 | panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n"); |
689 | |||
690 | amd_iommu_reset_cmd_buffer(iommu); | ||
691 | amd_iommu_flush_all_devices(); | ||
692 | amd_iommu_flush_all_domains(); | ||
693 | |||
694 | iommu->reset_in_progress = false; | ||
525 | } | 695 | } |
526 | 696 | ||
527 | static void flush_all_devices_for_iommu(struct amd_iommu *iommu) | 697 | /**************************************************************************** |
698 | * | ||
699 | * The functions below are used the create the page table mappings for | ||
700 | * unity mapped regions. | ||
701 | * | ||
702 | ****************************************************************************/ | ||
703 | |||
704 | /* | ||
705 | * This function is used to add another level to an IO page table. Adding | ||
706 | * another level increases the size of the address space by 9 bits to a size up | ||
707 | * to 64 bits. | ||
708 | */ | ||
709 | static bool increase_address_space(struct protection_domain *domain, | ||
710 | gfp_t gfp) | ||
528 | { | 711 | { |
529 | int i; | 712 | u64 *pte; |
530 | 713 | ||
531 | for (i = 0; i <= amd_iommu_last_bdf; ++i) { | 714 | if (domain->mode == PAGE_MODE_6_LEVEL) |
532 | if (iommu != amd_iommu_rlookup_table[i]) | 715 | /* address space already 64 bit large */ |
533 | continue; | 716 | return false; |
534 | 717 | ||
535 | iommu_queue_inv_dev_entry(iommu, i); | 718 | pte = (void *)get_zeroed_page(gfp); |
536 | iommu_completion_wait(iommu); | 719 | if (!pte) |
537 | } | 720 | return false; |
721 | |||
722 | *pte = PM_LEVEL_PDE(domain->mode, | ||
723 | virt_to_phys(domain->pt_root)); | ||
724 | domain->pt_root = pte; | ||
725 | domain->mode += 1; | ||
726 | domain->updated = true; | ||
727 | |||
728 | return true; | ||
538 | } | 729 | } |
539 | 730 | ||
540 | static void flush_devices_by_domain(struct protection_domain *domain) | 731 | static u64 *alloc_pte(struct protection_domain *domain, |
732 | unsigned long address, | ||
733 | int end_lvl, | ||
734 | u64 **pte_page, | ||
735 | gfp_t gfp) | ||
541 | { | 736 | { |
542 | struct amd_iommu *iommu; | 737 | u64 *pte, *page; |
543 | int i; | 738 | int level; |
544 | 739 | ||
545 | for (i = 0; i <= amd_iommu_last_bdf; ++i) { | 740 | while (address > PM_LEVEL_SIZE(domain->mode)) |
546 | if ((domain == NULL && amd_iommu_pd_table[i] == NULL) || | 741 | increase_address_space(domain, gfp); |
547 | (amd_iommu_pd_table[i] != domain)) | ||
548 | continue; | ||
549 | 742 | ||
550 | iommu = amd_iommu_rlookup_table[i]; | 743 | level = domain->mode - 1; |
551 | if (!iommu) | 744 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; |
552 | continue; | 745 | |
746 | while (level > end_lvl) { | ||
747 | if (!IOMMU_PTE_PRESENT(*pte)) { | ||
748 | page = (u64 *)get_zeroed_page(gfp); | ||
749 | if (!page) | ||
750 | return NULL; | ||
751 | *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); | ||
752 | } | ||
753 | |||
754 | level -= 1; | ||
755 | |||
756 | pte = IOMMU_PTE_PAGE(*pte); | ||
757 | |||
758 | if (pte_page && level == end_lvl) | ||
759 | *pte_page = pte; | ||
553 | 760 | ||
554 | iommu_queue_inv_dev_entry(iommu, i); | 761 | pte = &pte[PM_LEVEL_INDEX(level, address)]; |
555 | iommu_completion_wait(iommu); | ||
556 | } | 762 | } |
763 | |||
764 | return pte; | ||
557 | } | 765 | } |
558 | 766 | ||
559 | static void reset_iommu_command_buffer(struct amd_iommu *iommu) | 767 | /* |
768 | * This function checks if there is a PTE for a given dma address. If | ||
769 | * there is one, it returns the pointer to it. | ||
770 | */ | ||
771 | static u64 *fetch_pte(struct protection_domain *domain, | ||
772 | unsigned long address, int map_size) | ||
560 | { | 773 | { |
561 | pr_err("AMD-Vi: Resetting IOMMU command buffer\n"); | 774 | int level; |
775 | u64 *pte; | ||
562 | 776 | ||
563 | if (iommu->reset_in_progress) | 777 | level = domain->mode - 1; |
564 | panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n"); | 778 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; |
565 | 779 | ||
566 | iommu->reset_in_progress = true; | 780 | while (level > map_size) { |
781 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
782 | return NULL; | ||
567 | 783 | ||
568 | amd_iommu_reset_cmd_buffer(iommu); | 784 | level -= 1; |
569 | flush_all_devices_for_iommu(iommu); | ||
570 | flush_all_domains_on_iommu(iommu); | ||
571 | 785 | ||
572 | iommu->reset_in_progress = false; | 786 | pte = IOMMU_PTE_PAGE(*pte); |
573 | } | 787 | pte = &pte[PM_LEVEL_INDEX(level, address)]; |
574 | 788 | ||
575 | void amd_iommu_flush_all_devices(void) | 789 | if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) { |
576 | { | 790 | pte = NULL; |
577 | flush_devices_by_domain(NULL); | 791 | break; |
578 | } | 792 | } |
793 | } | ||
579 | 794 | ||
580 | /**************************************************************************** | 795 | return pte; |
581 | * | 796 | } |
582 | * The functions below are used the create the page table mappings for | ||
583 | * unity mapped regions. | ||
584 | * | ||
585 | ****************************************************************************/ | ||
586 | 797 | ||
587 | /* | 798 | /* |
588 | * Generic mapping functions. It maps a physical address into a DMA | 799 | * Generic mapping functions. It maps a physical address into a DMA |
@@ -654,28 +865,6 @@ static int iommu_for_unity_map(struct amd_iommu *iommu, | |||
654 | } | 865 | } |
655 | 866 | ||
656 | /* | 867 | /* |
657 | * Init the unity mappings for a specific IOMMU in the system | ||
658 | * | ||
659 | * Basically iterates over all unity mapping entries and applies them to | ||
660 | * the default domain DMA of that IOMMU if necessary. | ||
661 | */ | ||
662 | static int iommu_init_unity_mappings(struct amd_iommu *iommu) | ||
663 | { | ||
664 | struct unity_map_entry *entry; | ||
665 | int ret; | ||
666 | |||
667 | list_for_each_entry(entry, &amd_iommu_unity_map, list) { | ||
668 | if (!iommu_for_unity_map(iommu, entry)) | ||
669 | continue; | ||
670 | ret = dma_ops_unity_map(iommu->default_dom, entry); | ||
671 | if (ret) | ||
672 | return ret; | ||
673 | } | ||
674 | |||
675 | return 0; | ||
676 | } | ||
677 | |||
678 | /* | ||
679 | * This function actually applies the mapping to the page table of the | 868 | * This function actually applies the mapping to the page table of the |
680 | * dma_ops domain. | 869 | * dma_ops domain. |
681 | */ | 870 | */ |
@@ -704,6 +893,28 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | |||
704 | } | 893 | } |
705 | 894 | ||
706 | /* | 895 | /* |
896 | * Init the unity mappings for a specific IOMMU in the system | ||
897 | * | ||
898 | * Basically iterates over all unity mapping entries and applies them to | ||
899 | * the default domain DMA of that IOMMU if necessary. | ||
900 | */ | ||
901 | static int iommu_init_unity_mappings(struct amd_iommu *iommu) | ||
902 | { | ||
903 | struct unity_map_entry *entry; | ||
904 | int ret; | ||
905 | |||
906 | list_for_each_entry(entry, &amd_iommu_unity_map, list) { | ||
907 | if (!iommu_for_unity_map(iommu, entry)) | ||
908 | continue; | ||
909 | ret = dma_ops_unity_map(iommu->default_dom, entry); | ||
910 | if (ret) | ||
911 | return ret; | ||
912 | } | ||
913 | |||
914 | return 0; | ||
915 | } | ||
916 | |||
917 | /* | ||
707 | * Inits the unity mappings required for a specific device | 918 | * Inits the unity mappings required for a specific device |
708 | */ | 919 | */ |
709 | static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, | 920 | static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, |
@@ -740,34 +951,23 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, | |||
740 | */ | 951 | */ |
741 | 952 | ||
742 | /* | 953 | /* |
743 | * This function checks if there is a PTE for a given dma address. If | 954 | * Used to reserve address ranges in the aperture (e.g. for exclusion |
744 | * there is one, it returns the pointer to it. | 955 | * ranges. |
745 | */ | 956 | */ |
746 | static u64 *fetch_pte(struct protection_domain *domain, | 957 | static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, |
747 | unsigned long address, int map_size) | 958 | unsigned long start_page, |
959 | unsigned int pages) | ||
748 | { | 960 | { |
749 | int level; | 961 | unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT; |
750 | u64 *pte; | ||
751 | |||
752 | level = domain->mode - 1; | ||
753 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; | ||
754 | |||
755 | while (level > map_size) { | ||
756 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
757 | return NULL; | ||
758 | |||
759 | level -= 1; | ||
760 | 962 | ||
761 | pte = IOMMU_PTE_PAGE(*pte); | 963 | if (start_page + pages > last_page) |
762 | pte = &pte[PM_LEVEL_INDEX(level, address)]; | 964 | pages = last_page - start_page; |
763 | 965 | ||
764 | if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) { | 966 | for (i = start_page; i < start_page + pages; ++i) { |
765 | pte = NULL; | 967 | int index = i / APERTURE_RANGE_PAGES; |
766 | break; | 968 | int page = i % APERTURE_RANGE_PAGES; |
767 | } | 969 | __set_bit(page, dom->aperture[index]->bitmap); |
768 | } | 970 | } |
769 | |||
770 | return pte; | ||
771 | } | 971 | } |
772 | 972 | ||
773 | /* | 973 | /* |
@@ -775,11 +975,11 @@ static u64 *fetch_pte(struct protection_domain *domain, | |||
775 | * aperture in case of dma_ops domain allocation or address allocation | 975 | * aperture in case of dma_ops domain allocation or address allocation |
776 | * failure. | 976 | * failure. |
777 | */ | 977 | */ |
778 | static int alloc_new_range(struct amd_iommu *iommu, | 978 | static int alloc_new_range(struct dma_ops_domain *dma_dom, |
779 | struct dma_ops_domain *dma_dom, | ||
780 | bool populate, gfp_t gfp) | 979 | bool populate, gfp_t gfp) |
781 | { | 980 | { |
782 | int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; | 981 | int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; |
982 | struct amd_iommu *iommu; | ||
783 | int i; | 983 | int i; |
784 | 984 | ||
785 | #ifdef CONFIG_IOMMU_STRESS | 985 | #ifdef CONFIG_IOMMU_STRESS |
@@ -819,14 +1019,17 @@ static int alloc_new_range(struct amd_iommu *iommu, | |||
819 | dma_dom->aperture_size += APERTURE_RANGE_SIZE; | 1019 | dma_dom->aperture_size += APERTURE_RANGE_SIZE; |
820 | 1020 | ||
821 | /* Intialize the exclusion range if necessary */ | 1021 | /* Intialize the exclusion range if necessary */ |
822 | if (iommu->exclusion_start && | 1022 | for_each_iommu(iommu) { |
823 | iommu->exclusion_start >= dma_dom->aperture[index]->offset && | 1023 | if (iommu->exclusion_start && |
824 | iommu->exclusion_start < dma_dom->aperture_size) { | 1024 | iommu->exclusion_start >= dma_dom->aperture[index]->offset |
825 | unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; | 1025 | && iommu->exclusion_start < dma_dom->aperture_size) { |
826 | int pages = iommu_num_pages(iommu->exclusion_start, | 1026 | unsigned long startpage; |
827 | iommu->exclusion_length, | 1027 | int pages = iommu_num_pages(iommu->exclusion_start, |
828 | PAGE_SIZE); | 1028 | iommu->exclusion_length, |
829 | dma_ops_reserve_addresses(dma_dom, startpage, pages); | 1029 | PAGE_SIZE); |
1030 | startpage = iommu->exclusion_start >> PAGE_SHIFT; | ||
1031 | dma_ops_reserve_addresses(dma_dom, startpage, pages); | ||
1032 | } | ||
830 | } | 1033 | } |
831 | 1034 | ||
832 | /* | 1035 | /* |
@@ -928,7 +1131,7 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev, | |||
928 | } | 1131 | } |
929 | 1132 | ||
930 | if (unlikely(address == -1)) | 1133 | if (unlikely(address == -1)) |
931 | address = bad_dma_address; | 1134 | address = DMA_ERROR_CODE; |
932 | 1135 | ||
933 | WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); | 1136 | WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); |
934 | 1137 | ||
@@ -959,7 +1162,7 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, | |||
959 | 1162 | ||
960 | address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT; | 1163 | address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT; |
961 | 1164 | ||
962 | iommu_area_free(range->bitmap, address, pages); | 1165 | bitmap_clear(range->bitmap, address, pages); |
963 | 1166 | ||
964 | } | 1167 | } |
965 | 1168 | ||
@@ -973,6 +1176,31 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, | |||
973 | * | 1176 | * |
974 | ****************************************************************************/ | 1177 | ****************************************************************************/ |
975 | 1178 | ||
1179 | /* | ||
1180 | * This function adds a protection domain to the global protection domain list | ||
1181 | */ | ||
1182 | static void add_domain_to_list(struct protection_domain *domain) | ||
1183 | { | ||
1184 | unsigned long flags; | ||
1185 | |||
1186 | spin_lock_irqsave(&amd_iommu_pd_lock, flags); | ||
1187 | list_add(&domain->list, &amd_iommu_pd_list); | ||
1188 | spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); | ||
1189 | } | ||
1190 | |||
1191 | /* | ||
1192 | * This function removes a protection domain to the global | ||
1193 | * protection domain list | ||
1194 | */ | ||
1195 | static void del_domain_from_list(struct protection_domain *domain) | ||
1196 | { | ||
1197 | unsigned long flags; | ||
1198 | |||
1199 | spin_lock_irqsave(&amd_iommu_pd_lock, flags); | ||
1200 | list_del(&domain->list); | ||
1201 | spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); | ||
1202 | } | ||
1203 | |||
976 | static u16 domain_id_alloc(void) | 1204 | static u16 domain_id_alloc(void) |
977 | { | 1205 | { |
978 | unsigned long flags; | 1206 | unsigned long flags; |
@@ -1000,26 +1228,6 @@ static void domain_id_free(int id) | |||
1000 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | 1228 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); |
1001 | } | 1229 | } |
1002 | 1230 | ||
1003 | /* | ||
1004 | * Used to reserve address ranges in the aperture (e.g. for exclusion | ||
1005 | * ranges. | ||
1006 | */ | ||
1007 | static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, | ||
1008 | unsigned long start_page, | ||
1009 | unsigned int pages) | ||
1010 | { | ||
1011 | unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT; | ||
1012 | |||
1013 | if (start_page + pages > last_page) | ||
1014 | pages = last_page - start_page; | ||
1015 | |||
1016 | for (i = start_page; i < start_page + pages; ++i) { | ||
1017 | int index = i / APERTURE_RANGE_PAGES; | ||
1018 | int page = i % APERTURE_RANGE_PAGES; | ||
1019 | __set_bit(page, dom->aperture[index]->bitmap); | ||
1020 | } | ||
1021 | } | ||
1022 | |||
1023 | static void free_pagetable(struct protection_domain *domain) | 1231 | static void free_pagetable(struct protection_domain *domain) |
1024 | { | 1232 | { |
1025 | int i, j; | 1233 | int i, j; |
@@ -1061,6 +1269,8 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) | |||
1061 | if (!dom) | 1269 | if (!dom) |
1062 | return; | 1270 | return; |
1063 | 1271 | ||
1272 | del_domain_from_list(&dom->domain); | ||
1273 | |||
1064 | free_pagetable(&dom->domain); | 1274 | free_pagetable(&dom->domain); |
1065 | 1275 | ||
1066 | for (i = 0; i < APERTURE_MAX_RANGES; ++i) { | 1276 | for (i = 0; i < APERTURE_MAX_RANGES; ++i) { |
@@ -1078,7 +1288,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) | |||
1078 | * It also intializes the page table and the address allocator data | 1288 | * It also intializes the page table and the address allocator data |
1079 | * structures required for the dma_ops interface | 1289 | * structures required for the dma_ops interface |
1080 | */ | 1290 | */ |
1081 | static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) | 1291 | static struct dma_ops_domain *dma_ops_domain_alloc(void) |
1082 | { | 1292 | { |
1083 | struct dma_ops_domain *dma_dom; | 1293 | struct dma_ops_domain *dma_dom; |
1084 | 1294 | ||
@@ -1091,6 +1301,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) | |||
1091 | dma_dom->domain.id = domain_id_alloc(); | 1301 | dma_dom->domain.id = domain_id_alloc(); |
1092 | if (dma_dom->domain.id == 0) | 1302 | if (dma_dom->domain.id == 0) |
1093 | goto free_dma_dom; | 1303 | goto free_dma_dom; |
1304 | INIT_LIST_HEAD(&dma_dom->domain.dev_list); | ||
1094 | dma_dom->domain.mode = PAGE_MODE_2_LEVEL; | 1305 | dma_dom->domain.mode = PAGE_MODE_2_LEVEL; |
1095 | dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); | 1306 | dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); |
1096 | dma_dom->domain.flags = PD_DMA_OPS_MASK; | 1307 | dma_dom->domain.flags = PD_DMA_OPS_MASK; |
@@ -1101,7 +1312,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) | |||
1101 | dma_dom->need_flush = false; | 1312 | dma_dom->need_flush = false; |
1102 | dma_dom->target_dev = 0xffff; | 1313 | dma_dom->target_dev = 0xffff; |
1103 | 1314 | ||
1104 | if (alloc_new_range(iommu, dma_dom, true, GFP_KERNEL)) | 1315 | add_domain_to_list(&dma_dom->domain); |
1316 | |||
1317 | if (alloc_new_range(dma_dom, true, GFP_KERNEL)) | ||
1105 | goto free_dma_dom; | 1318 | goto free_dma_dom; |
1106 | 1319 | ||
1107 | /* | 1320 | /* |
@@ -1129,22 +1342,6 @@ static bool dma_ops_domain(struct protection_domain *domain) | |||
1129 | return domain->flags & PD_DMA_OPS_MASK; | 1342 | return domain->flags & PD_DMA_OPS_MASK; |
1130 | } | 1343 | } |
1131 | 1344 | ||
1132 | /* | ||
1133 | * Find out the protection domain structure for a given PCI device. This | ||
1134 | * will give us the pointer to the page table root for example. | ||
1135 | */ | ||
1136 | static struct protection_domain *domain_for_device(u16 devid) | ||
1137 | { | ||
1138 | struct protection_domain *dom; | ||
1139 | unsigned long flags; | ||
1140 | |||
1141 | read_lock_irqsave(&amd_iommu_devtable_lock, flags); | ||
1142 | dom = amd_iommu_pd_table[devid]; | ||
1143 | read_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | ||
1144 | |||
1145 | return dom; | ||
1146 | } | ||
1147 | |||
1148 | static void set_dte_entry(u16 devid, struct protection_domain *domain) | 1345 | static void set_dte_entry(u16 devid, struct protection_domain *domain) |
1149 | { | 1346 | { |
1150 | u64 pte_root = virt_to_phys(domain->pt_root); | 1347 | u64 pte_root = virt_to_phys(domain->pt_root); |
@@ -1156,42 +1353,123 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain) | |||
1156 | amd_iommu_dev_table[devid].data[2] = domain->id; | 1353 | amd_iommu_dev_table[devid].data[2] = domain->id; |
1157 | amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); | 1354 | amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); |
1158 | amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); | 1355 | amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); |
1356 | } | ||
1159 | 1357 | ||
1160 | amd_iommu_pd_table[devid] = domain; | 1358 | static void clear_dte_entry(u16 devid) |
1359 | { | ||
1360 | /* remove entry from the device table seen by the hardware */ | ||
1361 | amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; | ||
1362 | amd_iommu_dev_table[devid].data[1] = 0; | ||
1363 | amd_iommu_dev_table[devid].data[2] = 0; | ||
1364 | |||
1365 | amd_iommu_apply_erratum_63(devid); | ||
1366 | } | ||
1367 | |||
1368 | static void do_attach(struct device *dev, struct protection_domain *domain) | ||
1369 | { | ||
1370 | struct iommu_dev_data *dev_data; | ||
1371 | struct amd_iommu *iommu; | ||
1372 | u16 devid; | ||
1373 | |||
1374 | devid = get_device_id(dev); | ||
1375 | iommu = amd_iommu_rlookup_table[devid]; | ||
1376 | dev_data = get_dev_data(dev); | ||
1377 | |||
1378 | /* Update data structures */ | ||
1379 | dev_data->domain = domain; | ||
1380 | list_add(&dev_data->list, &domain->dev_list); | ||
1381 | set_dte_entry(devid, domain); | ||
1382 | |||
1383 | /* Do reference counting */ | ||
1384 | domain->dev_iommu[iommu->index] += 1; | ||
1385 | domain->dev_cnt += 1; | ||
1386 | |||
1387 | /* Flush the DTE entry */ | ||
1388 | iommu_flush_device(dev); | ||
1389 | } | ||
1390 | |||
1391 | static void do_detach(struct device *dev) | ||
1392 | { | ||
1393 | struct iommu_dev_data *dev_data; | ||
1394 | struct amd_iommu *iommu; | ||
1395 | u16 devid; | ||
1396 | |||
1397 | devid = get_device_id(dev); | ||
1398 | iommu = amd_iommu_rlookup_table[devid]; | ||
1399 | dev_data = get_dev_data(dev); | ||
1400 | |||
1401 | /* decrease reference counters */ | ||
1402 | dev_data->domain->dev_iommu[iommu->index] -= 1; | ||
1403 | dev_data->domain->dev_cnt -= 1; | ||
1404 | |||
1405 | /* Update data structures */ | ||
1406 | dev_data->domain = NULL; | ||
1407 | list_del(&dev_data->list); | ||
1408 | clear_dte_entry(devid); | ||
1409 | |||
1410 | /* Flush the DTE entry */ | ||
1411 | iommu_flush_device(dev); | ||
1161 | } | 1412 | } |
1162 | 1413 | ||
1163 | /* | 1414 | /* |
1164 | * If a device is not yet associated with a domain, this function does | 1415 | * If a device is not yet associated with a domain, this function does |
1165 | * assigns it visible for the hardware | 1416 | * assigns it visible for the hardware |
1166 | */ | 1417 | */ |
1167 | static void __attach_device(struct amd_iommu *iommu, | 1418 | static int __attach_device(struct device *dev, |
1168 | struct protection_domain *domain, | 1419 | struct protection_domain *domain) |
1169 | u16 devid) | ||
1170 | { | 1420 | { |
1421 | struct iommu_dev_data *dev_data, *alias_data; | ||
1422 | |||
1423 | dev_data = get_dev_data(dev); | ||
1424 | alias_data = get_dev_data(dev_data->alias); | ||
1425 | |||
1426 | if (!alias_data) | ||
1427 | return -EINVAL; | ||
1428 | |||
1171 | /* lock domain */ | 1429 | /* lock domain */ |
1172 | spin_lock(&domain->lock); | 1430 | spin_lock(&domain->lock); |
1173 | 1431 | ||
1174 | /* update DTE entry */ | 1432 | /* Some sanity checks */ |
1175 | set_dte_entry(devid, domain); | 1433 | if (alias_data->domain != NULL && |
1434 | alias_data->domain != domain) | ||
1435 | return -EBUSY; | ||
1436 | |||
1437 | if (dev_data->domain != NULL && | ||
1438 | dev_data->domain != domain) | ||
1439 | return -EBUSY; | ||
1176 | 1440 | ||
1177 | domain->dev_cnt += 1; | 1441 | /* Do real assignment */ |
1442 | if (dev_data->alias != dev) { | ||
1443 | alias_data = get_dev_data(dev_data->alias); | ||
1444 | if (alias_data->domain == NULL) | ||
1445 | do_attach(dev_data->alias, domain); | ||
1446 | |||
1447 | atomic_inc(&alias_data->bind); | ||
1448 | } | ||
1449 | |||
1450 | if (dev_data->domain == NULL) | ||
1451 | do_attach(dev, domain); | ||
1452 | |||
1453 | atomic_inc(&dev_data->bind); | ||
1178 | 1454 | ||
1179 | /* ready */ | 1455 | /* ready */ |
1180 | spin_unlock(&domain->lock); | 1456 | spin_unlock(&domain->lock); |
1457 | |||
1458 | return 0; | ||
1181 | } | 1459 | } |
1182 | 1460 | ||
1183 | /* | 1461 | /* |
1184 | * If a device is not yet associated with a domain, this function does | 1462 | * If a device is not yet associated with a domain, this function does |
1185 | * assigns it visible for the hardware | 1463 | * assigns it visible for the hardware |
1186 | */ | 1464 | */ |
1187 | static void attach_device(struct amd_iommu *iommu, | 1465 | static int attach_device(struct device *dev, |
1188 | struct protection_domain *domain, | 1466 | struct protection_domain *domain) |
1189 | u16 devid) | ||
1190 | { | 1467 | { |
1191 | unsigned long flags; | 1468 | unsigned long flags; |
1469 | int ret; | ||
1192 | 1470 | ||
1193 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | 1471 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); |
1194 | __attach_device(iommu, domain, devid); | 1472 | ret = __attach_device(dev, domain); |
1195 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | 1473 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); |
1196 | 1474 | ||
1197 | /* | 1475 | /* |
@@ -1199,96 +1477,125 @@ static void attach_device(struct amd_iommu *iommu, | |||
1199 | * left the caches in the IOMMU dirty. So we have to flush | 1477 | * left the caches in the IOMMU dirty. So we have to flush |
1200 | * here to evict all dirty stuff. | 1478 | * here to evict all dirty stuff. |
1201 | */ | 1479 | */ |
1202 | iommu_queue_inv_dev_entry(iommu, devid); | 1480 | iommu_flush_tlb_pde(domain); |
1203 | iommu_flush_tlb_pde(iommu, domain->id); | 1481 | |
1482 | return ret; | ||
1204 | } | 1483 | } |
1205 | 1484 | ||
1206 | /* | 1485 | /* |
1207 | * Removes a device from a protection domain (unlocked) | 1486 | * Removes a device from a protection domain (unlocked) |
1208 | */ | 1487 | */ |
1209 | static void __detach_device(struct protection_domain *domain, u16 devid) | 1488 | static void __detach_device(struct device *dev) |
1210 | { | 1489 | { |
1490 | struct iommu_dev_data *dev_data = get_dev_data(dev); | ||
1491 | struct iommu_dev_data *alias_data; | ||
1492 | unsigned long flags; | ||
1211 | 1493 | ||
1212 | /* lock domain */ | 1494 | BUG_ON(!dev_data->domain); |
1213 | spin_lock(&domain->lock); | ||
1214 | 1495 | ||
1215 | /* remove domain from the lookup table */ | 1496 | spin_lock_irqsave(&dev_data->domain->lock, flags); |
1216 | amd_iommu_pd_table[devid] = NULL; | ||
1217 | 1497 | ||
1218 | /* remove entry from the device table seen by the hardware */ | 1498 | if (dev_data->alias != dev) { |
1219 | amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; | 1499 | alias_data = get_dev_data(dev_data->alias); |
1220 | amd_iommu_dev_table[devid].data[1] = 0; | 1500 | if (atomic_dec_and_test(&alias_data->bind)) |
1221 | amd_iommu_dev_table[devid].data[2] = 0; | 1501 | do_detach(dev_data->alias); |
1502 | } | ||
1222 | 1503 | ||
1223 | /* decrease reference counter */ | 1504 | if (atomic_dec_and_test(&dev_data->bind)) |
1224 | domain->dev_cnt -= 1; | 1505 | do_detach(dev); |
1225 | 1506 | ||
1226 | /* ready */ | 1507 | spin_unlock_irqrestore(&dev_data->domain->lock, flags); |
1227 | spin_unlock(&domain->lock); | ||
1228 | 1508 | ||
1229 | /* | 1509 | /* |
1230 | * If we run in passthrough mode the device must be assigned to the | 1510 | * If we run in passthrough mode the device must be assigned to the |
1231 | * passthrough domain if it is detached from any other domain | 1511 | * passthrough domain if it is detached from any other domain |
1232 | */ | 1512 | */ |
1233 | if (iommu_pass_through) { | 1513 | if (iommu_pass_through && dev_data->domain == NULL) |
1234 | struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; | 1514 | __attach_device(dev, pt_domain); |
1235 | __attach_device(iommu, pt_domain, devid); | ||
1236 | } | ||
1237 | } | 1515 | } |
1238 | 1516 | ||
1239 | /* | 1517 | /* |
1240 | * Removes a device from a protection domain (with devtable_lock held) | 1518 | * Removes a device from a protection domain (with devtable_lock held) |
1241 | */ | 1519 | */ |
1242 | static void detach_device(struct protection_domain *domain, u16 devid) | 1520 | static void detach_device(struct device *dev) |
1243 | { | 1521 | { |
1244 | unsigned long flags; | 1522 | unsigned long flags; |
1245 | 1523 | ||
1246 | /* lock device table */ | 1524 | /* lock device table */ |
1247 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | 1525 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); |
1248 | __detach_device(domain, devid); | 1526 | __detach_device(dev); |
1249 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | 1527 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); |
1250 | } | 1528 | } |
1251 | 1529 | ||
1530 | /* | ||
1531 | * Find out the protection domain structure for a given PCI device. This | ||
1532 | * will give us the pointer to the page table root for example. | ||
1533 | */ | ||
1534 | static struct protection_domain *domain_for_device(struct device *dev) | ||
1535 | { | ||
1536 | struct protection_domain *dom; | ||
1537 | struct iommu_dev_data *dev_data, *alias_data; | ||
1538 | unsigned long flags; | ||
1539 | u16 devid, alias; | ||
1540 | |||
1541 | devid = get_device_id(dev); | ||
1542 | alias = amd_iommu_alias_table[devid]; | ||
1543 | dev_data = get_dev_data(dev); | ||
1544 | alias_data = get_dev_data(dev_data->alias); | ||
1545 | if (!alias_data) | ||
1546 | return NULL; | ||
1547 | |||
1548 | read_lock_irqsave(&amd_iommu_devtable_lock, flags); | ||
1549 | dom = dev_data->domain; | ||
1550 | if (dom == NULL && | ||
1551 | alias_data->domain != NULL) { | ||
1552 | __attach_device(dev, alias_data->domain); | ||
1553 | dom = alias_data->domain; | ||
1554 | } | ||
1555 | |||
1556 | read_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | ||
1557 | |||
1558 | return dom; | ||
1559 | } | ||
1560 | |||
1252 | static int device_change_notifier(struct notifier_block *nb, | 1561 | static int device_change_notifier(struct notifier_block *nb, |
1253 | unsigned long action, void *data) | 1562 | unsigned long action, void *data) |
1254 | { | 1563 | { |
1255 | struct device *dev = data; | 1564 | struct device *dev = data; |
1256 | struct pci_dev *pdev = to_pci_dev(dev); | 1565 | u16 devid; |
1257 | u16 devid = calc_devid(pdev->bus->number, pdev->devfn); | ||
1258 | struct protection_domain *domain; | 1566 | struct protection_domain *domain; |
1259 | struct dma_ops_domain *dma_domain; | 1567 | struct dma_ops_domain *dma_domain; |
1260 | struct amd_iommu *iommu; | 1568 | struct amd_iommu *iommu; |
1261 | unsigned long flags; | 1569 | unsigned long flags; |
1262 | 1570 | ||
1263 | if (devid > amd_iommu_last_bdf) | 1571 | if (!check_device(dev)) |
1264 | goto out; | 1572 | return 0; |
1265 | |||
1266 | devid = amd_iommu_alias_table[devid]; | ||
1267 | |||
1268 | iommu = amd_iommu_rlookup_table[devid]; | ||
1269 | if (iommu == NULL) | ||
1270 | goto out; | ||
1271 | |||
1272 | domain = domain_for_device(devid); | ||
1273 | 1573 | ||
1274 | if (domain && !dma_ops_domain(domain)) | 1574 | devid = get_device_id(dev); |
1275 | WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound " | 1575 | iommu = amd_iommu_rlookup_table[devid]; |
1276 | "to a non-dma-ops domain\n", dev_name(dev)); | ||
1277 | 1576 | ||
1278 | switch (action) { | 1577 | switch (action) { |
1279 | case BUS_NOTIFY_UNBOUND_DRIVER: | 1578 | case BUS_NOTIFY_UNBOUND_DRIVER: |
1579 | |||
1580 | domain = domain_for_device(dev); | ||
1581 | |||
1280 | if (!domain) | 1582 | if (!domain) |
1281 | goto out; | 1583 | goto out; |
1282 | if (iommu_pass_through) | 1584 | if (iommu_pass_through) |
1283 | break; | 1585 | break; |
1284 | detach_device(domain, devid); | 1586 | detach_device(dev); |
1285 | break; | 1587 | break; |
1286 | case BUS_NOTIFY_ADD_DEVICE: | 1588 | case BUS_NOTIFY_ADD_DEVICE: |
1589 | |||
1590 | iommu_init_device(dev); | ||
1591 | |||
1592 | domain = domain_for_device(dev); | ||
1593 | |||
1287 | /* allocate a protection domain if a device is added */ | 1594 | /* allocate a protection domain if a device is added */ |
1288 | dma_domain = find_protection_domain(devid); | 1595 | dma_domain = find_protection_domain(devid); |
1289 | if (dma_domain) | 1596 | if (dma_domain) |
1290 | goto out; | 1597 | goto out; |
1291 | dma_domain = dma_ops_domain_alloc(iommu); | 1598 | dma_domain = dma_ops_domain_alloc(); |
1292 | if (!dma_domain) | 1599 | if (!dma_domain) |
1293 | goto out; | 1600 | goto out; |
1294 | dma_domain->target_dev = devid; | 1601 | dma_domain->target_dev = devid; |
@@ -1298,11 +1605,15 @@ static int device_change_notifier(struct notifier_block *nb, | |||
1298 | spin_unlock_irqrestore(&iommu_pd_list_lock, flags); | 1605 | spin_unlock_irqrestore(&iommu_pd_list_lock, flags); |
1299 | 1606 | ||
1300 | break; | 1607 | break; |
1608 | case BUS_NOTIFY_DEL_DEVICE: | ||
1609 | |||
1610 | iommu_uninit_device(dev); | ||
1611 | |||
1301 | default: | 1612 | default: |
1302 | goto out; | 1613 | goto out; |
1303 | } | 1614 | } |
1304 | 1615 | ||
1305 | iommu_queue_inv_dev_entry(iommu, devid); | 1616 | iommu_flush_device(dev); |
1306 | iommu_completion_wait(iommu); | 1617 | iommu_completion_wait(iommu); |
1307 | 1618 | ||
1308 | out: | 1619 | out: |
@@ -1313,6 +1624,11 @@ static struct notifier_block device_nb = { | |||
1313 | .notifier_call = device_change_notifier, | 1624 | .notifier_call = device_change_notifier, |
1314 | }; | 1625 | }; |
1315 | 1626 | ||
1627 | void amd_iommu_init_notifier(void) | ||
1628 | { | ||
1629 | bus_register_notifier(&pci_bus_type, &device_nb); | ||
1630 | } | ||
1631 | |||
1316 | /***************************************************************************** | 1632 | /***************************************************************************** |
1317 | * | 1633 | * |
1318 | * The next functions belong to the dma_ops mapping/unmapping code. | 1634 | * The next functions belong to the dma_ops mapping/unmapping code. |
@@ -1320,106 +1636,46 @@ static struct notifier_block device_nb = { | |||
1320 | *****************************************************************************/ | 1636 | *****************************************************************************/ |
1321 | 1637 | ||
1322 | /* | 1638 | /* |
1323 | * This function checks if the driver got a valid device from the caller to | ||
1324 | * avoid dereferencing invalid pointers. | ||
1325 | */ | ||
1326 | static bool check_device(struct device *dev) | ||
1327 | { | ||
1328 | if (!dev || !dev->dma_mask) | ||
1329 | return false; | ||
1330 | |||
1331 | return true; | ||
1332 | } | ||
1333 | |||
1334 | /* | ||
1335 | * In this function the list of preallocated protection domains is traversed to | ||
1336 | * find the domain for a specific device | ||
1337 | */ | ||
1338 | static struct dma_ops_domain *find_protection_domain(u16 devid) | ||
1339 | { | ||
1340 | struct dma_ops_domain *entry, *ret = NULL; | ||
1341 | unsigned long flags; | ||
1342 | |||
1343 | if (list_empty(&iommu_pd_list)) | ||
1344 | return NULL; | ||
1345 | |||
1346 | spin_lock_irqsave(&iommu_pd_list_lock, flags); | ||
1347 | |||
1348 | list_for_each_entry(entry, &iommu_pd_list, list) { | ||
1349 | if (entry->target_dev == devid) { | ||
1350 | ret = entry; | ||
1351 | break; | ||
1352 | } | ||
1353 | } | ||
1354 | |||
1355 | spin_unlock_irqrestore(&iommu_pd_list_lock, flags); | ||
1356 | |||
1357 | return ret; | ||
1358 | } | ||
1359 | |||
1360 | /* | ||
1361 | * In the dma_ops path we only have the struct device. This function | 1639 | * In the dma_ops path we only have the struct device. This function |
1362 | * finds the corresponding IOMMU, the protection domain and the | 1640 | * finds the corresponding IOMMU, the protection domain and the |
1363 | * requestor id for a given device. | 1641 | * requestor id for a given device. |
1364 | * If the device is not yet associated with a domain this is also done | 1642 | * If the device is not yet associated with a domain this is also done |
1365 | * in this function. | 1643 | * in this function. |
1366 | */ | 1644 | */ |
1367 | static int get_device_resources(struct device *dev, | 1645 | static struct protection_domain *get_domain(struct device *dev) |
1368 | struct amd_iommu **iommu, | ||
1369 | struct protection_domain **domain, | ||
1370 | u16 *bdf) | ||
1371 | { | 1646 | { |
1647 | struct protection_domain *domain; | ||
1372 | struct dma_ops_domain *dma_dom; | 1648 | struct dma_ops_domain *dma_dom; |
1373 | struct pci_dev *pcidev; | 1649 | u16 devid = get_device_id(dev); |
1374 | u16 _bdf; | ||
1375 | |||
1376 | *iommu = NULL; | ||
1377 | *domain = NULL; | ||
1378 | *bdf = 0xffff; | ||
1379 | |||
1380 | if (dev->bus != &pci_bus_type) | ||
1381 | return 0; | ||
1382 | |||
1383 | pcidev = to_pci_dev(dev); | ||
1384 | _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); | ||
1385 | 1650 | ||
1386 | /* device not translated by any IOMMU in the system? */ | 1651 | if (!check_device(dev)) |
1387 | if (_bdf > amd_iommu_last_bdf) | 1652 | return ERR_PTR(-EINVAL); |
1388 | return 0; | ||
1389 | 1653 | ||
1390 | *bdf = amd_iommu_alias_table[_bdf]; | 1654 | domain = domain_for_device(dev); |
1655 | if (domain != NULL && !dma_ops_domain(domain)) | ||
1656 | return ERR_PTR(-EBUSY); | ||
1391 | 1657 | ||
1392 | *iommu = amd_iommu_rlookup_table[*bdf]; | 1658 | if (domain != NULL) |
1393 | if (*iommu == NULL) | 1659 | return domain; |
1394 | return 0; | ||
1395 | *domain = domain_for_device(*bdf); | ||
1396 | if (*domain == NULL) { | ||
1397 | dma_dom = find_protection_domain(*bdf); | ||
1398 | if (!dma_dom) | ||
1399 | dma_dom = (*iommu)->default_dom; | ||
1400 | *domain = &dma_dom->domain; | ||
1401 | attach_device(*iommu, *domain, *bdf); | ||
1402 | DUMP_printk("Using protection domain %d for device %s\n", | ||
1403 | (*domain)->id, dev_name(dev)); | ||
1404 | } | ||
1405 | 1660 | ||
1406 | if (domain_for_device(_bdf) == NULL) | 1661 | /* Device not bount yet - bind it */ |
1407 | attach_device(*iommu, *domain, _bdf); | 1662 | dma_dom = find_protection_domain(devid); |
1663 | if (!dma_dom) | ||
1664 | dma_dom = amd_iommu_rlookup_table[devid]->default_dom; | ||
1665 | attach_device(dev, &dma_dom->domain); | ||
1666 | DUMP_printk("Using protection domain %d for device %s\n", | ||
1667 | dma_dom->domain.id, dev_name(dev)); | ||
1408 | 1668 | ||
1409 | return 1; | 1669 | return &dma_dom->domain; |
1410 | } | 1670 | } |
1411 | 1671 | ||
1412 | static void update_device_table(struct protection_domain *domain) | 1672 | static void update_device_table(struct protection_domain *domain) |
1413 | { | 1673 | { |
1414 | unsigned long flags; | 1674 | struct iommu_dev_data *dev_data; |
1415 | int i; | ||
1416 | 1675 | ||
1417 | for (i = 0; i <= amd_iommu_last_bdf; ++i) { | 1676 | list_for_each_entry(dev_data, &domain->dev_list, list) { |
1418 | if (amd_iommu_pd_table[i] != domain) | 1677 | u16 devid = get_device_id(dev_data->dev); |
1419 | continue; | 1678 | set_dte_entry(devid, domain); |
1420 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | ||
1421 | set_dte_entry(i, domain); | ||
1422 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | ||
1423 | } | 1679 | } |
1424 | } | 1680 | } |
1425 | 1681 | ||
@@ -1429,76 +1685,13 @@ static void update_domain(struct protection_domain *domain) | |||
1429 | return; | 1685 | return; |
1430 | 1686 | ||
1431 | update_device_table(domain); | 1687 | update_device_table(domain); |
1432 | flush_devices_by_domain(domain); | 1688 | iommu_flush_domain_devices(domain); |
1433 | iommu_flush_domain(domain->id); | 1689 | iommu_flush_tlb_pde(domain); |
1434 | 1690 | ||
1435 | domain->updated = false; | 1691 | domain->updated = false; |
1436 | } | 1692 | } |
1437 | 1693 | ||
1438 | /* | 1694 | /* |
1439 | * This function is used to add another level to an IO page table. Adding | ||
1440 | * another level increases the size of the address space by 9 bits to a size up | ||
1441 | * to 64 bits. | ||
1442 | */ | ||
1443 | static bool increase_address_space(struct protection_domain *domain, | ||
1444 | gfp_t gfp) | ||
1445 | { | ||
1446 | u64 *pte; | ||
1447 | |||
1448 | if (domain->mode == PAGE_MODE_6_LEVEL) | ||
1449 | /* address space already 64 bit large */ | ||
1450 | return false; | ||
1451 | |||
1452 | pte = (void *)get_zeroed_page(gfp); | ||
1453 | if (!pte) | ||
1454 | return false; | ||
1455 | |||
1456 | *pte = PM_LEVEL_PDE(domain->mode, | ||
1457 | virt_to_phys(domain->pt_root)); | ||
1458 | domain->pt_root = pte; | ||
1459 | domain->mode += 1; | ||
1460 | domain->updated = true; | ||
1461 | |||
1462 | return true; | ||
1463 | } | ||
1464 | |||
1465 | static u64 *alloc_pte(struct protection_domain *domain, | ||
1466 | unsigned long address, | ||
1467 | int end_lvl, | ||
1468 | u64 **pte_page, | ||
1469 | gfp_t gfp) | ||
1470 | { | ||
1471 | u64 *pte, *page; | ||
1472 | int level; | ||
1473 | |||
1474 | while (address > PM_LEVEL_SIZE(domain->mode)) | ||
1475 | increase_address_space(domain, gfp); | ||
1476 | |||
1477 | level = domain->mode - 1; | ||
1478 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; | ||
1479 | |||
1480 | while (level > end_lvl) { | ||
1481 | if (!IOMMU_PTE_PRESENT(*pte)) { | ||
1482 | page = (u64 *)get_zeroed_page(gfp); | ||
1483 | if (!page) | ||
1484 | return NULL; | ||
1485 | *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); | ||
1486 | } | ||
1487 | |||
1488 | level -= 1; | ||
1489 | |||
1490 | pte = IOMMU_PTE_PAGE(*pte); | ||
1491 | |||
1492 | if (pte_page && level == end_lvl) | ||
1493 | *pte_page = pte; | ||
1494 | |||
1495 | pte = &pte[PM_LEVEL_INDEX(level, address)]; | ||
1496 | } | ||
1497 | |||
1498 | return pte; | ||
1499 | } | ||
1500 | |||
1501 | /* | ||
1502 | * This function fetches the PTE for a given address in the aperture | 1695 | * This function fetches the PTE for a given address in the aperture |
1503 | */ | 1696 | */ |
1504 | static u64* dma_ops_get_pte(struct dma_ops_domain *dom, | 1697 | static u64* dma_ops_get_pte(struct dma_ops_domain *dom, |
@@ -1528,8 +1721,7 @@ static u64* dma_ops_get_pte(struct dma_ops_domain *dom, | |||
1528 | * This is the generic map function. It maps one 4kb page at paddr to | 1721 | * This is the generic map function. It maps one 4kb page at paddr to |
1529 | * the given address in the DMA address space for the domain. | 1722 | * the given address in the DMA address space for the domain. |
1530 | */ | 1723 | */ |
1531 | static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, | 1724 | static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom, |
1532 | struct dma_ops_domain *dom, | ||
1533 | unsigned long address, | 1725 | unsigned long address, |
1534 | phys_addr_t paddr, | 1726 | phys_addr_t paddr, |
1535 | int direction) | 1727 | int direction) |
@@ -1542,7 +1734,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, | |||
1542 | 1734 | ||
1543 | pte = dma_ops_get_pte(dom, address); | 1735 | pte = dma_ops_get_pte(dom, address); |
1544 | if (!pte) | 1736 | if (!pte) |
1545 | return bad_dma_address; | 1737 | return DMA_ERROR_CODE; |
1546 | 1738 | ||
1547 | __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; | 1739 | __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; |
1548 | 1740 | ||
@@ -1563,8 +1755,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, | |||
1563 | /* | 1755 | /* |
1564 | * The generic unmapping function for on page in the DMA address space. | 1756 | * The generic unmapping function for on page in the DMA address space. |
1565 | */ | 1757 | */ |
1566 | static void dma_ops_domain_unmap(struct amd_iommu *iommu, | 1758 | static void dma_ops_domain_unmap(struct dma_ops_domain *dom, |
1567 | struct dma_ops_domain *dom, | ||
1568 | unsigned long address) | 1759 | unsigned long address) |
1569 | { | 1760 | { |
1570 | struct aperture_range *aperture; | 1761 | struct aperture_range *aperture; |
@@ -1595,7 +1786,6 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, | |||
1595 | * Must be called with the domain lock held. | 1786 | * Must be called with the domain lock held. |
1596 | */ | 1787 | */ |
1597 | static dma_addr_t __map_single(struct device *dev, | 1788 | static dma_addr_t __map_single(struct device *dev, |
1598 | struct amd_iommu *iommu, | ||
1599 | struct dma_ops_domain *dma_dom, | 1789 | struct dma_ops_domain *dma_dom, |
1600 | phys_addr_t paddr, | 1790 | phys_addr_t paddr, |
1601 | size_t size, | 1791 | size_t size, |
@@ -1623,7 +1813,7 @@ static dma_addr_t __map_single(struct device *dev, | |||
1623 | retry: | 1813 | retry: |
1624 | address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, | 1814 | address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, |
1625 | dma_mask); | 1815 | dma_mask); |
1626 | if (unlikely(address == bad_dma_address)) { | 1816 | if (unlikely(address == DMA_ERROR_CODE)) { |
1627 | /* | 1817 | /* |
1628 | * setting next_address here will let the address | 1818 | * setting next_address here will let the address |
1629 | * allocator only scan the new allocated range in the | 1819 | * allocator only scan the new allocated range in the |
@@ -1631,11 +1821,11 @@ retry: | |||
1631 | */ | 1821 | */ |
1632 | dma_dom->next_address = dma_dom->aperture_size; | 1822 | dma_dom->next_address = dma_dom->aperture_size; |
1633 | 1823 | ||
1634 | if (alloc_new_range(iommu, dma_dom, false, GFP_ATOMIC)) | 1824 | if (alloc_new_range(dma_dom, false, GFP_ATOMIC)) |
1635 | goto out; | 1825 | goto out; |
1636 | 1826 | ||
1637 | /* | 1827 | /* |
1638 | * aperture was sucessfully enlarged by 128 MB, try | 1828 | * aperture was successfully enlarged by 128 MB, try |
1639 | * allocation again | 1829 | * allocation again |
1640 | */ | 1830 | */ |
1641 | goto retry; | 1831 | goto retry; |
@@ -1643,8 +1833,8 @@ retry: | |||
1643 | 1833 | ||
1644 | start = address; | 1834 | start = address; |
1645 | for (i = 0; i < pages; ++i) { | 1835 | for (i = 0; i < pages; ++i) { |
1646 | ret = dma_ops_domain_map(iommu, dma_dom, start, paddr, dir); | 1836 | ret = dma_ops_domain_map(dma_dom, start, paddr, dir); |
1647 | if (ret == bad_dma_address) | 1837 | if (ret == DMA_ERROR_CODE) |
1648 | goto out_unmap; | 1838 | goto out_unmap; |
1649 | 1839 | ||
1650 | paddr += PAGE_SIZE; | 1840 | paddr += PAGE_SIZE; |
@@ -1655,10 +1845,10 @@ retry: | |||
1655 | ADD_STATS_COUNTER(alloced_io_mem, size); | 1845 | ADD_STATS_COUNTER(alloced_io_mem, size); |
1656 | 1846 | ||
1657 | if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { | 1847 | if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { |
1658 | iommu_flush_tlb(iommu, dma_dom->domain.id); | 1848 | iommu_flush_tlb(&dma_dom->domain); |
1659 | dma_dom->need_flush = false; | 1849 | dma_dom->need_flush = false; |
1660 | } else if (unlikely(iommu_has_npcache(iommu))) | 1850 | } else if (unlikely(amd_iommu_np_cache)) |
1661 | iommu_flush_pages(iommu, dma_dom->domain.id, address, size); | 1851 | iommu_flush_pages(&dma_dom->domain, address, size); |
1662 | 1852 | ||
1663 | out: | 1853 | out: |
1664 | return address; | 1854 | return address; |
@@ -1667,20 +1857,19 @@ out_unmap: | |||
1667 | 1857 | ||
1668 | for (--i; i >= 0; --i) { | 1858 | for (--i; i >= 0; --i) { |
1669 | start -= PAGE_SIZE; | 1859 | start -= PAGE_SIZE; |
1670 | dma_ops_domain_unmap(iommu, dma_dom, start); | 1860 | dma_ops_domain_unmap(dma_dom, start); |
1671 | } | 1861 | } |
1672 | 1862 | ||
1673 | dma_ops_free_addresses(dma_dom, address, pages); | 1863 | dma_ops_free_addresses(dma_dom, address, pages); |
1674 | 1864 | ||
1675 | return bad_dma_address; | 1865 | return DMA_ERROR_CODE; |
1676 | } | 1866 | } |
1677 | 1867 | ||
1678 | /* | 1868 | /* |
1679 | * Does the reverse of the __map_single function. Must be called with | 1869 | * Does the reverse of the __map_single function. Must be called with |
1680 | * the domain lock held too | 1870 | * the domain lock held too |
1681 | */ | 1871 | */ |
1682 | static void __unmap_single(struct amd_iommu *iommu, | 1872 | static void __unmap_single(struct dma_ops_domain *dma_dom, |
1683 | struct dma_ops_domain *dma_dom, | ||
1684 | dma_addr_t dma_addr, | 1873 | dma_addr_t dma_addr, |
1685 | size_t size, | 1874 | size_t size, |
1686 | int dir) | 1875 | int dir) |
@@ -1688,7 +1877,7 @@ static void __unmap_single(struct amd_iommu *iommu, | |||
1688 | dma_addr_t i, start; | 1877 | dma_addr_t i, start; |
1689 | unsigned int pages; | 1878 | unsigned int pages; |
1690 | 1879 | ||
1691 | if ((dma_addr == bad_dma_address) || | 1880 | if ((dma_addr == DMA_ERROR_CODE) || |
1692 | (dma_addr + size > dma_dom->aperture_size)) | 1881 | (dma_addr + size > dma_dom->aperture_size)) |
1693 | return; | 1882 | return; |
1694 | 1883 | ||
@@ -1697,7 +1886,7 @@ static void __unmap_single(struct amd_iommu *iommu, | |||
1697 | start = dma_addr; | 1886 | start = dma_addr; |
1698 | 1887 | ||
1699 | for (i = 0; i < pages; ++i) { | 1888 | for (i = 0; i < pages; ++i) { |
1700 | dma_ops_domain_unmap(iommu, dma_dom, start); | 1889 | dma_ops_domain_unmap(dma_dom, start); |
1701 | start += PAGE_SIZE; | 1890 | start += PAGE_SIZE; |
1702 | } | 1891 | } |
1703 | 1892 | ||
@@ -1706,7 +1895,7 @@ static void __unmap_single(struct amd_iommu *iommu, | |||
1706 | dma_ops_free_addresses(dma_dom, dma_addr, pages); | 1895 | dma_ops_free_addresses(dma_dom, dma_addr, pages); |
1707 | 1896 | ||
1708 | if (amd_iommu_unmap_flush || dma_dom->need_flush) { | 1897 | if (amd_iommu_unmap_flush || dma_dom->need_flush) { |
1709 | iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size); | 1898 | iommu_flush_pages(&dma_dom->domain, dma_addr, size); |
1710 | dma_dom->need_flush = false; | 1899 | dma_dom->need_flush = false; |
1711 | } | 1900 | } |
1712 | } | 1901 | } |
@@ -1720,36 +1909,29 @@ static dma_addr_t map_page(struct device *dev, struct page *page, | |||
1720 | struct dma_attrs *attrs) | 1909 | struct dma_attrs *attrs) |
1721 | { | 1910 | { |
1722 | unsigned long flags; | 1911 | unsigned long flags; |
1723 | struct amd_iommu *iommu; | ||
1724 | struct protection_domain *domain; | 1912 | struct protection_domain *domain; |
1725 | u16 devid; | ||
1726 | dma_addr_t addr; | 1913 | dma_addr_t addr; |
1727 | u64 dma_mask; | 1914 | u64 dma_mask; |
1728 | phys_addr_t paddr = page_to_phys(page) + offset; | 1915 | phys_addr_t paddr = page_to_phys(page) + offset; |
1729 | 1916 | ||
1730 | INC_STATS_COUNTER(cnt_map_single); | 1917 | INC_STATS_COUNTER(cnt_map_single); |
1731 | 1918 | ||
1732 | if (!check_device(dev)) | 1919 | domain = get_domain(dev); |
1733 | return bad_dma_address; | 1920 | if (PTR_ERR(domain) == -EINVAL) |
1734 | |||
1735 | dma_mask = *dev->dma_mask; | ||
1736 | |||
1737 | get_device_resources(dev, &iommu, &domain, &devid); | ||
1738 | |||
1739 | if (iommu == NULL || domain == NULL) | ||
1740 | /* device not handled by any AMD IOMMU */ | ||
1741 | return (dma_addr_t)paddr; | 1921 | return (dma_addr_t)paddr; |
1922 | else if (IS_ERR(domain)) | ||
1923 | return DMA_ERROR_CODE; | ||
1742 | 1924 | ||
1743 | if (!dma_ops_domain(domain)) | 1925 | dma_mask = *dev->dma_mask; |
1744 | return bad_dma_address; | ||
1745 | 1926 | ||
1746 | spin_lock_irqsave(&domain->lock, flags); | 1927 | spin_lock_irqsave(&domain->lock, flags); |
1747 | addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, | 1928 | |
1929 | addr = __map_single(dev, domain->priv, paddr, size, dir, false, | ||
1748 | dma_mask); | 1930 | dma_mask); |
1749 | if (addr == bad_dma_address) | 1931 | if (addr == DMA_ERROR_CODE) |
1750 | goto out; | 1932 | goto out; |
1751 | 1933 | ||
1752 | iommu_completion_wait(iommu); | 1934 | iommu_flush_complete(domain); |
1753 | 1935 | ||
1754 | out: | 1936 | out: |
1755 | spin_unlock_irqrestore(&domain->lock, flags); | 1937 | spin_unlock_irqrestore(&domain->lock, flags); |
@@ -1764,25 +1946,19 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, | |||
1764 | enum dma_data_direction dir, struct dma_attrs *attrs) | 1946 | enum dma_data_direction dir, struct dma_attrs *attrs) |
1765 | { | 1947 | { |
1766 | unsigned long flags; | 1948 | unsigned long flags; |
1767 | struct amd_iommu *iommu; | ||
1768 | struct protection_domain *domain; | 1949 | struct protection_domain *domain; |
1769 | u16 devid; | ||
1770 | 1950 | ||
1771 | INC_STATS_COUNTER(cnt_unmap_single); | 1951 | INC_STATS_COUNTER(cnt_unmap_single); |
1772 | 1952 | ||
1773 | if (!check_device(dev) || | 1953 | domain = get_domain(dev); |
1774 | !get_device_resources(dev, &iommu, &domain, &devid)) | 1954 | if (IS_ERR(domain)) |
1775 | /* device not handled by any AMD IOMMU */ | ||
1776 | return; | ||
1777 | |||
1778 | if (!dma_ops_domain(domain)) | ||
1779 | return; | 1955 | return; |
1780 | 1956 | ||
1781 | spin_lock_irqsave(&domain->lock, flags); | 1957 | spin_lock_irqsave(&domain->lock, flags); |
1782 | 1958 | ||
1783 | __unmap_single(iommu, domain->priv, dma_addr, size, dir); | 1959 | __unmap_single(domain->priv, dma_addr, size, dir); |
1784 | 1960 | ||
1785 | iommu_completion_wait(iommu); | 1961 | iommu_flush_complete(domain); |
1786 | 1962 | ||
1787 | spin_unlock_irqrestore(&domain->lock, flags); | 1963 | spin_unlock_irqrestore(&domain->lock, flags); |
1788 | } | 1964 | } |
@@ -1814,9 +1990,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, | |||
1814 | struct dma_attrs *attrs) | 1990 | struct dma_attrs *attrs) |
1815 | { | 1991 | { |
1816 | unsigned long flags; | 1992 | unsigned long flags; |
1817 | struct amd_iommu *iommu; | ||
1818 | struct protection_domain *domain; | 1993 | struct protection_domain *domain; |
1819 | u16 devid; | ||
1820 | int i; | 1994 | int i; |
1821 | struct scatterlist *s; | 1995 | struct scatterlist *s; |
1822 | phys_addr_t paddr; | 1996 | phys_addr_t paddr; |
@@ -1825,25 +1999,20 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, | |||
1825 | 1999 | ||
1826 | INC_STATS_COUNTER(cnt_map_sg); | 2000 | INC_STATS_COUNTER(cnt_map_sg); |
1827 | 2001 | ||
1828 | if (!check_device(dev)) | 2002 | domain = get_domain(dev); |
2003 | if (PTR_ERR(domain) == -EINVAL) | ||
2004 | return map_sg_no_iommu(dev, sglist, nelems, dir); | ||
2005 | else if (IS_ERR(domain)) | ||
1829 | return 0; | 2006 | return 0; |
1830 | 2007 | ||
1831 | dma_mask = *dev->dma_mask; | 2008 | dma_mask = *dev->dma_mask; |
1832 | 2009 | ||
1833 | get_device_resources(dev, &iommu, &domain, &devid); | ||
1834 | |||
1835 | if (!iommu || !domain) | ||
1836 | return map_sg_no_iommu(dev, sglist, nelems, dir); | ||
1837 | |||
1838 | if (!dma_ops_domain(domain)) | ||
1839 | return 0; | ||
1840 | |||
1841 | spin_lock_irqsave(&domain->lock, flags); | 2010 | spin_lock_irqsave(&domain->lock, flags); |
1842 | 2011 | ||
1843 | for_each_sg(sglist, s, nelems, i) { | 2012 | for_each_sg(sglist, s, nelems, i) { |
1844 | paddr = sg_phys(s); | 2013 | paddr = sg_phys(s); |
1845 | 2014 | ||
1846 | s->dma_address = __map_single(dev, iommu, domain->priv, | 2015 | s->dma_address = __map_single(dev, domain->priv, |
1847 | paddr, s->length, dir, false, | 2016 | paddr, s->length, dir, false, |
1848 | dma_mask); | 2017 | dma_mask); |
1849 | 2018 | ||
@@ -1854,7 +2023,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, | |||
1854 | goto unmap; | 2023 | goto unmap; |
1855 | } | 2024 | } |
1856 | 2025 | ||
1857 | iommu_completion_wait(iommu); | 2026 | iommu_flush_complete(domain); |
1858 | 2027 | ||
1859 | out: | 2028 | out: |
1860 | spin_unlock_irqrestore(&domain->lock, flags); | 2029 | spin_unlock_irqrestore(&domain->lock, flags); |
@@ -1863,7 +2032,7 @@ out: | |||
1863 | unmap: | 2032 | unmap: |
1864 | for_each_sg(sglist, s, mapped_elems, i) { | 2033 | for_each_sg(sglist, s, mapped_elems, i) { |
1865 | if (s->dma_address) | 2034 | if (s->dma_address) |
1866 | __unmap_single(iommu, domain->priv, s->dma_address, | 2035 | __unmap_single(domain->priv, s->dma_address, |
1867 | s->dma_length, dir); | 2036 | s->dma_length, dir); |
1868 | s->dma_address = s->dma_length = 0; | 2037 | s->dma_address = s->dma_length = 0; |
1869 | } | 2038 | } |
@@ -1882,30 +2051,25 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, | |||
1882 | struct dma_attrs *attrs) | 2051 | struct dma_attrs *attrs) |
1883 | { | 2052 | { |
1884 | unsigned long flags; | 2053 | unsigned long flags; |
1885 | struct amd_iommu *iommu; | ||
1886 | struct protection_domain *domain; | 2054 | struct protection_domain *domain; |
1887 | struct scatterlist *s; | 2055 | struct scatterlist *s; |
1888 | u16 devid; | ||
1889 | int i; | 2056 | int i; |
1890 | 2057 | ||
1891 | INC_STATS_COUNTER(cnt_unmap_sg); | 2058 | INC_STATS_COUNTER(cnt_unmap_sg); |
1892 | 2059 | ||
1893 | if (!check_device(dev) || | 2060 | domain = get_domain(dev); |
1894 | !get_device_resources(dev, &iommu, &domain, &devid)) | 2061 | if (IS_ERR(domain)) |
1895 | return; | ||
1896 | |||
1897 | if (!dma_ops_domain(domain)) | ||
1898 | return; | 2062 | return; |
1899 | 2063 | ||
1900 | spin_lock_irqsave(&domain->lock, flags); | 2064 | spin_lock_irqsave(&domain->lock, flags); |
1901 | 2065 | ||
1902 | for_each_sg(sglist, s, nelems, i) { | 2066 | for_each_sg(sglist, s, nelems, i) { |
1903 | __unmap_single(iommu, domain->priv, s->dma_address, | 2067 | __unmap_single(domain->priv, s->dma_address, |
1904 | s->dma_length, dir); | 2068 | s->dma_length, dir); |
1905 | s->dma_address = s->dma_length = 0; | 2069 | s->dma_address = s->dma_length = 0; |
1906 | } | 2070 | } |
1907 | 2071 | ||
1908 | iommu_completion_wait(iommu); | 2072 | iommu_flush_complete(domain); |
1909 | 2073 | ||
1910 | spin_unlock_irqrestore(&domain->lock, flags); | 2074 | spin_unlock_irqrestore(&domain->lock, flags); |
1911 | } | 2075 | } |
@@ -1918,49 +2082,44 @@ static void *alloc_coherent(struct device *dev, size_t size, | |||
1918 | { | 2082 | { |
1919 | unsigned long flags; | 2083 | unsigned long flags; |
1920 | void *virt_addr; | 2084 | void *virt_addr; |
1921 | struct amd_iommu *iommu; | ||
1922 | struct protection_domain *domain; | 2085 | struct protection_domain *domain; |
1923 | u16 devid; | ||
1924 | phys_addr_t paddr; | 2086 | phys_addr_t paddr; |
1925 | u64 dma_mask = dev->coherent_dma_mask; | 2087 | u64 dma_mask = dev->coherent_dma_mask; |
1926 | 2088 | ||
1927 | INC_STATS_COUNTER(cnt_alloc_coherent); | 2089 | INC_STATS_COUNTER(cnt_alloc_coherent); |
1928 | 2090 | ||
1929 | if (!check_device(dev)) | 2091 | domain = get_domain(dev); |
2092 | if (PTR_ERR(domain) == -EINVAL) { | ||
2093 | virt_addr = (void *)__get_free_pages(flag, get_order(size)); | ||
2094 | *dma_addr = __pa(virt_addr); | ||
2095 | return virt_addr; | ||
2096 | } else if (IS_ERR(domain)) | ||
1930 | return NULL; | 2097 | return NULL; |
1931 | 2098 | ||
1932 | if (!get_device_resources(dev, &iommu, &domain, &devid)) | 2099 | dma_mask = dev->coherent_dma_mask; |
1933 | flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); | 2100 | flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); |
2101 | flag |= __GFP_ZERO; | ||
1934 | 2102 | ||
1935 | flag |= __GFP_ZERO; | ||
1936 | virt_addr = (void *)__get_free_pages(flag, get_order(size)); | 2103 | virt_addr = (void *)__get_free_pages(flag, get_order(size)); |
1937 | if (!virt_addr) | 2104 | if (!virt_addr) |
1938 | return NULL; | 2105 | return NULL; |
1939 | 2106 | ||
1940 | paddr = virt_to_phys(virt_addr); | 2107 | paddr = virt_to_phys(virt_addr); |
1941 | 2108 | ||
1942 | if (!iommu || !domain) { | ||
1943 | *dma_addr = (dma_addr_t)paddr; | ||
1944 | return virt_addr; | ||
1945 | } | ||
1946 | |||
1947 | if (!dma_ops_domain(domain)) | ||
1948 | goto out_free; | ||
1949 | |||
1950 | if (!dma_mask) | 2109 | if (!dma_mask) |
1951 | dma_mask = *dev->dma_mask; | 2110 | dma_mask = *dev->dma_mask; |
1952 | 2111 | ||
1953 | spin_lock_irqsave(&domain->lock, flags); | 2112 | spin_lock_irqsave(&domain->lock, flags); |
1954 | 2113 | ||
1955 | *dma_addr = __map_single(dev, iommu, domain->priv, paddr, | 2114 | *dma_addr = __map_single(dev, domain->priv, paddr, |
1956 | size, DMA_BIDIRECTIONAL, true, dma_mask); | 2115 | size, DMA_BIDIRECTIONAL, true, dma_mask); |
1957 | 2116 | ||
1958 | if (*dma_addr == bad_dma_address) { | 2117 | if (*dma_addr == DMA_ERROR_CODE) { |
1959 | spin_unlock_irqrestore(&domain->lock, flags); | 2118 | spin_unlock_irqrestore(&domain->lock, flags); |
1960 | goto out_free; | 2119 | goto out_free; |
1961 | } | 2120 | } |
1962 | 2121 | ||
1963 | iommu_completion_wait(iommu); | 2122 | iommu_flush_complete(domain); |
1964 | 2123 | ||
1965 | spin_unlock_irqrestore(&domain->lock, flags); | 2124 | spin_unlock_irqrestore(&domain->lock, flags); |
1966 | 2125 | ||
@@ -1980,28 +2139,19 @@ static void free_coherent(struct device *dev, size_t size, | |||
1980 | void *virt_addr, dma_addr_t dma_addr) | 2139 | void *virt_addr, dma_addr_t dma_addr) |
1981 | { | 2140 | { |
1982 | unsigned long flags; | 2141 | unsigned long flags; |
1983 | struct amd_iommu *iommu; | ||
1984 | struct protection_domain *domain; | 2142 | struct protection_domain *domain; |
1985 | u16 devid; | ||
1986 | 2143 | ||
1987 | INC_STATS_COUNTER(cnt_free_coherent); | 2144 | INC_STATS_COUNTER(cnt_free_coherent); |
1988 | 2145 | ||
1989 | if (!check_device(dev)) | 2146 | domain = get_domain(dev); |
1990 | return; | 2147 | if (IS_ERR(domain)) |
1991 | |||
1992 | get_device_resources(dev, &iommu, &domain, &devid); | ||
1993 | |||
1994 | if (!iommu || !domain) | ||
1995 | goto free_mem; | ||
1996 | |||
1997 | if (!dma_ops_domain(domain)) | ||
1998 | goto free_mem; | 2148 | goto free_mem; |
1999 | 2149 | ||
2000 | spin_lock_irqsave(&domain->lock, flags); | 2150 | spin_lock_irqsave(&domain->lock, flags); |
2001 | 2151 | ||
2002 | __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); | 2152 | __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); |
2003 | 2153 | ||
2004 | iommu_completion_wait(iommu); | 2154 | iommu_flush_complete(domain); |
2005 | 2155 | ||
2006 | spin_unlock_irqrestore(&domain->lock, flags); | 2156 | spin_unlock_irqrestore(&domain->lock, flags); |
2007 | 2157 | ||
@@ -2015,22 +2165,7 @@ free_mem: | |||
2015 | */ | 2165 | */ |
2016 | static int amd_iommu_dma_supported(struct device *dev, u64 mask) | 2166 | static int amd_iommu_dma_supported(struct device *dev, u64 mask) |
2017 | { | 2167 | { |
2018 | u16 bdf; | 2168 | return check_device(dev); |
2019 | struct pci_dev *pcidev; | ||
2020 | |||
2021 | /* No device or no PCI device */ | ||
2022 | if (!dev || dev->bus != &pci_bus_type) | ||
2023 | return 0; | ||
2024 | |||
2025 | pcidev = to_pci_dev(dev); | ||
2026 | |||
2027 | bdf = calc_devid(pcidev->bus->number, pcidev->devfn); | ||
2028 | |||
2029 | /* Out of our scope? */ | ||
2030 | if (bdf > amd_iommu_last_bdf) | ||
2031 | return 0; | ||
2032 | |||
2033 | return 1; | ||
2034 | } | 2169 | } |
2035 | 2170 | ||
2036 | /* | 2171 | /* |
@@ -2044,25 +2179,28 @@ static void prealloc_protection_domains(void) | |||
2044 | { | 2179 | { |
2045 | struct pci_dev *dev = NULL; | 2180 | struct pci_dev *dev = NULL; |
2046 | struct dma_ops_domain *dma_dom; | 2181 | struct dma_ops_domain *dma_dom; |
2047 | struct amd_iommu *iommu; | ||
2048 | u16 devid; | 2182 | u16 devid; |
2049 | 2183 | ||
2050 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | 2184 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { |
2051 | devid = calc_devid(dev->bus->number, dev->devfn); | 2185 | |
2052 | if (devid > amd_iommu_last_bdf) | 2186 | /* Do we handle this device? */ |
2053 | continue; | 2187 | if (!check_device(&dev->dev)) |
2054 | devid = amd_iommu_alias_table[devid]; | ||
2055 | if (domain_for_device(devid)) | ||
2056 | continue; | 2188 | continue; |
2057 | iommu = amd_iommu_rlookup_table[devid]; | 2189 | |
2058 | if (!iommu) | 2190 | /* Is there already any domain for it? */ |
2191 | if (domain_for_device(&dev->dev)) | ||
2059 | continue; | 2192 | continue; |
2060 | dma_dom = dma_ops_domain_alloc(iommu); | 2193 | |
2194 | devid = get_device_id(&dev->dev); | ||
2195 | |||
2196 | dma_dom = dma_ops_domain_alloc(); | ||
2061 | if (!dma_dom) | 2197 | if (!dma_dom) |
2062 | continue; | 2198 | continue; |
2063 | init_unity_mappings_for_device(dma_dom, devid); | 2199 | init_unity_mappings_for_device(dma_dom, devid); |
2064 | dma_dom->target_dev = devid; | 2200 | dma_dom->target_dev = devid; |
2065 | 2201 | ||
2202 | attach_device(&dev->dev, &dma_dom->domain); | ||
2203 | |||
2066 | list_add_tail(&dma_dom->list, &iommu_pd_list); | 2204 | list_add_tail(&dma_dom->list, &iommu_pd_list); |
2067 | } | 2205 | } |
2068 | } | 2206 | } |
@@ -2091,7 +2229,7 @@ int __init amd_iommu_init_dma_ops(void) | |||
2091 | * protection domain will be assigned to the default one. | 2229 | * protection domain will be assigned to the default one. |
2092 | */ | 2230 | */ |
2093 | for_each_iommu(iommu) { | 2231 | for_each_iommu(iommu) { |
2094 | iommu->default_dom = dma_ops_domain_alloc(iommu); | 2232 | iommu->default_dom = dma_ops_domain_alloc(); |
2095 | if (iommu->default_dom == NULL) | 2233 | if (iommu->default_dom == NULL) |
2096 | return -ENOMEM; | 2234 | return -ENOMEM; |
2097 | iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; | 2235 | iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; |
@@ -2101,15 +2239,12 @@ int __init amd_iommu_init_dma_ops(void) | |||
2101 | } | 2239 | } |
2102 | 2240 | ||
2103 | /* | 2241 | /* |
2104 | * If device isolation is enabled, pre-allocate the protection | 2242 | * Pre-allocate the protection domains for each device. |
2105 | * domains for each device. | ||
2106 | */ | 2243 | */ |
2107 | if (amd_iommu_isolate) | 2244 | prealloc_protection_domains(); |
2108 | prealloc_protection_domains(); | ||
2109 | 2245 | ||
2110 | iommu_detected = 1; | 2246 | iommu_detected = 1; |
2111 | force_iommu = 1; | 2247 | swiotlb = 0; |
2112 | bad_dma_address = 0; | ||
2113 | #ifdef CONFIG_GART_IOMMU | 2248 | #ifdef CONFIG_GART_IOMMU |
2114 | gart_iommu_aperture_disabled = 1; | 2249 | gart_iommu_aperture_disabled = 1; |
2115 | gart_iommu_aperture = 0; | 2250 | gart_iommu_aperture = 0; |
@@ -2120,8 +2255,6 @@ int __init amd_iommu_init_dma_ops(void) | |||
2120 | 2255 | ||
2121 | register_iommu(&amd_iommu_ops); | 2256 | register_iommu(&amd_iommu_ops); |
2122 | 2257 | ||
2123 | bus_register_notifier(&pci_bus_type, &device_nb); | ||
2124 | |||
2125 | amd_iommu_stats_init(); | 2258 | amd_iommu_stats_init(); |
2126 | 2259 | ||
2127 | return 0; | 2260 | return 0; |
@@ -2148,14 +2281,17 @@ free_domains: | |||
2148 | 2281 | ||
2149 | static void cleanup_domain(struct protection_domain *domain) | 2282 | static void cleanup_domain(struct protection_domain *domain) |
2150 | { | 2283 | { |
2284 | struct iommu_dev_data *dev_data, *next; | ||
2151 | unsigned long flags; | 2285 | unsigned long flags; |
2152 | u16 devid; | ||
2153 | 2286 | ||
2154 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | 2287 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); |
2155 | 2288 | ||
2156 | for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) | 2289 | list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) { |
2157 | if (amd_iommu_pd_table[devid] == domain) | 2290 | struct device *dev = dev_data->dev; |
2158 | __detach_device(domain, devid); | 2291 | |
2292 | do_detach(dev); | ||
2293 | atomic_set(&dev_data->bind, 0); | ||
2294 | } | ||
2159 | 2295 | ||
2160 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | 2296 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); |
2161 | } | 2297 | } |
@@ -2165,6 +2301,8 @@ static void protection_domain_free(struct protection_domain *domain) | |||
2165 | if (!domain) | 2301 | if (!domain) |
2166 | return; | 2302 | return; |
2167 | 2303 | ||
2304 | del_domain_from_list(domain); | ||
2305 | |||
2168 | if (domain->id) | 2306 | if (domain->id) |
2169 | domain_id_free(domain->id); | 2307 | domain_id_free(domain->id); |
2170 | 2308 | ||
@@ -2183,6 +2321,9 @@ static struct protection_domain *protection_domain_alloc(void) | |||
2183 | domain->id = domain_id_alloc(); | 2321 | domain->id = domain_id_alloc(); |
2184 | if (!domain->id) | 2322 | if (!domain->id) |
2185 | goto out_err; | 2323 | goto out_err; |
2324 | INIT_LIST_HEAD(&domain->dev_list); | ||
2325 | |||
2326 | add_domain_to_list(domain); | ||
2186 | 2327 | ||
2187 | return domain; | 2328 | return domain; |
2188 | 2329 | ||
@@ -2239,26 +2380,23 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom) | |||
2239 | static void amd_iommu_detach_device(struct iommu_domain *dom, | 2380 | static void amd_iommu_detach_device(struct iommu_domain *dom, |
2240 | struct device *dev) | 2381 | struct device *dev) |
2241 | { | 2382 | { |
2242 | struct protection_domain *domain = dom->priv; | 2383 | struct iommu_dev_data *dev_data = dev->archdata.iommu; |
2243 | struct amd_iommu *iommu; | 2384 | struct amd_iommu *iommu; |
2244 | struct pci_dev *pdev; | ||
2245 | u16 devid; | 2385 | u16 devid; |
2246 | 2386 | ||
2247 | if (dev->bus != &pci_bus_type) | 2387 | if (!check_device(dev)) |
2248 | return; | 2388 | return; |
2249 | 2389 | ||
2250 | pdev = to_pci_dev(dev); | 2390 | devid = get_device_id(dev); |
2251 | 2391 | ||
2252 | devid = calc_devid(pdev->bus->number, pdev->devfn); | 2392 | if (dev_data->domain != NULL) |
2253 | 2393 | detach_device(dev); | |
2254 | if (devid > 0) | ||
2255 | detach_device(domain, devid); | ||
2256 | 2394 | ||
2257 | iommu = amd_iommu_rlookup_table[devid]; | 2395 | iommu = amd_iommu_rlookup_table[devid]; |
2258 | if (!iommu) | 2396 | if (!iommu) |
2259 | return; | 2397 | return; |
2260 | 2398 | ||
2261 | iommu_queue_inv_dev_entry(iommu, devid); | 2399 | iommu_flush_device(dev); |
2262 | iommu_completion_wait(iommu); | 2400 | iommu_completion_wait(iommu); |
2263 | } | 2401 | } |
2264 | 2402 | ||
@@ -2266,35 +2404,30 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, | |||
2266 | struct device *dev) | 2404 | struct device *dev) |
2267 | { | 2405 | { |
2268 | struct protection_domain *domain = dom->priv; | 2406 | struct protection_domain *domain = dom->priv; |
2269 | struct protection_domain *old_domain; | 2407 | struct iommu_dev_data *dev_data; |
2270 | struct amd_iommu *iommu; | 2408 | struct amd_iommu *iommu; |
2271 | struct pci_dev *pdev; | 2409 | int ret; |
2272 | u16 devid; | 2410 | u16 devid; |
2273 | 2411 | ||
2274 | if (dev->bus != &pci_bus_type) | 2412 | if (!check_device(dev)) |
2275 | return -EINVAL; | 2413 | return -EINVAL; |
2276 | 2414 | ||
2277 | pdev = to_pci_dev(dev); | 2415 | dev_data = dev->archdata.iommu; |
2278 | |||
2279 | devid = calc_devid(pdev->bus->number, pdev->devfn); | ||
2280 | 2416 | ||
2281 | if (devid >= amd_iommu_last_bdf || | 2417 | devid = get_device_id(dev); |
2282 | devid != amd_iommu_alias_table[devid]) | ||
2283 | return -EINVAL; | ||
2284 | 2418 | ||
2285 | iommu = amd_iommu_rlookup_table[devid]; | 2419 | iommu = amd_iommu_rlookup_table[devid]; |
2286 | if (!iommu) | 2420 | if (!iommu) |
2287 | return -EINVAL; | 2421 | return -EINVAL; |
2288 | 2422 | ||
2289 | old_domain = domain_for_device(devid); | 2423 | if (dev_data->domain) |
2290 | if (old_domain) | 2424 | detach_device(dev); |
2291 | detach_device(old_domain, devid); | ||
2292 | 2425 | ||
2293 | attach_device(iommu, domain, devid); | 2426 | ret = attach_device(dev, domain); |
2294 | 2427 | ||
2295 | iommu_completion_wait(iommu); | 2428 | iommu_completion_wait(iommu); |
2296 | 2429 | ||
2297 | return 0; | 2430 | return ret; |
2298 | } | 2431 | } |
2299 | 2432 | ||
2300 | static int amd_iommu_map_range(struct iommu_domain *dom, | 2433 | static int amd_iommu_map_range(struct iommu_domain *dom, |
@@ -2340,7 +2473,7 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom, | |||
2340 | iova += PAGE_SIZE; | 2473 | iova += PAGE_SIZE; |
2341 | } | 2474 | } |
2342 | 2475 | ||
2343 | iommu_flush_domain(domain->id); | 2476 | iommu_flush_tlb_pde(domain); |
2344 | } | 2477 | } |
2345 | 2478 | ||
2346 | static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, | 2479 | static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, |
@@ -2391,10 +2524,11 @@ static struct iommu_ops amd_iommu_ops = { | |||
2391 | 2524 | ||
2392 | int __init amd_iommu_init_passthrough(void) | 2525 | int __init amd_iommu_init_passthrough(void) |
2393 | { | 2526 | { |
2527 | struct amd_iommu *iommu; | ||
2394 | struct pci_dev *dev = NULL; | 2528 | struct pci_dev *dev = NULL; |
2395 | u16 devid, devid2; | 2529 | u16 devid; |
2396 | 2530 | ||
2397 | /* allocate passthroug domain */ | 2531 | /* allocate passthrough domain */ |
2398 | pt_domain = protection_domain_alloc(); | 2532 | pt_domain = protection_domain_alloc(); |
2399 | if (!pt_domain) | 2533 | if (!pt_domain) |
2400 | return -ENOMEM; | 2534 | return -ENOMEM; |
@@ -2402,20 +2536,17 @@ int __init amd_iommu_init_passthrough(void) | |||
2402 | pt_domain->mode |= PAGE_MODE_NONE; | 2536 | pt_domain->mode |= PAGE_MODE_NONE; |
2403 | 2537 | ||
2404 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | 2538 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { |
2405 | struct amd_iommu *iommu; | ||
2406 | 2539 | ||
2407 | devid = calc_devid(dev->bus->number, dev->devfn); | 2540 | if (!check_device(&dev->dev)) |
2408 | if (devid > amd_iommu_last_bdf) | ||
2409 | continue; | 2541 | continue; |
2410 | 2542 | ||
2411 | devid2 = amd_iommu_alias_table[devid]; | 2543 | devid = get_device_id(&dev->dev); |
2412 | 2544 | ||
2413 | iommu = amd_iommu_rlookup_table[devid2]; | 2545 | iommu = amd_iommu_rlookup_table[devid]; |
2414 | if (!iommu) | 2546 | if (!iommu) |
2415 | continue; | 2547 | continue; |
2416 | 2548 | ||
2417 | __attach_device(iommu, pt_domain, devid); | 2549 | attach_device(&dev->dev, pt_domain); |
2418 | __attach_device(iommu, pt_domain, devid2); | ||
2419 | } | 2550 | } |
2420 | 2551 | ||
2421 | pr_info("AMD-Vi: Initialized for Passthrough Mode\n"); | 2552 | pr_info("AMD-Vi: Initialized for Passthrough Mode\n"); |
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index b4b61d462dcc..fb490ce7dd55 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. | 2 | * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. |
3 | * Author: Joerg Roedel <joerg.roedel@amd.com> | 3 | * Author: Joerg Roedel <joerg.roedel@amd.com> |
4 | * Leo Duran <leo.duran@amd.com> | 4 | * Leo Duran <leo.duran@amd.com> |
5 | * | 5 | * |
@@ -25,10 +25,12 @@ | |||
25 | #include <linux/interrupt.h> | 25 | #include <linux/interrupt.h> |
26 | #include <linux/msi.h> | 26 | #include <linux/msi.h> |
27 | #include <asm/pci-direct.h> | 27 | #include <asm/pci-direct.h> |
28 | #include <asm/amd_iommu_proto.h> | ||
28 | #include <asm/amd_iommu_types.h> | 29 | #include <asm/amd_iommu_types.h> |
29 | #include <asm/amd_iommu.h> | 30 | #include <asm/amd_iommu.h> |
30 | #include <asm/iommu.h> | 31 | #include <asm/iommu.h> |
31 | #include <asm/gart.h> | 32 | #include <asm/gart.h> |
33 | #include <asm/x86_init.h> | ||
32 | 34 | ||
33 | /* | 35 | /* |
34 | * definitions for the ACPI scanning code | 36 | * definitions for the ACPI scanning code |
@@ -123,18 +125,29 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have | |||
123 | to handle */ | 125 | to handle */ |
124 | LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings | 126 | LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings |
125 | we find in ACPI */ | 127 | we find in ACPI */ |
126 | #ifdef CONFIG_IOMMU_STRESS | ||
127 | bool amd_iommu_isolate = false; | ||
128 | #else | ||
129 | bool amd_iommu_isolate = true; /* if true, device isolation is | ||
130 | enabled */ | ||
131 | #endif | ||
132 | |||
133 | bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ | 128 | bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ |
134 | 129 | ||
135 | LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the | 130 | LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the |
136 | system */ | 131 | system */ |
137 | 132 | ||
133 | /* Array to assign indices to IOMMUs*/ | ||
134 | struct amd_iommu *amd_iommus[MAX_IOMMUS]; | ||
135 | int amd_iommus_present; | ||
136 | |||
137 | /* IOMMUs have a non-present cache? */ | ||
138 | bool amd_iommu_np_cache __read_mostly; | ||
139 | |||
140 | /* | ||
141 | * Set to true if ACPI table parsing and hardware intialization went properly | ||
142 | */ | ||
143 | static bool amd_iommu_initialized; | ||
144 | |||
145 | /* | ||
146 | * List of protection domains - used during resume | ||
147 | */ | ||
148 | LIST_HEAD(amd_iommu_pd_list); | ||
149 | spinlock_t amd_iommu_pd_lock; | ||
150 | |||
138 | /* | 151 | /* |
139 | * Pointer to the device table which is shared by all AMD IOMMUs | 152 | * Pointer to the device table which is shared by all AMD IOMMUs |
140 | * it is indexed by the PCI device id or the HT unit id and contains | 153 | * it is indexed by the PCI device id or the HT unit id and contains |
@@ -157,12 +170,6 @@ u16 *amd_iommu_alias_table; | |||
157 | struct amd_iommu **amd_iommu_rlookup_table; | 170 | struct amd_iommu **amd_iommu_rlookup_table; |
158 | 171 | ||
159 | /* | 172 | /* |
160 | * The pd table (protection domain table) is used to find the protection domain | ||
161 | * data structure a device belongs to. Indexed with the PCI device id too. | ||
162 | */ | ||
163 | struct protection_domain **amd_iommu_pd_table; | ||
164 | |||
165 | /* | ||
166 | * AMD IOMMU allows up to 2^16 differend protection domains. This is a bitmap | 173 | * AMD IOMMU allows up to 2^16 differend protection domains. This is a bitmap |
167 | * to know which ones are already in use. | 174 | * to know which ones are already in use. |
168 | */ | 175 | */ |
@@ -240,7 +247,7 @@ static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit) | |||
240 | writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); | 247 | writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); |
241 | } | 248 | } |
242 | 249 | ||
243 | static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) | 250 | static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit) |
244 | { | 251 | { |
245 | u32 ctrl; | 252 | u32 ctrl; |
246 | 253 | ||
@@ -519,6 +526,26 @@ static void set_dev_entry_bit(u16 devid, u8 bit) | |||
519 | amd_iommu_dev_table[devid].data[i] |= (1 << _bit); | 526 | amd_iommu_dev_table[devid].data[i] |= (1 << _bit); |
520 | } | 527 | } |
521 | 528 | ||
529 | static int get_dev_entry_bit(u16 devid, u8 bit) | ||
530 | { | ||
531 | int i = (bit >> 5) & 0x07; | ||
532 | int _bit = bit & 0x1f; | ||
533 | |||
534 | return (amd_iommu_dev_table[devid].data[i] & (1 << _bit)) >> _bit; | ||
535 | } | ||
536 | |||
537 | |||
538 | void amd_iommu_apply_erratum_63(u16 devid) | ||
539 | { | ||
540 | int sysmgt; | ||
541 | |||
542 | sysmgt = get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1) | | ||
543 | (get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2) << 1); | ||
544 | |||
545 | if (sysmgt == 0x01) | ||
546 | set_dev_entry_bit(devid, DEV_ENTRY_IW); | ||
547 | } | ||
548 | |||
522 | /* Writes the specific IOMMU for a device into the rlookup table */ | 549 | /* Writes the specific IOMMU for a device into the rlookup table */ |
523 | static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) | 550 | static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) |
524 | { | 551 | { |
@@ -547,6 +574,8 @@ static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, | |||
547 | if (flags & ACPI_DEVFLAG_LINT1) | 574 | if (flags & ACPI_DEVFLAG_LINT1) |
548 | set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS); | 575 | set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS); |
549 | 576 | ||
577 | amd_iommu_apply_erratum_63(devid); | ||
578 | |||
550 | set_iommu_for_device(iommu, devid); | 579 | set_iommu_for_device(iommu, devid); |
551 | } | 580 | } |
552 | 581 | ||
@@ -816,7 +845,18 @@ static void __init free_iommu_all(void) | |||
816 | static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) | 845 | static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) |
817 | { | 846 | { |
818 | spin_lock_init(&iommu->lock); | 847 | spin_lock_init(&iommu->lock); |
848 | |||
849 | /* Add IOMMU to internal data structures */ | ||
819 | list_add_tail(&iommu->list, &amd_iommu_list); | 850 | list_add_tail(&iommu->list, &amd_iommu_list); |
851 | iommu->index = amd_iommus_present++; | ||
852 | |||
853 | if (unlikely(iommu->index >= MAX_IOMMUS)) { | ||
854 | WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n"); | ||
855 | return -ENOSYS; | ||
856 | } | ||
857 | |||
858 | /* Index is fine - add IOMMU to the array */ | ||
859 | amd_iommus[iommu->index] = iommu; | ||
820 | 860 | ||
821 | /* | 861 | /* |
822 | * Copy data from ACPI table entry to the iommu struct | 862 | * Copy data from ACPI table entry to the iommu struct |
@@ -846,6 +886,9 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) | |||
846 | init_iommu_from_acpi(iommu, h); | 886 | init_iommu_from_acpi(iommu, h); |
847 | init_iommu_devices(iommu); | 887 | init_iommu_devices(iommu); |
848 | 888 | ||
889 | if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) | ||
890 | amd_iommu_np_cache = true; | ||
891 | |||
849 | return pci_enable_device(iommu->dev); | 892 | return pci_enable_device(iommu->dev); |
850 | } | 893 | } |
851 | 894 | ||
@@ -891,6 +934,8 @@ static int __init init_iommu_all(struct acpi_table_header *table) | |||
891 | } | 934 | } |
892 | WARN_ON(p != end); | 935 | WARN_ON(p != end); |
893 | 936 | ||
937 | amd_iommu_initialized = true; | ||
938 | |||
894 | return 0; | 939 | return 0; |
895 | } | 940 | } |
896 | 941 | ||
@@ -903,7 +948,7 @@ static int __init init_iommu_all(struct acpi_table_header *table) | |||
903 | * | 948 | * |
904 | ****************************************************************************/ | 949 | ****************************************************************************/ |
905 | 950 | ||
906 | static int __init iommu_setup_msi(struct amd_iommu *iommu) | 951 | static int iommu_setup_msi(struct amd_iommu *iommu) |
907 | { | 952 | { |
908 | int r; | 953 | int r; |
909 | 954 | ||
@@ -1154,19 +1199,10 @@ static struct sys_device device_amd_iommu = { | |||
1154 | * functions. Finally it prints some information about AMD IOMMUs and | 1199 | * functions. Finally it prints some information about AMD IOMMUs and |
1155 | * the driver state and enables the hardware. | 1200 | * the driver state and enables the hardware. |
1156 | */ | 1201 | */ |
1157 | int __init amd_iommu_init(void) | 1202 | static int __init amd_iommu_init(void) |
1158 | { | 1203 | { |
1159 | int i, ret = 0; | 1204 | int i, ret = 0; |
1160 | 1205 | ||
1161 | |||
1162 | if (no_iommu) { | ||
1163 | printk(KERN_INFO "AMD-Vi disabled by kernel command line\n"); | ||
1164 | return 0; | ||
1165 | } | ||
1166 | |||
1167 | if (!amd_iommu_detected) | ||
1168 | return -ENODEV; | ||
1169 | |||
1170 | /* | 1206 | /* |
1171 | * First parse ACPI tables to find the largest Bus/Dev/Func | 1207 | * First parse ACPI tables to find the largest Bus/Dev/Func |
1172 | * we need to handle. Upon this information the shared data | 1208 | * we need to handle. Upon this information the shared data |
@@ -1203,15 +1239,6 @@ int __init amd_iommu_init(void) | |||
1203 | if (amd_iommu_rlookup_table == NULL) | 1239 | if (amd_iommu_rlookup_table == NULL) |
1204 | goto free; | 1240 | goto free; |
1205 | 1241 | ||
1206 | /* | ||
1207 | * Protection Domain table - maps devices to protection domains | ||
1208 | * This table has the same size as the rlookup_table | ||
1209 | */ | ||
1210 | amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, | ||
1211 | get_order(rlookup_table_size)); | ||
1212 | if (amd_iommu_pd_table == NULL) | ||
1213 | goto free; | ||
1214 | |||
1215 | amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( | 1242 | amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( |
1216 | GFP_KERNEL | __GFP_ZERO, | 1243 | GFP_KERNEL | __GFP_ZERO, |
1217 | get_order(MAX_DOMAIN_ID/8)); | 1244 | get_order(MAX_DOMAIN_ID/8)); |
@@ -1233,6 +1260,8 @@ int __init amd_iommu_init(void) | |||
1233 | */ | 1260 | */ |
1234 | amd_iommu_pd_alloc_bitmap[0] = 1; | 1261 | amd_iommu_pd_alloc_bitmap[0] = 1; |
1235 | 1262 | ||
1263 | spin_lock_init(&amd_iommu_pd_lock); | ||
1264 | |||
1236 | /* | 1265 | /* |
1237 | * now the data structures are allocated and basically initialized | 1266 | * now the data structures are allocated and basically initialized |
1238 | * start the real acpi table scan | 1267 | * start the real acpi table scan |
@@ -1241,6 +1270,9 @@ int __init amd_iommu_init(void) | |||
1241 | if (acpi_table_parse("IVRS", init_iommu_all) != 0) | 1270 | if (acpi_table_parse("IVRS", init_iommu_all) != 0) |
1242 | goto free; | 1271 | goto free; |
1243 | 1272 | ||
1273 | if (!amd_iommu_initialized) | ||
1274 | goto free; | ||
1275 | |||
1244 | if (acpi_table_parse("IVRS", init_memory_definitions) != 0) | 1276 | if (acpi_table_parse("IVRS", init_memory_definitions) != 0) |
1245 | goto free; | 1277 | goto free; |
1246 | 1278 | ||
@@ -1252,6 +1284,10 @@ int __init amd_iommu_init(void) | |||
1252 | if (ret) | 1284 | if (ret) |
1253 | goto free; | 1285 | goto free; |
1254 | 1286 | ||
1287 | ret = amd_iommu_init_devices(); | ||
1288 | if (ret) | ||
1289 | goto free; | ||
1290 | |||
1255 | if (iommu_pass_through) | 1291 | if (iommu_pass_through) |
1256 | ret = amd_iommu_init_passthrough(); | 1292 | ret = amd_iommu_init_passthrough(); |
1257 | else | 1293 | else |
@@ -1259,32 +1295,29 @@ int __init amd_iommu_init(void) | |||
1259 | if (ret) | 1295 | if (ret) |
1260 | goto free; | 1296 | goto free; |
1261 | 1297 | ||
1298 | amd_iommu_init_notifier(); | ||
1299 | |||
1262 | enable_iommus(); | 1300 | enable_iommus(); |
1263 | 1301 | ||
1264 | if (iommu_pass_through) | 1302 | if (iommu_pass_through) |
1265 | goto out; | 1303 | goto out; |
1266 | 1304 | ||
1267 | printk(KERN_INFO "AMD-Vi: device isolation "); | ||
1268 | if (amd_iommu_isolate) | ||
1269 | printk("enabled\n"); | ||
1270 | else | ||
1271 | printk("disabled\n"); | ||
1272 | |||
1273 | if (amd_iommu_unmap_flush) | 1305 | if (amd_iommu_unmap_flush) |
1274 | printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n"); | 1306 | printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n"); |
1275 | else | 1307 | else |
1276 | printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n"); | 1308 | printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n"); |
1277 | 1309 | ||
1310 | x86_platform.iommu_shutdown = disable_iommus; | ||
1278 | out: | 1311 | out: |
1279 | return ret; | 1312 | return ret; |
1280 | 1313 | ||
1281 | free: | 1314 | free: |
1315 | |||
1316 | amd_iommu_uninit_devices(); | ||
1317 | |||
1282 | free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, | 1318 | free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, |
1283 | get_order(MAX_DOMAIN_ID/8)); | 1319 | get_order(MAX_DOMAIN_ID/8)); |
1284 | 1320 | ||
1285 | free_pages((unsigned long)amd_iommu_pd_table, | ||
1286 | get_order(rlookup_table_size)); | ||
1287 | |||
1288 | free_pages((unsigned long)amd_iommu_rlookup_table, | 1321 | free_pages((unsigned long)amd_iommu_rlookup_table, |
1289 | get_order(rlookup_table_size)); | 1322 | get_order(rlookup_table_size)); |
1290 | 1323 | ||
@@ -1301,11 +1334,6 @@ free: | |||
1301 | goto out; | 1334 | goto out; |
1302 | } | 1335 | } |
1303 | 1336 | ||
1304 | void amd_iommu_shutdown(void) | ||
1305 | { | ||
1306 | disable_iommus(); | ||
1307 | } | ||
1308 | |||
1309 | /**************************************************************************** | 1337 | /**************************************************************************** |
1310 | * | 1338 | * |
1311 | * Early detect code. This code runs at IOMMU detection time in the DMA | 1339 | * Early detect code. This code runs at IOMMU detection time in the DMA |
@@ -1320,16 +1348,16 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table) | |||
1320 | 1348 | ||
1321 | void __init amd_iommu_detect(void) | 1349 | void __init amd_iommu_detect(void) |
1322 | { | 1350 | { |
1323 | if (swiotlb || no_iommu || (iommu_detected && !gart_iommu_aperture)) | 1351 | if (no_iommu || (iommu_detected && !gart_iommu_aperture)) |
1324 | return; | 1352 | return; |
1325 | 1353 | ||
1326 | if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { | 1354 | if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { |
1327 | iommu_detected = 1; | 1355 | iommu_detected = 1; |
1328 | amd_iommu_detected = 1; | 1356 | amd_iommu_detected = 1; |
1329 | #ifdef CONFIG_GART_IOMMU | 1357 | x86_init.iommu.iommu_init = amd_iommu_init; |
1330 | gart_iommu_aperture_disabled = 1; | 1358 | |
1331 | gart_iommu_aperture = 0; | 1359 | /* Make sure ACS will be enabled */ |
1332 | #endif | 1360 | pci_request_acs(); |
1333 | } | 1361 | } |
1334 | } | 1362 | } |
1335 | 1363 | ||
@@ -1350,10 +1378,6 @@ static int __init parse_amd_iommu_dump(char *str) | |||
1350 | static int __init parse_amd_iommu_options(char *str) | 1378 | static int __init parse_amd_iommu_options(char *str) |
1351 | { | 1379 | { |
1352 | for (; *str; ++str) { | 1380 | for (; *str; ++str) { |
1353 | if (strncmp(str, "isolate", 7) == 0) | ||
1354 | amd_iommu_isolate = true; | ||
1355 | if (strncmp(str, "share", 5) == 0) | ||
1356 | amd_iommu_isolate = false; | ||
1357 | if (strncmp(str, "fullflush", 9) == 0) | 1381 | if (strncmp(str, "fullflush", 9) == 0) |
1358 | amd_iommu_unmap_flush = true; | 1382 | amd_iommu_unmap_flush = true; |
1359 | } | 1383 | } |
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 128111d8ffe0..3704997e8b25 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <asm/pci-direct.h> | 28 | #include <asm/pci-direct.h> |
29 | #include <asm/dma.h> | 29 | #include <asm/dma.h> |
30 | #include <asm/k8.h> | 30 | #include <asm/k8.h> |
31 | #include <asm/x86_init.h> | ||
31 | 32 | ||
32 | int gart_iommu_aperture; | 33 | int gart_iommu_aperture; |
33 | int gart_iommu_aperture_disabled __initdata; | 34 | int gart_iommu_aperture_disabled __initdata; |
@@ -279,7 +280,8 @@ void __init early_gart_iommu_check(void) | |||
279 | * or BIOS forget to put that in reserved. | 280 | * or BIOS forget to put that in reserved. |
280 | * try to update e820 to make that region as reserved. | 281 | * try to update e820 to make that region as reserved. |
281 | */ | 282 | */ |
282 | int i, fix, slot; | 283 | u32 agp_aper_base = 0, agp_aper_order = 0; |
284 | int i, fix, slot, valid_agp = 0; | ||
283 | u32 ctl; | 285 | u32 ctl; |
284 | u32 aper_size = 0, aper_order = 0, last_aper_order = 0; | 286 | u32 aper_size = 0, aper_order = 0, last_aper_order = 0; |
285 | u64 aper_base = 0, last_aper_base = 0; | 287 | u64 aper_base = 0, last_aper_base = 0; |
@@ -289,6 +291,8 @@ void __init early_gart_iommu_check(void) | |||
289 | return; | 291 | return; |
290 | 292 | ||
291 | /* This is mostly duplicate of iommu_hole_init */ | 293 | /* This is mostly duplicate of iommu_hole_init */ |
294 | agp_aper_base = search_agp_bridge(&agp_aper_order, &valid_agp); | ||
295 | |||
292 | fix = 0; | 296 | fix = 0; |
293 | for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { | 297 | for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { |
294 | int bus; | 298 | int bus; |
@@ -341,10 +345,10 @@ void __init early_gart_iommu_check(void) | |||
341 | } | 345 | } |
342 | } | 346 | } |
343 | 347 | ||
344 | if (!fix) | 348 | if (valid_agp) |
345 | return; | 349 | return; |
346 | 350 | ||
347 | /* different nodes have different setting, disable them all at first*/ | 351 | /* disable them all at first */ |
348 | for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { | 352 | for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { |
349 | int bus; | 353 | int bus; |
350 | int dev_base, dev_limit; | 354 | int dev_base, dev_limit; |
@@ -400,6 +404,7 @@ void __init gart_iommu_hole_init(void) | |||
400 | 404 | ||
401 | iommu_detected = 1; | 405 | iommu_detected = 1; |
402 | gart_iommu_aperture = 1; | 406 | gart_iommu_aperture = 1; |
407 | x86_init.iommu.iommu_init = gart_iommu_init; | ||
403 | 408 | ||
404 | aper_order = (read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL) >> 1) & 7; | 409 | aper_order = (read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL) >> 1) & 7; |
405 | aper_size = (32 * 1024 * 1024) << aper_order; | 410 | aper_size = (32 * 1024 * 1024) << aper_order; |
@@ -456,8 +461,6 @@ out: | |||
456 | 461 | ||
457 | if (aper_alloc) { | 462 | if (aper_alloc) { |
458 | /* Got the aperture from the AGP bridge */ | 463 | /* Got the aperture from the AGP bridge */ |
459 | } else if (swiotlb && !valid_agp) { | ||
460 | /* Do nothing */ | ||
461 | } else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) || | 464 | } else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) || |
462 | force_iommu || | 465 | force_iommu || |
463 | valid_agp || | 466 | valid_agp || |
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index da7b7b9f8bd8..565c1bfc507d 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile | |||
@@ -2,7 +2,7 @@ | |||
2 | # Makefile for local APIC drivers and for the IO-APIC code | 2 | # Makefile for local APIC drivers and for the IO-APIC code |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_X86_LOCAL_APIC) += apic.o probe_$(BITS).o ipi.o nmi.o | 5 | obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o nmi.o |
6 | obj-$(CONFIG_X86_IO_APIC) += io_apic.o | 6 | obj-$(CONFIG_X86_IO_APIC) += io_apic.o |
7 | obj-$(CONFIG_SMP) += ipi.o | 7 | obj-$(CONFIG_SMP) += ipi.o |
8 | 8 | ||
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 894aa97f0717..aa57c079c98f 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -241,28 +241,13 @@ static int modern_apic(void) | |||
241 | } | 241 | } |
242 | 242 | ||
243 | /* | 243 | /* |
244 | * bare function to substitute write operation | 244 | * right after this call apic become NOOP driven |
245 | * and it's _that_ fast :) | 245 | * so apic->write/read doesn't do anything |
246 | */ | ||
247 | static void native_apic_write_dummy(u32 reg, u32 v) | ||
248 | { | ||
249 | WARN_ON_ONCE((cpu_has_apic || !disable_apic)); | ||
250 | } | ||
251 | |||
252 | static u32 native_apic_read_dummy(u32 reg) | ||
253 | { | ||
254 | WARN_ON_ONCE((cpu_has_apic && !disable_apic)); | ||
255 | return 0; | ||
256 | } | ||
257 | |||
258 | /* | ||
259 | * right after this call apic->write/read doesn't do anything | ||
260 | * note that there is no restore operation it works one way | ||
261 | */ | 246 | */ |
262 | void apic_disable(void) | 247 | void apic_disable(void) |
263 | { | 248 | { |
264 | apic->read = native_apic_read_dummy; | 249 | pr_info("APIC: switched to apic NOOP\n"); |
265 | apic->write = native_apic_write_dummy; | 250 | apic = &apic_noop; |
266 | } | 251 | } |
267 | 252 | ||
268 | void native_apic_wait_icr_idle(void) | 253 | void native_apic_wait_icr_idle(void) |
@@ -459,7 +444,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, | |||
459 | v = apic_read(APIC_LVTT); | 444 | v = apic_read(APIC_LVTT); |
460 | v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); | 445 | v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); |
461 | apic_write(APIC_LVTT, v); | 446 | apic_write(APIC_LVTT, v); |
462 | apic_write(APIC_TMICT, 0xffffffff); | 447 | apic_write(APIC_TMICT, 0); |
463 | break; | 448 | break; |
464 | case CLOCK_EVT_MODE_RESUME: | 449 | case CLOCK_EVT_MODE_RESUME: |
465 | /* Nothing to do here */ | 450 | /* Nothing to do here */ |
@@ -662,7 +647,7 @@ static int __init calibrate_APIC_clock(void) | |||
662 | calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; | 647 | calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; |
663 | 648 | ||
664 | apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); | 649 | apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); |
665 | apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult); | 650 | apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult); |
666 | apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", | 651 | apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", |
667 | calibration_result); | 652 | calibration_result); |
668 | 653 | ||
@@ -1356,7 +1341,7 @@ void enable_x2apic(void) | |||
1356 | 1341 | ||
1357 | rdmsr(MSR_IA32_APICBASE, msr, msr2); | 1342 | rdmsr(MSR_IA32_APICBASE, msr, msr2); |
1358 | if (!(msr & X2APIC_ENABLE)) { | 1343 | if (!(msr & X2APIC_ENABLE)) { |
1359 | pr_info("Enabling x2apic\n"); | 1344 | printk_once(KERN_INFO "Enabling x2apic\n"); |
1360 | wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); | 1345 | wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); |
1361 | } | 1346 | } |
1362 | } | 1347 | } |
@@ -1392,14 +1377,11 @@ void __init enable_IR_x2apic(void) | |||
1392 | unsigned long flags; | 1377 | unsigned long flags; |
1393 | struct IO_APIC_route_entry **ioapic_entries = NULL; | 1378 | struct IO_APIC_route_entry **ioapic_entries = NULL; |
1394 | int ret, x2apic_enabled = 0; | 1379 | int ret, x2apic_enabled = 0; |
1395 | int dmar_table_init_ret = 0; | 1380 | int dmar_table_init_ret; |
1396 | 1381 | ||
1397 | #ifdef CONFIG_INTR_REMAP | ||
1398 | dmar_table_init_ret = dmar_table_init(); | 1382 | dmar_table_init_ret = dmar_table_init(); |
1399 | if (dmar_table_init_ret) | 1383 | if (dmar_table_init_ret && !x2apic_supported()) |
1400 | pr_debug("dmar_table_init() failed with %d:\n", | 1384 | return; |
1401 | dmar_table_init_ret); | ||
1402 | #endif | ||
1403 | 1385 | ||
1404 | ioapic_entries = alloc_ioapic_entries(); | 1386 | ioapic_entries = alloc_ioapic_entries(); |
1405 | if (!ioapic_entries) { | 1387 | if (!ioapic_entries) { |
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index d0c99abc26c3..eacbd2b31d27 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c | |||
@@ -306,10 +306,7 @@ physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | |||
306 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | 306 | if (cpumask_test_cpu(cpu, cpu_online_mask)) |
307 | break; | 307 | break; |
308 | } | 308 | } |
309 | if (cpu < nr_cpu_ids) | 309 | return per_cpu(x86_cpu_to_apicid, cpu); |
310 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
311 | |||
312 | return BAD_APICID; | ||
313 | } | 310 | } |
314 | 311 | ||
315 | struct apic apic_physflat = { | 312 | struct apic apic_physflat = { |
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c new file mode 100644 index 000000000000..e31b9ffe25f5 --- /dev/null +++ b/arch/x86/kernel/apic/apic_noop.c | |||
@@ -0,0 +1,200 @@ | |||
1 | /* | ||
2 | * NOOP APIC driver. | ||
3 | * | ||
4 | * Does almost nothing and should be substituted by a real apic driver via | ||
5 | * probe routine. | ||
6 | * | ||
7 | * Though in case if apic is disabled (for some reason) we try | ||
8 | * to not uglify the caller's code and allow to call (some) apic routines | ||
9 | * like self-ipi, etc... | ||
10 | */ | ||
11 | |||
12 | #include <linux/threads.h> | ||
13 | #include <linux/cpumask.h> | ||
14 | #include <linux/module.h> | ||
15 | #include <linux/string.h> | ||
16 | #include <linux/kernel.h> | ||
17 | #include <linux/ctype.h> | ||
18 | #include <linux/init.h> | ||
19 | #include <linux/errno.h> | ||
20 | #include <asm/fixmap.h> | ||
21 | #include <asm/mpspec.h> | ||
22 | #include <asm/apicdef.h> | ||
23 | #include <asm/apic.h> | ||
24 | #include <asm/setup.h> | ||
25 | |||
26 | #include <linux/smp.h> | ||
27 | #include <asm/ipi.h> | ||
28 | |||
29 | #include <linux/interrupt.h> | ||
30 | #include <asm/acpi.h> | ||
31 | #include <asm/e820.h> | ||
32 | |||
33 | static void noop_init_apic_ldr(void) { } | ||
34 | static void noop_send_IPI_mask(const struct cpumask *cpumask, int vector) { } | ||
35 | static void noop_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { } | ||
36 | static void noop_send_IPI_allbutself(int vector) { } | ||
37 | static void noop_send_IPI_all(int vector) { } | ||
38 | static void noop_send_IPI_self(int vector) { } | ||
39 | static void noop_apic_wait_icr_idle(void) { } | ||
40 | static void noop_apic_icr_write(u32 low, u32 id) { } | ||
41 | |||
42 | static int noop_wakeup_secondary_cpu(int apicid, unsigned long start_eip) | ||
43 | { | ||
44 | return -1; | ||
45 | } | ||
46 | |||
47 | static u32 noop_safe_apic_wait_icr_idle(void) | ||
48 | { | ||
49 | return 0; | ||
50 | } | ||
51 | |||
52 | static u64 noop_apic_icr_read(void) | ||
53 | { | ||
54 | return 0; | ||
55 | } | ||
56 | |||
57 | static int noop_cpu_to_logical_apicid(int cpu) | ||
58 | { | ||
59 | return 0; | ||
60 | } | ||
61 | |||
62 | static int noop_phys_pkg_id(int cpuid_apic, int index_msb) | ||
63 | { | ||
64 | return 0; | ||
65 | } | ||
66 | |||
67 | static unsigned int noop_get_apic_id(unsigned long x) | ||
68 | { | ||
69 | return 0; | ||
70 | } | ||
71 | |||
72 | static int noop_probe(void) | ||
73 | { | ||
74 | /* | ||
75 | * NOOP apic should not ever be | ||
76 | * enabled via probe routine | ||
77 | */ | ||
78 | return 0; | ||
79 | } | ||
80 | |||
81 | static int noop_apic_id_registered(void) | ||
82 | { | ||
83 | /* | ||
84 | * if we would be really "pedantic" | ||
85 | * we should pass read_apic_id() here | ||
86 | * but since NOOP suppose APIC ID = 0 | ||
87 | * lets save a few cycles | ||
88 | */ | ||
89 | return physid_isset(0, phys_cpu_present_map); | ||
90 | } | ||
91 | |||
92 | static const struct cpumask *noop_target_cpus(void) | ||
93 | { | ||
94 | /* only BSP here */ | ||
95 | return cpumask_of(0); | ||
96 | } | ||
97 | |||
98 | static unsigned long noop_check_apicid_used(physid_mask_t *map, int apicid) | ||
99 | { | ||
100 | return physid_isset(apicid, *map); | ||
101 | } | ||
102 | |||
103 | static unsigned long noop_check_apicid_present(int bit) | ||
104 | { | ||
105 | return physid_isset(bit, phys_cpu_present_map); | ||
106 | } | ||
107 | |||
108 | static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask) | ||
109 | { | ||
110 | if (cpu != 0) | ||
111 | pr_warning("APIC: Vector allocated for non-BSP cpu\n"); | ||
112 | cpumask_clear(retmask); | ||
113 | cpumask_set_cpu(cpu, retmask); | ||
114 | } | ||
115 | |||
116 | int noop_apicid_to_node(int logical_apicid) | ||
117 | { | ||
118 | /* we're always on node 0 */ | ||
119 | return 0; | ||
120 | } | ||
121 | |||
122 | static u32 noop_apic_read(u32 reg) | ||
123 | { | ||
124 | WARN_ON_ONCE((cpu_has_apic && !disable_apic)); | ||
125 | return 0; | ||
126 | } | ||
127 | |||
128 | static void noop_apic_write(u32 reg, u32 v) | ||
129 | { | ||
130 | WARN_ON_ONCE(cpu_has_apic && !disable_apic); | ||
131 | } | ||
132 | |||
133 | struct apic apic_noop = { | ||
134 | .name = "noop", | ||
135 | .probe = noop_probe, | ||
136 | .acpi_madt_oem_check = NULL, | ||
137 | |||
138 | .apic_id_registered = noop_apic_id_registered, | ||
139 | |||
140 | .irq_delivery_mode = dest_LowestPrio, | ||
141 | /* logical delivery broadcast to all CPUs: */ | ||
142 | .irq_dest_mode = 1, | ||
143 | |||
144 | .target_cpus = noop_target_cpus, | ||
145 | .disable_esr = 0, | ||
146 | .dest_logical = APIC_DEST_LOGICAL, | ||
147 | .check_apicid_used = noop_check_apicid_used, | ||
148 | .check_apicid_present = noop_check_apicid_present, | ||
149 | |||
150 | .vector_allocation_domain = noop_vector_allocation_domain, | ||
151 | .init_apic_ldr = noop_init_apic_ldr, | ||
152 | |||
153 | .ioapic_phys_id_map = default_ioapic_phys_id_map, | ||
154 | .setup_apic_routing = NULL, | ||
155 | .multi_timer_check = NULL, | ||
156 | .apicid_to_node = noop_apicid_to_node, | ||
157 | |||
158 | .cpu_to_logical_apicid = noop_cpu_to_logical_apicid, | ||
159 | .cpu_present_to_apicid = default_cpu_present_to_apicid, | ||
160 | .apicid_to_cpu_present = physid_set_mask_of_physid, | ||
161 | |||
162 | .setup_portio_remap = NULL, | ||
163 | .check_phys_apicid_present = default_check_phys_apicid_present, | ||
164 | .enable_apic_mode = NULL, | ||
165 | |||
166 | .phys_pkg_id = noop_phys_pkg_id, | ||
167 | |||
168 | .mps_oem_check = NULL, | ||
169 | |||
170 | .get_apic_id = noop_get_apic_id, | ||
171 | .set_apic_id = NULL, | ||
172 | .apic_id_mask = 0x0F << 24, | ||
173 | |||
174 | .cpu_mask_to_apicid = default_cpu_mask_to_apicid, | ||
175 | .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, | ||
176 | |||
177 | .send_IPI_mask = noop_send_IPI_mask, | ||
178 | .send_IPI_mask_allbutself = noop_send_IPI_mask_allbutself, | ||
179 | .send_IPI_allbutself = noop_send_IPI_allbutself, | ||
180 | .send_IPI_all = noop_send_IPI_all, | ||
181 | .send_IPI_self = noop_send_IPI_self, | ||
182 | |||
183 | .wakeup_secondary_cpu = noop_wakeup_secondary_cpu, | ||
184 | |||
185 | /* should be safe */ | ||
186 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, | ||
187 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, | ||
188 | |||
189 | .wait_for_init_deassert = NULL, | ||
190 | |||
191 | .smp_callin_clear_local_apic = NULL, | ||
192 | .inquire_remote_apic = NULL, | ||
193 | |||
194 | .read = noop_apic_read, | ||
195 | .write = noop_apic_write, | ||
196 | .icr_read = noop_apic_icr_read, | ||
197 | .icr_write = noop_apic_icr_write, | ||
198 | .wait_icr_idle = noop_apic_wait_icr_idle, | ||
199 | .safe_wait_icr_idle = noop_safe_apic_wait_icr_idle, | ||
200 | }; | ||
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 77a06413b6b2..cb804c5091b9 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c | |||
@@ -35,7 +35,7 @@ static const struct cpumask *bigsmp_target_cpus(void) | |||
35 | #endif | 35 | #endif |
36 | } | 36 | } |
37 | 37 | ||
38 | static unsigned long bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid) | 38 | static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid) |
39 | { | 39 | { |
40 | return 0; | 40 | return 0; |
41 | } | 41 | } |
@@ -93,11 +93,6 @@ static int bigsmp_cpu_present_to_apicid(int mps_cpu) | |||
93 | return BAD_APICID; | 93 | return BAD_APICID; |
94 | } | 94 | } |
95 | 95 | ||
96 | static physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid) | ||
97 | { | ||
98 | return physid_mask_of_physid(phys_apicid); | ||
99 | } | ||
100 | |||
101 | /* Mapping from cpu number to logical apicid */ | 96 | /* Mapping from cpu number to logical apicid */ |
102 | static inline int bigsmp_cpu_to_logical_apicid(int cpu) | 97 | static inline int bigsmp_cpu_to_logical_apicid(int cpu) |
103 | { | 98 | { |
@@ -106,10 +101,10 @@ static inline int bigsmp_cpu_to_logical_apicid(int cpu) | |||
106 | return cpu_physical_id(cpu); | 101 | return cpu_physical_id(cpu); |
107 | } | 102 | } |
108 | 103 | ||
109 | static physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map) | 104 | static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) |
110 | { | 105 | { |
111 | /* For clustered we don't have a good way to do this yet - hack */ | 106 | /* For clustered we don't have a good way to do this yet - hack */ |
112 | return physids_promote(0xFFL); | 107 | physids_promote(0xFFL, retmap); |
113 | } | 108 | } |
114 | 109 | ||
115 | static int bigsmp_check_phys_apicid_present(int phys_apicid) | 110 | static int bigsmp_check_phys_apicid_present(int phys_apicid) |
@@ -136,10 +131,7 @@ static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | |||
136 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | 131 | if (cpumask_test_cpu(cpu, cpu_online_mask)) |
137 | break; | 132 | break; |
138 | } | 133 | } |
139 | if (cpu < nr_cpu_ids) | 134 | return bigsmp_cpu_to_logical_apicid(cpu); |
140 | return bigsmp_cpu_to_logical_apicid(cpu); | ||
141 | |||
142 | return BAD_APICID; | ||
143 | } | 135 | } |
144 | 136 | ||
145 | static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) | 137 | static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) |
@@ -230,7 +222,7 @@ struct apic apic_bigsmp = { | |||
230 | .apicid_to_node = bigsmp_apicid_to_node, | 222 | .apicid_to_node = bigsmp_apicid_to_node, |
231 | .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid, | 223 | .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid, |
232 | .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, | 224 | .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, |
233 | .apicid_to_cpu_present = bigsmp_apicid_to_cpu_present, | 225 | .apicid_to_cpu_present = physid_set_mask_of_physid, |
234 | .setup_portio_remap = NULL, | 226 | .setup_portio_remap = NULL, |
235 | .check_phys_apicid_present = bigsmp_check_phys_apicid_present, | 227 | .check_phys_apicid_present = bigsmp_check_phys_apicid_present, |
236 | .enable_apic_mode = NULL, | 228 | .enable_apic_mode = NULL, |
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 89174f847b49..dd2b5f264643 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c | |||
@@ -27,6 +27,9 @@ | |||
27 | * | 27 | * |
28 | * http://www.unisys.com | 28 | * http://www.unisys.com |
29 | */ | 29 | */ |
30 | |||
31 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
32 | |||
30 | #include <linux/notifier.h> | 33 | #include <linux/notifier.h> |
31 | #include <linux/spinlock.h> | 34 | #include <linux/spinlock.h> |
32 | #include <linux/cpumask.h> | 35 | #include <linux/cpumask.h> |
@@ -223,9 +226,9 @@ static int parse_unisys_oem(char *oemptr) | |||
223 | mip_addr = val; | 226 | mip_addr = val; |
224 | mip = (struct mip_reg *)val; | 227 | mip = (struct mip_reg *)val; |
225 | mip_reg = __va(mip); | 228 | mip_reg = __va(mip); |
226 | pr_debug("es7000_mipcfg: host_reg = 0x%lx \n", | 229 | pr_debug("host_reg = 0x%lx\n", |
227 | (unsigned long)host_reg); | 230 | (unsigned long)host_reg); |
228 | pr_debug("es7000_mipcfg: mip_reg = 0x%lx \n", | 231 | pr_debug("mip_reg = 0x%lx\n", |
229 | (unsigned long)mip_reg); | 232 | (unsigned long)mip_reg); |
230 | success++; | 233 | success++; |
231 | break; | 234 | break; |
@@ -401,7 +404,7 @@ static void es7000_enable_apic_mode(void) | |||
401 | if (!es7000_plat) | 404 | if (!es7000_plat) |
402 | return; | 405 | return; |
403 | 406 | ||
404 | printk(KERN_INFO "ES7000: Enabling APIC mode.\n"); | 407 | pr_info("Enabling APIC mode.\n"); |
405 | memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); | 408 | memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); |
406 | es7000_mip_reg.off_0x00 = MIP_SW_APIC; | 409 | es7000_mip_reg.off_0x00 = MIP_SW_APIC; |
407 | es7000_mip_reg.off_0x38 = MIP_VALID; | 410 | es7000_mip_reg.off_0x38 = MIP_VALID; |
@@ -466,11 +469,11 @@ static const struct cpumask *es7000_target_cpus(void) | |||
466 | return cpumask_of(smp_processor_id()); | 469 | return cpumask_of(smp_processor_id()); |
467 | } | 470 | } |
468 | 471 | ||
469 | static unsigned long | 472 | static unsigned long es7000_check_apicid_used(physid_mask_t *map, int apicid) |
470 | es7000_check_apicid_used(physid_mask_t bitmap, int apicid) | ||
471 | { | 473 | { |
472 | return 0; | 474 | return 0; |
473 | } | 475 | } |
476 | |||
474 | static unsigned long es7000_check_apicid_present(int bit) | 477 | static unsigned long es7000_check_apicid_present(int bit) |
475 | { | 478 | { |
476 | return physid_isset(bit, phys_cpu_present_map); | 479 | return physid_isset(bit, phys_cpu_present_map); |
@@ -514,8 +517,7 @@ static void es7000_setup_apic_routing(void) | |||
514 | { | 517 | { |
515 | int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); | 518 | int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); |
516 | 519 | ||
517 | printk(KERN_INFO | 520 | pr_info("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", |
518 | "Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", | ||
519 | (apic_version[apic] == 0x14) ? | 521 | (apic_version[apic] == 0x14) ? |
520 | "Physical Cluster" : "Logical Cluster", | 522 | "Physical Cluster" : "Logical Cluster", |
521 | nr_ioapics, cpumask_bits(es7000_target_cpus())[0]); | 523 | nr_ioapics, cpumask_bits(es7000_target_cpus())[0]); |
@@ -539,14 +541,10 @@ static int es7000_cpu_present_to_apicid(int mps_cpu) | |||
539 | 541 | ||
540 | static int cpu_id; | 542 | static int cpu_id; |
541 | 543 | ||
542 | static physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid) | 544 | static void es7000_apicid_to_cpu_present(int phys_apicid, physid_mask_t *retmap) |
543 | { | 545 | { |
544 | physid_mask_t mask; | 546 | physid_set_mask_of_physid(cpu_id, retmap); |
545 | |||
546 | mask = physid_mask_of_physid(cpu_id); | ||
547 | ++cpu_id; | 547 | ++cpu_id; |
548 | |||
549 | return mask; | ||
550 | } | 548 | } |
551 | 549 | ||
552 | /* Mapping from cpu number to logical apicid */ | 550 | /* Mapping from cpu number to logical apicid */ |
@@ -561,10 +559,10 @@ static int es7000_cpu_to_logical_apicid(int cpu) | |||
561 | #endif | 559 | #endif |
562 | } | 560 | } |
563 | 561 | ||
564 | static physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map) | 562 | static void es7000_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) |
565 | { | 563 | { |
566 | /* For clustered we don't have a good way to do this yet - hack */ | 564 | /* For clustered we don't have a good way to do this yet - hack */ |
567 | return physids_promote(0xff); | 565 | physids_promote(0xFFL, retmap); |
568 | } | 566 | } |
569 | 567 | ||
570 | static int es7000_check_phys_apicid_present(int cpu_physical_apicid) | 568 | static int es7000_check_phys_apicid_present(int cpu_physical_apicid) |
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index dc69f28489f5..de00c4619a55 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -60,8 +60,6 @@ | |||
60 | #include <asm/irq_remapping.h> | 60 | #include <asm/irq_remapping.h> |
61 | #include <asm/hpet.h> | 61 | #include <asm/hpet.h> |
62 | #include <asm/hw_irq.h> | 62 | #include <asm/hw_irq.h> |
63 | #include <asm/uv/uv_hub.h> | ||
64 | #include <asm/uv/uv_irq.h> | ||
65 | 63 | ||
66 | #include <asm/apic.h> | 64 | #include <asm/apic.h> |
67 | 65 | ||
@@ -140,20 +138,6 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node) | |||
140 | return pin; | 138 | return pin; |
141 | } | 139 | } |
142 | 140 | ||
143 | /* | ||
144 | * This is performance-critical, we want to do it O(1) | ||
145 | * | ||
146 | * Most irqs are mapped 1:1 with pins. | ||
147 | */ | ||
148 | struct irq_cfg { | ||
149 | struct irq_pin_list *irq_2_pin; | ||
150 | cpumask_var_t domain; | ||
151 | cpumask_var_t old_domain; | ||
152 | unsigned move_cleanup_count; | ||
153 | u8 vector; | ||
154 | u8 move_in_progress : 1; | ||
155 | }; | ||
156 | |||
157 | /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ | 141 | /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ |
158 | #ifdef CONFIG_SPARSE_IRQ | 142 | #ifdef CONFIG_SPARSE_IRQ |
159 | static struct irq_cfg irq_cfgx[] = { | 143 | static struct irq_cfg irq_cfgx[] = { |
@@ -209,7 +193,7 @@ int __init arch_early_irq_init(void) | |||
209 | } | 193 | } |
210 | 194 | ||
211 | #ifdef CONFIG_SPARSE_IRQ | 195 | #ifdef CONFIG_SPARSE_IRQ |
212 | static struct irq_cfg *irq_cfg(unsigned int irq) | 196 | struct irq_cfg *irq_cfg(unsigned int irq) |
213 | { | 197 | { |
214 | struct irq_cfg *cfg = NULL; | 198 | struct irq_cfg *cfg = NULL; |
215 | struct irq_desc *desc; | 199 | struct irq_desc *desc; |
@@ -361,7 +345,7 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) | |||
361 | /* end for move_irq_desc */ | 345 | /* end for move_irq_desc */ |
362 | 346 | ||
363 | #else | 347 | #else |
364 | static struct irq_cfg *irq_cfg(unsigned int irq) | 348 | struct irq_cfg *irq_cfg(unsigned int irq) |
365 | { | 349 | { |
366 | return irq < nr_irqs ? irq_cfgx + irq : NULL; | 350 | return irq < nr_irqs ? irq_cfgx + irq : NULL; |
367 | } | 351 | } |
@@ -555,23 +539,41 @@ static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, | |||
555 | add_pin_to_irq_node(cfg, node, newapic, newpin); | 539 | add_pin_to_irq_node(cfg, node, newapic, newpin); |
556 | } | 540 | } |
557 | 541 | ||
542 | static void __io_apic_modify_irq(struct irq_pin_list *entry, | ||
543 | int mask_and, int mask_or, | ||
544 | void (*final)(struct irq_pin_list *entry)) | ||
545 | { | ||
546 | unsigned int reg, pin; | ||
547 | |||
548 | pin = entry->pin; | ||
549 | reg = io_apic_read(entry->apic, 0x10 + pin * 2); | ||
550 | reg &= mask_and; | ||
551 | reg |= mask_or; | ||
552 | io_apic_modify(entry->apic, 0x10 + pin * 2, reg); | ||
553 | if (final) | ||
554 | final(entry); | ||
555 | } | ||
556 | |||
558 | static void io_apic_modify_irq(struct irq_cfg *cfg, | 557 | static void io_apic_modify_irq(struct irq_cfg *cfg, |
559 | int mask_and, int mask_or, | 558 | int mask_and, int mask_or, |
560 | void (*final)(struct irq_pin_list *entry)) | 559 | void (*final)(struct irq_pin_list *entry)) |
561 | { | 560 | { |
562 | int pin; | ||
563 | struct irq_pin_list *entry; | 561 | struct irq_pin_list *entry; |
564 | 562 | ||
565 | for_each_irq_pin(entry, cfg->irq_2_pin) { | 563 | for_each_irq_pin(entry, cfg->irq_2_pin) |
566 | unsigned int reg; | 564 | __io_apic_modify_irq(entry, mask_and, mask_or, final); |
567 | pin = entry->pin; | 565 | } |
568 | reg = io_apic_read(entry->apic, 0x10 + pin * 2); | 566 | |
569 | reg &= mask_and; | 567 | static void __mask_and_edge_IO_APIC_irq(struct irq_pin_list *entry) |
570 | reg |= mask_or; | 568 | { |
571 | io_apic_modify(entry->apic, 0x10 + pin * 2, reg); | 569 | __io_apic_modify_irq(entry, ~IO_APIC_REDIR_LEVEL_TRIGGER, |
572 | if (final) | 570 | IO_APIC_REDIR_MASKED, NULL); |
573 | final(entry); | 571 | } |
574 | } | 572 | |
573 | static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry) | ||
574 | { | ||
575 | __io_apic_modify_irq(entry, ~IO_APIC_REDIR_MASKED, | ||
576 | IO_APIC_REDIR_LEVEL_TRIGGER, NULL); | ||
575 | } | 577 | } |
576 | 578 | ||
577 | static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) | 579 | static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) |
@@ -595,18 +597,6 @@ static void __mask_IO_APIC_irq(struct irq_cfg *cfg) | |||
595 | io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); | 597 | io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); |
596 | } | 598 | } |
597 | 599 | ||
598 | static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) | ||
599 | { | ||
600 | io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER, | ||
601 | IO_APIC_REDIR_MASKED, NULL); | ||
602 | } | ||
603 | |||
604 | static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg) | ||
605 | { | ||
606 | io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, | ||
607 | IO_APIC_REDIR_LEVEL_TRIGGER, NULL); | ||
608 | } | ||
609 | |||
610 | static void mask_IO_APIC_irq_desc(struct irq_desc *desc) | 600 | static void mask_IO_APIC_irq_desc(struct irq_desc *desc) |
611 | { | 601 | { |
612 | struct irq_cfg *cfg = desc->chip_data; | 602 | struct irq_cfg *cfg = desc->chip_data; |
@@ -1177,7 +1167,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) | |||
1177 | int cpu, err; | 1167 | int cpu, err; |
1178 | cpumask_var_t tmp_mask; | 1168 | cpumask_var_t tmp_mask; |
1179 | 1169 | ||
1180 | if ((cfg->move_in_progress) || cfg->move_cleanup_count) | 1170 | if (cfg->move_in_progress) |
1181 | return -EBUSY; | 1171 | return -EBUSY; |
1182 | 1172 | ||
1183 | if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) | 1173 | if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) |
@@ -1237,8 +1227,7 @@ next: | |||
1237 | return err; | 1227 | return err; |
1238 | } | 1228 | } |
1239 | 1229 | ||
1240 | static int | 1230 | int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) |
1241 | assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) | ||
1242 | { | 1231 | { |
1243 | int err; | 1232 | int err; |
1244 | unsigned long flags; | 1233 | unsigned long flags; |
@@ -1599,9 +1588,6 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
1599 | struct irq_desc *desc; | 1588 | struct irq_desc *desc; |
1600 | unsigned int irq; | 1589 | unsigned int irq; |
1601 | 1590 | ||
1602 | if (apic_verbosity == APIC_QUIET) | ||
1603 | return; | ||
1604 | |||
1605 | printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); | 1591 | printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); |
1606 | for (i = 0; i < nr_ioapics; i++) | 1592 | for (i = 0; i < nr_ioapics; i++) |
1607 | printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", | 1593 | printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", |
@@ -1708,9 +1694,6 @@ __apicdebuginit(void) print_APIC_field(int base) | |||
1708 | { | 1694 | { |
1709 | int i; | 1695 | int i; |
1710 | 1696 | ||
1711 | if (apic_verbosity == APIC_QUIET) | ||
1712 | return; | ||
1713 | |||
1714 | printk(KERN_DEBUG); | 1697 | printk(KERN_DEBUG); |
1715 | 1698 | ||
1716 | for (i = 0; i < 8; i++) | 1699 | for (i = 0; i < 8; i++) |
@@ -1724,9 +1707,6 @@ __apicdebuginit(void) print_local_APIC(void *dummy) | |||
1724 | unsigned int i, v, ver, maxlvt; | 1707 | unsigned int i, v, ver, maxlvt; |
1725 | u64 icr; | 1708 | u64 icr; |
1726 | 1709 | ||
1727 | if (apic_verbosity == APIC_QUIET) | ||
1728 | return; | ||
1729 | |||
1730 | printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", | 1710 | printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", |
1731 | smp_processor_id(), hard_smp_processor_id()); | 1711 | smp_processor_id(), hard_smp_processor_id()); |
1732 | v = apic_read(APIC_ID); | 1712 | v = apic_read(APIC_ID); |
@@ -1824,13 +1804,19 @@ __apicdebuginit(void) print_local_APIC(void *dummy) | |||
1824 | printk("\n"); | 1804 | printk("\n"); |
1825 | } | 1805 | } |
1826 | 1806 | ||
1827 | __apicdebuginit(void) print_all_local_APICs(void) | 1807 | __apicdebuginit(void) print_local_APICs(int maxcpu) |
1828 | { | 1808 | { |
1829 | int cpu; | 1809 | int cpu; |
1830 | 1810 | ||
1811 | if (!maxcpu) | ||
1812 | return; | ||
1813 | |||
1831 | preempt_disable(); | 1814 | preempt_disable(); |
1832 | for_each_online_cpu(cpu) | 1815 | for_each_online_cpu(cpu) { |
1816 | if (cpu >= maxcpu) | ||
1817 | break; | ||
1833 | smp_call_function_single(cpu, print_local_APIC, NULL, 1); | 1818 | smp_call_function_single(cpu, print_local_APIC, NULL, 1); |
1819 | } | ||
1834 | preempt_enable(); | 1820 | preempt_enable(); |
1835 | } | 1821 | } |
1836 | 1822 | ||
@@ -1839,7 +1825,7 @@ __apicdebuginit(void) print_PIC(void) | |||
1839 | unsigned int v; | 1825 | unsigned int v; |
1840 | unsigned long flags; | 1826 | unsigned long flags; |
1841 | 1827 | ||
1842 | if (apic_verbosity == APIC_QUIET || !nr_legacy_irqs) | 1828 | if (!nr_legacy_irqs) |
1843 | return; | 1829 | return; |
1844 | 1830 | ||
1845 | printk(KERN_DEBUG "\nprinting PIC contents\n"); | 1831 | printk(KERN_DEBUG "\nprinting PIC contents\n"); |
@@ -1866,21 +1852,41 @@ __apicdebuginit(void) print_PIC(void) | |||
1866 | printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); | 1852 | printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); |
1867 | } | 1853 | } |
1868 | 1854 | ||
1869 | __apicdebuginit(int) print_all_ICs(void) | 1855 | static int __initdata show_lapic = 1; |
1856 | static __init int setup_show_lapic(char *arg) | ||
1857 | { | ||
1858 | int num = -1; | ||
1859 | |||
1860 | if (strcmp(arg, "all") == 0) { | ||
1861 | show_lapic = CONFIG_NR_CPUS; | ||
1862 | } else { | ||
1863 | get_option(&arg, &num); | ||
1864 | if (num >= 0) | ||
1865 | show_lapic = num; | ||
1866 | } | ||
1867 | |||
1868 | return 1; | ||
1869 | } | ||
1870 | __setup("show_lapic=", setup_show_lapic); | ||
1871 | |||
1872 | __apicdebuginit(int) print_ICs(void) | ||
1870 | { | 1873 | { |
1874 | if (apic_verbosity == APIC_QUIET) | ||
1875 | return 0; | ||
1876 | |||
1871 | print_PIC(); | 1877 | print_PIC(); |
1872 | 1878 | ||
1873 | /* don't print out if apic is not there */ | 1879 | /* don't print out if apic is not there */ |
1874 | if (!cpu_has_apic && !apic_from_smp_config()) | 1880 | if (!cpu_has_apic && !apic_from_smp_config()) |
1875 | return 0; | 1881 | return 0; |
1876 | 1882 | ||
1877 | print_all_local_APICs(); | 1883 | print_local_APICs(show_lapic); |
1878 | print_IO_APIC(); | 1884 | print_IO_APIC(); |
1879 | 1885 | ||
1880 | return 0; | 1886 | return 0; |
1881 | } | 1887 | } |
1882 | 1888 | ||
1883 | fs_initcall(print_all_ICs); | 1889 | fs_initcall(print_ICs); |
1884 | 1890 | ||
1885 | 1891 | ||
1886 | /* Where if anywhere is the i8259 connect in external int mode */ | 1892 | /* Where if anywhere is the i8259 connect in external int mode */ |
@@ -2031,7 +2037,7 @@ void __init setup_ioapic_ids_from_mpc(void) | |||
2031 | * This is broken; anything with a real cpu count has to | 2037 | * This is broken; anything with a real cpu count has to |
2032 | * circumvent this idiocy regardless. | 2038 | * circumvent this idiocy regardless. |
2033 | */ | 2039 | */ |
2034 | phys_id_present_map = apic->ioapic_phys_id_map(phys_cpu_present_map); | 2040 | apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map); |
2035 | 2041 | ||
2036 | /* | 2042 | /* |
2037 | * Set the IOAPIC ID to the value stored in the MPC table. | 2043 | * Set the IOAPIC ID to the value stored in the MPC table. |
@@ -2058,7 +2064,7 @@ void __init setup_ioapic_ids_from_mpc(void) | |||
2058 | * system must have a unique ID or we get lots of nice | 2064 | * system must have a unique ID or we get lots of nice |
2059 | * 'stuck on smp_invalidate_needed IPI wait' messages. | 2065 | * 'stuck on smp_invalidate_needed IPI wait' messages. |
2060 | */ | 2066 | */ |
2061 | if (apic->check_apicid_used(phys_id_present_map, | 2067 | if (apic->check_apicid_used(&phys_id_present_map, |
2062 | mp_ioapics[apic_id].apicid)) { | 2068 | mp_ioapics[apic_id].apicid)) { |
2063 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", | 2069 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", |
2064 | apic_id, mp_ioapics[apic_id].apicid); | 2070 | apic_id, mp_ioapics[apic_id].apicid); |
@@ -2073,7 +2079,7 @@ void __init setup_ioapic_ids_from_mpc(void) | |||
2073 | mp_ioapics[apic_id].apicid = i; | 2079 | mp_ioapics[apic_id].apicid = i; |
2074 | } else { | 2080 | } else { |
2075 | physid_mask_t tmp; | 2081 | physid_mask_t tmp; |
2076 | tmp = apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid); | 2082 | apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid, &tmp); |
2077 | apic_printk(APIC_VERBOSE, "Setting %d in the " | 2083 | apic_printk(APIC_VERBOSE, "Setting %d in the " |
2078 | "phys_id_present_map\n", | 2084 | "phys_id_present_map\n", |
2079 | mp_ioapics[apic_id].apicid); | 2085 | mp_ioapics[apic_id].apicid); |
@@ -2228,20 +2234,16 @@ static int ioapic_retrigger_irq(unsigned int irq) | |||
2228 | */ | 2234 | */ |
2229 | 2235 | ||
2230 | #ifdef CONFIG_SMP | 2236 | #ifdef CONFIG_SMP |
2231 | static void send_cleanup_vector(struct irq_cfg *cfg) | 2237 | void send_cleanup_vector(struct irq_cfg *cfg) |
2232 | { | 2238 | { |
2233 | cpumask_var_t cleanup_mask; | 2239 | cpumask_var_t cleanup_mask; |
2234 | 2240 | ||
2235 | if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { | 2241 | if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { |
2236 | unsigned int i; | 2242 | unsigned int i; |
2237 | cfg->move_cleanup_count = 0; | ||
2238 | for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) | ||
2239 | cfg->move_cleanup_count++; | ||
2240 | for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) | 2243 | for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) |
2241 | apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); | 2244 | apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); |
2242 | } else { | 2245 | } else { |
2243 | cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); | 2246 | cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); |
2244 | cfg->move_cleanup_count = cpumask_weight(cleanup_mask); | ||
2245 | apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); | 2247 | apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); |
2246 | free_cpumask_var(cleanup_mask); | 2248 | free_cpumask_var(cleanup_mask); |
2247 | } | 2249 | } |
@@ -2272,31 +2274,30 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq | |||
2272 | } | 2274 | } |
2273 | } | 2275 | } |
2274 | 2276 | ||
2275 | static int | ||
2276 | assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); | ||
2277 | |||
2278 | /* | 2277 | /* |
2279 | * Either sets desc->affinity to a valid value, and returns | 2278 | * Either sets desc->affinity to a valid value, and returns |
2280 | * ->cpu_mask_to_apicid of that, or returns BAD_APICID and | 2279 | * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and |
2281 | * leaves desc->affinity untouched. | 2280 | * leaves desc->affinity untouched. |
2282 | */ | 2281 | */ |
2283 | static unsigned int | 2282 | unsigned int |
2284 | set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) | 2283 | set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask, |
2284 | unsigned int *dest_id) | ||
2285 | { | 2285 | { |
2286 | struct irq_cfg *cfg; | 2286 | struct irq_cfg *cfg; |
2287 | unsigned int irq; | 2287 | unsigned int irq; |
2288 | 2288 | ||
2289 | if (!cpumask_intersects(mask, cpu_online_mask)) | 2289 | if (!cpumask_intersects(mask, cpu_online_mask)) |
2290 | return BAD_APICID; | 2290 | return -1; |
2291 | 2291 | ||
2292 | irq = desc->irq; | 2292 | irq = desc->irq; |
2293 | cfg = desc->chip_data; | 2293 | cfg = desc->chip_data; |
2294 | if (assign_irq_vector(irq, cfg, mask)) | 2294 | if (assign_irq_vector(irq, cfg, mask)) |
2295 | return BAD_APICID; | 2295 | return -1; |
2296 | 2296 | ||
2297 | cpumask_copy(desc->affinity, mask); | 2297 | cpumask_copy(desc->affinity, mask); |
2298 | 2298 | ||
2299 | return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); | 2299 | *dest_id = apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); |
2300 | return 0; | ||
2300 | } | 2301 | } |
2301 | 2302 | ||
2302 | static int | 2303 | static int |
@@ -2312,12 +2313,11 @@ set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) | |||
2312 | cfg = desc->chip_data; | 2313 | cfg = desc->chip_data; |
2313 | 2314 | ||
2314 | spin_lock_irqsave(&ioapic_lock, flags); | 2315 | spin_lock_irqsave(&ioapic_lock, flags); |
2315 | dest = set_desc_affinity(desc, mask); | 2316 | ret = set_desc_affinity(desc, mask, &dest); |
2316 | if (dest != BAD_APICID) { | 2317 | if (!ret) { |
2317 | /* Only the high 8 bits are valid. */ | 2318 | /* Only the high 8 bits are valid. */ |
2318 | dest = SET_APIC_LOGICAL_ID(dest); | 2319 | dest = SET_APIC_LOGICAL_ID(dest); |
2319 | __target_IO_APIC_irq(irq, dest, cfg); | 2320 | __target_IO_APIC_irq(irq, dest, cfg); |
2320 | ret = 0; | ||
2321 | } | 2321 | } |
2322 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2322 | spin_unlock_irqrestore(&ioapic_lock, flags); |
2323 | 2323 | ||
@@ -2432,9 +2432,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) | |||
2432 | continue; | 2432 | continue; |
2433 | 2433 | ||
2434 | cfg = irq_cfg(irq); | 2434 | cfg = irq_cfg(irq); |
2435 | spin_lock(&desc->lock); | 2435 | raw_spin_lock(&desc->lock); |
2436 | if (!cfg->move_cleanup_count) | ||
2437 | goto unlock; | ||
2438 | 2436 | ||
2439 | if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) | 2437 | if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) |
2440 | goto unlock; | 2438 | goto unlock; |
@@ -2452,29 +2450,40 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) | |||
2452 | goto unlock; | 2450 | goto unlock; |
2453 | } | 2451 | } |
2454 | __get_cpu_var(vector_irq)[vector] = -1; | 2452 | __get_cpu_var(vector_irq)[vector] = -1; |
2455 | cfg->move_cleanup_count--; | ||
2456 | unlock: | 2453 | unlock: |
2457 | spin_unlock(&desc->lock); | 2454 | raw_spin_unlock(&desc->lock); |
2458 | } | 2455 | } |
2459 | 2456 | ||
2460 | irq_exit(); | 2457 | irq_exit(); |
2461 | } | 2458 | } |
2462 | 2459 | ||
2463 | static void irq_complete_move(struct irq_desc **descp) | 2460 | static void __irq_complete_move(struct irq_desc **descp, unsigned vector) |
2464 | { | 2461 | { |
2465 | struct irq_desc *desc = *descp; | 2462 | struct irq_desc *desc = *descp; |
2466 | struct irq_cfg *cfg = desc->chip_data; | 2463 | struct irq_cfg *cfg = desc->chip_data; |
2467 | unsigned vector, me; | 2464 | unsigned me; |
2468 | 2465 | ||
2469 | if (likely(!cfg->move_in_progress)) | 2466 | if (likely(!cfg->move_in_progress)) |
2470 | return; | 2467 | return; |
2471 | 2468 | ||
2472 | vector = ~get_irq_regs()->orig_ax; | ||
2473 | me = smp_processor_id(); | 2469 | me = smp_processor_id(); |
2474 | 2470 | ||
2475 | if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) | 2471 | if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) |
2476 | send_cleanup_vector(cfg); | 2472 | send_cleanup_vector(cfg); |
2477 | } | 2473 | } |
2474 | |||
2475 | static void irq_complete_move(struct irq_desc **descp) | ||
2476 | { | ||
2477 | __irq_complete_move(descp, ~get_irq_regs()->orig_ax); | ||
2478 | } | ||
2479 | |||
2480 | void irq_force_complete_move(int irq) | ||
2481 | { | ||
2482 | struct irq_desc *desc = irq_to_desc(irq); | ||
2483 | struct irq_cfg *cfg = desc->chip_data; | ||
2484 | |||
2485 | __irq_complete_move(&desc, cfg->vector); | ||
2486 | } | ||
2478 | #else | 2487 | #else |
2479 | static inline void irq_complete_move(struct irq_desc **descp) {} | 2488 | static inline void irq_complete_move(struct irq_desc **descp) {} |
2480 | #endif | 2489 | #endif |
@@ -2490,6 +2499,59 @@ static void ack_apic_edge(unsigned int irq) | |||
2490 | 2499 | ||
2491 | atomic_t irq_mis_count; | 2500 | atomic_t irq_mis_count; |
2492 | 2501 | ||
2502 | /* | ||
2503 | * IO-APIC versions below 0x20 don't support EOI register. | ||
2504 | * For the record, here is the information about various versions: | ||
2505 | * 0Xh 82489DX | ||
2506 | * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant | ||
2507 | * 2Xh I/O(x)APIC which is PCI 2.2 Compliant | ||
2508 | * 30h-FFh Reserved | ||
2509 | * | ||
2510 | * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic | ||
2511 | * version as 0x2. This is an error with documentation and these ICH chips | ||
2512 | * use io-apic's of version 0x20. | ||
2513 | * | ||
2514 | * For IO-APIC's with EOI register, we use that to do an explicit EOI. | ||
2515 | * Otherwise, we simulate the EOI message manually by changing the trigger | ||
2516 | * mode to edge and then back to level, with RTE being masked during this. | ||
2517 | */ | ||
2518 | static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) | ||
2519 | { | ||
2520 | struct irq_pin_list *entry; | ||
2521 | |||
2522 | for_each_irq_pin(entry, cfg->irq_2_pin) { | ||
2523 | if (mp_ioapics[entry->apic].apicver >= 0x20) { | ||
2524 | /* | ||
2525 | * Intr-remapping uses pin number as the virtual vector | ||
2526 | * in the RTE. Actual vector is programmed in | ||
2527 | * intr-remapping table entry. Hence for the io-apic | ||
2528 | * EOI we use the pin number. | ||
2529 | */ | ||
2530 | if (irq_remapped(irq)) | ||
2531 | io_apic_eoi(entry->apic, entry->pin); | ||
2532 | else | ||
2533 | io_apic_eoi(entry->apic, cfg->vector); | ||
2534 | } else { | ||
2535 | __mask_and_edge_IO_APIC_irq(entry); | ||
2536 | __unmask_and_level_IO_APIC_irq(entry); | ||
2537 | } | ||
2538 | } | ||
2539 | } | ||
2540 | |||
2541 | static void eoi_ioapic_irq(struct irq_desc *desc) | ||
2542 | { | ||
2543 | struct irq_cfg *cfg; | ||
2544 | unsigned long flags; | ||
2545 | unsigned int irq; | ||
2546 | |||
2547 | irq = desc->irq; | ||
2548 | cfg = desc->chip_data; | ||
2549 | |||
2550 | spin_lock_irqsave(&ioapic_lock, flags); | ||
2551 | __eoi_ioapic_irq(irq, cfg); | ||
2552 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2553 | } | ||
2554 | |||
2493 | static void ack_apic_level(unsigned int irq) | 2555 | static void ack_apic_level(unsigned int irq) |
2494 | { | 2556 | { |
2495 | struct irq_desc *desc = irq_to_desc(irq); | 2557 | struct irq_desc *desc = irq_to_desc(irq); |
@@ -2525,6 +2587,19 @@ static void ack_apic_level(unsigned int irq) | |||
2525 | * level-triggered interrupt. We mask the source for the time of the | 2587 | * level-triggered interrupt. We mask the source for the time of the |
2526 | * operation to prevent an edge-triggered interrupt escaping meanwhile. | 2588 | * operation to prevent an edge-triggered interrupt escaping meanwhile. |
2527 | * The idea is from Manfred Spraul. --macro | 2589 | * The idea is from Manfred Spraul. --macro |
2590 | * | ||
2591 | * Also in the case when cpu goes offline, fixup_irqs() will forward | ||
2592 | * any unhandled interrupt on the offlined cpu to the new cpu | ||
2593 | * destination that is handling the corresponding interrupt. This | ||
2594 | * interrupt forwarding is done via IPI's. Hence, in this case also | ||
2595 | * level-triggered io-apic interrupt will be seen as an edge | ||
2596 | * interrupt in the IRR. And we can't rely on the cpu's EOI | ||
2597 | * to be broadcasted to the IO-APIC's which will clear the remoteIRR | ||
2598 | * corresponding to the level-triggered interrupt. Hence on IO-APIC's | ||
2599 | * supporting EOI register, we do an explicit EOI to clear the | ||
2600 | * remote IRR and on IO-APIC's which don't have an EOI register, | ||
2601 | * we use the above logic (mask+edge followed by unmask+level) from | ||
2602 | * Manfred Spraul to clear the remote IRR. | ||
2528 | */ | 2603 | */ |
2529 | cfg = desc->chip_data; | 2604 | cfg = desc->chip_data; |
2530 | i = cfg->vector; | 2605 | i = cfg->vector; |
@@ -2536,6 +2611,19 @@ static void ack_apic_level(unsigned int irq) | |||
2536 | */ | 2611 | */ |
2537 | ack_APIC_irq(); | 2612 | ack_APIC_irq(); |
2538 | 2613 | ||
2614 | /* | ||
2615 | * Tail end of clearing remote IRR bit (either by delivering the EOI | ||
2616 | * message via io-apic EOI register write or simulating it using | ||
2617 | * mask+edge followed by unnask+level logic) manually when the | ||
2618 | * level triggered interrupt is seen as the edge triggered interrupt | ||
2619 | * at the cpu. | ||
2620 | */ | ||
2621 | if (!(v & (1 << (i & 0x1f)))) { | ||
2622 | atomic_inc(&irq_mis_count); | ||
2623 | |||
2624 | eoi_ioapic_irq(desc); | ||
2625 | } | ||
2626 | |||
2539 | /* Now we can move and renable the irq */ | 2627 | /* Now we can move and renable the irq */ |
2540 | if (unlikely(do_unmask_irq)) { | 2628 | if (unlikely(do_unmask_irq)) { |
2541 | /* Only migrate the irq if the ack has been received. | 2629 | /* Only migrate the irq if the ack has been received. |
@@ -2569,41 +2657,9 @@ static void ack_apic_level(unsigned int irq) | |||
2569 | move_masked_irq(irq); | 2657 | move_masked_irq(irq); |
2570 | unmask_IO_APIC_irq_desc(desc); | 2658 | unmask_IO_APIC_irq_desc(desc); |
2571 | } | 2659 | } |
2572 | |||
2573 | /* Tail end of version 0x11 I/O APIC bug workaround */ | ||
2574 | if (!(v & (1 << (i & 0x1f)))) { | ||
2575 | atomic_inc(&irq_mis_count); | ||
2576 | spin_lock(&ioapic_lock); | ||
2577 | __mask_and_edge_IO_APIC_irq(cfg); | ||
2578 | __unmask_and_level_IO_APIC_irq(cfg); | ||
2579 | spin_unlock(&ioapic_lock); | ||
2580 | } | ||
2581 | } | 2660 | } |
2582 | 2661 | ||
2583 | #ifdef CONFIG_INTR_REMAP | 2662 | #ifdef CONFIG_INTR_REMAP |
2584 | static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) | ||
2585 | { | ||
2586 | struct irq_pin_list *entry; | ||
2587 | |||
2588 | for_each_irq_pin(entry, cfg->irq_2_pin) | ||
2589 | io_apic_eoi(entry->apic, entry->pin); | ||
2590 | } | ||
2591 | |||
2592 | static void | ||
2593 | eoi_ioapic_irq(struct irq_desc *desc) | ||
2594 | { | ||
2595 | struct irq_cfg *cfg; | ||
2596 | unsigned long flags; | ||
2597 | unsigned int irq; | ||
2598 | |||
2599 | irq = desc->irq; | ||
2600 | cfg = desc->chip_data; | ||
2601 | |||
2602 | spin_lock_irqsave(&ioapic_lock, flags); | ||
2603 | __eoi_ioapic_irq(irq, cfg); | ||
2604 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2605 | } | ||
2606 | |||
2607 | static void ir_ack_apic_edge(unsigned int irq) | 2663 | static void ir_ack_apic_edge(unsigned int irq) |
2608 | { | 2664 | { |
2609 | ack_APIC_irq(); | 2665 | ack_APIC_irq(); |
@@ -3157,6 +3213,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) | |||
3157 | continue; | 3213 | continue; |
3158 | 3214 | ||
3159 | desc_new = move_irq_desc(desc_new, node); | 3215 | desc_new = move_irq_desc(desc_new, node); |
3216 | cfg_new = desc_new->chip_data; | ||
3160 | 3217 | ||
3161 | if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) | 3218 | if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) |
3162 | irq = new; | 3219 | irq = new; |
@@ -3211,7 +3268,8 @@ void destroy_irq(unsigned int irq) | |||
3211 | * MSI message composition | 3268 | * MSI message composition |
3212 | */ | 3269 | */ |
3213 | #ifdef CONFIG_PCI_MSI | 3270 | #ifdef CONFIG_PCI_MSI |
3214 | static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) | 3271 | static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, |
3272 | struct msi_msg *msg, u8 hpet_id) | ||
3215 | { | 3273 | { |
3216 | struct irq_cfg *cfg; | 3274 | struct irq_cfg *cfg; |
3217 | int err; | 3275 | int err; |
@@ -3245,7 +3303,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms | |||
3245 | irte.dest_id = IRTE_DEST(dest); | 3303 | irte.dest_id = IRTE_DEST(dest); |
3246 | 3304 | ||
3247 | /* Set source-id of interrupt request */ | 3305 | /* Set source-id of interrupt request */ |
3248 | set_msi_sid(&irte, pdev); | 3306 | if (pdev) |
3307 | set_msi_sid(&irte, pdev); | ||
3308 | else | ||
3309 | set_hpet_sid(&irte, hpet_id); | ||
3249 | 3310 | ||
3250 | modify_irte(irq, &irte); | 3311 | modify_irte(irq, &irte); |
3251 | 3312 | ||
@@ -3291,8 +3352,7 @@ static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) | |||
3291 | struct msi_msg msg; | 3352 | struct msi_msg msg; |
3292 | unsigned int dest; | 3353 | unsigned int dest; |
3293 | 3354 | ||
3294 | dest = set_desc_affinity(desc, mask); | 3355 | if (set_desc_affinity(desc, mask, &dest)) |
3295 | if (dest == BAD_APICID) | ||
3296 | return -1; | 3356 | return -1; |
3297 | 3357 | ||
3298 | cfg = desc->chip_data; | 3358 | cfg = desc->chip_data; |
@@ -3324,8 +3384,7 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) | |||
3324 | if (get_irte(irq, &irte)) | 3384 | if (get_irte(irq, &irte)) |
3325 | return -1; | 3385 | return -1; |
3326 | 3386 | ||
3327 | dest = set_desc_affinity(desc, mask); | 3387 | if (set_desc_affinity(desc, mask, &dest)) |
3328 | if (dest == BAD_APICID) | ||
3329 | return -1; | 3388 | return -1; |
3330 | 3389 | ||
3331 | irte.vector = cfg->vector; | 3390 | irte.vector = cfg->vector; |
@@ -3410,7 +3469,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) | |||
3410 | int ret; | 3469 | int ret; |
3411 | struct msi_msg msg; | 3470 | struct msi_msg msg; |
3412 | 3471 | ||
3413 | ret = msi_compose_msg(dev, irq, &msg); | 3472 | ret = msi_compose_msg(dev, irq, &msg, -1); |
3414 | if (ret < 0) | 3473 | if (ret < 0) |
3415 | return ret; | 3474 | return ret; |
3416 | 3475 | ||
@@ -3507,8 +3566,7 @@ static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) | |||
3507 | struct msi_msg msg; | 3566 | struct msi_msg msg; |
3508 | unsigned int dest; | 3567 | unsigned int dest; |
3509 | 3568 | ||
3510 | dest = set_desc_affinity(desc, mask); | 3569 | if (set_desc_affinity(desc, mask, &dest)) |
3511 | if (dest == BAD_APICID) | ||
3512 | return -1; | 3570 | return -1; |
3513 | 3571 | ||
3514 | cfg = desc->chip_data; | 3572 | cfg = desc->chip_data; |
@@ -3543,7 +3601,7 @@ int arch_setup_dmar_msi(unsigned int irq) | |||
3543 | int ret; | 3601 | int ret; |
3544 | struct msi_msg msg; | 3602 | struct msi_msg msg; |
3545 | 3603 | ||
3546 | ret = msi_compose_msg(NULL, irq, &msg); | 3604 | ret = msi_compose_msg(NULL, irq, &msg, -1); |
3547 | if (ret < 0) | 3605 | if (ret < 0) |
3548 | return ret; | 3606 | return ret; |
3549 | dmar_msi_write(irq, &msg); | 3607 | dmar_msi_write(irq, &msg); |
@@ -3563,8 +3621,7 @@ static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) | |||
3563 | struct msi_msg msg; | 3621 | struct msi_msg msg; |
3564 | unsigned int dest; | 3622 | unsigned int dest; |
3565 | 3623 | ||
3566 | dest = set_desc_affinity(desc, mask); | 3624 | if (set_desc_affinity(desc, mask, &dest)) |
3567 | if (dest == BAD_APICID) | ||
3568 | return -1; | 3625 | return -1; |
3569 | 3626 | ||
3570 | cfg = desc->chip_data; | 3627 | cfg = desc->chip_data; |
@@ -3583,6 +3640,19 @@ static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) | |||
3583 | 3640 | ||
3584 | #endif /* CONFIG_SMP */ | 3641 | #endif /* CONFIG_SMP */ |
3585 | 3642 | ||
3643 | static struct irq_chip ir_hpet_msi_type = { | ||
3644 | .name = "IR-HPET_MSI", | ||
3645 | .unmask = hpet_msi_unmask, | ||
3646 | .mask = hpet_msi_mask, | ||
3647 | #ifdef CONFIG_INTR_REMAP | ||
3648 | .ack = ir_ack_apic_edge, | ||
3649 | #ifdef CONFIG_SMP | ||
3650 | .set_affinity = ir_set_msi_irq_affinity, | ||
3651 | #endif | ||
3652 | #endif | ||
3653 | .retrigger = ioapic_retrigger_irq, | ||
3654 | }; | ||
3655 | |||
3586 | static struct irq_chip hpet_msi_type = { | 3656 | static struct irq_chip hpet_msi_type = { |
3587 | .name = "HPET_MSI", | 3657 | .name = "HPET_MSI", |
3588 | .unmask = hpet_msi_unmask, | 3658 | .unmask = hpet_msi_unmask, |
@@ -3594,20 +3664,36 @@ static struct irq_chip hpet_msi_type = { | |||
3594 | .retrigger = ioapic_retrigger_irq, | 3664 | .retrigger = ioapic_retrigger_irq, |
3595 | }; | 3665 | }; |
3596 | 3666 | ||
3597 | int arch_setup_hpet_msi(unsigned int irq) | 3667 | int arch_setup_hpet_msi(unsigned int irq, unsigned int id) |
3598 | { | 3668 | { |
3599 | int ret; | 3669 | int ret; |
3600 | struct msi_msg msg; | 3670 | struct msi_msg msg; |
3601 | struct irq_desc *desc = irq_to_desc(irq); | 3671 | struct irq_desc *desc = irq_to_desc(irq); |
3602 | 3672 | ||
3603 | ret = msi_compose_msg(NULL, irq, &msg); | 3673 | if (intr_remapping_enabled) { |
3674 | struct intel_iommu *iommu = map_hpet_to_ir(id); | ||
3675 | int index; | ||
3676 | |||
3677 | if (!iommu) | ||
3678 | return -1; | ||
3679 | |||
3680 | index = alloc_irte(iommu, irq, 1); | ||
3681 | if (index < 0) | ||
3682 | return -1; | ||
3683 | } | ||
3684 | |||
3685 | ret = msi_compose_msg(NULL, irq, &msg, id); | ||
3604 | if (ret < 0) | 3686 | if (ret < 0) |
3605 | return ret; | 3687 | return ret; |
3606 | 3688 | ||
3607 | hpet_msi_write(irq, &msg); | 3689 | hpet_msi_write(irq, &msg); |
3608 | desc->status |= IRQ_MOVE_PCNTXT; | 3690 | desc->status |= IRQ_MOVE_PCNTXT; |
3609 | set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq, | 3691 | if (irq_remapped(irq)) |
3610 | "edge"); | 3692 | set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type, |
3693 | handle_edge_irq, "edge"); | ||
3694 | else | ||
3695 | set_irq_chip_and_handler_name(irq, &hpet_msi_type, | ||
3696 | handle_edge_irq, "edge"); | ||
3611 | 3697 | ||
3612 | return 0; | 3698 | return 0; |
3613 | } | 3699 | } |
@@ -3641,8 +3727,7 @@ static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) | |||
3641 | struct irq_cfg *cfg; | 3727 | struct irq_cfg *cfg; |
3642 | unsigned int dest; | 3728 | unsigned int dest; |
3643 | 3729 | ||
3644 | dest = set_desc_affinity(desc, mask); | 3730 | if (set_desc_affinity(desc, mask, &dest)) |
3645 | if (dest == BAD_APICID) | ||
3646 | return -1; | 3731 | return -1; |
3647 | 3732 | ||
3648 | cfg = desc->chip_data; | 3733 | cfg = desc->chip_data; |
@@ -3708,75 +3793,6 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) | |||
3708 | } | 3793 | } |
3709 | #endif /* CONFIG_HT_IRQ */ | 3794 | #endif /* CONFIG_HT_IRQ */ |
3710 | 3795 | ||
3711 | #ifdef CONFIG_X86_UV | ||
3712 | /* | ||
3713 | * Re-target the irq to the specified CPU and enable the specified MMR located | ||
3714 | * on the specified blade to allow the sending of MSIs to the specified CPU. | ||
3715 | */ | ||
3716 | int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | ||
3717 | unsigned long mmr_offset) | ||
3718 | { | ||
3719 | const struct cpumask *eligible_cpu = cpumask_of(cpu); | ||
3720 | struct irq_cfg *cfg; | ||
3721 | int mmr_pnode; | ||
3722 | unsigned long mmr_value; | ||
3723 | struct uv_IO_APIC_route_entry *entry; | ||
3724 | unsigned long flags; | ||
3725 | int err; | ||
3726 | |||
3727 | BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); | ||
3728 | |||
3729 | cfg = irq_cfg(irq); | ||
3730 | |||
3731 | err = assign_irq_vector(irq, cfg, eligible_cpu); | ||
3732 | if (err != 0) | ||
3733 | return err; | ||
3734 | |||
3735 | spin_lock_irqsave(&vector_lock, flags); | ||
3736 | set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, | ||
3737 | irq_name); | ||
3738 | spin_unlock_irqrestore(&vector_lock, flags); | ||
3739 | |||
3740 | mmr_value = 0; | ||
3741 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; | ||
3742 | entry->vector = cfg->vector; | ||
3743 | entry->delivery_mode = apic->irq_delivery_mode; | ||
3744 | entry->dest_mode = apic->irq_dest_mode; | ||
3745 | entry->polarity = 0; | ||
3746 | entry->trigger = 0; | ||
3747 | entry->mask = 0; | ||
3748 | entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); | ||
3749 | |||
3750 | mmr_pnode = uv_blade_to_pnode(mmr_blade); | ||
3751 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | ||
3752 | |||
3753 | if (cfg->move_in_progress) | ||
3754 | send_cleanup_vector(cfg); | ||
3755 | |||
3756 | return irq; | ||
3757 | } | ||
3758 | |||
3759 | /* | ||
3760 | * Disable the specified MMR located on the specified blade so that MSIs are | ||
3761 | * longer allowed to be sent. | ||
3762 | */ | ||
3763 | void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset) | ||
3764 | { | ||
3765 | unsigned long mmr_value; | ||
3766 | struct uv_IO_APIC_route_entry *entry; | ||
3767 | int mmr_pnode; | ||
3768 | |||
3769 | BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); | ||
3770 | |||
3771 | mmr_value = 0; | ||
3772 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; | ||
3773 | entry->mask = 1; | ||
3774 | |||
3775 | mmr_pnode = uv_blade_to_pnode(mmr_blade); | ||
3776 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | ||
3777 | } | ||
3778 | #endif /* CONFIG_X86_64 */ | ||
3779 | |||
3780 | int __init io_apic_get_redir_entries (int ioapic) | 3796 | int __init io_apic_get_redir_entries (int ioapic) |
3781 | { | 3797 | { |
3782 | union IO_APIC_reg_01 reg_01; | 3798 | union IO_APIC_reg_01 reg_01; |
@@ -3944,7 +3960,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) | |||
3944 | */ | 3960 | */ |
3945 | 3961 | ||
3946 | if (physids_empty(apic_id_map)) | 3962 | if (physids_empty(apic_id_map)) |
3947 | apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map); | 3963 | apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map); |
3948 | 3964 | ||
3949 | spin_lock_irqsave(&ioapic_lock, flags); | 3965 | spin_lock_irqsave(&ioapic_lock, flags); |
3950 | reg_00.raw = io_apic_read(ioapic, 0); | 3966 | reg_00.raw = io_apic_read(ioapic, 0); |
@@ -3960,10 +3976,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) | |||
3960 | * Every APIC in a system must have a unique ID or we get lots of nice | 3976 | * Every APIC in a system must have a unique ID or we get lots of nice |
3961 | * 'stuck on smp_invalidate_needed IPI wait' messages. | 3977 | * 'stuck on smp_invalidate_needed IPI wait' messages. |
3962 | */ | 3978 | */ |
3963 | if (apic->check_apicid_used(apic_id_map, apic_id)) { | 3979 | if (apic->check_apicid_used(&apic_id_map, apic_id)) { |
3964 | 3980 | ||
3965 | for (i = 0; i < get_physical_broadcast(); i++) { | 3981 | for (i = 0; i < get_physical_broadcast(); i++) { |
3966 | if (!apic->check_apicid_used(apic_id_map, i)) | 3982 | if (!apic->check_apicid_used(&apic_id_map, i)) |
3967 | break; | 3983 | break; |
3968 | } | 3984 | } |
3969 | 3985 | ||
@@ -3976,7 +3992,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) | |||
3976 | apic_id = i; | 3992 | apic_id = i; |
3977 | } | 3993 | } |
3978 | 3994 | ||
3979 | tmp = apic->apicid_to_cpu_present(apic_id); | 3995 | apic->apicid_to_cpu_present(apic_id, &tmp); |
3980 | physids_or(apic_id_map, apic_id_map, tmp); | 3996 | physids_or(apic_id_map, apic_id_map, tmp); |
3981 | 3997 | ||
3982 | if (reg_00.bits.ID != apic_id) { | 3998 | if (reg_00.bits.ID != apic_id) { |
@@ -4106,7 +4122,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics) | |||
4106 | for (i = 0; i < nr_ioapics; i++) { | 4122 | for (i = 0; i < nr_ioapics; i++) { |
4107 | res[i].name = mem; | 4123 | res[i].name = mem; |
4108 | res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; | 4124 | res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; |
4109 | sprintf(mem, "IOAPIC %u", i); | 4125 | snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i); |
4110 | mem += IOAPIC_RESOURCE_NAME_SIZE; | 4126 | mem += IOAPIC_RESOURCE_NAME_SIZE; |
4111 | } | 4127 | } |
4112 | 4128 | ||
@@ -4140,18 +4156,17 @@ void __init ioapic_init_mappings(void) | |||
4140 | #ifdef CONFIG_X86_32 | 4156 | #ifdef CONFIG_X86_32 |
4141 | fake_ioapic_page: | 4157 | fake_ioapic_page: |
4142 | #endif | 4158 | #endif |
4143 | ioapic_phys = (unsigned long) | 4159 | ioapic_phys = (unsigned long)alloc_bootmem_pages(PAGE_SIZE); |
4144 | alloc_bootmem_pages(PAGE_SIZE); | ||
4145 | ioapic_phys = __pa(ioapic_phys); | 4160 | ioapic_phys = __pa(ioapic_phys); |
4146 | } | 4161 | } |
4147 | set_fixmap_nocache(idx, ioapic_phys); | 4162 | set_fixmap_nocache(idx, ioapic_phys); |
4148 | apic_printk(APIC_VERBOSE, | 4163 | apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n", |
4149 | "mapped IOAPIC to %08lx (%08lx)\n", | 4164 | __fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK), |
4150 | __fix_to_virt(idx), ioapic_phys); | 4165 | ioapic_phys); |
4151 | idx++; | 4166 | idx++; |
4152 | 4167 | ||
4153 | ioapic_res->start = ioapic_phys; | 4168 | ioapic_res->start = ioapic_phys; |
4154 | ioapic_res->end = ioapic_phys + (4 * 1024) - 1; | 4169 | ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1; |
4155 | ioapic_res++; | 4170 | ioapic_res++; |
4156 | } | 4171 | } |
4157 | } | 4172 | } |
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index 45404379d173..4ada42c3dabb 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c | |||
@@ -39,7 +39,8 @@ | |||
39 | int unknown_nmi_panic; | 39 | int unknown_nmi_panic; |
40 | int nmi_watchdog_enabled; | 40 | int nmi_watchdog_enabled; |
41 | 41 | ||
42 | static cpumask_t backtrace_mask __read_mostly; | 42 | /* For reliability, we're prepared to waste bits here. */ |
43 | static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; | ||
43 | 44 | ||
44 | /* nmi_active: | 45 | /* nmi_active: |
45 | * >0: the lapic NMI watchdog is active, but can be disabled | 46 | * >0: the lapic NMI watchdog is active, but can be disabled |
@@ -414,7 +415,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) | |||
414 | } | 415 | } |
415 | 416 | ||
416 | /* We can be called before check_nmi_watchdog, hence NULL check. */ | 417 | /* We can be called before check_nmi_watchdog, hence NULL check. */ |
417 | if (cpumask_test_cpu(cpu, &backtrace_mask)) { | 418 | if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { |
418 | static DEFINE_SPINLOCK(lock); /* Serialise the printks */ | 419 | static DEFINE_SPINLOCK(lock); /* Serialise the printks */ |
419 | 420 | ||
420 | spin_lock(&lock); | 421 | spin_lock(&lock); |
@@ -422,7 +423,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) | |||
422 | show_regs(regs); | 423 | show_regs(regs); |
423 | dump_stack(); | 424 | dump_stack(); |
424 | spin_unlock(&lock); | 425 | spin_unlock(&lock); |
425 | cpumask_clear_cpu(cpu, &backtrace_mask); | 426 | cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); |
426 | 427 | ||
427 | rc = 1; | 428 | rc = 1; |
428 | } | 429 | } |
@@ -558,14 +559,14 @@ void arch_trigger_all_cpu_backtrace(void) | |||
558 | { | 559 | { |
559 | int i; | 560 | int i; |
560 | 561 | ||
561 | cpumask_copy(&backtrace_mask, cpu_online_mask); | 562 | cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); |
562 | 563 | ||
563 | printk(KERN_INFO "sending NMI to all CPUs:\n"); | 564 | printk(KERN_INFO "sending NMI to all CPUs:\n"); |
564 | apic->send_IPI_all(NMI_VECTOR); | 565 | apic->send_IPI_all(NMI_VECTOR); |
565 | 566 | ||
566 | /* Wait for up to 10 seconds for all CPUs to do the backtrace */ | 567 | /* Wait for up to 10 seconds for all CPUs to do the backtrace */ |
567 | for (i = 0; i < 10 * 1000; i++) { | 568 | for (i = 0; i < 10 * 1000; i++) { |
568 | if (cpumask_empty(&backtrace_mask)) | 569 | if (cpumask_empty(to_cpumask(backtrace_mask))) |
569 | break; | 570 | break; |
570 | mdelay(1); | 571 | mdelay(1); |
571 | } | 572 | } |
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index efa00e2b8505..98c4665f251c 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c | |||
@@ -264,11 +264,6 @@ static void __init smp_read_mpc_oem(struct mpc_table *mpc) | |||
264 | static __init void early_check_numaq(void) | 264 | static __init void early_check_numaq(void) |
265 | { | 265 | { |
266 | /* | 266 | /* |
267 | * Find possible boot-time SMP configuration: | ||
268 | */ | ||
269 | early_find_smp_config(); | ||
270 | |||
271 | /* | ||
272 | * get boot-time SMP configuration: | 267 | * get boot-time SMP configuration: |
273 | */ | 268 | */ |
274 | if (smp_found_config) | 269 | if (smp_found_config) |
@@ -334,10 +329,9 @@ static inline const struct cpumask *numaq_target_cpus(void) | |||
334 | return cpu_all_mask; | 329 | return cpu_all_mask; |
335 | } | 330 | } |
336 | 331 | ||
337 | static inline unsigned long | 332 | static unsigned long numaq_check_apicid_used(physid_mask_t *map, int apicid) |
338 | numaq_check_apicid_used(physid_mask_t bitmap, int apicid) | ||
339 | { | 333 | { |
340 | return physid_isset(apicid, bitmap); | 334 | return physid_isset(apicid, *map); |
341 | } | 335 | } |
342 | 336 | ||
343 | static inline unsigned long numaq_check_apicid_present(int bit) | 337 | static inline unsigned long numaq_check_apicid_present(int bit) |
@@ -371,10 +365,10 @@ static inline int numaq_multi_timer_check(int apic, int irq) | |||
371 | return apic != 0 && irq == 0; | 365 | return apic != 0 && irq == 0; |
372 | } | 366 | } |
373 | 367 | ||
374 | static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map) | 368 | static inline void numaq_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) |
375 | { | 369 | { |
376 | /* We don't have a good way to do this yet - hack */ | 370 | /* We don't have a good way to do this yet - hack */ |
377 | return physids_promote(0xFUL); | 371 | return physids_promote(0xFUL, retmap); |
378 | } | 372 | } |
379 | 373 | ||
380 | static inline int numaq_cpu_to_logical_apicid(int cpu) | 374 | static inline int numaq_cpu_to_logical_apicid(int cpu) |
@@ -402,12 +396,12 @@ static inline int numaq_apicid_to_node(int logical_apicid) | |||
402 | return logical_apicid >> 4; | 396 | return logical_apicid >> 4; |
403 | } | 397 | } |
404 | 398 | ||
405 | static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid) | 399 | static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap) |
406 | { | 400 | { |
407 | int node = numaq_apicid_to_node(logical_apicid); | 401 | int node = numaq_apicid_to_node(logical_apicid); |
408 | int cpu = __ffs(logical_apicid & 0xf); | 402 | int cpu = __ffs(logical_apicid & 0xf); |
409 | 403 | ||
410 | return physid_mask_of_physid(cpu + 4*node); | 404 | physid_set_mask_of_physid(cpu + 4*node, retmap); |
411 | } | 405 | } |
412 | 406 | ||
413 | /* Where the IO area was mapped on multiquad, always 0 otherwise */ | 407 | /* Where the IO area was mapped on multiquad, always 0 otherwise */ |
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 0c0182cc947d..1a6559f6768c 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c | |||
@@ -108,7 +108,7 @@ struct apic apic_default = { | |||
108 | .apicid_to_node = default_apicid_to_node, | 108 | .apicid_to_node = default_apicid_to_node, |
109 | .cpu_to_logical_apicid = default_cpu_to_logical_apicid, | 109 | .cpu_to_logical_apicid = default_cpu_to_logical_apicid, |
110 | .cpu_present_to_apicid = default_cpu_present_to_apicid, | 110 | .cpu_present_to_apicid = default_cpu_present_to_apicid, |
111 | .apicid_to_cpu_present = default_apicid_to_cpu_present, | 111 | .apicid_to_cpu_present = physid_set_mask_of_physid, |
112 | .setup_portio_remap = NULL, | 112 | .setup_portio_remap = NULL, |
113 | .check_phys_apicid_present = default_check_phys_apicid_present, | 113 | .check_phys_apicid_present = default_check_phys_apicid_present, |
114 | .enable_apic_mode = NULL, | 114 | .enable_apic_mode = NULL, |
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 645ecc4ff0be..9b419263d90d 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c | |||
@@ -183,7 +183,7 @@ static const struct cpumask *summit_target_cpus(void) | |||
183 | return cpumask_of(0); | 183 | return cpumask_of(0); |
184 | } | 184 | } |
185 | 185 | ||
186 | static unsigned long summit_check_apicid_used(physid_mask_t bitmap, int apicid) | 186 | static unsigned long summit_check_apicid_used(physid_mask_t *map, int apicid) |
187 | { | 187 | { |
188 | return 0; | 188 | return 0; |
189 | } | 189 | } |
@@ -261,15 +261,15 @@ static int summit_cpu_present_to_apicid(int mps_cpu) | |||
261 | return BAD_APICID; | 261 | return BAD_APICID; |
262 | } | 262 | } |
263 | 263 | ||
264 | static physid_mask_t summit_ioapic_phys_id_map(physid_mask_t phys_id_map) | 264 | static void summit_ioapic_phys_id_map(physid_mask_t *phys_id_map, physid_mask_t *retmap) |
265 | { | 265 | { |
266 | /* For clustered we don't have a good way to do this yet - hack */ | 266 | /* For clustered we don't have a good way to do this yet - hack */ |
267 | return physids_promote(0x0F); | 267 | physids_promote(0x0FL, retmap); |
268 | } | 268 | } |
269 | 269 | ||
270 | static physid_mask_t summit_apicid_to_cpu_present(int apicid) | 270 | static void summit_apicid_to_cpu_present(int apicid, physid_mask_t *retmap) |
271 | { | 271 | { |
272 | return physid_mask_of_physid(0); | 272 | physid_set_mask_of_physid(0, retmap); |
273 | } | 273 | } |
274 | 274 | ||
275 | static int summit_check_phys_apicid_present(int physical_apicid) | 275 | static int summit_check_phys_apicid_present(int physical_apicid) |
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index a5371ec36776..cf69c59f4910 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c | |||
@@ -148,10 +148,7 @@ x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | |||
148 | break; | 148 | break; |
149 | } | 149 | } |
150 | 150 | ||
151 | if (cpu < nr_cpu_ids) | 151 | return per_cpu(x86_cpu_to_logical_apicid, cpu); |
152 | return per_cpu(x86_cpu_to_logical_apicid, cpu); | ||
153 | |||
154 | return BAD_APICID; | ||
155 | } | 152 | } |
156 | 153 | ||
157 | static unsigned int x2apic_cluster_phys_get_apic_id(unsigned long x) | 154 | static unsigned int x2apic_cluster_phys_get_apic_id(unsigned long x) |
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index a8989aadc99a..8972f38c5ced 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c | |||
@@ -146,10 +146,7 @@ x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | |||
146 | break; | 146 | break; |
147 | } | 147 | } |
148 | 148 | ||
149 | if (cpu < nr_cpu_ids) | 149 | return per_cpu(x86_cpu_to_apicid, cpu); |
150 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
151 | |||
152 | return BAD_APICID; | ||
153 | } | 150 | } |
154 | 151 | ||
155 | static unsigned int x2apic_phys_get_apic_id(unsigned long x) | 152 | static unsigned int x2apic_phys_get_apic_id(unsigned long x) |
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index f5f5886a6b53..5f92494dab61 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
@@ -30,10 +30,22 @@ | |||
30 | #include <asm/apic.h> | 30 | #include <asm/apic.h> |
31 | #include <asm/ipi.h> | 31 | #include <asm/ipi.h> |
32 | #include <asm/smp.h> | 32 | #include <asm/smp.h> |
33 | #include <asm/x86_init.h> | ||
33 | 34 | ||
34 | DEFINE_PER_CPU(int, x2apic_extra_bits); | 35 | DEFINE_PER_CPU(int, x2apic_extra_bits); |
35 | 36 | ||
36 | static enum uv_system_type uv_system_type; | 37 | static enum uv_system_type uv_system_type; |
38 | static u64 gru_start_paddr, gru_end_paddr; | ||
39 | |||
40 | static inline bool is_GRU_range(u64 start, u64 end) | ||
41 | { | ||
42 | return start >= gru_start_paddr && end <= gru_end_paddr; | ||
43 | } | ||
44 | |||
45 | static bool uv_is_untracked_pat_range(u64 start, u64 end) | ||
46 | { | ||
47 | return is_ISA_range(start, end) || is_GRU_range(start, end); | ||
48 | } | ||
37 | 49 | ||
38 | static int early_get_nodeid(void) | 50 | static int early_get_nodeid(void) |
39 | { | 51 | { |
@@ -49,6 +61,7 @@ static int early_get_nodeid(void) | |||
49 | static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | 61 | static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) |
50 | { | 62 | { |
51 | if (!strcmp(oem_id, "SGI")) { | 63 | if (!strcmp(oem_id, "SGI")) { |
64 | x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; | ||
52 | if (!strcmp(oem_table_id, "UVL")) | 65 | if (!strcmp(oem_table_id, "UVL")) |
53 | uv_system_type = UV_LEGACY_APIC; | 66 | uv_system_type = UV_LEGACY_APIC; |
54 | else if (!strcmp(oem_table_id, "UVX")) | 67 | else if (!strcmp(oem_table_id, "UVX")) |
@@ -212,10 +225,7 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | |||
212 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | 225 | if (cpumask_test_cpu(cpu, cpu_online_mask)) |
213 | break; | 226 | break; |
214 | } | 227 | } |
215 | if (cpu < nr_cpu_ids) | 228 | return per_cpu(x86_cpu_to_apicid, cpu); |
216 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
217 | |||
218 | return BAD_APICID; | ||
219 | } | 229 | } |
220 | 230 | ||
221 | static unsigned int x2apic_get_apic_id(unsigned long x) | 231 | static unsigned int x2apic_get_apic_id(unsigned long x) |
@@ -352,14 +362,14 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) | |||
352 | 362 | ||
353 | for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) { | 363 | for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) { |
354 | alias.v = uv_read_local_mmr(redir_addrs[i].alias); | 364 | alias.v = uv_read_local_mmr(redir_addrs[i].alias); |
355 | if (alias.s.base == 0) { | 365 | if (alias.s.enable && alias.s.base == 0) { |
356 | *size = (1UL << alias.s.m_alias); | 366 | *size = (1UL << alias.s.m_alias); |
357 | redirect.v = uv_read_local_mmr(redir_addrs[i].redirect); | 367 | redirect.v = uv_read_local_mmr(redir_addrs[i].redirect); |
358 | *base = (unsigned long)redirect.s.dest_base << DEST_SHIFT; | 368 | *base = (unsigned long)redirect.s.dest_base << DEST_SHIFT; |
359 | return; | 369 | return; |
360 | } | 370 | } |
361 | } | 371 | } |
362 | BUG(); | 372 | *base = *size = 0; |
363 | } | 373 | } |
364 | 374 | ||
365 | enum map_type {map_wb, map_uc}; | 375 | enum map_type {map_wb, map_uc}; |
@@ -385,8 +395,12 @@ static __init void map_gru_high(int max_pnode) | |||
385 | int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT; | 395 | int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT; |
386 | 396 | ||
387 | gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); | 397 | gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); |
388 | if (gru.s.enable) | 398 | if (gru.s.enable) { |
389 | map_high("GRU", gru.s.base, shift, max_pnode, map_wb); | 399 | map_high("GRU", gru.s.base, shift, max_pnode, map_wb); |
400 | gru_start_paddr = ((u64)gru.s.base << shift); | ||
401 | gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1); | ||
402 | |||
403 | } | ||
390 | } | 404 | } |
391 | 405 | ||
392 | static __init void map_mmr_high(int max_pnode) | 406 | static __init void map_mmr_high(int max_pnode) |
@@ -409,6 +423,12 @@ static __init void map_mmioh_high(int max_pnode) | |||
409 | map_high("MMIOH", mmioh.s.base, shift, max_pnode, map_uc); | 423 | map_high("MMIOH", mmioh.s.base, shift, max_pnode, map_uc); |
410 | } | 424 | } |
411 | 425 | ||
426 | static __init void map_low_mmrs(void) | ||
427 | { | ||
428 | init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE); | ||
429 | init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE); | ||
430 | } | ||
431 | |||
412 | static __init void uv_rtc_init(void) | 432 | static __init void uv_rtc_init(void) |
413 | { | 433 | { |
414 | long status; | 434 | long status; |
@@ -550,6 +570,8 @@ void __init uv_system_init(void) | |||
550 | unsigned long mmr_base, present, paddr; | 570 | unsigned long mmr_base, present, paddr; |
551 | unsigned short pnode_mask; | 571 | unsigned short pnode_mask; |
552 | 572 | ||
573 | map_low_mmrs(); | ||
574 | |||
553 | m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); | 575 | m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); |
554 | m_val = m_n_config.s.m_skt; | 576 | m_val = m_n_config.s.m_skt; |
555 | n_val = m_n_config.s.n_skt; | 577 | n_val = m_n_config.s.n_skt; |
@@ -607,8 +629,10 @@ void __init uv_system_init(void) | |||
607 | uv_rtc_init(); | 629 | uv_rtc_init(); |
608 | 630 | ||
609 | for_each_present_cpu(cpu) { | 631 | for_each_present_cpu(cpu) { |
632 | int apicid = per_cpu(x86_cpu_to_apicid, cpu); | ||
633 | |||
610 | nid = cpu_to_node(cpu); | 634 | nid = cpu_to_node(cpu); |
611 | pnode = uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, cpu)); | 635 | pnode = uv_apicid_to_pnode(apicid); |
612 | blade = boot_pnode_to_blade(pnode); | 636 | blade = boot_pnode_to_blade(pnode); |
613 | lcpu = uv_blade_info[blade].nr_possible_cpus; | 637 | lcpu = uv_blade_info[blade].nr_possible_cpus; |
614 | uv_blade_info[blade].nr_possible_cpus++; | 638 | uv_blade_info[blade].nr_possible_cpus++; |
@@ -619,25 +643,23 @@ void __init uv_system_init(void) | |||
619 | uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; | 643 | uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; |
620 | uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size; | 644 | uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size; |
621 | uv_cpu_hub_info(cpu)->m_val = m_val; | 645 | uv_cpu_hub_info(cpu)->m_val = m_val; |
622 | uv_cpu_hub_info(cpu)->n_val = m_val; | 646 | uv_cpu_hub_info(cpu)->n_val = n_val; |
623 | uv_cpu_hub_info(cpu)->numa_blade_id = blade; | 647 | uv_cpu_hub_info(cpu)->numa_blade_id = blade; |
624 | uv_cpu_hub_info(cpu)->blade_processor_id = lcpu; | 648 | uv_cpu_hub_info(cpu)->blade_processor_id = lcpu; |
625 | uv_cpu_hub_info(cpu)->pnode = pnode; | 649 | uv_cpu_hub_info(cpu)->pnode = pnode; |
626 | uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask; | 650 | uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask; |
627 | uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; | 651 | uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1; |
628 | uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; | 652 | uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; |
629 | uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; | 653 | uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; |
630 | uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; | 654 | uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; |
631 | uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id; | 655 | uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id; |
632 | uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu; | 656 | uv_cpu_hub_info(cpu)->scir.offset = uv_scir_offset(apicid); |
633 | uv_node_to_blade[nid] = blade; | 657 | uv_node_to_blade[nid] = blade; |
634 | uv_cpu_to_blade[cpu] = blade; | 658 | uv_cpu_to_blade[cpu] = blade; |
635 | max_pnode = max(pnode, max_pnode); | 659 | max_pnode = max(pnode, max_pnode); |
636 | 660 | ||
637 | printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, " | 661 | printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, lcpu %d, blade %d\n", |
638 | "lcpu %d, blade %d\n", | 662 | cpu, apicid, pnode, nid, lcpu, blade); |
639 | cpu, per_cpu(x86_cpu_to_apicid, cpu), pnode, nid, | ||
640 | lcpu, blade); | ||
641 | } | 663 | } |
642 | 664 | ||
643 | /* Add blade/pnode info for nodes without cpus */ | 665 | /* Add blade/pnode info for nodes without cpus */ |
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 151ace69a5aa..b5b6b23bce53 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
@@ -204,7 +204,6 @@ | |||
204 | #include <linux/module.h> | 204 | #include <linux/module.h> |
205 | 205 | ||
206 | #include <linux/poll.h> | 206 | #include <linux/poll.h> |
207 | #include <linux/smp_lock.h> | ||
208 | #include <linux/types.h> | 207 | #include <linux/types.h> |
209 | #include <linux/stddef.h> | 208 | #include <linux/stddef.h> |
210 | #include <linux/timer.h> | 209 | #include <linux/timer.h> |
@@ -403,6 +402,7 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue); | |||
403 | static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); | 402 | static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); |
404 | static struct apm_user *user_list; | 403 | static struct apm_user *user_list; |
405 | static DEFINE_SPINLOCK(user_list_lock); | 404 | static DEFINE_SPINLOCK(user_list_lock); |
405 | static DEFINE_MUTEX(apm_mutex); | ||
406 | 406 | ||
407 | /* | 407 | /* |
408 | * Set up a segment that references the real mode segment 0x40 | 408 | * Set up a segment that references the real mode segment 0x40 |
@@ -1531,7 +1531,7 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg) | |||
1531 | return -EPERM; | 1531 | return -EPERM; |
1532 | switch (cmd) { | 1532 | switch (cmd) { |
1533 | case APM_IOC_STANDBY: | 1533 | case APM_IOC_STANDBY: |
1534 | lock_kernel(); | 1534 | mutex_lock(&apm_mutex); |
1535 | if (as->standbys_read > 0) { | 1535 | if (as->standbys_read > 0) { |
1536 | as->standbys_read--; | 1536 | as->standbys_read--; |
1537 | as->standbys_pending--; | 1537 | as->standbys_pending--; |
@@ -1540,10 +1540,10 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg) | |||
1540 | queue_event(APM_USER_STANDBY, as); | 1540 | queue_event(APM_USER_STANDBY, as); |
1541 | if (standbys_pending <= 0) | 1541 | if (standbys_pending <= 0) |
1542 | standby(); | 1542 | standby(); |
1543 | unlock_kernel(); | 1543 | mutex_unlock(&apm_mutex); |
1544 | break; | 1544 | break; |
1545 | case APM_IOC_SUSPEND: | 1545 | case APM_IOC_SUSPEND: |
1546 | lock_kernel(); | 1546 | mutex_lock(&apm_mutex); |
1547 | if (as->suspends_read > 0) { | 1547 | if (as->suspends_read > 0) { |
1548 | as->suspends_read--; | 1548 | as->suspends_read--; |
1549 | as->suspends_pending--; | 1549 | as->suspends_pending--; |
@@ -1552,13 +1552,14 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg) | |||
1552 | queue_event(APM_USER_SUSPEND, as); | 1552 | queue_event(APM_USER_SUSPEND, as); |
1553 | if (suspends_pending <= 0) { | 1553 | if (suspends_pending <= 0) { |
1554 | ret = suspend(1); | 1554 | ret = suspend(1); |
1555 | mutex_unlock(&apm_mutex); | ||
1555 | } else { | 1556 | } else { |
1556 | as->suspend_wait = 1; | 1557 | as->suspend_wait = 1; |
1558 | mutex_unlock(&apm_mutex); | ||
1557 | wait_event_interruptible(apm_suspend_waitqueue, | 1559 | wait_event_interruptible(apm_suspend_waitqueue, |
1558 | as->suspend_wait == 0); | 1560 | as->suspend_wait == 0); |
1559 | ret = as->suspend_result; | 1561 | ret = as->suspend_result; |
1560 | } | 1562 | } |
1561 | unlock_kernel(); | ||
1562 | return ret; | 1563 | return ret; |
1563 | default: | 1564 | default: |
1564 | return -ENOTTY; | 1565 | return -ENOTTY; |
@@ -1608,12 +1609,10 @@ static int do_open(struct inode *inode, struct file *filp) | |||
1608 | { | 1609 | { |
1609 | struct apm_user *as; | 1610 | struct apm_user *as; |
1610 | 1611 | ||
1611 | lock_kernel(); | ||
1612 | as = kmalloc(sizeof(*as), GFP_KERNEL); | 1612 | as = kmalloc(sizeof(*as), GFP_KERNEL); |
1613 | if (as == NULL) { | 1613 | if (as == NULL) { |
1614 | printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n", | 1614 | printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n", |
1615 | sizeof(*as)); | 1615 | sizeof(*as)); |
1616 | unlock_kernel(); | ||
1617 | return -ENOMEM; | 1616 | return -ENOMEM; |
1618 | } | 1617 | } |
1619 | as->magic = APM_BIOS_MAGIC; | 1618 | as->magic = APM_BIOS_MAGIC; |
@@ -1635,7 +1634,6 @@ static int do_open(struct inode *inode, struct file *filp) | |||
1635 | user_list = as; | 1634 | user_list = as; |
1636 | spin_unlock(&user_list_lock); | 1635 | spin_unlock(&user_list_lock); |
1637 | filp->private_data = as; | 1636 | filp->private_data = as; |
1638 | unlock_kernel(); | ||
1639 | return 0; | 1637 | return 0; |
1640 | } | 1638 | } |
1641 | 1639 | ||
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c index 63a88e1f987d..b0206a211b09 100644 --- a/arch/x86/kernel/bios_uv.c +++ b/arch/x86/kernel/bios_uv.c | |||
@@ -101,21 +101,17 @@ s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher, | |||
101 | } | 101 | } |
102 | 102 | ||
103 | int | 103 | int |
104 | uv_bios_mq_watchlist_alloc(int blade, unsigned long addr, unsigned int mq_size, | 104 | uv_bios_mq_watchlist_alloc(unsigned long addr, unsigned int mq_size, |
105 | unsigned long *intr_mmr_offset) | 105 | unsigned long *intr_mmr_offset) |
106 | { | 106 | { |
107 | union uv_watchlist_u size_blade; | ||
108 | u64 watchlist; | 107 | u64 watchlist; |
109 | s64 ret; | 108 | s64 ret; |
110 | 109 | ||
111 | size_blade.size = mq_size; | ||
112 | size_blade.blade = blade; | ||
113 | |||
114 | /* | 110 | /* |
115 | * bios returns watchlist number or negative error number. | 111 | * bios returns watchlist number or negative error number. |
116 | */ | 112 | */ |
117 | ret = (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_ALLOC, addr, | 113 | ret = (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_ALLOC, addr, |
118 | size_blade.val, (u64)intr_mmr_offset, | 114 | mq_size, (u64)intr_mmr_offset, |
119 | (u64)&watchlist, 0); | 115 | (u64)&watchlist, 0); |
120 | if (ret < BIOS_STATUS_SUCCESS) | 116 | if (ret < BIOS_STATUS_SUCCESS) |
121 | return ret; | 117 | return ret; |
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 68537e957a9b..1d2cb383410e 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -5,6 +5,7 @@ | |||
5 | # Don't trace early stages of a secondary CPU boot | 5 | # Don't trace early stages of a secondary CPU boot |
6 | ifdef CONFIG_FUNCTION_TRACER | 6 | ifdef CONFIG_FUNCTION_TRACER |
7 | CFLAGS_REMOVE_common.o = -pg | 7 | CFLAGS_REMOVE_common.o = -pg |
8 | CFLAGS_REMOVE_perf_event.o = -pg | ||
8 | endif | 9 | endif |
9 | 10 | ||
10 | # Make sure load_percpu_segment has no stackprotector | 11 | # Make sure load_percpu_segment has no stackprotector |
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index c965e5212714..468489b57aae 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c | |||
@@ -74,6 +74,7 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) | |||
74 | unsigned int eax, ebx, ecx, edx, sub_index; | 74 | unsigned int eax, ebx, ecx, edx, sub_index; |
75 | unsigned int ht_mask_width, core_plus_mask_width; | 75 | unsigned int ht_mask_width, core_plus_mask_width; |
76 | unsigned int core_select_mask, core_level_siblings; | 76 | unsigned int core_select_mask, core_level_siblings; |
77 | static bool printed; | ||
77 | 78 | ||
78 | if (c->cpuid_level < 0xb) | 79 | if (c->cpuid_level < 0xb) |
79 | return; | 80 | return; |
@@ -127,12 +128,14 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) | |||
127 | 128 | ||
128 | c->x86_max_cores = (core_level_siblings / smp_num_siblings); | 129 | c->x86_max_cores = (core_level_siblings / smp_num_siblings); |
129 | 130 | ||
130 | 131 | if (!printed) { | |
131 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", | 132 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", |
132 | c->phys_proc_id); | 133 | c->phys_proc_id); |
133 | if (c->x86_max_cores > 1) | 134 | if (c->x86_max_cores > 1) |
134 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", | 135 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", |
135 | c->cpu_core_id); | 136 | c->cpu_core_id); |
137 | printed = 1; | ||
138 | } | ||
136 | return; | 139 | return; |
137 | #endif | 140 | #endif |
138 | } | 141 | } |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index c910a716a71c..e485825130d2 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -254,59 +254,36 @@ static int __cpuinit nearby_node(int apicid) | |||
254 | 254 | ||
255 | /* | 255 | /* |
256 | * Fixup core topology information for AMD multi-node processors. | 256 | * Fixup core topology information for AMD multi-node processors. |
257 | * Assumption 1: Number of cores in each internal node is the same. | 257 | * Assumption: Number of cores in each internal node is the same. |
258 | * Assumption 2: Mixed systems with both single-node and dual-node | ||
259 | * processors are not supported. | ||
260 | */ | 258 | */ |
261 | #ifdef CONFIG_X86_HT | 259 | #ifdef CONFIG_X86_HT |
262 | static void __cpuinit amd_fixup_dcm(struct cpuinfo_x86 *c) | 260 | static void __cpuinit amd_fixup_dcm(struct cpuinfo_x86 *c) |
263 | { | 261 | { |
264 | #ifdef CONFIG_PCI | 262 | unsigned long long value; |
265 | u32 t, cpn; | 263 | u32 nodes, cores_per_node; |
266 | u8 n, n_id; | ||
267 | int cpu = smp_processor_id(); | 264 | int cpu = smp_processor_id(); |
268 | 265 | ||
266 | if (!cpu_has(c, X86_FEATURE_NODEID_MSR)) | ||
267 | return; | ||
268 | |||
269 | /* fixup topology information only once for a core */ | 269 | /* fixup topology information only once for a core */ |
270 | if (cpu_has(c, X86_FEATURE_AMD_DCM)) | 270 | if (cpu_has(c, X86_FEATURE_AMD_DCM)) |
271 | return; | 271 | return; |
272 | 272 | ||
273 | /* check for multi-node processor on boot cpu */ | 273 | rdmsrl(MSR_FAM10H_NODE_ID, value); |
274 | t = read_pci_config(0, 24, 3, 0xe8); | 274 | |
275 | if (!(t & (1 << 29))) | 275 | nodes = ((value >> 3) & 7) + 1; |
276 | if (nodes == 1) | ||
276 | return; | 277 | return; |
277 | 278 | ||
278 | set_cpu_cap(c, X86_FEATURE_AMD_DCM); | 279 | set_cpu_cap(c, X86_FEATURE_AMD_DCM); |
280 | cores_per_node = c->x86_max_cores / nodes; | ||
279 | 281 | ||
280 | /* cores per node: each internal node has half the number of cores */ | 282 | /* store NodeID, use llc_shared_map to store sibling info */ |
281 | cpn = c->x86_max_cores >> 1; | 283 | per_cpu(cpu_llc_id, cpu) = value & 7; |
282 | |||
283 | /* even-numbered NB_id of this dual-node processor */ | ||
284 | n = c->phys_proc_id << 1; | ||
285 | |||
286 | /* | ||
287 | * determine internal node id and assign cores fifty-fifty to | ||
288 | * each node of the dual-node processor | ||
289 | */ | ||
290 | t = read_pci_config(0, 24 + n, 3, 0xe8); | ||
291 | n = (t>>30) & 0x3; | ||
292 | if (n == 0) { | ||
293 | if (c->cpu_core_id < cpn) | ||
294 | n_id = 0; | ||
295 | else | ||
296 | n_id = 1; | ||
297 | } else { | ||
298 | if (c->cpu_core_id < cpn) | ||
299 | n_id = 1; | ||
300 | else | ||
301 | n_id = 0; | ||
302 | } | ||
303 | |||
304 | /* compute entire NodeID, use llc_shared_map to store sibling info */ | ||
305 | per_cpu(cpu_llc_id, cpu) = (c->phys_proc_id << 1) + n_id; | ||
306 | 284 | ||
307 | /* fixup core id to be in range from 0 to cpn */ | 285 | /* fixup core id to be in range from 0 to (cores_per_node - 1) */ |
308 | c->cpu_core_id = c->cpu_core_id % cpn; | 286 | c->cpu_core_id = c->cpu_core_id % cores_per_node; |
309 | #endif | ||
310 | } | 287 | } |
311 | #endif | 288 | #endif |
312 | 289 | ||
@@ -375,8 +352,6 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) | |||
375 | node = nearby_node(apicid); | 352 | node = nearby_node(apicid); |
376 | } | 353 | } |
377 | numa_set_node(cpu, node); | 354 | numa_set_node(cpu, node); |
378 | |||
379 | printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node); | ||
380 | #endif | 355 | #endif |
381 | } | 356 | } |
382 | 357 | ||
@@ -535,7 +510,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
535 | } | 510 | } |
536 | } | 511 | } |
537 | 512 | ||
538 | display_cacheinfo(c); | 513 | cpu_detect_cache_sizes(c); |
539 | 514 | ||
540 | /* Multi core CPU? */ | 515 | /* Multi core CPU? */ |
541 | if (c->extended_cpuid_level >= 0x80000008) { | 516 | if (c->extended_cpuid_level >= 0x80000008) { |
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index c95e831bb095..e58d978e0758 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c | |||
@@ -294,7 +294,7 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c) | |||
294 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | 294 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); |
295 | } | 295 | } |
296 | 296 | ||
297 | display_cacheinfo(c); | 297 | cpu_detect_cache_sizes(c); |
298 | } | 298 | } |
299 | 299 | ||
300 | enum { | 300 | enum { |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 3192f22f2fdd..4868e4a951ee 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -61,7 +61,7 @@ void __init setup_cpu_local_masks(void) | |||
61 | static void __cpuinit default_init(struct cpuinfo_x86 *c) | 61 | static void __cpuinit default_init(struct cpuinfo_x86 *c) |
62 | { | 62 | { |
63 | #ifdef CONFIG_X86_64 | 63 | #ifdef CONFIG_X86_64 |
64 | display_cacheinfo(c); | 64 | cpu_detect_cache_sizes(c); |
65 | #else | 65 | #else |
66 | /* Not much we can do here... */ | 66 | /* Not much we can do here... */ |
67 | /* Check if at least it has cpuid */ | 67 | /* Check if at least it has cpuid */ |
@@ -383,7 +383,7 @@ static void __cpuinit get_model_name(struct cpuinfo_x86 *c) | |||
383 | } | 383 | } |
384 | } | 384 | } |
385 | 385 | ||
386 | void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) | 386 | void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c) |
387 | { | 387 | { |
388 | unsigned int n, dummy, ebx, ecx, edx, l2size; | 388 | unsigned int n, dummy, ebx, ecx, edx, l2size; |
389 | 389 | ||
@@ -391,8 +391,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) | |||
391 | 391 | ||
392 | if (n >= 0x80000005) { | 392 | if (n >= 0x80000005) { |
393 | cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); | 393 | cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); |
394 | printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", | ||
395 | edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); | ||
396 | c->x86_cache_size = (ecx>>24) + (edx>>24); | 394 | c->x86_cache_size = (ecx>>24) + (edx>>24); |
397 | #ifdef CONFIG_X86_64 | 395 | #ifdef CONFIG_X86_64 |
398 | /* On K8 L1 TLB is inclusive, so don't count it */ | 396 | /* On K8 L1 TLB is inclusive, so don't count it */ |
@@ -422,9 +420,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) | |||
422 | #endif | 420 | #endif |
423 | 421 | ||
424 | c->x86_cache_size = l2size; | 422 | c->x86_cache_size = l2size; |
425 | |||
426 | printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", | ||
427 | l2size, ecx & 0xFF); | ||
428 | } | 423 | } |
429 | 424 | ||
430 | void __cpuinit detect_ht(struct cpuinfo_x86 *c) | 425 | void __cpuinit detect_ht(struct cpuinfo_x86 *c) |
@@ -432,6 +427,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
432 | #ifdef CONFIG_X86_HT | 427 | #ifdef CONFIG_X86_HT |
433 | u32 eax, ebx, ecx, edx; | 428 | u32 eax, ebx, ecx, edx; |
434 | int index_msb, core_bits; | 429 | int index_msb, core_bits; |
430 | static bool printed; | ||
435 | 431 | ||
436 | if (!cpu_has(c, X86_FEATURE_HT)) | 432 | if (!cpu_has(c, X86_FEATURE_HT)) |
437 | return; | 433 | return; |
@@ -447,7 +443,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
447 | smp_num_siblings = (ebx & 0xff0000) >> 16; | 443 | smp_num_siblings = (ebx & 0xff0000) >> 16; |
448 | 444 | ||
449 | if (smp_num_siblings == 1) { | 445 | if (smp_num_siblings == 1) { |
450 | printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); | 446 | printk_once(KERN_INFO "CPU0: Hyper-Threading is disabled\n"); |
451 | goto out; | 447 | goto out; |
452 | } | 448 | } |
453 | 449 | ||
@@ -474,11 +470,12 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
474 | ((1 << core_bits) - 1); | 470 | ((1 << core_bits) - 1); |
475 | 471 | ||
476 | out: | 472 | out: |
477 | if ((c->x86_max_cores * smp_num_siblings) > 1) { | 473 | if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) { |
478 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", | 474 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", |
479 | c->phys_proc_id); | 475 | c->phys_proc_id); |
480 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", | 476 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", |
481 | c->cpu_core_id); | 477 | c->cpu_core_id); |
478 | printed = 1; | ||
482 | } | 479 | } |
483 | #endif | 480 | #endif |
484 | } | 481 | } |
@@ -659,24 +656,31 @@ void __init early_cpu_init(void) | |||
659 | const struct cpu_dev *const *cdev; | 656 | const struct cpu_dev *const *cdev; |
660 | int count = 0; | 657 | int count = 0; |
661 | 658 | ||
659 | #ifdef PROCESSOR_SELECT | ||
662 | printk(KERN_INFO "KERNEL supported cpus:\n"); | 660 | printk(KERN_INFO "KERNEL supported cpus:\n"); |
661 | #endif | ||
662 | |||
663 | for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { | 663 | for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { |
664 | const struct cpu_dev *cpudev = *cdev; | 664 | const struct cpu_dev *cpudev = *cdev; |
665 | unsigned int j; | ||
666 | 665 | ||
667 | if (count >= X86_VENDOR_NUM) | 666 | if (count >= X86_VENDOR_NUM) |
668 | break; | 667 | break; |
669 | cpu_devs[count] = cpudev; | 668 | cpu_devs[count] = cpudev; |
670 | count++; | 669 | count++; |
671 | 670 | ||
672 | for (j = 0; j < 2; j++) { | 671 | #ifdef PROCESSOR_SELECT |
673 | if (!cpudev->c_ident[j]) | 672 | { |
674 | continue; | 673 | unsigned int j; |
675 | printk(KERN_INFO " %s %s\n", cpudev->c_vendor, | 674 | |
676 | cpudev->c_ident[j]); | 675 | for (j = 0; j < 2; j++) { |
676 | if (!cpudev->c_ident[j]) | ||
677 | continue; | ||
678 | printk(KERN_INFO " %s %s\n", cpudev->c_vendor, | ||
679 | cpudev->c_ident[j]); | ||
680 | } | ||
677 | } | 681 | } |
682 | #endif | ||
678 | } | 683 | } |
679 | |||
680 | early_identify_cpu(&boot_cpu_data); | 684 | early_identify_cpu(&boot_cpu_data); |
681 | } | 685 | } |
682 | 686 | ||
@@ -837,10 +841,8 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
837 | boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; | 841 | boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; |
838 | } | 842 | } |
839 | 843 | ||
840 | #ifdef CONFIG_X86_MCE | ||
841 | /* Init Machine Check Exception if available. */ | 844 | /* Init Machine Check Exception if available. */ |
842 | mcheck_init(c); | 845 | mcheck_cpu_init(c); |
843 | #endif | ||
844 | 846 | ||
845 | select_idle_routine(c); | 847 | select_idle_routine(c); |
846 | 848 | ||
@@ -1115,7 +1117,7 @@ void __cpuinit cpu_init(void) | |||
1115 | if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) | 1117 | if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) |
1116 | panic("CPU#%d already initialized!\n", cpu); | 1118 | panic("CPU#%d already initialized!\n", cpu); |
1117 | 1119 | ||
1118 | printk(KERN_INFO "Initializing CPU#%d\n", cpu); | 1120 | pr_debug("Initializing CPU#%d\n", cpu); |
1119 | 1121 | ||
1120 | clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); | 1122 | clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); |
1121 | 1123 | ||
@@ -1136,7 +1138,7 @@ void __cpuinit cpu_init(void) | |||
1136 | wrmsrl(MSR_KERNEL_GS_BASE, 0); | 1138 | wrmsrl(MSR_KERNEL_GS_BASE, 0); |
1137 | barrier(); | 1139 | barrier(); |
1138 | 1140 | ||
1139 | check_efer(); | 1141 | x86_configure_nx(); |
1140 | if (cpu != 0) | 1142 | if (cpu != 0) |
1141 | enable_x2apic(); | 1143 | enable_x2apic(); |
1142 | 1144 | ||
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 6de9a908e400..3624e8a0f71b 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h | |||
@@ -32,6 +32,6 @@ struct cpu_dev { | |||
32 | extern const struct cpu_dev *const __x86_cpu_dev_start[], | 32 | extern const struct cpu_dev *const __x86_cpu_dev_start[], |
33 | *const __x86_cpu_dev_end[]; | 33 | *const __x86_cpu_dev_end[]; |
34 | 34 | ||
35 | extern void display_cacheinfo(struct cpuinfo_x86 *c); | 35 | extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); |
36 | 36 | ||
37 | #endif | 37 | #endif |
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 43eb3465dda7..1b1920fa7c80 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | |||
@@ -190,9 +190,11 @@ static void do_drv_write(void *_cmd) | |||
190 | 190 | ||
191 | static void drv_read(struct drv_cmd *cmd) | 191 | static void drv_read(struct drv_cmd *cmd) |
192 | { | 192 | { |
193 | int err; | ||
193 | cmd->val = 0; | 194 | cmd->val = 0; |
194 | 195 | ||
195 | smp_call_function_single(cpumask_any(cmd->mask), do_drv_read, cmd, 1); | 196 | err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 1); |
197 | WARN_ON_ONCE(err); /* smp_call_function_any() was buggy? */ | ||
196 | } | 198 | } |
197 | 199 | ||
198 | static void drv_write(struct drv_cmd *cmd) | 200 | static void drv_write(struct drv_cmd *cmd) |
@@ -526,15 +528,21 @@ static const struct dmi_system_id sw_any_bug_dmi_table[] = { | |||
526 | 528 | ||
527 | static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c) | 529 | static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c) |
528 | { | 530 | { |
529 | /* http://www.intel.com/Assets/PDF/specupdate/314554.pdf | 531 | /* Intel Xeon Processor 7100 Series Specification Update |
532 | * http://www.intel.com/Assets/PDF/specupdate/314554.pdf | ||
530 | * AL30: A Machine Check Exception (MCE) Occurring during an | 533 | * AL30: A Machine Check Exception (MCE) Occurring during an |
531 | * Enhanced Intel SpeedStep Technology Ratio Change May Cause | 534 | * Enhanced Intel SpeedStep Technology Ratio Change May Cause |
532 | * Both Processor Cores to Lock Up when HT is enabled*/ | 535 | * Both Processor Cores to Lock Up. */ |
533 | if (c->x86_vendor == X86_VENDOR_INTEL) { | 536 | if (c->x86_vendor == X86_VENDOR_INTEL) { |
534 | if ((c->x86 == 15) && | 537 | if ((c->x86 == 15) && |
535 | (c->x86_model == 6) && | 538 | (c->x86_model == 6) && |
536 | (c->x86_mask == 8) && smt_capable()) | 539 | (c->x86_mask == 8)) { |
540 | printk(KERN_INFO "acpi-cpufreq: Intel(R) " | ||
541 | "Xeon(R) 7100 Errata AL30, processors may " | ||
542 | "lock up on frequency changes: disabling " | ||
543 | "acpi-cpufreq.\n"); | ||
537 | return -ENODEV; | 544 | return -ENODEV; |
545 | } | ||
538 | } | 546 | } |
539 | return 0; | 547 | return 0; |
540 | } | 548 | } |
@@ -549,13 +557,18 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) | |||
549 | unsigned int result = 0; | 557 | unsigned int result = 0; |
550 | struct cpuinfo_x86 *c = &cpu_data(policy->cpu); | 558 | struct cpuinfo_x86 *c = &cpu_data(policy->cpu); |
551 | struct acpi_processor_performance *perf; | 559 | struct acpi_processor_performance *perf; |
560 | #ifdef CONFIG_SMP | ||
561 | static int blacklisted; | ||
562 | #endif | ||
552 | 563 | ||
553 | dprintk("acpi_cpufreq_cpu_init\n"); | 564 | dprintk("acpi_cpufreq_cpu_init\n"); |
554 | 565 | ||
555 | #ifdef CONFIG_SMP | 566 | #ifdef CONFIG_SMP |
556 | result = acpi_cpufreq_blacklist(c); | 567 | if (blacklisted) |
557 | if (result) | 568 | return blacklisted; |
558 | return result; | 569 | blacklisted = acpi_cpufreq_blacklist(c); |
570 | if (blacklisted) | ||
571 | return blacklisted; | ||
559 | #endif | 572 | #endif |
560 | 573 | ||
561 | data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL); | 574 | data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL); |
@@ -753,14 +766,15 @@ static struct freq_attr *acpi_cpufreq_attr[] = { | |||
753 | }; | 766 | }; |
754 | 767 | ||
755 | static struct cpufreq_driver acpi_cpufreq_driver = { | 768 | static struct cpufreq_driver acpi_cpufreq_driver = { |
756 | .verify = acpi_cpufreq_verify, | 769 | .verify = acpi_cpufreq_verify, |
757 | .target = acpi_cpufreq_target, | 770 | .target = acpi_cpufreq_target, |
758 | .init = acpi_cpufreq_cpu_init, | 771 | .bios_limit = acpi_processor_get_bios_limit, |
759 | .exit = acpi_cpufreq_cpu_exit, | 772 | .init = acpi_cpufreq_cpu_init, |
760 | .resume = acpi_cpufreq_resume, | 773 | .exit = acpi_cpufreq_cpu_exit, |
761 | .name = "acpi-cpufreq", | 774 | .resume = acpi_cpufreq_resume, |
762 | .owner = THIS_MODULE, | 775 | .name = "acpi-cpufreq", |
763 | .attr = acpi_cpufreq_attr, | 776 | .owner = THIS_MODULE, |
777 | .attr = acpi_cpufreq_attr, | ||
764 | }; | 778 | }; |
765 | 779 | ||
766 | static int __init acpi_cpufreq_init(void) | 780 | static int __init acpi_cpufreq_init(void) |
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index ce2ed3e4aad9..7e7eea4f8261 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c | |||
@@ -813,7 +813,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) | |||
813 | memcpy(eblcr, samuel2_eblcr, sizeof(samuel2_eblcr)); | 813 | memcpy(eblcr, samuel2_eblcr, sizeof(samuel2_eblcr)); |
814 | break; | 814 | break; |
815 | case 1 ... 15: | 815 | case 1 ... 15: |
816 | longhaul_version = TYPE_LONGHAUL_V1; | 816 | longhaul_version = TYPE_LONGHAUL_V2; |
817 | if (c->x86_mask < 8) { | 817 | if (c->x86_mask < 8) { |
818 | cpu_model = CPU_SAMUEL2; | 818 | cpu_model = CPU_SAMUEL2; |
819 | cpuname = "C3 'Samuel 2' [C5B]"; | 819 | cpuname = "C3 'Samuel 2' [C5B]"; |
@@ -885,7 +885,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) | |||
885 | 885 | ||
886 | /* Find ACPI data for processor */ | 886 | /* Find ACPI data for processor */ |
887 | acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, | 887 | acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, |
888 | ACPI_UINT32_MAX, &longhaul_walk_callback, | 888 | ACPI_UINT32_MAX, &longhaul_walk_callback, NULL, |
889 | NULL, (void *)&pr); | 889 | NULL, (void *)&pr); |
890 | 890 | ||
891 | /* Check ACPI support for C3 state */ | 891 | /* Check ACPI support for C3 state */ |
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c index f10dea409f40..cb01dac267d3 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c | |||
@@ -164,7 +164,7 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy) | |||
164 | } | 164 | } |
165 | 165 | ||
166 | /* cpuinfo and default policy values */ | 166 | /* cpuinfo and default policy values */ |
167 | policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; | 167 | policy->cpuinfo.transition_latency = 200000; |
168 | policy->cur = busfreq * max_multiplier; | 168 | policy->cur = busfreq * max_multiplier; |
169 | 169 | ||
170 | result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio); | 170 | result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio); |
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index d47c775eb0ab..9a97116f89e5 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c | |||
@@ -714,14 +714,17 @@ static struct freq_attr *powernow_table_attr[] = { | |||
714 | }; | 714 | }; |
715 | 715 | ||
716 | static struct cpufreq_driver powernow_driver = { | 716 | static struct cpufreq_driver powernow_driver = { |
717 | .verify = powernow_verify, | 717 | .verify = powernow_verify, |
718 | .target = powernow_target, | 718 | .target = powernow_target, |
719 | .get = powernow_get, | 719 | .get = powernow_get, |
720 | .init = powernow_cpu_init, | 720 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI |
721 | .exit = powernow_cpu_exit, | 721 | .bios_limit = acpi_processor_get_bios_limit, |
722 | .name = "powernow-k7", | 722 | #endif |
723 | .owner = THIS_MODULE, | 723 | .init = powernow_cpu_init, |
724 | .attr = powernow_table_attr, | 724 | .exit = powernow_cpu_exit, |
725 | .name = "powernow-k7", | ||
726 | .owner = THIS_MODULE, | ||
727 | .attr = powernow_table_attr, | ||
725 | }; | 728 | }; |
726 | 729 | ||
727 | static int __init powernow_init(void) | 730 | static int __init powernow_init(void) |
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 6394aa5c7985..f125e5c551c0 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c | |||
@@ -1022,7 +1022,7 @@ static int get_transition_latency(struct powernow_k8_data *data) | |||
1022 | * set it to 1 to avoid problems in the future. | 1022 | * set it to 1 to avoid problems in the future. |
1023 | * For all others it's a BIOS bug. | 1023 | * For all others it's a BIOS bug. |
1024 | */ | 1024 | */ |
1025 | if (!boot_cpu_data.x86 == 0x11) | 1025 | if (boot_cpu_data.x86 != 0x11) |
1026 | printk(KERN_ERR FW_WARN PFX "Invalid zero transition " | 1026 | printk(KERN_ERR FW_WARN PFX "Invalid zero transition " |
1027 | "latency\n"); | 1027 | "latency\n"); |
1028 | max_latency = 1; | 1028 | max_latency = 1; |
@@ -1118,7 +1118,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, | |||
1118 | static int powernowk8_target(struct cpufreq_policy *pol, | 1118 | static int powernowk8_target(struct cpufreq_policy *pol, |
1119 | unsigned targfreq, unsigned relation) | 1119 | unsigned targfreq, unsigned relation) |
1120 | { | 1120 | { |
1121 | cpumask_t oldmask; | 1121 | cpumask_var_t oldmask; |
1122 | struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); | 1122 | struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); |
1123 | u32 checkfid; | 1123 | u32 checkfid; |
1124 | u32 checkvid; | 1124 | u32 checkvid; |
@@ -1131,9 +1131,13 @@ static int powernowk8_target(struct cpufreq_policy *pol, | |||
1131 | checkfid = data->currfid; | 1131 | checkfid = data->currfid; |
1132 | checkvid = data->currvid; | 1132 | checkvid = data->currvid; |
1133 | 1133 | ||
1134 | /* only run on specific CPU from here on */ | 1134 | /* only run on specific CPU from here on. */ |
1135 | oldmask = current->cpus_allowed; | 1135 | /* This is poor form: use a workqueue or smp_call_function_single */ |
1136 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); | 1136 | if (!alloc_cpumask_var(&oldmask, GFP_KERNEL)) |
1137 | return -ENOMEM; | ||
1138 | |||
1139 | cpumask_copy(oldmask, tsk_cpus_allowed(current)); | ||
1140 | set_cpus_allowed_ptr(current, cpumask_of(pol->cpu)); | ||
1137 | 1141 | ||
1138 | if (smp_processor_id() != pol->cpu) { | 1142 | if (smp_processor_id() != pol->cpu) { |
1139 | printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); | 1143 | printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); |
@@ -1193,7 +1197,8 @@ static int powernowk8_target(struct cpufreq_policy *pol, | |||
1193 | ret = 0; | 1197 | ret = 0; |
1194 | 1198 | ||
1195 | err_out: | 1199 | err_out: |
1196 | set_cpus_allowed_ptr(current, &oldmask); | 1200 | set_cpus_allowed_ptr(current, oldmask); |
1201 | free_cpumask_var(oldmask); | ||
1197 | return ret; | 1202 | return ret; |
1198 | } | 1203 | } |
1199 | 1204 | ||
@@ -1393,14 +1398,15 @@ static struct freq_attr *powernow_k8_attr[] = { | |||
1393 | }; | 1398 | }; |
1394 | 1399 | ||
1395 | static struct cpufreq_driver cpufreq_amd64_driver = { | 1400 | static struct cpufreq_driver cpufreq_amd64_driver = { |
1396 | .verify = powernowk8_verify, | 1401 | .verify = powernowk8_verify, |
1397 | .target = powernowk8_target, | 1402 | .target = powernowk8_target, |
1398 | .init = powernowk8_cpu_init, | 1403 | .bios_limit = acpi_processor_get_bios_limit, |
1399 | .exit = __devexit_p(powernowk8_cpu_exit), | 1404 | .init = powernowk8_cpu_init, |
1400 | .get = powernowk8_get, | 1405 | .exit = __devexit_p(powernowk8_cpu_exit), |
1401 | .name = "powernow-k8", | 1406 | .get = powernowk8_get, |
1402 | .owner = THIS_MODULE, | 1407 | .name = "powernow-k8", |
1403 | .attr = powernow_k8_attr, | 1408 | .owner = THIS_MODULE, |
1409 | .attr = powernow_k8_attr, | ||
1404 | }; | 1410 | }; |
1405 | 1411 | ||
1406 | /* driver entry point for init */ | 1412 | /* driver entry point for init */ |
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 6911e91fb4f6..2ce8e0b5cc54 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | |||
@@ -39,7 +39,7 @@ static struct pci_dev *speedstep_chipset_dev; | |||
39 | 39 | ||
40 | /* speedstep_processor | 40 | /* speedstep_processor |
41 | */ | 41 | */ |
42 | static unsigned int speedstep_processor; | 42 | static enum speedstep_processor speedstep_processor; |
43 | 43 | ||
44 | static u32 pmbase; | 44 | static u32 pmbase; |
45 | 45 | ||
@@ -232,28 +232,23 @@ static unsigned int speedstep_detect_chipset(void) | |||
232 | return 0; | 232 | return 0; |
233 | } | 233 | } |
234 | 234 | ||
235 | struct get_freq_data { | 235 | static void get_freq_data(void *_speed) |
236 | unsigned int speed; | ||
237 | unsigned int processor; | ||
238 | }; | ||
239 | |||
240 | static void get_freq_data(void *_data) | ||
241 | { | 236 | { |
242 | struct get_freq_data *data = _data; | 237 | unsigned int *speed = _speed; |
243 | 238 | ||
244 | data->speed = speedstep_get_frequency(data->processor); | 239 | *speed = speedstep_get_frequency(speedstep_processor); |
245 | } | 240 | } |
246 | 241 | ||
247 | static unsigned int speedstep_get(unsigned int cpu) | 242 | static unsigned int speedstep_get(unsigned int cpu) |
248 | { | 243 | { |
249 | struct get_freq_data data = { .processor = cpu }; | 244 | unsigned int speed; |
250 | 245 | ||
251 | /* You're supposed to ensure CPU is online. */ | 246 | /* You're supposed to ensure CPU is online. */ |
252 | if (smp_call_function_single(cpu, get_freq_data, &data, 1) != 0) | 247 | if (smp_call_function_single(cpu, get_freq_data, &speed, 1) != 0) |
253 | BUG(); | 248 | BUG(); |
254 | 249 | ||
255 | dprintk("detected %u kHz as current frequency\n", data.speed); | 250 | dprintk("detected %u kHz as current frequency\n", speed); |
256 | return data.speed; | 251 | return speed; |
257 | } | 252 | } |
258 | 253 | ||
259 | /** | 254 | /** |
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c index f4c290b8482f..ad0083abfa23 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c | |||
@@ -34,7 +34,7 @@ static int relaxed_check; | |||
34 | * GET PROCESSOR CORE SPEED IN KHZ * | 34 | * GET PROCESSOR CORE SPEED IN KHZ * |
35 | *********************************************************************/ | 35 | *********************************************************************/ |
36 | 36 | ||
37 | static unsigned int pentium3_get_frequency(unsigned int processor) | 37 | static unsigned int pentium3_get_frequency(enum speedstep_processor processor) |
38 | { | 38 | { |
39 | /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */ | 39 | /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */ |
40 | struct { | 40 | struct { |
@@ -227,7 +227,7 @@ static unsigned int pentium4_get_frequency(void) | |||
227 | 227 | ||
228 | 228 | ||
229 | /* Warning: may get called from smp_call_function_single. */ | 229 | /* Warning: may get called from smp_call_function_single. */ |
230 | unsigned int speedstep_get_frequency(unsigned int processor) | 230 | unsigned int speedstep_get_frequency(enum speedstep_processor processor) |
231 | { | 231 | { |
232 | switch (processor) { | 232 | switch (processor) { |
233 | case SPEEDSTEP_CPU_PCORE: | 233 | case SPEEDSTEP_CPU_PCORE: |
@@ -380,7 +380,7 @@ EXPORT_SYMBOL_GPL(speedstep_detect_processor); | |||
380 | * DETECT SPEEDSTEP SPEEDS * | 380 | * DETECT SPEEDSTEP SPEEDS * |
381 | *********************************************************************/ | 381 | *********************************************************************/ |
382 | 382 | ||
383 | unsigned int speedstep_get_freqs(unsigned int processor, | 383 | unsigned int speedstep_get_freqs(enum speedstep_processor processor, |
384 | unsigned int *low_speed, | 384 | unsigned int *low_speed, |
385 | unsigned int *high_speed, | 385 | unsigned int *high_speed, |
386 | unsigned int *transition_latency, | 386 | unsigned int *transition_latency, |
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h index 2b6c04e5a304..70d9cea1219d 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h | |||
@@ -11,18 +11,18 @@ | |||
11 | 11 | ||
12 | 12 | ||
13 | /* processors */ | 13 | /* processors */ |
14 | 14 | enum speedstep_processor { | |
15 | #define SPEEDSTEP_CPU_PIII_C_EARLY 0x00000001 /* Coppermine core */ | 15 | SPEEDSTEP_CPU_PIII_C_EARLY = 0x00000001, /* Coppermine core */ |
16 | #define SPEEDSTEP_CPU_PIII_C 0x00000002 /* Coppermine core */ | 16 | SPEEDSTEP_CPU_PIII_C = 0x00000002, /* Coppermine core */ |
17 | #define SPEEDSTEP_CPU_PIII_T 0x00000003 /* Tualatin core */ | 17 | SPEEDSTEP_CPU_PIII_T = 0x00000003, /* Tualatin core */ |
18 | #define SPEEDSTEP_CPU_P4M 0x00000004 /* P4-M */ | 18 | SPEEDSTEP_CPU_P4M = 0x00000004, /* P4-M */ |
19 | |||
20 | /* the following processors are not speedstep-capable and are not auto-detected | 19 | /* the following processors are not speedstep-capable and are not auto-detected |
21 | * in speedstep_detect_processor(). However, their speed can be detected using | 20 | * in speedstep_detect_processor(). However, their speed can be detected using |
22 | * the speedstep_get_frequency() call. */ | 21 | * the speedstep_get_frequency() call. */ |
23 | #define SPEEDSTEP_CPU_PM 0xFFFFFF03 /* Pentium M */ | 22 | SPEEDSTEP_CPU_PM = 0xFFFFFF03, /* Pentium M */ |
24 | #define SPEEDSTEP_CPU_P4D 0xFFFFFF04 /* desktop P4 */ | 23 | SPEEDSTEP_CPU_P4D = 0xFFFFFF04, /* desktop P4 */ |
25 | #define SPEEDSTEP_CPU_PCORE 0xFFFFFF05 /* Core */ | 24 | SPEEDSTEP_CPU_PCORE = 0xFFFFFF05, /* Core */ |
25 | }; | ||
26 | 26 | ||
27 | /* speedstep states -- only two of them */ | 27 | /* speedstep states -- only two of them */ |
28 | 28 | ||
@@ -31,10 +31,10 @@ | |||
31 | 31 | ||
32 | 32 | ||
33 | /* detect a speedstep-capable processor */ | 33 | /* detect a speedstep-capable processor */ |
34 | extern unsigned int speedstep_detect_processor (void); | 34 | extern enum speedstep_processor speedstep_detect_processor(void); |
35 | 35 | ||
36 | /* detect the current speed (in khz) of the processor */ | 36 | /* detect the current speed (in khz) of the processor */ |
37 | extern unsigned int speedstep_get_frequency(unsigned int processor); | 37 | extern unsigned int speedstep_get_frequency(enum speedstep_processor processor); |
38 | 38 | ||
39 | 39 | ||
40 | /* detect the low and high speeds of the processor. The callback | 40 | /* detect the low and high speeds of the processor. The callback |
@@ -42,7 +42,7 @@ extern unsigned int speedstep_get_frequency(unsigned int processor); | |||
42 | * SPEEDSTEP_LOW; the second argument is zero so that no | 42 | * SPEEDSTEP_LOW; the second argument is zero so that no |
43 | * cpufreq_notify_transition calls are initiated. | 43 | * cpufreq_notify_transition calls are initiated. |
44 | */ | 44 | */ |
45 | extern unsigned int speedstep_get_freqs(unsigned int processor, | 45 | extern unsigned int speedstep_get_freqs(enum speedstep_processor processor, |
46 | unsigned int *low_speed, | 46 | unsigned int *low_speed, |
47 | unsigned int *high_speed, | 47 | unsigned int *high_speed, |
48 | unsigned int *transition_latency, | 48 | unsigned int *transition_latency, |
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c index befea088e4f5..04d73c114e49 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c | |||
@@ -35,7 +35,7 @@ static int smi_cmd; | |||
35 | static unsigned int smi_sig; | 35 | static unsigned int smi_sig; |
36 | 36 | ||
37 | /* info about the processor */ | 37 | /* info about the processor */ |
38 | static unsigned int speedstep_processor; | 38 | static enum speedstep_processor speedstep_processor; |
39 | 39 | ||
40 | /* | 40 | /* |
41 | * There are only two frequency states for each processor. Values | 41 | * There are only two frequency states for each processor. Values |
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 19807b89f058..4fbd384fb645 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c | |||
@@ -373,7 +373,7 @@ static void __cpuinit init_nsc(struct cpuinfo_x86 *c) | |||
373 | /* Handle the GX (Formally known as the GX2) */ | 373 | /* Handle the GX (Formally known as the GX2) */ |
374 | 374 | ||
375 | if (c->x86 == 5 && c->x86_model == 5) | 375 | if (c->x86 == 5 && c->x86_model == 5) |
376 | display_cacheinfo(c); | 376 | cpu_detect_cache_sizes(c); |
377 | else | 377 | else |
378 | init_cyrix(c); | 378 | init_cyrix(c); |
379 | } | 379 | } |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 40e1835b35e8..879666f4d871 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -70,7 +70,6 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | |||
70 | if (c->x86_power & (1 << 8)) { | 70 | if (c->x86_power & (1 << 8)) { |
71 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 71 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
72 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); | 72 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); |
73 | set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); | ||
74 | sched_clock_stable = 1; | 73 | sched_clock_stable = 1; |
75 | } | 74 | } |
76 | 75 | ||
@@ -263,11 +262,13 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) | |||
263 | /* Don't do the funky fallback heuristics the AMD version employs | 262 | /* Don't do the funky fallback heuristics the AMD version employs |
264 | for now. */ | 263 | for now. */ |
265 | node = apicid_to_node[apicid]; | 264 | node = apicid_to_node[apicid]; |
266 | if (node == NUMA_NO_NODE || !node_online(node)) | 265 | if (node == NUMA_NO_NODE) |
267 | node = first_node(node_online_map); | 266 | node = first_node(node_online_map); |
267 | else if (!node_online(node)) { | ||
268 | /* reuse the value from init_cpu_to_node() */ | ||
269 | node = cpu_to_node(cpu); | ||
270 | } | ||
268 | numa_set_node(cpu, node); | 271 | numa_set_node(cpu, node); |
269 | |||
270 | printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node); | ||
271 | #endif | 272 | #endif |
272 | } | 273 | } |
273 | 274 | ||
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index f5ccb4fa5a5d..fc6c8ef92dcc 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -94,7 +94,7 @@ static const struct _cache_table __cpuinitconst cache_table[] = | |||
94 | { 0xd1, LVL_3, 1024 }, /* 4-way set assoc, 64 byte line size */ | 94 | { 0xd1, LVL_3, 1024 }, /* 4-way set assoc, 64 byte line size */ |
95 | { 0xd2, LVL_3, 2048 }, /* 4-way set assoc, 64 byte line size */ | 95 | { 0xd2, LVL_3, 2048 }, /* 4-way set assoc, 64 byte line size */ |
96 | { 0xd6, LVL_3, 1024 }, /* 8-way set assoc, 64 byte line size */ | 96 | { 0xd6, LVL_3, 1024 }, /* 8-way set assoc, 64 byte line size */ |
97 | { 0xd7, LVL_3, 2038 }, /* 8-way set assoc, 64 byte line size */ | 97 | { 0xd7, LVL_3, 2048 }, /* 8-way set assoc, 64 byte line size */ |
98 | { 0xd8, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */ | 98 | { 0xd8, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */ |
99 | { 0xdc, LVL_3, 2048 }, /* 12-way set assoc, 64 byte line size */ | 99 | { 0xdc, LVL_3, 2048 }, /* 12-way set assoc, 64 byte line size */ |
100 | { 0xdd, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */ | 100 | { 0xdd, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */ |
@@ -102,6 +102,9 @@ static const struct _cache_table __cpuinitconst cache_table[] = | |||
102 | { 0xe2, LVL_3, 2048 }, /* 16-way set assoc, 64 byte line size */ | 102 | { 0xe2, LVL_3, 2048 }, /* 16-way set assoc, 64 byte line size */ |
103 | { 0xe3, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */ | 103 | { 0xe3, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */ |
104 | { 0xe4, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ | 104 | { 0xe4, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ |
105 | { 0xea, LVL_3, 12288 }, /* 24-way set assoc, 64 byte line size */ | ||
106 | { 0xeb, LVL_3, 18432 }, /* 24-way set assoc, 64 byte line size */ | ||
107 | { 0xec, LVL_3, 24576 }, /* 24-way set assoc, 64 byte line size */ | ||
105 | { 0x00, 0, 0} | 108 | { 0x00, 0, 0} |
106 | }; | 109 | }; |
107 | 110 | ||
@@ -488,22 +491,6 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
488 | #endif | 491 | #endif |
489 | } | 492 | } |
490 | 493 | ||
491 | if (trace) | ||
492 | printk(KERN_INFO "CPU: Trace cache: %dK uops", trace); | ||
493 | else if (l1i) | ||
494 | printk(KERN_INFO "CPU: L1 I cache: %dK", l1i); | ||
495 | |||
496 | if (l1d) | ||
497 | printk(KERN_CONT ", L1 D cache: %dK\n", l1d); | ||
498 | else | ||
499 | printk(KERN_CONT "\n"); | ||
500 | |||
501 | if (l2) | ||
502 | printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); | ||
503 | |||
504 | if (l3) | ||
505 | printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); | ||
506 | |||
507 | c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); | 494 | c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); |
508 | 495 | ||
509 | return l2; | 496 | return l2; |
@@ -520,18 +507,19 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) | |||
520 | { | 507 | { |
521 | struct _cpuid4_info *this_leaf, *sibling_leaf; | 508 | struct _cpuid4_info *this_leaf, *sibling_leaf; |
522 | unsigned long num_threads_sharing; | 509 | unsigned long num_threads_sharing; |
523 | int index_msb, i; | 510 | int index_msb, i, sibling; |
524 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 511 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
525 | 512 | ||
526 | if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) { | 513 | if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) { |
527 | struct cpuinfo_x86 *d; | 514 | for_each_cpu(i, c->llc_shared_map) { |
528 | for_each_online_cpu(i) { | ||
529 | if (!per_cpu(ici_cpuid4_info, i)) | 515 | if (!per_cpu(ici_cpuid4_info, i)) |
530 | continue; | 516 | continue; |
531 | d = &cpu_data(i); | ||
532 | this_leaf = CPUID4_INFO_IDX(i, index); | 517 | this_leaf = CPUID4_INFO_IDX(i, index); |
533 | cpumask_copy(to_cpumask(this_leaf->shared_cpu_map), | 518 | for_each_cpu(sibling, c->llc_shared_map) { |
534 | d->llc_shared_map); | 519 | if (!cpu_online(sibling)) |
520 | continue; | ||
521 | set_bit(sibling, this_leaf->shared_cpu_map); | ||
522 | } | ||
535 | } | 523 | } |
536 | return; | 524 | return; |
537 | } | 525 | } |
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 472763d92098..73734baa50f2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c | |||
@@ -74,7 +74,7 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs) | |||
74 | m->finished = 0; | 74 | m->finished = 0; |
75 | } | 75 | } |
76 | 76 | ||
77 | static cpumask_t mce_inject_cpumask; | 77 | static cpumask_var_t mce_inject_cpumask; |
78 | 78 | ||
79 | static int mce_raise_notify(struct notifier_block *self, | 79 | static int mce_raise_notify(struct notifier_block *self, |
80 | unsigned long val, void *data) | 80 | unsigned long val, void *data) |
@@ -82,9 +82,9 @@ static int mce_raise_notify(struct notifier_block *self, | |||
82 | struct die_args *args = (struct die_args *)data; | 82 | struct die_args *args = (struct die_args *)data; |
83 | int cpu = smp_processor_id(); | 83 | int cpu = smp_processor_id(); |
84 | struct mce *m = &__get_cpu_var(injectm); | 84 | struct mce *m = &__get_cpu_var(injectm); |
85 | if (val != DIE_NMI_IPI || !cpu_isset(cpu, mce_inject_cpumask)) | 85 | if (val != DIE_NMI_IPI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) |
86 | return NOTIFY_DONE; | 86 | return NOTIFY_DONE; |
87 | cpu_clear(cpu, mce_inject_cpumask); | 87 | cpumask_clear_cpu(cpu, mce_inject_cpumask); |
88 | if (m->inject_flags & MCJ_EXCEPTION) | 88 | if (m->inject_flags & MCJ_EXCEPTION) |
89 | raise_exception(m, args->regs); | 89 | raise_exception(m, args->regs); |
90 | else if (m->status) | 90 | else if (m->status) |
@@ -148,22 +148,22 @@ static void raise_mce(struct mce *m) | |||
148 | unsigned long start; | 148 | unsigned long start; |
149 | int cpu; | 149 | int cpu; |
150 | get_online_cpus(); | 150 | get_online_cpus(); |
151 | mce_inject_cpumask = cpu_online_map; | 151 | cpumask_copy(mce_inject_cpumask, cpu_online_mask); |
152 | cpu_clear(get_cpu(), mce_inject_cpumask); | 152 | cpumask_clear_cpu(get_cpu(), mce_inject_cpumask); |
153 | for_each_online_cpu(cpu) { | 153 | for_each_online_cpu(cpu) { |
154 | struct mce *mcpu = &per_cpu(injectm, cpu); | 154 | struct mce *mcpu = &per_cpu(injectm, cpu); |
155 | if (!mcpu->finished || | 155 | if (!mcpu->finished || |
156 | MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM) | 156 | MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM) |
157 | cpu_clear(cpu, mce_inject_cpumask); | 157 | cpumask_clear_cpu(cpu, mce_inject_cpumask); |
158 | } | 158 | } |
159 | if (!cpus_empty(mce_inject_cpumask)) | 159 | if (!cpumask_empty(mce_inject_cpumask)) |
160 | apic->send_IPI_mask(&mce_inject_cpumask, NMI_VECTOR); | 160 | apic->send_IPI_mask(mce_inject_cpumask, NMI_VECTOR); |
161 | start = jiffies; | 161 | start = jiffies; |
162 | while (!cpus_empty(mce_inject_cpumask)) { | 162 | while (!cpumask_empty(mce_inject_cpumask)) { |
163 | if (!time_before(jiffies, start + 2*HZ)) { | 163 | if (!time_before(jiffies, start + 2*HZ)) { |
164 | printk(KERN_ERR | 164 | printk(KERN_ERR |
165 | "Timeout waiting for mce inject NMI %lx\n", | 165 | "Timeout waiting for mce inject NMI %lx\n", |
166 | *cpus_addr(mce_inject_cpumask)); | 166 | *cpumask_bits(mce_inject_cpumask)); |
167 | break; | 167 | break; |
168 | } | 168 | } |
169 | cpu_relax(); | 169 | cpu_relax(); |
@@ -210,6 +210,8 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf, | |||
210 | 210 | ||
211 | static int inject_init(void) | 211 | static int inject_init(void) |
212 | { | 212 | { |
213 | if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL)) | ||
214 | return -ENOMEM; | ||
213 | printk(KERN_INFO "Machine check injector initialized\n"); | 215 | printk(KERN_INFO "Machine check injector initialized\n"); |
214 | mce_chrdev_ops.write = mce_write; | 216 | mce_chrdev_ops.write = mce_write; |
215 | register_die_notifier(&mce_raise_nb); | 217 | register_die_notifier(&mce_raise_nb); |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 183c3457d2f4..a8aacd4b513c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -46,6 +46,9 @@ | |||
46 | 46 | ||
47 | #include "mce-internal.h" | 47 | #include "mce-internal.h" |
48 | 48 | ||
49 | #define CREATE_TRACE_POINTS | ||
50 | #include <trace/events/mce.h> | ||
51 | |||
49 | int mce_disabled __read_mostly; | 52 | int mce_disabled __read_mostly; |
50 | 53 | ||
51 | #define MISC_MCELOG_MINOR 227 | 54 | #define MISC_MCELOG_MINOR 227 |
@@ -85,6 +88,26 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait); | |||
85 | static DEFINE_PER_CPU(struct mce, mces_seen); | 88 | static DEFINE_PER_CPU(struct mce, mces_seen); |
86 | static int cpu_missing; | 89 | static int cpu_missing; |
87 | 90 | ||
91 | /* | ||
92 | * CPU/chipset specific EDAC code can register a notifier call here to print | ||
93 | * MCE errors in a human-readable form. | ||
94 | */ | ||
95 | ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); | ||
96 | EXPORT_SYMBOL_GPL(x86_mce_decoder_chain); | ||
97 | |||
98 | static int default_decode_mce(struct notifier_block *nb, unsigned long val, | ||
99 | void *data) | ||
100 | { | ||
101 | pr_emerg("No human readable MCE decoding support on this CPU type.\n"); | ||
102 | pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); | ||
103 | |||
104 | return NOTIFY_STOP; | ||
105 | } | ||
106 | |||
107 | static struct notifier_block mce_dec_nb = { | ||
108 | .notifier_call = default_decode_mce, | ||
109 | .priority = -1, | ||
110 | }; | ||
88 | 111 | ||
89 | /* MCA banks polled by the period polling timer for corrected events */ | 112 | /* MCA banks polled by the period polling timer for corrected events */ |
90 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { | 113 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { |
@@ -129,6 +152,9 @@ void mce_log(struct mce *mce) | |||
129 | { | 152 | { |
130 | unsigned next, entry; | 153 | unsigned next, entry; |
131 | 154 | ||
155 | /* Emit the trace record: */ | ||
156 | trace_mce_record(mce); | ||
157 | |||
132 | mce->finished = 0; | 158 | mce->finished = 0; |
133 | wmb(); | 159 | wmb(); |
134 | for (;;) { | 160 | for (;;) { |
@@ -165,46 +191,46 @@ void mce_log(struct mce *mce) | |||
165 | set_bit(0, &mce_need_notify); | 191 | set_bit(0, &mce_need_notify); |
166 | } | 192 | } |
167 | 193 | ||
168 | void __weak decode_mce(struct mce *m) | ||
169 | { | ||
170 | return; | ||
171 | } | ||
172 | |||
173 | static void print_mce(struct mce *m) | 194 | static void print_mce(struct mce *m) |
174 | { | 195 | { |
175 | printk(KERN_EMERG | 196 | pr_emerg("CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", |
176 | "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", | ||
177 | m->extcpu, m->mcgstatus, m->bank, m->status); | 197 | m->extcpu, m->mcgstatus, m->bank, m->status); |
198 | |||
178 | if (m->ip) { | 199 | if (m->ip) { |
179 | printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", | 200 | pr_emerg("RIP%s %02x:<%016Lx> ", |
180 | !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", | 201 | !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", |
181 | m->cs, m->ip); | 202 | m->cs, m->ip); |
203 | |||
182 | if (m->cs == __KERNEL_CS) | 204 | if (m->cs == __KERNEL_CS) |
183 | print_symbol("{%s}", m->ip); | 205 | print_symbol("{%s}", m->ip); |
184 | printk(KERN_CONT "\n"); | 206 | pr_cont("\n"); |
185 | } | 207 | } |
186 | printk(KERN_EMERG "TSC %llx ", m->tsc); | 208 | |
209 | pr_emerg("TSC %llx ", m->tsc); | ||
187 | if (m->addr) | 210 | if (m->addr) |
188 | printk(KERN_CONT "ADDR %llx ", m->addr); | 211 | pr_cont("ADDR %llx ", m->addr); |
189 | if (m->misc) | 212 | if (m->misc) |
190 | printk(KERN_CONT "MISC %llx ", m->misc); | 213 | pr_cont("MISC %llx ", m->misc); |
191 | printk(KERN_CONT "\n"); | 214 | |
192 | printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", | 215 | pr_cont("\n"); |
193 | m->cpuvendor, m->cpuid, m->time, m->socketid, | 216 | pr_emerg("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", |
194 | m->apicid); | 217 | m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid); |
195 | 218 | ||
196 | decode_mce(m); | 219 | /* |
220 | * Print out human-readable details about the MCE error, | ||
221 | * (if the CPU has an implementation for that) | ||
222 | */ | ||
223 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); | ||
197 | } | 224 | } |
198 | 225 | ||
199 | static void print_mce_head(void) | 226 | static void print_mce_head(void) |
200 | { | 227 | { |
201 | printk(KERN_EMERG "\nHARDWARE ERROR\n"); | 228 | pr_emerg("\nHARDWARE ERROR\n"); |
202 | } | 229 | } |
203 | 230 | ||
204 | static void print_mce_tail(void) | 231 | static void print_mce_tail(void) |
205 | { | 232 | { |
206 | printk(KERN_EMERG "This is not a software problem!\n" | 233 | pr_emerg("This is not a software problem!\n"); |
207 | "Run through mcelog --ascii to decode and contact your hardware vendor\n"); | ||
208 | } | 234 | } |
209 | 235 | ||
210 | #define PANIC_TIMEOUT 5 /* 5 seconds */ | 236 | #define PANIC_TIMEOUT 5 /* 5 seconds */ |
@@ -218,6 +244,7 @@ static atomic_t mce_fake_paniced; | |||
218 | static void wait_for_panic(void) | 244 | static void wait_for_panic(void) |
219 | { | 245 | { |
220 | long timeout = PANIC_TIMEOUT*USEC_PER_SEC; | 246 | long timeout = PANIC_TIMEOUT*USEC_PER_SEC; |
247 | |||
221 | preempt_disable(); | 248 | preempt_disable(); |
222 | local_irq_enable(); | 249 | local_irq_enable(); |
223 | while (timeout-- > 0) | 250 | while (timeout-- > 0) |
@@ -285,6 +312,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp) | |||
285 | static int msr_to_offset(u32 msr) | 312 | static int msr_to_offset(u32 msr) |
286 | { | 313 | { |
287 | unsigned bank = __get_cpu_var(injectm.bank); | 314 | unsigned bank = __get_cpu_var(injectm.bank); |
315 | |||
288 | if (msr == rip_msr) | 316 | if (msr == rip_msr) |
289 | return offsetof(struct mce, ip); | 317 | return offsetof(struct mce, ip); |
290 | if (msr == MSR_IA32_MCx_STATUS(bank)) | 318 | if (msr == MSR_IA32_MCx_STATUS(bank)) |
@@ -1108,7 +1136,7 @@ static int check_interval = 5 * 60; /* 5 minutes */ | |||
1108 | static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */ | 1136 | static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */ |
1109 | static DEFINE_PER_CPU(struct timer_list, mce_timer); | 1137 | static DEFINE_PER_CPU(struct timer_list, mce_timer); |
1110 | 1138 | ||
1111 | static void mcheck_timer(unsigned long data) | 1139 | static void mce_start_timer(unsigned long data) |
1112 | { | 1140 | { |
1113 | struct timer_list *t = &per_cpu(mce_timer, data); | 1141 | struct timer_list *t = &per_cpu(mce_timer, data); |
1114 | int *n; | 1142 | int *n; |
@@ -1173,7 +1201,7 @@ int mce_notify_irq(void) | |||
1173 | } | 1201 | } |
1174 | EXPORT_SYMBOL_GPL(mce_notify_irq); | 1202 | EXPORT_SYMBOL_GPL(mce_notify_irq); |
1175 | 1203 | ||
1176 | static int mce_banks_init(void) | 1204 | static int __cpuinit __mcheck_cpu_mce_banks_init(void) |
1177 | { | 1205 | { |
1178 | int i; | 1206 | int i; |
1179 | 1207 | ||
@@ -1192,7 +1220,7 @@ static int mce_banks_init(void) | |||
1192 | /* | 1220 | /* |
1193 | * Initialize Machine Checks for a CPU. | 1221 | * Initialize Machine Checks for a CPU. |
1194 | */ | 1222 | */ |
1195 | static int __cpuinit mce_cap_init(void) | 1223 | static int __cpuinit __mcheck_cpu_cap_init(void) |
1196 | { | 1224 | { |
1197 | unsigned b; | 1225 | unsigned b; |
1198 | u64 cap; | 1226 | u64 cap; |
@@ -1200,7 +1228,8 @@ static int __cpuinit mce_cap_init(void) | |||
1200 | rdmsrl(MSR_IA32_MCG_CAP, cap); | 1228 | rdmsrl(MSR_IA32_MCG_CAP, cap); |
1201 | 1229 | ||
1202 | b = cap & MCG_BANKCNT_MASK; | 1230 | b = cap & MCG_BANKCNT_MASK; |
1203 | printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b); | 1231 | if (!banks) |
1232 | printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b); | ||
1204 | 1233 | ||
1205 | if (b > MAX_NR_BANKS) { | 1234 | if (b > MAX_NR_BANKS) { |
1206 | printk(KERN_WARNING | 1235 | printk(KERN_WARNING |
@@ -1213,7 +1242,7 @@ static int __cpuinit mce_cap_init(void) | |||
1213 | WARN_ON(banks != 0 && b != banks); | 1242 | WARN_ON(banks != 0 && b != banks); |
1214 | banks = b; | 1243 | banks = b; |
1215 | if (!mce_banks) { | 1244 | if (!mce_banks) { |
1216 | int err = mce_banks_init(); | 1245 | int err = __mcheck_cpu_mce_banks_init(); |
1217 | 1246 | ||
1218 | if (err) | 1247 | if (err) |
1219 | return err; | 1248 | return err; |
@@ -1229,7 +1258,7 @@ static int __cpuinit mce_cap_init(void) | |||
1229 | return 0; | 1258 | return 0; |
1230 | } | 1259 | } |
1231 | 1260 | ||
1232 | static void mce_init(void) | 1261 | static void __mcheck_cpu_init_generic(void) |
1233 | { | 1262 | { |
1234 | mce_banks_t all_banks; | 1263 | mce_banks_t all_banks; |
1235 | u64 cap; | 1264 | u64 cap; |
@@ -1258,7 +1287,7 @@ static void mce_init(void) | |||
1258 | } | 1287 | } |
1259 | 1288 | ||
1260 | /* Add per CPU specific workarounds here */ | 1289 | /* Add per CPU specific workarounds here */ |
1261 | static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) | 1290 | static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) |
1262 | { | 1291 | { |
1263 | if (c->x86_vendor == X86_VENDOR_UNKNOWN) { | 1292 | if (c->x86_vendor == X86_VENDOR_UNKNOWN) { |
1264 | pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); | 1293 | pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); |
@@ -1326,7 +1355,7 @@ static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) | |||
1326 | return 0; | 1355 | return 0; |
1327 | } | 1356 | } |
1328 | 1357 | ||
1329 | static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) | 1358 | static void __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) |
1330 | { | 1359 | { |
1331 | if (c->x86 != 5) | 1360 | if (c->x86 != 5) |
1332 | return; | 1361 | return; |
@@ -1340,7 +1369,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) | |||
1340 | } | 1369 | } |
1341 | } | 1370 | } |
1342 | 1371 | ||
1343 | static void mce_cpu_features(struct cpuinfo_x86 *c) | 1372 | static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) |
1344 | { | 1373 | { |
1345 | switch (c->x86_vendor) { | 1374 | switch (c->x86_vendor) { |
1346 | case X86_VENDOR_INTEL: | 1375 | case X86_VENDOR_INTEL: |
@@ -1354,18 +1383,19 @@ static void mce_cpu_features(struct cpuinfo_x86 *c) | |||
1354 | } | 1383 | } |
1355 | } | 1384 | } |
1356 | 1385 | ||
1357 | static void mce_init_timer(void) | 1386 | static void __mcheck_cpu_init_timer(void) |
1358 | { | 1387 | { |
1359 | struct timer_list *t = &__get_cpu_var(mce_timer); | 1388 | struct timer_list *t = &__get_cpu_var(mce_timer); |
1360 | int *n = &__get_cpu_var(mce_next_interval); | 1389 | int *n = &__get_cpu_var(mce_next_interval); |
1361 | 1390 | ||
1391 | setup_timer(t, mce_start_timer, smp_processor_id()); | ||
1392 | |||
1362 | if (mce_ignore_ce) | 1393 | if (mce_ignore_ce) |
1363 | return; | 1394 | return; |
1364 | 1395 | ||
1365 | *n = check_interval * HZ; | 1396 | *n = check_interval * HZ; |
1366 | if (!*n) | 1397 | if (!*n) |
1367 | return; | 1398 | return; |
1368 | setup_timer(t, mcheck_timer, smp_processor_id()); | ||
1369 | t->expires = round_jiffies(jiffies + *n); | 1399 | t->expires = round_jiffies(jiffies + *n); |
1370 | add_timer_on(t, smp_processor_id()); | 1400 | add_timer_on(t, smp_processor_id()); |
1371 | } | 1401 | } |
@@ -1385,27 +1415,28 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) = | |||
1385 | * Called for each booted CPU to set up machine checks. | 1415 | * Called for each booted CPU to set up machine checks. |
1386 | * Must be called with preempt off: | 1416 | * Must be called with preempt off: |
1387 | */ | 1417 | */ |
1388 | void __cpuinit mcheck_init(struct cpuinfo_x86 *c) | 1418 | void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c) |
1389 | { | 1419 | { |
1390 | if (mce_disabled) | 1420 | if (mce_disabled) |
1391 | return; | 1421 | return; |
1392 | 1422 | ||
1393 | mce_ancient_init(c); | 1423 | __mcheck_cpu_ancient_init(c); |
1394 | 1424 | ||
1395 | if (!mce_available(c)) | 1425 | if (!mce_available(c)) |
1396 | return; | 1426 | return; |
1397 | 1427 | ||
1398 | if (mce_cap_init() < 0 || mce_cpu_quirks(c) < 0) { | 1428 | if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) { |
1399 | mce_disabled = 1; | 1429 | mce_disabled = 1; |
1400 | return; | 1430 | return; |
1401 | } | 1431 | } |
1402 | 1432 | ||
1403 | machine_check_vector = do_machine_check; | 1433 | machine_check_vector = do_machine_check; |
1404 | 1434 | ||
1405 | mce_init(); | 1435 | __mcheck_cpu_init_generic(); |
1406 | mce_cpu_features(c); | 1436 | __mcheck_cpu_init_vendor(c); |
1407 | mce_init_timer(); | 1437 | __mcheck_cpu_init_timer(); |
1408 | INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); | 1438 | INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); |
1439 | |||
1409 | } | 1440 | } |
1410 | 1441 | ||
1411 | /* | 1442 | /* |
@@ -1625,6 +1656,15 @@ static int __init mcheck_enable(char *str) | |||
1625 | } | 1656 | } |
1626 | __setup("mce", mcheck_enable); | 1657 | __setup("mce", mcheck_enable); |
1627 | 1658 | ||
1659 | int __init mcheck_init(void) | ||
1660 | { | ||
1661 | atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb); | ||
1662 | |||
1663 | mcheck_intel_therm_init(); | ||
1664 | |||
1665 | return 0; | ||
1666 | } | ||
1667 | |||
1628 | /* | 1668 | /* |
1629 | * Sysfs support | 1669 | * Sysfs support |
1630 | */ | 1670 | */ |
@@ -1633,7 +1673,7 @@ __setup("mce", mcheck_enable); | |||
1633 | * Disable machine checks on suspend and shutdown. We can't really handle | 1673 | * Disable machine checks on suspend and shutdown. We can't really handle |
1634 | * them later. | 1674 | * them later. |
1635 | */ | 1675 | */ |
1636 | static int mce_disable(void) | 1676 | static int mce_disable_error_reporting(void) |
1637 | { | 1677 | { |
1638 | int i; | 1678 | int i; |
1639 | 1679 | ||
@@ -1648,12 +1688,12 @@ static int mce_disable(void) | |||
1648 | 1688 | ||
1649 | static int mce_suspend(struct sys_device *dev, pm_message_t state) | 1689 | static int mce_suspend(struct sys_device *dev, pm_message_t state) |
1650 | { | 1690 | { |
1651 | return mce_disable(); | 1691 | return mce_disable_error_reporting(); |
1652 | } | 1692 | } |
1653 | 1693 | ||
1654 | static int mce_shutdown(struct sys_device *dev) | 1694 | static int mce_shutdown(struct sys_device *dev) |
1655 | { | 1695 | { |
1656 | return mce_disable(); | 1696 | return mce_disable_error_reporting(); |
1657 | } | 1697 | } |
1658 | 1698 | ||
1659 | /* | 1699 | /* |
@@ -1663,8 +1703,8 @@ static int mce_shutdown(struct sys_device *dev) | |||
1663 | */ | 1703 | */ |
1664 | static int mce_resume(struct sys_device *dev) | 1704 | static int mce_resume(struct sys_device *dev) |
1665 | { | 1705 | { |
1666 | mce_init(); | 1706 | __mcheck_cpu_init_generic(); |
1667 | mce_cpu_features(¤t_cpu_data); | 1707 | __mcheck_cpu_init_vendor(¤t_cpu_data); |
1668 | 1708 | ||
1669 | return 0; | 1709 | return 0; |
1670 | } | 1710 | } |
@@ -1674,8 +1714,8 @@ static void mce_cpu_restart(void *data) | |||
1674 | del_timer_sync(&__get_cpu_var(mce_timer)); | 1714 | del_timer_sync(&__get_cpu_var(mce_timer)); |
1675 | if (!mce_available(¤t_cpu_data)) | 1715 | if (!mce_available(¤t_cpu_data)) |
1676 | return; | 1716 | return; |
1677 | mce_init(); | 1717 | __mcheck_cpu_init_generic(); |
1678 | mce_init_timer(); | 1718 | __mcheck_cpu_init_timer(); |
1679 | } | 1719 | } |
1680 | 1720 | ||
1681 | /* Reinit MCEs after user configuration changes */ | 1721 | /* Reinit MCEs after user configuration changes */ |
@@ -1701,7 +1741,7 @@ static void mce_enable_ce(void *all) | |||
1701 | cmci_reenable(); | 1741 | cmci_reenable(); |
1702 | cmci_recheck(); | 1742 | cmci_recheck(); |
1703 | if (all) | 1743 | if (all) |
1704 | mce_init_timer(); | 1744 | __mcheck_cpu_init_timer(); |
1705 | } | 1745 | } |
1706 | 1746 | ||
1707 | static struct sysdev_class mce_sysclass = { | 1747 | static struct sysdev_class mce_sysclass = { |
@@ -1889,7 +1929,7 @@ error2: | |||
1889 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr); | 1929 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr); |
1890 | error: | 1930 | error: |
1891 | while (--i >= 0) | 1931 | while (--i >= 0) |
1892 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr); | 1932 | sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); |
1893 | 1933 | ||
1894 | sysdev_unregister(&per_cpu(mce_dev, cpu)); | 1934 | sysdev_unregister(&per_cpu(mce_dev, cpu)); |
1895 | 1935 | ||
@@ -1914,13 +1954,14 @@ static __cpuinit void mce_remove_device(unsigned int cpu) | |||
1914 | } | 1954 | } |
1915 | 1955 | ||
1916 | /* Make sure there are no machine checks on offlined CPUs. */ | 1956 | /* Make sure there are no machine checks on offlined CPUs. */ |
1917 | static void mce_disable_cpu(void *h) | 1957 | static void __cpuinit mce_disable_cpu(void *h) |
1918 | { | 1958 | { |
1919 | unsigned long action = *(unsigned long *)h; | 1959 | unsigned long action = *(unsigned long *)h; |
1920 | int i; | 1960 | int i; |
1921 | 1961 | ||
1922 | if (!mce_available(¤t_cpu_data)) | 1962 | if (!mce_available(¤t_cpu_data)) |
1923 | return; | 1963 | return; |
1964 | |||
1924 | if (!(action & CPU_TASKS_FROZEN)) | 1965 | if (!(action & CPU_TASKS_FROZEN)) |
1925 | cmci_clear(); | 1966 | cmci_clear(); |
1926 | for (i = 0; i < banks; i++) { | 1967 | for (i = 0; i < banks; i++) { |
@@ -1931,7 +1972,7 @@ static void mce_disable_cpu(void *h) | |||
1931 | } | 1972 | } |
1932 | } | 1973 | } |
1933 | 1974 | ||
1934 | static void mce_reenable_cpu(void *h) | 1975 | static void __cpuinit mce_reenable_cpu(void *h) |
1935 | { | 1976 | { |
1936 | unsigned long action = *(unsigned long *)h; | 1977 | unsigned long action = *(unsigned long *)h; |
1937 | int i; | 1978 | int i; |
@@ -1976,9 +2017,11 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
1976 | break; | 2017 | break; |
1977 | case CPU_DOWN_FAILED: | 2018 | case CPU_DOWN_FAILED: |
1978 | case CPU_DOWN_FAILED_FROZEN: | 2019 | case CPU_DOWN_FAILED_FROZEN: |
1979 | t->expires = round_jiffies(jiffies + | 2020 | if (!mce_ignore_ce && check_interval) { |
2021 | t->expires = round_jiffies(jiffies + | ||
1980 | __get_cpu_var(mce_next_interval)); | 2022 | __get_cpu_var(mce_next_interval)); |
1981 | add_timer_on(t, cpu); | 2023 | add_timer_on(t, cpu); |
2024 | } | ||
1982 | smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); | 2025 | smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); |
1983 | break; | 2026 | break; |
1984 | case CPU_POST_DEAD: | 2027 | case CPU_POST_DEAD: |
@@ -2010,7 +2053,7 @@ static __init void mce_init_banks(void) | |||
2010 | } | 2053 | } |
2011 | } | 2054 | } |
2012 | 2055 | ||
2013 | static __init int mce_init_device(void) | 2056 | static __init int mcheck_init_device(void) |
2014 | { | 2057 | { |
2015 | int err; | 2058 | int err; |
2016 | int i = 0; | 2059 | int i = 0; |
@@ -2038,7 +2081,7 @@ static __init int mce_init_device(void) | |||
2038 | return err; | 2081 | return err; |
2039 | } | 2082 | } |
2040 | 2083 | ||
2041 | device_initcall(mce_init_device); | 2084 | device_initcall(mcheck_init_device); |
2042 | 2085 | ||
2043 | /* | 2086 | /* |
2044 | * Old style boot options parsing. Only for compatibility. | 2087 | * Old style boot options parsing. Only for compatibility. |
@@ -2086,7 +2129,7 @@ static int fake_panic_set(void *data, u64 val) | |||
2086 | DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get, | 2129 | DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get, |
2087 | fake_panic_set, "%llu\n"); | 2130 | fake_panic_set, "%llu\n"); |
2088 | 2131 | ||
2089 | static int __init mce_debugfs_init(void) | 2132 | static int __init mcheck_debugfs_init(void) |
2090 | { | 2133 | { |
2091 | struct dentry *dmce, *ffake_panic; | 2134 | struct dentry *dmce, *ffake_panic; |
2092 | 2135 | ||
@@ -2100,5 +2143,5 @@ static int __init mce_debugfs_init(void) | |||
2100 | 2143 | ||
2101 | return 0; | 2144 | return 0; |
2102 | } | 2145 | } |
2103 | late_initcall(mce_debugfs_init); | 2146 | late_initcall(mcheck_debugfs_init); |
2104 | #endif | 2147 | #endif |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 889f665fe93d..7c785634af2b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/init.h> | 8 | #include <linux/init.h> |
9 | #include <linux/interrupt.h> | 9 | #include <linux/interrupt.h> |
10 | #include <linux/percpu.h> | 10 | #include <linux/percpu.h> |
11 | #include <linux/sched.h> | ||
11 | #include <asm/apic.h> | 12 | #include <asm/apic.h> |
12 | #include <asm/processor.h> | 13 | #include <asm/processor.h> |
13 | #include <asm/msr.h> | 14 | #include <asm/msr.h> |
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index b3a1dba75330..81c499eceb21 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -49,6 +49,8 @@ static DEFINE_PER_CPU(struct thermal_state, thermal_state); | |||
49 | 49 | ||
50 | static atomic_t therm_throt_en = ATOMIC_INIT(0); | 50 | static atomic_t therm_throt_en = ATOMIC_INIT(0); |
51 | 51 | ||
52 | static u32 lvtthmr_init __read_mostly; | ||
53 | |||
52 | #ifdef CONFIG_SYSFS | 54 | #ifdef CONFIG_SYSFS |
53 | #define define_therm_throt_sysdev_one_ro(_name) \ | 55 | #define define_therm_throt_sysdev_one_ro(_name) \ |
54 | static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) | 56 | static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) |
@@ -254,14 +256,34 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) | |||
254 | ack_APIC_irq(); | 256 | ack_APIC_irq(); |
255 | } | 257 | } |
256 | 258 | ||
259 | /* Thermal monitoring depends on APIC, ACPI and clock modulation */ | ||
260 | static int intel_thermal_supported(struct cpuinfo_x86 *c) | ||
261 | { | ||
262 | if (!cpu_has_apic) | ||
263 | return 0; | ||
264 | if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) | ||
265 | return 0; | ||
266 | return 1; | ||
267 | } | ||
268 | |||
269 | void __init mcheck_intel_therm_init(void) | ||
270 | { | ||
271 | /* | ||
272 | * This function is only called on boot CPU. Save the init thermal | ||
273 | * LVT value on BSP and use that value to restore APs' thermal LVT | ||
274 | * entry BIOS programmed later | ||
275 | */ | ||
276 | if (intel_thermal_supported(&boot_cpu_data)) | ||
277 | lvtthmr_init = apic_read(APIC_LVTTHMR); | ||
278 | } | ||
279 | |||
257 | void intel_init_thermal(struct cpuinfo_x86 *c) | 280 | void intel_init_thermal(struct cpuinfo_x86 *c) |
258 | { | 281 | { |
259 | unsigned int cpu = smp_processor_id(); | 282 | unsigned int cpu = smp_processor_id(); |
260 | int tm2 = 0; | 283 | int tm2 = 0; |
261 | u32 l, h; | 284 | u32 l, h; |
262 | 285 | ||
263 | /* Thermal monitoring depends on ACPI and clock modulation*/ | 286 | if (!intel_thermal_supported(c)) |
264 | if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) | ||
265 | return; | 287 | return; |
266 | 288 | ||
267 | /* | 289 | /* |
@@ -270,7 +292,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c) | |||
270 | * since it might be delivered via SMI already: | 292 | * since it might be delivered via SMI already: |
271 | */ | 293 | */ |
272 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | 294 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); |
273 | h = apic_read(APIC_LVTTHMR); | 295 | |
296 | /* | ||
297 | * The initial value of thermal LVT entries on all APs always reads | ||
298 | * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI | ||
299 | * sequence to them and LVT registers are reset to 0s except for | ||
300 | * the mask bits which are set to 1s when APs receive INIT IPI. | ||
301 | * Always restore the value that BIOS has programmed on AP based on | ||
302 | * BSP's info we saved since BIOS is always setting the same value | ||
303 | * for all threads/cores | ||
304 | */ | ||
305 | apic_write(APIC_LVTTHMR, lvtthmr_init); | ||
306 | |||
307 | h = lvtthmr_init; | ||
308 | |||
274 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { | 309 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { |
275 | printk(KERN_DEBUG | 310 | printk(KERN_DEBUG |
276 | "CPU%d: Thermal monitoring handled by SMI\n", cpu); | 311 | "CPU%d: Thermal monitoring handled by SMI\n", cpu); |
@@ -312,8 +347,8 @@ void intel_init_thermal(struct cpuinfo_x86 *c) | |||
312 | l = apic_read(APIC_LVTTHMR); | 347 | l = apic_read(APIC_LVTTHMR); |
313 | apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | 348 | apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); |
314 | 349 | ||
315 | printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", | 350 | printk_once(KERN_INFO "CPU0: Thermal monitoring enabled (%s)\n", |
316 | cpu, tm2 ? "TM2" : "TM1"); | 351 | tm2 ? "TM2" : "TM1"); |
317 | 352 | ||
318 | /* enable thermal throttle processing */ | 353 | /* enable thermal throttle processing */ |
319 | atomic_set(&therm_throt_en, 1); | 354 | atomic_set(&therm_throt_en, 1); |
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 315738c74aad..09b1698e0466 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c | |||
@@ -170,6 +170,41 @@ static int __init cmp_range(const void *x1, const void *x2) | |||
170 | return start1 - start2; | 170 | return start1 - start2; |
171 | } | 171 | } |
172 | 172 | ||
173 | static int __init clean_sort_range(struct res_range *range, int az) | ||
174 | { | ||
175 | int i, j, k = az - 1, nr_range = 0; | ||
176 | |||
177 | for (i = 0; i < k; i++) { | ||
178 | if (range[i].end) | ||
179 | continue; | ||
180 | for (j = k; j > i; j--) { | ||
181 | if (range[j].end) { | ||
182 | k = j; | ||
183 | break; | ||
184 | } | ||
185 | } | ||
186 | if (j == i) | ||
187 | break; | ||
188 | range[i].start = range[k].start; | ||
189 | range[i].end = range[k].end; | ||
190 | range[k].start = 0; | ||
191 | range[k].end = 0; | ||
192 | k--; | ||
193 | } | ||
194 | /* count it */ | ||
195 | for (i = 0; i < az; i++) { | ||
196 | if (!range[i].end) { | ||
197 | nr_range = i; | ||
198 | break; | ||
199 | } | ||
200 | } | ||
201 | |||
202 | /* sort them */ | ||
203 | sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); | ||
204 | |||
205 | return nr_range; | ||
206 | } | ||
207 | |||
173 | #define BIOS_BUG_MSG KERN_WARNING \ | 208 | #define BIOS_BUG_MSG KERN_WARNING \ |
174 | "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" | 209 | "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" |
175 | 210 | ||
@@ -223,22 +258,18 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | |||
223 | subtract_range(range, extra_remove_base, | 258 | subtract_range(range, extra_remove_base, |
224 | extra_remove_base + extra_remove_size - 1); | 259 | extra_remove_base + extra_remove_size - 1); |
225 | 260 | ||
226 | /* get new range num */ | ||
227 | nr_range = 0; | ||
228 | for (i = 0; i < RANGE_NUM; i++) { | ||
229 | if (!range[i].end) | ||
230 | continue; | ||
231 | nr_range++; | ||
232 | } | ||
233 | if (debug_print) { | 261 | if (debug_print) { |
234 | printk(KERN_DEBUG "After UC checking\n"); | 262 | printk(KERN_DEBUG "After UC checking\n"); |
235 | for (i = 0; i < nr_range; i++) | 263 | for (i = 0; i < RANGE_NUM; i++) { |
264 | if (!range[i].end) | ||
265 | continue; | ||
236 | printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", | 266 | printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", |
237 | range[i].start, range[i].end + 1); | 267 | range[i].start, range[i].end + 1); |
268 | } | ||
238 | } | 269 | } |
239 | 270 | ||
240 | /* sort the ranges */ | 271 | /* sort the ranges */ |
241 | sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); | 272 | nr_range = clean_sort_range(range, RANGE_NUM); |
242 | if (debug_print) { | 273 | if (debug_print) { |
243 | printk(KERN_DEBUG "After sorting\n"); | 274 | printk(KERN_DEBUG "After sorting\n"); |
244 | for (i = 0; i < nr_range; i++) | 275 | for (i = 0; i < nr_range; i++) |
@@ -689,8 +720,6 @@ static int __init mtrr_need_cleanup(void) | |||
689 | continue; | 720 | continue; |
690 | if (!size) | 721 | if (!size) |
691 | type = MTRR_NUM_TYPES; | 722 | type = MTRR_NUM_TYPES; |
692 | if (type == MTRR_TYPE_WRPROT) | ||
693 | type = MTRR_TYPE_UNCACHABLE; | ||
694 | num[type]++; | 723 | num[type]++; |
695 | } | 724 | } |
696 | 725 | ||
@@ -846,7 +875,7 @@ int __init mtrr_cleanup(unsigned address_bits) | |||
846 | sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); | 875 | sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); |
847 | 876 | ||
848 | range_sums = sum_ranges(range, nr_range); | 877 | range_sums = sum_ranges(range, nr_range); |
849 | printk(KERN_INFO "total RAM coverred: %ldM\n", | 878 | printk(KERN_INFO "total RAM covered: %ldM\n", |
850 | range_sums >> (20 - PAGE_SHIFT)); | 879 | range_sums >> (20 - PAGE_SHIFT)); |
851 | 880 | ||
852 | if (mtrr_chunk_size && mtrr_gran_size) { | 881 | if (mtrr_chunk_size && mtrr_gran_size) { |
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index f04e72527604..e006e56f699c 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c | |||
@@ -4,6 +4,7 @@ | |||
4 | #include <linux/proc_fs.h> | 4 | #include <linux/proc_fs.h> |
5 | #include <linux/module.h> | 5 | #include <linux/module.h> |
6 | #include <linux/ctype.h> | 6 | #include <linux/ctype.h> |
7 | #include <linux/string.h> | ||
7 | #include <linux/init.h> | 8 | #include <linux/init.h> |
8 | 9 | ||
9 | #define LINE_SIZE 80 | 10 | #define LINE_SIZE 80 |
@@ -96,17 +97,24 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) | |||
96 | unsigned long long base, size; | 97 | unsigned long long base, size; |
97 | char *ptr; | 98 | char *ptr; |
98 | char line[LINE_SIZE]; | 99 | char line[LINE_SIZE]; |
100 | int length; | ||
99 | size_t linelen; | 101 | size_t linelen; |
100 | 102 | ||
101 | if (!capable(CAP_SYS_ADMIN)) | 103 | if (!capable(CAP_SYS_ADMIN)) |
102 | return -EPERM; | 104 | return -EPERM; |
103 | if (!len) | ||
104 | return -EINVAL; | ||
105 | 105 | ||
106 | memset(line, 0, LINE_SIZE); | 106 | memset(line, 0, LINE_SIZE); |
107 | if (len > LINE_SIZE) | 107 | |
108 | len = LINE_SIZE; | 108 | length = len; |
109 | if (copy_from_user(line, buf, len - 1)) | 109 | length--; |
110 | |||
111 | if (length > LINE_SIZE - 1) | ||
112 | length = LINE_SIZE - 1; | ||
113 | |||
114 | if (length < 0) | ||
115 | return -EINVAL; | ||
116 | |||
117 | if (copy_from_user(line, buf, length)) | ||
110 | return -EFAULT; | 118 | return -EFAULT; |
111 | 119 | ||
112 | linelen = strlen(line); | 120 | linelen = strlen(line); |
@@ -126,8 +134,7 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) | |||
126 | return -EINVAL; | 134 | return -EINVAL; |
127 | 135 | ||
128 | base = simple_strtoull(line + 5, &ptr, 0); | 136 | base = simple_strtoull(line + 5, &ptr, 0); |
129 | while (isspace(*ptr)) | 137 | ptr = skip_spaces(ptr); |
130 | ptr++; | ||
131 | 138 | ||
132 | if (strncmp(ptr, "size=", 5)) | 139 | if (strncmp(ptr, "size=", 5)) |
133 | return -EINVAL; | 140 | return -EINVAL; |
@@ -135,14 +142,11 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) | |||
135 | size = simple_strtoull(ptr + 5, &ptr, 0); | 142 | size = simple_strtoull(ptr + 5, &ptr, 0); |
136 | if ((base & 0xfff) || (size & 0xfff)) | 143 | if ((base & 0xfff) || (size & 0xfff)) |
137 | return -EINVAL; | 144 | return -EINVAL; |
138 | while (isspace(*ptr)) | 145 | ptr = skip_spaces(ptr); |
139 | ptr++; | ||
140 | 146 | ||
141 | if (strncmp(ptr, "type=", 5)) | 147 | if (strncmp(ptr, "type=", 5)) |
142 | return -EINVAL; | 148 | return -EINVAL; |
143 | ptr += 5; | 149 | ptr = skip_spaces(ptr + 5); |
144 | while (isspace(*ptr)) | ||
145 | ptr++; | ||
146 | 150 | ||
147 | for (i = 0; i < MTRR_NUM_TYPES; ++i) { | 151 | for (i = 0; i < MTRR_NUM_TYPES; ++i) { |
148 | if (strcmp(ptr, mtrr_strings[i])) | 152 | if (strcmp(ptr, mtrr_strings[i])) |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index b5801c311846..d616c06e99b4 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -77,6 +77,18 @@ struct cpu_hw_events { | |||
77 | struct debug_store *ds; | 77 | struct debug_store *ds; |
78 | }; | 78 | }; |
79 | 79 | ||
80 | struct event_constraint { | ||
81 | unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
82 | int code; | ||
83 | }; | ||
84 | |||
85 | #define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) } | ||
86 | #define EVENT_CONSTRAINT_END { .code = 0, .idxmsk[0] = 0 } | ||
87 | |||
88 | #define for_each_event_constraint(e, c) \ | ||
89 | for ((e) = (c); (e)->idxmsk[0]; (e)++) | ||
90 | |||
91 | |||
80 | /* | 92 | /* |
81 | * struct x86_pmu - generic x86 pmu | 93 | * struct x86_pmu - generic x86 pmu |
82 | */ | 94 | */ |
@@ -102,6 +114,8 @@ struct x86_pmu { | |||
102 | u64 intel_ctrl; | 114 | u64 intel_ctrl; |
103 | void (*enable_bts)(u64 config); | 115 | void (*enable_bts)(u64 config); |
104 | void (*disable_bts)(void); | 116 | void (*disable_bts)(void); |
117 | int (*get_event_idx)(struct cpu_hw_events *cpuc, | ||
118 | struct hw_perf_event *hwc); | ||
105 | }; | 119 | }; |
106 | 120 | ||
107 | static struct x86_pmu x86_pmu __read_mostly; | 121 | static struct x86_pmu x86_pmu __read_mostly; |
@@ -110,6 +124,8 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { | |||
110 | .enabled = 1, | 124 | .enabled = 1, |
111 | }; | 125 | }; |
112 | 126 | ||
127 | static const struct event_constraint *event_constraints; | ||
128 | |||
113 | /* | 129 | /* |
114 | * Not sure about some of these | 130 | * Not sure about some of these |
115 | */ | 131 | */ |
@@ -155,6 +171,16 @@ static u64 p6_pmu_raw_event(u64 hw_event) | |||
155 | return hw_event & P6_EVNTSEL_MASK; | 171 | return hw_event & P6_EVNTSEL_MASK; |
156 | } | 172 | } |
157 | 173 | ||
174 | static const struct event_constraint intel_p6_event_constraints[] = | ||
175 | { | ||
176 | EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ | ||
177 | EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ | ||
178 | EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */ | ||
179 | EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | ||
180 | EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | ||
181 | EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
182 | EVENT_CONSTRAINT_END | ||
183 | }; | ||
158 | 184 | ||
159 | /* | 185 | /* |
160 | * Intel PerfMon v3. Used on Core2 and later. | 186 | * Intel PerfMon v3. Used on Core2 and later. |
@@ -170,6 +196,35 @@ static const u64 intel_perfmon_event_map[] = | |||
170 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, | 196 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, |
171 | }; | 197 | }; |
172 | 198 | ||
199 | static const struct event_constraint intel_core_event_constraints[] = | ||
200 | { | ||
201 | EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ | ||
202 | EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ | ||
203 | EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | ||
204 | EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | ||
205 | EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
206 | EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ | ||
207 | EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ | ||
208 | EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ | ||
209 | EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ | ||
210 | EVENT_CONSTRAINT_END | ||
211 | }; | ||
212 | |||
213 | static const struct event_constraint intel_nehalem_event_constraints[] = | ||
214 | { | ||
215 | EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ | ||
216 | EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ | ||
217 | EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ | ||
218 | EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ | ||
219 | EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ | ||
220 | EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */ | ||
221 | EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ | ||
222 | EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */ | ||
223 | EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */ | ||
224 | EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */ | ||
225 | EVENT_CONSTRAINT_END | ||
226 | }; | ||
227 | |||
173 | static u64 intel_pmu_event_map(int hw_event) | 228 | static u64 intel_pmu_event_map(int hw_event) |
174 | { | 229 | { |
175 | return intel_perfmon_event_map[hw_event]; | 230 | return intel_perfmon_event_map[hw_event]; |
@@ -190,7 +245,7 @@ static u64 __read_mostly hw_cache_event_ids | |||
190 | [PERF_COUNT_HW_CACHE_OP_MAX] | 245 | [PERF_COUNT_HW_CACHE_OP_MAX] |
191 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; | 246 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; |
192 | 247 | ||
193 | static const u64 nehalem_hw_cache_event_ids | 248 | static __initconst u64 nehalem_hw_cache_event_ids |
194 | [PERF_COUNT_HW_CACHE_MAX] | 249 | [PERF_COUNT_HW_CACHE_MAX] |
195 | [PERF_COUNT_HW_CACHE_OP_MAX] | 250 | [PERF_COUNT_HW_CACHE_OP_MAX] |
196 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 251 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
@@ -281,7 +336,7 @@ static const u64 nehalem_hw_cache_event_ids | |||
281 | }, | 336 | }, |
282 | }; | 337 | }; |
283 | 338 | ||
284 | static const u64 core2_hw_cache_event_ids | 339 | static __initconst u64 core2_hw_cache_event_ids |
285 | [PERF_COUNT_HW_CACHE_MAX] | 340 | [PERF_COUNT_HW_CACHE_MAX] |
286 | [PERF_COUNT_HW_CACHE_OP_MAX] | 341 | [PERF_COUNT_HW_CACHE_OP_MAX] |
287 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 342 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
@@ -372,7 +427,7 @@ static const u64 core2_hw_cache_event_ids | |||
372 | }, | 427 | }, |
373 | }; | 428 | }; |
374 | 429 | ||
375 | static const u64 atom_hw_cache_event_ids | 430 | static __initconst u64 atom_hw_cache_event_ids |
376 | [PERF_COUNT_HW_CACHE_MAX] | 431 | [PERF_COUNT_HW_CACHE_MAX] |
377 | [PERF_COUNT_HW_CACHE_OP_MAX] | 432 | [PERF_COUNT_HW_CACHE_OP_MAX] |
378 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 433 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
@@ -469,7 +524,7 @@ static u64 intel_pmu_raw_event(u64 hw_event) | |||
469 | #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL | 524 | #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL |
470 | #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL | 525 | #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL |
471 | #define CORE_EVNTSEL_INV_MASK 0x00800000ULL | 526 | #define CORE_EVNTSEL_INV_MASK 0x00800000ULL |
472 | #define CORE_EVNTSEL_REG_MASK 0xFF000000ULL | 527 | #define CORE_EVNTSEL_REG_MASK 0xFF000000ULL |
473 | 528 | ||
474 | #define CORE_EVNTSEL_MASK \ | 529 | #define CORE_EVNTSEL_MASK \ |
475 | (CORE_EVNTSEL_EVENT_MASK | \ | 530 | (CORE_EVNTSEL_EVENT_MASK | \ |
@@ -481,7 +536,7 @@ static u64 intel_pmu_raw_event(u64 hw_event) | |||
481 | return hw_event & CORE_EVNTSEL_MASK; | 536 | return hw_event & CORE_EVNTSEL_MASK; |
482 | } | 537 | } |
483 | 538 | ||
484 | static const u64 amd_hw_cache_event_ids | 539 | static __initconst u64 amd_hw_cache_event_ids |
485 | [PERF_COUNT_HW_CACHE_MAX] | 540 | [PERF_COUNT_HW_CACHE_MAX] |
486 | [PERF_COUNT_HW_CACHE_OP_MAX] | 541 | [PERF_COUNT_HW_CACHE_OP_MAX] |
487 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 542 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
@@ -932,6 +987,8 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
932 | */ | 987 | */ |
933 | hwc->config = ARCH_PERFMON_EVENTSEL_INT; | 988 | hwc->config = ARCH_PERFMON_EVENTSEL_INT; |
934 | 989 | ||
990 | hwc->idx = -1; | ||
991 | |||
935 | /* | 992 | /* |
936 | * Count user and OS events unless requested not to. | 993 | * Count user and OS events unless requested not to. |
937 | */ | 994 | */ |
@@ -1229,7 +1286,7 @@ x86_perf_event_set_period(struct perf_event *event, | |||
1229 | return 0; | 1286 | return 0; |
1230 | 1287 | ||
1231 | /* | 1288 | /* |
1232 | * If we are way outside a reasoable range then just skip forward: | 1289 | * If we are way outside a reasonable range then just skip forward: |
1233 | */ | 1290 | */ |
1234 | if (unlikely(left <= -period)) { | 1291 | if (unlikely(left <= -period)) { |
1235 | left = period; | 1292 | left = period; |
@@ -1334,8 +1391,7 @@ static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx) | |||
1334 | x86_pmu_enable_event(hwc, idx); | 1391 | x86_pmu_enable_event(hwc, idx); |
1335 | } | 1392 | } |
1336 | 1393 | ||
1337 | static int | 1394 | static int fixed_mode_idx(struct hw_perf_event *hwc) |
1338 | fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) | ||
1339 | { | 1395 | { |
1340 | unsigned int hw_event; | 1396 | unsigned int hw_event; |
1341 | 1397 | ||
@@ -1349,6 +1405,12 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) | |||
1349 | if (!x86_pmu.num_events_fixed) | 1405 | if (!x86_pmu.num_events_fixed) |
1350 | return -1; | 1406 | return -1; |
1351 | 1407 | ||
1408 | /* | ||
1409 | * fixed counters do not take all possible filters | ||
1410 | */ | ||
1411 | if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK) | ||
1412 | return -1; | ||
1413 | |||
1352 | if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) | 1414 | if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) |
1353 | return X86_PMC_IDX_FIXED_INSTRUCTIONS; | 1415 | return X86_PMC_IDX_FIXED_INSTRUCTIONS; |
1354 | if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) | 1416 | if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) |
@@ -1360,22 +1422,57 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) | |||
1360 | } | 1422 | } |
1361 | 1423 | ||
1362 | /* | 1424 | /* |
1363 | * Find a PMC slot for the freshly enabled / scheduled in event: | 1425 | * generic counter allocator: get next free counter |
1364 | */ | 1426 | */ |
1365 | static int x86_pmu_enable(struct perf_event *event) | 1427 | static int |
1428 | gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) | ||
1429 | { | ||
1430 | int idx; | ||
1431 | |||
1432 | idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events); | ||
1433 | return idx == x86_pmu.num_events ? -1 : idx; | ||
1434 | } | ||
1435 | |||
1436 | /* | ||
1437 | * intel-specific counter allocator: check event constraints | ||
1438 | */ | ||
1439 | static int | ||
1440 | intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) | ||
1441 | { | ||
1442 | const struct event_constraint *event_constraint; | ||
1443 | int i, code; | ||
1444 | |||
1445 | if (!event_constraints) | ||
1446 | goto skip; | ||
1447 | |||
1448 | code = hwc->config & CORE_EVNTSEL_EVENT_MASK; | ||
1449 | |||
1450 | for_each_event_constraint(event_constraint, event_constraints) { | ||
1451 | if (code == event_constraint->code) { | ||
1452 | for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) { | ||
1453 | if (!test_and_set_bit(i, cpuc->used_mask)) | ||
1454 | return i; | ||
1455 | } | ||
1456 | return -1; | ||
1457 | } | ||
1458 | } | ||
1459 | skip: | ||
1460 | return gen_get_event_idx(cpuc, hwc); | ||
1461 | } | ||
1462 | |||
1463 | static int | ||
1464 | x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) | ||
1366 | { | 1465 | { |
1367 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1368 | struct hw_perf_event *hwc = &event->hw; | ||
1369 | int idx; | 1466 | int idx; |
1370 | 1467 | ||
1371 | idx = fixed_mode_idx(event, hwc); | 1468 | idx = fixed_mode_idx(hwc); |
1372 | if (idx == X86_PMC_IDX_FIXED_BTS) { | 1469 | if (idx == X86_PMC_IDX_FIXED_BTS) { |
1373 | /* BTS is already occupied. */ | 1470 | /* BTS is already occupied. */ |
1374 | if (test_and_set_bit(idx, cpuc->used_mask)) | 1471 | if (test_and_set_bit(idx, cpuc->used_mask)) |
1375 | return -EAGAIN; | 1472 | return -EAGAIN; |
1376 | 1473 | ||
1377 | hwc->config_base = 0; | 1474 | hwc->config_base = 0; |
1378 | hwc->event_base = 0; | 1475 | hwc->event_base = 0; |
1379 | hwc->idx = idx; | 1476 | hwc->idx = idx; |
1380 | } else if (idx >= 0) { | 1477 | } else if (idx >= 0) { |
1381 | /* | 1478 | /* |
@@ -1396,20 +1493,35 @@ static int x86_pmu_enable(struct perf_event *event) | |||
1396 | } else { | 1493 | } else { |
1397 | idx = hwc->idx; | 1494 | idx = hwc->idx; |
1398 | /* Try to get the previous generic event again */ | 1495 | /* Try to get the previous generic event again */ |
1399 | if (test_and_set_bit(idx, cpuc->used_mask)) { | 1496 | if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) { |
1400 | try_generic: | 1497 | try_generic: |
1401 | idx = find_first_zero_bit(cpuc->used_mask, | 1498 | idx = x86_pmu.get_event_idx(cpuc, hwc); |
1402 | x86_pmu.num_events); | 1499 | if (idx == -1) |
1403 | if (idx == x86_pmu.num_events) | ||
1404 | return -EAGAIN; | 1500 | return -EAGAIN; |
1405 | 1501 | ||
1406 | set_bit(idx, cpuc->used_mask); | 1502 | set_bit(idx, cpuc->used_mask); |
1407 | hwc->idx = idx; | 1503 | hwc->idx = idx; |
1408 | } | 1504 | } |
1409 | hwc->config_base = x86_pmu.eventsel; | 1505 | hwc->config_base = x86_pmu.eventsel; |
1410 | hwc->event_base = x86_pmu.perfctr; | 1506 | hwc->event_base = x86_pmu.perfctr; |
1411 | } | 1507 | } |
1412 | 1508 | ||
1509 | return idx; | ||
1510 | } | ||
1511 | |||
1512 | /* | ||
1513 | * Find a PMC slot for the freshly enabled / scheduled in event: | ||
1514 | */ | ||
1515 | static int x86_pmu_enable(struct perf_event *event) | ||
1516 | { | ||
1517 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1518 | struct hw_perf_event *hwc = &event->hw; | ||
1519 | int idx; | ||
1520 | |||
1521 | idx = x86_schedule_event(cpuc, hwc); | ||
1522 | if (idx < 0) | ||
1523 | return idx; | ||
1524 | |||
1413 | perf_events_lapic_init(); | 1525 | perf_events_lapic_init(); |
1414 | 1526 | ||
1415 | x86_pmu.disable(hwc, idx); | 1527 | x86_pmu.disable(hwc, idx); |
@@ -1520,6 +1632,7 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc) | |||
1520 | 1632 | ||
1521 | data.period = event->hw.last_period; | 1633 | data.period = event->hw.last_period; |
1522 | data.addr = 0; | 1634 | data.addr = 0; |
1635 | data.raw = NULL; | ||
1523 | regs.ip = 0; | 1636 | regs.ip = 0; |
1524 | 1637 | ||
1525 | /* | 1638 | /* |
@@ -1637,6 +1750,7 @@ static int p6_pmu_handle_irq(struct pt_regs *regs) | |||
1637 | u64 val; | 1750 | u64 val; |
1638 | 1751 | ||
1639 | data.addr = 0; | 1752 | data.addr = 0; |
1753 | data.raw = NULL; | ||
1640 | 1754 | ||
1641 | cpuc = &__get_cpu_var(cpu_hw_events); | 1755 | cpuc = &__get_cpu_var(cpu_hw_events); |
1642 | 1756 | ||
@@ -1682,6 +1796,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) | |||
1682 | u64 ack, status; | 1796 | u64 ack, status; |
1683 | 1797 | ||
1684 | data.addr = 0; | 1798 | data.addr = 0; |
1799 | data.raw = NULL; | ||
1685 | 1800 | ||
1686 | cpuc = &__get_cpu_var(cpu_hw_events); | 1801 | cpuc = &__get_cpu_var(cpu_hw_events); |
1687 | 1802 | ||
@@ -1745,6 +1860,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) | |||
1745 | u64 val; | 1860 | u64 val; |
1746 | 1861 | ||
1747 | data.addr = 0; | 1862 | data.addr = 0; |
1863 | data.raw = NULL; | ||
1748 | 1864 | ||
1749 | cpuc = &__get_cpu_var(cpu_hw_events); | 1865 | cpuc = &__get_cpu_var(cpu_hw_events); |
1750 | 1866 | ||
@@ -1852,7 +1968,7 @@ static __read_mostly struct notifier_block perf_event_nmi_notifier = { | |||
1852 | .priority = 1 | 1968 | .priority = 1 |
1853 | }; | 1969 | }; |
1854 | 1970 | ||
1855 | static struct x86_pmu p6_pmu = { | 1971 | static __initconst struct x86_pmu p6_pmu = { |
1856 | .name = "p6", | 1972 | .name = "p6", |
1857 | .handle_irq = p6_pmu_handle_irq, | 1973 | .handle_irq = p6_pmu_handle_irq, |
1858 | .disable_all = p6_pmu_disable_all, | 1974 | .disable_all = p6_pmu_disable_all, |
@@ -1877,9 +1993,10 @@ static struct x86_pmu p6_pmu = { | |||
1877 | */ | 1993 | */ |
1878 | .event_bits = 32, | 1994 | .event_bits = 32, |
1879 | .event_mask = (1ULL << 32) - 1, | 1995 | .event_mask = (1ULL << 32) - 1, |
1996 | .get_event_idx = intel_get_event_idx, | ||
1880 | }; | 1997 | }; |
1881 | 1998 | ||
1882 | static struct x86_pmu intel_pmu = { | 1999 | static __initconst struct x86_pmu intel_pmu = { |
1883 | .name = "Intel", | 2000 | .name = "Intel", |
1884 | .handle_irq = intel_pmu_handle_irq, | 2001 | .handle_irq = intel_pmu_handle_irq, |
1885 | .disable_all = intel_pmu_disable_all, | 2002 | .disable_all = intel_pmu_disable_all, |
@@ -1900,9 +2017,10 @@ static struct x86_pmu intel_pmu = { | |||
1900 | .max_period = (1ULL << 31) - 1, | 2017 | .max_period = (1ULL << 31) - 1, |
1901 | .enable_bts = intel_pmu_enable_bts, | 2018 | .enable_bts = intel_pmu_enable_bts, |
1902 | .disable_bts = intel_pmu_disable_bts, | 2019 | .disable_bts = intel_pmu_disable_bts, |
2020 | .get_event_idx = intel_get_event_idx, | ||
1903 | }; | 2021 | }; |
1904 | 2022 | ||
1905 | static struct x86_pmu amd_pmu = { | 2023 | static __initconst struct x86_pmu amd_pmu = { |
1906 | .name = "AMD", | 2024 | .name = "AMD", |
1907 | .handle_irq = amd_pmu_handle_irq, | 2025 | .handle_irq = amd_pmu_handle_irq, |
1908 | .disable_all = amd_pmu_disable_all, | 2026 | .disable_all = amd_pmu_disable_all, |
@@ -1920,9 +2038,10 @@ static struct x86_pmu amd_pmu = { | |||
1920 | .apic = 1, | 2038 | .apic = 1, |
1921 | /* use highest bit to detect overflow */ | 2039 | /* use highest bit to detect overflow */ |
1922 | .max_period = (1ULL << 47) - 1, | 2040 | .max_period = (1ULL << 47) - 1, |
2041 | .get_event_idx = gen_get_event_idx, | ||
1923 | }; | 2042 | }; |
1924 | 2043 | ||
1925 | static int p6_pmu_init(void) | 2044 | static __init int p6_pmu_init(void) |
1926 | { | 2045 | { |
1927 | switch (boot_cpu_data.x86_model) { | 2046 | switch (boot_cpu_data.x86_model) { |
1928 | case 1: | 2047 | case 1: |
@@ -1932,10 +2051,12 @@ static int p6_pmu_init(void) | |||
1932 | case 7: | 2051 | case 7: |
1933 | case 8: | 2052 | case 8: |
1934 | case 11: /* Pentium III */ | 2053 | case 11: /* Pentium III */ |
2054 | event_constraints = intel_p6_event_constraints; | ||
1935 | break; | 2055 | break; |
1936 | case 9: | 2056 | case 9: |
1937 | case 13: | 2057 | case 13: |
1938 | /* Pentium M */ | 2058 | /* Pentium M */ |
2059 | event_constraints = intel_p6_event_constraints; | ||
1939 | break; | 2060 | break; |
1940 | default: | 2061 | default: |
1941 | pr_cont("unsupported p6 CPU model %d ", | 2062 | pr_cont("unsupported p6 CPU model %d ", |
@@ -1945,16 +2066,10 @@ static int p6_pmu_init(void) | |||
1945 | 2066 | ||
1946 | x86_pmu = p6_pmu; | 2067 | x86_pmu = p6_pmu; |
1947 | 2068 | ||
1948 | if (!cpu_has_apic) { | ||
1949 | pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); | ||
1950 | pr_info("no hardware sampling interrupt available.\n"); | ||
1951 | x86_pmu.apic = 0; | ||
1952 | } | ||
1953 | |||
1954 | return 0; | 2069 | return 0; |
1955 | } | 2070 | } |
1956 | 2071 | ||
1957 | static int intel_pmu_init(void) | 2072 | static __init int intel_pmu_init(void) |
1958 | { | 2073 | { |
1959 | union cpuid10_edx edx; | 2074 | union cpuid10_edx edx; |
1960 | union cpuid10_eax eax; | 2075 | union cpuid10_eax eax; |
@@ -2007,12 +2122,14 @@ static int intel_pmu_init(void) | |||
2007 | sizeof(hw_cache_event_ids)); | 2122 | sizeof(hw_cache_event_ids)); |
2008 | 2123 | ||
2009 | pr_cont("Core2 events, "); | 2124 | pr_cont("Core2 events, "); |
2125 | event_constraints = intel_core_event_constraints; | ||
2010 | break; | 2126 | break; |
2011 | default: | 2127 | default: |
2012 | case 26: | 2128 | case 26: |
2013 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, | 2129 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, |
2014 | sizeof(hw_cache_event_ids)); | 2130 | sizeof(hw_cache_event_ids)); |
2015 | 2131 | ||
2132 | event_constraints = intel_nehalem_event_constraints; | ||
2016 | pr_cont("Nehalem/Corei7 events, "); | 2133 | pr_cont("Nehalem/Corei7 events, "); |
2017 | break; | 2134 | break; |
2018 | case 28: | 2135 | case 28: |
@@ -2025,7 +2142,7 @@ static int intel_pmu_init(void) | |||
2025 | return 0; | 2142 | return 0; |
2026 | } | 2143 | } |
2027 | 2144 | ||
2028 | static int amd_pmu_init(void) | 2145 | static __init int amd_pmu_init(void) |
2029 | { | 2146 | { |
2030 | /* Performance-monitoring supported from K7 and later: */ | 2147 | /* Performance-monitoring supported from K7 and later: */ |
2031 | if (boot_cpu_data.x86 < 6) | 2148 | if (boot_cpu_data.x86 < 6) |
@@ -2040,6 +2157,16 @@ static int amd_pmu_init(void) | |||
2040 | return 0; | 2157 | return 0; |
2041 | } | 2158 | } |
2042 | 2159 | ||
2160 | static void __init pmu_check_apic(void) | ||
2161 | { | ||
2162 | if (cpu_has_apic) | ||
2163 | return; | ||
2164 | |||
2165 | x86_pmu.apic = 0; | ||
2166 | pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); | ||
2167 | pr_info("no hardware sampling interrupt available.\n"); | ||
2168 | } | ||
2169 | |||
2043 | void __init init_hw_perf_events(void) | 2170 | void __init init_hw_perf_events(void) |
2044 | { | 2171 | { |
2045 | int err; | 2172 | int err; |
@@ -2061,6 +2188,8 @@ void __init init_hw_perf_events(void) | |||
2061 | return; | 2188 | return; |
2062 | } | 2189 | } |
2063 | 2190 | ||
2191 | pmu_check_apic(); | ||
2192 | |||
2064 | pr_cont("%s PMU driver.\n", x86_pmu.name); | 2193 | pr_cont("%s PMU driver.\n", x86_pmu.name); |
2065 | 2194 | ||
2066 | if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { | 2195 | if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { |
@@ -2105,11 +2234,47 @@ static const struct pmu pmu = { | |||
2105 | .unthrottle = x86_pmu_unthrottle, | 2234 | .unthrottle = x86_pmu_unthrottle, |
2106 | }; | 2235 | }; |
2107 | 2236 | ||
2237 | static int | ||
2238 | validate_event(struct cpu_hw_events *cpuc, struct perf_event *event) | ||
2239 | { | ||
2240 | struct hw_perf_event fake_event = event->hw; | ||
2241 | |||
2242 | if (event->pmu && event->pmu != &pmu) | ||
2243 | return 0; | ||
2244 | |||
2245 | return x86_schedule_event(cpuc, &fake_event) >= 0; | ||
2246 | } | ||
2247 | |||
2248 | static int validate_group(struct perf_event *event) | ||
2249 | { | ||
2250 | struct perf_event *sibling, *leader = event->group_leader; | ||
2251 | struct cpu_hw_events fake_pmu; | ||
2252 | |||
2253 | memset(&fake_pmu, 0, sizeof(fake_pmu)); | ||
2254 | |||
2255 | if (!validate_event(&fake_pmu, leader)) | ||
2256 | return -ENOSPC; | ||
2257 | |||
2258 | list_for_each_entry(sibling, &leader->sibling_list, group_entry) { | ||
2259 | if (!validate_event(&fake_pmu, sibling)) | ||
2260 | return -ENOSPC; | ||
2261 | } | ||
2262 | |||
2263 | if (!validate_event(&fake_pmu, event)) | ||
2264 | return -ENOSPC; | ||
2265 | |||
2266 | return 0; | ||
2267 | } | ||
2268 | |||
2108 | const struct pmu *hw_perf_event_init(struct perf_event *event) | 2269 | const struct pmu *hw_perf_event_init(struct perf_event *event) |
2109 | { | 2270 | { |
2110 | int err; | 2271 | int err; |
2111 | 2272 | ||
2112 | err = __hw_perf_event_init(event); | 2273 | err = __hw_perf_event_init(event); |
2274 | if (!err) { | ||
2275 | if (event->group_leader != event) | ||
2276 | err = validate_group(event); | ||
2277 | } | ||
2113 | if (err) { | 2278 | if (err) { |
2114 | if (event->destroy) | 2279 | if (event->destroy) |
2115 | event->destroy(event); | 2280 | event->destroy(event); |
@@ -2132,7 +2297,7 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip) | |||
2132 | 2297 | ||
2133 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); | 2298 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); |
2134 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); | 2299 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); |
2135 | static DEFINE_PER_CPU(int, in_nmi_frame); | 2300 | static DEFINE_PER_CPU(int, in_ignored_frame); |
2136 | 2301 | ||
2137 | 2302 | ||
2138 | static void | 2303 | static void |
@@ -2148,8 +2313,9 @@ static void backtrace_warning(void *data, char *msg) | |||
2148 | 2313 | ||
2149 | static int backtrace_stack(void *data, char *name) | 2314 | static int backtrace_stack(void *data, char *name) |
2150 | { | 2315 | { |
2151 | per_cpu(in_nmi_frame, smp_processor_id()) = | 2316 | per_cpu(in_ignored_frame, smp_processor_id()) = |
2152 | x86_is_stack_id(NMI_STACK, name); | 2317 | x86_is_stack_id(NMI_STACK, name) || |
2318 | x86_is_stack_id(DEBUG_STACK, name); | ||
2153 | 2319 | ||
2154 | return 0; | 2320 | return 0; |
2155 | } | 2321 | } |
@@ -2158,7 +2324,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) | |||
2158 | { | 2324 | { |
2159 | struct perf_callchain_entry *entry = data; | 2325 | struct perf_callchain_entry *entry = data; |
2160 | 2326 | ||
2161 | if (per_cpu(in_nmi_frame, smp_processor_id())) | 2327 | if (per_cpu(in_ignored_frame, smp_processor_id())) |
2162 | return; | 2328 | return; |
2163 | 2329 | ||
2164 | if (reliable) | 2330 | if (reliable) |
@@ -2170,6 +2336,7 @@ static const struct stacktrace_ops backtrace_ops = { | |||
2170 | .warning_symbol = backtrace_warning_symbol, | 2336 | .warning_symbol = backtrace_warning_symbol, |
2171 | .stack = backtrace_stack, | 2337 | .stack = backtrace_stack, |
2172 | .address = backtrace_address, | 2338 | .address = backtrace_address, |
2339 | .walk_stack = print_context_stack_bp, | ||
2173 | }; | 2340 | }; |
2174 | 2341 | ||
2175 | #include "../dumpstack.h" | 2342 | #include "../dumpstack.h" |
@@ -2180,7 +2347,7 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) | |||
2180 | callchain_store(entry, PERF_CONTEXT_KERNEL); | 2347 | callchain_store(entry, PERF_CONTEXT_KERNEL); |
2181 | callchain_store(entry, regs->ip); | 2348 | callchain_store(entry, regs->ip); |
2182 | 2349 | ||
2183 | dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); | 2350 | dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); |
2184 | } | 2351 | } |
2185 | 2352 | ||
2186 | /* | 2353 | /* |
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index fab786f60ed6..898df9719afb 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c | |||
@@ -712,7 +712,7 @@ static void probe_nmi_watchdog(void) | |||
712 | switch (boot_cpu_data.x86_vendor) { | 712 | switch (boot_cpu_data.x86_vendor) { |
713 | case X86_VENDOR_AMD: | 713 | case X86_VENDOR_AMD: |
714 | if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && | 714 | if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && |
715 | boot_cpu_data.x86 != 16) | 715 | boot_cpu_data.x86 != 16 && boot_cpu_data.x86 != 17) |
716 | return; | 716 | return; |
717 | wd_ops = &k7_wd_ops; | 717 | wd_ops = &k7_wd_ops; |
718 | break; | 718 | break; |
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index bb62b3e5caad..28000743bbb0 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c | |||
@@ -26,7 +26,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) | |||
26 | 26 | ||
27 | early_init_transmeta(c); | 27 | early_init_transmeta(c); |
28 | 28 | ||
29 | display_cacheinfo(c); | 29 | cpu_detect_cache_sizes(c); |
30 | 30 | ||
31 | /* Print CMS and CPU revision */ | 31 | /* Print CMS and CPU revision */ |
32 | max = cpuid_eax(0x80860000); | 32 | max = cpuid_eax(0x80860000); |
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 6a52d4b36a30..cb27fd6136c9 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c | |||
@@ -116,21 +116,16 @@ static int cpuid_open(struct inode *inode, struct file *file) | |||
116 | { | 116 | { |
117 | unsigned int cpu; | 117 | unsigned int cpu; |
118 | struct cpuinfo_x86 *c; | 118 | struct cpuinfo_x86 *c; |
119 | int ret = 0; | ||
120 | |||
121 | lock_kernel(); | ||
122 | 119 | ||
123 | cpu = iminor(file->f_path.dentry->d_inode); | 120 | cpu = iminor(file->f_path.dentry->d_inode); |
124 | if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { | 121 | if (cpu >= nr_cpu_ids || !cpu_online(cpu)) |
125 | ret = -ENXIO; /* No such CPU */ | 122 | return -ENXIO; /* No such CPU */ |
126 | goto out; | 123 | |
127 | } | ||
128 | c = &cpu_data(cpu); | 124 | c = &cpu_data(cpu); |
129 | if (c->cpuid_level < 0) | 125 | if (c->cpuid_level < 0) |
130 | ret = -EIO; /* CPUID not supported */ | 126 | return -EIO; /* CPUID not supported */ |
131 | out: | 127 | |
132 | unlock_kernel(); | 128 | return 0; |
133 | return ret; | ||
134 | } | 129 | } |
135 | 130 | ||
136 | /* | 131 | /* |
@@ -192,7 +187,8 @@ static int __init cpuid_init(void) | |||
192 | int i, err = 0; | 187 | int i, err = 0; |
193 | i = 0; | 188 | i = 0; |
194 | 189 | ||
195 | if (register_chrdev(CPUID_MAJOR, "cpu/cpuid", &cpuid_fops)) { | 190 | if (__register_chrdev(CPUID_MAJOR, 0, NR_CPUS, |
191 | "cpu/cpuid", &cpuid_fops)) { | ||
196 | printk(KERN_ERR "cpuid: unable to get major %d for cpuid\n", | 192 | printk(KERN_ERR "cpuid: unable to get major %d for cpuid\n", |
197 | CPUID_MAJOR); | 193 | CPUID_MAJOR); |
198 | err = -EBUSY; | 194 | err = -EBUSY; |
@@ -221,7 +217,7 @@ out_class: | |||
221 | } | 217 | } |
222 | class_destroy(cpuid_class); | 218 | class_destroy(cpuid_class); |
223 | out_chrdev: | 219 | out_chrdev: |
224 | unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); | 220 | __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); |
225 | out: | 221 | out: |
226 | return err; | 222 | return err; |
227 | } | 223 | } |
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 5e409dc298a4..a4849c10a77e 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c | |||
@@ -27,8 +27,7 @@ | |||
27 | #include <asm/cpu.h> | 27 | #include <asm/cpu.h> |
28 | #include <asm/reboot.h> | 28 | #include <asm/reboot.h> |
29 | #include <asm/virtext.h> | 29 | #include <asm/virtext.h> |
30 | #include <asm/iommu.h> | 30 | #include <asm/x86_init.h> |
31 | |||
32 | 31 | ||
33 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) | 32 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) |
34 | 33 | ||
@@ -106,7 +105,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs) | |||
106 | #endif | 105 | #endif |
107 | 106 | ||
108 | #ifdef CONFIG_X86_64 | 107 | #ifdef CONFIG_X86_64 |
109 | pci_iommu_shutdown(); | 108 | x86_platform.iommu_shutdown(); |
110 | #endif | 109 | #endif |
111 | 110 | ||
112 | crash_save_cpu(regs, safe_smp_processor_id()); | 111 | crash_save_cpu(regs, safe_smp_processor_id()); |
diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c index f7cdb3b457aa..cd97ce18c29d 100644 --- a/arch/x86/kernel/crash_dump_32.c +++ b/arch/x86/kernel/crash_dump_32.c | |||
@@ -16,6 +16,22 @@ static void *kdump_buf_page; | |||
16 | /* Stores the physical address of elf header of crash image. */ | 16 | /* Stores the physical address of elf header of crash image. */ |
17 | unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX; | 17 | unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX; |
18 | 18 | ||
19 | static inline bool is_crashed_pfn_valid(unsigned long pfn) | ||
20 | { | ||
21 | #ifndef CONFIG_X86_PAE | ||
22 | /* | ||
23 | * non-PAE kdump kernel executed from a PAE one will crop high pte | ||
24 | * bits and poke unwanted space counting again from address 0, we | ||
25 | * don't want that. pte must fit into unsigned long. In fact the | ||
26 | * test checks high 12 bits for being zero (pfn will be shifted left | ||
27 | * by PAGE_SHIFT). | ||
28 | */ | ||
29 | return pte_pfn(pfn_pte(pfn, __pgprot(0))) == pfn; | ||
30 | #else | ||
31 | return true; | ||
32 | #endif | ||
33 | } | ||
34 | |||
19 | /** | 35 | /** |
20 | * copy_oldmem_page - copy one page from "oldmem" | 36 | * copy_oldmem_page - copy one page from "oldmem" |
21 | * @pfn: page frame number to be copied | 37 | * @pfn: page frame number to be copied |
@@ -41,6 +57,9 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, | |||
41 | if (!csize) | 57 | if (!csize) |
42 | return 0; | 58 | return 0; |
43 | 59 | ||
60 | if (!is_crashed_pfn_valid(pfn)) | ||
61 | return -EFAULT; | ||
62 | |||
44 | vaddr = kmap_atomic_pfn(pfn, KM_PTE0); | 63 | vaddr = kmap_atomic_pfn(pfn, KM_PTE0); |
45 | 64 | ||
46 | if (!userbuf) { | 65 | if (!userbuf) { |
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 2d8a371d4339..c56bc2873030 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -109,6 +109,30 @@ print_context_stack(struct thread_info *tinfo, | |||
109 | } | 109 | } |
110 | return bp; | 110 | return bp; |
111 | } | 111 | } |
112 | EXPORT_SYMBOL_GPL(print_context_stack); | ||
113 | |||
114 | unsigned long | ||
115 | print_context_stack_bp(struct thread_info *tinfo, | ||
116 | unsigned long *stack, unsigned long bp, | ||
117 | const struct stacktrace_ops *ops, void *data, | ||
118 | unsigned long *end, int *graph) | ||
119 | { | ||
120 | struct stack_frame *frame = (struct stack_frame *)bp; | ||
121 | unsigned long *ret_addr = &frame->return_address; | ||
122 | |||
123 | while (valid_stack_ptr(tinfo, ret_addr, sizeof(*ret_addr), end)) { | ||
124 | unsigned long addr = *ret_addr; | ||
125 | |||
126 | if (__kernel_text_address(addr)) { | ||
127 | ops->address(data, addr, 1); | ||
128 | frame = frame->next_frame; | ||
129 | ret_addr = &frame->return_address; | ||
130 | print_ftrace_graph_addr(addr, data, ops, tinfo, graph); | ||
131 | } | ||
132 | } | ||
133 | return (unsigned long)frame; | ||
134 | } | ||
135 | EXPORT_SYMBOL_GPL(print_context_stack_bp); | ||
112 | 136 | ||
113 | 137 | ||
114 | static void | 138 | static void |
@@ -141,10 +165,11 @@ static void print_trace_address(void *data, unsigned long addr, int reliable) | |||
141 | } | 165 | } |
142 | 166 | ||
143 | static const struct stacktrace_ops print_trace_ops = { | 167 | static const struct stacktrace_ops print_trace_ops = { |
144 | .warning = print_trace_warning, | 168 | .warning = print_trace_warning, |
145 | .warning_symbol = print_trace_warning_symbol, | 169 | .warning_symbol = print_trace_warning_symbol, |
146 | .stack = print_trace_stack, | 170 | .stack = print_trace_stack, |
147 | .address = print_trace_address, | 171 | .address = print_trace_address, |
172 | .walk_stack = print_context_stack, | ||
148 | }; | 173 | }; |
149 | 174 | ||
150 | void | 175 | void |
@@ -188,7 +213,7 @@ void dump_stack(void) | |||
188 | } | 213 | } |
189 | EXPORT_SYMBOL(dump_stack); | 214 | EXPORT_SYMBOL(dump_stack); |
190 | 215 | ||
191 | static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; | 216 | static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; |
192 | static int die_owner = -1; | 217 | static int die_owner = -1; |
193 | static unsigned int die_nest_count; | 218 | static unsigned int die_nest_count; |
194 | 219 | ||
@@ -207,11 +232,11 @@ unsigned __kprobes long oops_begin(void) | |||
207 | /* racy, but better than risking deadlock. */ | 232 | /* racy, but better than risking deadlock. */ |
208 | raw_local_irq_save(flags); | 233 | raw_local_irq_save(flags); |
209 | cpu = smp_processor_id(); | 234 | cpu = smp_processor_id(); |
210 | if (!__raw_spin_trylock(&die_lock)) { | 235 | if (!arch_spin_trylock(&die_lock)) { |
211 | if (cpu == die_owner) | 236 | if (cpu == die_owner) |
212 | /* nested oops. should stop eventually */; | 237 | /* nested oops. should stop eventually */; |
213 | else | 238 | else |
214 | __raw_spin_lock(&die_lock); | 239 | arch_spin_lock(&die_lock); |
215 | } | 240 | } |
216 | die_nest_count++; | 241 | die_nest_count++; |
217 | die_owner = cpu; | 242 | die_owner = cpu; |
@@ -231,7 +256,7 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | |||
231 | die_nest_count--; | 256 | die_nest_count--; |
232 | if (!die_nest_count) | 257 | if (!die_nest_count) |
233 | /* Nest count reaches zero, release the lock. */ | 258 | /* Nest count reaches zero, release the lock. */ |
234 | __raw_spin_unlock(&die_lock); | 259 | arch_spin_unlock(&die_lock); |
235 | raw_local_irq_restore(flags); | 260 | raw_local_irq_restore(flags); |
236 | oops_exit(); | 261 | oops_exit(); |
237 | 262 | ||
@@ -268,11 +293,12 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) | |||
268 | 293 | ||
269 | show_registers(regs); | 294 | show_registers(regs); |
270 | #ifdef CONFIG_X86_32 | 295 | #ifdef CONFIG_X86_32 |
271 | sp = (unsigned long) (®s->sp); | 296 | if (user_mode_vm(regs)) { |
272 | savesegment(ss, ss); | ||
273 | if (user_mode(regs)) { | ||
274 | sp = regs->sp; | 297 | sp = regs->sp; |
275 | ss = regs->ss & 0xffff; | 298 | ss = regs->ss & 0xffff; |
299 | } else { | ||
300 | sp = kernel_stack_pointer(regs); | ||
301 | savesegment(ss, ss); | ||
276 | } | 302 | } |
277 | printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); | 303 | printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); |
278 | print_symbol("%s", regs->ip); | 304 | print_symbol("%s", regs->ip); |
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h index 81086c227ab7..4fd1420faffa 100644 --- a/arch/x86/kernel/dumpstack.h +++ b/arch/x86/kernel/dumpstack.h | |||
@@ -14,12 +14,6 @@ | |||
14 | #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) | 14 | #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) |
15 | #endif | 15 | #endif |
16 | 16 | ||
17 | extern unsigned long | ||
18 | print_context_stack(struct thread_info *tinfo, | ||
19 | unsigned long *stack, unsigned long bp, | ||
20 | const struct stacktrace_ops *ops, void *data, | ||
21 | unsigned long *end, int *graph); | ||
22 | |||
23 | extern void | 17 | extern void |
24 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | 18 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, |
25 | unsigned long *stack, unsigned long bp, char *log_lvl); | 19 | unsigned long *stack, unsigned long bp, char *log_lvl); |
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index f7dd2a7c3bf4..ae775ca47b25 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c | |||
@@ -10,9 +10,9 @@ | |||
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | #include <linux/ptrace.h> | 11 | #include <linux/ptrace.h> |
12 | #include <linux/kexec.h> | 12 | #include <linux/kexec.h> |
13 | #include <linux/sysfs.h> | ||
13 | #include <linux/bug.h> | 14 | #include <linux/bug.h> |
14 | #include <linux/nmi.h> | 15 | #include <linux/nmi.h> |
15 | #include <linux/sysfs.h> | ||
16 | 16 | ||
17 | #include <asm/stacktrace.h> | 17 | #include <asm/stacktrace.h> |
18 | 18 | ||
@@ -35,6 +35,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
35 | 35 | ||
36 | if (!stack) { | 36 | if (!stack) { |
37 | unsigned long dummy; | 37 | unsigned long dummy; |
38 | |||
38 | stack = &dummy; | 39 | stack = &dummy; |
39 | if (task && task != current) | 40 | if (task && task != current) |
40 | stack = (unsigned long *)task->thread.sp; | 41 | stack = (unsigned long *)task->thread.sp; |
@@ -57,8 +58,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
57 | 58 | ||
58 | context = (struct thread_info *) | 59 | context = (struct thread_info *) |
59 | ((unsigned long)stack & (~(THREAD_SIZE - 1))); | 60 | ((unsigned long)stack & (~(THREAD_SIZE - 1))); |
60 | bp = print_context_stack(context, stack, bp, ops, | 61 | bp = ops->walk_stack(context, stack, bp, ops, data, NULL, &graph); |
61 | data, NULL, &graph); | ||
62 | 62 | ||
63 | stack = (unsigned long *)context->previous_esp; | 63 | stack = (unsigned long *)context->previous_esp; |
64 | if (!stack) | 64 | if (!stack) |
@@ -72,7 +72,7 @@ EXPORT_SYMBOL(dump_trace); | |||
72 | 72 | ||
73 | void | 73 | void |
74 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 74 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
75 | unsigned long *sp, unsigned long bp, char *log_lvl) | 75 | unsigned long *sp, unsigned long bp, char *log_lvl) |
76 | { | 76 | { |
77 | unsigned long *stack; | 77 | unsigned long *stack; |
78 | int i; | 78 | int i; |
@@ -156,4 +156,3 @@ int is_valid_bugaddr(unsigned long ip) | |||
156 | 156 | ||
157 | return ud2 == 0x0b0f; | 157 | return ud2 == 0x0b0f; |
158 | } | 158 | } |
159 | |||
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index a071e6be177e..0ad9597073f5 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c | |||
@@ -10,26 +10,28 @@ | |||
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | #include <linux/ptrace.h> | 11 | #include <linux/ptrace.h> |
12 | #include <linux/kexec.h> | 12 | #include <linux/kexec.h> |
13 | #include <linux/sysfs.h> | ||
13 | #include <linux/bug.h> | 14 | #include <linux/bug.h> |
14 | #include <linux/nmi.h> | 15 | #include <linux/nmi.h> |
15 | #include <linux/sysfs.h> | ||
16 | 16 | ||
17 | #include <asm/stacktrace.h> | 17 | #include <asm/stacktrace.h> |
18 | 18 | ||
19 | #include "dumpstack.h" | 19 | #include "dumpstack.h" |
20 | 20 | ||
21 | #define N_EXCEPTION_STACKS_END \ | ||
22 | (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2) | ||
21 | 23 | ||
22 | static char x86_stack_ids[][8] = { | 24 | static char x86_stack_ids[][8] = { |
23 | [DEBUG_STACK - 1] = "#DB", | 25 | [ DEBUG_STACK-1 ] = "#DB", |
24 | [NMI_STACK - 1] = "NMI", | 26 | [ NMI_STACK-1 ] = "NMI", |
25 | [DOUBLEFAULT_STACK - 1] = "#DF", | 27 | [ DOUBLEFAULT_STACK-1 ] = "#DF", |
26 | [STACKFAULT_STACK - 1] = "#SS", | 28 | [ STACKFAULT_STACK-1 ] = "#SS", |
27 | [MCE_STACK - 1] = "#MC", | 29 | [ MCE_STACK-1 ] = "#MC", |
28 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | 30 | #if DEBUG_STKSZ > EXCEPTION_STKSZ |
29 | [N_EXCEPTION_STACKS ... | 31 | [ N_EXCEPTION_STACKS ... |
30 | N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" | 32 | N_EXCEPTION_STACKS_END ] = "#DB[?]" |
31 | #endif | 33 | #endif |
32 | }; | 34 | }; |
33 | 35 | ||
34 | int x86_is_stack_id(int id, char *name) | 36 | int x86_is_stack_id(int id, char *name) |
35 | { | 37 | { |
@@ -37,7 +39,7 @@ int x86_is_stack_id(int id, char *name) | |||
37 | } | 39 | } |
38 | 40 | ||
39 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | 41 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, |
40 | unsigned *usedp, char **idp) | 42 | unsigned *usedp, char **idp) |
41 | { | 43 | { |
42 | unsigned k; | 44 | unsigned k; |
43 | 45 | ||
@@ -101,6 +103,35 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | |||
101 | return NULL; | 103 | return NULL; |
102 | } | 104 | } |
103 | 105 | ||
106 | static inline int | ||
107 | in_irq_stack(unsigned long *stack, unsigned long *irq_stack, | ||
108 | unsigned long *irq_stack_end) | ||
109 | { | ||
110 | return (stack >= irq_stack && stack < irq_stack_end); | ||
111 | } | ||
112 | |||
113 | /* | ||
114 | * We are returning from the irq stack and go to the previous one. | ||
115 | * If the previous stack is also in the irq stack, then bp in the first | ||
116 | * frame of the irq stack points to the previous, interrupted one. | ||
117 | * Otherwise we have another level of indirection: We first save | ||
118 | * the bp of the previous stack, then we switch the stack to the irq one | ||
119 | * and save a new bp that links to the previous one. | ||
120 | * (See save_args()) | ||
121 | */ | ||
122 | static inline unsigned long | ||
123 | fixup_bp_irq_link(unsigned long bp, unsigned long *stack, | ||
124 | unsigned long *irq_stack, unsigned long *irq_stack_end) | ||
125 | { | ||
126 | #ifdef CONFIG_FRAME_POINTER | ||
127 | struct stack_frame *frame = (struct stack_frame *)bp; | ||
128 | |||
129 | if (!in_irq_stack(stack, irq_stack, irq_stack_end)) | ||
130 | return (unsigned long)frame->next_frame; | ||
131 | #endif | ||
132 | return bp; | ||
133 | } | ||
134 | |||
104 | /* | 135 | /* |
105 | * x86-64 can have up to three kernel stacks: | 136 | * x86-64 can have up to three kernel stacks: |
106 | * process stack | 137 | * process stack |
@@ -157,8 +188,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
157 | if (ops->stack(data, id) < 0) | 188 | if (ops->stack(data, id) < 0) |
158 | break; | 189 | break; |
159 | 190 | ||
160 | bp = print_context_stack(tinfo, stack, bp, ops, | 191 | bp = ops->walk_stack(tinfo, stack, bp, ops, |
161 | data, estack_end, &graph); | 192 | data, estack_end, &graph); |
162 | ops->stack(data, "<EOE>"); | 193 | ops->stack(data, "<EOE>"); |
163 | /* | 194 | /* |
164 | * We link to the next stack via the | 195 | * We link to the next stack via the |
@@ -173,7 +204,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
173 | irq_stack = irq_stack_end - | 204 | irq_stack = irq_stack_end - |
174 | (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack); | 205 | (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack); |
175 | 206 | ||
176 | if (stack >= irq_stack && stack < irq_stack_end) { | 207 | if (in_irq_stack(stack, irq_stack, irq_stack_end)) { |
177 | if (ops->stack(data, "IRQ") < 0) | 208 | if (ops->stack(data, "IRQ") < 0) |
178 | break; | 209 | break; |
179 | bp = print_context_stack(tinfo, stack, bp, | 210 | bp = print_context_stack(tinfo, stack, bp, |
@@ -184,6 +215,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
184 | * pointer (index -1 to end) in the IRQ stack: | 215 | * pointer (index -1 to end) in the IRQ stack: |
185 | */ | 216 | */ |
186 | stack = (unsigned long *) (irq_stack_end[-1]); | 217 | stack = (unsigned long *) (irq_stack_end[-1]); |
218 | bp = fixup_bp_irq_link(bp, stack, irq_stack, | ||
219 | irq_stack_end); | ||
187 | irq_stack_end = NULL; | 220 | irq_stack_end = NULL; |
188 | ops->stack(data, "EOI"); | 221 | ops->stack(data, "EOI"); |
189 | continue; | 222 | continue; |
@@ -202,21 +235,24 @@ EXPORT_SYMBOL(dump_trace); | |||
202 | 235 | ||
203 | void | 236 | void |
204 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 237 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
205 | unsigned long *sp, unsigned long bp, char *log_lvl) | 238 | unsigned long *sp, unsigned long bp, char *log_lvl) |
206 | { | 239 | { |
240 | unsigned long *irq_stack_end; | ||
241 | unsigned long *irq_stack; | ||
207 | unsigned long *stack; | 242 | unsigned long *stack; |
243 | int cpu; | ||
208 | int i; | 244 | int i; |
209 | const int cpu = smp_processor_id(); | 245 | |
210 | unsigned long *irq_stack_end = | 246 | preempt_disable(); |
211 | (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); | 247 | cpu = smp_processor_id(); |
212 | unsigned long *irq_stack = | 248 | |
213 | (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE); | 249 | irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); |
250 | irq_stack = (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE); | ||
214 | 251 | ||
215 | /* | 252 | /* |
216 | * debugging aid: "show_stack(NULL, NULL);" prints the | 253 | * Debugging aid: "show_stack(NULL, NULL);" prints the |
217 | * back trace for this cpu. | 254 | * back trace for this cpu: |
218 | */ | 255 | */ |
219 | |||
220 | if (sp == NULL) { | 256 | if (sp == NULL) { |
221 | if (task) | 257 | if (task) |
222 | sp = (unsigned long *)task->thread.sp; | 258 | sp = (unsigned long *)task->thread.sp; |
@@ -240,6 +276,8 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
240 | printk(" %016lx", *stack++); | 276 | printk(" %016lx", *stack++); |
241 | touch_nmi_watchdog(); | 277 | touch_nmi_watchdog(); |
242 | } | 278 | } |
279 | preempt_enable(); | ||
280 | |||
243 | printk("\n"); | 281 | printk("\n"); |
244 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | 282 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); |
245 | } | 283 | } |
@@ -303,4 +341,3 @@ int is_valid_bugaddr(unsigned long ip) | |||
303 | 341 | ||
304 | return ud2 == 0x0b0f; | 342 | return ud2 == 0x0b0f; |
305 | } | 343 | } |
306 | |||
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 85419bb7d4ab..05ed7ab2ca48 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -724,7 +724,7 @@ core_initcall(e820_mark_nvs_memory); | |||
724 | /* | 724 | /* |
725 | * Early reserved memory areas. | 725 | * Early reserved memory areas. |
726 | */ | 726 | */ |
727 | #define MAX_EARLY_RES 20 | 727 | #define MAX_EARLY_RES 32 |
728 | 728 | ||
729 | struct early_res { | 729 | struct early_res { |
730 | u64 start, end; | 730 | u64 start, end; |
@@ -732,7 +732,16 @@ struct early_res { | |||
732 | char overlap_ok; | 732 | char overlap_ok; |
733 | }; | 733 | }; |
734 | static struct early_res early_res[MAX_EARLY_RES] __initdata = { | 734 | static struct early_res early_res[MAX_EARLY_RES] __initdata = { |
735 | { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ | 735 | { 0, PAGE_SIZE, "BIOS data page", 1 }, /* BIOS data page */ |
736 | #ifdef CONFIG_X86_32 | ||
737 | /* | ||
738 | * But first pinch a few for the stack/trampoline stuff | ||
739 | * FIXME: Don't need the extra page at 4K, but need to fix | ||
740 | * trampoline before removing it. (see the GDT stuff) | ||
741 | */ | ||
742 | { PAGE_SIZE, PAGE_SIZE, "EX TRAMPOLINE", 1 }, | ||
743 | #endif | ||
744 | |||
736 | {} | 745 | {} |
737 | }; | 746 | }; |
738 | 747 | ||
@@ -1378,8 +1387,8 @@ static unsigned long ram_alignment(resource_size_t pos) | |||
1378 | if (mb < 16) | 1387 | if (mb < 16) |
1379 | return 1024*1024; | 1388 | return 1024*1024; |
1380 | 1389 | ||
1381 | /* To 32MB for anything above that */ | 1390 | /* To 64MB for anything above that */ |
1382 | return 32*1024*1024; | 1391 | return 64*1024*1024; |
1383 | } | 1392 | } |
1384 | 1393 | ||
1385 | #define MAX_RESOURCE_SIZE ((resource_size_t)-1) | 1394 | #define MAX_RESOURCE_SIZE ((resource_size_t)-1) |
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 41fd965c80c6..b9c830c12b4a 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c | |||
@@ -206,8 +206,11 @@ static int __init setup_early_printk(char *buf) | |||
206 | 206 | ||
207 | while (*buf != '\0') { | 207 | while (*buf != '\0') { |
208 | if (!strncmp(buf, "serial", 6)) { | 208 | if (!strncmp(buf, "serial", 6)) { |
209 | early_serial_init(buf + 6); | 209 | buf += 6; |
210 | early_serial_init(buf); | ||
210 | early_console_register(&early_serial_console, keep); | 211 | early_console_register(&early_serial_console, keep); |
212 | if (!strncmp(buf, ",ttyS", 5)) | ||
213 | buf += 5; | ||
211 | } | 214 | } |
212 | if (!strncmp(buf, "ttyS", 4)) { | 215 | if (!strncmp(buf, "ttyS", 4)) { |
213 | early_serial_init(buf + 4); | 216 | early_serial_init(buf + 4); |
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index ad5bd988fb79..cdcfb122f256 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c | |||
@@ -454,8 +454,10 @@ void __init efi_init(void) | |||
454 | if (add_efi_memmap) | 454 | if (add_efi_memmap) |
455 | do_add_efi_memmap(); | 455 | do_add_efi_memmap(); |
456 | 456 | ||
457 | #ifdef CONFIG_X86_32 | ||
457 | x86_platform.get_wallclock = efi_get_time; | 458 | x86_platform.get_wallclock = efi_get_time; |
458 | x86_platform.set_wallclock = efi_set_rtc_mmss; | 459 | x86_platform.set_wallclock = efi_set_rtc_mmss; |
460 | #endif | ||
459 | 461 | ||
460 | /* Setup for EFI runtime service */ | 462 | /* Setup for EFI runtime service */ |
461 | reboot_type = BOOT_EFI; | 463 | reboot_type = BOOT_EFI; |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c097e7d607c6..44a8e0dc6737 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -334,6 +334,10 @@ ENTRY(ret_from_fork) | |||
334 | END(ret_from_fork) | 334 | END(ret_from_fork) |
335 | 335 | ||
336 | /* | 336 | /* |
337 | * Interrupt exit functions should be protected against kprobes | ||
338 | */ | ||
339 | .pushsection .kprobes.text, "ax" | ||
340 | /* | ||
337 | * Return to user mode is not as complex as all this looks, | 341 | * Return to user mode is not as complex as all this looks, |
338 | * but we want the default path for a system call return to | 342 | * but we want the default path for a system call return to |
339 | * go as quickly as possible which is why some of this is | 343 | * go as quickly as possible which is why some of this is |
@@ -383,6 +387,10 @@ need_resched: | |||
383 | END(resume_kernel) | 387 | END(resume_kernel) |
384 | #endif | 388 | #endif |
385 | CFI_ENDPROC | 389 | CFI_ENDPROC |
390 | /* | ||
391 | * End of kprobes section | ||
392 | */ | ||
393 | .popsection | ||
386 | 394 | ||
387 | /* SYSENTER_RETURN points to after the "sysenter" instruction in | 395 | /* SYSENTER_RETURN points to after the "sysenter" instruction in |
388 | the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ | 396 | the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ |
@@ -513,6 +521,10 @@ sysexit_audit: | |||
513 | PTGS_TO_GS_EX | 521 | PTGS_TO_GS_EX |
514 | ENDPROC(ia32_sysenter_target) | 522 | ENDPROC(ia32_sysenter_target) |
515 | 523 | ||
524 | /* | ||
525 | * syscall stub including irq exit should be protected against kprobes | ||
526 | */ | ||
527 | .pushsection .kprobes.text, "ax" | ||
516 | # system call handler stub | 528 | # system call handler stub |
517 | ENTRY(system_call) | 529 | ENTRY(system_call) |
518 | RING0_INT_FRAME # can't unwind into user space anyway | 530 | RING0_INT_FRAME # can't unwind into user space anyway |
@@ -705,26 +717,69 @@ syscall_badsys: | |||
705 | jmp resume_userspace | 717 | jmp resume_userspace |
706 | END(syscall_badsys) | 718 | END(syscall_badsys) |
707 | CFI_ENDPROC | 719 | CFI_ENDPROC |
720 | /* | ||
721 | * End of kprobes section | ||
722 | */ | ||
723 | .popsection | ||
708 | 724 | ||
709 | /* | 725 | /* |
710 | * System calls that need a pt_regs pointer. | 726 | * System calls that need a pt_regs pointer. |
711 | */ | 727 | */ |
712 | #define PTREGSCALL(name) \ | 728 | #define PTREGSCALL0(name) \ |
713 | ALIGN; \ | 729 | ALIGN; \ |
714 | ptregs_##name: \ | 730 | ptregs_##name: \ |
715 | leal 4(%esp),%eax; \ | 731 | leal 4(%esp),%eax; \ |
716 | jmp sys_##name; | 732 | jmp sys_##name; |
717 | 733 | ||
718 | PTREGSCALL(iopl) | 734 | #define PTREGSCALL1(name) \ |
719 | PTREGSCALL(fork) | 735 | ALIGN; \ |
720 | PTREGSCALL(clone) | 736 | ptregs_##name: \ |
721 | PTREGSCALL(vfork) | 737 | leal 4(%esp),%edx; \ |
722 | PTREGSCALL(execve) | 738 | movl (PT_EBX+4)(%esp),%eax; \ |
723 | PTREGSCALL(sigaltstack) | 739 | jmp sys_##name; |
724 | PTREGSCALL(sigreturn) | 740 | |
725 | PTREGSCALL(rt_sigreturn) | 741 | #define PTREGSCALL2(name) \ |
726 | PTREGSCALL(vm86) | 742 | ALIGN; \ |
727 | PTREGSCALL(vm86old) | 743 | ptregs_##name: \ |
744 | leal 4(%esp),%ecx; \ | ||
745 | movl (PT_ECX+4)(%esp),%edx; \ | ||
746 | movl (PT_EBX+4)(%esp),%eax; \ | ||
747 | jmp sys_##name; | ||
748 | |||
749 | #define PTREGSCALL3(name) \ | ||
750 | ALIGN; \ | ||
751 | ptregs_##name: \ | ||
752 | leal 4(%esp),%eax; \ | ||
753 | pushl %eax; \ | ||
754 | movl PT_EDX(%eax),%ecx; \ | ||
755 | movl PT_ECX(%eax),%edx; \ | ||
756 | movl PT_EBX(%eax),%eax; \ | ||
757 | call sys_##name; \ | ||
758 | addl $4,%esp; \ | ||
759 | ret | ||
760 | |||
761 | PTREGSCALL1(iopl) | ||
762 | PTREGSCALL0(fork) | ||
763 | PTREGSCALL0(vfork) | ||
764 | PTREGSCALL3(execve) | ||
765 | PTREGSCALL2(sigaltstack) | ||
766 | PTREGSCALL0(sigreturn) | ||
767 | PTREGSCALL0(rt_sigreturn) | ||
768 | PTREGSCALL2(vm86) | ||
769 | PTREGSCALL1(vm86old) | ||
770 | |||
771 | /* Clone is an oddball. The 4th arg is in %edi */ | ||
772 | ALIGN; | ||
773 | ptregs_clone: | ||
774 | leal 4(%esp),%eax | ||
775 | pushl %eax | ||
776 | pushl PT_EDI(%eax) | ||
777 | movl PT_EDX(%eax),%ecx | ||
778 | movl PT_ECX(%eax),%edx | ||
779 | movl PT_EBX(%eax),%eax | ||
780 | call sys_clone | ||
781 | addl $8,%esp | ||
782 | ret | ||
728 | 783 | ||
729 | .macro FIXUP_ESPFIX_STACK | 784 | .macro FIXUP_ESPFIX_STACK |
730 | /* | 785 | /* |
@@ -814,6 +869,10 @@ common_interrupt: | |||
814 | ENDPROC(common_interrupt) | 869 | ENDPROC(common_interrupt) |
815 | CFI_ENDPROC | 870 | CFI_ENDPROC |
816 | 871 | ||
872 | /* | ||
873 | * Irq entries should be protected against kprobes | ||
874 | */ | ||
875 | .pushsection .kprobes.text, "ax" | ||
817 | #define BUILD_INTERRUPT3(name, nr, fn) \ | 876 | #define BUILD_INTERRUPT3(name, nr, fn) \ |
818 | ENTRY(name) \ | 877 | ENTRY(name) \ |
819 | RING0_INT_FRAME; \ | 878 | RING0_INT_FRAME; \ |
@@ -980,16 +1039,16 @@ ENTRY(spurious_interrupt_bug) | |||
980 | jmp error_code | 1039 | jmp error_code |
981 | CFI_ENDPROC | 1040 | CFI_ENDPROC |
982 | END(spurious_interrupt_bug) | 1041 | END(spurious_interrupt_bug) |
1042 | /* | ||
1043 | * End of kprobes section | ||
1044 | */ | ||
1045 | .popsection | ||
983 | 1046 | ||
984 | ENTRY(kernel_thread_helper) | 1047 | ENTRY(kernel_thread_helper) |
985 | pushl $0 # fake return address for unwinder | 1048 | pushl $0 # fake return address for unwinder |
986 | CFI_STARTPROC | 1049 | CFI_STARTPROC |
987 | movl %edx,%eax | 1050 | movl %edi,%eax |
988 | push %edx | 1051 | call *%esi |
989 | CFI_ADJUST_CFA_OFFSET 4 | ||
990 | call *%ebx | ||
991 | push %eax | ||
992 | CFI_ADJUST_CFA_OFFSET 4 | ||
993 | call do_exit | 1052 | call do_exit |
994 | ud2 # padding for call trace | 1053 | ud2 # padding for call trace |
995 | CFI_ENDPROC | 1054 | CFI_ENDPROC |
@@ -1185,17 +1244,14 @@ END(ftrace_graph_caller) | |||
1185 | 1244 | ||
1186 | .globl return_to_handler | 1245 | .globl return_to_handler |
1187 | return_to_handler: | 1246 | return_to_handler: |
1188 | pushl $0 | ||
1189 | pushl %eax | 1247 | pushl %eax |
1190 | pushl %ecx | ||
1191 | pushl %edx | 1248 | pushl %edx |
1192 | movl %ebp, %eax | 1249 | movl %ebp, %eax |
1193 | call ftrace_return_to_handler | 1250 | call ftrace_return_to_handler |
1194 | movl %eax, 0xc(%esp) | 1251 | movl %eax, %ecx |
1195 | popl %edx | 1252 | popl %edx |
1196 | popl %ecx | ||
1197 | popl %eax | 1253 | popl %eax |
1198 | ret | 1254 | jmp *%ecx |
1199 | #endif | 1255 | #endif |
1200 | 1256 | ||
1201 | .section .rodata,"a" | 1257 | .section .rodata,"a" |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index b5c061f8f358..0697ff139837 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -155,11 +155,11 @@ GLOBAL(return_to_handler) | |||
155 | 155 | ||
156 | call ftrace_return_to_handler | 156 | call ftrace_return_to_handler |
157 | 157 | ||
158 | movq %rax, 16(%rsp) | 158 | movq %rax, %rdi |
159 | movq 8(%rsp), %rdx | 159 | movq 8(%rsp), %rdx |
160 | movq (%rsp), %rax | 160 | movq (%rsp), %rax |
161 | addq $16, %rsp | 161 | addq $24, %rsp |
162 | retq | 162 | jmp *%rdi |
163 | #endif | 163 | #endif |
164 | 164 | ||
165 | 165 | ||
@@ -803,6 +803,10 @@ END(interrupt) | |||
803 | call \func | 803 | call \func |
804 | .endm | 804 | .endm |
805 | 805 | ||
806 | /* | ||
807 | * Interrupt entry/exit should be protected against kprobes | ||
808 | */ | ||
809 | .pushsection .kprobes.text, "ax" | ||
806 | /* | 810 | /* |
807 | * The interrupt stubs push (~vector+0x80) onto the stack and | 811 | * The interrupt stubs push (~vector+0x80) onto the stack and |
808 | * then jump to common_interrupt. | 812 | * then jump to common_interrupt. |
@@ -941,6 +945,10 @@ ENTRY(retint_kernel) | |||
941 | 945 | ||
942 | CFI_ENDPROC | 946 | CFI_ENDPROC |
943 | END(common_interrupt) | 947 | END(common_interrupt) |
948 | /* | ||
949 | * End of kprobes section | ||
950 | */ | ||
951 | .popsection | ||
944 | 952 | ||
945 | /* | 953 | /* |
946 | * APIC interrupts. | 954 | * APIC interrupts. |
@@ -969,8 +977,8 @@ apicinterrupt UV_BAU_MESSAGE \ | |||
969 | #endif | 977 | #endif |
970 | apicinterrupt LOCAL_TIMER_VECTOR \ | 978 | apicinterrupt LOCAL_TIMER_VECTOR \ |
971 | apic_timer_interrupt smp_apic_timer_interrupt | 979 | apic_timer_interrupt smp_apic_timer_interrupt |
972 | apicinterrupt GENERIC_INTERRUPT_VECTOR \ | 980 | apicinterrupt X86_PLATFORM_IPI_VECTOR \ |
973 | generic_interrupt smp_generic_interrupt | 981 | x86_platform_ipi smp_x86_platform_ipi |
974 | 982 | ||
975 | #ifdef CONFIG_SMP | 983 | #ifdef CONFIG_SMP |
976 | apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \ | 984 | apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \ |
@@ -1068,10 +1076,10 @@ ENTRY(\sym) | |||
1068 | TRACE_IRQS_OFF | 1076 | TRACE_IRQS_OFF |
1069 | movq %rsp,%rdi /* pt_regs pointer */ | 1077 | movq %rsp,%rdi /* pt_regs pointer */ |
1070 | xorl %esi,%esi /* no error code */ | 1078 | xorl %esi,%esi /* no error code */ |
1071 | PER_CPU(init_tss, %rbp) | 1079 | PER_CPU(init_tss, %r12) |
1072 | subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) | 1080 | subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12) |
1073 | call \do_sym | 1081 | call \do_sym |
1074 | addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) | 1082 | addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12) |
1075 | jmp paranoid_exit /* %ebx: no swapgs flag */ | 1083 | jmp paranoid_exit /* %ebx: no swapgs flag */ |
1076 | CFI_ENDPROC | 1084 | CFI_ENDPROC |
1077 | END(\sym) | 1085 | END(\sym) |
@@ -1158,63 +1166,20 @@ bad_gs: | |||
1158 | jmp 2b | 1166 | jmp 2b |
1159 | .previous | 1167 | .previous |
1160 | 1168 | ||
1161 | /* | 1169 | ENTRY(kernel_thread_helper) |
1162 | * Create a kernel thread. | ||
1163 | * | ||
1164 | * C extern interface: | ||
1165 | * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) | ||
1166 | * | ||
1167 | * asm input arguments: | ||
1168 | * rdi: fn, rsi: arg, rdx: flags | ||
1169 | */ | ||
1170 | ENTRY(kernel_thread) | ||
1171 | CFI_STARTPROC | ||
1172 | FAKE_STACK_FRAME $child_rip | ||
1173 | SAVE_ALL | ||
1174 | |||
1175 | # rdi: flags, rsi: usp, rdx: will be &pt_regs | ||
1176 | movq %rdx,%rdi | ||
1177 | orq kernel_thread_flags(%rip),%rdi | ||
1178 | movq $-1, %rsi | ||
1179 | movq %rsp, %rdx | ||
1180 | |||
1181 | xorl %r8d,%r8d | ||
1182 | xorl %r9d,%r9d | ||
1183 | |||
1184 | # clone now | ||
1185 | call do_fork | ||
1186 | movq %rax,RAX(%rsp) | ||
1187 | xorl %edi,%edi | ||
1188 | |||
1189 | /* | ||
1190 | * It isn't worth to check for reschedule here, | ||
1191 | * so internally to the x86_64 port you can rely on kernel_thread() | ||
1192 | * not to reschedule the child before returning, this avoids the need | ||
1193 | * of hacks for example to fork off the per-CPU idle tasks. | ||
1194 | * [Hopefully no generic code relies on the reschedule -AK] | ||
1195 | */ | ||
1196 | RESTORE_ALL | ||
1197 | UNFAKE_STACK_FRAME | ||
1198 | ret | ||
1199 | CFI_ENDPROC | ||
1200 | END(kernel_thread) | ||
1201 | |||
1202 | ENTRY(child_rip) | ||
1203 | pushq $0 # fake return address | 1170 | pushq $0 # fake return address |
1204 | CFI_STARTPROC | 1171 | CFI_STARTPROC |
1205 | /* | 1172 | /* |
1206 | * Here we are in the child and the registers are set as they were | 1173 | * Here we are in the child and the registers are set as they were |
1207 | * at kernel_thread() invocation in the parent. | 1174 | * at kernel_thread() invocation in the parent. |
1208 | */ | 1175 | */ |
1209 | movq %rdi, %rax | 1176 | call *%rsi |
1210 | movq %rsi, %rdi | ||
1211 | call *%rax | ||
1212 | # exit | 1177 | # exit |
1213 | mov %eax, %edi | 1178 | mov %eax, %edi |
1214 | call do_exit | 1179 | call do_exit |
1215 | ud2 # padding for call trace | 1180 | ud2 # padding for call trace |
1216 | CFI_ENDPROC | 1181 | CFI_ENDPROC |
1217 | END(child_rip) | 1182 | END(kernel_thread_helper) |
1218 | 1183 | ||
1219 | /* | 1184 | /* |
1220 | * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. | 1185 | * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. |
@@ -1491,12 +1456,17 @@ error_kernelspace: | |||
1491 | leaq irq_return(%rip),%rcx | 1456 | leaq irq_return(%rip),%rcx |
1492 | cmpq %rcx,RIP+8(%rsp) | 1457 | cmpq %rcx,RIP+8(%rsp) |
1493 | je error_swapgs | 1458 | je error_swapgs |
1494 | movl %ecx,%ecx /* zero extend */ | 1459 | movl %ecx,%eax /* zero extend */ |
1495 | cmpq %rcx,RIP+8(%rsp) | 1460 | cmpq %rax,RIP+8(%rsp) |
1496 | je error_swapgs | 1461 | je bstep_iret |
1497 | cmpq $gs_change,RIP+8(%rsp) | 1462 | cmpq $gs_change,RIP+8(%rsp) |
1498 | je error_swapgs | 1463 | je error_swapgs |
1499 | jmp error_sti | 1464 | jmp error_sti |
1465 | |||
1466 | bstep_iret: | ||
1467 | /* Fix truncated RIP */ | ||
1468 | movq %rcx,RIP+8(%rsp) | ||
1469 | jmp error_swapgs | ||
1500 | END(error_entry) | 1470 | END(error_entry) |
1501 | 1471 | ||
1502 | 1472 | ||
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 9dbb527e1652..309689245431 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -9,6 +9,8 @@ | |||
9 | * the dangers of modifying code on the run. | 9 | * the dangers of modifying code on the run. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
13 | |||
12 | #include <linux/spinlock.h> | 14 | #include <linux/spinlock.h> |
13 | #include <linux/hardirq.h> | 15 | #include <linux/hardirq.h> |
14 | #include <linux/uaccess.h> | 16 | #include <linux/uaccess.h> |
@@ -187,9 +189,26 @@ static void wait_for_nmi(void) | |||
187 | nmi_wait_count++; | 189 | nmi_wait_count++; |
188 | } | 190 | } |
189 | 191 | ||
192 | static inline int | ||
193 | within(unsigned long addr, unsigned long start, unsigned long end) | ||
194 | { | ||
195 | return addr >= start && addr < end; | ||
196 | } | ||
197 | |||
190 | static int | 198 | static int |
191 | do_ftrace_mod_code(unsigned long ip, void *new_code) | 199 | do_ftrace_mod_code(unsigned long ip, void *new_code) |
192 | { | 200 | { |
201 | /* | ||
202 | * On x86_64, kernel text mappings are mapped read-only with | ||
203 | * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead | ||
204 | * of the kernel text mapping to modify the kernel text. | ||
205 | * | ||
206 | * For 32bit kernels, these mappings are same and we can use | ||
207 | * kernel identity mapping to modify code. | ||
208 | */ | ||
209 | if (within(ip, (unsigned long)_text, (unsigned long)_etext)) | ||
210 | ip = (unsigned long)__va(__pa(ip)); | ||
211 | |||
193 | mod_code_ip = (void *)ip; | 212 | mod_code_ip = (void *)ip; |
194 | mod_code_newcode = new_code; | 213 | mod_code_newcode = new_code; |
195 | 214 | ||
@@ -336,15 +355,15 @@ int __init ftrace_dyn_arch_init(void *data) | |||
336 | 355 | ||
337 | switch (faulted) { | 356 | switch (faulted) { |
338 | case 0: | 357 | case 0: |
339 | pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n"); | 358 | pr_info("converting mcount calls to 0f 1f 44 00 00\n"); |
340 | memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE); | 359 | memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE); |
341 | break; | 360 | break; |
342 | case 1: | 361 | case 1: |
343 | pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n"); | 362 | pr_info("converting mcount calls to 66 66 66 66 90\n"); |
344 | memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE); | 363 | memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE); |
345 | break; | 364 | break; |
346 | case 2: | 365 | case 2: |
347 | pr_info("ftrace: converting mcount calls to jmp . + 5\n"); | 366 | pr_info("converting mcount calls to jmp . + 5\n"); |
348 | memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE); | 367 | memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE); |
349 | break; | 368 | break; |
350 | } | 369 | } |
@@ -468,82 +487,10 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, | |||
468 | 487 | ||
469 | #ifdef CONFIG_FTRACE_SYSCALLS | 488 | #ifdef CONFIG_FTRACE_SYSCALLS |
470 | 489 | ||
471 | extern unsigned long __start_syscalls_metadata[]; | ||
472 | extern unsigned long __stop_syscalls_metadata[]; | ||
473 | extern unsigned long *sys_call_table; | 490 | extern unsigned long *sys_call_table; |
474 | 491 | ||
475 | static struct syscall_metadata **syscalls_metadata; | 492 | unsigned long __init arch_syscall_addr(int nr) |
476 | |||
477 | static struct syscall_metadata *find_syscall_meta(unsigned long *syscall) | ||
478 | { | ||
479 | struct syscall_metadata *start; | ||
480 | struct syscall_metadata *stop; | ||
481 | char str[KSYM_SYMBOL_LEN]; | ||
482 | |||
483 | |||
484 | start = (struct syscall_metadata *)__start_syscalls_metadata; | ||
485 | stop = (struct syscall_metadata *)__stop_syscalls_metadata; | ||
486 | kallsyms_lookup((unsigned long) syscall, NULL, NULL, NULL, str); | ||
487 | |||
488 | for ( ; start < stop; start++) { | ||
489 | if (start->name && !strcmp(start->name, str)) | ||
490 | return start; | ||
491 | } | ||
492 | return NULL; | ||
493 | } | ||
494 | |||
495 | struct syscall_metadata *syscall_nr_to_meta(int nr) | ||
496 | { | ||
497 | if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) | ||
498 | return NULL; | ||
499 | |||
500 | return syscalls_metadata[nr]; | ||
501 | } | ||
502 | |||
503 | int syscall_name_to_nr(char *name) | ||
504 | { | ||
505 | int i; | ||
506 | |||
507 | if (!syscalls_metadata) | ||
508 | return -1; | ||
509 | |||
510 | for (i = 0; i < NR_syscalls; i++) { | ||
511 | if (syscalls_metadata[i]) { | ||
512 | if (!strcmp(syscalls_metadata[i]->name, name)) | ||
513 | return i; | ||
514 | } | ||
515 | } | ||
516 | return -1; | ||
517 | } | ||
518 | |||
519 | void set_syscall_enter_id(int num, int id) | ||
520 | { | ||
521 | syscalls_metadata[num]->enter_id = id; | ||
522 | } | ||
523 | |||
524 | void set_syscall_exit_id(int num, int id) | ||
525 | { | 493 | { |
526 | syscalls_metadata[num]->exit_id = id; | 494 | return (unsigned long)(&sys_call_table)[nr]; |
527 | } | ||
528 | |||
529 | static int __init arch_init_ftrace_syscalls(void) | ||
530 | { | ||
531 | int i; | ||
532 | struct syscall_metadata *meta; | ||
533 | unsigned long **psys_syscall_table = &sys_call_table; | ||
534 | |||
535 | syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * | ||
536 | NR_syscalls, GFP_KERNEL); | ||
537 | if (!syscalls_metadata) { | ||
538 | WARN_ON(1); | ||
539 | return -ENOMEM; | ||
540 | } | ||
541 | |||
542 | for (i = 0; i < NR_syscalls; i++) { | ||
543 | meta = find_syscall_meta(psys_syscall_table[i]); | ||
544 | syscalls_metadata[i] = meta; | ||
545 | } | ||
546 | return 0; | ||
547 | } | 495 | } |
548 | arch_initcall(arch_init_ftrace_syscalls); | ||
549 | #endif | 496 | #endif |
diff --git a/arch/x86/kernel/geode_32.c b/arch/x86/kernel/geode_32.c deleted file mode 100644 index 9b08e852fd1a..000000000000 --- a/arch/x86/kernel/geode_32.c +++ /dev/null | |||
@@ -1,196 +0,0 @@ | |||
1 | /* | ||
2 | * AMD Geode southbridge support code | ||
3 | * Copyright (C) 2006, Advanced Micro Devices, Inc. | ||
4 | * Copyright (C) 2007, Andres Salomon <dilinger@debian.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of version 2 of the GNU General Public License | ||
8 | * as published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/module.h> | ||
13 | #include <linux/ioport.h> | ||
14 | #include <linux/io.h> | ||
15 | #include <asm/msr.h> | ||
16 | #include <asm/geode.h> | ||
17 | |||
18 | static struct { | ||
19 | char *name; | ||
20 | u32 msr; | ||
21 | int size; | ||
22 | u32 base; | ||
23 | } lbars[] = { | ||
24 | { "geode-pms", MSR_LBAR_PMS, LBAR_PMS_SIZE, 0 }, | ||
25 | { "geode-acpi", MSR_LBAR_ACPI, LBAR_ACPI_SIZE, 0 }, | ||
26 | { "geode-gpio", MSR_LBAR_GPIO, LBAR_GPIO_SIZE, 0 }, | ||
27 | { "geode-mfgpt", MSR_LBAR_MFGPT, LBAR_MFGPT_SIZE, 0 } | ||
28 | }; | ||
29 | |||
30 | static void __init init_lbars(void) | ||
31 | { | ||
32 | u32 lo, hi; | ||
33 | int i; | ||
34 | |||
35 | for (i = 0; i < ARRAY_SIZE(lbars); i++) { | ||
36 | rdmsr(lbars[i].msr, lo, hi); | ||
37 | if (hi & 0x01) | ||
38 | lbars[i].base = lo & 0x0000ffff; | ||
39 | |||
40 | if (lbars[i].base == 0) | ||
41 | printk(KERN_ERR "geode: Couldn't initialize '%s'\n", | ||
42 | lbars[i].name); | ||
43 | } | ||
44 | } | ||
45 | |||
46 | int geode_get_dev_base(unsigned int dev) | ||
47 | { | ||
48 | BUG_ON(dev >= ARRAY_SIZE(lbars)); | ||
49 | return lbars[dev].base; | ||
50 | } | ||
51 | EXPORT_SYMBOL_GPL(geode_get_dev_base); | ||
52 | |||
53 | /* === GPIO API === */ | ||
54 | |||
55 | void geode_gpio_set(u32 gpio, unsigned int reg) | ||
56 | { | ||
57 | u32 base = geode_get_dev_base(GEODE_DEV_GPIO); | ||
58 | |||
59 | if (!base) | ||
60 | return; | ||
61 | |||
62 | /* low bank register */ | ||
63 | if (gpio & 0xFFFF) | ||
64 | outl(gpio & 0xFFFF, base + reg); | ||
65 | /* high bank register */ | ||
66 | gpio >>= 16; | ||
67 | if (gpio) | ||
68 | outl(gpio, base + 0x80 + reg); | ||
69 | } | ||
70 | EXPORT_SYMBOL_GPL(geode_gpio_set); | ||
71 | |||
72 | void geode_gpio_clear(u32 gpio, unsigned int reg) | ||
73 | { | ||
74 | u32 base = geode_get_dev_base(GEODE_DEV_GPIO); | ||
75 | |||
76 | if (!base) | ||
77 | return; | ||
78 | |||
79 | /* low bank register */ | ||
80 | if (gpio & 0xFFFF) | ||
81 | outl((gpio & 0xFFFF) << 16, base + reg); | ||
82 | /* high bank register */ | ||
83 | gpio &= (0xFFFF << 16); | ||
84 | if (gpio) | ||
85 | outl(gpio, base + 0x80 + reg); | ||
86 | } | ||
87 | EXPORT_SYMBOL_GPL(geode_gpio_clear); | ||
88 | |||
89 | int geode_gpio_isset(u32 gpio, unsigned int reg) | ||
90 | { | ||
91 | u32 base = geode_get_dev_base(GEODE_DEV_GPIO); | ||
92 | u32 val; | ||
93 | |||
94 | if (!base) | ||
95 | return 0; | ||
96 | |||
97 | /* low bank register */ | ||
98 | if (gpio & 0xFFFF) { | ||
99 | val = inl(base + reg) & (gpio & 0xFFFF); | ||
100 | if ((gpio & 0xFFFF) == val) | ||
101 | return 1; | ||
102 | } | ||
103 | /* high bank register */ | ||
104 | gpio >>= 16; | ||
105 | if (gpio) { | ||
106 | val = inl(base + 0x80 + reg) & gpio; | ||
107 | if (gpio == val) | ||
108 | return 1; | ||
109 | } | ||
110 | return 0; | ||
111 | } | ||
112 | EXPORT_SYMBOL_GPL(geode_gpio_isset); | ||
113 | |||
114 | void geode_gpio_set_irq(unsigned int group, unsigned int irq) | ||
115 | { | ||
116 | u32 lo, hi; | ||
117 | |||
118 | if (group > 7 || irq > 15) | ||
119 | return; | ||
120 | |||
121 | rdmsr(MSR_PIC_ZSEL_HIGH, lo, hi); | ||
122 | |||
123 | lo &= ~(0xF << (group * 4)); | ||
124 | lo |= (irq & 0xF) << (group * 4); | ||
125 | |||
126 | wrmsr(MSR_PIC_ZSEL_HIGH, lo, hi); | ||
127 | } | ||
128 | EXPORT_SYMBOL_GPL(geode_gpio_set_irq); | ||
129 | |||
130 | void geode_gpio_setup_event(unsigned int gpio, int pair, int pme) | ||
131 | { | ||
132 | u32 base = geode_get_dev_base(GEODE_DEV_GPIO); | ||
133 | u32 offset, shift, val; | ||
134 | |||
135 | if (gpio >= 24) | ||
136 | offset = GPIO_MAP_W; | ||
137 | else if (gpio >= 16) | ||
138 | offset = GPIO_MAP_Z; | ||
139 | else if (gpio >= 8) | ||
140 | offset = GPIO_MAP_Y; | ||
141 | else | ||
142 | offset = GPIO_MAP_X; | ||
143 | |||
144 | shift = (gpio % 8) * 4; | ||
145 | |||
146 | val = inl(base + offset); | ||
147 | |||
148 | /* Clear whatever was there before */ | ||
149 | val &= ~(0xF << shift); | ||
150 | |||
151 | /* And set the new value */ | ||
152 | |||
153 | val |= ((pair & 7) << shift); | ||
154 | |||
155 | /* Set the PME bit if this is a PME event */ | ||
156 | |||
157 | if (pme) | ||
158 | val |= (1 << (shift + 3)); | ||
159 | |||
160 | outl(val, base + offset); | ||
161 | } | ||
162 | EXPORT_SYMBOL_GPL(geode_gpio_setup_event); | ||
163 | |||
164 | int geode_has_vsa2(void) | ||
165 | { | ||
166 | static int has_vsa2 = -1; | ||
167 | |||
168 | if (has_vsa2 == -1) { | ||
169 | u16 val; | ||
170 | |||
171 | /* | ||
172 | * The VSA has virtual registers that we can query for a | ||
173 | * signature. | ||
174 | */ | ||
175 | outw(VSA_VR_UNLOCK, VSA_VRC_INDEX); | ||
176 | outw(VSA_VR_SIGNATURE, VSA_VRC_INDEX); | ||
177 | |||
178 | val = inw(VSA_VRC_DATA); | ||
179 | has_vsa2 = (val == AMD_VSA_SIG || val == GSW_VSA_SIG); | ||
180 | } | ||
181 | |||
182 | return has_vsa2; | ||
183 | } | ||
184 | EXPORT_SYMBOL_GPL(geode_has_vsa2); | ||
185 | |||
186 | static int __init geode_southbridge_init(void) | ||
187 | { | ||
188 | if (!is_geode()) | ||
189 | return -ENODEV; | ||
190 | |||
191 | init_lbars(); | ||
192 | (void) mfgpt_timer_setup(); | ||
193 | return 0; | ||
194 | } | ||
195 | |||
196 | postcore_initcall(geode_southbridge_init); | ||
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 4f8e2507e8f3..5051b94c9069 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c | |||
@@ -29,8 +29,6 @@ static void __init i386_default_early_setup(void) | |||
29 | 29 | ||
30 | void __init i386_start_kernel(void) | 30 | void __init i386_start_kernel(void) |
31 | { | 31 | { |
32 | reserve_trampoline_memory(); | ||
33 | |||
34 | reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); | 32 | reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); |
35 | 33 | ||
36 | #ifdef CONFIG_BLK_DEV_INITRD | 34 | #ifdef CONFIG_BLK_DEV_INITRD |
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 0b06cd778fd9..b5a9896ca1e7 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -98,8 +98,6 @@ void __init x86_64_start_reservations(char *real_mode_data) | |||
98 | { | 98 | { |
99 | copy_bootdata(__va(real_mode_data)); | 99 | copy_bootdata(__va(real_mode_data)); |
100 | 100 | ||
101 | reserve_trampoline_memory(); | ||
102 | |||
103 | reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); | 101 | reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); |
104 | 102 | ||
105 | #ifdef CONFIG_BLK_DEV_INITRD | 103 | #ifdef CONFIG_BLK_DEV_INITRD |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index fd39eaf83b84..37c3d4b17d85 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -18,6 +18,8 @@ | |||
18 | #include <asm/asm-offsets.h> | 18 | #include <asm/asm-offsets.h> |
19 | #include <asm/setup.h> | 19 | #include <asm/setup.h> |
20 | #include <asm/processor-flags.h> | 20 | #include <asm/processor-flags.h> |
21 | #include <asm/msr-index.h> | ||
22 | #include <asm/cpufeature.h> | ||
21 | #include <asm/percpu.h> | 23 | #include <asm/percpu.h> |
22 | 24 | ||
23 | /* Physical address */ | 25 | /* Physical address */ |
@@ -297,25 +299,27 @@ ENTRY(startup_32_smp) | |||
297 | orl %edx,%eax | 299 | orl %edx,%eax |
298 | movl %eax,%cr4 | 300 | movl %eax,%cr4 |
299 | 301 | ||
300 | btl $5, %eax # check if PAE is enabled | 302 | testb $X86_CR4_PAE, %al # check if PAE is enabled |
301 | jnc 6f | 303 | jz 6f |
302 | 304 | ||
303 | /* Check if extended functions are implemented */ | 305 | /* Check if extended functions are implemented */ |
304 | movl $0x80000000, %eax | 306 | movl $0x80000000, %eax |
305 | cpuid | 307 | cpuid |
306 | cmpl $0x80000000, %eax | 308 | /* Value must be in the range 0x80000001 to 0x8000ffff */ |
307 | jbe 6f | 309 | subl $0x80000001, %eax |
310 | cmpl $(0x8000ffff-0x80000001), %eax | ||
311 | ja 6f | ||
308 | mov $0x80000001, %eax | 312 | mov $0x80000001, %eax |
309 | cpuid | 313 | cpuid |
310 | /* Execute Disable bit supported? */ | 314 | /* Execute Disable bit supported? */ |
311 | btl $20, %edx | 315 | btl $(X86_FEATURE_NX & 31), %edx |
312 | jnc 6f | 316 | jnc 6f |
313 | 317 | ||
314 | /* Setup EFER (Extended Feature Enable Register) */ | 318 | /* Setup EFER (Extended Feature Enable Register) */ |
315 | movl $0xc0000080, %ecx | 319 | movl $MSR_EFER, %ecx |
316 | rdmsr | 320 | rdmsr |
317 | 321 | ||
318 | btsl $11, %eax | 322 | btsl $_EFER_NX, %eax |
319 | /* Make changes effective */ | 323 | /* Make changes effective */ |
320 | wrmsr | 324 | wrmsr |
321 | 325 | ||
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 780cd928fcd5..2d8b5035371c 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -212,8 +212,8 @@ ENTRY(secondary_startup_64) | |||
212 | */ | 212 | */ |
213 | lgdt early_gdt_descr(%rip) | 213 | lgdt early_gdt_descr(%rip) |
214 | 214 | ||
215 | /* set up data segments. actually 0 would do too */ | 215 | /* set up data segments */ |
216 | movl $__KERNEL_DS,%eax | 216 | xorl %eax,%eax |
217 | movl %eax,%ds | 217 | movl %eax,%ds |
218 | movl %eax,%ss | 218 | movl %eax,%ss |
219 | movl %eax,%es | 219 | movl %eax,%es |
@@ -262,11 +262,11 @@ ENTRY(secondary_startup_64) | |||
262 | .quad x86_64_start_kernel | 262 | .quad x86_64_start_kernel |
263 | ENTRY(initial_gs) | 263 | ENTRY(initial_gs) |
264 | .quad INIT_PER_CPU_VAR(irq_stack_union) | 264 | .quad INIT_PER_CPU_VAR(irq_stack_union) |
265 | __FINITDATA | ||
266 | 265 | ||
267 | ENTRY(stack_start) | 266 | ENTRY(stack_start) |
268 | .quad init_thread_union+THREAD_SIZE-8 | 267 | .quad init_thread_union+THREAD_SIZE-8 |
269 | .word 0 | 268 | .word 0 |
269 | __FINITDATA | ||
270 | 270 | ||
271 | bad_address: | 271 | bad_address: |
272 | jmp bad_address | 272 | jmp bad_address |
@@ -340,6 +340,7 @@ ENTRY(name) | |||
340 | i = i + 1 ; \ | 340 | i = i + 1 ; \ |
341 | .endr | 341 | .endr |
342 | 342 | ||
343 | .data | ||
343 | /* | 344 | /* |
344 | * This default setting generates an ident mapping at address 0x100000 | 345 | * This default setting generates an ident mapping at address 0x100000 |
345 | * and a mapping for the kernel that precisely maps virtual address | 346 | * and a mapping for the kernel that precisely maps virtual address |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index dedc2bddf7a5..ba6e65884603 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -33,6 +33,7 @@ | |||
33 | * HPET address is set in acpi/boot.c, when an ACPI entry exists | 33 | * HPET address is set in acpi/boot.c, when an ACPI entry exists |
34 | */ | 34 | */ |
35 | unsigned long hpet_address; | 35 | unsigned long hpet_address; |
36 | u8 hpet_blockid; /* OS timer block num */ | ||
36 | #ifdef CONFIG_PCI_MSI | 37 | #ifdef CONFIG_PCI_MSI |
37 | static unsigned long hpet_num_timers; | 38 | static unsigned long hpet_num_timers; |
38 | #endif | 39 | #endif |
@@ -47,12 +48,12 @@ struct hpet_dev { | |||
47 | char name[10]; | 48 | char name[10]; |
48 | }; | 49 | }; |
49 | 50 | ||
50 | unsigned long hpet_readl(unsigned long a) | 51 | inline unsigned int hpet_readl(unsigned int a) |
51 | { | 52 | { |
52 | return readl(hpet_virt_address + a); | 53 | return readl(hpet_virt_address + a); |
53 | } | 54 | } |
54 | 55 | ||
55 | static inline void hpet_writel(unsigned long d, unsigned long a) | 56 | static inline void hpet_writel(unsigned int d, unsigned int a) |
56 | { | 57 | { |
57 | writel(d, hpet_virt_address + a); | 58 | writel(d, hpet_virt_address + a); |
58 | } | 59 | } |
@@ -167,7 +168,7 @@ do { \ | |||
167 | 168 | ||
168 | static void hpet_reserve_msi_timers(struct hpet_data *hd); | 169 | static void hpet_reserve_msi_timers(struct hpet_data *hd); |
169 | 170 | ||
170 | static void hpet_reserve_platform_timers(unsigned long id) | 171 | static void hpet_reserve_platform_timers(unsigned int id) |
171 | { | 172 | { |
172 | struct hpet __iomem *hpet = hpet_virt_address; | 173 | struct hpet __iomem *hpet = hpet_virt_address; |
173 | struct hpet_timer __iomem *timer = &hpet->hpet_timers[2]; | 174 | struct hpet_timer __iomem *timer = &hpet->hpet_timers[2]; |
@@ -205,7 +206,7 @@ static void hpet_reserve_platform_timers(unsigned long id) | |||
205 | 206 | ||
206 | } | 207 | } |
207 | #else | 208 | #else |
208 | static void hpet_reserve_platform_timers(unsigned long id) { } | 209 | static void hpet_reserve_platform_timers(unsigned int id) { } |
209 | #endif | 210 | #endif |
210 | 211 | ||
211 | /* | 212 | /* |
@@ -246,7 +247,7 @@ static void hpet_reset_counter(void) | |||
246 | 247 | ||
247 | static void hpet_start_counter(void) | 248 | static void hpet_start_counter(void) |
248 | { | 249 | { |
249 | unsigned long cfg = hpet_readl(HPET_CFG); | 250 | unsigned int cfg = hpet_readl(HPET_CFG); |
250 | cfg |= HPET_CFG_ENABLE; | 251 | cfg |= HPET_CFG_ENABLE; |
251 | hpet_writel(cfg, HPET_CFG); | 252 | hpet_writel(cfg, HPET_CFG); |
252 | } | 253 | } |
@@ -271,7 +272,7 @@ static void hpet_resume_counter(void) | |||
271 | 272 | ||
272 | static void hpet_enable_legacy_int(void) | 273 | static void hpet_enable_legacy_int(void) |
273 | { | 274 | { |
274 | unsigned long cfg = hpet_readl(HPET_CFG); | 275 | unsigned int cfg = hpet_readl(HPET_CFG); |
275 | 276 | ||
276 | cfg |= HPET_CFG_LEGACY; | 277 | cfg |= HPET_CFG_LEGACY; |
277 | hpet_writel(cfg, HPET_CFG); | 278 | hpet_writel(cfg, HPET_CFG); |
@@ -314,7 +315,7 @@ static int hpet_setup_msi_irq(unsigned int irq); | |||
314 | static void hpet_set_mode(enum clock_event_mode mode, | 315 | static void hpet_set_mode(enum clock_event_mode mode, |
315 | struct clock_event_device *evt, int timer) | 316 | struct clock_event_device *evt, int timer) |
316 | { | 317 | { |
317 | unsigned long cfg, cmp, now; | 318 | unsigned int cfg, cmp, now; |
318 | uint64_t delta; | 319 | uint64_t delta; |
319 | 320 | ||
320 | switch (mode) { | 321 | switch (mode) { |
@@ -323,7 +324,7 @@ static void hpet_set_mode(enum clock_event_mode mode, | |||
323 | delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult; | 324 | delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult; |
324 | delta >>= evt->shift; | 325 | delta >>= evt->shift; |
325 | now = hpet_readl(HPET_COUNTER); | 326 | now = hpet_readl(HPET_COUNTER); |
326 | cmp = now + (unsigned long) delta; | 327 | cmp = now + (unsigned int) delta; |
327 | cfg = hpet_readl(HPET_Tn_CFG(timer)); | 328 | cfg = hpet_readl(HPET_Tn_CFG(timer)); |
328 | /* Make sure we use edge triggered interrupts */ | 329 | /* Make sure we use edge triggered interrupts */ |
329 | cfg &= ~HPET_TN_LEVEL; | 330 | cfg &= ~HPET_TN_LEVEL; |
@@ -339,7 +340,7 @@ static void hpet_set_mode(enum clock_event_mode mode, | |||
339 | * (See AMD-8111 HyperTransport I/O Hub Data Sheet, | 340 | * (See AMD-8111 HyperTransport I/O Hub Data Sheet, |
340 | * Publication # 24674) | 341 | * Publication # 24674) |
341 | */ | 342 | */ |
342 | hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer)); | 343 | hpet_writel((unsigned int) delta, HPET_Tn_CMP(timer)); |
343 | hpet_start_counter(); | 344 | hpet_start_counter(); |
344 | hpet_print_config(); | 345 | hpet_print_config(); |
345 | break; | 346 | break; |
@@ -383,13 +384,24 @@ static int hpet_next_event(unsigned long delta, | |||
383 | hpet_writel(cnt, HPET_Tn_CMP(timer)); | 384 | hpet_writel(cnt, HPET_Tn_CMP(timer)); |
384 | 385 | ||
385 | /* | 386 | /* |
386 | * We need to read back the CMP register to make sure that | 387 | * We need to read back the CMP register on certain HPET |
387 | * what we wrote hit the chip before we compare it to the | 388 | * implementations (ATI chipsets) which seem to delay the |
388 | * counter. | 389 | * transfer of the compare register into the internal compare |
390 | * logic. With small deltas this might actually be too late as | ||
391 | * the counter could already be higher than the compare value | ||
392 | * at that point and we would wait for the next hpet interrupt | ||
393 | * forever. We found out that reading the CMP register back | ||
394 | * forces the transfer so we can rely on the comparison with | ||
395 | * the counter register below. If the read back from the | ||
396 | * compare register does not match the value we programmed | ||
397 | * then we might have a real hardware problem. We can not do | ||
398 | * much about it here, but at least alert the user/admin with | ||
399 | * a prominent warning. | ||
389 | */ | 400 | */ |
390 | WARN_ON_ONCE((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt); | 401 | WARN_ONCE(hpet_readl(HPET_Tn_CMP(timer)) != cnt, |
402 | KERN_WARNING "hpet: compare register read back failed.\n"); | ||
391 | 403 | ||
392 | return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; | 404 | return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; |
393 | } | 405 | } |
394 | 406 | ||
395 | static void hpet_legacy_set_mode(enum clock_event_mode mode, | 407 | static void hpet_legacy_set_mode(enum clock_event_mode mode, |
@@ -415,7 +427,7 @@ static struct hpet_dev *hpet_devs; | |||
415 | void hpet_msi_unmask(unsigned int irq) | 427 | void hpet_msi_unmask(unsigned int irq) |
416 | { | 428 | { |
417 | struct hpet_dev *hdev = get_irq_data(irq); | 429 | struct hpet_dev *hdev = get_irq_data(irq); |
418 | unsigned long cfg; | 430 | unsigned int cfg; |
419 | 431 | ||
420 | /* unmask it */ | 432 | /* unmask it */ |
421 | cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); | 433 | cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); |
@@ -425,7 +437,7 @@ void hpet_msi_unmask(unsigned int irq) | |||
425 | 437 | ||
426 | void hpet_msi_mask(unsigned int irq) | 438 | void hpet_msi_mask(unsigned int irq) |
427 | { | 439 | { |
428 | unsigned long cfg; | 440 | unsigned int cfg; |
429 | struct hpet_dev *hdev = get_irq_data(irq); | 441 | struct hpet_dev *hdev = get_irq_data(irq); |
430 | 442 | ||
431 | /* mask it */ | 443 | /* mask it */ |
@@ -467,7 +479,7 @@ static int hpet_msi_next_event(unsigned long delta, | |||
467 | 479 | ||
468 | static int hpet_setup_msi_irq(unsigned int irq) | 480 | static int hpet_setup_msi_irq(unsigned int irq) |
469 | { | 481 | { |
470 | if (arch_setup_hpet_msi(irq)) { | 482 | if (arch_setup_hpet_msi(irq, hpet_blockid)) { |
471 | destroy_irq(irq); | 483 | destroy_irq(irq); |
472 | return -EINVAL; | 484 | return -EINVAL; |
473 | } | 485 | } |
@@ -584,6 +596,8 @@ static void hpet_msi_capability_lookup(unsigned int start_timer) | |||
584 | unsigned int num_timers_used = 0; | 596 | unsigned int num_timers_used = 0; |
585 | int i; | 597 | int i; |
586 | 598 | ||
599 | if (boot_cpu_has(X86_FEATURE_ARAT)) | ||
600 | return; | ||
587 | id = hpet_readl(HPET_ID); | 601 | id = hpet_readl(HPET_ID); |
588 | 602 | ||
589 | num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT); | 603 | num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT); |
@@ -598,7 +612,7 @@ static void hpet_msi_capability_lookup(unsigned int start_timer) | |||
598 | 612 | ||
599 | for (i = start_timer; i < num_timers - RESERVE_TIMERS; i++) { | 613 | for (i = start_timer; i < num_timers - RESERVE_TIMERS; i++) { |
600 | struct hpet_dev *hdev = &hpet_devs[num_timers_used]; | 614 | struct hpet_dev *hdev = &hpet_devs[num_timers_used]; |
601 | unsigned long cfg = hpet_readl(HPET_Tn_CFG(i)); | 615 | unsigned int cfg = hpet_readl(HPET_Tn_CFG(i)); |
602 | 616 | ||
603 | /* Only consider HPET timer with MSI support */ | 617 | /* Only consider HPET timer with MSI support */ |
604 | if (!(cfg & HPET_TN_FSB_CAP)) | 618 | if (!(cfg & HPET_TN_FSB_CAP)) |
@@ -813,7 +827,7 @@ static int hpet_clocksource_register(void) | |||
813 | */ | 827 | */ |
814 | int __init hpet_enable(void) | 828 | int __init hpet_enable(void) |
815 | { | 829 | { |
816 | unsigned long id; | 830 | unsigned int id; |
817 | int i; | 831 | int i; |
818 | 832 | ||
819 | if (!is_hpet_capable()) | 833 | if (!is_hpet_capable()) |
@@ -872,10 +886,8 @@ int __init hpet_enable(void) | |||
872 | 886 | ||
873 | if (id & HPET_ID_LEGSUP) { | 887 | if (id & HPET_ID_LEGSUP) { |
874 | hpet_legacy_clockevent_register(); | 888 | hpet_legacy_clockevent_register(); |
875 | hpet_msi_capability_lookup(2); | ||
876 | return 1; | 889 | return 1; |
877 | } | 890 | } |
878 | hpet_msi_capability_lookup(0); | ||
879 | return 0; | 891 | return 0; |
880 | 892 | ||
881 | out_nohpet: | 893 | out_nohpet: |
@@ -908,9 +920,17 @@ static __init int hpet_late_init(void) | |||
908 | if (!hpet_virt_address) | 920 | if (!hpet_virt_address) |
909 | return -ENODEV; | 921 | return -ENODEV; |
910 | 922 | ||
923 | if (hpet_readl(HPET_ID) & HPET_ID_LEGSUP) | ||
924 | hpet_msi_capability_lookup(2); | ||
925 | else | ||
926 | hpet_msi_capability_lookup(0); | ||
927 | |||
911 | hpet_reserve_platform_timers(hpet_readl(HPET_ID)); | 928 | hpet_reserve_platform_timers(hpet_readl(HPET_ID)); |
912 | hpet_print_config(); | 929 | hpet_print_config(); |
913 | 930 | ||
931 | if (boot_cpu_has(X86_FEATURE_ARAT)) | ||
932 | return 0; | ||
933 | |||
914 | for_each_online_cpu(cpu) { | 934 | for_each_online_cpu(cpu) { |
915 | hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu); | 935 | hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu); |
916 | } | 936 | } |
@@ -925,7 +945,7 @@ fs_initcall(hpet_late_init); | |||
925 | void hpet_disable(void) | 945 | void hpet_disable(void) |
926 | { | 946 | { |
927 | if (is_hpet_capable()) { | 947 | if (is_hpet_capable()) { |
928 | unsigned long cfg = hpet_readl(HPET_CFG); | 948 | unsigned int cfg = hpet_readl(HPET_CFG); |
929 | 949 | ||
930 | if (hpet_legacy_int_enabled) { | 950 | if (hpet_legacy_int_enabled) { |
931 | cfg &= ~HPET_CFG_LEGACY; | 951 | cfg &= ~HPET_CFG_LEGACY; |
@@ -965,8 +985,8 @@ static int hpet_prev_update_sec; | |||
965 | static struct rtc_time hpet_alarm_time; | 985 | static struct rtc_time hpet_alarm_time; |
966 | static unsigned long hpet_pie_count; | 986 | static unsigned long hpet_pie_count; |
967 | static u32 hpet_t1_cmp; | 987 | static u32 hpet_t1_cmp; |
968 | static unsigned long hpet_default_delta; | 988 | static u32 hpet_default_delta; |
969 | static unsigned long hpet_pie_delta; | 989 | static u32 hpet_pie_delta; |
970 | static unsigned long hpet_pie_limit; | 990 | static unsigned long hpet_pie_limit; |
971 | 991 | ||
972 | static rtc_irq_handler irq_handler; | 992 | static rtc_irq_handler irq_handler; |
@@ -1017,7 +1037,8 @@ EXPORT_SYMBOL_GPL(hpet_unregister_irq_handler); | |||
1017 | */ | 1037 | */ |
1018 | int hpet_rtc_timer_init(void) | 1038 | int hpet_rtc_timer_init(void) |
1019 | { | 1039 | { |
1020 | unsigned long cfg, cnt, delta, flags; | 1040 | unsigned int cfg, cnt, delta; |
1041 | unsigned long flags; | ||
1021 | 1042 | ||
1022 | if (!is_hpet_enabled()) | 1043 | if (!is_hpet_enabled()) |
1023 | return 0; | 1044 | return 0; |
@@ -1027,7 +1048,7 @@ int hpet_rtc_timer_init(void) | |||
1027 | 1048 | ||
1028 | clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC; | 1049 | clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC; |
1029 | clc >>= hpet_clockevent.shift + DEFAULT_RTC_SHIFT; | 1050 | clc >>= hpet_clockevent.shift + DEFAULT_RTC_SHIFT; |
1030 | hpet_default_delta = (unsigned long) clc; | 1051 | hpet_default_delta = clc; |
1031 | } | 1052 | } |
1032 | 1053 | ||
1033 | if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit) | 1054 | if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit) |
@@ -1113,7 +1134,7 @@ int hpet_set_periodic_freq(unsigned long freq) | |||
1113 | clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC; | 1134 | clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC; |
1114 | do_div(clc, freq); | 1135 | do_div(clc, freq); |
1115 | clc >>= hpet_clockevent.shift; | 1136 | clc >>= hpet_clockevent.shift; |
1116 | hpet_pie_delta = (unsigned long) clc; | 1137 | hpet_pie_delta = clc; |
1117 | } | 1138 | } |
1118 | return 1; | 1139 | return 1; |
1119 | } | 1140 | } |
@@ -1127,7 +1148,7 @@ EXPORT_SYMBOL_GPL(hpet_rtc_dropped_irq); | |||
1127 | 1148 | ||
1128 | static void hpet_rtc_timer_reinit(void) | 1149 | static void hpet_rtc_timer_reinit(void) |
1129 | { | 1150 | { |
1130 | unsigned long cfg, delta; | 1151 | unsigned int cfg, delta; |
1131 | int lost_ints = -1; | 1152 | int lost_ints = -1; |
1132 | 1153 | ||
1133 | if (unlikely(!hpet_rtc_flags)) { | 1154 | if (unlikely(!hpet_rtc_flags)) { |
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c new file mode 100644 index 000000000000..05d5fec64a94 --- /dev/null +++ b/arch/x86/kernel/hw_breakpoint.c | |||
@@ -0,0 +1,554 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License as published by | ||
4 | * the Free Software Foundation; either version 2 of the License, or | ||
5 | * (at your option) any later version. | ||
6 | * | ||
7 | * This program is distributed in the hope that it will be useful, | ||
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
10 | * GNU General Public License for more details. | ||
11 | * | ||
12 | * You should have received a copy of the GNU General Public License | ||
13 | * along with this program; if not, write to the Free Software | ||
14 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
15 | * | ||
16 | * Copyright (C) 2007 Alan Stern | ||
17 | * Copyright (C) 2009 IBM Corporation | ||
18 | * Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com> | ||
19 | * | ||
20 | * Authors: Alan Stern <stern@rowland.harvard.edu> | ||
21 | * K.Prasad <prasad@linux.vnet.ibm.com> | ||
22 | * Frederic Weisbecker <fweisbec@gmail.com> | ||
23 | */ | ||
24 | |||
25 | /* | ||
26 | * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, | ||
27 | * using the CPU's debug registers. | ||
28 | */ | ||
29 | |||
30 | #include <linux/perf_event.h> | ||
31 | #include <linux/hw_breakpoint.h> | ||
32 | #include <linux/irqflags.h> | ||
33 | #include <linux/notifier.h> | ||
34 | #include <linux/kallsyms.h> | ||
35 | #include <linux/kprobes.h> | ||
36 | #include <linux/percpu.h> | ||
37 | #include <linux/kdebug.h> | ||
38 | #include <linux/kernel.h> | ||
39 | #include <linux/module.h> | ||
40 | #include <linux/sched.h> | ||
41 | #include <linux/init.h> | ||
42 | #include <linux/smp.h> | ||
43 | |||
44 | #include <asm/hw_breakpoint.h> | ||
45 | #include <asm/processor.h> | ||
46 | #include <asm/debugreg.h> | ||
47 | |||
48 | /* Per cpu debug control register value */ | ||
49 | DEFINE_PER_CPU(unsigned long, cpu_dr7); | ||
50 | EXPORT_PER_CPU_SYMBOL(cpu_dr7); | ||
51 | |||
52 | /* Per cpu debug address registers values */ | ||
53 | static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]); | ||
54 | |||
55 | /* | ||
56 | * Stores the breakpoints currently in use on each breakpoint address | ||
57 | * register for each cpus | ||
58 | */ | ||
59 | static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]); | ||
60 | |||
61 | |||
62 | static inline unsigned long | ||
63 | __encode_dr7(int drnum, unsigned int len, unsigned int type) | ||
64 | { | ||
65 | unsigned long bp_info; | ||
66 | |||
67 | bp_info = (len | type) & 0xf; | ||
68 | bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE); | ||
69 | bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)); | ||
70 | |||
71 | return bp_info; | ||
72 | } | ||
73 | |||
74 | /* | ||
75 | * Encode the length, type, Exact, and Enable bits for a particular breakpoint | ||
76 | * as stored in debug register 7. | ||
77 | */ | ||
78 | unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) | ||
79 | { | ||
80 | return __encode_dr7(drnum, len, type) | DR_GLOBAL_SLOWDOWN; | ||
81 | } | ||
82 | |||
83 | /* | ||
84 | * Decode the length and type bits for a particular breakpoint as | ||
85 | * stored in debug register 7. Return the "enabled" status. | ||
86 | */ | ||
87 | int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type) | ||
88 | { | ||
89 | int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE); | ||
90 | |||
91 | *len = (bp_info & 0xc) | 0x40; | ||
92 | *type = (bp_info & 0x3) | 0x80; | ||
93 | |||
94 | return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3; | ||
95 | } | ||
96 | |||
97 | /* | ||
98 | * Install a perf counter breakpoint. | ||
99 | * | ||
100 | * We seek a free debug address register and use it for this | ||
101 | * breakpoint. Eventually we enable it in the debug control register. | ||
102 | * | ||
103 | * Atomic: we hold the counter->ctx->lock and we only handle variables | ||
104 | * and registers local to this cpu. | ||
105 | */ | ||
106 | int arch_install_hw_breakpoint(struct perf_event *bp) | ||
107 | { | ||
108 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | ||
109 | unsigned long *dr7; | ||
110 | int i; | ||
111 | |||
112 | for (i = 0; i < HBP_NUM; i++) { | ||
113 | struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); | ||
114 | |||
115 | if (!*slot) { | ||
116 | *slot = bp; | ||
117 | break; | ||
118 | } | ||
119 | } | ||
120 | |||
121 | if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) | ||
122 | return -EBUSY; | ||
123 | |||
124 | set_debugreg(info->address, i); | ||
125 | __get_cpu_var(cpu_debugreg[i]) = info->address; | ||
126 | |||
127 | dr7 = &__get_cpu_var(cpu_dr7); | ||
128 | *dr7 |= encode_dr7(i, info->len, info->type); | ||
129 | |||
130 | set_debugreg(*dr7, 7); | ||
131 | |||
132 | return 0; | ||
133 | } | ||
134 | |||
135 | /* | ||
136 | * Uninstall the breakpoint contained in the given counter. | ||
137 | * | ||
138 | * First we search the debug address register it uses and then we disable | ||
139 | * it. | ||
140 | * | ||
141 | * Atomic: we hold the counter->ctx->lock and we only handle variables | ||
142 | * and registers local to this cpu. | ||
143 | */ | ||
144 | void arch_uninstall_hw_breakpoint(struct perf_event *bp) | ||
145 | { | ||
146 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | ||
147 | unsigned long *dr7; | ||
148 | int i; | ||
149 | |||
150 | for (i = 0; i < HBP_NUM; i++) { | ||
151 | struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); | ||
152 | |||
153 | if (*slot == bp) { | ||
154 | *slot = NULL; | ||
155 | break; | ||
156 | } | ||
157 | } | ||
158 | |||
159 | if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) | ||
160 | return; | ||
161 | |||
162 | dr7 = &__get_cpu_var(cpu_dr7); | ||
163 | *dr7 &= ~__encode_dr7(i, info->len, info->type); | ||
164 | |||
165 | set_debugreg(*dr7, 7); | ||
166 | } | ||
167 | |||
168 | static int get_hbp_len(u8 hbp_len) | ||
169 | { | ||
170 | unsigned int len_in_bytes = 0; | ||
171 | |||
172 | switch (hbp_len) { | ||
173 | case X86_BREAKPOINT_LEN_1: | ||
174 | len_in_bytes = 1; | ||
175 | break; | ||
176 | case X86_BREAKPOINT_LEN_2: | ||
177 | len_in_bytes = 2; | ||
178 | break; | ||
179 | case X86_BREAKPOINT_LEN_4: | ||
180 | len_in_bytes = 4; | ||
181 | break; | ||
182 | #ifdef CONFIG_X86_64 | ||
183 | case X86_BREAKPOINT_LEN_8: | ||
184 | len_in_bytes = 8; | ||
185 | break; | ||
186 | #endif | ||
187 | } | ||
188 | return len_in_bytes; | ||
189 | } | ||
190 | |||
191 | /* | ||
192 | * Check for virtual address in user space. | ||
193 | */ | ||
194 | int arch_check_va_in_userspace(unsigned long va, u8 hbp_len) | ||
195 | { | ||
196 | unsigned int len; | ||
197 | |||
198 | len = get_hbp_len(hbp_len); | ||
199 | |||
200 | return (va <= TASK_SIZE - len); | ||
201 | } | ||
202 | |||
203 | /* | ||
204 | * Check for virtual address in kernel space. | ||
205 | */ | ||
206 | static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) | ||
207 | { | ||
208 | unsigned int len; | ||
209 | |||
210 | len = get_hbp_len(hbp_len); | ||
211 | |||
212 | return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); | ||
213 | } | ||
214 | |||
215 | /* | ||
216 | * Store a breakpoint's encoded address, length, and type. | ||
217 | */ | ||
218 | static int arch_store_info(struct perf_event *bp) | ||
219 | { | ||
220 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | ||
221 | /* | ||
222 | * For kernel-addresses, either the address or symbol name can be | ||
223 | * specified. | ||
224 | */ | ||
225 | if (info->name) | ||
226 | info->address = (unsigned long) | ||
227 | kallsyms_lookup_name(info->name); | ||
228 | if (info->address) | ||
229 | return 0; | ||
230 | |||
231 | return -EINVAL; | ||
232 | } | ||
233 | |||
234 | int arch_bp_generic_fields(int x86_len, int x86_type, | ||
235 | int *gen_len, int *gen_type) | ||
236 | { | ||
237 | /* Len */ | ||
238 | switch (x86_len) { | ||
239 | case X86_BREAKPOINT_LEN_1: | ||
240 | *gen_len = HW_BREAKPOINT_LEN_1; | ||
241 | break; | ||
242 | case X86_BREAKPOINT_LEN_2: | ||
243 | *gen_len = HW_BREAKPOINT_LEN_2; | ||
244 | break; | ||
245 | case X86_BREAKPOINT_LEN_4: | ||
246 | *gen_len = HW_BREAKPOINT_LEN_4; | ||
247 | break; | ||
248 | #ifdef CONFIG_X86_64 | ||
249 | case X86_BREAKPOINT_LEN_8: | ||
250 | *gen_len = HW_BREAKPOINT_LEN_8; | ||
251 | break; | ||
252 | #endif | ||
253 | default: | ||
254 | return -EINVAL; | ||
255 | } | ||
256 | |||
257 | /* Type */ | ||
258 | switch (x86_type) { | ||
259 | case X86_BREAKPOINT_EXECUTE: | ||
260 | *gen_type = HW_BREAKPOINT_X; | ||
261 | break; | ||
262 | case X86_BREAKPOINT_WRITE: | ||
263 | *gen_type = HW_BREAKPOINT_W; | ||
264 | break; | ||
265 | case X86_BREAKPOINT_RW: | ||
266 | *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; | ||
267 | break; | ||
268 | default: | ||
269 | return -EINVAL; | ||
270 | } | ||
271 | |||
272 | return 0; | ||
273 | } | ||
274 | |||
275 | |||
276 | static int arch_build_bp_info(struct perf_event *bp) | ||
277 | { | ||
278 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | ||
279 | |||
280 | info->address = bp->attr.bp_addr; | ||
281 | |||
282 | /* Len */ | ||
283 | switch (bp->attr.bp_len) { | ||
284 | case HW_BREAKPOINT_LEN_1: | ||
285 | info->len = X86_BREAKPOINT_LEN_1; | ||
286 | break; | ||
287 | case HW_BREAKPOINT_LEN_2: | ||
288 | info->len = X86_BREAKPOINT_LEN_2; | ||
289 | break; | ||
290 | case HW_BREAKPOINT_LEN_4: | ||
291 | info->len = X86_BREAKPOINT_LEN_4; | ||
292 | break; | ||
293 | #ifdef CONFIG_X86_64 | ||
294 | case HW_BREAKPOINT_LEN_8: | ||
295 | info->len = X86_BREAKPOINT_LEN_8; | ||
296 | break; | ||
297 | #endif | ||
298 | default: | ||
299 | return -EINVAL; | ||
300 | } | ||
301 | |||
302 | /* Type */ | ||
303 | switch (bp->attr.bp_type) { | ||
304 | case HW_BREAKPOINT_W: | ||
305 | info->type = X86_BREAKPOINT_WRITE; | ||
306 | break; | ||
307 | case HW_BREAKPOINT_W | HW_BREAKPOINT_R: | ||
308 | info->type = X86_BREAKPOINT_RW; | ||
309 | break; | ||
310 | case HW_BREAKPOINT_X: | ||
311 | info->type = X86_BREAKPOINT_EXECUTE; | ||
312 | break; | ||
313 | default: | ||
314 | return -EINVAL; | ||
315 | } | ||
316 | |||
317 | return 0; | ||
318 | } | ||
319 | /* | ||
320 | * Validate the arch-specific HW Breakpoint register settings | ||
321 | */ | ||
322 | int arch_validate_hwbkpt_settings(struct perf_event *bp, | ||
323 | struct task_struct *tsk) | ||
324 | { | ||
325 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | ||
326 | unsigned int align; | ||
327 | int ret; | ||
328 | |||
329 | |||
330 | ret = arch_build_bp_info(bp); | ||
331 | if (ret) | ||
332 | return ret; | ||
333 | |||
334 | ret = -EINVAL; | ||
335 | |||
336 | if (info->type == X86_BREAKPOINT_EXECUTE) | ||
337 | /* | ||
338 | * Ptrace-refactoring code | ||
339 | * For now, we'll allow instruction breakpoint only for user-space | ||
340 | * addresses | ||
341 | */ | ||
342 | if ((!arch_check_va_in_userspace(info->address, info->len)) && | ||
343 | info->len != X86_BREAKPOINT_EXECUTE) | ||
344 | return ret; | ||
345 | |||
346 | switch (info->len) { | ||
347 | case X86_BREAKPOINT_LEN_1: | ||
348 | align = 0; | ||
349 | break; | ||
350 | case X86_BREAKPOINT_LEN_2: | ||
351 | align = 1; | ||
352 | break; | ||
353 | case X86_BREAKPOINT_LEN_4: | ||
354 | align = 3; | ||
355 | break; | ||
356 | #ifdef CONFIG_X86_64 | ||
357 | case X86_BREAKPOINT_LEN_8: | ||
358 | align = 7; | ||
359 | break; | ||
360 | #endif | ||
361 | default: | ||
362 | return ret; | ||
363 | } | ||
364 | |||
365 | ret = arch_store_info(bp); | ||
366 | |||
367 | if (ret < 0) | ||
368 | return ret; | ||
369 | /* | ||
370 | * Check that the low-order bits of the address are appropriate | ||
371 | * for the alignment implied by len. | ||
372 | */ | ||
373 | if (info->address & align) | ||
374 | return -EINVAL; | ||
375 | |||
376 | /* Check that the virtual address is in the proper range */ | ||
377 | if (tsk) { | ||
378 | if (!arch_check_va_in_userspace(info->address, info->len)) | ||
379 | return -EFAULT; | ||
380 | } else { | ||
381 | if (!arch_check_va_in_kernelspace(info->address, info->len)) | ||
382 | return -EFAULT; | ||
383 | } | ||
384 | |||
385 | return 0; | ||
386 | } | ||
387 | |||
388 | /* | ||
389 | * Dump the debug register contents to the user. | ||
390 | * We can't dump our per cpu values because it | ||
391 | * may contain cpu wide breakpoint, something that | ||
392 | * doesn't belong to the current task. | ||
393 | * | ||
394 | * TODO: include non-ptrace user breakpoints (perf) | ||
395 | */ | ||
396 | void aout_dump_debugregs(struct user *dump) | ||
397 | { | ||
398 | int i; | ||
399 | int dr7 = 0; | ||
400 | struct perf_event *bp; | ||
401 | struct arch_hw_breakpoint *info; | ||
402 | struct thread_struct *thread = ¤t->thread; | ||
403 | |||
404 | for (i = 0; i < HBP_NUM; i++) { | ||
405 | bp = thread->ptrace_bps[i]; | ||
406 | |||
407 | if (bp && !bp->attr.disabled) { | ||
408 | dump->u_debugreg[i] = bp->attr.bp_addr; | ||
409 | info = counter_arch_bp(bp); | ||
410 | dr7 |= encode_dr7(i, info->len, info->type); | ||
411 | } else { | ||
412 | dump->u_debugreg[i] = 0; | ||
413 | } | ||
414 | } | ||
415 | |||
416 | dump->u_debugreg[4] = 0; | ||
417 | dump->u_debugreg[5] = 0; | ||
418 | dump->u_debugreg[6] = current->thread.debugreg6; | ||
419 | |||
420 | dump->u_debugreg[7] = dr7; | ||
421 | } | ||
422 | EXPORT_SYMBOL_GPL(aout_dump_debugregs); | ||
423 | |||
424 | /* | ||
425 | * Release the user breakpoints used by ptrace | ||
426 | */ | ||
427 | void flush_ptrace_hw_breakpoint(struct task_struct *tsk) | ||
428 | { | ||
429 | int i; | ||
430 | struct thread_struct *t = &tsk->thread; | ||
431 | |||
432 | for (i = 0; i < HBP_NUM; i++) { | ||
433 | unregister_hw_breakpoint(t->ptrace_bps[i]); | ||
434 | t->ptrace_bps[i] = NULL; | ||
435 | } | ||
436 | } | ||
437 | |||
438 | void hw_breakpoint_restore(void) | ||
439 | { | ||
440 | set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0); | ||
441 | set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1); | ||
442 | set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2); | ||
443 | set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3); | ||
444 | set_debugreg(current->thread.debugreg6, 6); | ||
445 | set_debugreg(__get_cpu_var(cpu_dr7), 7); | ||
446 | } | ||
447 | EXPORT_SYMBOL_GPL(hw_breakpoint_restore); | ||
448 | |||
449 | /* | ||
450 | * Handle debug exception notifications. | ||
451 | * | ||
452 | * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below. | ||
453 | * | ||
454 | * NOTIFY_DONE returned if one of the following conditions is true. | ||
455 | * i) When the causative address is from user-space and the exception | ||
456 | * is a valid one, i.e. not triggered as a result of lazy debug register | ||
457 | * switching | ||
458 | * ii) When there are more bits than trap<n> set in DR6 register (such | ||
459 | * as BD, BS or BT) indicating that more than one debug condition is | ||
460 | * met and requires some more action in do_debug(). | ||
461 | * | ||
462 | * NOTIFY_STOP returned for all other cases | ||
463 | * | ||
464 | */ | ||
465 | static int __kprobes hw_breakpoint_handler(struct die_args *args) | ||
466 | { | ||
467 | int i, cpu, rc = NOTIFY_STOP; | ||
468 | struct perf_event *bp; | ||
469 | unsigned long dr7, dr6; | ||
470 | unsigned long *dr6_p; | ||
471 | |||
472 | /* The DR6 value is pointed by args->err */ | ||
473 | dr6_p = (unsigned long *)ERR_PTR(args->err); | ||
474 | dr6 = *dr6_p; | ||
475 | |||
476 | /* Do an early return if no trap bits are set in DR6 */ | ||
477 | if ((dr6 & DR_TRAP_BITS) == 0) | ||
478 | return NOTIFY_DONE; | ||
479 | |||
480 | get_debugreg(dr7, 7); | ||
481 | /* Disable breakpoints during exception handling */ | ||
482 | set_debugreg(0UL, 7); | ||
483 | /* | ||
484 | * Assert that local interrupts are disabled | ||
485 | * Reset the DRn bits in the virtualized register value. | ||
486 | * The ptrace trigger routine will add in whatever is needed. | ||
487 | */ | ||
488 | current->thread.debugreg6 &= ~DR_TRAP_BITS; | ||
489 | cpu = get_cpu(); | ||
490 | |||
491 | /* Handle all the breakpoints that were triggered */ | ||
492 | for (i = 0; i < HBP_NUM; ++i) { | ||
493 | if (likely(!(dr6 & (DR_TRAP0 << i)))) | ||
494 | continue; | ||
495 | |||
496 | /* | ||
497 | * The counter may be concurrently released but that can only | ||
498 | * occur from a call_rcu() path. We can then safely fetch | ||
499 | * the breakpoint, use its callback, touch its counter | ||
500 | * while we are in an rcu_read_lock() path. | ||
501 | */ | ||
502 | rcu_read_lock(); | ||
503 | |||
504 | bp = per_cpu(bp_per_reg[i], cpu); | ||
505 | if (bp) | ||
506 | rc = NOTIFY_DONE; | ||
507 | /* | ||
508 | * Reset the 'i'th TRAP bit in dr6 to denote completion of | ||
509 | * exception handling | ||
510 | */ | ||
511 | (*dr6_p) &= ~(DR_TRAP0 << i); | ||
512 | /* | ||
513 | * bp can be NULL due to lazy debug register switching | ||
514 | * or due to concurrent perf counter removing. | ||
515 | */ | ||
516 | if (!bp) { | ||
517 | rcu_read_unlock(); | ||
518 | break; | ||
519 | } | ||
520 | |||
521 | perf_bp_event(bp, args->regs); | ||
522 | |||
523 | rcu_read_unlock(); | ||
524 | } | ||
525 | if (dr6 & (~DR_TRAP_BITS)) | ||
526 | rc = NOTIFY_DONE; | ||
527 | |||
528 | set_debugreg(dr7, 7); | ||
529 | put_cpu(); | ||
530 | |||
531 | return rc; | ||
532 | } | ||
533 | |||
534 | /* | ||
535 | * Handle debug exception notifications. | ||
536 | */ | ||
537 | int __kprobes hw_breakpoint_exceptions_notify( | ||
538 | struct notifier_block *unused, unsigned long val, void *data) | ||
539 | { | ||
540 | if (val != DIE_DEBUG) | ||
541 | return NOTIFY_DONE; | ||
542 | |||
543 | return hw_breakpoint_handler(data); | ||
544 | } | ||
545 | |||
546 | void hw_breakpoint_pmu_read(struct perf_event *bp) | ||
547 | { | ||
548 | /* TODO */ | ||
549 | } | ||
550 | |||
551 | void hw_breakpoint_pmu_unthrottle(struct perf_event *bp) | ||
552 | { | ||
553 | /* TODO */ | ||
554 | } | ||
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index 1736c5a725aa..9c3bd4a2050e 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c | |||
@@ -15,8 +15,10 @@ EXPORT_SYMBOL(mcount); | |||
15 | * the export, but dont use it from C code, it is used | 15 | * the export, but dont use it from C code, it is used |
16 | * by assembly code and is not using C calling convention! | 16 | * by assembly code and is not using C calling convention! |
17 | */ | 17 | */ |
18 | #ifndef CONFIG_X86_CMPXCHG64 | ||
18 | extern void cmpxchg8b_emu(void); | 19 | extern void cmpxchg8b_emu(void); |
19 | EXPORT_SYMBOL(cmpxchg8b_emu); | 20 | EXPORT_SYMBOL(cmpxchg8b_emu); |
21 | #endif | ||
20 | 22 | ||
21 | /* Networking helper routines. */ | 23 | /* Networking helper routines. */ |
22 | EXPORT_SYMBOL(csum_partial_copy_generic); | 24 | EXPORT_SYMBOL(csum_partial_copy_generic); |
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 99c4d308f16b..8eec0ec59af2 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c | |||
@@ -103,9 +103,10 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) | |||
103 | * on system-call entry - see also fork() and the signal handling | 103 | * on system-call entry - see also fork() and the signal handling |
104 | * code. | 104 | * code. |
105 | */ | 105 | */ |
106 | static int do_iopl(unsigned int level, struct pt_regs *regs) | 106 | long sys_iopl(unsigned int level, struct pt_regs *regs) |
107 | { | 107 | { |
108 | unsigned int old = (regs->flags >> 12) & 3; | 108 | unsigned int old = (regs->flags >> 12) & 3; |
109 | struct thread_struct *t = ¤t->thread; | ||
109 | 110 | ||
110 | if (level > 3) | 111 | if (level > 3) |
111 | return -EINVAL; | 112 | return -EINVAL; |
@@ -115,29 +116,8 @@ static int do_iopl(unsigned int level, struct pt_regs *regs) | |||
115 | return -EPERM; | 116 | return -EPERM; |
116 | } | 117 | } |
117 | regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12); | 118 | regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12); |
118 | |||
119 | return 0; | ||
120 | } | ||
121 | |||
122 | #ifdef CONFIG_X86_32 | ||
123 | long sys_iopl(struct pt_regs *regs) | ||
124 | { | ||
125 | unsigned int level = regs->bx; | ||
126 | struct thread_struct *t = ¤t->thread; | ||
127 | int rc; | ||
128 | |||
129 | rc = do_iopl(level, regs); | ||
130 | if (rc < 0) | ||
131 | goto out; | ||
132 | |||
133 | t->iopl = level << 12; | 119 | t->iopl = level << 12; |
134 | set_iopl_mask(t->iopl); | 120 | set_iopl_mask(t->iopl); |
135 | out: | 121 | |
136 | return rc; | 122 | return 0; |
137 | } | ||
138 | #else | ||
139 | asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs) | ||
140 | { | ||
141 | return do_iopl(level, regs); | ||
142 | } | 123 | } |
143 | #endif | ||
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 74656d1d4e30..91fd0c70a18a 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -18,7 +18,7 @@ | |||
18 | atomic_t irq_err_count; | 18 | atomic_t irq_err_count; |
19 | 19 | ||
20 | /* Function pointer for generic interrupt vector handling */ | 20 | /* Function pointer for generic interrupt vector handling */ |
21 | void (*generic_interrupt_extension)(void) = NULL; | 21 | void (*x86_platform_ipi_callback)(void) = NULL; |
22 | 22 | ||
23 | /* | 23 | /* |
24 | * 'what should we do if we get a hw irq event on an illegal vector'. | 24 | * 'what should we do if we get a hw irq event on an illegal vector'. |
@@ -63,19 +63,19 @@ static int show_other_interrupts(struct seq_file *p, int prec) | |||
63 | for_each_online_cpu(j) | 63 | for_each_online_cpu(j) |
64 | seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); | 64 | seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); |
65 | seq_printf(p, " Spurious interrupts\n"); | 65 | seq_printf(p, " Spurious interrupts\n"); |
66 | seq_printf(p, "%*s: ", prec, "CNT"); | 66 | seq_printf(p, "%*s: ", prec, "PMI"); |
67 | for_each_online_cpu(j) | 67 | for_each_online_cpu(j) |
68 | seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); | 68 | seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); |
69 | seq_printf(p, " Performance counter interrupts\n"); | 69 | seq_printf(p, " Performance monitoring interrupts\n"); |
70 | seq_printf(p, "%*s: ", prec, "PND"); | 70 | seq_printf(p, "%*s: ", prec, "PND"); |
71 | for_each_online_cpu(j) | 71 | for_each_online_cpu(j) |
72 | seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); | 72 | seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); |
73 | seq_printf(p, " Performance pending work\n"); | 73 | seq_printf(p, " Performance pending work\n"); |
74 | #endif | 74 | #endif |
75 | if (generic_interrupt_extension) { | 75 | if (x86_platform_ipi_callback) { |
76 | seq_printf(p, "%*s: ", prec, "PLT"); | 76 | seq_printf(p, "%*s: ", prec, "PLT"); |
77 | for_each_online_cpu(j) | 77 | for_each_online_cpu(j) |
78 | seq_printf(p, "%10u ", irq_stats(j)->generic_irqs); | 78 | seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis); |
79 | seq_printf(p, " Platform interrupts\n"); | 79 | seq_printf(p, " Platform interrupts\n"); |
80 | } | 80 | } |
81 | #ifdef CONFIG_SMP | 81 | #ifdef CONFIG_SMP |
@@ -92,17 +92,17 @@ static int show_other_interrupts(struct seq_file *p, int prec) | |||
92 | seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); | 92 | seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); |
93 | seq_printf(p, " TLB shootdowns\n"); | 93 | seq_printf(p, " TLB shootdowns\n"); |
94 | #endif | 94 | #endif |
95 | #ifdef CONFIG_X86_MCE | 95 | #ifdef CONFIG_X86_THERMAL_VECTOR |
96 | seq_printf(p, "%*s: ", prec, "TRM"); | 96 | seq_printf(p, "%*s: ", prec, "TRM"); |
97 | for_each_online_cpu(j) | 97 | for_each_online_cpu(j) |
98 | seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); | 98 | seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); |
99 | seq_printf(p, " Thermal event interrupts\n"); | 99 | seq_printf(p, " Thermal event interrupts\n"); |
100 | # ifdef CONFIG_X86_MCE_THRESHOLD | 100 | #endif |
101 | #ifdef CONFIG_X86_MCE_THRESHOLD | ||
101 | seq_printf(p, "%*s: ", prec, "THR"); | 102 | seq_printf(p, "%*s: ", prec, "THR"); |
102 | for_each_online_cpu(j) | 103 | for_each_online_cpu(j) |
103 | seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); | 104 | seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); |
104 | seq_printf(p, " Threshold APIC interrupts\n"); | 105 | seq_printf(p, " Threshold APIC interrupts\n"); |
105 | # endif | ||
106 | #endif | 106 | #endif |
107 | #ifdef CONFIG_X86_MCE | 107 | #ifdef CONFIG_X86_MCE |
108 | seq_printf(p, "%*s: ", prec, "MCE"); | 108 | seq_printf(p, "%*s: ", prec, "MCE"); |
@@ -149,7 +149,7 @@ int show_interrupts(struct seq_file *p, void *v) | |||
149 | if (!desc) | 149 | if (!desc) |
150 | return 0; | 150 | return 0; |
151 | 151 | ||
152 | spin_lock_irqsave(&desc->lock, flags); | 152 | raw_spin_lock_irqsave(&desc->lock, flags); |
153 | for_each_online_cpu(j) | 153 | for_each_online_cpu(j) |
154 | any_count |= kstat_irqs_cpu(i, j); | 154 | any_count |= kstat_irqs_cpu(i, j); |
155 | action = desc->action; | 155 | action = desc->action; |
@@ -170,7 +170,7 @@ int show_interrupts(struct seq_file *p, void *v) | |||
170 | 170 | ||
171 | seq_putc(p, '\n'); | 171 | seq_putc(p, '\n'); |
172 | out: | 172 | out: |
173 | spin_unlock_irqrestore(&desc->lock, flags); | 173 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
174 | return 0; | 174 | return 0; |
175 | } | 175 | } |
176 | 176 | ||
@@ -187,18 +187,18 @@ u64 arch_irq_stat_cpu(unsigned int cpu) | |||
187 | sum += irq_stats(cpu)->apic_perf_irqs; | 187 | sum += irq_stats(cpu)->apic_perf_irqs; |
188 | sum += irq_stats(cpu)->apic_pending_irqs; | 188 | sum += irq_stats(cpu)->apic_pending_irqs; |
189 | #endif | 189 | #endif |
190 | if (generic_interrupt_extension) | 190 | if (x86_platform_ipi_callback) |
191 | sum += irq_stats(cpu)->generic_irqs; | 191 | sum += irq_stats(cpu)->x86_platform_ipis; |
192 | #ifdef CONFIG_SMP | 192 | #ifdef CONFIG_SMP |
193 | sum += irq_stats(cpu)->irq_resched_count; | 193 | sum += irq_stats(cpu)->irq_resched_count; |
194 | sum += irq_stats(cpu)->irq_call_count; | 194 | sum += irq_stats(cpu)->irq_call_count; |
195 | sum += irq_stats(cpu)->irq_tlb_count; | 195 | sum += irq_stats(cpu)->irq_tlb_count; |
196 | #endif | 196 | #endif |
197 | #ifdef CONFIG_X86_MCE | 197 | #ifdef CONFIG_X86_THERMAL_VECTOR |
198 | sum += irq_stats(cpu)->irq_thermal_count; | 198 | sum += irq_stats(cpu)->irq_thermal_count; |
199 | # ifdef CONFIG_X86_MCE_THRESHOLD | 199 | #endif |
200 | #ifdef CONFIG_X86_MCE_THRESHOLD | ||
200 | sum += irq_stats(cpu)->irq_threshold_count; | 201 | sum += irq_stats(cpu)->irq_threshold_count; |
201 | # endif | ||
202 | #endif | 202 | #endif |
203 | #ifdef CONFIG_X86_MCE | 203 | #ifdef CONFIG_X86_MCE |
204 | sum += per_cpu(mce_exception_count, cpu); | 204 | sum += per_cpu(mce_exception_count, cpu); |
@@ -251,9 +251,9 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs) | |||
251 | } | 251 | } |
252 | 252 | ||
253 | /* | 253 | /* |
254 | * Handler for GENERIC_INTERRUPT_VECTOR. | 254 | * Handler for X86_PLATFORM_IPI_VECTOR. |
255 | */ | 255 | */ |
256 | void smp_generic_interrupt(struct pt_regs *regs) | 256 | void smp_x86_platform_ipi(struct pt_regs *regs) |
257 | { | 257 | { |
258 | struct pt_regs *old_regs = set_irq_regs(regs); | 258 | struct pt_regs *old_regs = set_irq_regs(regs); |
259 | 259 | ||
@@ -263,10 +263,10 @@ void smp_generic_interrupt(struct pt_regs *regs) | |||
263 | 263 | ||
264 | irq_enter(); | 264 | irq_enter(); |
265 | 265 | ||
266 | inc_irq_stat(generic_irqs); | 266 | inc_irq_stat(x86_platform_ipis); |
267 | 267 | ||
268 | if (generic_interrupt_extension) | 268 | if (x86_platform_ipi_callback) |
269 | generic_interrupt_extension(); | 269 | x86_platform_ipi_callback(); |
270 | 270 | ||
271 | irq_exit(); | 271 | irq_exit(); |
272 | 272 | ||
@@ -274,3 +274,93 @@ void smp_generic_interrupt(struct pt_regs *regs) | |||
274 | } | 274 | } |
275 | 275 | ||
276 | EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); | 276 | EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); |
277 | |||
278 | #ifdef CONFIG_HOTPLUG_CPU | ||
279 | /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ | ||
280 | void fixup_irqs(void) | ||
281 | { | ||
282 | unsigned int irq, vector; | ||
283 | static int warned; | ||
284 | struct irq_desc *desc; | ||
285 | |||
286 | for_each_irq_desc(irq, desc) { | ||
287 | int break_affinity = 0; | ||
288 | int set_affinity = 1; | ||
289 | const struct cpumask *affinity; | ||
290 | |||
291 | if (!desc) | ||
292 | continue; | ||
293 | if (irq == 2) | ||
294 | continue; | ||
295 | |||
296 | /* interrupt's are disabled at this point */ | ||
297 | raw_spin_lock(&desc->lock); | ||
298 | |||
299 | affinity = desc->affinity; | ||
300 | if (!irq_has_action(irq) || | ||
301 | cpumask_equal(affinity, cpu_online_mask)) { | ||
302 | raw_spin_unlock(&desc->lock); | ||
303 | continue; | ||
304 | } | ||
305 | |||
306 | /* | ||
307 | * Complete the irq move. This cpu is going down and for | ||
308 | * non intr-remapping case, we can't wait till this interrupt | ||
309 | * arrives at this cpu before completing the irq move. | ||
310 | */ | ||
311 | irq_force_complete_move(irq); | ||
312 | |||
313 | if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { | ||
314 | break_affinity = 1; | ||
315 | affinity = cpu_all_mask; | ||
316 | } | ||
317 | |||
318 | if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->mask) | ||
319 | desc->chip->mask(irq); | ||
320 | |||
321 | if (desc->chip->set_affinity) | ||
322 | desc->chip->set_affinity(irq, affinity); | ||
323 | else if (!(warned++)) | ||
324 | set_affinity = 0; | ||
325 | |||
326 | if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->unmask) | ||
327 | desc->chip->unmask(irq); | ||
328 | |||
329 | raw_spin_unlock(&desc->lock); | ||
330 | |||
331 | if (break_affinity && set_affinity) | ||
332 | printk("Broke affinity for irq %i\n", irq); | ||
333 | else if (!set_affinity) | ||
334 | printk("Cannot set affinity for irq %i\n", irq); | ||
335 | } | ||
336 | |||
337 | /* | ||
338 | * We can remove mdelay() and then send spuriuous interrupts to | ||
339 | * new cpu targets for all the irqs that were handled previously by | ||
340 | * this cpu. While it works, I have seen spurious interrupt messages | ||
341 | * (nothing wrong but still...). | ||
342 | * | ||
343 | * So for now, retain mdelay(1) and check the IRR and then send those | ||
344 | * interrupts to new targets as this cpu is already offlined... | ||
345 | */ | ||
346 | mdelay(1); | ||
347 | |||
348 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { | ||
349 | unsigned int irr; | ||
350 | |||
351 | if (__get_cpu_var(vector_irq)[vector] < 0) | ||
352 | continue; | ||
353 | |||
354 | irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); | ||
355 | if (irr & (1 << (vector % 32))) { | ||
356 | irq = __get_cpu_var(vector_irq)[vector]; | ||
357 | |||
358 | desc = irq_to_desc(irq); | ||
359 | raw_spin_lock(&desc->lock); | ||
360 | if (desc->chip->retrigger) | ||
361 | desc->chip->retrigger(irq); | ||
362 | raw_spin_unlock(&desc->lock); | ||
363 | } | ||
364 | } | ||
365 | } | ||
366 | #endif | ||
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 7d35d0fe2329..10709f29d166 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c | |||
@@ -211,48 +211,3 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) | |||
211 | 211 | ||
212 | return true; | 212 | return true; |
213 | } | 213 | } |
214 | |||
215 | #ifdef CONFIG_HOTPLUG_CPU | ||
216 | |||
217 | /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ | ||
218 | void fixup_irqs(void) | ||
219 | { | ||
220 | unsigned int irq; | ||
221 | struct irq_desc *desc; | ||
222 | |||
223 | for_each_irq_desc(irq, desc) { | ||
224 | const struct cpumask *affinity; | ||
225 | |||
226 | if (!desc) | ||
227 | continue; | ||
228 | if (irq == 2) | ||
229 | continue; | ||
230 | |||
231 | affinity = desc->affinity; | ||
232 | if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { | ||
233 | printk("Breaking affinity for irq %i\n", irq); | ||
234 | affinity = cpu_all_mask; | ||
235 | } | ||
236 | if (desc->chip->set_affinity) | ||
237 | desc->chip->set_affinity(irq, affinity); | ||
238 | else if (desc->action) | ||
239 | printk_once("Cannot set affinity for irq %i\n", irq); | ||
240 | } | ||
241 | |||
242 | #if 0 | ||
243 | barrier(); | ||
244 | /* Ingo Molnar says: "after the IO-APIC masks have been redirected | ||
245 | [note the nop - the interrupt-enable boundary on x86 is two | ||
246 | instructions from sti] - to flush out pending hardirqs and | ||
247 | IPIs. After this point nothing is supposed to reach this CPU." */ | ||
248 | __asm__ __volatile__("sti; nop; cli"); | ||
249 | barrier(); | ||
250 | #else | ||
251 | /* That doesn't seem sufficient. Give it 1ms. */ | ||
252 | local_irq_enable(); | ||
253 | mdelay(1); | ||
254 | local_irq_disable(); | ||
255 | #endif | ||
256 | } | ||
257 | #endif | ||
258 | |||
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 977d8b43a0dd..acf8fbf8fbda 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c | |||
@@ -62,64 +62,6 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) | |||
62 | return true; | 62 | return true; |
63 | } | 63 | } |
64 | 64 | ||
65 | #ifdef CONFIG_HOTPLUG_CPU | ||
66 | /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ | ||
67 | void fixup_irqs(void) | ||
68 | { | ||
69 | unsigned int irq; | ||
70 | static int warned; | ||
71 | struct irq_desc *desc; | ||
72 | |||
73 | for_each_irq_desc(irq, desc) { | ||
74 | int break_affinity = 0; | ||
75 | int set_affinity = 1; | ||
76 | const struct cpumask *affinity; | ||
77 | |||
78 | if (!desc) | ||
79 | continue; | ||
80 | if (irq == 2) | ||
81 | continue; | ||
82 | |||
83 | /* interrupt's are disabled at this point */ | ||
84 | spin_lock(&desc->lock); | ||
85 | |||
86 | affinity = desc->affinity; | ||
87 | if (!irq_has_action(irq) || | ||
88 | cpumask_equal(affinity, cpu_online_mask)) { | ||
89 | spin_unlock(&desc->lock); | ||
90 | continue; | ||
91 | } | ||
92 | |||
93 | if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { | ||
94 | break_affinity = 1; | ||
95 | affinity = cpu_all_mask; | ||
96 | } | ||
97 | |||
98 | if (desc->chip->mask) | ||
99 | desc->chip->mask(irq); | ||
100 | |||
101 | if (desc->chip->set_affinity) | ||
102 | desc->chip->set_affinity(irq, affinity); | ||
103 | else if (!(warned++)) | ||
104 | set_affinity = 0; | ||
105 | |||
106 | if (desc->chip->unmask) | ||
107 | desc->chip->unmask(irq); | ||
108 | |||
109 | spin_unlock(&desc->lock); | ||
110 | |||
111 | if (break_affinity && set_affinity) | ||
112 | printk("Broke affinity for irq %i\n", irq); | ||
113 | else if (!set_affinity) | ||
114 | printk("Cannot set affinity for irq %i\n", irq); | ||
115 | } | ||
116 | |||
117 | /* That doesn't seem sufficient. Give it 1ms. */ | ||
118 | local_irq_enable(); | ||
119 | mdelay(1); | ||
120 | local_irq_disable(); | ||
121 | } | ||
122 | #endif | ||
123 | 65 | ||
124 | extern void call_softirq(void); | 66 | extern void call_softirq(void); |
125 | 67 | ||
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 40f30773fb29..d5932226614f 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -200,8 +200,8 @@ static void __init apic_intr_init(void) | |||
200 | /* self generated IPI for local APIC timer */ | 200 | /* self generated IPI for local APIC timer */ |
201 | alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); | 201 | alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); |
202 | 202 | ||
203 | /* generic IPI for platform specific use */ | 203 | /* IPI for X86 platform specific use */ |
204 | alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt); | 204 | alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi); |
205 | 205 | ||
206 | /* IPI vectors for APIC spurious and error interrupts */ | 206 | /* IPI vectors for APIC spurious and error interrupts */ |
207 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); | 207 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); |
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 8d82a77a3f3b..dd74fe7273b1 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
@@ -43,6 +43,7 @@ | |||
43 | #include <linux/smp.h> | 43 | #include <linux/smp.h> |
44 | #include <linux/nmi.h> | 44 | #include <linux/nmi.h> |
45 | 45 | ||
46 | #include <asm/debugreg.h> | ||
46 | #include <asm/apicdef.h> | 47 | #include <asm/apicdef.h> |
47 | #include <asm/system.h> | 48 | #include <asm/system.h> |
48 | 49 | ||
@@ -85,10 +86,15 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) | |||
85 | gdb_regs[GDB_DS] = regs->ds; | 86 | gdb_regs[GDB_DS] = regs->ds; |
86 | gdb_regs[GDB_ES] = regs->es; | 87 | gdb_regs[GDB_ES] = regs->es; |
87 | gdb_regs[GDB_CS] = regs->cs; | 88 | gdb_regs[GDB_CS] = regs->cs; |
88 | gdb_regs[GDB_SS] = __KERNEL_DS; | ||
89 | gdb_regs[GDB_FS] = 0xFFFF; | 89 | gdb_regs[GDB_FS] = 0xFFFF; |
90 | gdb_regs[GDB_GS] = 0xFFFF; | 90 | gdb_regs[GDB_GS] = 0xFFFF; |
91 | gdb_regs[GDB_SP] = (int)®s->sp; | 91 | if (user_mode_vm(regs)) { |
92 | gdb_regs[GDB_SS] = regs->ss; | ||
93 | gdb_regs[GDB_SP] = regs->sp; | ||
94 | } else { | ||
95 | gdb_regs[GDB_SS] = __KERNEL_DS; | ||
96 | gdb_regs[GDB_SP] = kernel_stack_pointer(regs); | ||
97 | } | ||
92 | #else | 98 | #else |
93 | gdb_regs[GDB_R8] = regs->r8; | 99 | gdb_regs[GDB_R8] = regs->r8; |
94 | gdb_regs[GDB_R9] = regs->r9; | 100 | gdb_regs[GDB_R9] = regs->r9; |
@@ -101,7 +107,7 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) | |||
101 | gdb_regs32[GDB_PS] = regs->flags; | 107 | gdb_regs32[GDB_PS] = regs->flags; |
102 | gdb_regs32[GDB_CS] = regs->cs; | 108 | gdb_regs32[GDB_CS] = regs->cs; |
103 | gdb_regs32[GDB_SS] = regs->ss; | 109 | gdb_regs32[GDB_SS] = regs->ss; |
104 | gdb_regs[GDB_SP] = regs->sp; | 110 | gdb_regs[GDB_SP] = kernel_stack_pointer(regs); |
105 | #endif | 111 | #endif |
106 | } | 112 | } |
107 | 113 | ||
@@ -220,8 +226,7 @@ static void kgdb_correct_hw_break(void) | |||
220 | dr7 |= ((breakinfo[breakno].len << 2) | | 226 | dr7 |= ((breakinfo[breakno].len << 2) | |
221 | breakinfo[breakno].type) << | 227 | breakinfo[breakno].type) << |
222 | ((breakno << 2) + 16); | 228 | ((breakno << 2) + 16); |
223 | if (breakno >= 0 && breakno <= 3) | 229 | set_debugreg(breakinfo[breakno].addr, breakno); |
224 | set_debugreg(breakinfo[breakno].addr, breakno); | ||
225 | 230 | ||
226 | } else { | 231 | } else { |
227 | if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { | 232 | if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { |
@@ -395,7 +400,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, | |||
395 | /* set the trace bit if we're stepping */ | 400 | /* set the trace bit if we're stepping */ |
396 | if (remcomInBuffer[0] == 's') { | 401 | if (remcomInBuffer[0] == 's') { |
397 | linux_regs->flags |= X86_EFLAGS_TF; | 402 | linux_regs->flags |= X86_EFLAGS_TF; |
398 | kgdb_single_step = 1; | ||
399 | atomic_set(&kgdb_cpu_doing_single_step, | 403 | atomic_set(&kgdb_cpu_doing_single_step, |
400 | raw_smp_processor_id()); | 404 | raw_smp_processor_id()); |
401 | } | 405 | } |
@@ -434,6 +438,11 @@ single_step_cont(struct pt_regs *regs, struct die_args *args) | |||
434 | "resuming...\n"); | 438 | "resuming...\n"); |
435 | kgdb_arch_handle_exception(args->trapnr, args->signr, | 439 | kgdb_arch_handle_exception(args->trapnr, args->signr, |
436 | args->err, "c", "", regs); | 440 | args->err, "c", "", regs); |
441 | /* | ||
442 | * Reset the BS bit in dr6 (pointed by args->err) to | ||
443 | * denote completion of processing | ||
444 | */ | ||
445 | (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; | ||
437 | 446 | ||
438 | return NOTIFY_STOP; | 447 | return NOTIFY_STOP; |
439 | } | 448 | } |
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 7b5169d2b000..5b8c7505b3bc 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -48,31 +48,22 @@ | |||
48 | #include <linux/preempt.h> | 48 | #include <linux/preempt.h> |
49 | #include <linux/module.h> | 49 | #include <linux/module.h> |
50 | #include <linux/kdebug.h> | 50 | #include <linux/kdebug.h> |
51 | #include <linux/kallsyms.h> | ||
51 | 52 | ||
52 | #include <asm/cacheflush.h> | 53 | #include <asm/cacheflush.h> |
53 | #include <asm/desc.h> | 54 | #include <asm/desc.h> |
54 | #include <asm/pgtable.h> | 55 | #include <asm/pgtable.h> |
55 | #include <asm/uaccess.h> | 56 | #include <asm/uaccess.h> |
56 | #include <asm/alternative.h> | 57 | #include <asm/alternative.h> |
58 | #include <asm/insn.h> | ||
59 | #include <asm/debugreg.h> | ||
57 | 60 | ||
58 | void jprobe_return_end(void); | 61 | void jprobe_return_end(void); |
59 | 62 | ||
60 | DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; | 63 | DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; |
61 | DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); | 64 | DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); |
62 | 65 | ||
63 | #ifdef CONFIG_X86_64 | 66 | #define stack_addr(regs) ((unsigned long *)kernel_stack_pointer(regs)) |
64 | #define stack_addr(regs) ((unsigned long *)regs->sp) | ||
65 | #else | ||
66 | /* | ||
67 | * "®s->sp" looks wrong, but it's correct for x86_32. x86_32 CPUs | ||
68 | * don't save the ss and esp registers if the CPU is already in kernel | ||
69 | * mode when it traps. So for kprobes, regs->sp and regs->ss are not | ||
70 | * the [nonexistent] saved stack pointer and ss register, but rather | ||
71 | * the top 8 bytes of the pre-int3 stack. So ®s->sp happens to | ||
72 | * point to the top of the pre-int3 stack. | ||
73 | */ | ||
74 | #define stack_addr(regs) ((unsigned long *)®s->sp) | ||
75 | #endif | ||
76 | 67 | ||
77 | #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ | 68 | #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ |
78 | (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ | 69 | (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ |
@@ -106,50 +97,6 @@ static const u32 twobyte_is_boostable[256 / 32] = { | |||
106 | /* ----------------------------------------------- */ | 97 | /* ----------------------------------------------- */ |
107 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | 98 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ |
108 | }; | 99 | }; |
109 | static const u32 onebyte_has_modrm[256 / 32] = { | ||
110 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
111 | /* ----------------------------------------------- */ | ||
112 | W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 00 */ | ||
113 | W(0x10, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 10 */ | ||
114 | W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 20 */ | ||
115 | W(0x30, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 30 */ | ||
116 | W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */ | ||
117 | W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */ | ||
118 | W(0x60, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0) | /* 60 */ | ||
119 | W(0x70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 70 */ | ||
120 | W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ | ||
121 | W(0x90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 90 */ | ||
122 | W(0xa0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* a0 */ | ||
123 | W(0xb0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* b0 */ | ||
124 | W(0xc0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* c0 */ | ||
125 | W(0xd0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ | ||
126 | W(0xe0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* e0 */ | ||
127 | W(0xf0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) /* f0 */ | ||
128 | /* ----------------------------------------------- */ | ||
129 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
130 | }; | ||
131 | static const u32 twobyte_has_modrm[256 / 32] = { | ||
132 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
133 | /* ----------------------------------------------- */ | ||
134 | W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1) | /* 0f */ | ||
135 | W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0) , /* 1f */ | ||
136 | W(0x20, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 2f */ | ||
137 | W(0x30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 3f */ | ||
138 | W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 4f */ | ||
139 | W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 5f */ | ||
140 | W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 6f */ | ||
141 | W(0x70, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1) , /* 7f */ | ||
142 | W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 8f */ | ||
143 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 9f */ | ||
144 | W(0xa0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) | /* af */ | ||
145 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* bf */ | ||
146 | W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* cf */ | ||
147 | W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* df */ | ||
148 | W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* ef */ | ||
149 | W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* ff */ | ||
150 | /* ----------------------------------------------- */ | ||
151 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
152 | }; | ||
153 | #undef W | 100 | #undef W |
154 | 101 | ||
155 | struct kretprobe_blackpoint kretprobe_blacklist[] = { | 102 | struct kretprobe_blackpoint kretprobe_blacklist[] = { |
@@ -244,6 +191,75 @@ retry: | |||
244 | } | 191 | } |
245 | } | 192 | } |
246 | 193 | ||
194 | /* Recover the probed instruction at addr for further analysis. */ | ||
195 | static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) | ||
196 | { | ||
197 | struct kprobe *kp; | ||
198 | kp = get_kprobe((void *)addr); | ||
199 | if (!kp) | ||
200 | return -EINVAL; | ||
201 | |||
202 | /* | ||
203 | * Basically, kp->ainsn.insn has an original instruction. | ||
204 | * However, RIP-relative instruction can not do single-stepping | ||
205 | * at different place, fix_riprel() tweaks the displacement of | ||
206 | * that instruction. In that case, we can't recover the instruction | ||
207 | * from the kp->ainsn.insn. | ||
208 | * | ||
209 | * On the other hand, kp->opcode has a copy of the first byte of | ||
210 | * the probed instruction, which is overwritten by int3. And | ||
211 | * the instruction at kp->addr is not modified by kprobes except | ||
212 | * for the first byte, we can recover the original instruction | ||
213 | * from it and kp->opcode. | ||
214 | */ | ||
215 | memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); | ||
216 | buf[0] = kp->opcode; | ||
217 | return 0; | ||
218 | } | ||
219 | |||
220 | /* Dummy buffers for kallsyms_lookup */ | ||
221 | static char __dummy_buf[KSYM_NAME_LEN]; | ||
222 | |||
223 | /* Check if paddr is at an instruction boundary */ | ||
224 | static int __kprobes can_probe(unsigned long paddr) | ||
225 | { | ||
226 | int ret; | ||
227 | unsigned long addr, offset = 0; | ||
228 | struct insn insn; | ||
229 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | ||
230 | |||
231 | if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf)) | ||
232 | return 0; | ||
233 | |||
234 | /* Decode instructions */ | ||
235 | addr = paddr - offset; | ||
236 | while (addr < paddr) { | ||
237 | kernel_insn_init(&insn, (void *)addr); | ||
238 | insn_get_opcode(&insn); | ||
239 | |||
240 | /* | ||
241 | * Check if the instruction has been modified by another | ||
242 | * kprobe, in which case we replace the breakpoint by the | ||
243 | * original instruction in our buffer. | ||
244 | */ | ||
245 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { | ||
246 | ret = recover_probed_instruction(buf, addr); | ||
247 | if (ret) | ||
248 | /* | ||
249 | * Another debugging subsystem might insert | ||
250 | * this breakpoint. In that case, we can't | ||
251 | * recover it. | ||
252 | */ | ||
253 | return 0; | ||
254 | kernel_insn_init(&insn, buf); | ||
255 | } | ||
256 | insn_get_length(&insn); | ||
257 | addr += insn.length; | ||
258 | } | ||
259 | |||
260 | return (addr == paddr); | ||
261 | } | ||
262 | |||
247 | /* | 263 | /* |
248 | * Returns non-zero if opcode modifies the interrupt flag. | 264 | * Returns non-zero if opcode modifies the interrupt flag. |
249 | */ | 265 | */ |
@@ -277,68 +293,30 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) | |||
277 | static void __kprobes fix_riprel(struct kprobe *p) | 293 | static void __kprobes fix_riprel(struct kprobe *p) |
278 | { | 294 | { |
279 | #ifdef CONFIG_X86_64 | 295 | #ifdef CONFIG_X86_64 |
280 | u8 *insn = p->ainsn.insn; | 296 | struct insn insn; |
281 | s64 disp; | 297 | kernel_insn_init(&insn, p->ainsn.insn); |
282 | int need_modrm; | ||
283 | |||
284 | /* Skip legacy instruction prefixes. */ | ||
285 | while (1) { | ||
286 | switch (*insn) { | ||
287 | case 0x66: | ||
288 | case 0x67: | ||
289 | case 0x2e: | ||
290 | case 0x3e: | ||
291 | case 0x26: | ||
292 | case 0x64: | ||
293 | case 0x65: | ||
294 | case 0x36: | ||
295 | case 0xf0: | ||
296 | case 0xf3: | ||
297 | case 0xf2: | ||
298 | ++insn; | ||
299 | continue; | ||
300 | } | ||
301 | break; | ||
302 | } | ||
303 | 298 | ||
304 | /* Skip REX instruction prefix. */ | 299 | if (insn_rip_relative(&insn)) { |
305 | if (is_REX_prefix(insn)) | 300 | s64 newdisp; |
306 | ++insn; | 301 | u8 *disp; |
307 | 302 | insn_get_displacement(&insn); | |
308 | if (*insn == 0x0f) { | 303 | /* |
309 | /* Two-byte opcode. */ | 304 | * The copied instruction uses the %rip-relative addressing |
310 | ++insn; | 305 | * mode. Adjust the displacement for the difference between |
311 | need_modrm = test_bit(*insn, | 306 | * the original location of this instruction and the location |
312 | (unsigned long *)twobyte_has_modrm); | 307 | * of the copy that will actually be run. The tricky bit here |
313 | } else | 308 | * is making sure that the sign extension happens correctly in |
314 | /* One-byte opcode. */ | 309 | * this calculation, since we need a signed 32-bit result to |
315 | need_modrm = test_bit(*insn, | 310 | * be sign-extended to 64 bits when it's added to the %rip |
316 | (unsigned long *)onebyte_has_modrm); | 311 | * value and yield the same 64-bit result that the sign- |
317 | 312 | * extension of the original signed 32-bit displacement would | |
318 | if (need_modrm) { | 313 | * have given. |
319 | u8 modrm = *++insn; | 314 | */ |
320 | if ((modrm & 0xc7) == 0x05) { | 315 | newdisp = (u8 *) p->addr + (s64) insn.displacement.value - |
321 | /* %rip+disp32 addressing mode */ | 316 | (u8 *) p->ainsn.insn; |
322 | /* Displacement follows ModRM byte. */ | 317 | BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ |
323 | ++insn; | 318 | disp = (u8 *) p->ainsn.insn + insn_offset_displacement(&insn); |
324 | /* | 319 | *(s32 *) disp = (s32) newdisp; |
325 | * The copied instruction uses the %rip-relative | ||
326 | * addressing mode. Adjust the displacement for the | ||
327 | * difference between the original location of this | ||
328 | * instruction and the location of the copy that will | ||
329 | * actually be run. The tricky bit here is making sure | ||
330 | * that the sign extension happens correctly in this | ||
331 | * calculation, since we need a signed 32-bit result to | ||
332 | * be sign-extended to 64 bits when it's added to the | ||
333 | * %rip value and yield the same 64-bit result that the | ||
334 | * sign-extension of the original signed 32-bit | ||
335 | * displacement would have given. | ||
336 | */ | ||
337 | disp = (u8 *) p->addr + *((s32 *) insn) - | ||
338 | (u8 *) p->ainsn.insn; | ||
339 | BUG_ON((s64) (s32) disp != disp); /* Sanity check. */ | ||
340 | *(s32 *)insn = (s32) disp; | ||
341 | } | ||
342 | } | 320 | } |
343 | #endif | 321 | #endif |
344 | } | 322 | } |
@@ -359,6 +337,8 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p) | |||
359 | 337 | ||
360 | int __kprobes arch_prepare_kprobe(struct kprobe *p) | 338 | int __kprobes arch_prepare_kprobe(struct kprobe *p) |
361 | { | 339 | { |
340 | if (!can_probe((unsigned long)p->addr)) | ||
341 | return -EILSEQ; | ||
362 | /* insn: must be on special executable page on x86. */ | 342 | /* insn: must be on special executable page on x86. */ |
363 | p->ainsn.insn = get_insn_slot(); | 343 | p->ainsn.insn = get_insn_slot(); |
364 | if (!p->ainsn.insn) | 344 | if (!p->ainsn.insn) |
@@ -472,17 +452,6 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, | |||
472 | { | 452 | { |
473 | switch (kcb->kprobe_status) { | 453 | switch (kcb->kprobe_status) { |
474 | case KPROBE_HIT_SSDONE: | 454 | case KPROBE_HIT_SSDONE: |
475 | #ifdef CONFIG_X86_64 | ||
476 | /* TODO: Provide re-entrancy from post_kprobes_handler() and | ||
477 | * avoid exception stack corruption while single-stepping on | ||
478 | * the instruction of the new probe. | ||
479 | */ | ||
480 | arch_disarm_kprobe(p); | ||
481 | regs->ip = (unsigned long)p->addr; | ||
482 | reset_current_kprobe(); | ||
483 | preempt_enable_no_resched(); | ||
484 | break; | ||
485 | #endif | ||
486 | case KPROBE_HIT_ACTIVE: | 455 | case KPROBE_HIT_ACTIVE: |
487 | save_previous_kprobe(kcb); | 456 | save_previous_kprobe(kcb); |
488 | set_current_kprobe(p, regs, kcb); | 457 | set_current_kprobe(p, regs, kcb); |
@@ -491,18 +460,16 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, | |||
491 | kcb->kprobe_status = KPROBE_REENTER; | 460 | kcb->kprobe_status = KPROBE_REENTER; |
492 | break; | 461 | break; |
493 | case KPROBE_HIT_SS: | 462 | case KPROBE_HIT_SS: |
494 | if (p == kprobe_running()) { | 463 | /* A probe has been hit in the codepath leading up to, or just |
495 | regs->flags &= ~X86_EFLAGS_TF; | 464 | * after, single-stepping of a probed instruction. This entire |
496 | regs->flags |= kcb->kprobe_saved_flags; | 465 | * codepath should strictly reside in .kprobes.text section. |
497 | return 0; | 466 | * Raise a BUG or we'll continue in an endless reentering loop |
498 | } else { | 467 | * and eventually a stack overflow. |
499 | /* A probe has been hit in the codepath leading up | 468 | */ |
500 | * to, or just after, single-stepping of a probed | 469 | printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n", |
501 | * instruction. This entire codepath should strictly | 470 | p->addr); |
502 | * reside in .kprobes.text section. Raise a warning | 471 | dump_kprobe(p); |
503 | * to highlight this peculiar case. | 472 | BUG(); |
504 | */ | ||
505 | } | ||
506 | default: | 473 | default: |
507 | /* impossible cases */ | 474 | /* impossible cases */ |
508 | WARN_ON(1); | 475 | WARN_ON(1); |
@@ -514,7 +481,7 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, | |||
514 | 481 | ||
515 | /* | 482 | /* |
516 | * Interrupts are disabled on entry as trap3 is an interrupt gate and they | 483 | * Interrupts are disabled on entry as trap3 is an interrupt gate and they |
517 | * remain disabled thorough out this function. | 484 | * remain disabled throughout this function. |
518 | */ | 485 | */ |
519 | static int __kprobes kprobe_handler(struct pt_regs *regs) | 486 | static int __kprobes kprobe_handler(struct pt_regs *regs) |
520 | { | 487 | { |
@@ -851,7 +818,7 @@ no_change: | |||
851 | 818 | ||
852 | /* | 819 | /* |
853 | * Interrupts are disabled on entry as trap1 is an interrupt gate and they | 820 | * Interrupts are disabled on entry as trap1 is an interrupt gate and they |
854 | * remain disabled thoroughout this function. | 821 | * remain disabled throughout this function. |
855 | */ | 822 | */ |
856 | static int __kprobes post_kprobe_handler(struct pt_regs *regs) | 823 | static int __kprobes post_kprobe_handler(struct pt_regs *regs) |
857 | { | 824 | { |
@@ -967,8 +934,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, | |||
967 | ret = NOTIFY_STOP; | 934 | ret = NOTIFY_STOP; |
968 | break; | 935 | break; |
969 | case DIE_DEBUG: | 936 | case DIE_DEBUG: |
970 | if (post_kprobe_handler(args->regs)) | 937 | if (post_kprobe_handler(args->regs)) { |
938 | /* | ||
939 | * Reset the BS bit in dr6 (pointed by args->err) to | ||
940 | * denote completion of processing | ||
941 | */ | ||
942 | (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; | ||
971 | ret = NOTIFY_STOP; | 943 | ret = NOTIFY_STOP; |
944 | } | ||
972 | break; | 945 | break; |
973 | case DIE_GPF: | 946 | case DIE_GPF: |
974 | /* | 947 | /* |
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index c1c429d00130..a3fa43ba5d3b 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <asm/desc.h> | 25 | #include <asm/desc.h> |
26 | #include <asm/system.h> | 26 | #include <asm/system.h> |
27 | #include <asm/cacheflush.h> | 27 | #include <asm/cacheflush.h> |
28 | #include <asm/debugreg.h> | ||
28 | 29 | ||
29 | static void set_idt(void *newidt, __u16 limit) | 30 | static void set_idt(void *newidt, __u16 limit) |
30 | { | 31 | { |
@@ -157,8 +158,7 @@ int machine_kexec_prepare(struct kimage *image) | |||
157 | { | 158 | { |
158 | int error; | 159 | int error; |
159 | 160 | ||
160 | if (nx_enabled) | 161 | set_pages_x(image->control_code_page, 1); |
161 | set_pages_x(image->control_code_page, 1); | ||
162 | error = machine_kexec_alloc_page_tables(image); | 162 | error = machine_kexec_alloc_page_tables(image); |
163 | if (error) | 163 | if (error) |
164 | return error; | 164 | return error; |
@@ -172,8 +172,7 @@ int machine_kexec_prepare(struct kimage *image) | |||
172 | */ | 172 | */ |
173 | void machine_kexec_cleanup(struct kimage *image) | 173 | void machine_kexec_cleanup(struct kimage *image) |
174 | { | 174 | { |
175 | if (nx_enabled) | 175 | set_pages_nx(image->control_code_page, 1); |
176 | set_pages_nx(image->control_code_page, 1); | ||
177 | machine_kexec_free_page_tables(image); | 176 | machine_kexec_free_page_tables(image); |
178 | } | 177 | } |
179 | 178 | ||
@@ -202,6 +201,7 @@ void machine_kexec(struct kimage *image) | |||
202 | 201 | ||
203 | /* Interrupts aren't acceptable while we reboot */ | 202 | /* Interrupts aren't acceptable while we reboot */ |
204 | local_irq_disable(); | 203 | local_irq_disable(); |
204 | hw_breakpoint_disable(); | ||
205 | 205 | ||
206 | if (image->preserve_context) { | 206 | if (image->preserve_context) { |
207 | #ifdef CONFIG_X86_IO_APIC | 207 | #ifdef CONFIG_X86_IO_APIC |
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 84c3bf209e98..4a8bb82248ae 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <asm/pgtable.h> | 18 | #include <asm/pgtable.h> |
19 | #include <asm/tlbflush.h> | 19 | #include <asm/tlbflush.h> |
20 | #include <asm/mmu_context.h> | 20 | #include <asm/mmu_context.h> |
21 | #include <asm/debugreg.h> | ||
21 | 22 | ||
22 | static int init_one_level2_page(struct kimage *image, pgd_t *pgd, | 23 | static int init_one_level2_page(struct kimage *image, pgd_t *pgd, |
23 | unsigned long addr) | 24 | unsigned long addr) |
@@ -282,6 +283,7 @@ void machine_kexec(struct kimage *image) | |||
282 | 283 | ||
283 | /* Interrupts aren't acceptable while we reboot */ | 284 | /* Interrupts aren't acceptable while we reboot */ |
284 | local_irq_disable(); | 285 | local_irq_disable(); |
286 | hw_breakpoint_disable(); | ||
285 | 287 | ||
286 | if (image->preserve_context) { | 288 | if (image->preserve_context) { |
287 | #ifdef CONFIG_X86_IO_APIC | 289 | #ifdef CONFIG_X86_IO_APIC |
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c deleted file mode 100644 index 2a62d843f015..000000000000 --- a/arch/x86/kernel/mfgpt_32.c +++ /dev/null | |||
@@ -1,410 +0,0 @@ | |||
1 | /* | ||
2 | * Driver/API for AMD Geode Multi-Function General Purpose Timers (MFGPT) | ||
3 | * | ||
4 | * Copyright (C) 2006, Advanced Micro Devices, Inc. | ||
5 | * Copyright (C) 2007, Andres Salomon <dilinger@debian.org> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of version 2 of the GNU General Public License | ||
9 | * as published by the Free Software Foundation. | ||
10 | * | ||
11 | * The MFGPTs are documented in AMD Geode CS5536 Companion Device Data Book. | ||
12 | */ | ||
13 | |||
14 | /* | ||
15 | * We are using the 32.768kHz input clock - it's the only one that has the | ||
16 | * ranges we find desirable. The following table lists the suitable | ||
17 | * divisors and the associated Hz, minimum interval and the maximum interval: | ||
18 | * | ||
19 | * Divisor Hz Min Delta (s) Max Delta (s) | ||
20 | * 1 32768 .00048828125 2.000 | ||
21 | * 2 16384 .0009765625 4.000 | ||
22 | * 4 8192 .001953125 8.000 | ||
23 | * 8 4096 .00390625 16.000 | ||
24 | * 16 2048 .0078125 32.000 | ||
25 | * 32 1024 .015625 64.000 | ||
26 | * 64 512 .03125 128.000 | ||
27 | * 128 256 .0625 256.000 | ||
28 | * 256 128 .125 512.000 | ||
29 | */ | ||
30 | |||
31 | #include <linux/kernel.h> | ||
32 | #include <linux/interrupt.h> | ||
33 | #include <linux/module.h> | ||
34 | #include <asm/geode.h> | ||
35 | |||
36 | #define MFGPT_DEFAULT_IRQ 7 | ||
37 | |||
38 | static struct mfgpt_timer_t { | ||
39 | unsigned int avail:1; | ||
40 | } mfgpt_timers[MFGPT_MAX_TIMERS]; | ||
41 | |||
42 | /* Selected from the table above */ | ||
43 | |||
44 | #define MFGPT_DIVISOR 16 | ||
45 | #define MFGPT_SCALE 4 /* divisor = 2^(scale) */ | ||
46 | #define MFGPT_HZ (32768 / MFGPT_DIVISOR) | ||
47 | #define MFGPT_PERIODIC (MFGPT_HZ / HZ) | ||
48 | |||
49 | /* Allow for disabling of MFGPTs */ | ||
50 | static int disable; | ||
51 | static int __init mfgpt_disable(char *s) | ||
52 | { | ||
53 | disable = 1; | ||
54 | return 1; | ||
55 | } | ||
56 | __setup("nomfgpt", mfgpt_disable); | ||
57 | |||
58 | /* Reset the MFGPT timers. This is required by some broken BIOSes which already | ||
59 | * do the same and leave the system in an unstable state. TinyBIOS 0.98 is | ||
60 | * affected at least (0.99 is OK with MFGPT workaround left to off). | ||
61 | */ | ||
62 | static int __init mfgpt_fix(char *s) | ||
63 | { | ||
64 | u32 val, dummy; | ||
65 | |||
66 | /* The following udocumented bit resets the MFGPT timers */ | ||
67 | val = 0xFF; dummy = 0; | ||
68 | wrmsr(MSR_MFGPT_SETUP, val, dummy); | ||
69 | return 1; | ||
70 | } | ||
71 | __setup("mfgptfix", mfgpt_fix); | ||
72 | |||
73 | /* | ||
74 | * Check whether any MFGPTs are available for the kernel to use. In most | ||
75 | * cases, firmware that uses AMD's VSA code will claim all timers during | ||
76 | * bootup; we certainly don't want to take them if they're already in use. | ||
77 | * In other cases (such as with VSAless OpenFirmware), the system firmware | ||
78 | * leaves timers available for us to use. | ||
79 | */ | ||
80 | |||
81 | |||
82 | static int timers = -1; | ||
83 | |||
84 | static void geode_mfgpt_detect(void) | ||
85 | { | ||
86 | int i; | ||
87 | u16 val; | ||
88 | |||
89 | timers = 0; | ||
90 | |||
91 | if (disable) { | ||
92 | printk(KERN_INFO "geode-mfgpt: MFGPT support is disabled\n"); | ||
93 | goto done; | ||
94 | } | ||
95 | |||
96 | if (!geode_get_dev_base(GEODE_DEV_MFGPT)) { | ||
97 | printk(KERN_INFO "geode-mfgpt: MFGPT LBAR is not set up\n"); | ||
98 | goto done; | ||
99 | } | ||
100 | |||
101 | for (i = 0; i < MFGPT_MAX_TIMERS; i++) { | ||
102 | val = geode_mfgpt_read(i, MFGPT_REG_SETUP); | ||
103 | if (!(val & MFGPT_SETUP_SETUP)) { | ||
104 | mfgpt_timers[i].avail = 1; | ||
105 | timers++; | ||
106 | } | ||
107 | } | ||
108 | |||
109 | done: | ||
110 | printk(KERN_INFO "geode-mfgpt: %d MFGPT timers available.\n", timers); | ||
111 | } | ||
112 | |||
113 | int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable) | ||
114 | { | ||
115 | u32 msr, mask, value, dummy; | ||
116 | int shift = (cmp == MFGPT_CMP1) ? 0 : 8; | ||
117 | |||
118 | if (timer < 0 || timer >= MFGPT_MAX_TIMERS) | ||
119 | return -EIO; | ||
120 | |||
121 | /* | ||
122 | * The register maps for these are described in sections 6.17.1.x of | ||
123 | * the AMD Geode CS5536 Companion Device Data Book. | ||
124 | */ | ||
125 | switch (event) { | ||
126 | case MFGPT_EVENT_RESET: | ||
127 | /* | ||
128 | * XXX: According to the docs, we cannot reset timers above | ||
129 | * 6; that is, resets for 7 and 8 will be ignored. Is this | ||
130 | * a problem? -dilinger | ||
131 | */ | ||
132 | msr = MSR_MFGPT_NR; | ||
133 | mask = 1 << (timer + 24); | ||
134 | break; | ||
135 | |||
136 | case MFGPT_EVENT_NMI: | ||
137 | msr = MSR_MFGPT_NR; | ||
138 | mask = 1 << (timer + shift); | ||
139 | break; | ||
140 | |||
141 | case MFGPT_EVENT_IRQ: | ||
142 | msr = MSR_MFGPT_IRQ; | ||
143 | mask = 1 << (timer + shift); | ||
144 | break; | ||
145 | |||
146 | default: | ||
147 | return -EIO; | ||
148 | } | ||
149 | |||
150 | rdmsr(msr, value, dummy); | ||
151 | |||
152 | if (enable) | ||
153 | value |= mask; | ||
154 | else | ||
155 | value &= ~mask; | ||
156 | |||
157 | wrmsr(msr, value, dummy); | ||
158 | return 0; | ||
159 | } | ||
160 | EXPORT_SYMBOL_GPL(geode_mfgpt_toggle_event); | ||
161 | |||
162 | int geode_mfgpt_set_irq(int timer, int cmp, int *irq, int enable) | ||
163 | { | ||
164 | u32 zsel, lpc, dummy; | ||
165 | int shift; | ||
166 | |||
167 | if (timer < 0 || timer >= MFGPT_MAX_TIMERS) | ||
168 | return -EIO; | ||
169 | |||
170 | /* | ||
171 | * Unfortunately, MFGPTs come in pairs sharing their IRQ lines. If VSA | ||
172 | * is using the same CMP of the timer's Siamese twin, the IRQ is set to | ||
173 | * 2, and we mustn't use nor change it. | ||
174 | * XXX: Likewise, 2 Linux drivers might clash if the 2nd overwrites the | ||
175 | * IRQ of the 1st. This can only happen if forcing an IRQ, calling this | ||
176 | * with *irq==0 is safe. Currently there _are_ no 2 drivers. | ||
177 | */ | ||
178 | rdmsr(MSR_PIC_ZSEL_LOW, zsel, dummy); | ||
179 | shift = ((cmp == MFGPT_CMP1 ? 0 : 4) + timer % 4) * 4; | ||
180 | if (((zsel >> shift) & 0xF) == 2) | ||
181 | return -EIO; | ||
182 | |||
183 | /* Choose IRQ: if none supplied, keep IRQ already set or use default */ | ||
184 | if (!*irq) | ||
185 | *irq = (zsel >> shift) & 0xF; | ||
186 | if (!*irq) | ||
187 | *irq = MFGPT_DEFAULT_IRQ; | ||
188 | |||
189 | /* Can't use IRQ if it's 0 (=disabled), 2, or routed to LPC */ | ||
190 | if (*irq < 1 || *irq == 2 || *irq > 15) | ||
191 | return -EIO; | ||
192 | rdmsr(MSR_PIC_IRQM_LPC, lpc, dummy); | ||
193 | if (lpc & (1 << *irq)) | ||
194 | return -EIO; | ||
195 | |||
196 | /* All chosen and checked - go for it */ | ||
197 | if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable)) | ||
198 | return -EIO; | ||
199 | if (enable) { | ||
200 | zsel = (zsel & ~(0xF << shift)) | (*irq << shift); | ||
201 | wrmsr(MSR_PIC_ZSEL_LOW, zsel, dummy); | ||
202 | } | ||
203 | |||
204 | return 0; | ||
205 | } | ||
206 | |||
207 | static int mfgpt_get(int timer) | ||
208 | { | ||
209 | mfgpt_timers[timer].avail = 0; | ||
210 | printk(KERN_INFO "geode-mfgpt: Registered timer %d\n", timer); | ||
211 | return timer; | ||
212 | } | ||
213 | |||
214 | int geode_mfgpt_alloc_timer(int timer, int domain) | ||
215 | { | ||
216 | int i; | ||
217 | |||
218 | if (timers == -1) { | ||
219 | /* timers haven't been detected yet */ | ||
220 | geode_mfgpt_detect(); | ||
221 | } | ||
222 | |||
223 | if (!timers) | ||
224 | return -1; | ||
225 | |||
226 | if (timer >= MFGPT_MAX_TIMERS) | ||
227 | return -1; | ||
228 | |||
229 | if (timer < 0) { | ||
230 | /* Try to find an available timer */ | ||
231 | for (i = 0; i < MFGPT_MAX_TIMERS; i++) { | ||
232 | if (mfgpt_timers[i].avail) | ||
233 | return mfgpt_get(i); | ||
234 | |||
235 | if (i == 5 && domain == MFGPT_DOMAIN_WORKING) | ||
236 | break; | ||
237 | } | ||
238 | } else { | ||
239 | /* If they requested a specific timer, try to honor that */ | ||
240 | if (mfgpt_timers[timer].avail) | ||
241 | return mfgpt_get(timer); | ||
242 | } | ||
243 | |||
244 | /* No timers available - too bad */ | ||
245 | return -1; | ||
246 | } | ||
247 | EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer); | ||
248 | |||
249 | |||
250 | #ifdef CONFIG_GEODE_MFGPT_TIMER | ||
251 | |||
252 | /* | ||
253 | * The MFPGT timers on the CS5536 provide us with suitable timers to use | ||
254 | * as clock event sources - not as good as a HPET or APIC, but certainly | ||
255 | * better than the PIT. This isn't a general purpose MFGPT driver, but | ||
256 | * a simplified one designed specifically to act as a clock event source. | ||
257 | * For full details about the MFGPT, please consult the CS5536 data sheet. | ||
258 | */ | ||
259 | |||
260 | #include <linux/clocksource.h> | ||
261 | #include <linux/clockchips.h> | ||
262 | |||
263 | static unsigned int mfgpt_tick_mode = CLOCK_EVT_MODE_SHUTDOWN; | ||
264 | static u16 mfgpt_event_clock; | ||
265 | |||
266 | static int irq; | ||
267 | static int __init mfgpt_setup(char *str) | ||
268 | { | ||
269 | get_option(&str, &irq); | ||
270 | return 1; | ||
271 | } | ||
272 | __setup("mfgpt_irq=", mfgpt_setup); | ||
273 | |||
274 | static void mfgpt_disable_timer(u16 clock) | ||
275 | { | ||
276 | /* avoid races by clearing CMP1 and CMP2 unconditionally */ | ||
277 | geode_mfgpt_write(clock, MFGPT_REG_SETUP, (u16) ~MFGPT_SETUP_CNTEN | | ||
278 | MFGPT_SETUP_CMP1 | MFGPT_SETUP_CMP2); | ||
279 | } | ||
280 | |||
281 | static int mfgpt_next_event(unsigned long, struct clock_event_device *); | ||
282 | static void mfgpt_set_mode(enum clock_event_mode, struct clock_event_device *); | ||
283 | |||
284 | static struct clock_event_device mfgpt_clockevent = { | ||
285 | .name = "mfgpt-timer", | ||
286 | .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, | ||
287 | .set_mode = mfgpt_set_mode, | ||
288 | .set_next_event = mfgpt_next_event, | ||
289 | .rating = 250, | ||
290 | .cpumask = cpu_all_mask, | ||
291 | .shift = 32 | ||
292 | }; | ||
293 | |||
294 | static void mfgpt_start_timer(u16 delta) | ||
295 | { | ||
296 | geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_CMP2, (u16) delta); | ||
297 | geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_COUNTER, 0); | ||
298 | |||
299 | geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_SETUP, | ||
300 | MFGPT_SETUP_CNTEN | MFGPT_SETUP_CMP2); | ||
301 | } | ||
302 | |||
303 | static void mfgpt_set_mode(enum clock_event_mode mode, | ||
304 | struct clock_event_device *evt) | ||
305 | { | ||
306 | mfgpt_disable_timer(mfgpt_event_clock); | ||
307 | |||
308 | if (mode == CLOCK_EVT_MODE_PERIODIC) | ||
309 | mfgpt_start_timer(MFGPT_PERIODIC); | ||
310 | |||
311 | mfgpt_tick_mode = mode; | ||
312 | } | ||
313 | |||
314 | static int mfgpt_next_event(unsigned long delta, struct clock_event_device *evt) | ||
315 | { | ||
316 | mfgpt_start_timer(delta); | ||
317 | return 0; | ||
318 | } | ||
319 | |||
320 | static irqreturn_t mfgpt_tick(int irq, void *dev_id) | ||
321 | { | ||
322 | u16 val = geode_mfgpt_read(mfgpt_event_clock, MFGPT_REG_SETUP); | ||
323 | |||
324 | /* See if the interrupt was for us */ | ||
325 | if (!(val & (MFGPT_SETUP_SETUP | MFGPT_SETUP_CMP2 | MFGPT_SETUP_CMP1))) | ||
326 | return IRQ_NONE; | ||
327 | |||
328 | /* Turn off the clock (and clear the event) */ | ||
329 | mfgpt_disable_timer(mfgpt_event_clock); | ||
330 | |||
331 | if (mfgpt_tick_mode == CLOCK_EVT_MODE_SHUTDOWN) | ||
332 | return IRQ_HANDLED; | ||
333 | |||
334 | /* Clear the counter */ | ||
335 | geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_COUNTER, 0); | ||
336 | |||
337 | /* Restart the clock in periodic mode */ | ||
338 | |||
339 | if (mfgpt_tick_mode == CLOCK_EVT_MODE_PERIODIC) { | ||
340 | geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_SETUP, | ||
341 | MFGPT_SETUP_CNTEN | MFGPT_SETUP_CMP2); | ||
342 | } | ||
343 | |||
344 | mfgpt_clockevent.event_handler(&mfgpt_clockevent); | ||
345 | return IRQ_HANDLED; | ||
346 | } | ||
347 | |||
348 | static struct irqaction mfgptirq = { | ||
349 | .handler = mfgpt_tick, | ||
350 | .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TIMER, | ||
351 | .name = "mfgpt-timer" | ||
352 | }; | ||
353 | |||
354 | int __init mfgpt_timer_setup(void) | ||
355 | { | ||
356 | int timer, ret; | ||
357 | u16 val; | ||
358 | |||
359 | timer = geode_mfgpt_alloc_timer(MFGPT_TIMER_ANY, MFGPT_DOMAIN_WORKING); | ||
360 | if (timer < 0) { | ||
361 | printk(KERN_ERR | ||
362 | "mfgpt-timer: Could not allocate a MFPGT timer\n"); | ||
363 | return -ENODEV; | ||
364 | } | ||
365 | |||
366 | mfgpt_event_clock = timer; | ||
367 | |||
368 | /* Set up the IRQ on the MFGPT side */ | ||
369 | if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, &irq)) { | ||
370 | printk(KERN_ERR "mfgpt-timer: Could not set up IRQ %d\n", irq); | ||
371 | return -EIO; | ||
372 | } | ||
373 | |||
374 | /* And register it with the kernel */ | ||
375 | ret = setup_irq(irq, &mfgptirq); | ||
376 | |||
377 | if (ret) { | ||
378 | printk(KERN_ERR | ||
379 | "mfgpt-timer: Unable to set up the interrupt.\n"); | ||
380 | goto err; | ||
381 | } | ||
382 | |||
383 | /* Set the clock scale and enable the event mode for CMP2 */ | ||
384 | val = MFGPT_SCALE | (3 << 8); | ||
385 | |||
386 | geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_SETUP, val); | ||
387 | |||
388 | /* Set up the clock event */ | ||
389 | mfgpt_clockevent.mult = div_sc(MFGPT_HZ, NSEC_PER_SEC, | ||
390 | mfgpt_clockevent.shift); | ||
391 | mfgpt_clockevent.min_delta_ns = clockevent_delta2ns(0xF, | ||
392 | &mfgpt_clockevent); | ||
393 | mfgpt_clockevent.max_delta_ns = clockevent_delta2ns(0xFFFE, | ||
394 | &mfgpt_clockevent); | ||
395 | |||
396 | printk(KERN_INFO | ||
397 | "mfgpt-timer: Registering MFGPT timer %d as a clock event, using IRQ %d\n", | ||
398 | timer, irq); | ||
399 | clockevents_register_device(&mfgpt_clockevent); | ||
400 | |||
401 | return 0; | ||
402 | |||
403 | err: | ||
404 | geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, &irq); | ||
405 | printk(KERN_ERR | ||
406 | "mfgpt-timer: Unable to set up the MFGPT clock source\n"); | ||
407 | return -EIO; | ||
408 | } | ||
409 | |||
410 | #endif | ||
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 366baa179913..37542b67c57e 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c | |||
@@ -13,6 +13,9 @@ | |||
13 | * Licensed under the terms of the GNU General Public | 13 | * Licensed under the terms of the GNU General Public |
14 | * License version 2. See file COPYING for details. | 14 | * License version 2. See file COPYING for details. |
15 | */ | 15 | */ |
16 | |||
17 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
18 | |||
16 | #include <linux/firmware.h> | 19 | #include <linux/firmware.h> |
17 | #include <linux/pci_ids.h> | 20 | #include <linux/pci_ids.h> |
18 | #include <linux/uaccess.h> | 21 | #include <linux/uaccess.h> |
@@ -33,6 +36,9 @@ MODULE_LICENSE("GPL v2"); | |||
33 | #define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000 | 36 | #define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000 |
34 | #define UCODE_UCODE_TYPE 0x00000001 | 37 | #define UCODE_UCODE_TYPE 0x00000001 |
35 | 38 | ||
39 | const struct firmware *firmware; | ||
40 | static int supported_cpu; | ||
41 | |||
36 | struct equiv_cpu_entry { | 42 | struct equiv_cpu_entry { |
37 | u32 installed_cpu; | 43 | u32 installed_cpu; |
38 | u32 fixed_errata_mask; | 44 | u32 fixed_errata_mask; |
@@ -71,17 +77,14 @@ static struct equiv_cpu_entry *equiv_cpu_table; | |||
71 | 77 | ||
72 | static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) | 78 | static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) |
73 | { | 79 | { |
74 | struct cpuinfo_x86 *c = &cpu_data(cpu); | ||
75 | u32 dummy; | 80 | u32 dummy; |
76 | 81 | ||
77 | memset(csig, 0, sizeof(*csig)); | 82 | if (!supported_cpu) |
78 | if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { | ||
79 | printk(KERN_WARNING "microcode: CPU%d: AMD CPU family 0x%x not " | ||
80 | "supported\n", cpu, c->x86); | ||
81 | return -1; | 83 | return -1; |
82 | } | 84 | |
85 | memset(csig, 0, sizeof(*csig)); | ||
83 | rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy); | 86 | rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy); |
84 | printk(KERN_INFO "microcode: CPU%d: patch_level=0x%x\n", cpu, csig->rev); | 87 | pr_info("CPU%d: patch_level=0x%x\n", cpu, csig->rev); |
85 | return 0; | 88 | return 0; |
86 | } | 89 | } |
87 | 90 | ||
@@ -103,23 +106,16 @@ static int get_matching_microcode(int cpu, void *mc, int rev) | |||
103 | i++; | 106 | i++; |
104 | } | 107 | } |
105 | 108 | ||
106 | if (!equiv_cpu_id) { | 109 | if (!equiv_cpu_id) |
107 | printk(KERN_WARNING "microcode: CPU%d: cpu revision " | ||
108 | "not listed in equivalent cpu table\n", cpu); | ||
109 | return 0; | 110 | return 0; |
110 | } | ||
111 | 111 | ||
112 | if (mc_header->processor_rev_id != equiv_cpu_id) { | 112 | if (mc_header->processor_rev_id != equiv_cpu_id) |
113 | printk(KERN_ERR "microcode: CPU%d: patch mismatch " | ||
114 | "(processor_rev_id: %x, equiv_cpu_id: %x)\n", | ||
115 | cpu, mc_header->processor_rev_id, equiv_cpu_id); | ||
116 | return 0; | 113 | return 0; |
117 | } | ||
118 | 114 | ||
119 | /* ucode might be chipset specific -- currently we don't support this */ | 115 | /* ucode might be chipset specific -- currently we don't support this */ |
120 | if (mc_header->nb_dev_id || mc_header->sb_dev_id) { | 116 | if (mc_header->nb_dev_id || mc_header->sb_dev_id) { |
121 | printk(KERN_ERR "microcode: CPU%d: loading of chipset " | 117 | pr_err("CPU%d: loading of chipset specific code not yet supported\n", |
122 | "specific code not yet supported\n", cpu); | 118 | cpu); |
123 | return 0; | 119 | return 0; |
124 | } | 120 | } |
125 | 121 | ||
@@ -148,14 +144,12 @@ static int apply_microcode_amd(int cpu) | |||
148 | 144 | ||
149 | /* check current patch id and patch's id for match */ | 145 | /* check current patch id and patch's id for match */ |
150 | if (rev != mc_amd->hdr.patch_id) { | 146 | if (rev != mc_amd->hdr.patch_id) { |
151 | printk(KERN_ERR "microcode: CPU%d: update failed " | 147 | pr_err("CPU%d: update failed (for patch_level=0x%x)\n", |
152 | "(for patch_level=0x%x)\n", cpu, mc_amd->hdr.patch_id); | 148 | cpu, mc_amd->hdr.patch_id); |
153 | return -1; | 149 | return -1; |
154 | } | 150 | } |
155 | 151 | ||
156 | printk(KERN_INFO "microcode: CPU%d: updated (new patch_level=0x%x)\n", | 152 | pr_info("CPU%d: updated (new patch_level=0x%x)\n", cpu, rev); |
157 | cpu, rev); | ||
158 | |||
159 | uci->cpu_sig.rev = rev; | 153 | uci->cpu_sig.rev = rev; |
160 | 154 | ||
161 | return 0; | 155 | return 0; |
@@ -178,18 +172,14 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) | |||
178 | return NULL; | 172 | return NULL; |
179 | 173 | ||
180 | if (section_hdr[0] != UCODE_UCODE_TYPE) { | 174 | if (section_hdr[0] != UCODE_UCODE_TYPE) { |
181 | printk(KERN_ERR "microcode: error: invalid type field in " | 175 | pr_err("error: invalid type field in container file section header\n"); |
182 | "container file section header\n"); | ||
183 | return NULL; | 176 | return NULL; |
184 | } | 177 | } |
185 | 178 | ||
186 | total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8)); | 179 | total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8)); |
187 | 180 | ||
188 | printk(KERN_DEBUG "microcode: size %u, total_size %u\n", | ||
189 | size, total_size); | ||
190 | |||
191 | if (total_size > size || total_size > UCODE_MAX_SIZE) { | 181 | if (total_size > size || total_size > UCODE_MAX_SIZE) { |
192 | printk(KERN_ERR "microcode: error: size mismatch\n"); | 182 | pr_err("error: size mismatch\n"); |
193 | return NULL; | 183 | return NULL; |
194 | } | 184 | } |
195 | 185 | ||
@@ -218,15 +208,13 @@ static int install_equiv_cpu_table(const u8 *buf) | |||
218 | size = buf_pos[2]; | 208 | size = buf_pos[2]; |
219 | 209 | ||
220 | if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { | 210 | if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { |
221 | printk(KERN_ERR "microcode: error: invalid type field in " | 211 | pr_err("error: invalid type field in container file section header\n"); |
222 | "container file section header\n"); | ||
223 | return 0; | 212 | return 0; |
224 | } | 213 | } |
225 | 214 | ||
226 | equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size); | 215 | equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size); |
227 | if (!equiv_cpu_table) { | 216 | if (!equiv_cpu_table) { |
228 | printk(KERN_ERR "microcode: failed to allocate " | 217 | pr_err("failed to allocate equivalent CPU table\n"); |
229 | "equivalent CPU table\n"); | ||
230 | return 0; | 218 | return 0; |
231 | } | 219 | } |
232 | 220 | ||
@@ -259,8 +247,7 @@ generic_load_microcode(int cpu, const u8 *data, size_t size) | |||
259 | 247 | ||
260 | offset = install_equiv_cpu_table(ucode_ptr); | 248 | offset = install_equiv_cpu_table(ucode_ptr); |
261 | if (!offset) { | 249 | if (!offset) { |
262 | printk(KERN_ERR "microcode: failed to create " | 250 | pr_err("failed to create equivalent cpu table\n"); |
263 | "equivalent cpu table\n"); | ||
264 | return UCODE_ERROR; | 251 | return UCODE_ERROR; |
265 | } | 252 | } |
266 | 253 | ||
@@ -291,8 +278,7 @@ generic_load_microcode(int cpu, const u8 *data, size_t size) | |||
291 | if (!leftover) { | 278 | if (!leftover) { |
292 | vfree(uci->mc); | 279 | vfree(uci->mc); |
293 | uci->mc = new_mc; | 280 | uci->mc = new_mc; |
294 | pr_debug("microcode: CPU%d found a matching microcode " | 281 | pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", |
295 | "update with version 0x%x (current=0x%x)\n", | ||
296 | cpu, new_rev, uci->cpu_sig.rev); | 282 | cpu, new_rev, uci->cpu_sig.rev); |
297 | } else { | 283 | } else { |
298 | vfree(new_mc); | 284 | vfree(new_mc); |
@@ -308,27 +294,26 @@ generic_load_microcode(int cpu, const u8 *data, size_t size) | |||
308 | 294 | ||
309 | static enum ucode_state request_microcode_fw(int cpu, struct device *device) | 295 | static enum ucode_state request_microcode_fw(int cpu, struct device *device) |
310 | { | 296 | { |
311 | const char *fw_name = "amd-ucode/microcode_amd.bin"; | ||
312 | const struct firmware *firmware; | ||
313 | enum ucode_state ret; | 297 | enum ucode_state ret; |
314 | 298 | ||
315 | if (request_firmware(&firmware, fw_name, device)) { | 299 | if (firmware == NULL) |
316 | printk(KERN_ERR "microcode: failed to load file %s\n", fw_name); | ||
317 | return UCODE_NFOUND; | 300 | return UCODE_NFOUND; |
301 | |||
302 | if (*(u32 *)firmware->data != UCODE_MAGIC) { | ||
303 | pr_err("invalid UCODE_MAGIC (0x%08x)\n", | ||
304 | *(u32 *)firmware->data); | ||
305 | return UCODE_ERROR; | ||
318 | } | 306 | } |
319 | 307 | ||
320 | ret = generic_load_microcode(cpu, firmware->data, firmware->size); | 308 | ret = generic_load_microcode(cpu, firmware->data, firmware->size); |
321 | 309 | ||
322 | release_firmware(firmware); | ||
323 | |||
324 | return ret; | 310 | return ret; |
325 | } | 311 | } |
326 | 312 | ||
327 | static enum ucode_state | 313 | static enum ucode_state |
328 | request_microcode_user(int cpu, const void __user *buf, size_t size) | 314 | request_microcode_user(int cpu, const void __user *buf, size_t size) |
329 | { | 315 | { |
330 | printk(KERN_INFO "microcode: AMD microcode update via " | 316 | pr_info("AMD microcode update via /dev/cpu/microcode not supported\n"); |
331 | "/dev/cpu/microcode not supported\n"); | ||
332 | return UCODE_ERROR; | 317 | return UCODE_ERROR; |
333 | } | 318 | } |
334 | 319 | ||
@@ -340,7 +325,31 @@ static void microcode_fini_cpu_amd(int cpu) | |||
340 | uci->mc = NULL; | 325 | uci->mc = NULL; |
341 | } | 326 | } |
342 | 327 | ||
328 | void init_microcode_amd(struct device *device) | ||
329 | { | ||
330 | const char *fw_name = "amd-ucode/microcode_amd.bin"; | ||
331 | struct cpuinfo_x86 *c = &boot_cpu_data; | ||
332 | |||
333 | WARN_ON(c->x86_vendor != X86_VENDOR_AMD); | ||
334 | |||
335 | if (c->x86 < 0x10) { | ||
336 | pr_warning("AMD CPU family 0x%x not supported\n", c->x86); | ||
337 | return; | ||
338 | } | ||
339 | supported_cpu = 1; | ||
340 | |||
341 | if (request_firmware(&firmware, fw_name, device)) | ||
342 | pr_err("failed to load file %s\n", fw_name); | ||
343 | } | ||
344 | |||
345 | void fini_microcode_amd(void) | ||
346 | { | ||
347 | release_firmware(firmware); | ||
348 | } | ||
349 | |||
343 | static struct microcode_ops microcode_amd_ops = { | 350 | static struct microcode_ops microcode_amd_ops = { |
351 | .init = init_microcode_amd, | ||
352 | .fini = fini_microcode_amd, | ||
344 | .request_microcode_user = request_microcode_user, | 353 | .request_microcode_user = request_microcode_user, |
345 | .request_microcode_fw = request_microcode_fw, | 354 | .request_microcode_fw = request_microcode_fw, |
346 | .collect_cpu_info = collect_cpu_info_amd, | 355 | .collect_cpu_info = collect_cpu_info_amd, |
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 378e9a8f1bf8..0c8632433090 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c | |||
@@ -70,10 +70,12 @@ | |||
70 | * Fix sigmatch() macro to handle old CPUs with pf == 0. | 70 | * Fix sigmatch() macro to handle old CPUs with pf == 0. |
71 | * Thanks to Stuart Swales for pointing out this bug. | 71 | * Thanks to Stuart Swales for pointing out this bug. |
72 | */ | 72 | */ |
73 | |||
74 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
75 | |||
73 | #include <linux/platform_device.h> | 76 | #include <linux/platform_device.h> |
74 | #include <linux/miscdevice.h> | 77 | #include <linux/miscdevice.h> |
75 | #include <linux/capability.h> | 78 | #include <linux/capability.h> |
76 | #include <linux/smp_lock.h> | ||
77 | #include <linux/kernel.h> | 79 | #include <linux/kernel.h> |
78 | #include <linux/module.h> | 80 | #include <linux/module.h> |
79 | #include <linux/mutex.h> | 81 | #include <linux/mutex.h> |
@@ -201,7 +203,6 @@ static int do_microcode_update(const void __user *buf, size_t size) | |||
201 | 203 | ||
202 | static int microcode_open(struct inode *unused1, struct file *unused2) | 204 | static int microcode_open(struct inode *unused1, struct file *unused2) |
203 | { | 205 | { |
204 | cycle_kernel_lock(); | ||
205 | return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; | 206 | return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; |
206 | } | 207 | } |
207 | 208 | ||
@@ -211,7 +212,7 @@ static ssize_t microcode_write(struct file *file, const char __user *buf, | |||
211 | ssize_t ret = -EINVAL; | 212 | ssize_t ret = -EINVAL; |
212 | 213 | ||
213 | if ((len >> PAGE_SHIFT) > totalram_pages) { | 214 | if ((len >> PAGE_SHIFT) > totalram_pages) { |
214 | pr_err("microcode: too much data (max %ld pages)\n", totalram_pages); | 215 | pr_err("too much data (max %ld pages)\n", totalram_pages); |
215 | return ret; | 216 | return ret; |
216 | } | 217 | } |
217 | 218 | ||
@@ -246,7 +247,7 @@ static int __init microcode_dev_init(void) | |||
246 | 247 | ||
247 | error = misc_register(µcode_dev); | 248 | error = misc_register(µcode_dev); |
248 | if (error) { | 249 | if (error) { |
249 | pr_err("microcode: can't misc_register on minor=%d\n", MICROCODE_MINOR); | 250 | pr_err("can't misc_register on minor=%d\n", MICROCODE_MINOR); |
250 | return error; | 251 | return error; |
251 | } | 252 | } |
252 | 253 | ||
@@ -361,7 +362,7 @@ static enum ucode_state microcode_resume_cpu(int cpu) | |||
361 | if (!uci->mc) | 362 | if (!uci->mc) |
362 | return UCODE_NFOUND; | 363 | return UCODE_NFOUND; |
363 | 364 | ||
364 | pr_debug("microcode: CPU%d updated upon resume\n", cpu); | 365 | pr_debug("CPU%d updated upon resume\n", cpu); |
365 | apply_microcode_on_target(cpu); | 366 | apply_microcode_on_target(cpu); |
366 | 367 | ||
367 | return UCODE_OK; | 368 | return UCODE_OK; |
@@ -381,7 +382,7 @@ static enum ucode_state microcode_init_cpu(int cpu) | |||
381 | ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev); | 382 | ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev); |
382 | 383 | ||
383 | if (ustate == UCODE_OK) { | 384 | if (ustate == UCODE_OK) { |
384 | pr_debug("microcode: CPU%d updated upon init\n", cpu); | 385 | pr_debug("CPU%d updated upon init\n", cpu); |
385 | apply_microcode_on_target(cpu); | 386 | apply_microcode_on_target(cpu); |
386 | } | 387 | } |
387 | 388 | ||
@@ -408,7 +409,7 @@ static int mc_sysdev_add(struct sys_device *sys_dev) | |||
408 | if (!cpu_online(cpu)) | 409 | if (!cpu_online(cpu)) |
409 | return 0; | 410 | return 0; |
410 | 411 | ||
411 | pr_debug("microcode: CPU%d added\n", cpu); | 412 | pr_debug("CPU%d added\n", cpu); |
412 | 413 | ||
413 | err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); | 414 | err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); |
414 | if (err) | 415 | if (err) |
@@ -427,7 +428,7 @@ static int mc_sysdev_remove(struct sys_device *sys_dev) | |||
427 | if (!cpu_online(cpu)) | 428 | if (!cpu_online(cpu)) |
428 | return 0; | 429 | return 0; |
429 | 430 | ||
430 | pr_debug("microcode: CPU%d removed\n", cpu); | 431 | pr_debug("CPU%d removed\n", cpu); |
431 | microcode_fini_cpu(cpu); | 432 | microcode_fini_cpu(cpu); |
432 | sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); | 433 | sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); |
433 | return 0; | 434 | return 0; |
@@ -475,15 +476,15 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) | |||
475 | microcode_update_cpu(cpu); | 476 | microcode_update_cpu(cpu); |
476 | case CPU_DOWN_FAILED: | 477 | case CPU_DOWN_FAILED: |
477 | case CPU_DOWN_FAILED_FROZEN: | 478 | case CPU_DOWN_FAILED_FROZEN: |
478 | pr_debug("microcode: CPU%d added\n", cpu); | 479 | pr_debug("CPU%d added\n", cpu); |
479 | if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) | 480 | if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) |
480 | pr_err("microcode: Failed to create group for CPU%d\n", cpu); | 481 | pr_err("Failed to create group for CPU%d\n", cpu); |
481 | break; | 482 | break; |
482 | case CPU_DOWN_PREPARE: | 483 | case CPU_DOWN_PREPARE: |
483 | case CPU_DOWN_PREPARE_FROZEN: | 484 | case CPU_DOWN_PREPARE_FROZEN: |
484 | /* Suspend is in progress, only remove the interface */ | 485 | /* Suspend is in progress, only remove the interface */ |
485 | sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); | 486 | sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); |
486 | pr_debug("microcode: CPU%d removed\n", cpu); | 487 | pr_debug("CPU%d removed\n", cpu); |
487 | break; | 488 | break; |
488 | case CPU_DEAD: | 489 | case CPU_DEAD: |
489 | case CPU_UP_CANCELED_FROZEN: | 490 | case CPU_UP_CANCELED_FROZEN: |
@@ -509,7 +510,7 @@ static int __init microcode_init(void) | |||
509 | microcode_ops = init_amd_microcode(); | 510 | microcode_ops = init_amd_microcode(); |
510 | 511 | ||
511 | if (!microcode_ops) { | 512 | if (!microcode_ops) { |
512 | pr_err("microcode: no support for this CPU vendor\n"); | 513 | pr_err("no support for this CPU vendor\n"); |
513 | return -ENODEV; | 514 | return -ENODEV; |
514 | } | 515 | } |
515 | 516 | ||
@@ -520,6 +521,9 @@ static int __init microcode_init(void) | |||
520 | return PTR_ERR(microcode_pdev); | 521 | return PTR_ERR(microcode_pdev); |
521 | } | 522 | } |
522 | 523 | ||
524 | if (microcode_ops->init) | ||
525 | microcode_ops->init(µcode_pdev->dev); | ||
526 | |||
523 | get_online_cpus(); | 527 | get_online_cpus(); |
524 | mutex_lock(µcode_mutex); | 528 | mutex_lock(µcode_mutex); |
525 | 529 | ||
@@ -540,8 +544,7 @@ static int __init microcode_init(void) | |||
540 | register_hotcpu_notifier(&mc_cpu_notifier); | 544 | register_hotcpu_notifier(&mc_cpu_notifier); |
541 | 545 | ||
542 | pr_info("Microcode Update Driver: v" MICROCODE_VERSION | 546 | pr_info("Microcode Update Driver: v" MICROCODE_VERSION |
543 | " <tigran@aivazian.fsnet.co.uk>," | 547 | " <tigran@aivazian.fsnet.co.uk>, Peter Oruba\n"); |
544 | " Peter Oruba\n"); | ||
545 | 548 | ||
546 | return 0; | 549 | return 0; |
547 | } | 550 | } |
@@ -563,6 +566,9 @@ static void __exit microcode_exit(void) | |||
563 | 566 | ||
564 | platform_device_unregister(microcode_pdev); | 567 | platform_device_unregister(microcode_pdev); |
565 | 568 | ||
569 | if (microcode_ops->fini) | ||
570 | microcode_ops->fini(); | ||
571 | |||
566 | microcode_ops = NULL; | 572 | microcode_ops = NULL; |
567 | 573 | ||
568 | pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); | 574 | pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); |
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 0d334ddd0a96..ebd193e476ca 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c | |||
@@ -70,6 +70,9 @@ | |||
70 | * Fix sigmatch() macro to handle old CPUs with pf == 0. | 70 | * Fix sigmatch() macro to handle old CPUs with pf == 0. |
71 | * Thanks to Stuart Swales for pointing out this bug. | 71 | * Thanks to Stuart Swales for pointing out this bug. |
72 | */ | 72 | */ |
73 | |||
74 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
75 | |||
73 | #include <linux/firmware.h> | 76 | #include <linux/firmware.h> |
74 | #include <linux/uaccess.h> | 77 | #include <linux/uaccess.h> |
75 | #include <linux/kernel.h> | 78 | #include <linux/kernel.h> |
@@ -146,8 +149,7 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) | |||
146 | 149 | ||
147 | if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || | 150 | if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || |
148 | cpu_has(c, X86_FEATURE_IA64)) { | 151 | cpu_has(c, X86_FEATURE_IA64)) { |
149 | printk(KERN_ERR "microcode: CPU%d not a capable Intel " | 152 | pr_err("CPU%d not a capable Intel processor\n", cpu_num); |
150 | "processor\n", cpu_num); | ||
151 | return -1; | 153 | return -1; |
152 | } | 154 | } |
153 | 155 | ||
@@ -165,8 +167,8 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) | |||
165 | /* get the current revision from MSR 0x8B */ | 167 | /* get the current revision from MSR 0x8B */ |
166 | rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev); | 168 | rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev); |
167 | 169 | ||
168 | printk(KERN_INFO "microcode: CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n", | 170 | pr_info("CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n", |
169 | cpu_num, csig->sig, csig->pf, csig->rev); | 171 | cpu_num, csig->sig, csig->pf, csig->rev); |
170 | 172 | ||
171 | return 0; | 173 | return 0; |
172 | } | 174 | } |
@@ -194,28 +196,24 @@ static int microcode_sanity_check(void *mc) | |||
194 | data_size = get_datasize(mc_header); | 196 | data_size = get_datasize(mc_header); |
195 | 197 | ||
196 | if (data_size + MC_HEADER_SIZE > total_size) { | 198 | if (data_size + MC_HEADER_SIZE > total_size) { |
197 | printk(KERN_ERR "microcode: error! " | 199 | pr_err("error! Bad data size in microcode data file\n"); |
198 | "Bad data size in microcode data file\n"); | ||
199 | return -EINVAL; | 200 | return -EINVAL; |
200 | } | 201 | } |
201 | 202 | ||
202 | if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { | 203 | if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { |
203 | printk(KERN_ERR "microcode: error! " | 204 | pr_err("error! Unknown microcode update format\n"); |
204 | "Unknown microcode update format\n"); | ||
205 | return -EINVAL; | 205 | return -EINVAL; |
206 | } | 206 | } |
207 | ext_table_size = total_size - (MC_HEADER_SIZE + data_size); | 207 | ext_table_size = total_size - (MC_HEADER_SIZE + data_size); |
208 | if (ext_table_size) { | 208 | if (ext_table_size) { |
209 | if ((ext_table_size < EXT_HEADER_SIZE) | 209 | if ((ext_table_size < EXT_HEADER_SIZE) |
210 | || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { | 210 | || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { |
211 | printk(KERN_ERR "microcode: error! " | 211 | pr_err("error! Small exttable size in microcode data file\n"); |
212 | "Small exttable size in microcode data file\n"); | ||
213 | return -EINVAL; | 212 | return -EINVAL; |
214 | } | 213 | } |
215 | ext_header = mc + MC_HEADER_SIZE + data_size; | 214 | ext_header = mc + MC_HEADER_SIZE + data_size; |
216 | if (ext_table_size != exttable_size(ext_header)) { | 215 | if (ext_table_size != exttable_size(ext_header)) { |
217 | printk(KERN_ERR "microcode: error! " | 216 | pr_err("error! Bad exttable size in microcode data file\n"); |
218 | "Bad exttable size in microcode data file\n"); | ||
219 | return -EFAULT; | 217 | return -EFAULT; |
220 | } | 218 | } |
221 | ext_sigcount = ext_header->count; | 219 | ext_sigcount = ext_header->count; |
@@ -230,8 +228,7 @@ static int microcode_sanity_check(void *mc) | |||
230 | while (i--) | 228 | while (i--) |
231 | ext_table_sum += ext_tablep[i]; | 229 | ext_table_sum += ext_tablep[i]; |
232 | if (ext_table_sum) { | 230 | if (ext_table_sum) { |
233 | printk(KERN_WARNING "microcode: aborting, " | 231 | pr_warning("aborting, bad extended signature table checksum\n"); |
234 | "bad extended signature table checksum\n"); | ||
235 | return -EINVAL; | 232 | return -EINVAL; |
236 | } | 233 | } |
237 | } | 234 | } |
@@ -242,7 +239,7 @@ static int microcode_sanity_check(void *mc) | |||
242 | while (i--) | 239 | while (i--) |
243 | orig_sum += ((int *)mc)[i]; | 240 | orig_sum += ((int *)mc)[i]; |
244 | if (orig_sum) { | 241 | if (orig_sum) { |
245 | printk(KERN_ERR "microcode: aborting, bad checksum\n"); | 242 | pr_err("aborting, bad checksum\n"); |
246 | return -EINVAL; | 243 | return -EINVAL; |
247 | } | 244 | } |
248 | if (!ext_table_size) | 245 | if (!ext_table_size) |
@@ -255,7 +252,7 @@ static int microcode_sanity_check(void *mc) | |||
255 | - (mc_header->sig + mc_header->pf + mc_header->cksum) | 252 | - (mc_header->sig + mc_header->pf + mc_header->cksum) |
256 | + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); | 253 | + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); |
257 | if (sum) { | 254 | if (sum) { |
258 | printk(KERN_ERR "microcode: aborting, bad checksum\n"); | 255 | pr_err("aborting, bad checksum\n"); |
259 | return -EINVAL; | 256 | return -EINVAL; |
260 | } | 257 | } |
261 | } | 258 | } |
@@ -327,13 +324,11 @@ static int apply_microcode(int cpu) | |||
327 | rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); | 324 | rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); |
328 | 325 | ||
329 | if (val[1] != mc_intel->hdr.rev) { | 326 | if (val[1] != mc_intel->hdr.rev) { |
330 | printk(KERN_ERR "microcode: CPU%d update " | 327 | pr_err("CPU%d update to revision 0x%x failed\n", |
331 | "to revision 0x%x failed\n", | 328 | cpu_num, mc_intel->hdr.rev); |
332 | cpu_num, mc_intel->hdr.rev); | ||
333 | return -1; | 329 | return -1; |
334 | } | 330 | } |
335 | printk(KERN_INFO "microcode: CPU%d updated to revision " | 331 | pr_info("CPU%d updated to revision 0x%x, date = %04x-%02x-%02x \n", |
336 | "0x%x, date = %04x-%02x-%02x \n", | ||
337 | cpu_num, val[1], | 332 | cpu_num, val[1], |
338 | mc_intel->hdr.date & 0xffff, | 333 | mc_intel->hdr.date & 0xffff, |
339 | mc_intel->hdr.date >> 24, | 334 | mc_intel->hdr.date >> 24, |
@@ -362,8 +357,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, | |||
362 | 357 | ||
363 | mc_size = get_totalsize(&mc_header); | 358 | mc_size = get_totalsize(&mc_header); |
364 | if (!mc_size || mc_size > leftover) { | 359 | if (!mc_size || mc_size > leftover) { |
365 | printk(KERN_ERR "microcode: error!" | 360 | pr_err("error! Bad data in microcode data file\n"); |
366 | "Bad data in microcode data file\n"); | ||
367 | break; | 361 | break; |
368 | } | 362 | } |
369 | 363 | ||
@@ -405,9 +399,8 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, | |||
405 | vfree(uci->mc); | 399 | vfree(uci->mc); |
406 | uci->mc = (struct microcode_intel *)new_mc; | 400 | uci->mc = (struct microcode_intel *)new_mc; |
407 | 401 | ||
408 | pr_debug("microcode: CPU%d found a matching microcode update with" | 402 | pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", |
409 | " version 0x%x (current=0x%x)\n", | 403 | cpu, new_rev, uci->cpu_sig.rev); |
410 | cpu, new_rev, uci->cpu_sig.rev); | ||
411 | out: | 404 | out: |
412 | return state; | 405 | return state; |
413 | } | 406 | } |
@@ -429,7 +422,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device) | |||
429 | c->x86, c->x86_model, c->x86_mask); | 422 | c->x86, c->x86_model, c->x86_mask); |
430 | 423 | ||
431 | if (request_firmware(&firmware, name, device)) { | 424 | if (request_firmware(&firmware, name, device)) { |
432 | pr_debug("microcode: data file %s load failed\n", name); | 425 | pr_debug("data file %s load failed\n", name); |
433 | return UCODE_NFOUND; | 426 | return UCODE_NFOUND; |
434 | } | 427 | } |
435 | 428 | ||
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 5be95ef4ffec..40b54ceb68b5 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
@@ -667,36 +667,18 @@ void __init default_get_smp_config(unsigned int early) | |||
667 | */ | 667 | */ |
668 | } | 668 | } |
669 | 669 | ||
670 | static void __init smp_reserve_bootmem(struct mpf_intel *mpf) | 670 | static void __init smp_reserve_memory(struct mpf_intel *mpf) |
671 | { | 671 | { |
672 | unsigned long size = get_mpc_size(mpf->physptr); | 672 | unsigned long size = get_mpc_size(mpf->physptr); |
673 | #ifdef CONFIG_X86_32 | ||
674 | /* | ||
675 | * We cannot access to MPC table to compute table size yet, | ||
676 | * as only few megabytes from the bottom is mapped now. | ||
677 | * PC-9800's MPC table places on the very last of physical | ||
678 | * memory; so that simply reserving PAGE_SIZE from mpf->physptr | ||
679 | * yields BUG() in reserve_bootmem. | ||
680 | * also need to make sure physptr is below than max_low_pfn | ||
681 | * we don't need reserve the area above max_low_pfn | ||
682 | */ | ||
683 | unsigned long end = max_low_pfn * PAGE_SIZE; | ||
684 | 673 | ||
685 | if (mpf->physptr < end) { | 674 | reserve_early(mpf->physptr, mpf->physptr+size, "MP-table mpc"); |
686 | if (mpf->physptr + size > end) | ||
687 | size = end - mpf->physptr; | ||
688 | reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT); | ||
689 | } | ||
690 | #else | ||
691 | reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT); | ||
692 | #endif | ||
693 | } | 675 | } |
694 | 676 | ||
695 | static int __init smp_scan_config(unsigned long base, unsigned long length, | 677 | static int __init smp_scan_config(unsigned long base, unsigned long length) |
696 | unsigned reserve) | ||
697 | { | 678 | { |
698 | unsigned int *bp = phys_to_virt(base); | 679 | unsigned int *bp = phys_to_virt(base); |
699 | struct mpf_intel *mpf; | 680 | struct mpf_intel *mpf; |
681 | unsigned long mem; | ||
700 | 682 | ||
701 | apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", | 683 | apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", |
702 | bp, length); | 684 | bp, length); |
@@ -717,12 +699,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, | |||
717 | printk(KERN_INFO "found SMP MP-table at [%p] %llx\n", | 699 | printk(KERN_INFO "found SMP MP-table at [%p] %llx\n", |
718 | mpf, (u64)virt_to_phys(mpf)); | 700 | mpf, (u64)virt_to_phys(mpf)); |
719 | 701 | ||
720 | if (!reserve) | 702 | mem = virt_to_phys(mpf); |
721 | return 1; | 703 | reserve_early(mem, mem + sizeof(*mpf), "MP-table mpf"); |
722 | reserve_bootmem_generic(virt_to_phys(mpf), sizeof(*mpf), | ||
723 | BOOTMEM_DEFAULT); | ||
724 | if (mpf->physptr) | 704 | if (mpf->physptr) |
725 | smp_reserve_bootmem(mpf); | 705 | smp_reserve_memory(mpf); |
726 | 706 | ||
727 | return 1; | 707 | return 1; |
728 | } | 708 | } |
@@ -732,7 +712,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, | |||
732 | return 0; | 712 | return 0; |
733 | } | 713 | } |
734 | 714 | ||
735 | void __init default_find_smp_config(unsigned int reserve) | 715 | void __init default_find_smp_config(void) |
736 | { | 716 | { |
737 | unsigned int address; | 717 | unsigned int address; |
738 | 718 | ||
@@ -744,9 +724,9 @@ void __init default_find_smp_config(unsigned int reserve) | |||
744 | * 2) Scan the top 1K of base RAM | 724 | * 2) Scan the top 1K of base RAM |
745 | * 3) Scan the 64K of bios | 725 | * 3) Scan the 64K of bios |
746 | */ | 726 | */ |
747 | if (smp_scan_config(0x0, 0x400, reserve) || | 727 | if (smp_scan_config(0x0, 0x400) || |
748 | smp_scan_config(639 * 0x400, 0x400, reserve) || | 728 | smp_scan_config(639 * 0x400, 0x400) || |
749 | smp_scan_config(0xF0000, 0x10000, reserve)) | 729 | smp_scan_config(0xF0000, 0x10000)) |
750 | return; | 730 | return; |
751 | /* | 731 | /* |
752 | * If it is an SMP machine we should know now, unless the | 732 | * If it is an SMP machine we should know now, unless the |
@@ -767,7 +747,7 @@ void __init default_find_smp_config(unsigned int reserve) | |||
767 | 747 | ||
768 | address = get_bios_ebda(); | 748 | address = get_bios_ebda(); |
769 | if (address) | 749 | if (address) |
770 | smp_scan_config(address, 0x400, reserve); | 750 | smp_scan_config(address, 0x400); |
771 | } | 751 | } |
772 | 752 | ||
773 | #ifdef CONFIG_X86_IO_APIC | 753 | #ifdef CONFIG_X86_IO_APIC |
@@ -965,9 +945,6 @@ void __init early_reserve_e820_mpc_new(void) | |||
965 | { | 945 | { |
966 | if (enable_update_mptable && alloc_mptable) { | 946 | if (enable_update_mptable && alloc_mptable) { |
967 | u64 startt = 0; | 947 | u64 startt = 0; |
968 | #ifdef CONFIG_X86_TRAMPOLINE | ||
969 | startt = TRAMPOLINE_BASE; | ||
970 | #endif | ||
971 | mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4); | 948 | mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4); |
972 | } | 949 | } |
973 | } | 950 | } |
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 6a3cefc7dda1..4bd93c9b2b27 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c | |||
@@ -172,23 +172,18 @@ static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg) | |||
172 | 172 | ||
173 | static int msr_open(struct inode *inode, struct file *file) | 173 | static int msr_open(struct inode *inode, struct file *file) |
174 | { | 174 | { |
175 | unsigned int cpu = iminor(file->f_path.dentry->d_inode); | 175 | unsigned int cpu; |
176 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 176 | struct cpuinfo_x86 *c; |
177 | int ret = 0; | ||
178 | 177 | ||
179 | lock_kernel(); | ||
180 | cpu = iminor(file->f_path.dentry->d_inode); | 178 | cpu = iminor(file->f_path.dentry->d_inode); |
179 | if (cpu >= nr_cpu_ids || !cpu_online(cpu)) | ||
180 | return -ENXIO; /* No such CPU */ | ||
181 | 181 | ||
182 | if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { | ||
183 | ret = -ENXIO; /* No such CPU */ | ||
184 | goto out; | ||
185 | } | ||
186 | c = &cpu_data(cpu); | 182 | c = &cpu_data(cpu); |
187 | if (!cpu_has(c, X86_FEATURE_MSR)) | 183 | if (!cpu_has(c, X86_FEATURE_MSR)) |
188 | ret = -EIO; /* MSR not supported */ | 184 | return -EIO; /* MSR not supported */ |
189 | out: | 185 | |
190 | unlock_kernel(); | 186 | return 0; |
191 | return ret; | ||
192 | } | 187 | } |
193 | 188 | ||
194 | /* | 189 | /* |
@@ -251,7 +246,7 @@ static int __init msr_init(void) | |||
251 | int i, err = 0; | 246 | int i, err = 0; |
252 | i = 0; | 247 | i = 0; |
253 | 248 | ||
254 | if (register_chrdev(MSR_MAJOR, "cpu/msr", &msr_fops)) { | 249 | if (__register_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr", &msr_fops)) { |
255 | printk(KERN_ERR "msr: unable to get major %d for msr\n", | 250 | printk(KERN_ERR "msr: unable to get major %d for msr\n", |
256 | MSR_MAJOR); | 251 | MSR_MAJOR); |
257 | err = -EBUSY; | 252 | err = -EBUSY; |
@@ -279,7 +274,7 @@ out_class: | |||
279 | msr_device_destroy(i); | 274 | msr_device_destroy(i); |
280 | class_destroy(msr_class); | 275 | class_destroy(msr_class); |
281 | out_chrdev: | 276 | out_chrdev: |
282 | unregister_chrdev(MSR_MAJOR, "cpu/msr"); | 277 | __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); |
283 | out: | 278 | out: |
284 | return err; | 279 | return err; |
285 | } | 280 | } |
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c index 4006c522adc7..9d1d263f786f 100644 --- a/arch/x86/kernel/olpc.c +++ b/arch/x86/kernel/olpc.c | |||
@@ -212,7 +212,7 @@ static int __init olpc_init(void) | |||
212 | unsigned char *romsig; | 212 | unsigned char *romsig; |
213 | 213 | ||
214 | /* The ioremap check is dangerous; limit what we run it on */ | 214 | /* The ioremap check is dangerous; limit what we run it on */ |
215 | if (!is_geode() || geode_has_vsa2()) | 215 | if (!is_geode() || cs5535_has_vsa2()) |
216 | return 0; | 216 | return 0; |
217 | 217 | ||
218 | spin_lock_init(&ec_lock); | 218 | spin_lock_init(&ec_lock); |
@@ -244,7 +244,7 @@ static int __init olpc_init(void) | |||
244 | (unsigned char *) &olpc_platform_info.ecver, 1); | 244 | (unsigned char *) &olpc_platform_info.ecver, 1); |
245 | 245 | ||
246 | /* check to see if the VSA exists */ | 246 | /* check to see if the VSA exists */ |
247 | if (geode_has_vsa2()) | 247 | if (cs5535_has_vsa2()) |
248 | olpc_platform_info.flags |= OLPC_F_VSA; | 248 | olpc_platform_info.flags |= OLPC_F_VSA; |
249 | 249 | ||
250 | printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n", | 250 | printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n", |
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 3a7c5a44082e..676b8c77a976 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c | |||
@@ -8,9 +8,9 @@ | |||
8 | #include <asm/paravirt.h> | 8 | #include <asm/paravirt.h> |
9 | 9 | ||
10 | static inline void | 10 | static inline void |
11 | default_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) | 11 | default_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) |
12 | { | 12 | { |
13 | __raw_spin_lock(lock); | 13 | arch_spin_lock(lock); |
14 | } | 14 | } |
15 | 15 | ||
16 | struct pv_lock_ops pv_lock_ops = { | 16 | struct pv_lock_ops pv_lock_ops = { |
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 971a3bec47a8..2bbde6078143 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c | |||
@@ -31,7 +31,7 @@ | |||
31 | #include <linux/string.h> | 31 | #include <linux/string.h> |
32 | #include <linux/crash_dump.h> | 32 | #include <linux/crash_dump.h> |
33 | #include <linux/dma-mapping.h> | 33 | #include <linux/dma-mapping.h> |
34 | #include <linux/bitops.h> | 34 | #include <linux/bitmap.h> |
35 | #include <linux/pci_ids.h> | 35 | #include <linux/pci_ids.h> |
36 | #include <linux/pci.h> | 36 | #include <linux/pci.h> |
37 | #include <linux/delay.h> | 37 | #include <linux/delay.h> |
@@ -46,6 +46,7 @@ | |||
46 | #include <asm/dma.h> | 46 | #include <asm/dma.h> |
47 | #include <asm/rio.h> | 47 | #include <asm/rio.h> |
48 | #include <asm/bios_ebda.h> | 48 | #include <asm/bios_ebda.h> |
49 | #include <asm/x86_init.h> | ||
49 | 50 | ||
50 | #ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT | 51 | #ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT |
51 | int use_calgary __read_mostly = 1; | 52 | int use_calgary __read_mostly = 1; |
@@ -211,7 +212,7 @@ static void iommu_range_reserve(struct iommu_table *tbl, | |||
211 | 212 | ||
212 | spin_lock_irqsave(&tbl->it_lock, flags); | 213 | spin_lock_irqsave(&tbl->it_lock, flags); |
213 | 214 | ||
214 | iommu_area_reserve(tbl->it_map, index, npages); | 215 | bitmap_set(tbl->it_map, index, npages); |
215 | 216 | ||
216 | spin_unlock_irqrestore(&tbl->it_lock, flags); | 217 | spin_unlock_irqrestore(&tbl->it_lock, flags); |
217 | } | 218 | } |
@@ -244,7 +245,7 @@ static unsigned long iommu_range_alloc(struct device *dev, | |||
244 | if (panic_on_overflow) | 245 | if (panic_on_overflow) |
245 | panic("Calgary: fix the allocator.\n"); | 246 | panic("Calgary: fix the allocator.\n"); |
246 | else | 247 | else |
247 | return bad_dma_address; | 248 | return DMA_ERROR_CODE; |
248 | } | 249 | } |
249 | } | 250 | } |
250 | 251 | ||
@@ -260,12 +261,15 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, | |||
260 | void *vaddr, unsigned int npages, int direction) | 261 | void *vaddr, unsigned int npages, int direction) |
261 | { | 262 | { |
262 | unsigned long entry; | 263 | unsigned long entry; |
263 | dma_addr_t ret = bad_dma_address; | 264 | dma_addr_t ret; |
264 | 265 | ||
265 | entry = iommu_range_alloc(dev, tbl, npages); | 266 | entry = iommu_range_alloc(dev, tbl, npages); |
266 | 267 | ||
267 | if (unlikely(entry == bad_dma_address)) | 268 | if (unlikely(entry == DMA_ERROR_CODE)) { |
268 | goto error; | 269 | printk(KERN_WARNING "Calgary: failed to allocate %u pages in " |
270 | "iommu %p\n", npages, tbl); | ||
271 | return DMA_ERROR_CODE; | ||
272 | } | ||
269 | 273 | ||
270 | /* set the return dma address */ | 274 | /* set the return dma address */ |
271 | ret = (entry << PAGE_SHIFT) | ((unsigned long)vaddr & ~PAGE_MASK); | 275 | ret = (entry << PAGE_SHIFT) | ((unsigned long)vaddr & ~PAGE_MASK); |
@@ -273,13 +277,7 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, | |||
273 | /* put the TCEs in the HW table */ | 277 | /* put the TCEs in the HW table */ |
274 | tce_build(tbl, entry, npages, (unsigned long)vaddr & PAGE_MASK, | 278 | tce_build(tbl, entry, npages, (unsigned long)vaddr & PAGE_MASK, |
275 | direction); | 279 | direction); |
276 | |||
277 | return ret; | 280 | return ret; |
278 | |||
279 | error: | ||
280 | printk(KERN_WARNING "Calgary: failed to allocate %u pages in " | ||
281 | "iommu %p\n", npages, tbl); | ||
282 | return bad_dma_address; | ||
283 | } | 281 | } |
284 | 282 | ||
285 | static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, | 283 | static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, |
@@ -290,8 +288,8 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, | |||
290 | unsigned long flags; | 288 | unsigned long flags; |
291 | 289 | ||
292 | /* were we called with bad_dma_address? */ | 290 | /* were we called with bad_dma_address? */ |
293 | badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE); | 291 | badend = DMA_ERROR_CODE + (EMERGENCY_PAGES * PAGE_SIZE); |
294 | if (unlikely((dma_addr >= bad_dma_address) && (dma_addr < badend))) { | 292 | if (unlikely((dma_addr >= DMA_ERROR_CODE) && (dma_addr < badend))) { |
295 | WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA " | 293 | WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA " |
296 | "address 0x%Lx\n", dma_addr); | 294 | "address 0x%Lx\n", dma_addr); |
297 | return; | 295 | return; |
@@ -305,7 +303,7 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, | |||
305 | 303 | ||
306 | spin_lock_irqsave(&tbl->it_lock, flags); | 304 | spin_lock_irqsave(&tbl->it_lock, flags); |
307 | 305 | ||
308 | iommu_area_free(tbl->it_map, entry, npages); | 306 | bitmap_clear(tbl->it_map, entry, npages); |
309 | 307 | ||
310 | spin_unlock_irqrestore(&tbl->it_lock, flags); | 308 | spin_unlock_irqrestore(&tbl->it_lock, flags); |
311 | } | 309 | } |
@@ -318,13 +316,15 @@ static inline struct iommu_table *find_iommu_table(struct device *dev) | |||
318 | 316 | ||
319 | pdev = to_pci_dev(dev); | 317 | pdev = to_pci_dev(dev); |
320 | 318 | ||
319 | /* search up the device tree for an iommu */ | ||
321 | pbus = pdev->bus; | 320 | pbus = pdev->bus; |
322 | 321 | do { | |
323 | /* is the device behind a bridge? Look for the root bus */ | 322 | tbl = pci_iommu(pbus); |
324 | while (pbus->parent) | 323 | if (tbl && tbl->it_busno == pbus->number) |
324 | break; | ||
325 | tbl = NULL; | ||
325 | pbus = pbus->parent; | 326 | pbus = pbus->parent; |
326 | 327 | } while (pbus); | |
327 | tbl = pci_iommu(pbus); | ||
328 | 328 | ||
329 | BUG_ON(tbl && (tbl->it_busno != pbus->number)); | 329 | BUG_ON(tbl && (tbl->it_busno != pbus->number)); |
330 | 330 | ||
@@ -373,7 +373,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, | |||
373 | npages = iommu_num_pages(vaddr, s->length, PAGE_SIZE); | 373 | npages = iommu_num_pages(vaddr, s->length, PAGE_SIZE); |
374 | 374 | ||
375 | entry = iommu_range_alloc(dev, tbl, npages); | 375 | entry = iommu_range_alloc(dev, tbl, npages); |
376 | if (entry == bad_dma_address) { | 376 | if (entry == DMA_ERROR_CODE) { |
377 | /* makes sure unmap knows to stop */ | 377 | /* makes sure unmap knows to stop */ |
378 | s->dma_length = 0; | 378 | s->dma_length = 0; |
379 | goto error; | 379 | goto error; |
@@ -391,7 +391,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, | |||
391 | error: | 391 | error: |
392 | calgary_unmap_sg(dev, sg, nelems, dir, NULL); | 392 | calgary_unmap_sg(dev, sg, nelems, dir, NULL); |
393 | for_each_sg(sg, s, nelems, i) { | 393 | for_each_sg(sg, s, nelems, i) { |
394 | sg->dma_address = bad_dma_address; | 394 | sg->dma_address = DMA_ERROR_CODE; |
395 | sg->dma_length = 0; | 395 | sg->dma_length = 0; |
396 | } | 396 | } |
397 | return 0; | 397 | return 0; |
@@ -446,7 +446,7 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size, | |||
446 | 446 | ||
447 | /* set up tces to cover the allocated range */ | 447 | /* set up tces to cover the allocated range */ |
448 | mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); | 448 | mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); |
449 | if (mapping == bad_dma_address) | 449 | if (mapping == DMA_ERROR_CODE) |
450 | goto free; | 450 | goto free; |
451 | *dma_handle = mapping; | 451 | *dma_handle = mapping; |
452 | return ret; | 452 | return ret; |
@@ -727,7 +727,7 @@ static void __init calgary_reserve_regions(struct pci_dev *dev) | |||
727 | struct iommu_table *tbl = pci_iommu(dev->bus); | 727 | struct iommu_table *tbl = pci_iommu(dev->bus); |
728 | 728 | ||
729 | /* reserve EMERGENCY_PAGES from bad_dma_address and up */ | 729 | /* reserve EMERGENCY_PAGES from bad_dma_address and up */ |
730 | iommu_range_reserve(tbl, bad_dma_address, EMERGENCY_PAGES); | 730 | iommu_range_reserve(tbl, DMA_ERROR_CODE, EMERGENCY_PAGES); |
731 | 731 | ||
732 | /* avoid the BIOS/VGA first 640KB-1MB region */ | 732 | /* avoid the BIOS/VGA first 640KB-1MB region */ |
733 | /* for CalIOC2 - avoid the entire first MB */ | 733 | /* for CalIOC2 - avoid the entire first MB */ |
@@ -1344,6 +1344,23 @@ static void __init get_tce_space_from_tar(void) | |||
1344 | return; | 1344 | return; |
1345 | } | 1345 | } |
1346 | 1346 | ||
1347 | static int __init calgary_iommu_init(void) | ||
1348 | { | ||
1349 | int ret; | ||
1350 | |||
1351 | /* ok, we're trying to use Calgary - let's roll */ | ||
1352 | printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n"); | ||
1353 | |||
1354 | ret = calgary_init(); | ||
1355 | if (ret) { | ||
1356 | printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " | ||
1357 | "falling back to no_iommu\n", ret); | ||
1358 | return ret; | ||
1359 | } | ||
1360 | |||
1361 | return 0; | ||
1362 | } | ||
1363 | |||
1347 | void __init detect_calgary(void) | 1364 | void __init detect_calgary(void) |
1348 | { | 1365 | { |
1349 | int bus; | 1366 | int bus; |
@@ -1357,7 +1374,7 @@ void __init detect_calgary(void) | |||
1357 | * if the user specified iommu=off or iommu=soft or we found | 1374 | * if the user specified iommu=off or iommu=soft or we found |
1358 | * another HW IOMMU already, bail out. | 1375 | * another HW IOMMU already, bail out. |
1359 | */ | 1376 | */ |
1360 | if (swiotlb || no_iommu || iommu_detected) | 1377 | if (no_iommu || iommu_detected) |
1361 | return; | 1378 | return; |
1362 | 1379 | ||
1363 | if (!use_calgary) | 1380 | if (!use_calgary) |
@@ -1442,9 +1459,7 @@ void __init detect_calgary(void) | |||
1442 | printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n", | 1459 | printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n", |
1443 | specified_table_size); | 1460 | specified_table_size); |
1444 | 1461 | ||
1445 | /* swiotlb for devices that aren't behind the Calgary. */ | 1462 | x86_init.iommu.iommu_init = calgary_iommu_init; |
1446 | if (max_pfn > MAX_DMA32_PFN) | ||
1447 | swiotlb = 1; | ||
1448 | } | 1463 | } |
1449 | return; | 1464 | return; |
1450 | 1465 | ||
@@ -1457,35 +1472,6 @@ cleanup: | |||
1457 | } | 1472 | } |
1458 | } | 1473 | } |
1459 | 1474 | ||
1460 | int __init calgary_iommu_init(void) | ||
1461 | { | ||
1462 | int ret; | ||
1463 | |||
1464 | if (no_iommu || (swiotlb && !calgary_detected)) | ||
1465 | return -ENODEV; | ||
1466 | |||
1467 | if (!calgary_detected) | ||
1468 | return -ENODEV; | ||
1469 | |||
1470 | /* ok, we're trying to use Calgary - let's roll */ | ||
1471 | printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n"); | ||
1472 | |||
1473 | ret = calgary_init(); | ||
1474 | if (ret) { | ||
1475 | printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " | ||
1476 | "falling back to no_iommu\n", ret); | ||
1477 | return ret; | ||
1478 | } | ||
1479 | |||
1480 | force_iommu = 1; | ||
1481 | bad_dma_address = 0x0; | ||
1482 | /* dma_ops is set to swiotlb or nommu */ | ||
1483 | if (!dma_ops) | ||
1484 | dma_ops = &nommu_dma_ops; | ||
1485 | |||
1486 | return 0; | ||
1487 | } | ||
1488 | |||
1489 | static int __init calgary_parse_options(char *p) | 1475 | static int __init calgary_parse_options(char *p) |
1490 | { | 1476 | { |
1491 | unsigned int bridge; | 1477 | unsigned int bridge; |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 64b838eac18c..75e14e21f61a 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -11,10 +11,11 @@ | |||
11 | #include <asm/gart.h> | 11 | #include <asm/gart.h> |
12 | #include <asm/calgary.h> | 12 | #include <asm/calgary.h> |
13 | #include <asm/amd_iommu.h> | 13 | #include <asm/amd_iommu.h> |
14 | #include <asm/x86_init.h> | ||
14 | 15 | ||
15 | static int forbid_dac __read_mostly; | 16 | static int forbid_dac __read_mostly; |
16 | 17 | ||
17 | struct dma_map_ops *dma_ops; | 18 | struct dma_map_ops *dma_ops = &nommu_dma_ops; |
18 | EXPORT_SYMBOL(dma_ops); | 19 | EXPORT_SYMBOL(dma_ops); |
19 | 20 | ||
20 | static int iommu_sac_force __read_mostly; | 21 | static int iommu_sac_force __read_mostly; |
@@ -35,22 +36,17 @@ int iommu_detected __read_mostly = 0; | |||
35 | 36 | ||
36 | /* | 37 | /* |
37 | * This variable becomes 1 if iommu=pt is passed on the kernel command line. | 38 | * This variable becomes 1 if iommu=pt is passed on the kernel command line. |
38 | * If this variable is 1, IOMMU implementations do no DMA ranslation for | 39 | * If this variable is 1, IOMMU implementations do no DMA translation for |
39 | * devices and allow every device to access to whole physical memory. This is | 40 | * devices and allow every device to access to whole physical memory. This is |
40 | * useful if a user want to use an IOMMU only for KVM device assignment to | 41 | * useful if a user want to use an IOMMU only for KVM device assignment to |
41 | * guests and not for driver dma translation. | 42 | * guests and not for driver dma translation. |
42 | */ | 43 | */ |
43 | int iommu_pass_through __read_mostly; | 44 | int iommu_pass_through __read_mostly; |
44 | 45 | ||
45 | dma_addr_t bad_dma_address __read_mostly = 0; | 46 | /* Dummy device used for NULL arguments (normally ISA). */ |
46 | EXPORT_SYMBOL(bad_dma_address); | ||
47 | |||
48 | /* Dummy device used for NULL arguments (normally ISA). Better would | ||
49 | be probably a smaller DMA mask, but this is bug-to-bug compatible | ||
50 | to older i386. */ | ||
51 | struct device x86_dma_fallback_dev = { | 47 | struct device x86_dma_fallback_dev = { |
52 | .init_name = "fallback device", | 48 | .init_name = "fallback device", |
53 | .coherent_dma_mask = DMA_BIT_MASK(32), | 49 | .coherent_dma_mask = ISA_DMA_BIT_MASK, |
54 | .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask, | 50 | .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask, |
55 | }; | 51 | }; |
56 | EXPORT_SYMBOL(x86_dma_fallback_dev); | 52 | EXPORT_SYMBOL(x86_dma_fallback_dev); |
@@ -128,19 +124,18 @@ void __init pci_iommu_alloc(void) | |||
128 | /* free the range so iommu could get some range less than 4G */ | 124 | /* free the range so iommu could get some range less than 4G */ |
129 | dma32_free_bootmem(); | 125 | dma32_free_bootmem(); |
130 | #endif | 126 | #endif |
127 | if (pci_swiotlb_detect()) | ||
128 | goto out; | ||
131 | 129 | ||
132 | /* | ||
133 | * The order of these functions is important for | ||
134 | * fall-back/fail-over reasons | ||
135 | */ | ||
136 | gart_iommu_hole_init(); | 130 | gart_iommu_hole_init(); |
137 | 131 | ||
138 | detect_calgary(); | 132 | detect_calgary(); |
139 | 133 | ||
140 | detect_intel_iommu(); | 134 | detect_intel_iommu(); |
141 | 135 | ||
136 | /* needs to be called after gart_iommu_hole_init */ | ||
142 | amd_iommu_detect(); | 137 | amd_iommu_detect(); |
143 | 138 | out: | |
144 | pci_swiotlb_init(); | 139 | pci_swiotlb_init(); |
145 | } | 140 | } |
146 | 141 | ||
@@ -216,7 +211,7 @@ static __init int iommu_setup(char *p) | |||
216 | if (!strncmp(p, "allowdac", 8)) | 211 | if (!strncmp(p, "allowdac", 8)) |
217 | forbid_dac = 0; | 212 | forbid_dac = 0; |
218 | if (!strncmp(p, "nodac", 5)) | 213 | if (!strncmp(p, "nodac", 5)) |
219 | forbid_dac = -1; | 214 | forbid_dac = 1; |
220 | if (!strncmp(p, "usedac", 6)) { | 215 | if (!strncmp(p, "usedac", 6)) { |
221 | forbid_dac = -1; | 216 | forbid_dac = -1; |
222 | return 1; | 217 | return 1; |
@@ -291,27 +286,19 @@ static int __init pci_iommu_init(void) | |||
291 | #ifdef CONFIG_PCI | 286 | #ifdef CONFIG_PCI |
292 | dma_debug_add_bus(&pci_bus_type); | 287 | dma_debug_add_bus(&pci_bus_type); |
293 | #endif | 288 | #endif |
289 | x86_init.iommu.iommu_init(); | ||
294 | 290 | ||
295 | calgary_iommu_init(); | 291 | if (swiotlb) { |
296 | 292 | printk(KERN_INFO "PCI-DMA: " | |
297 | intel_iommu_init(); | 293 | "Using software bounce buffering for IO (SWIOTLB)\n"); |
294 | swiotlb_print_info(); | ||
295 | } else | ||
296 | swiotlb_free(); | ||
298 | 297 | ||
299 | amd_iommu_init(); | ||
300 | |||
301 | gart_iommu_init(); | ||
302 | |||
303 | no_iommu_init(); | ||
304 | return 0; | 298 | return 0; |
305 | } | 299 | } |
306 | |||
307 | void pci_iommu_shutdown(void) | ||
308 | { | ||
309 | gart_iommu_shutdown(); | ||
310 | |||
311 | amd_iommu_shutdown(); | ||
312 | } | ||
313 | /* Must execute after PCI subsystem */ | 300 | /* Must execute after PCI subsystem */ |
314 | fs_initcall(pci_iommu_init); | 301 | rootfs_initcall(pci_iommu_init); |
315 | 302 | ||
316 | #ifdef CONFIG_PCI | 303 | #ifdef CONFIG_PCI |
317 | /* Many VIA bridges seem to corrupt data for DAC. Disable it here */ | 304 | /* Many VIA bridges seem to corrupt data for DAC. Disable it here */ |
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 98a827ee9ed7..34de53b46f87 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c | |||
@@ -16,13 +16,14 @@ | |||
16 | #include <linux/agp_backend.h> | 16 | #include <linux/agp_backend.h> |
17 | #include <linux/init.h> | 17 | #include <linux/init.h> |
18 | #include <linux/mm.h> | 18 | #include <linux/mm.h> |
19 | #include <linux/sched.h> | ||
19 | #include <linux/string.h> | 20 | #include <linux/string.h> |
20 | #include <linux/spinlock.h> | 21 | #include <linux/spinlock.h> |
21 | #include <linux/pci.h> | 22 | #include <linux/pci.h> |
22 | #include <linux/module.h> | 23 | #include <linux/module.h> |
23 | #include <linux/topology.h> | 24 | #include <linux/topology.h> |
24 | #include <linux/interrupt.h> | 25 | #include <linux/interrupt.h> |
25 | #include <linux/bitops.h> | 26 | #include <linux/bitmap.h> |
26 | #include <linux/kdebug.h> | 27 | #include <linux/kdebug.h> |
27 | #include <linux/scatterlist.h> | 28 | #include <linux/scatterlist.h> |
28 | #include <linux/iommu-helper.h> | 29 | #include <linux/iommu-helper.h> |
@@ -38,6 +39,7 @@ | |||
38 | #include <asm/swiotlb.h> | 39 | #include <asm/swiotlb.h> |
39 | #include <asm/dma.h> | 40 | #include <asm/dma.h> |
40 | #include <asm/k8.h> | 41 | #include <asm/k8.h> |
42 | #include <asm/x86_init.h> | ||
41 | 43 | ||
42 | static unsigned long iommu_bus_base; /* GART remapping area (physical) */ | 44 | static unsigned long iommu_bus_base; /* GART remapping area (physical) */ |
43 | static unsigned long iommu_size; /* size of remapping area bytes */ | 45 | static unsigned long iommu_size; /* size of remapping area bytes */ |
@@ -45,6 +47,8 @@ static unsigned long iommu_pages; /* .. and in pages */ | |||
45 | 47 | ||
46 | static u32 *iommu_gatt_base; /* Remapping table */ | 48 | static u32 *iommu_gatt_base; /* Remapping table */ |
47 | 49 | ||
50 | static dma_addr_t bad_dma_addr; | ||
51 | |||
48 | /* | 52 | /* |
49 | * If this is disabled the IOMMU will use an optimized flushing strategy | 53 | * If this is disabled the IOMMU will use an optimized flushing strategy |
50 | * of only flushing when an mapping is reused. With it true the GART is | 54 | * of only flushing when an mapping is reused. With it true the GART is |
@@ -91,7 +95,7 @@ static unsigned long alloc_iommu(struct device *dev, int size, | |||
91 | 95 | ||
92 | base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), | 96 | base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), |
93 | PAGE_SIZE) >> PAGE_SHIFT; | 97 | PAGE_SIZE) >> PAGE_SHIFT; |
94 | boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1, | 98 | boundary_size = ALIGN((u64)dma_get_seg_boundary(dev) + 1, |
95 | PAGE_SIZE) >> PAGE_SHIFT; | 99 | PAGE_SIZE) >> PAGE_SHIFT; |
96 | 100 | ||
97 | spin_lock_irqsave(&iommu_bitmap_lock, flags); | 101 | spin_lock_irqsave(&iommu_bitmap_lock, flags); |
@@ -122,7 +126,7 @@ static void free_iommu(unsigned long offset, int size) | |||
122 | unsigned long flags; | 126 | unsigned long flags; |
123 | 127 | ||
124 | spin_lock_irqsave(&iommu_bitmap_lock, flags); | 128 | spin_lock_irqsave(&iommu_bitmap_lock, flags); |
125 | iommu_area_free(iommu_gart_bitmap, offset, size); | 129 | bitmap_clear(iommu_gart_bitmap, offset, size); |
126 | if (offset >= next_bit) | 130 | if (offset >= next_bit) |
127 | next_bit = offset + size; | 131 | next_bit = offset + size; |
128 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); | 132 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); |
@@ -215,7 +219,7 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, | |||
215 | if (panic_on_overflow) | 219 | if (panic_on_overflow) |
216 | panic("dma_map_area overflow %lu bytes\n", size); | 220 | panic("dma_map_area overflow %lu bytes\n", size); |
217 | iommu_full(dev, size, dir); | 221 | iommu_full(dev, size, dir); |
218 | return bad_dma_address; | 222 | return bad_dma_addr; |
219 | } | 223 | } |
220 | 224 | ||
221 | for (i = 0; i < npages; i++) { | 225 | for (i = 0; i < npages; i++) { |
@@ -293,7 +297,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, | |||
293 | int i; | 297 | int i; |
294 | 298 | ||
295 | #ifdef CONFIG_IOMMU_DEBUG | 299 | #ifdef CONFIG_IOMMU_DEBUG |
296 | printk(KERN_DEBUG "dma_map_sg overflow\n"); | 300 | pr_debug("dma_map_sg overflow\n"); |
297 | #endif | 301 | #endif |
298 | 302 | ||
299 | for_each_sg(sg, s, nents, i) { | 303 | for_each_sg(sg, s, nents, i) { |
@@ -301,7 +305,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, | |||
301 | 305 | ||
302 | if (nonforced_iommu(dev, addr, s->length)) { | 306 | if (nonforced_iommu(dev, addr, s->length)) { |
303 | addr = dma_map_area(dev, addr, s->length, dir, 0); | 307 | addr = dma_map_area(dev, addr, s->length, dir, 0); |
304 | if (addr == bad_dma_address) { | 308 | if (addr == bad_dma_addr) { |
305 | if (i > 0) | 309 | if (i > 0) |
306 | gart_unmap_sg(dev, sg, i, dir, NULL); | 310 | gart_unmap_sg(dev, sg, i, dir, NULL); |
307 | nents = 0; | 311 | nents = 0; |
@@ -388,12 +392,14 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, | |||
388 | if (!dev) | 392 | if (!dev) |
389 | dev = &x86_dma_fallback_dev; | 393 | dev = &x86_dma_fallback_dev; |
390 | 394 | ||
391 | out = 0; | 395 | out = 0; |
392 | start = 0; | 396 | start = 0; |
393 | start_sg = sgmap = sg; | 397 | start_sg = sg; |
394 | seg_size = 0; | 398 | sgmap = sg; |
395 | max_seg_size = dma_get_max_seg_size(dev); | 399 | seg_size = 0; |
396 | ps = NULL; /* shut up gcc */ | 400 | max_seg_size = dma_get_max_seg_size(dev); |
401 | ps = NULL; /* shut up gcc */ | ||
402 | |||
397 | for_each_sg(sg, s, nents, i) { | 403 | for_each_sg(sg, s, nents, i) { |
398 | dma_addr_t addr = sg_phys(s); | 404 | dma_addr_t addr = sg_phys(s); |
399 | 405 | ||
@@ -416,11 +422,12 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, | |||
416 | sgmap, pages, need) < 0) | 422 | sgmap, pages, need) < 0) |
417 | goto error; | 423 | goto error; |
418 | out++; | 424 | out++; |
419 | seg_size = 0; | 425 | |
420 | sgmap = sg_next(sgmap); | 426 | seg_size = 0; |
421 | pages = 0; | 427 | sgmap = sg_next(sgmap); |
422 | start = i; | 428 | pages = 0; |
423 | start_sg = s; | 429 | start = i; |
430 | start_sg = s; | ||
424 | } | 431 | } |
425 | } | 432 | } |
426 | 433 | ||
@@ -454,7 +461,7 @@ error: | |||
454 | 461 | ||
455 | iommu_full(dev, pages << PAGE_SHIFT, dir); | 462 | iommu_full(dev, pages << PAGE_SHIFT, dir); |
456 | for_each_sg(sg, s, nents, i) | 463 | for_each_sg(sg, s, nents, i) |
457 | s->dma_address = bad_dma_address; | 464 | s->dma_address = bad_dma_addr; |
458 | return 0; | 465 | return 0; |
459 | } | 466 | } |
460 | 467 | ||
@@ -478,7 +485,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, | |||
478 | DMA_BIDIRECTIONAL, align_mask); | 485 | DMA_BIDIRECTIONAL, align_mask); |
479 | 486 | ||
480 | flush_gart(); | 487 | flush_gart(); |
481 | if (paddr != bad_dma_address) { | 488 | if (paddr != bad_dma_addr) { |
482 | *dma_addr = paddr; | 489 | *dma_addr = paddr; |
483 | return page_address(page); | 490 | return page_address(page); |
484 | } | 491 | } |
@@ -498,6 +505,11 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr, | |||
498 | free_pages((unsigned long)vaddr, get_order(size)); | 505 | free_pages((unsigned long)vaddr, get_order(size)); |
499 | } | 506 | } |
500 | 507 | ||
508 | static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr) | ||
509 | { | ||
510 | return (dma_addr == bad_dma_addr); | ||
511 | } | ||
512 | |||
501 | static int no_agp; | 513 | static int no_agp; |
502 | 514 | ||
503 | static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) | 515 | static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) |
@@ -514,7 +526,7 @@ static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) | |||
514 | iommu_size -= round_up(a, PMD_PAGE_SIZE) - a; | 526 | iommu_size -= round_up(a, PMD_PAGE_SIZE) - a; |
515 | 527 | ||
516 | if (iommu_size < 64*1024*1024) { | 528 | if (iommu_size < 64*1024*1024) { |
517 | printk(KERN_WARNING | 529 | pr_warning( |
518 | "PCI-DMA: Warning: Small IOMMU %luMB." | 530 | "PCI-DMA: Warning: Small IOMMU %luMB." |
519 | " Consider increasing the AGP aperture in BIOS\n", | 531 | " Consider increasing the AGP aperture in BIOS\n", |
520 | iommu_size >> 20); | 532 | iommu_size >> 20); |
@@ -569,28 +581,32 @@ void set_up_gart_resume(u32 aper_order, u32 aper_alloc) | |||
569 | aperture_alloc = aper_alloc; | 581 | aperture_alloc = aper_alloc; |
570 | } | 582 | } |
571 | 583 | ||
572 | static int gart_resume(struct sys_device *dev) | 584 | static void gart_fixup_northbridges(struct sys_device *dev) |
573 | { | 585 | { |
574 | printk(KERN_INFO "PCI-DMA: Resuming GART IOMMU\n"); | 586 | int i; |
575 | 587 | ||
576 | if (fix_up_north_bridges) { | 588 | if (!fix_up_north_bridges) |
577 | int i; | 589 | return; |
578 | 590 | ||
579 | printk(KERN_INFO "PCI-DMA: Restoring GART aperture settings\n"); | 591 | pr_info("PCI-DMA: Restoring GART aperture settings\n"); |
580 | 592 | ||
581 | for (i = 0; i < num_k8_northbridges; i++) { | 593 | for (i = 0; i < num_k8_northbridges; i++) { |
582 | struct pci_dev *dev = k8_northbridges[i]; | 594 | struct pci_dev *dev = k8_northbridges[i]; |
583 | 595 | ||
584 | /* | 596 | /* |
585 | * Don't enable translations just yet. That is the next | 597 | * Don't enable translations just yet. That is the next |
586 | * step. Restore the pre-suspend aperture settings. | 598 | * step. Restore the pre-suspend aperture settings. |
587 | */ | 599 | */ |
588 | pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, | 600 | pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, aperture_order << 1); |
589 | aperture_order << 1); | 601 | pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25); |
590 | pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, | ||
591 | aperture_alloc >> 25); | ||
592 | } | ||
593 | } | 602 | } |
603 | } | ||
604 | |||
605 | static int gart_resume(struct sys_device *dev) | ||
606 | { | ||
607 | pr_info("PCI-DMA: Resuming GART IOMMU\n"); | ||
608 | |||
609 | gart_fixup_northbridges(dev); | ||
594 | 610 | ||
595 | enable_gart_translations(); | 611 | enable_gart_translations(); |
596 | 612 | ||
@@ -603,15 +619,14 @@ static int gart_suspend(struct sys_device *dev, pm_message_t state) | |||
603 | } | 619 | } |
604 | 620 | ||
605 | static struct sysdev_class gart_sysdev_class = { | 621 | static struct sysdev_class gart_sysdev_class = { |
606 | .name = "gart", | 622 | .name = "gart", |
607 | .suspend = gart_suspend, | 623 | .suspend = gart_suspend, |
608 | .resume = gart_resume, | 624 | .resume = gart_resume, |
609 | 625 | ||
610 | }; | 626 | }; |
611 | 627 | ||
612 | static struct sys_device device_gart = { | 628 | static struct sys_device device_gart = { |
613 | .id = 0, | 629 | .cls = &gart_sysdev_class, |
614 | .cls = &gart_sysdev_class, | ||
615 | }; | 630 | }; |
616 | 631 | ||
617 | /* | 632 | /* |
@@ -626,7 +641,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
626 | void *gatt; | 641 | void *gatt; |
627 | int i, error; | 642 | int i, error; |
628 | 643 | ||
629 | printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); | 644 | pr_info("PCI-DMA: Disabling AGP.\n"); |
645 | |||
630 | aper_size = aper_base = info->aper_size = 0; | 646 | aper_size = aper_base = info->aper_size = 0; |
631 | dev = NULL; | 647 | dev = NULL; |
632 | for (i = 0; i < num_k8_northbridges; i++) { | 648 | for (i = 0; i < num_k8_northbridges; i++) { |
@@ -644,6 +660,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
644 | } | 660 | } |
645 | if (!aper_base) | 661 | if (!aper_base) |
646 | goto nommu; | 662 | goto nommu; |
663 | |||
647 | info->aper_base = aper_base; | 664 | info->aper_base = aper_base; |
648 | info->aper_size = aper_size >> 20; | 665 | info->aper_size = aper_size >> 20; |
649 | 666 | ||
@@ -666,14 +683,14 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
666 | 683 | ||
667 | flush_gart(); | 684 | flush_gart(); |
668 | 685 | ||
669 | printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", | 686 | pr_info("PCI-DMA: aperture base @ %x size %u KB\n", |
670 | aper_base, aper_size>>10); | 687 | aper_base, aper_size>>10); |
671 | 688 | ||
672 | return 0; | 689 | return 0; |
673 | 690 | ||
674 | nommu: | 691 | nommu: |
675 | /* Should not happen anymore */ | 692 | /* Should not happen anymore */ |
676 | printk(KERN_WARNING "PCI-DMA: More than 4GB of RAM and no IOMMU\n" | 693 | pr_warning("PCI-DMA: More than 4GB of RAM and no IOMMU\n" |
677 | "falling back to iommu=soft.\n"); | 694 | "falling back to iommu=soft.\n"); |
678 | return -1; | 695 | return -1; |
679 | } | 696 | } |
@@ -685,14 +702,16 @@ static struct dma_map_ops gart_dma_ops = { | |||
685 | .unmap_page = gart_unmap_page, | 702 | .unmap_page = gart_unmap_page, |
686 | .alloc_coherent = gart_alloc_coherent, | 703 | .alloc_coherent = gart_alloc_coherent, |
687 | .free_coherent = gart_free_coherent, | 704 | .free_coherent = gart_free_coherent, |
705 | .mapping_error = gart_mapping_error, | ||
688 | }; | 706 | }; |
689 | 707 | ||
690 | void gart_iommu_shutdown(void) | 708 | static void gart_iommu_shutdown(void) |
691 | { | 709 | { |
692 | struct pci_dev *dev; | 710 | struct pci_dev *dev; |
693 | int i; | 711 | int i; |
694 | 712 | ||
695 | if (no_agp && (dma_ops != &gart_dma_ops)) | 713 | /* don't shutdown it if there is AGP installed */ |
714 | if (!no_agp) | ||
696 | return; | 715 | return; |
697 | 716 | ||
698 | for (i = 0; i < num_k8_northbridges; i++) { | 717 | for (i = 0; i < num_k8_northbridges; i++) { |
@@ -707,7 +726,7 @@ void gart_iommu_shutdown(void) | |||
707 | } | 726 | } |
708 | } | 727 | } |
709 | 728 | ||
710 | void __init gart_iommu_init(void) | 729 | int __init gart_iommu_init(void) |
711 | { | 730 | { |
712 | struct agp_kern_info info; | 731 | struct agp_kern_info info; |
713 | unsigned long iommu_start; | 732 | unsigned long iommu_start; |
@@ -717,7 +736,7 @@ void __init gart_iommu_init(void) | |||
717 | long i; | 736 | long i; |
718 | 737 | ||
719 | if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) | 738 | if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) |
720 | return; | 739 | return 0; |
721 | 740 | ||
722 | #ifndef CONFIG_AGP_AMD64 | 741 | #ifndef CONFIG_AGP_AMD64 |
723 | no_agp = 1; | 742 | no_agp = 1; |
@@ -729,35 +748,28 @@ void __init gart_iommu_init(void) | |||
729 | (agp_copy_info(agp_bridge, &info) < 0); | 748 | (agp_copy_info(agp_bridge, &info) < 0); |
730 | #endif | 749 | #endif |
731 | 750 | ||
732 | if (swiotlb) | ||
733 | return; | ||
734 | |||
735 | /* Did we detect a different HW IOMMU? */ | ||
736 | if (iommu_detected && !gart_iommu_aperture) | ||
737 | return; | ||
738 | |||
739 | if (no_iommu || | 751 | if (no_iommu || |
740 | (!force_iommu && max_pfn <= MAX_DMA32_PFN) || | 752 | (!force_iommu && max_pfn <= MAX_DMA32_PFN) || |
741 | !gart_iommu_aperture || | 753 | !gart_iommu_aperture || |
742 | (no_agp && init_k8_gatt(&info) < 0)) { | 754 | (no_agp && init_k8_gatt(&info) < 0)) { |
743 | if (max_pfn > MAX_DMA32_PFN) { | 755 | if (max_pfn > MAX_DMA32_PFN) { |
744 | printk(KERN_WARNING "More than 4GB of memory " | 756 | pr_warning("More than 4GB of memory but GART IOMMU not available.\n"); |
745 | "but GART IOMMU not available.\n"); | 757 | pr_warning("falling back to iommu=soft.\n"); |
746 | printk(KERN_WARNING "falling back to iommu=soft.\n"); | ||
747 | } | 758 | } |
748 | return; | 759 | return 0; |
749 | } | 760 | } |
750 | 761 | ||
751 | /* need to map that range */ | 762 | /* need to map that range */ |
752 | aper_size = info.aper_size << 20; | 763 | aper_size = info.aper_size << 20; |
753 | aper_base = info.aper_base; | 764 | aper_base = info.aper_base; |
754 | end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); | 765 | end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); |
766 | |||
755 | if (end_pfn > max_low_pfn_mapped) { | 767 | if (end_pfn > max_low_pfn_mapped) { |
756 | start_pfn = (aper_base>>PAGE_SHIFT); | 768 | start_pfn = (aper_base>>PAGE_SHIFT); |
757 | init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); | 769 | init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); |
758 | } | 770 | } |
759 | 771 | ||
760 | printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); | 772 | pr_info("PCI-DMA: using GART IOMMU.\n"); |
761 | iommu_size = check_iommu_size(info.aper_base, aper_size); | 773 | iommu_size = check_iommu_size(info.aper_base, aper_size); |
762 | iommu_pages = iommu_size >> PAGE_SHIFT; | 774 | iommu_pages = iommu_size >> PAGE_SHIFT; |
763 | 775 | ||
@@ -772,8 +784,7 @@ void __init gart_iommu_init(void) | |||
772 | 784 | ||
773 | ret = dma_debug_resize_entries(iommu_pages); | 785 | ret = dma_debug_resize_entries(iommu_pages); |
774 | if (ret) | 786 | if (ret) |
775 | printk(KERN_DEBUG | 787 | pr_debug("PCI-DMA: Cannot trace all the entries\n"); |
776 | "PCI-DMA: Cannot trace all the entries\n"); | ||
777 | } | 788 | } |
778 | #endif | 789 | #endif |
779 | 790 | ||
@@ -781,17 +792,16 @@ void __init gart_iommu_init(void) | |||
781 | * Out of IOMMU space handling. | 792 | * Out of IOMMU space handling. |
782 | * Reserve some invalid pages at the beginning of the GART. | 793 | * Reserve some invalid pages at the beginning of the GART. |
783 | */ | 794 | */ |
784 | iommu_area_reserve(iommu_gart_bitmap, 0, EMERGENCY_PAGES); | 795 | bitmap_set(iommu_gart_bitmap, 0, EMERGENCY_PAGES); |
785 | 796 | ||
786 | agp_memory_reserved = iommu_size; | 797 | pr_info("PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n", |
787 | printk(KERN_INFO | ||
788 | "PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n", | ||
789 | iommu_size >> 20); | 798 | iommu_size >> 20); |
790 | 799 | ||
791 | iommu_start = aper_size - iommu_size; | 800 | agp_memory_reserved = iommu_size; |
792 | iommu_bus_base = info.aper_base + iommu_start; | 801 | iommu_start = aper_size - iommu_size; |
793 | bad_dma_address = iommu_bus_base; | 802 | iommu_bus_base = info.aper_base + iommu_start; |
794 | iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); | 803 | bad_dma_addr = iommu_bus_base; |
804 | iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); | ||
795 | 805 | ||
796 | /* | 806 | /* |
797 | * Unmap the IOMMU part of the GART. The alias of the page is | 807 | * Unmap the IOMMU part of the GART. The alias of the page is |
@@ -813,7 +823,7 @@ void __init gart_iommu_init(void) | |||
813 | * the pages as Not-Present: | 823 | * the pages as Not-Present: |
814 | */ | 824 | */ |
815 | wbinvd(); | 825 | wbinvd(); |
816 | 826 | ||
817 | /* | 827 | /* |
818 | * Now all caches are flushed and we can safely enable | 828 | * Now all caches are flushed and we can safely enable |
819 | * GART hardware. Doing it early leaves the possibility | 829 | * GART hardware. Doing it early leaves the possibility |
@@ -837,6 +847,10 @@ void __init gart_iommu_init(void) | |||
837 | 847 | ||
838 | flush_gart(); | 848 | flush_gart(); |
839 | dma_ops = &gart_dma_ops; | 849 | dma_ops = &gart_dma_ops; |
850 | x86_platform.iommu_shutdown = gart_iommu_shutdown; | ||
851 | swiotlb = 0; | ||
852 | |||
853 | return 0; | ||
840 | } | 854 | } |
841 | 855 | ||
842 | void __init gart_parse_options(char *p) | 856 | void __init gart_parse_options(char *p) |
@@ -855,7 +869,7 @@ void __init gart_parse_options(char *p) | |||
855 | #endif | 869 | #endif |
856 | if (isdigit(*p) && get_option(&p, &arg)) | 870 | if (isdigit(*p) && get_option(&p, &arg)) |
857 | iommu_size = arg; | 871 | iommu_size = arg; |
858 | if (!strncmp(p, "fullflush", 8)) | 872 | if (!strncmp(p, "fullflush", 9)) |
859 | iommu_fullflush = 1; | 873 | iommu_fullflush = 1; |
860 | if (!strncmp(p, "nofullflush", 11)) | 874 | if (!strncmp(p, "nofullflush", 11)) |
861 | iommu_fullflush = 0; | 875 | iommu_fullflush = 0; |
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index a3933d4330cd..22be12b60a8f 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c | |||
@@ -33,7 +33,7 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page, | |||
33 | dma_addr_t bus = page_to_phys(page) + offset; | 33 | dma_addr_t bus = page_to_phys(page) + offset; |
34 | WARN_ON(size == 0); | 34 | WARN_ON(size == 0); |
35 | if (!check_addr("map_single", dev, bus, size)) | 35 | if (!check_addr("map_single", dev, bus, size)) |
36 | return bad_dma_address; | 36 | return DMA_ERROR_CODE; |
37 | flush_write_buffers(); | 37 | flush_write_buffers(); |
38 | return bus; | 38 | return bus; |
39 | } | 39 | } |
@@ -103,12 +103,3 @@ struct dma_map_ops nommu_dma_ops = { | |||
103 | .sync_sg_for_device = nommu_sync_sg_for_device, | 103 | .sync_sg_for_device = nommu_sync_sg_for_device, |
104 | .is_phys = 1, | 104 | .is_phys = 1, |
105 | }; | 105 | }; |
106 | |||
107 | void __init no_iommu_init(void) | ||
108 | { | ||
109 | if (dma_ops) | ||
110 | return; | ||
111 | |||
112 | force_iommu = 0; /* no HW IOMMU */ | ||
113 | dma_ops = &nommu_dma_ops; | ||
114 | } | ||
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index aaa6b7839f1e..7d2829dde20e 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c | |||
@@ -42,18 +42,31 @@ static struct dma_map_ops swiotlb_dma_ops = { | |||
42 | .dma_supported = NULL, | 42 | .dma_supported = NULL, |
43 | }; | 43 | }; |
44 | 44 | ||
45 | void __init pci_swiotlb_init(void) | 45 | /* |
46 | * pci_swiotlb_detect - set swiotlb to 1 if necessary | ||
47 | * | ||
48 | * This returns non-zero if we are forced to use swiotlb (by the boot | ||
49 | * option). | ||
50 | */ | ||
51 | int __init pci_swiotlb_detect(void) | ||
46 | { | 52 | { |
53 | int use_swiotlb = swiotlb | swiotlb_force; | ||
54 | |||
47 | /* don't initialize swiotlb if iommu=off (no_iommu=1) */ | 55 | /* don't initialize swiotlb if iommu=off (no_iommu=1) */ |
48 | #ifdef CONFIG_X86_64 | 56 | #ifdef CONFIG_X86_64 |
49 | if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN)) | 57 | if (!no_iommu && max_pfn > MAX_DMA32_PFN) |
50 | swiotlb = 1; | 58 | swiotlb = 1; |
51 | #endif | 59 | #endif |
52 | if (swiotlb_force) | 60 | if (swiotlb_force) |
53 | swiotlb = 1; | 61 | swiotlb = 1; |
62 | |||
63 | return use_swiotlb; | ||
64 | } | ||
65 | |||
66 | void __init pci_swiotlb_init(void) | ||
67 | { | ||
54 | if (swiotlb) { | 68 | if (swiotlb) { |
55 | printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n"); | 69 | swiotlb_init(0); |
56 | swiotlb_init(); | ||
57 | dma_ops = &swiotlb_dma_ops; | 70 | dma_ops = &swiotlb_dma_ops; |
58 | } | 71 | } |
59 | } | 72 | } |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 5284cd2b5776..c6ee241c8a98 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -9,7 +9,11 @@ | |||
9 | #include <linux/pm.h> | 9 | #include <linux/pm.h> |
10 | #include <linux/clockchips.h> | 10 | #include <linux/clockchips.h> |
11 | #include <linux/random.h> | 11 | #include <linux/random.h> |
12 | #include <linux/user-return-notifier.h> | ||
13 | #include <linux/dmi.h> | ||
14 | #include <linux/utsname.h> | ||
12 | #include <trace/events/power.h> | 15 | #include <trace/events/power.h> |
16 | #include <linux/hw_breakpoint.h> | ||
13 | #include <asm/system.h> | 17 | #include <asm/system.h> |
14 | #include <asm/apic.h> | 18 | #include <asm/apic.h> |
15 | #include <asm/syscalls.h> | 19 | #include <asm/syscalls.h> |
@@ -17,6 +21,7 @@ | |||
17 | #include <asm/uaccess.h> | 21 | #include <asm/uaccess.h> |
18 | #include <asm/i387.h> | 22 | #include <asm/i387.h> |
19 | #include <asm/ds.h> | 23 | #include <asm/ds.h> |
24 | #include <asm/debugreg.h> | ||
20 | 25 | ||
21 | unsigned long idle_halt; | 26 | unsigned long idle_halt; |
22 | EXPORT_SYMBOL(idle_halt); | 27 | EXPORT_SYMBOL(idle_halt); |
@@ -87,6 +92,25 @@ void exit_thread(void) | |||
87 | } | 92 | } |
88 | } | 93 | } |
89 | 94 | ||
95 | void show_regs_common(void) | ||
96 | { | ||
97 | const char *board, *product; | ||
98 | |||
99 | board = dmi_get_system_info(DMI_BOARD_NAME); | ||
100 | if (!board) | ||
101 | board = ""; | ||
102 | product = dmi_get_system_info(DMI_PRODUCT_NAME); | ||
103 | if (!product) | ||
104 | product = ""; | ||
105 | |||
106 | printk(KERN_CONT "\n"); | ||
107 | printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s %s/%s\n", | ||
108 | current->pid, current->comm, print_tainted(), | ||
109 | init_utsname()->release, | ||
110 | (int)strcspn(init_utsname()->version, " "), | ||
111 | init_utsname()->version, board, product); | ||
112 | } | ||
113 | |||
90 | void flush_thread(void) | 114 | void flush_thread(void) |
91 | { | 115 | { |
92 | struct task_struct *tsk = current; | 116 | struct task_struct *tsk = current; |
@@ -103,14 +127,7 @@ void flush_thread(void) | |||
103 | } | 127 | } |
104 | #endif | 128 | #endif |
105 | 129 | ||
106 | clear_tsk_thread_flag(tsk, TIF_DEBUG); | 130 | flush_ptrace_hw_breakpoint(tsk); |
107 | |||
108 | tsk->thread.debugreg0 = 0; | ||
109 | tsk->thread.debugreg1 = 0; | ||
110 | tsk->thread.debugreg2 = 0; | ||
111 | tsk->thread.debugreg3 = 0; | ||
112 | tsk->thread.debugreg6 = 0; | ||
113 | tsk->thread.debugreg7 = 0; | ||
114 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); | 131 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); |
115 | /* | 132 | /* |
116 | * Forget coprocessor state.. | 133 | * Forget coprocessor state.. |
@@ -192,16 +209,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
192 | else if (next->debugctlmsr != prev->debugctlmsr) | 209 | else if (next->debugctlmsr != prev->debugctlmsr) |
193 | update_debugctlmsr(next->debugctlmsr); | 210 | update_debugctlmsr(next->debugctlmsr); |
194 | 211 | ||
195 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { | ||
196 | set_debugreg(next->debugreg0, 0); | ||
197 | set_debugreg(next->debugreg1, 1); | ||
198 | set_debugreg(next->debugreg2, 2); | ||
199 | set_debugreg(next->debugreg3, 3); | ||
200 | /* no 4 and 5 */ | ||
201 | set_debugreg(next->debugreg6, 6); | ||
202 | set_debugreg(next->debugreg7, 7); | ||
203 | } | ||
204 | |||
205 | if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ | 212 | if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ |
206 | test_tsk_thread_flag(next_p, TIF_NOTSC)) { | 213 | test_tsk_thread_flag(next_p, TIF_NOTSC)) { |
207 | /* prev and next are different */ | 214 | /* prev and next are different */ |
@@ -224,6 +231,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
224 | */ | 231 | */ |
225 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); | 232 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); |
226 | } | 233 | } |
234 | propagate_user_return_notify(prev_p, next_p); | ||
227 | } | 235 | } |
228 | 236 | ||
229 | int sys_fork(struct pt_regs *regs) | 237 | int sys_fork(struct pt_regs *regs) |
@@ -247,6 +255,76 @@ int sys_vfork(struct pt_regs *regs) | |||
247 | NULL, NULL); | 255 | NULL, NULL); |
248 | } | 256 | } |
249 | 257 | ||
258 | long | ||
259 | sys_clone(unsigned long clone_flags, unsigned long newsp, | ||
260 | void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) | ||
261 | { | ||
262 | if (!newsp) | ||
263 | newsp = regs->sp; | ||
264 | return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); | ||
265 | } | ||
266 | |||
267 | /* | ||
268 | * This gets run with %si containing the | ||
269 | * function to call, and %di containing | ||
270 | * the "args". | ||
271 | */ | ||
272 | extern void kernel_thread_helper(void); | ||
273 | |||
274 | /* | ||
275 | * Create a kernel thread | ||
276 | */ | ||
277 | int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) | ||
278 | { | ||
279 | struct pt_regs regs; | ||
280 | |||
281 | memset(®s, 0, sizeof(regs)); | ||
282 | |||
283 | regs.si = (unsigned long) fn; | ||
284 | regs.di = (unsigned long) arg; | ||
285 | |||
286 | #ifdef CONFIG_X86_32 | ||
287 | regs.ds = __USER_DS; | ||
288 | regs.es = __USER_DS; | ||
289 | regs.fs = __KERNEL_PERCPU; | ||
290 | regs.gs = __KERNEL_STACK_CANARY; | ||
291 | #endif | ||
292 | |||
293 | regs.orig_ax = -1; | ||
294 | regs.ip = (unsigned long) kernel_thread_helper; | ||
295 | regs.cs = __KERNEL_CS | get_kernel_rpl(); | ||
296 | regs.flags = X86_EFLAGS_IF | 0x2; | ||
297 | |||
298 | /* Ok, create the new process.. */ | ||
299 | return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); | ||
300 | } | ||
301 | EXPORT_SYMBOL(kernel_thread); | ||
302 | |||
303 | /* | ||
304 | * sys_execve() executes a new program. | ||
305 | */ | ||
306 | long sys_execve(char __user *name, char __user * __user *argv, | ||
307 | char __user * __user *envp, struct pt_regs *regs) | ||
308 | { | ||
309 | long error; | ||
310 | char *filename; | ||
311 | |||
312 | filename = getname(name); | ||
313 | error = PTR_ERR(filename); | ||
314 | if (IS_ERR(filename)) | ||
315 | return error; | ||
316 | error = do_execve(filename, argv, envp, regs); | ||
317 | |||
318 | #ifdef CONFIG_X86_32 | ||
319 | if (error == 0) { | ||
320 | /* Make sure we don't return using sysenter.. */ | ||
321 | set_thread_flag(TIF_IRET); | ||
322 | } | ||
323 | #endif | ||
324 | |||
325 | putname(filename); | ||
326 | return error; | ||
327 | } | ||
250 | 328 | ||
251 | /* | 329 | /* |
252 | * Idle related variables and functions | 330 | * Idle related variables and functions |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 4cf79567cdab..37ad1e046aae 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -23,7 +23,6 @@ | |||
23 | #include <linux/vmalloc.h> | 23 | #include <linux/vmalloc.h> |
24 | #include <linux/user.h> | 24 | #include <linux/user.h> |
25 | #include <linux/interrupt.h> | 25 | #include <linux/interrupt.h> |
26 | #include <linux/utsname.h> | ||
27 | #include <linux/delay.h> | 26 | #include <linux/delay.h> |
28 | #include <linux/reboot.h> | 27 | #include <linux/reboot.h> |
29 | #include <linux/init.h> | 28 | #include <linux/init.h> |
@@ -35,7 +34,6 @@ | |||
35 | #include <linux/tick.h> | 34 | #include <linux/tick.h> |
36 | #include <linux/percpu.h> | 35 | #include <linux/percpu.h> |
37 | #include <linux/prctl.h> | 36 | #include <linux/prctl.h> |
38 | #include <linux/dmi.h> | ||
39 | #include <linux/ftrace.h> | 37 | #include <linux/ftrace.h> |
40 | #include <linux/uaccess.h> | 38 | #include <linux/uaccess.h> |
41 | #include <linux/io.h> | 39 | #include <linux/io.h> |
@@ -58,6 +56,7 @@ | |||
58 | #include <asm/idle.h> | 56 | #include <asm/idle.h> |
59 | #include <asm/syscalls.h> | 57 | #include <asm/syscalls.h> |
60 | #include <asm/ds.h> | 58 | #include <asm/ds.h> |
59 | #include <asm/debugreg.h> | ||
61 | 60 | ||
62 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | 61 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
63 | 62 | ||
@@ -127,39 +126,29 @@ void __show_regs(struct pt_regs *regs, int all) | |||
127 | unsigned long d0, d1, d2, d3, d6, d7; | 126 | unsigned long d0, d1, d2, d3, d6, d7; |
128 | unsigned long sp; | 127 | unsigned long sp; |
129 | unsigned short ss, gs; | 128 | unsigned short ss, gs; |
130 | const char *board; | ||
131 | 129 | ||
132 | if (user_mode_vm(regs)) { | 130 | if (user_mode_vm(regs)) { |
133 | sp = regs->sp; | 131 | sp = regs->sp; |
134 | ss = regs->ss & 0xffff; | 132 | ss = regs->ss & 0xffff; |
135 | gs = get_user_gs(regs); | 133 | gs = get_user_gs(regs); |
136 | } else { | 134 | } else { |
137 | sp = (unsigned long) (®s->sp); | 135 | sp = kernel_stack_pointer(regs); |
138 | savesegment(ss, ss); | 136 | savesegment(ss, ss); |
139 | savesegment(gs, gs); | 137 | savesegment(gs, gs); |
140 | } | 138 | } |
141 | 139 | ||
142 | printk("\n"); | 140 | show_regs_common(); |
143 | 141 | ||
144 | board = dmi_get_system_info(DMI_PRODUCT_NAME); | 142 | printk(KERN_DEFAULT "EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", |
145 | if (!board) | ||
146 | board = ""; | ||
147 | printk("Pid: %d, comm: %s %s (%s %.*s) %s\n", | ||
148 | task_pid_nr(current), current->comm, | ||
149 | print_tainted(), init_utsname()->release, | ||
150 | (int)strcspn(init_utsname()->version, " "), | ||
151 | init_utsname()->version, board); | ||
152 | |||
153 | printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", | ||
154 | (u16)regs->cs, regs->ip, regs->flags, | 143 | (u16)regs->cs, regs->ip, regs->flags, |
155 | smp_processor_id()); | 144 | smp_processor_id()); |
156 | print_symbol("EIP is at %s\n", regs->ip); | 145 | print_symbol("EIP is at %s\n", regs->ip); |
157 | 146 | ||
158 | printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", | 147 | printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", |
159 | regs->ax, regs->bx, regs->cx, regs->dx); | 148 | regs->ax, regs->bx, regs->cx, regs->dx); |
160 | printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", | 149 | printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", |
161 | regs->si, regs->di, regs->bp, sp); | 150 | regs->si, regs->di, regs->bp, sp); |
162 | printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n", | 151 | printk(KERN_DEFAULT " DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n", |
163 | (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss); | 152 | (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss); |
164 | 153 | ||
165 | if (!all) | 154 | if (!all) |
@@ -169,61 +158,28 @@ void __show_regs(struct pt_regs *regs, int all) | |||
169 | cr2 = read_cr2(); | 158 | cr2 = read_cr2(); |
170 | cr3 = read_cr3(); | 159 | cr3 = read_cr3(); |
171 | cr4 = read_cr4_safe(); | 160 | cr4 = read_cr4_safe(); |
172 | printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", | 161 | printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", |
173 | cr0, cr2, cr3, cr4); | 162 | cr0, cr2, cr3, cr4); |
174 | 163 | ||
175 | get_debugreg(d0, 0); | 164 | get_debugreg(d0, 0); |
176 | get_debugreg(d1, 1); | 165 | get_debugreg(d1, 1); |
177 | get_debugreg(d2, 2); | 166 | get_debugreg(d2, 2); |
178 | get_debugreg(d3, 3); | 167 | get_debugreg(d3, 3); |
179 | printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n", | 168 | printk(KERN_DEFAULT "DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n", |
180 | d0, d1, d2, d3); | 169 | d0, d1, d2, d3); |
181 | 170 | ||
182 | get_debugreg(d6, 6); | 171 | get_debugreg(d6, 6); |
183 | get_debugreg(d7, 7); | 172 | get_debugreg(d7, 7); |
184 | printk("DR6: %08lx DR7: %08lx\n", | 173 | printk(KERN_DEFAULT "DR6: %08lx DR7: %08lx\n", |
185 | d6, d7); | 174 | d6, d7); |
186 | } | 175 | } |
187 | 176 | ||
188 | void show_regs(struct pt_regs *regs) | 177 | void show_regs(struct pt_regs *regs) |
189 | { | 178 | { |
190 | __show_regs(regs, 1); | 179 | show_registers(regs); |
191 | show_trace(NULL, regs, ®s->sp, regs->bp); | 180 | show_trace(NULL, regs, ®s->sp, regs->bp); |
192 | } | 181 | } |
193 | 182 | ||
194 | /* | ||
195 | * This gets run with %bx containing the | ||
196 | * function to call, and %dx containing | ||
197 | * the "args". | ||
198 | */ | ||
199 | extern void kernel_thread_helper(void); | ||
200 | |||
201 | /* | ||
202 | * Create a kernel thread | ||
203 | */ | ||
204 | int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) | ||
205 | { | ||
206 | struct pt_regs regs; | ||
207 | |||
208 | memset(®s, 0, sizeof(regs)); | ||
209 | |||
210 | regs.bx = (unsigned long) fn; | ||
211 | regs.dx = (unsigned long) arg; | ||
212 | |||
213 | regs.ds = __USER_DS; | ||
214 | regs.es = __USER_DS; | ||
215 | regs.fs = __KERNEL_PERCPU; | ||
216 | regs.gs = __KERNEL_STACK_CANARY; | ||
217 | regs.orig_ax = -1; | ||
218 | regs.ip = (unsigned long) kernel_thread_helper; | ||
219 | regs.cs = __KERNEL_CS | get_kernel_rpl(); | ||
220 | regs.flags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; | ||
221 | |||
222 | /* Ok, create the new process.. */ | ||
223 | return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); | ||
224 | } | ||
225 | EXPORT_SYMBOL(kernel_thread); | ||
226 | |||
227 | void release_thread(struct task_struct *dead_task) | 183 | void release_thread(struct task_struct *dead_task) |
228 | { | 184 | { |
229 | BUG_ON(dead_task->mm); | 185 | BUG_ON(dead_task->mm); |
@@ -259,7 +215,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
259 | 215 | ||
260 | task_user_gs(p) = get_user_gs(regs); | 216 | task_user_gs(p) = get_user_gs(regs); |
261 | 217 | ||
218 | p->thread.io_bitmap_ptr = NULL; | ||
262 | tsk = current; | 219 | tsk = current; |
220 | err = -ENOMEM; | ||
221 | |||
222 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); | ||
223 | |||
263 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { | 224 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { |
264 | p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, | 225 | p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, |
265 | IO_BITMAP_BYTES, GFP_KERNEL); | 226 | IO_BITMAP_BYTES, GFP_KERNEL); |
@@ -430,46 +391,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
430 | return prev_p; | 391 | return prev_p; |
431 | } | 392 | } |
432 | 393 | ||
433 | int sys_clone(struct pt_regs *regs) | ||
434 | { | ||
435 | unsigned long clone_flags; | ||
436 | unsigned long newsp; | ||
437 | int __user *parent_tidptr, *child_tidptr; | ||
438 | |||
439 | clone_flags = regs->bx; | ||
440 | newsp = regs->cx; | ||
441 | parent_tidptr = (int __user *)regs->dx; | ||
442 | child_tidptr = (int __user *)regs->di; | ||
443 | if (!newsp) | ||
444 | newsp = regs->sp; | ||
445 | return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr); | ||
446 | } | ||
447 | |||
448 | /* | ||
449 | * sys_execve() executes a new program. | ||
450 | */ | ||
451 | int sys_execve(struct pt_regs *regs) | ||
452 | { | ||
453 | int error; | ||
454 | char *filename; | ||
455 | |||
456 | filename = getname((char __user *) regs->bx); | ||
457 | error = PTR_ERR(filename); | ||
458 | if (IS_ERR(filename)) | ||
459 | goto out; | ||
460 | error = do_execve(filename, | ||
461 | (char __user * __user *) regs->cx, | ||
462 | (char __user * __user *) regs->dx, | ||
463 | regs); | ||
464 | if (error == 0) { | ||
465 | /* Make sure we don't return using sysenter.. */ | ||
466 | set_thread_flag(TIF_IRET); | ||
467 | } | ||
468 | putname(filename); | ||
469 | out: | ||
470 | return error; | ||
471 | } | ||
472 | |||
473 | #define top_esp (THREAD_SIZE - sizeof(unsigned long)) | 394 | #define top_esp (THREAD_SIZE - sizeof(unsigned long)) |
474 | #define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long)) | 395 | #define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long)) |
475 | 396 | ||
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ad535b683170..f9e033150cdf 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -26,7 +26,6 @@ | |||
26 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
27 | #include <linux/user.h> | 27 | #include <linux/user.h> |
28 | #include <linux/interrupt.h> | 28 | #include <linux/interrupt.h> |
29 | #include <linux/utsname.h> | ||
30 | #include <linux/delay.h> | 29 | #include <linux/delay.h> |
31 | #include <linux/module.h> | 30 | #include <linux/module.h> |
32 | #include <linux/ptrace.h> | 31 | #include <linux/ptrace.h> |
@@ -38,7 +37,6 @@ | |||
38 | #include <linux/uaccess.h> | 37 | #include <linux/uaccess.h> |
39 | #include <linux/io.h> | 38 | #include <linux/io.h> |
40 | #include <linux/ftrace.h> | 39 | #include <linux/ftrace.h> |
41 | #include <linux/dmi.h> | ||
42 | 40 | ||
43 | #include <asm/pgtable.h> | 41 | #include <asm/pgtable.h> |
44 | #include <asm/system.h> | 42 | #include <asm/system.h> |
@@ -52,14 +50,13 @@ | |||
52 | #include <asm/idle.h> | 50 | #include <asm/idle.h> |
53 | #include <asm/syscalls.h> | 51 | #include <asm/syscalls.h> |
54 | #include <asm/ds.h> | 52 | #include <asm/ds.h> |
53 | #include <asm/debugreg.h> | ||
55 | 54 | ||
56 | asmlinkage extern void ret_from_fork(void); | 55 | asmlinkage extern void ret_from_fork(void); |
57 | 56 | ||
58 | DEFINE_PER_CPU(unsigned long, old_rsp); | 57 | DEFINE_PER_CPU(unsigned long, old_rsp); |
59 | static DEFINE_PER_CPU(unsigned char, is_idle); | 58 | static DEFINE_PER_CPU(unsigned char, is_idle); |
60 | 59 | ||
61 | unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; | ||
62 | |||
63 | static ATOMIC_NOTIFIER_HEAD(idle_notifier); | 60 | static ATOMIC_NOTIFIER_HEAD(idle_notifier); |
64 | 61 | ||
65 | void idle_notifier_register(struct notifier_block *n) | 62 | void idle_notifier_register(struct notifier_block *n) |
@@ -162,31 +159,21 @@ void __show_regs(struct pt_regs *regs, int all) | |||
162 | unsigned long d0, d1, d2, d3, d6, d7; | 159 | unsigned long d0, d1, d2, d3, d6, d7; |
163 | unsigned int fsindex, gsindex; | 160 | unsigned int fsindex, gsindex; |
164 | unsigned int ds, cs, es; | 161 | unsigned int ds, cs, es; |
165 | const char *board; | 162 | |
166 | 163 | show_regs_common(); | |
167 | printk("\n"); | 164 | printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); |
168 | print_modules(); | ||
169 | board = dmi_get_system_info(DMI_PRODUCT_NAME); | ||
170 | if (!board) | ||
171 | board = ""; | ||
172 | printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n", | ||
173 | current->pid, current->comm, print_tainted(), | ||
174 | init_utsname()->release, | ||
175 | (int)strcspn(init_utsname()->version, " "), | ||
176 | init_utsname()->version, board); | ||
177 | printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); | ||
178 | printk_address(regs->ip, 1); | 165 | printk_address(regs->ip, 1); |
179 | printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, | 166 | printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, |
180 | regs->sp, regs->flags); | 167 | regs->sp, regs->flags); |
181 | printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n", | 168 | printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n", |
182 | regs->ax, regs->bx, regs->cx); | 169 | regs->ax, regs->bx, regs->cx); |
183 | printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n", | 170 | printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n", |
184 | regs->dx, regs->si, regs->di); | 171 | regs->dx, regs->si, regs->di); |
185 | printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n", | 172 | printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n", |
186 | regs->bp, regs->r8, regs->r9); | 173 | regs->bp, regs->r8, regs->r9); |
187 | printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n", | 174 | printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n", |
188 | regs->r10, regs->r11, regs->r12); | 175 | regs->r10, regs->r11, regs->r12); |
189 | printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n", | 176 | printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n", |
190 | regs->r13, regs->r14, regs->r15); | 177 | regs->r13, regs->r14, regs->r15); |
191 | 178 | ||
192 | asm("movl %%ds,%0" : "=r" (ds)); | 179 | asm("movl %%ds,%0" : "=r" (ds)); |
@@ -207,27 +194,26 @@ void __show_regs(struct pt_regs *regs, int all) | |||
207 | cr3 = read_cr3(); | 194 | cr3 = read_cr3(); |
208 | cr4 = read_cr4(); | 195 | cr4 = read_cr4(); |
209 | 196 | ||
210 | printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", | 197 | printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", |
211 | fs, fsindex, gs, gsindex, shadowgs); | 198 | fs, fsindex, gs, gsindex, shadowgs); |
212 | printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, | 199 | printk(KERN_DEFAULT "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, |
213 | es, cr0); | 200 | es, cr0); |
214 | printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, | 201 | printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, |
215 | cr4); | 202 | cr4); |
216 | 203 | ||
217 | get_debugreg(d0, 0); | 204 | get_debugreg(d0, 0); |
218 | get_debugreg(d1, 1); | 205 | get_debugreg(d1, 1); |
219 | get_debugreg(d2, 2); | 206 | get_debugreg(d2, 2); |
220 | printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); | 207 | printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); |
221 | get_debugreg(d3, 3); | 208 | get_debugreg(d3, 3); |
222 | get_debugreg(d6, 6); | 209 | get_debugreg(d6, 6); |
223 | get_debugreg(d7, 7); | 210 | get_debugreg(d7, 7); |
224 | printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); | 211 | printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); |
225 | } | 212 | } |
226 | 213 | ||
227 | void show_regs(struct pt_regs *regs) | 214 | void show_regs(struct pt_regs *regs) |
228 | { | 215 | { |
229 | printk(KERN_INFO "CPU %d:", smp_processor_id()); | 216 | show_registers(regs); |
230 | __show_regs(regs, 1); | ||
231 | show_trace(NULL, regs, (void *)(regs + 1), regs->bp); | 217 | show_trace(NULL, regs, (void *)(regs + 1), regs->bp); |
232 | } | 218 | } |
233 | 219 | ||
@@ -285,8 +271,9 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
285 | *childregs = *regs; | 271 | *childregs = *regs; |
286 | 272 | ||
287 | childregs->ax = 0; | 273 | childregs->ax = 0; |
288 | childregs->sp = sp; | 274 | if (user_mode(regs)) |
289 | if (sp == ~0UL) | 275 | childregs->sp = sp; |
276 | else | ||
290 | childregs->sp = (unsigned long)childregs; | 277 | childregs->sp = (unsigned long)childregs; |
291 | 278 | ||
292 | p->thread.sp = (unsigned long) childregs; | 279 | p->thread.sp = (unsigned long) childregs; |
@@ -297,12 +284,16 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
297 | 284 | ||
298 | p->thread.fs = me->thread.fs; | 285 | p->thread.fs = me->thread.fs; |
299 | p->thread.gs = me->thread.gs; | 286 | p->thread.gs = me->thread.gs; |
287 | p->thread.io_bitmap_ptr = NULL; | ||
300 | 288 | ||
301 | savesegment(gs, p->thread.gsindex); | 289 | savesegment(gs, p->thread.gsindex); |
302 | savesegment(fs, p->thread.fsindex); | 290 | savesegment(fs, p->thread.fsindex); |
303 | savesegment(es, p->thread.es); | 291 | savesegment(es, p->thread.es); |
304 | savesegment(ds, p->thread.ds); | 292 | savesegment(ds, p->thread.ds); |
305 | 293 | ||
294 | err = -ENOMEM; | ||
295 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); | ||
296 | |||
306 | if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { | 297 | if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { |
307 | p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); | 298 | p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); |
308 | if (!p->thread.io_bitmap_ptr) { | 299 | if (!p->thread.io_bitmap_ptr) { |
@@ -341,29 +332,46 @@ out: | |||
341 | kfree(p->thread.io_bitmap_ptr); | 332 | kfree(p->thread.io_bitmap_ptr); |
342 | p->thread.io_bitmap_max = 0; | 333 | p->thread.io_bitmap_max = 0; |
343 | } | 334 | } |
335 | |||
344 | return err; | 336 | return err; |
345 | } | 337 | } |
346 | 338 | ||
347 | void | 339 | static void |
348 | start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) | 340 | start_thread_common(struct pt_regs *regs, unsigned long new_ip, |
341 | unsigned long new_sp, | ||
342 | unsigned int _cs, unsigned int _ss, unsigned int _ds) | ||
349 | { | 343 | { |
350 | loadsegment(fs, 0); | 344 | loadsegment(fs, 0); |
351 | loadsegment(es, 0); | 345 | loadsegment(es, _ds); |
352 | loadsegment(ds, 0); | 346 | loadsegment(ds, _ds); |
353 | load_gs_index(0); | 347 | load_gs_index(0); |
354 | regs->ip = new_ip; | 348 | regs->ip = new_ip; |
355 | regs->sp = new_sp; | 349 | regs->sp = new_sp; |
356 | percpu_write(old_rsp, new_sp); | 350 | percpu_write(old_rsp, new_sp); |
357 | regs->cs = __USER_CS; | 351 | regs->cs = _cs; |
358 | regs->ss = __USER_DS; | 352 | regs->ss = _ss; |
359 | regs->flags = 0x200; | 353 | regs->flags = X86_EFLAGS_IF; |
360 | set_fs(USER_DS); | 354 | set_fs(USER_DS); |
361 | /* | 355 | /* |
362 | * Free the old FP and other extended state | 356 | * Free the old FP and other extended state |
363 | */ | 357 | */ |
364 | free_thread_xstate(current); | 358 | free_thread_xstate(current); |
365 | } | 359 | } |
366 | EXPORT_SYMBOL_GPL(start_thread); | 360 | |
361 | void | ||
362 | start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) | ||
363 | { | ||
364 | start_thread_common(regs, new_ip, new_sp, | ||
365 | __USER_CS, __USER_DS, 0); | ||
366 | } | ||
367 | |||
368 | #ifdef CONFIG_IA32_EMULATION | ||
369 | void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp) | ||
370 | { | ||
371 | start_thread_common(regs, new_ip, new_sp, | ||
372 | __USER32_CS, __USER32_DS, __USER32_DS); | ||
373 | } | ||
374 | #endif | ||
367 | 375 | ||
368 | /* | 376 | /* |
369 | * switch_to(x,y) should switch tasks from x to y. | 377 | * switch_to(x,y) should switch tasks from x to y. |
@@ -495,26 +503,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
495 | */ | 503 | */ |
496 | if (preload_fpu) | 504 | if (preload_fpu) |
497 | __math_state_restore(); | 505 | __math_state_restore(); |
498 | return prev_p; | ||
499 | } | ||
500 | 506 | ||
501 | /* | 507 | return prev_p; |
502 | * sys_execve() executes a new program. | ||
503 | */ | ||
504 | asmlinkage | ||
505 | long sys_execve(char __user *name, char __user * __user *argv, | ||
506 | char __user * __user *envp, struct pt_regs *regs) | ||
507 | { | ||
508 | long error; | ||
509 | char *filename; | ||
510 | |||
511 | filename = getname(name); | ||
512 | error = PTR_ERR(filename); | ||
513 | if (IS_ERR(filename)) | ||
514 | return error; | ||
515 | error = do_execve(filename, argv, envp, regs); | ||
516 | putname(filename); | ||
517 | return error; | ||
518 | } | 508 | } |
519 | 509 | ||
520 | void set_personality_64bit(void) | 510 | void set_personality_64bit(void) |
@@ -531,15 +521,6 @@ void set_personality_64bit(void) | |||
531 | current->personality &= ~READ_IMPLIES_EXEC; | 521 | current->personality &= ~READ_IMPLIES_EXEC; |
532 | } | 522 | } |
533 | 523 | ||
534 | asmlinkage long | ||
535 | sys_clone(unsigned long clone_flags, unsigned long newsp, | ||
536 | void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) | ||
537 | { | ||
538 | if (!newsp) | ||
539 | newsp = regs->sp; | ||
540 | return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); | ||
541 | } | ||
542 | |||
543 | unsigned long get_wchan(struct task_struct *p) | 524 | unsigned long get_wchan(struct task_struct *p) |
544 | { | 525 | { |
545 | unsigned long stack; | 526 | unsigned long stack; |
@@ -664,3 +645,8 @@ long sys_arch_prctl(int code, unsigned long addr) | |||
664 | return do_arch_prctl(current, code, addr); | 645 | return do_arch_prctl(current, code, addr); |
665 | } | 646 | } |
666 | 647 | ||
648 | unsigned long KSTK_ESP(struct task_struct *task) | ||
649 | { | ||
650 | return (test_tsk_thread_flag(task, TIF_IA32)) ? | ||
651 | (task_pt_regs(task)->sp) : ((task)->thread.usersp); | ||
652 | } | ||
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 7b058a2dc66a..017d937639fe 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -22,6 +22,8 @@ | |||
22 | #include <linux/seccomp.h> | 22 | #include <linux/seccomp.h> |
23 | #include <linux/signal.h> | 23 | #include <linux/signal.h> |
24 | #include <linux/workqueue.h> | 24 | #include <linux/workqueue.h> |
25 | #include <linux/perf_event.h> | ||
26 | #include <linux/hw_breakpoint.h> | ||
25 | 27 | ||
26 | #include <asm/uaccess.h> | 28 | #include <asm/uaccess.h> |
27 | #include <asm/pgtable.h> | 29 | #include <asm/pgtable.h> |
@@ -34,6 +36,7 @@ | |||
34 | #include <asm/prctl.h> | 36 | #include <asm/prctl.h> |
35 | #include <asm/proto.h> | 37 | #include <asm/proto.h> |
36 | #include <asm/ds.h> | 38 | #include <asm/ds.h> |
39 | #include <asm/hw_breakpoint.h> | ||
37 | 40 | ||
38 | #include "tls.h" | 41 | #include "tls.h" |
39 | 42 | ||
@@ -49,6 +52,118 @@ enum x86_regset { | |||
49 | REGSET_IOPERM32, | 52 | REGSET_IOPERM32, |
50 | }; | 53 | }; |
51 | 54 | ||
55 | struct pt_regs_offset { | ||
56 | const char *name; | ||
57 | int offset; | ||
58 | }; | ||
59 | |||
60 | #define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} | ||
61 | #define REG_OFFSET_END {.name = NULL, .offset = 0} | ||
62 | |||
63 | static const struct pt_regs_offset regoffset_table[] = { | ||
64 | #ifdef CONFIG_X86_64 | ||
65 | REG_OFFSET_NAME(r15), | ||
66 | REG_OFFSET_NAME(r14), | ||
67 | REG_OFFSET_NAME(r13), | ||
68 | REG_OFFSET_NAME(r12), | ||
69 | REG_OFFSET_NAME(r11), | ||
70 | REG_OFFSET_NAME(r10), | ||
71 | REG_OFFSET_NAME(r9), | ||
72 | REG_OFFSET_NAME(r8), | ||
73 | #endif | ||
74 | REG_OFFSET_NAME(bx), | ||
75 | REG_OFFSET_NAME(cx), | ||
76 | REG_OFFSET_NAME(dx), | ||
77 | REG_OFFSET_NAME(si), | ||
78 | REG_OFFSET_NAME(di), | ||
79 | REG_OFFSET_NAME(bp), | ||
80 | REG_OFFSET_NAME(ax), | ||
81 | #ifdef CONFIG_X86_32 | ||
82 | REG_OFFSET_NAME(ds), | ||
83 | REG_OFFSET_NAME(es), | ||
84 | REG_OFFSET_NAME(fs), | ||
85 | REG_OFFSET_NAME(gs), | ||
86 | #endif | ||
87 | REG_OFFSET_NAME(orig_ax), | ||
88 | REG_OFFSET_NAME(ip), | ||
89 | REG_OFFSET_NAME(cs), | ||
90 | REG_OFFSET_NAME(flags), | ||
91 | REG_OFFSET_NAME(sp), | ||
92 | REG_OFFSET_NAME(ss), | ||
93 | REG_OFFSET_END, | ||
94 | }; | ||
95 | |||
96 | /** | ||
97 | * regs_query_register_offset() - query register offset from its name | ||
98 | * @name: the name of a register | ||
99 | * | ||
100 | * regs_query_register_offset() returns the offset of a register in struct | ||
101 | * pt_regs from its name. If the name is invalid, this returns -EINVAL; | ||
102 | */ | ||
103 | int regs_query_register_offset(const char *name) | ||
104 | { | ||
105 | const struct pt_regs_offset *roff; | ||
106 | for (roff = regoffset_table; roff->name != NULL; roff++) | ||
107 | if (!strcmp(roff->name, name)) | ||
108 | return roff->offset; | ||
109 | return -EINVAL; | ||
110 | } | ||
111 | |||
112 | /** | ||
113 | * regs_query_register_name() - query register name from its offset | ||
114 | * @offset: the offset of a register in struct pt_regs. | ||
115 | * | ||
116 | * regs_query_register_name() returns the name of a register from its | ||
117 | * offset in struct pt_regs. If the @offset is invalid, this returns NULL; | ||
118 | */ | ||
119 | const char *regs_query_register_name(unsigned int offset) | ||
120 | { | ||
121 | const struct pt_regs_offset *roff; | ||
122 | for (roff = regoffset_table; roff->name != NULL; roff++) | ||
123 | if (roff->offset == offset) | ||
124 | return roff->name; | ||
125 | return NULL; | ||
126 | } | ||
127 | |||
128 | static const int arg_offs_table[] = { | ||
129 | #ifdef CONFIG_X86_32 | ||
130 | [0] = offsetof(struct pt_regs, ax), | ||
131 | [1] = offsetof(struct pt_regs, dx), | ||
132 | [2] = offsetof(struct pt_regs, cx) | ||
133 | #else /* CONFIG_X86_64 */ | ||
134 | [0] = offsetof(struct pt_regs, di), | ||
135 | [1] = offsetof(struct pt_regs, si), | ||
136 | [2] = offsetof(struct pt_regs, dx), | ||
137 | [3] = offsetof(struct pt_regs, cx), | ||
138 | [4] = offsetof(struct pt_regs, r8), | ||
139 | [5] = offsetof(struct pt_regs, r9) | ||
140 | #endif | ||
141 | }; | ||
142 | |||
143 | /** | ||
144 | * regs_get_argument_nth() - get Nth argument at function call | ||
145 | * @regs: pt_regs which contains registers at function entry. | ||
146 | * @n: argument number. | ||
147 | * | ||
148 | * regs_get_argument_nth() returns @n th argument of a function call. | ||
149 | * Since usually the kernel stack will be changed right after function entry, | ||
150 | * you must use this at function entry. If the @n th entry is NOT in the | ||
151 | * kernel stack or pt_regs, this returns 0. | ||
152 | */ | ||
153 | unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n) | ||
154 | { | ||
155 | if (n < ARRAY_SIZE(arg_offs_table)) | ||
156 | return *(unsigned long *)((char *)regs + arg_offs_table[n]); | ||
157 | else { | ||
158 | /* | ||
159 | * The typical case: arg n is on the stack. | ||
160 | * (Note: stack[0] = return address, so skip it) | ||
161 | */ | ||
162 | n -= ARRAY_SIZE(arg_offs_table); | ||
163 | return regs_get_kernel_stack_nth(regs, 1 + n); | ||
164 | } | ||
165 | } | ||
166 | |||
52 | /* | 167 | /* |
53 | * does not yet catch signals sent when the child dies. | 168 | * does not yet catch signals sent when the child dies. |
54 | * in exit.c or in signal.c. | 169 | * in exit.c or in signal.c. |
@@ -137,11 +252,6 @@ static int set_segment_reg(struct task_struct *task, | |||
137 | return 0; | 252 | return 0; |
138 | } | 253 | } |
139 | 254 | ||
140 | static unsigned long debugreg_addr_limit(struct task_struct *task) | ||
141 | { | ||
142 | return TASK_SIZE - 3; | ||
143 | } | ||
144 | |||
145 | #else /* CONFIG_X86_64 */ | 255 | #else /* CONFIG_X86_64 */ |
146 | 256 | ||
147 | #define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) | 257 | #define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) |
@@ -266,15 +376,6 @@ static int set_segment_reg(struct task_struct *task, | |||
266 | return 0; | 376 | return 0; |
267 | } | 377 | } |
268 | 378 | ||
269 | static unsigned long debugreg_addr_limit(struct task_struct *task) | ||
270 | { | ||
271 | #ifdef CONFIG_IA32_EMULATION | ||
272 | if (test_tsk_thread_flag(task, TIF_IA32)) | ||
273 | return IA32_PAGE_OFFSET - 3; | ||
274 | #endif | ||
275 | return TASK_SIZE_MAX - 7; | ||
276 | } | ||
277 | |||
278 | #endif /* CONFIG_X86_32 */ | 379 | #endif /* CONFIG_X86_32 */ |
279 | 380 | ||
280 | static unsigned long get_flags(struct task_struct *task) | 381 | static unsigned long get_flags(struct task_struct *task) |
@@ -408,14 +509,14 @@ static int genregs_get(struct task_struct *target, | |||
408 | { | 509 | { |
409 | if (kbuf) { | 510 | if (kbuf) { |
410 | unsigned long *k = kbuf; | 511 | unsigned long *k = kbuf; |
411 | while (count > 0) { | 512 | while (count >= sizeof(*k)) { |
412 | *k++ = getreg(target, pos); | 513 | *k++ = getreg(target, pos); |
413 | count -= sizeof(*k); | 514 | count -= sizeof(*k); |
414 | pos += sizeof(*k); | 515 | pos += sizeof(*k); |
415 | } | 516 | } |
416 | } else { | 517 | } else { |
417 | unsigned long __user *u = ubuf; | 518 | unsigned long __user *u = ubuf; |
418 | while (count > 0) { | 519 | while (count >= sizeof(*u)) { |
419 | if (__put_user(getreg(target, pos), u++)) | 520 | if (__put_user(getreg(target, pos), u++)) |
420 | return -EFAULT; | 521 | return -EFAULT; |
421 | count -= sizeof(*u); | 522 | count -= sizeof(*u); |
@@ -434,14 +535,14 @@ static int genregs_set(struct task_struct *target, | |||
434 | int ret = 0; | 535 | int ret = 0; |
435 | if (kbuf) { | 536 | if (kbuf) { |
436 | const unsigned long *k = kbuf; | 537 | const unsigned long *k = kbuf; |
437 | while (count > 0 && !ret) { | 538 | while (count >= sizeof(*k) && !ret) { |
438 | ret = putreg(target, pos, *k++); | 539 | ret = putreg(target, pos, *k++); |
439 | count -= sizeof(*k); | 540 | count -= sizeof(*k); |
440 | pos += sizeof(*k); | 541 | pos += sizeof(*k); |
441 | } | 542 | } |
442 | } else { | 543 | } else { |
443 | const unsigned long __user *u = ubuf; | 544 | const unsigned long __user *u = ubuf; |
444 | while (count > 0 && !ret) { | 545 | while (count >= sizeof(*u) && !ret) { |
445 | unsigned long word; | 546 | unsigned long word; |
446 | ret = __get_user(word, u++); | 547 | ret = __get_user(word, u++); |
447 | if (ret) | 548 | if (ret) |
@@ -454,99 +555,237 @@ static int genregs_set(struct task_struct *target, | |||
454 | return ret; | 555 | return ret; |
455 | } | 556 | } |
456 | 557 | ||
558 | static void ptrace_triggered(struct perf_event *bp, int nmi, | ||
559 | struct perf_sample_data *data, | ||
560 | struct pt_regs *regs) | ||
561 | { | ||
562 | int i; | ||
563 | struct thread_struct *thread = &(current->thread); | ||
564 | |||
565 | /* | ||
566 | * Store in the virtual DR6 register the fact that the breakpoint | ||
567 | * was hit so the thread's debugger will see it. | ||
568 | */ | ||
569 | for (i = 0; i < HBP_NUM; i++) { | ||
570 | if (thread->ptrace_bps[i] == bp) | ||
571 | break; | ||
572 | } | ||
573 | |||
574 | thread->debugreg6 |= (DR_TRAP0 << i); | ||
575 | } | ||
576 | |||
457 | /* | 577 | /* |
458 | * This function is trivial and will be inlined by the compiler. | 578 | * Walk through every ptrace breakpoints for this thread and |
459 | * Having it separates the implementation details of debug | 579 | * build the dr7 value on top of their attributes. |
460 | * registers from the interface details of ptrace. | 580 | * |
461 | */ | 581 | */ |
462 | static unsigned long ptrace_get_debugreg(struct task_struct *child, int n) | 582 | static unsigned long ptrace_get_dr7(struct perf_event *bp[]) |
463 | { | 583 | { |
464 | switch (n) { | 584 | int i; |
465 | case 0: return child->thread.debugreg0; | 585 | int dr7 = 0; |
466 | case 1: return child->thread.debugreg1; | 586 | struct arch_hw_breakpoint *info; |
467 | case 2: return child->thread.debugreg2; | 587 | |
468 | case 3: return child->thread.debugreg3; | 588 | for (i = 0; i < HBP_NUM; i++) { |
469 | case 6: return child->thread.debugreg6; | 589 | if (bp[i] && !bp[i]->attr.disabled) { |
470 | case 7: return child->thread.debugreg7; | 590 | info = counter_arch_bp(bp[i]); |
591 | dr7 |= encode_dr7(i, info->len, info->type); | ||
592 | } | ||
471 | } | 593 | } |
472 | return 0; | 594 | |
595 | return dr7; | ||
473 | } | 596 | } |
474 | 597 | ||
475 | static int ptrace_set_debugreg(struct task_struct *child, | 598 | static int |
476 | int n, unsigned long data) | 599 | ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, |
600 | struct task_struct *tsk, int disabled) | ||
477 | { | 601 | { |
478 | int i; | 602 | int err; |
603 | int gen_len, gen_type; | ||
604 | struct perf_event_attr attr; | ||
479 | 605 | ||
480 | if (unlikely(n == 4 || n == 5)) | 606 | /* |
481 | return -EIO; | 607 | * We shoud have at least an inactive breakpoint at this |
608 | * slot. It means the user is writing dr7 without having | ||
609 | * written the address register first | ||
610 | */ | ||
611 | if (!bp) | ||
612 | return -EINVAL; | ||
482 | 613 | ||
483 | if (n < 4 && unlikely(data >= debugreg_addr_limit(child))) | 614 | err = arch_bp_generic_fields(len, type, &gen_len, &gen_type); |
484 | return -EIO; | 615 | if (err) |
616 | return err; | ||
485 | 617 | ||
486 | switch (n) { | 618 | attr = bp->attr; |
487 | case 0: child->thread.debugreg0 = data; break; | 619 | attr.bp_len = gen_len; |
488 | case 1: child->thread.debugreg1 = data; break; | 620 | attr.bp_type = gen_type; |
489 | case 2: child->thread.debugreg2 = data; break; | 621 | attr.disabled = disabled; |
490 | case 3: child->thread.debugreg3 = data; break; | ||
491 | 622 | ||
492 | case 6: | 623 | return modify_user_hw_breakpoint(bp, &attr); |
493 | if ((data & ~0xffffffffUL) != 0) | 624 | } |
494 | return -EIO; | ||
495 | child->thread.debugreg6 = data; | ||
496 | break; | ||
497 | 625 | ||
498 | case 7: | 626 | /* |
627 | * Handle ptrace writes to debug register 7. | ||
628 | */ | ||
629 | static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) | ||
630 | { | ||
631 | struct thread_struct *thread = &(tsk->thread); | ||
632 | unsigned long old_dr7; | ||
633 | int i, orig_ret = 0, rc = 0; | ||
634 | int enabled, second_pass = 0; | ||
635 | unsigned len, type; | ||
636 | struct perf_event *bp; | ||
637 | |||
638 | data &= ~DR_CONTROL_RESERVED; | ||
639 | old_dr7 = ptrace_get_dr7(thread->ptrace_bps); | ||
640 | restore: | ||
641 | /* | ||
642 | * Loop through all the hardware breakpoints, making the | ||
643 | * appropriate changes to each. | ||
644 | */ | ||
645 | for (i = 0; i < HBP_NUM; i++) { | ||
646 | enabled = decode_dr7(data, i, &len, &type); | ||
647 | bp = thread->ptrace_bps[i]; | ||
648 | |||
649 | if (!enabled) { | ||
650 | if (bp) { | ||
651 | /* | ||
652 | * Don't unregister the breakpoints right-away, | ||
653 | * unless all register_user_hw_breakpoint() | ||
654 | * requests have succeeded. This prevents | ||
655 | * any window of opportunity for debug | ||
656 | * register grabbing by other users. | ||
657 | */ | ||
658 | if (!second_pass) | ||
659 | continue; | ||
660 | |||
661 | rc = ptrace_modify_breakpoint(bp, len, type, | ||
662 | tsk, 1); | ||
663 | if (rc) | ||
664 | break; | ||
665 | } | ||
666 | continue; | ||
667 | } | ||
668 | |||
669 | rc = ptrace_modify_breakpoint(bp, len, type, tsk, 0); | ||
670 | if (rc) | ||
671 | break; | ||
672 | } | ||
673 | /* | ||
674 | * Make a second pass to free the remaining unused breakpoints | ||
675 | * or to restore the original breakpoints if an error occurred. | ||
676 | */ | ||
677 | if (!second_pass) { | ||
678 | second_pass = 1; | ||
679 | if (rc < 0) { | ||
680 | orig_ret = rc; | ||
681 | data = old_dr7; | ||
682 | } | ||
683 | goto restore; | ||
684 | } | ||
685 | return ((orig_ret < 0) ? orig_ret : rc); | ||
686 | } | ||
687 | |||
688 | /* | ||
689 | * Handle PTRACE_PEEKUSR calls for the debug register area. | ||
690 | */ | ||
691 | static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) | ||
692 | { | ||
693 | struct thread_struct *thread = &(tsk->thread); | ||
694 | unsigned long val = 0; | ||
695 | |||
696 | if (n < HBP_NUM) { | ||
697 | struct perf_event *bp; | ||
698 | bp = thread->ptrace_bps[n]; | ||
699 | if (!bp) | ||
700 | return 0; | ||
701 | val = bp->hw.info.address; | ||
702 | } else if (n == 6) { | ||
703 | val = thread->debugreg6; | ||
704 | } else if (n == 7) { | ||
705 | val = ptrace_get_dr7(thread->ptrace_bps); | ||
706 | } | ||
707 | return val; | ||
708 | } | ||
709 | |||
710 | static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, | ||
711 | unsigned long addr) | ||
712 | { | ||
713 | struct perf_event *bp; | ||
714 | struct thread_struct *t = &tsk->thread; | ||
715 | struct perf_event_attr attr; | ||
716 | |||
717 | if (!t->ptrace_bps[nr]) { | ||
718 | hw_breakpoint_init(&attr); | ||
499 | /* | 719 | /* |
500 | * Sanity-check data. Take one half-byte at once with | 720 | * Put stub len and type to register (reserve) an inactive but |
501 | * check = (val >> (16 + 4*i)) & 0xf. It contains the | 721 | * correct bp |
502 | * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits | ||
503 | * 2 and 3 are LENi. Given a list of invalid values, | ||
504 | * we do mask |= 1 << invalid_value, so that | ||
505 | * (mask >> check) & 1 is a correct test for invalid | ||
506 | * values. | ||
507 | * | ||
508 | * R/Wi contains the type of the breakpoint / | ||
509 | * watchpoint, LENi contains the length of the watched | ||
510 | * data in the watchpoint case. | ||
511 | * | ||
512 | * The invalid values are: | ||
513 | * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit] | ||
514 | * - R/Wi == 0x10 (break on I/O reads or writes), so | ||
515 | * mask |= 0x4444. | ||
516 | * - R/Wi == 0x00 && LENi != 0x00, so we have mask |= | ||
517 | * 0x1110. | ||
518 | * | ||
519 | * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54. | ||
520 | * | ||
521 | * See the Intel Manual "System Programming Guide", | ||
522 | * 15.2.4 | ||
523 | * | ||
524 | * Note that LENi == 0x10 is defined on x86_64 in long | ||
525 | * mode (i.e. even for 32-bit userspace software, but | ||
526 | * 64-bit kernel), so the x86_64 mask value is 0x5454. | ||
527 | * See the AMD manual no. 24593 (AMD64 System Programming) | ||
528 | */ | 722 | */ |
529 | #ifdef CONFIG_X86_32 | 723 | attr.bp_addr = addr; |
530 | #define DR7_MASK 0x5f54 | 724 | attr.bp_len = HW_BREAKPOINT_LEN_1; |
531 | #else | 725 | attr.bp_type = HW_BREAKPOINT_W; |
532 | #define DR7_MASK 0x5554 | 726 | attr.disabled = 1; |
533 | #endif | 727 | |
534 | data &= ~DR_CONTROL_RESERVED; | 728 | bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk); |
535 | for (i = 0; i < 4; i++) | 729 | |
536 | if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1) | 730 | /* |
537 | return -EIO; | 731 | * CHECKME: the previous code returned -EIO if the addr wasn't |
538 | child->thread.debugreg7 = data; | 732 | * a valid task virtual addr. The new one will return -EINVAL in |
539 | if (data) | 733 | * this case. |
540 | set_tsk_thread_flag(child, TIF_DEBUG); | 734 | * -EINVAL may be what we want for in-kernel breakpoints users, |
541 | else | 735 | * but -EIO looks better for ptrace, since we refuse a register |
542 | clear_tsk_thread_flag(child, TIF_DEBUG); | 736 | * writing for the user. And anyway this is the previous |
543 | break; | 737 | * behaviour. |
738 | */ | ||
739 | if (IS_ERR(bp)) | ||
740 | return PTR_ERR(bp); | ||
741 | |||
742 | t->ptrace_bps[nr] = bp; | ||
743 | } else { | ||
744 | int err; | ||
745 | |||
746 | bp = t->ptrace_bps[nr]; | ||
747 | |||
748 | attr = bp->attr; | ||
749 | attr.bp_addr = addr; | ||
750 | err = modify_user_hw_breakpoint(bp, &attr); | ||
751 | if (err) | ||
752 | return err; | ||
544 | } | 753 | } |
545 | 754 | ||
755 | |||
546 | return 0; | 756 | return 0; |
547 | } | 757 | } |
548 | 758 | ||
549 | /* | 759 | /* |
760 | * Handle PTRACE_POKEUSR calls for the debug register area. | ||
761 | */ | ||
762 | int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) | ||
763 | { | ||
764 | struct thread_struct *thread = &(tsk->thread); | ||
765 | int rc = 0; | ||
766 | |||
767 | /* There are no DR4 or DR5 registers */ | ||
768 | if (n == 4 || n == 5) | ||
769 | return -EIO; | ||
770 | |||
771 | if (n == 6) { | ||
772 | thread->debugreg6 = val; | ||
773 | goto ret_path; | ||
774 | } | ||
775 | if (n < HBP_NUM) { | ||
776 | rc = ptrace_set_breakpoint_addr(tsk, n, val); | ||
777 | if (rc) | ||
778 | return rc; | ||
779 | } | ||
780 | /* All that's left is DR7 */ | ||
781 | if (n == 7) | ||
782 | rc = ptrace_write_dr7(tsk, val); | ||
783 | |||
784 | ret_path: | ||
785 | return rc; | ||
786 | } | ||
787 | |||
788 | /* | ||
550 | * These access the current or another (stopped) task's io permission | 789 | * These access the current or another (stopped) task's io permission |
551 | * bitmap for debugging or core dump. | 790 | * bitmap for debugging or core dump. |
552 | */ | 791 | */ |
@@ -1219,14 +1458,14 @@ static int genregs32_get(struct task_struct *target, | |||
1219 | { | 1458 | { |
1220 | if (kbuf) { | 1459 | if (kbuf) { |
1221 | compat_ulong_t *k = kbuf; | 1460 | compat_ulong_t *k = kbuf; |
1222 | while (count > 0) { | 1461 | while (count >= sizeof(*k)) { |
1223 | getreg32(target, pos, k++); | 1462 | getreg32(target, pos, k++); |
1224 | count -= sizeof(*k); | 1463 | count -= sizeof(*k); |
1225 | pos += sizeof(*k); | 1464 | pos += sizeof(*k); |
1226 | } | 1465 | } |
1227 | } else { | 1466 | } else { |
1228 | compat_ulong_t __user *u = ubuf; | 1467 | compat_ulong_t __user *u = ubuf; |
1229 | while (count > 0) { | 1468 | while (count >= sizeof(*u)) { |
1230 | compat_ulong_t word; | 1469 | compat_ulong_t word; |
1231 | getreg32(target, pos, &word); | 1470 | getreg32(target, pos, &word); |
1232 | if (__put_user(word, u++)) | 1471 | if (__put_user(word, u++)) |
@@ -1247,14 +1486,14 @@ static int genregs32_set(struct task_struct *target, | |||
1247 | int ret = 0; | 1486 | int ret = 0; |
1248 | if (kbuf) { | 1487 | if (kbuf) { |
1249 | const compat_ulong_t *k = kbuf; | 1488 | const compat_ulong_t *k = kbuf; |
1250 | while (count > 0 && !ret) { | 1489 | while (count >= sizeof(*k) && !ret) { |
1251 | ret = putreg32(target, pos, *k++); | 1490 | ret = putreg32(target, pos, *k++); |
1252 | count -= sizeof(*k); | 1491 | count -= sizeof(*k); |
1253 | pos += sizeof(*k); | 1492 | pos += sizeof(*k); |
1254 | } | 1493 | } |
1255 | } else { | 1494 | } else { |
1256 | const compat_ulong_t __user *u = ubuf; | 1495 | const compat_ulong_t __user *u = ubuf; |
1257 | while (count > 0 && !ret) { | 1496 | while (count >= sizeof(*u) && !ret) { |
1258 | compat_ulong_t word; | 1497 | compat_ulong_t word; |
1259 | ret = __get_user(word, u++); | 1498 | ret = __get_user(word, u++); |
1260 | if (ret) | 1499 | if (ret) |
@@ -1437,21 +1676,33 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) | |||
1437 | #endif | 1676 | #endif |
1438 | } | 1677 | } |
1439 | 1678 | ||
1440 | void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, | 1679 | static void fill_sigtrap_info(struct task_struct *tsk, |
1441 | int error_code, int si_code) | 1680 | struct pt_regs *regs, |
1681 | int error_code, int si_code, | ||
1682 | struct siginfo *info) | ||
1442 | { | 1683 | { |
1443 | struct siginfo info; | ||
1444 | |||
1445 | tsk->thread.trap_no = 1; | 1684 | tsk->thread.trap_no = 1; |
1446 | tsk->thread.error_code = error_code; | 1685 | tsk->thread.error_code = error_code; |
1447 | 1686 | ||
1448 | memset(&info, 0, sizeof(info)); | 1687 | memset(info, 0, sizeof(*info)); |
1449 | info.si_signo = SIGTRAP; | 1688 | info->si_signo = SIGTRAP; |
1450 | info.si_code = si_code; | 1689 | info->si_code = si_code; |
1690 | info->si_addr = user_mode_vm(regs) ? (void __user *)regs->ip : NULL; | ||
1691 | } | ||
1451 | 1692 | ||
1452 | /* User-mode ip? */ | 1693 | void user_single_step_siginfo(struct task_struct *tsk, |
1453 | info.si_addr = user_mode_vm(regs) ? (void __user *) regs->ip : NULL; | 1694 | struct pt_regs *regs, |
1695 | struct siginfo *info) | ||
1696 | { | ||
1697 | fill_sigtrap_info(tsk, regs, 0, TRAP_BRKPT, info); | ||
1698 | } | ||
1454 | 1699 | ||
1700 | void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, | ||
1701 | int error_code, int si_code) | ||
1702 | { | ||
1703 | struct siginfo info; | ||
1704 | |||
1705 | fill_sigtrap_info(tsk, regs, error_code, si_code, &info); | ||
1455 | /* Send us the fake SIGTRAP */ | 1706 | /* Send us the fake SIGTRAP */ |
1456 | force_sig_info(SIGTRAP, &info, tsk); | 1707 | force_sig_info(SIGTRAP, &info, tsk); |
1457 | } | 1708 | } |
@@ -1516,29 +1767,22 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs) | |||
1516 | 1767 | ||
1517 | asmregparm void syscall_trace_leave(struct pt_regs *regs) | 1768 | asmregparm void syscall_trace_leave(struct pt_regs *regs) |
1518 | { | 1769 | { |
1770 | bool step; | ||
1771 | |||
1519 | if (unlikely(current->audit_context)) | 1772 | if (unlikely(current->audit_context)) |
1520 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); | 1773 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); |
1521 | 1774 | ||
1522 | if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) | 1775 | if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) |
1523 | trace_sys_exit(regs, regs->ax); | 1776 | trace_sys_exit(regs, regs->ax); |
1524 | 1777 | ||
1525 | if (test_thread_flag(TIF_SYSCALL_TRACE)) | ||
1526 | tracehook_report_syscall_exit(regs, 0); | ||
1527 | |||
1528 | /* | 1778 | /* |
1529 | * If TIF_SYSCALL_EMU is set, we only get here because of | 1779 | * If TIF_SYSCALL_EMU is set, we only get here because of |
1530 | * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP). | 1780 | * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP). |
1531 | * We already reported this syscall instruction in | 1781 | * We already reported this syscall instruction in |
1532 | * syscall_trace_enter(), so don't do any more now. | 1782 | * syscall_trace_enter(). |
1533 | */ | ||
1534 | if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) | ||
1535 | return; | ||
1536 | |||
1537 | /* | ||
1538 | * If we are single-stepping, synthesize a trap to follow the | ||
1539 | * system call instruction. | ||
1540 | */ | 1783 | */ |
1541 | if (test_thread_flag(TIF_SINGLESTEP) && | 1784 | step = unlikely(test_thread_flag(TIF_SINGLESTEP)) && |
1542 | tracehook_consider_fatal_signal(current, SIGTRAP)) | 1785 | !test_thread_flag(TIF_SYSCALL_EMU); |
1543 | send_sigtrap(current, regs, 0, TRAP_BRKPT); | 1786 | if (step || test_thread_flag(TIF_SYSCALL_TRACE)) |
1787 | tracehook_report_syscall_exit(regs, step); | ||
1544 | } | 1788 | } |
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 6c3b2c6fd772..18093d7498f0 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c | |||
@@ -499,6 +499,7 @@ static void __init quirk_amd_nb_node(struct pci_dev *dev) | |||
499 | { | 499 | { |
500 | struct pci_dev *nb_ht; | 500 | struct pci_dev *nb_ht; |
501 | unsigned int devfn; | 501 | unsigned int devfn; |
502 | u32 node; | ||
502 | u32 val; | 503 | u32 val; |
503 | 504 | ||
504 | devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 0); | 505 | devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 0); |
@@ -507,7 +508,13 @@ static void __init quirk_amd_nb_node(struct pci_dev *dev) | |||
507 | return; | 508 | return; |
508 | 509 | ||
509 | pci_read_config_dword(nb_ht, 0x60, &val); | 510 | pci_read_config_dword(nb_ht, 0x60, &val); |
510 | set_dev_node(&dev->dev, val & 7); | 511 | node = val & 7; |
512 | /* | ||
513 | * Some hardware may return an invalid node ID, | ||
514 | * so check it first: | ||
515 | */ | ||
516 | if (node_online(node)) | ||
517 | set_dev_node(&dev->dev, node); | ||
511 | pci_dev_put(nb_ht); | 518 | pci_dev_put(nb_ht); |
512 | } | 519 | } |
513 | 520 | ||
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 27349f92a6d7..1545bc0c9845 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -4,6 +4,7 @@ | |||
4 | #include <linux/pm.h> | 4 | #include <linux/pm.h> |
5 | #include <linux/efi.h> | 5 | #include <linux/efi.h> |
6 | #include <linux/dmi.h> | 6 | #include <linux/dmi.h> |
7 | #include <linux/sched.h> | ||
7 | #include <linux/tboot.h> | 8 | #include <linux/tboot.h> |
8 | #include <acpi/reboot.h> | 9 | #include <acpi/reboot.h> |
9 | #include <asm/io.h> | 10 | #include <asm/io.h> |
@@ -22,7 +23,7 @@ | |||
22 | # include <linux/ctype.h> | 23 | # include <linux/ctype.h> |
23 | # include <linux/mc146818rtc.h> | 24 | # include <linux/mc146818rtc.h> |
24 | #else | 25 | #else |
25 | # include <asm/iommu.h> | 26 | # include <asm/x86_init.h> |
26 | #endif | 27 | #endif |
27 | 28 | ||
28 | /* | 29 | /* |
@@ -258,6 +259,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { | |||
258 | DMI_MATCH(DMI_PRODUCT_NAME, "SBC-FITPC2"), | 259 | DMI_MATCH(DMI_PRODUCT_NAME, "SBC-FITPC2"), |
259 | }, | 260 | }, |
260 | }, | 261 | }, |
262 | { /* Handle problems with rebooting on ASUS P4S800 */ | ||
263 | .callback = set_bios_reboot, | ||
264 | .ident = "ASUS P4S800", | ||
265 | .matches = { | ||
266 | DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), | ||
267 | DMI_MATCH(DMI_BOARD_NAME, "P4S800"), | ||
268 | }, | ||
269 | }, | ||
261 | { } | 270 | { } |
262 | }; | 271 | }; |
263 | 272 | ||
@@ -435,6 +444,14 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { | |||
435 | DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5"), | 444 | DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5"), |
436 | }, | 445 | }, |
437 | }, | 446 | }, |
447 | { /* Handle problems with rebooting on Apple Macmini3,1 */ | ||
448 | .callback = set_pci_reboot, | ||
449 | .ident = "Apple Macmini3,1", | ||
450 | .matches = { | ||
451 | DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), | ||
452 | DMI_MATCH(DMI_PRODUCT_NAME, "Macmini3,1"), | ||
453 | }, | ||
454 | }, | ||
438 | { } | 455 | { } |
439 | }; | 456 | }; |
440 | 457 | ||
@@ -613,7 +630,7 @@ void native_machine_shutdown(void) | |||
613 | #endif | 630 | #endif |
614 | 631 | ||
615 | #ifdef CONFIG_X86_64 | 632 | #ifdef CONFIG_X86_64 |
616 | pci_iommu_shutdown(); | 633 | x86_platform.iommu_shutdown(); |
617 | #endif | 634 | #endif |
618 | } | 635 | } |
619 | 636 | ||
diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c index 61a837743fe5..fda313ebbb03 100644 --- a/arch/x86/kernel/reboot_fixups_32.c +++ b/arch/x86/kernel/reboot_fixups_32.c | |||
@@ -12,7 +12,7 @@ | |||
12 | #include <linux/interrupt.h> | 12 | #include <linux/interrupt.h> |
13 | #include <asm/reboot_fixups.h> | 13 | #include <asm/reboot_fixups.h> |
14 | #include <asm/msr.h> | 14 | #include <asm/msr.h> |
15 | #include <asm/geode.h> | 15 | #include <linux/cs5535.h> |
16 | 16 | ||
17 | static void cs5530a_warm_reset(struct pci_dev *dev) | 17 | static void cs5530a_warm_reset(struct pci_dev *dev) |
18 | { | 18 | { |
@@ -80,6 +80,7 @@ void mach_reboot_fixups(void) | |||
80 | continue; | 80 | continue; |
81 | 81 | ||
82 | cur->reboot_fixup(dev); | 82 | cur->reboot_fixup(dev); |
83 | pci_dev_put(dev); | ||
83 | } | 84 | } |
84 | } | 85 | } |
85 | 86 | ||
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index e09f0e2c14b5..f7b8b9894b22 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -73,6 +73,7 @@ | |||
73 | 73 | ||
74 | #include <asm/mtrr.h> | 74 | #include <asm/mtrr.h> |
75 | #include <asm/apic.h> | 75 | #include <asm/apic.h> |
76 | #include <asm/trampoline.h> | ||
76 | #include <asm/e820.h> | 77 | #include <asm/e820.h> |
77 | #include <asm/mpspec.h> | 78 | #include <asm/mpspec.h> |
78 | #include <asm/setup.h> | 79 | #include <asm/setup.h> |
@@ -106,9 +107,11 @@ | |||
106 | #include <asm/percpu.h> | 107 | #include <asm/percpu.h> |
107 | #include <asm/topology.h> | 108 | #include <asm/topology.h> |
108 | #include <asm/apicdef.h> | 109 | #include <asm/apicdef.h> |
110 | #include <asm/k8.h> | ||
109 | #ifdef CONFIG_X86_64 | 111 | #ifdef CONFIG_X86_64 |
110 | #include <asm/numa_64.h> | 112 | #include <asm/numa_64.h> |
111 | #endif | 113 | #endif |
114 | #include <asm/mce.h> | ||
112 | 115 | ||
113 | /* | 116 | /* |
114 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. | 117 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. |
@@ -247,7 +250,7 @@ EXPORT_SYMBOL(edd); | |||
247 | * from boot_params into a safe place. | 250 | * from boot_params into a safe place. |
248 | * | 251 | * |
249 | */ | 252 | */ |
250 | static inline void copy_edd(void) | 253 | static inline void __init copy_edd(void) |
251 | { | 254 | { |
252 | memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer, | 255 | memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer, |
253 | sizeof(edd.mbr_signature)); | 256 | sizeof(edd.mbr_signature)); |
@@ -256,7 +259,7 @@ static inline void copy_edd(void) | |||
256 | edd.edd_info_nr = boot_params.eddbuf_entries; | 259 | edd.edd_info_nr = boot_params.eddbuf_entries; |
257 | } | 260 | } |
258 | #else | 261 | #else |
259 | static inline void copy_edd(void) | 262 | static inline void __init copy_edd(void) |
260 | { | 263 | { |
261 | } | 264 | } |
262 | #endif | 265 | #endif |
@@ -486,42 +489,11 @@ static void __init reserve_early_setup_data(void) | |||
486 | 489 | ||
487 | #ifdef CONFIG_KEXEC | 490 | #ifdef CONFIG_KEXEC |
488 | 491 | ||
489 | /** | ||
490 | * Reserve @size bytes of crashkernel memory at any suitable offset. | ||
491 | * | ||
492 | * @size: Size of the crashkernel memory to reserve. | ||
493 | * Returns the base address on success, and -1ULL on failure. | ||
494 | */ | ||
495 | static | ||
496 | unsigned long long __init find_and_reserve_crashkernel(unsigned long long size) | ||
497 | { | ||
498 | const unsigned long long alignment = 16<<20; /* 16M */ | ||
499 | unsigned long long start = 0LL; | ||
500 | |||
501 | while (1) { | ||
502 | int ret; | ||
503 | |||
504 | start = find_e820_area(start, ULONG_MAX, size, alignment); | ||
505 | if (start == -1ULL) | ||
506 | return start; | ||
507 | |||
508 | /* try to reserve it */ | ||
509 | ret = reserve_bootmem_generic(start, size, BOOTMEM_EXCLUSIVE); | ||
510 | if (ret >= 0) | ||
511 | return start; | ||
512 | |||
513 | start += alignment; | ||
514 | } | ||
515 | } | ||
516 | |||
517 | static inline unsigned long long get_total_mem(void) | 492 | static inline unsigned long long get_total_mem(void) |
518 | { | 493 | { |
519 | unsigned long long total; | 494 | unsigned long long total; |
520 | 495 | ||
521 | total = max_low_pfn - min_low_pfn; | 496 | total = max_pfn - min_low_pfn; |
522 | #ifdef CONFIG_HIGHMEM | ||
523 | total += highend_pfn - highstart_pfn; | ||
524 | #endif | ||
525 | 497 | ||
526 | return total << PAGE_SHIFT; | 498 | return total << PAGE_SHIFT; |
527 | } | 499 | } |
@@ -541,21 +513,25 @@ static void __init reserve_crashkernel(void) | |||
541 | 513 | ||
542 | /* 0 means: find the address automatically */ | 514 | /* 0 means: find the address automatically */ |
543 | if (crash_base <= 0) { | 515 | if (crash_base <= 0) { |
544 | crash_base = find_and_reserve_crashkernel(crash_size); | 516 | const unsigned long long alignment = 16<<20; /* 16M */ |
517 | |||
518 | crash_base = find_e820_area(alignment, ULONG_MAX, crash_size, | ||
519 | alignment); | ||
545 | if (crash_base == -1ULL) { | 520 | if (crash_base == -1ULL) { |
546 | pr_info("crashkernel reservation failed. " | 521 | pr_info("crashkernel reservation failed - No suitable area found.\n"); |
547 | "No suitable area found.\n"); | ||
548 | return; | 522 | return; |
549 | } | 523 | } |
550 | } else { | 524 | } else { |
551 | ret = reserve_bootmem_generic(crash_base, crash_size, | 525 | unsigned long long start; |
552 | BOOTMEM_EXCLUSIVE); | 526 | |
553 | if (ret < 0) { | 527 | start = find_e820_area(crash_base, ULONG_MAX, crash_size, |
554 | pr_info("crashkernel reservation failed - " | 528 | 1<<20); |
555 | "memory is in use\n"); | 529 | if (start != crash_base) { |
530 | pr_info("crashkernel reservation failed - memory is in use.\n"); | ||
556 | return; | 531 | return; |
557 | } | 532 | } |
558 | } | 533 | } |
534 | reserve_early(crash_base, crash_base + crash_size, "CRASH KERNEL"); | ||
559 | 535 | ||
560 | printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " | 536 | printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " |
561 | "for crashkernel (System RAM: %ldMB)\n", | 537 | "for crashkernel (System RAM: %ldMB)\n", |
@@ -660,6 +636,13 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { | |||
660 | }, | 636 | }, |
661 | }, | 637 | }, |
662 | { | 638 | { |
639 | .callback = dmi_low_memory_corruption, | ||
640 | .ident = "Phoenix/MSC BIOS", | ||
641 | .matches = { | ||
642 | DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"), | ||
643 | }, | ||
644 | }, | ||
645 | { | ||
663 | /* | 646 | /* |
664 | * AMI BIOS with low memory corruption was found on Intel DG45ID board. | 647 | * AMI BIOS with low memory corruption was found on Intel DG45ID board. |
665 | * It hase different DMI_BIOS_VENDOR = "Intel Corp.", for now we will | 648 | * It hase different DMI_BIOS_VENDOR = "Intel Corp.", for now we will |
@@ -691,6 +674,9 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { | |||
691 | 674 | ||
692 | void __init setup_arch(char **cmdline_p) | 675 | void __init setup_arch(char **cmdline_p) |
693 | { | 676 | { |
677 | int acpi = 0; | ||
678 | int k8 = 0; | ||
679 | |||
694 | #ifdef CONFIG_X86_32 | 680 | #ifdef CONFIG_X86_32 |
695 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); | 681 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); |
696 | visws_early_detect(); | 682 | visws_early_detect(); |
@@ -783,21 +769,18 @@ void __init setup_arch(char **cmdline_p) | |||
783 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); | 769 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); |
784 | *cmdline_p = command_line; | 770 | *cmdline_p = command_line; |
785 | 771 | ||
786 | #ifdef CONFIG_X86_64 | ||
787 | /* | 772 | /* |
788 | * Must call this twice: Once just to detect whether hardware doesn't | 773 | * x86_configure_nx() is called before parse_early_param() to detect |
789 | * support NX (so that the early EHCI debug console setup can safely | 774 | * whether hardware doesn't support NX (so that the early EHCI debug |
790 | * call set_fixmap(), and then again after parsing early parameters to | 775 | * console setup can safely call set_fixmap()). It may then be called |
791 | * honor the respective command line option. | 776 | * again from within noexec_setup() during parsing early parameters |
777 | * to honor the respective command line option. | ||
792 | */ | 778 | */ |
793 | check_efer(); | 779 | x86_configure_nx(); |
794 | #endif | ||
795 | 780 | ||
796 | parse_early_param(); | 781 | parse_early_param(); |
797 | 782 | ||
798 | #ifdef CONFIG_X86_64 | 783 | x86_report_nx(); |
799 | check_efer(); | ||
800 | #endif | ||
801 | 784 | ||
802 | /* Must be before kernel pagetables are setup */ | 785 | /* Must be before kernel pagetables are setup */ |
803 | vmi_activate(); | 786 | vmi_activate(); |
@@ -893,6 +876,20 @@ void __init setup_arch(char **cmdline_p) | |||
893 | 876 | ||
894 | reserve_brk(); | 877 | reserve_brk(); |
895 | 878 | ||
879 | /* | ||
880 | * Find and reserve possible boot-time SMP configuration: | ||
881 | */ | ||
882 | find_smp_config(); | ||
883 | |||
884 | reserve_trampoline_memory(); | ||
885 | |||
886 | #ifdef CONFIG_ACPI_SLEEP | ||
887 | /* | ||
888 | * Reserve low memory region for sleep support. | ||
889 | * even before init_memory_mapping | ||
890 | */ | ||
891 | acpi_reserve_wakeup_memory(); | ||
892 | #endif | ||
896 | init_gbpages(); | 893 | init_gbpages(); |
897 | 894 | ||
898 | /* max_pfn_mapped is updated here */ | 895 | /* max_pfn_mapped is updated here */ |
@@ -919,6 +916,8 @@ void __init setup_arch(char **cmdline_p) | |||
919 | 916 | ||
920 | reserve_initrd(); | 917 | reserve_initrd(); |
921 | 918 | ||
919 | reserve_crashkernel(); | ||
920 | |||
922 | vsmp_init(); | 921 | vsmp_init(); |
923 | 922 | ||
924 | io_delay_init(); | 923 | io_delay_init(); |
@@ -934,23 +933,15 @@ void __init setup_arch(char **cmdline_p) | |||
934 | /* | 933 | /* |
935 | * Parse SRAT to discover nodes. | 934 | * Parse SRAT to discover nodes. |
936 | */ | 935 | */ |
937 | acpi_numa_init(); | 936 | acpi = acpi_numa_init(); |
938 | #endif | 937 | #endif |
939 | 938 | ||
940 | initmem_init(0, max_pfn); | 939 | #ifdef CONFIG_K8_NUMA |
941 | 940 | if (!acpi) | |
942 | #ifdef CONFIG_ACPI_SLEEP | 941 | k8 = !k8_numa_init(0, max_pfn); |
943 | /* | ||
944 | * Reserve low memory region for sleep support. | ||
945 | */ | ||
946 | acpi_reserve_bootmem(); | ||
947 | #endif | 942 | #endif |
948 | /* | ||
949 | * Find and reserve possible boot-time SMP configuration: | ||
950 | */ | ||
951 | find_smp_config(); | ||
952 | 943 | ||
953 | reserve_crashkernel(); | 944 | initmem_init(0, max_pfn, acpi, k8); |
954 | 945 | ||
955 | #ifdef CONFIG_X86_64 | 946 | #ifdef CONFIG_X86_64 |
956 | /* | 947 | /* |
@@ -1024,6 +1015,8 @@ void __init setup_arch(char **cmdline_p) | |||
1024 | #endif | 1015 | #endif |
1025 | #endif | 1016 | #endif |
1026 | x86_init.oem.banner(); | 1017 | x86_init.oem.banner(); |
1018 | |||
1019 | mcheck_init(); | ||
1027 | } | 1020 | } |
1028 | 1021 | ||
1029 | #ifdef CONFIG_X86_32 | 1022 | #ifdef CONFIG_X86_32 |
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index d559af913e1f..35abcb8b00e9 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -1,3 +1,5 @@ | |||
1 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
2 | |||
1 | #include <linux/kernel.h> | 3 | #include <linux/kernel.h> |
2 | #include <linux/module.h> | 4 | #include <linux/module.h> |
3 | #include <linux/init.h> | 5 | #include <linux/init.h> |
@@ -20,9 +22,9 @@ | |||
20 | #include <asm/stackprotector.h> | 22 | #include <asm/stackprotector.h> |
21 | 23 | ||
22 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS | 24 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS |
23 | # define DBG(x...) printk(KERN_DEBUG x) | 25 | # define DBG(fmt, ...) pr_dbg(fmt, ##__VA_ARGS__) |
24 | #else | 26 | #else |
25 | # define DBG(x...) | 27 | # define DBG(fmt, ...) do { if (0) pr_dbg(fmt, ##__VA_ARGS__); } while (0) |
26 | #endif | 28 | #endif |
27 | 29 | ||
28 | DEFINE_PER_CPU(int, cpu_number); | 30 | DEFINE_PER_CPU(int, cpu_number); |
@@ -116,8 +118,8 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, | |||
116 | } else { | 118 | } else { |
117 | ptr = __alloc_bootmem_node_nopanic(NODE_DATA(node), | 119 | ptr = __alloc_bootmem_node_nopanic(NODE_DATA(node), |
118 | size, align, goal); | 120 | size, align, goal); |
119 | pr_debug("per cpu data for cpu%d %lu bytes on node%d at " | 121 | pr_debug("per cpu data for cpu%d %lu bytes on node%d at %016lx\n", |
120 | "%016lx\n", cpu, size, node, __pa(ptr)); | 122 | cpu, size, node, __pa(ptr)); |
121 | } | 123 | } |
122 | return ptr; | 124 | return ptr; |
123 | #else | 125 | #else |
@@ -198,8 +200,7 @@ void __init setup_per_cpu_areas(void) | |||
198 | pcpu_cpu_distance, | 200 | pcpu_cpu_distance, |
199 | pcpu_fc_alloc, pcpu_fc_free); | 201 | pcpu_fc_alloc, pcpu_fc_free); |
200 | if (rc < 0) | 202 | if (rc < 0) |
201 | pr_warning("PERCPU: %s allocator failed (%d), " | 203 | pr_warning("%s allocator failed (%d), falling back to page size\n", |
202 | "falling back to page size\n", | ||
203 | pcpu_fc_names[pcpu_chosen_fc], rc); | 204 | pcpu_fc_names[pcpu_chosen_fc], rc); |
204 | } | 205 | } |
205 | if (rc < 0) | 206 | if (rc < 0) |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 6a44a76055ad..4fd173cd8e57 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/stddef.h> | 19 | #include <linux/stddef.h> |
20 | #include <linux/personality.h> | 20 | #include <linux/personality.h> |
21 | #include <linux/uaccess.h> | 21 | #include <linux/uaccess.h> |
22 | #include <linux/user-return-notifier.h> | ||
22 | 23 | ||
23 | #include <asm/processor.h> | 24 | #include <asm/processor.h> |
24 | #include <asm/ucontext.h> | 25 | #include <asm/ucontext.h> |
@@ -544,22 +545,12 @@ sys_sigaction(int sig, const struct old_sigaction __user *act, | |||
544 | } | 545 | } |
545 | #endif /* CONFIG_X86_32 */ | 546 | #endif /* CONFIG_X86_32 */ |
546 | 547 | ||
547 | #ifdef CONFIG_X86_32 | 548 | long |
548 | int sys_sigaltstack(struct pt_regs *regs) | ||
549 | { | ||
550 | const stack_t __user *uss = (const stack_t __user *)regs->bx; | ||
551 | stack_t __user *uoss = (stack_t __user *)regs->cx; | ||
552 | |||
553 | return do_sigaltstack(uss, uoss, regs->sp); | ||
554 | } | ||
555 | #else /* !CONFIG_X86_32 */ | ||
556 | asmlinkage long | ||
557 | sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, | 549 | sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, |
558 | struct pt_regs *regs) | 550 | struct pt_regs *regs) |
559 | { | 551 | { |
560 | return do_sigaltstack(uss, uoss, regs->sp); | 552 | return do_sigaltstack(uss, uoss, regs->sp); |
561 | } | 553 | } |
562 | #endif /* CONFIG_X86_32 */ | ||
563 | 554 | ||
564 | /* | 555 | /* |
565 | * Do a signal return; undo the signal stack. | 556 | * Do a signal return; undo the signal stack. |
@@ -799,15 +790,6 @@ static void do_signal(struct pt_regs *regs) | |||
799 | 790 | ||
800 | signr = get_signal_to_deliver(&info, &ka, regs, NULL); | 791 | signr = get_signal_to_deliver(&info, &ka, regs, NULL); |
801 | if (signr > 0) { | 792 | if (signr > 0) { |
802 | /* | ||
803 | * Re-enable any watchpoints before delivering the | ||
804 | * signal to user space. The processor register will | ||
805 | * have been cleared if the watchpoint triggered | ||
806 | * inside the kernel. | ||
807 | */ | ||
808 | if (current->thread.debugreg7) | ||
809 | set_debugreg(current->thread.debugreg7, 7); | ||
810 | |||
811 | /* Whee! Actually deliver the signal. */ | 793 | /* Whee! Actually deliver the signal. */ |
812 | if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { | 794 | if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { |
813 | /* | 795 | /* |
@@ -872,6 +854,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | |||
872 | if (current->replacement_session_keyring) | 854 | if (current->replacement_session_keyring) |
873 | key_replace_session_keyring(); | 855 | key_replace_session_keyring(); |
874 | } | 856 | } |
857 | if (thread_info_flags & _TIF_USER_RETURN_NOTIFY) | ||
858 | fire_user_return_notifiers(); | ||
875 | 859 | ||
876 | #ifdef CONFIG_X86_32 | 860 | #ifdef CONFIG_X86_32 |
877 | clear_thread_flag(TIF_IRET); | 861 | clear_thread_flag(TIF_IRET); |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 565ebc65920e..678d0b8c26f3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -671,6 +671,26 @@ static void __cpuinit do_fork_idle(struct work_struct *work) | |||
671 | complete(&c_idle->done); | 671 | complete(&c_idle->done); |
672 | } | 672 | } |
673 | 673 | ||
674 | /* reduce the number of lines printed when booting a large cpu count system */ | ||
675 | static void __cpuinit announce_cpu(int cpu, int apicid) | ||
676 | { | ||
677 | static int current_node = -1; | ||
678 | int node = cpu_to_node(cpu); | ||
679 | |||
680 | if (system_state == SYSTEM_BOOTING) { | ||
681 | if (node != current_node) { | ||
682 | if (current_node > (-1)) | ||
683 | pr_cont(" Ok.\n"); | ||
684 | current_node = node; | ||
685 | pr_info("Booting Node %3d, Processors ", node); | ||
686 | } | ||
687 | pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " Ok.\n" : ""); | ||
688 | return; | ||
689 | } else | ||
690 | pr_info("Booting Node %d Processor %d APIC 0x%x\n", | ||
691 | node, cpu, apicid); | ||
692 | } | ||
693 | |||
674 | /* | 694 | /* |
675 | * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad | 695 | * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad |
676 | * (ie clustered apic addressing mode), this is a LOGICAL apic ID. | 696 | * (ie clustered apic addressing mode), this is a LOGICAL apic ID. |
@@ -687,7 +707,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) | |||
687 | .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), | 707 | .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), |
688 | }; | 708 | }; |
689 | 709 | ||
690 | INIT_WORK(&c_idle.work, do_fork_idle); | 710 | INIT_WORK_ON_STACK(&c_idle.work, do_fork_idle); |
691 | 711 | ||
692 | alternatives_smp_switch(1); | 712 | alternatives_smp_switch(1); |
693 | 713 | ||
@@ -713,6 +733,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) | |||
713 | 733 | ||
714 | if (IS_ERR(c_idle.idle)) { | 734 | if (IS_ERR(c_idle.idle)) { |
715 | printk("failed fork for CPU %d\n", cpu); | 735 | printk("failed fork for CPU %d\n", cpu); |
736 | destroy_work_on_stack(&c_idle.work); | ||
716 | return PTR_ERR(c_idle.idle); | 737 | return PTR_ERR(c_idle.idle); |
717 | } | 738 | } |
718 | 739 | ||
@@ -736,9 +757,8 @@ do_rest: | |||
736 | /* start_ip had better be page-aligned! */ | 757 | /* start_ip had better be page-aligned! */ |
737 | start_ip = setup_trampoline(); | 758 | start_ip = setup_trampoline(); |
738 | 759 | ||
739 | /* So we see what's up */ | 760 | /* So we see what's up */ |
740 | printk(KERN_INFO "Booting processor %d APIC 0x%x ip 0x%lx\n", | 761 | announce_cpu(cpu, apicid); |
741 | cpu, apicid, start_ip); | ||
742 | 762 | ||
743 | /* | 763 | /* |
744 | * This grunge runs the startup process for | 764 | * This grunge runs the startup process for |
@@ -787,21 +807,17 @@ do_rest: | |||
787 | udelay(100); | 807 | udelay(100); |
788 | } | 808 | } |
789 | 809 | ||
790 | if (cpumask_test_cpu(cpu, cpu_callin_mask)) { | 810 | if (cpumask_test_cpu(cpu, cpu_callin_mask)) |
791 | /* number CPUs logically, starting from 1 (BSP is 0) */ | 811 | pr_debug("CPU%d: has booted.\n", cpu); |
792 | pr_debug("OK.\n"); | 812 | else { |
793 | printk(KERN_INFO "CPU%d: ", cpu); | ||
794 | print_cpu_info(&cpu_data(cpu)); | ||
795 | pr_debug("CPU has booted.\n"); | ||
796 | } else { | ||
797 | boot_error = 1; | 813 | boot_error = 1; |
798 | if (*((volatile unsigned char *)trampoline_base) | 814 | if (*((volatile unsigned char *)trampoline_base) |
799 | == 0xA5) | 815 | == 0xA5) |
800 | /* trampoline started but...? */ | 816 | /* trampoline started but...? */ |
801 | printk(KERN_ERR "Stuck ??\n"); | 817 | pr_err("CPU%d: Stuck ??\n", cpu); |
802 | else | 818 | else |
803 | /* trampoline code not run */ | 819 | /* trampoline code not run */ |
804 | printk(KERN_ERR "Not responding.\n"); | 820 | pr_err("CPU%d: Not responding.\n", cpu); |
805 | if (apic->inquire_remote_apic) | 821 | if (apic->inquire_remote_apic) |
806 | apic->inquire_remote_apic(apicid); | 822 | apic->inquire_remote_apic(apicid); |
807 | } | 823 | } |
@@ -831,6 +847,7 @@ do_rest: | |||
831 | smpboot_restore_warm_reset_vector(); | 847 | smpboot_restore_warm_reset_vector(); |
832 | } | 848 | } |
833 | 849 | ||
850 | destroy_work_on_stack(&c_idle.work); | ||
834 | return boot_error; | 851 | return boot_error; |
835 | } | 852 | } |
836 | 853 | ||
@@ -1250,16 +1267,7 @@ static void __ref remove_cpu_from_maps(int cpu) | |||
1250 | void cpu_disable_common(void) | 1267 | void cpu_disable_common(void) |
1251 | { | 1268 | { |
1252 | int cpu = smp_processor_id(); | 1269 | int cpu = smp_processor_id(); |
1253 | /* | ||
1254 | * HACK: | ||
1255 | * Allow any queued timer interrupts to get serviced | ||
1256 | * This is only a temporary solution until we cleanup | ||
1257 | * fixup_irqs as we do for IA64. | ||
1258 | */ | ||
1259 | local_irq_enable(); | ||
1260 | mdelay(1); | ||
1261 | 1270 | ||
1262 | local_irq_disable(); | ||
1263 | remove_siblinginfo(cpu); | 1271 | remove_siblinginfo(cpu); |
1264 | 1272 | ||
1265 | /* It's now safe to remove this processor from the online map */ | 1273 | /* It's now safe to remove this processor from the online map */ |
@@ -1300,14 +1308,16 @@ void native_cpu_die(unsigned int cpu) | |||
1300 | for (i = 0; i < 10; i++) { | 1308 | for (i = 0; i < 10; i++) { |
1301 | /* They ack this in play_dead by setting CPU_DEAD */ | 1309 | /* They ack this in play_dead by setting CPU_DEAD */ |
1302 | if (per_cpu(cpu_state, cpu) == CPU_DEAD) { | 1310 | if (per_cpu(cpu_state, cpu) == CPU_DEAD) { |
1303 | printk(KERN_INFO "CPU %d is now offline\n", cpu); | 1311 | if (system_state == SYSTEM_RUNNING) |
1312 | pr_info("CPU %u is now offline\n", cpu); | ||
1313 | |||
1304 | if (1 == num_online_cpus()) | 1314 | if (1 == num_online_cpus()) |
1305 | alternatives_smp_switch(0); | 1315 | alternatives_smp_switch(0); |
1306 | return; | 1316 | return; |
1307 | } | 1317 | } |
1308 | msleep(100); | 1318 | msleep(100); |
1309 | } | 1319 | } |
1310 | printk(KERN_ERR "CPU %u didn't die...\n", cpu); | 1320 | pr_err("CPU %u didn't die...\n", cpu); |
1311 | } | 1321 | } |
1312 | 1322 | ||
1313 | void play_dead_common(void) | 1323 | void play_dead_common(void) |
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index c3eb207181fe..922eefbb3f6c 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c | |||
@@ -53,17 +53,19 @@ save_stack_address_nosched(void *data, unsigned long addr, int reliable) | |||
53 | } | 53 | } |
54 | 54 | ||
55 | static const struct stacktrace_ops save_stack_ops = { | 55 | static const struct stacktrace_ops save_stack_ops = { |
56 | .warning = save_stack_warning, | 56 | .warning = save_stack_warning, |
57 | .warning_symbol = save_stack_warning_symbol, | 57 | .warning_symbol = save_stack_warning_symbol, |
58 | .stack = save_stack_stack, | 58 | .stack = save_stack_stack, |
59 | .address = save_stack_address, | 59 | .address = save_stack_address, |
60 | .walk_stack = print_context_stack, | ||
60 | }; | 61 | }; |
61 | 62 | ||
62 | static const struct stacktrace_ops save_stack_ops_nosched = { | 63 | static const struct stacktrace_ops save_stack_ops_nosched = { |
63 | .warning = save_stack_warning, | 64 | .warning = save_stack_warning, |
64 | .warning_symbol = save_stack_warning_symbol, | 65 | .warning_symbol = save_stack_warning_symbol, |
65 | .stack = save_stack_stack, | 66 | .stack = save_stack_stack, |
66 | .address = save_stack_address_nosched, | 67 | .address = save_stack_address_nosched, |
68 | .walk_stack = print_context_stack, | ||
67 | }; | 69 | }; |
68 | 70 | ||
69 | /* | 71 | /* |
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index 1884a8d12bfa..dee1ff7cba58 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c | |||
@@ -24,31 +24,6 @@ | |||
24 | 24 | ||
25 | #include <asm/syscalls.h> | 25 | #include <asm/syscalls.h> |
26 | 26 | ||
27 | asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, | ||
28 | unsigned long prot, unsigned long flags, | ||
29 | unsigned long fd, unsigned long pgoff) | ||
30 | { | ||
31 | int error = -EBADF; | ||
32 | struct file *file = NULL; | ||
33 | struct mm_struct *mm = current->mm; | ||
34 | |||
35 | flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); | ||
36 | if (!(flags & MAP_ANONYMOUS)) { | ||
37 | file = fget(fd); | ||
38 | if (!file) | ||
39 | goto out; | ||
40 | } | ||
41 | |||
42 | down_write(&mm->mmap_sem); | ||
43 | error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); | ||
44 | up_write(&mm->mmap_sem); | ||
45 | |||
46 | if (file) | ||
47 | fput(file); | ||
48 | out: | ||
49 | return error; | ||
50 | } | ||
51 | |||
52 | /* | 27 | /* |
53 | * Perform the select(nd, in, out, ex, tv) and mmap() system | 28 | * Perform the select(nd, in, out, ex, tv) and mmap() system |
54 | * calls. Linux/i386 didn't use to be able to handle more than | 29 | * calls. Linux/i386 didn't use to be able to handle more than |
@@ -77,7 +52,7 @@ asmlinkage int old_mmap(struct mmap_arg_struct __user *arg) | |||
77 | if (a.offset & ~PAGE_MASK) | 52 | if (a.offset & ~PAGE_MASK) |
78 | goto out; | 53 | goto out; |
79 | 54 | ||
80 | err = sys_mmap2(a.addr, a.len, a.prot, a.flags, | 55 | err = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, |
81 | a.fd, a.offset >> PAGE_SHIFT); | 56 | a.fd, a.offset >> PAGE_SHIFT); |
82 | out: | 57 | out: |
83 | return err; | 58 | return err; |
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 45e00eb09c3a..8aa2057efd12 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c | |||
@@ -23,26 +23,11 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, | |||
23 | unsigned long, fd, unsigned long, off) | 23 | unsigned long, fd, unsigned long, off) |
24 | { | 24 | { |
25 | long error; | 25 | long error; |
26 | struct file *file; | ||
27 | |||
28 | error = -EINVAL; | 26 | error = -EINVAL; |
29 | if (off & ~PAGE_MASK) | 27 | if (off & ~PAGE_MASK) |
30 | goto out; | 28 | goto out; |
31 | 29 | ||
32 | error = -EBADF; | 30 | error = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); |
33 | file = NULL; | ||
34 | flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); | ||
35 | if (!(flags & MAP_ANONYMOUS)) { | ||
36 | file = fget(fd); | ||
37 | if (!file) | ||
38 | goto out; | ||
39 | } | ||
40 | down_write(¤t->mm->mmap_sem); | ||
41 | error = do_mmap_pgoff(file, addr, len, prot, flags, off >> PAGE_SHIFT); | ||
42 | up_write(¤t->mm->mmap_sem); | ||
43 | |||
44 | if (file) | ||
45 | fput(file); | ||
46 | out: | 31 | out: |
47 | return error; | 32 | return error; |
48 | } | 33 | } |
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index 0157cd26d7cc..15228b5d3eb7 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S | |||
@@ -191,7 +191,7 @@ ENTRY(sys_call_table) | |||
191 | .long sys_ni_syscall /* reserved for streams2 */ | 191 | .long sys_ni_syscall /* reserved for streams2 */ |
192 | .long ptregs_vfork /* 190 */ | 192 | .long ptregs_vfork /* 190 */ |
193 | .long sys_getrlimit | 193 | .long sys_getrlimit |
194 | .long sys_mmap2 | 194 | .long sys_mmap_pgoff |
195 | .long sys_truncate64 | 195 | .long sys_truncate64 |
196 | .long sys_ftruncate64 | 196 | .long sys_ftruncate64 |
197 | .long sys_stat64 /* 195 */ | 197 | .long sys_stat64 /* 195 */ |
@@ -336,3 +336,4 @@ ENTRY(sys_call_table) | |||
336 | .long sys_pwritev | 336 | .long sys_pwritev |
337 | .long sys_rt_tgsigqueueinfo /* 335 */ | 337 | .long sys_rt_tgsigqueueinfo /* 335 */ |
338 | .long sys_perf_event_open | 338 | .long sys_perf_event_open |
339 | .long sys_recvmmsg | ||
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index dcb00d278512..be2573448ed9 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c | |||
@@ -38,7 +38,8 @@ unsigned long profile_pc(struct pt_regs *regs) | |||
38 | #ifdef CONFIG_FRAME_POINTER | 38 | #ifdef CONFIG_FRAME_POINTER |
39 | return *(unsigned long *)(regs->bp + sizeof(long)); | 39 | return *(unsigned long *)(regs->bp + sizeof(long)); |
40 | #else | 40 | #else |
41 | unsigned long *sp = (unsigned long *)regs->sp; | 41 | unsigned long *sp = |
42 | (unsigned long *)kernel_stack_pointer(regs); | ||
42 | /* | 43 | /* |
43 | * Return address is either directly at stack pointer | 44 | * Return address is either directly at stack pointer |
44 | * or above a saved flags. Eflags has bits 22-31 zero, | 45 | * or above a saved flags. Eflags has bits 22-31 zero, |
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 503c1f2e8835..364d015efebc 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c | |||
@@ -23,8 +23,6 @@ | |||
23 | static struct bau_control **uv_bau_table_bases __read_mostly; | 23 | static struct bau_control **uv_bau_table_bases __read_mostly; |
24 | static int uv_bau_retry_limit __read_mostly; | 24 | static int uv_bau_retry_limit __read_mostly; |
25 | 25 | ||
26 | /* position of pnode (which is nasid>>1): */ | ||
27 | static int uv_nshift __read_mostly; | ||
28 | /* base pnode in this partition */ | 26 | /* base pnode in this partition */ |
29 | static int uv_partition_base_pnode __read_mostly; | 27 | static int uv_partition_base_pnode __read_mostly; |
30 | 28 | ||
@@ -723,7 +721,7 @@ uv_activation_descriptor_init(int node, int pnode) | |||
723 | BUG_ON(!adp); | 721 | BUG_ON(!adp); |
724 | 722 | ||
725 | pa = uv_gpa(adp); /* need the real nasid*/ | 723 | pa = uv_gpa(adp); /* need the real nasid*/ |
726 | n = pa >> uv_nshift; | 724 | n = uv_gpa_to_pnode(pa); |
727 | m = pa & uv_mmask; | 725 | m = pa & uv_mmask; |
728 | 726 | ||
729 | uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, | 727 | uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, |
@@ -778,7 +776,7 @@ uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp) | |||
778 | * need the pnode of where the memory was really allocated | 776 | * need the pnode of where the memory was really allocated |
779 | */ | 777 | */ |
780 | pa = uv_gpa(pqp); | 778 | pa = uv_gpa(pqp); |
781 | pn = pa >> uv_nshift; | 779 | pn = uv_gpa_to_pnode(pa); |
782 | uv_write_global_mmr64(pnode, | 780 | uv_write_global_mmr64(pnode, |
783 | UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, | 781 | UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, |
784 | ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | | 782 | ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | |
@@ -819,10 +817,8 @@ static int __init uv_init_blade(int blade) | |||
819 | */ | 817 | */ |
820 | apicid = blade_to_first_apicid(blade); | 818 | apicid = blade_to_first_apicid(blade); |
821 | pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG); | 819 | pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG); |
822 | if ((pa & 0xff) != UV_BAU_MESSAGE) { | 820 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, |
823 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, | ||
824 | ((apicid << 32) | UV_BAU_MESSAGE)); | 821 | ((apicid << 32) | UV_BAU_MESSAGE)); |
825 | } | ||
826 | return 0; | 822 | return 0; |
827 | } | 823 | } |
828 | 824 | ||
@@ -843,8 +839,7 @@ static int __init uv_bau_init(void) | |||
843 | GFP_KERNEL, cpu_to_node(cur_cpu)); | 839 | GFP_KERNEL, cpu_to_node(cur_cpu)); |
844 | 840 | ||
845 | uv_bau_retry_limit = 1; | 841 | uv_bau_retry_limit = 1; |
846 | uv_nshift = uv_hub_info->n_val; | 842 | uv_mmask = (1UL << uv_hub_info->m_val) - 1; |
847 | uv_mmask = (1UL << uv_hub_info->n_val) - 1; | ||
848 | nblades = uv_num_possible_blades(); | 843 | nblades = uv_num_possible_blades(); |
849 | 844 | ||
850 | uv_bau_table_bases = (struct bau_control **) | 845 | uv_bau_table_bases = (struct bau_control **) |
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index 699f7eeb896a..c652ef62742d 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c | |||
@@ -3,22 +3,28 @@ | |||
3 | #include <asm/trampoline.h> | 3 | #include <asm/trampoline.h> |
4 | #include <asm/e820.h> | 4 | #include <asm/e820.h> |
5 | 5 | ||
6 | #if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP) | ||
7 | #define __trampinit | ||
8 | #define __trampinitdata | ||
9 | #else | ||
10 | #define __trampinit __cpuinit | ||
11 | #define __trampinitdata __cpuinitdata | ||
12 | #endif | ||
13 | |||
6 | /* ready for x86_64 and x86 */ | 14 | /* ready for x86_64 and x86 */ |
7 | unsigned char *__cpuinitdata trampoline_base = __va(TRAMPOLINE_BASE); | 15 | unsigned char *__trampinitdata trampoline_base; |
8 | 16 | ||
9 | void __init reserve_trampoline_memory(void) | 17 | void __init reserve_trampoline_memory(void) |
10 | { | 18 | { |
11 | #ifdef CONFIG_X86_32 | 19 | unsigned long mem; |
12 | /* | 20 | |
13 | * But first pinch a few for the stack/trampoline stuff | ||
14 | * FIXME: Don't need the extra page at 4K, but need to fix | ||
15 | * trampoline before removing it. (see the GDT stuff) | ||
16 | */ | ||
17 | reserve_early(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE"); | ||
18 | #endif | ||
19 | /* Has to be in very low memory so we can execute real-mode AP code. */ | 21 | /* Has to be in very low memory so we can execute real-mode AP code. */ |
20 | reserve_early(TRAMPOLINE_BASE, TRAMPOLINE_BASE + TRAMPOLINE_SIZE, | 22 | mem = find_e820_area(0, 1<<20, TRAMPOLINE_SIZE, PAGE_SIZE); |
21 | "TRAMPOLINE"); | 23 | if (mem == -1L) |
24 | panic("Cannot allocate trampoline\n"); | ||
25 | |||
26 | trampoline_base = __va(mem); | ||
27 | reserve_early(mem, mem + TRAMPOLINE_SIZE, "TRAMPOLINE"); | ||
22 | } | 28 | } |
23 | 29 | ||
24 | /* | 30 | /* |
@@ -26,7 +32,7 @@ void __init reserve_trampoline_memory(void) | |||
26 | * bootstrap into the page concerned. The caller | 32 | * bootstrap into the page concerned. The caller |
27 | * has made sure it's suitably aligned. | 33 | * has made sure it's suitably aligned. |
28 | */ | 34 | */ |
29 | unsigned long __cpuinit setup_trampoline(void) | 35 | unsigned long __trampinit setup_trampoline(void) |
30 | { | 36 | { |
31 | memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE); | 37 | memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE); |
32 | return virt_to_phys(trampoline_base); | 38 | return virt_to_phys(trampoline_base); |
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S index 596d54c660a5..3af2dff58b21 100644 --- a/arch/x86/kernel/trampoline_64.S +++ b/arch/x86/kernel/trampoline_64.S | |||
@@ -32,8 +32,12 @@ | |||
32 | #include <asm/segment.h> | 32 | #include <asm/segment.h> |
33 | #include <asm/processor-flags.h> | 33 | #include <asm/processor-flags.h> |
34 | 34 | ||
35 | #ifdef CONFIG_ACPI_SLEEP | ||
36 | .section .rodata, "a", @progbits | ||
37 | #else | ||
35 | /* We can free up the trampoline after bootup if cpu hotplug is not supported. */ | 38 | /* We can free up the trampoline after bootup if cpu hotplug is not supported. */ |
36 | __CPUINITRODATA | 39 | __CPUINITRODATA |
40 | #endif | ||
37 | .code16 | 41 | .code16 |
38 | 42 | ||
39 | ENTRY(trampoline_data) | 43 | ENTRY(trampoline_data) |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 7e37dcee0cc3..33399176512a 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -529,77 +529,56 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) | |||
529 | dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | 529 | dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) |
530 | { | 530 | { |
531 | struct task_struct *tsk = current; | 531 | struct task_struct *tsk = current; |
532 | unsigned long condition; | 532 | unsigned long dr6; |
533 | int si_code; | 533 | int si_code; |
534 | 534 | ||
535 | get_debugreg(condition, 6); | 535 | get_debugreg(dr6, 6); |
536 | 536 | ||
537 | /* Catch kmemcheck conditions first of all! */ | 537 | /* Catch kmemcheck conditions first of all! */ |
538 | if (condition & DR_STEP && kmemcheck_trap(regs)) | 538 | if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) |
539 | return; | 539 | return; |
540 | 540 | ||
541 | /* DR6 may or may not be cleared by the CPU */ | ||
542 | set_debugreg(0, 6); | ||
541 | /* | 543 | /* |
542 | * The processor cleared BTF, so don't mark that we need it set. | 544 | * The processor cleared BTF, so don't mark that we need it set. |
543 | */ | 545 | */ |
544 | clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); | 546 | clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); |
545 | tsk->thread.debugctlmsr = 0; | 547 | tsk->thread.debugctlmsr = 0; |
546 | 548 | ||
547 | if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, | 549 | /* Store the virtualized DR6 value */ |
548 | SIGTRAP) == NOTIFY_STOP) | 550 | tsk->thread.debugreg6 = dr6; |
551 | |||
552 | if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code, | ||
553 | SIGTRAP) == NOTIFY_STOP) | ||
549 | return; | 554 | return; |
550 | 555 | ||
551 | /* It's safe to allow irq's after DR6 has been saved */ | 556 | /* It's safe to allow irq's after DR6 has been saved */ |
552 | preempt_conditional_sti(regs); | 557 | preempt_conditional_sti(regs); |
553 | 558 | ||
554 | /* Mask out spurious debug traps due to lazy DR7 setting */ | 559 | if (regs->flags & X86_VM_MASK) { |
555 | if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { | 560 | handle_vm86_trap((struct kernel_vm86_regs *) regs, |
556 | if (!tsk->thread.debugreg7) | 561 | error_code, 1); |
557 | goto clear_dr7; | 562 | return; |
558 | } | 563 | } |
559 | 564 | ||
560 | #ifdef CONFIG_X86_32 | ||
561 | if (regs->flags & X86_VM_MASK) | ||
562 | goto debug_vm86; | ||
563 | #endif | ||
564 | |||
565 | /* Save debug status register where ptrace can see it */ | ||
566 | tsk->thread.debugreg6 = condition; | ||
567 | |||
568 | /* | 565 | /* |
569 | * Single-stepping through TF: make sure we ignore any events in | 566 | * Single-stepping through system calls: ignore any exceptions in |
570 | * kernel space (but re-enable TF when returning to user mode). | 567 | * kernel space, but re-enable TF when returning to user mode. |
568 | * | ||
569 | * We already checked v86 mode above, so we can check for kernel mode | ||
570 | * by just checking the CPL of CS. | ||
571 | */ | 571 | */ |
572 | if (condition & DR_STEP) { | 572 | if ((dr6 & DR_STEP) && !user_mode(regs)) { |
573 | if (!user_mode(regs)) | 573 | tsk->thread.debugreg6 &= ~DR_STEP; |
574 | goto clear_TF_reenable; | 574 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); |
575 | regs->flags &= ~X86_EFLAGS_TF; | ||
575 | } | 576 | } |
576 | 577 | si_code = get_si_code(tsk->thread.debugreg6); | |
577 | si_code = get_si_code(condition); | 578 | if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS)) |
578 | /* Ok, finally something we can handle */ | 579 | send_sigtrap(tsk, regs, error_code, si_code); |
579 | send_sigtrap(tsk, regs, error_code, si_code); | ||
580 | |||
581 | /* | ||
582 | * Disable additional traps. They'll be re-enabled when | ||
583 | * the signal is delivered. | ||
584 | */ | ||
585 | clear_dr7: | ||
586 | set_debugreg(0, 7); | ||
587 | preempt_conditional_cli(regs); | 580 | preempt_conditional_cli(regs); |
588 | return; | ||
589 | 581 | ||
590 | #ifdef CONFIG_X86_32 | ||
591 | debug_vm86: | ||
592 | /* reenable preemption: handle_vm86_trap() might sleep */ | ||
593 | dec_preempt_count(); | ||
594 | handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); | ||
595 | conditional_cli(regs); | ||
596 | return; | ||
597 | #endif | ||
598 | |||
599 | clear_TF_reenable: | ||
600 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); | ||
601 | regs->flags &= ~X86_EFLAGS_TF; | ||
602 | preempt_conditional_cli(regs); | ||
603 | return; | 582 | return; |
604 | } | 583 | } |
605 | 584 | ||
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index cd982f48e23e..597683aa5ba0 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -763,6 +763,7 @@ void mark_tsc_unstable(char *reason) | |||
763 | { | 763 | { |
764 | if (!tsc_unstable) { | 764 | if (!tsc_unstable) { |
765 | tsc_unstable = 1; | 765 | tsc_unstable = 1; |
766 | sched_clock_stable = 0; | ||
766 | printk(KERN_INFO "Marking TSC unstable due to %s\n", reason); | 767 | printk(KERN_INFO "Marking TSC unstable due to %s\n", reason); |
767 | /* Change only the rating, when not registered */ | 768 | /* Change only the rating, when not registered */ |
768 | if (clocksource_tsc.mult) | 769 | if (clocksource_tsc.mult) |
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index f37930954d15..0aa5fed8b9e6 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c | |||
@@ -33,7 +33,7 @@ static __cpuinitdata atomic_t stop_count; | |||
33 | * we want to have the fastest, inlined, non-debug version | 33 | * we want to have the fastest, inlined, non-debug version |
34 | * of a critical section, to be able to prove TSC time-warps: | 34 | * of a critical section, to be able to prove TSC time-warps: |
35 | */ | 35 | */ |
36 | static __cpuinitdata raw_spinlock_t sync_lock = __RAW_SPIN_LOCK_UNLOCKED; | 36 | static __cpuinitdata arch_spinlock_t sync_lock = __ARCH_SPIN_LOCK_UNLOCKED; |
37 | 37 | ||
38 | static __cpuinitdata cycles_t last_tsc; | 38 | static __cpuinitdata cycles_t last_tsc; |
39 | static __cpuinitdata cycles_t max_warp; | 39 | static __cpuinitdata cycles_t max_warp; |
@@ -62,13 +62,13 @@ static __cpuinit void check_tsc_warp(void) | |||
62 | * previous TSC that was measured (possibly on | 62 | * previous TSC that was measured (possibly on |
63 | * another CPU) and update the previous TSC timestamp. | 63 | * another CPU) and update the previous TSC timestamp. |
64 | */ | 64 | */ |
65 | __raw_spin_lock(&sync_lock); | 65 | arch_spin_lock(&sync_lock); |
66 | prev = last_tsc; | 66 | prev = last_tsc; |
67 | rdtsc_barrier(); | 67 | rdtsc_barrier(); |
68 | now = get_cycles(); | 68 | now = get_cycles(); |
69 | rdtsc_barrier(); | 69 | rdtsc_barrier(); |
70 | last_tsc = now; | 70 | last_tsc = now; |
71 | __raw_spin_unlock(&sync_lock); | 71 | arch_spin_unlock(&sync_lock); |
72 | 72 | ||
73 | /* | 73 | /* |
74 | * Be nice every now and then (and also check whether | 74 | * Be nice every now and then (and also check whether |
@@ -87,10 +87,10 @@ static __cpuinit void check_tsc_warp(void) | |||
87 | * we saw a time-warp of the TSC going backwards: | 87 | * we saw a time-warp of the TSC going backwards: |
88 | */ | 88 | */ |
89 | if (unlikely(prev > now)) { | 89 | if (unlikely(prev > now)) { |
90 | __raw_spin_lock(&sync_lock); | 90 | arch_spin_lock(&sync_lock); |
91 | max_warp = max(max_warp, prev - now); | 91 | max_warp = max(max_warp, prev - now); |
92 | nr_warps++; | 92 | nr_warps++; |
93 | __raw_spin_unlock(&sync_lock); | 93 | arch_spin_unlock(&sync_lock); |
94 | } | 94 | } |
95 | } | 95 | } |
96 | WARN(!(now-start), | 96 | WARN(!(now-start), |
@@ -114,13 +114,12 @@ void __cpuinit check_tsc_sync_source(int cpu) | |||
114 | return; | 114 | return; |
115 | 115 | ||
116 | if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { | 116 | if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { |
117 | printk_once(KERN_INFO "Skipping synchronization checks as TSC is reliable.\n"); | 117 | if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING) |
118 | pr_info( | ||
119 | "Skipped synchronization checks as TSC is reliable.\n"); | ||
118 | return; | 120 | return; |
119 | } | 121 | } |
120 | 122 | ||
121 | pr_info("checking TSC synchronization [CPU#%d -> CPU#%d]:", | ||
122 | smp_processor_id(), cpu); | ||
123 | |||
124 | /* | 123 | /* |
125 | * Reset it - in case this is a second bootup: | 124 | * Reset it - in case this is a second bootup: |
126 | */ | 125 | */ |
@@ -142,12 +141,14 @@ void __cpuinit check_tsc_sync_source(int cpu) | |||
142 | cpu_relax(); | 141 | cpu_relax(); |
143 | 142 | ||
144 | if (nr_warps) { | 143 | if (nr_warps) { |
145 | printk("\n"); | 144 | pr_warning("TSC synchronization [CPU#%d -> CPU#%d]:\n", |
145 | smp_processor_id(), cpu); | ||
146 | pr_warning("Measured %Ld cycles TSC warp between CPUs, " | 146 | pr_warning("Measured %Ld cycles TSC warp between CPUs, " |
147 | "turning off TSC clock.\n", max_warp); | 147 | "turning off TSC clock.\n", max_warp); |
148 | mark_tsc_unstable("check_tsc_sync_source failed"); | 148 | mark_tsc_unstable("check_tsc_sync_source failed"); |
149 | } else { | 149 | } else { |
150 | printk(" passed.\n"); | 150 | pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n", |
151 | smp_processor_id(), cpu); | ||
151 | } | 152 | } |
152 | 153 | ||
153 | /* | 154 | /* |
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c index aeef529917e4..ece73d8e3240 100644 --- a/arch/x86/kernel/uv_irq.c +++ b/arch/x86/kernel/uv_irq.c | |||
@@ -9,10 +9,25 @@ | |||
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/rbtree.h> | ||
12 | #include <linux/irq.h> | 13 | #include <linux/irq.h> |
13 | 14 | ||
14 | #include <asm/apic.h> | 15 | #include <asm/apic.h> |
15 | #include <asm/uv/uv_irq.h> | 16 | #include <asm/uv/uv_irq.h> |
17 | #include <asm/uv/uv_hub.h> | ||
18 | |||
19 | /* MMR offset and pnode of hub sourcing interrupts for a given irq */ | ||
20 | struct uv_irq_2_mmr_pnode{ | ||
21 | struct rb_node list; | ||
22 | unsigned long offset; | ||
23 | int pnode; | ||
24 | int irq; | ||
25 | }; | ||
26 | |||
27 | static spinlock_t uv_irq_lock; | ||
28 | static struct rb_root uv_irq_root; | ||
29 | |||
30 | static int uv_set_irq_affinity(unsigned int, const struct cpumask *); | ||
16 | 31 | ||
17 | static void uv_noop(unsigned int irq) | 32 | static void uv_noop(unsigned int irq) |
18 | { | 33 | { |
@@ -39,25 +54,213 @@ struct irq_chip uv_irq_chip = { | |||
39 | .unmask = uv_noop, | 54 | .unmask = uv_noop, |
40 | .eoi = uv_ack_apic, | 55 | .eoi = uv_ack_apic, |
41 | .end = uv_noop, | 56 | .end = uv_noop, |
57 | .set_affinity = uv_set_irq_affinity, | ||
42 | }; | 58 | }; |
43 | 59 | ||
44 | /* | 60 | /* |
61 | * Add offset and pnode information of the hub sourcing interrupts to the | ||
62 | * rb tree for a specific irq. | ||
63 | */ | ||
64 | static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade) | ||
65 | { | ||
66 | struct rb_node **link = &uv_irq_root.rb_node; | ||
67 | struct rb_node *parent = NULL; | ||
68 | struct uv_irq_2_mmr_pnode *n; | ||
69 | struct uv_irq_2_mmr_pnode *e; | ||
70 | unsigned long irqflags; | ||
71 | |||
72 | n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL, | ||
73 | uv_blade_to_memory_nid(blade)); | ||
74 | if (!n) | ||
75 | return -ENOMEM; | ||
76 | |||
77 | n->irq = irq; | ||
78 | n->offset = offset; | ||
79 | n->pnode = uv_blade_to_pnode(blade); | ||
80 | spin_lock_irqsave(&uv_irq_lock, irqflags); | ||
81 | /* Find the right place in the rbtree: */ | ||
82 | while (*link) { | ||
83 | parent = *link; | ||
84 | e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list); | ||
85 | |||
86 | if (unlikely(irq == e->irq)) { | ||
87 | /* irq entry exists */ | ||
88 | e->pnode = uv_blade_to_pnode(blade); | ||
89 | e->offset = offset; | ||
90 | spin_unlock_irqrestore(&uv_irq_lock, irqflags); | ||
91 | kfree(n); | ||
92 | return 0; | ||
93 | } | ||
94 | |||
95 | if (irq < e->irq) | ||
96 | link = &(*link)->rb_left; | ||
97 | else | ||
98 | link = &(*link)->rb_right; | ||
99 | } | ||
100 | |||
101 | /* Insert the node into the rbtree. */ | ||
102 | rb_link_node(&n->list, parent, link); | ||
103 | rb_insert_color(&n->list, &uv_irq_root); | ||
104 | |||
105 | spin_unlock_irqrestore(&uv_irq_lock, irqflags); | ||
106 | return 0; | ||
107 | } | ||
108 | |||
109 | /* Retrieve offset and pnode information from the rb tree for a specific irq */ | ||
110 | int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode) | ||
111 | { | ||
112 | struct uv_irq_2_mmr_pnode *e; | ||
113 | struct rb_node *n; | ||
114 | unsigned long irqflags; | ||
115 | |||
116 | spin_lock_irqsave(&uv_irq_lock, irqflags); | ||
117 | n = uv_irq_root.rb_node; | ||
118 | while (n) { | ||
119 | e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); | ||
120 | |||
121 | if (e->irq == irq) { | ||
122 | *offset = e->offset; | ||
123 | *pnode = e->pnode; | ||
124 | spin_unlock_irqrestore(&uv_irq_lock, irqflags); | ||
125 | return 0; | ||
126 | } | ||
127 | |||
128 | if (irq < e->irq) | ||
129 | n = n->rb_left; | ||
130 | else | ||
131 | n = n->rb_right; | ||
132 | } | ||
133 | spin_unlock_irqrestore(&uv_irq_lock, irqflags); | ||
134 | return -1; | ||
135 | } | ||
136 | |||
137 | /* | ||
138 | * Re-target the irq to the specified CPU and enable the specified MMR located | ||
139 | * on the specified blade to allow the sending of MSIs to the specified CPU. | ||
140 | */ | ||
141 | static int | ||
142 | arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | ||
143 | unsigned long mmr_offset, int restrict) | ||
144 | { | ||
145 | const struct cpumask *eligible_cpu = cpumask_of(cpu); | ||
146 | struct irq_desc *desc = irq_to_desc(irq); | ||
147 | struct irq_cfg *cfg; | ||
148 | int mmr_pnode; | ||
149 | unsigned long mmr_value; | ||
150 | struct uv_IO_APIC_route_entry *entry; | ||
151 | int err; | ||
152 | |||
153 | BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != | ||
154 | sizeof(unsigned long)); | ||
155 | |||
156 | cfg = irq_cfg(irq); | ||
157 | |||
158 | err = assign_irq_vector(irq, cfg, eligible_cpu); | ||
159 | if (err != 0) | ||
160 | return err; | ||
161 | |||
162 | if (restrict == UV_AFFINITY_CPU) | ||
163 | desc->status |= IRQ_NO_BALANCING; | ||
164 | else | ||
165 | desc->status |= IRQ_MOVE_PCNTXT; | ||
166 | |||
167 | set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, | ||
168 | irq_name); | ||
169 | |||
170 | mmr_value = 0; | ||
171 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; | ||
172 | entry->vector = cfg->vector; | ||
173 | entry->delivery_mode = apic->irq_delivery_mode; | ||
174 | entry->dest_mode = apic->irq_dest_mode; | ||
175 | entry->polarity = 0; | ||
176 | entry->trigger = 0; | ||
177 | entry->mask = 0; | ||
178 | entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); | ||
179 | |||
180 | mmr_pnode = uv_blade_to_pnode(mmr_blade); | ||
181 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | ||
182 | |||
183 | if (cfg->move_in_progress) | ||
184 | send_cleanup_vector(cfg); | ||
185 | |||
186 | return irq; | ||
187 | } | ||
188 | |||
189 | /* | ||
190 | * Disable the specified MMR located on the specified blade so that MSIs are | ||
191 | * longer allowed to be sent. | ||
192 | */ | ||
193 | static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset) | ||
194 | { | ||
195 | unsigned long mmr_value; | ||
196 | struct uv_IO_APIC_route_entry *entry; | ||
197 | |||
198 | BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != | ||
199 | sizeof(unsigned long)); | ||
200 | |||
201 | mmr_value = 0; | ||
202 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; | ||
203 | entry->mask = 1; | ||
204 | |||
205 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | ||
206 | } | ||
207 | |||
208 | static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask) | ||
209 | { | ||
210 | struct irq_desc *desc = irq_to_desc(irq); | ||
211 | struct irq_cfg *cfg = desc->chip_data; | ||
212 | unsigned int dest; | ||
213 | unsigned long mmr_value; | ||
214 | struct uv_IO_APIC_route_entry *entry; | ||
215 | unsigned long mmr_offset; | ||
216 | unsigned mmr_pnode; | ||
217 | |||
218 | if (set_desc_affinity(desc, mask, &dest)) | ||
219 | return -1; | ||
220 | |||
221 | mmr_value = 0; | ||
222 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; | ||
223 | |||
224 | entry->vector = cfg->vector; | ||
225 | entry->delivery_mode = apic->irq_delivery_mode; | ||
226 | entry->dest_mode = apic->irq_dest_mode; | ||
227 | entry->polarity = 0; | ||
228 | entry->trigger = 0; | ||
229 | entry->mask = 0; | ||
230 | entry->dest = dest; | ||
231 | |||
232 | /* Get previously stored MMR and pnode of hub sourcing interrupts */ | ||
233 | if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode)) | ||
234 | return -1; | ||
235 | |||
236 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | ||
237 | |||
238 | if (cfg->move_in_progress) | ||
239 | send_cleanup_vector(cfg); | ||
240 | |||
241 | return 0; | ||
242 | } | ||
243 | |||
244 | /* | ||
45 | * Set up a mapping of an available irq and vector, and enable the specified | 245 | * Set up a mapping of an available irq and vector, and enable the specified |
46 | * MMR that defines the MSI that is to be sent to the specified CPU when an | 246 | * MMR that defines the MSI that is to be sent to the specified CPU when an |
47 | * interrupt is raised. | 247 | * interrupt is raised. |
48 | */ | 248 | */ |
49 | int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, | 249 | int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, |
50 | unsigned long mmr_offset) | 250 | unsigned long mmr_offset, int restrict) |
51 | { | 251 | { |
52 | int irq; | 252 | int irq, ret; |
53 | int ret; | 253 | |
254 | irq = create_irq_nr(NR_IRQS_LEGACY, uv_blade_to_memory_nid(mmr_blade)); | ||
54 | 255 | ||
55 | irq = create_irq(); | ||
56 | if (irq <= 0) | 256 | if (irq <= 0) |
57 | return -EBUSY; | 257 | return -EBUSY; |
58 | 258 | ||
59 | ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset); | 259 | ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset, |
60 | if (ret != irq) | 260 | restrict); |
261 | if (ret == irq) | ||
262 | uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade); | ||
263 | else | ||
61 | destroy_irq(irq); | 264 | destroy_irq(irq); |
62 | 265 | ||
63 | return ret; | 266 | return ret; |
@@ -71,9 +274,28 @@ EXPORT_SYMBOL_GPL(uv_setup_irq); | |||
71 | * | 274 | * |
72 | * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq(). | 275 | * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq(). |
73 | */ | 276 | */ |
74 | void uv_teardown_irq(unsigned int irq, int mmr_blade, unsigned long mmr_offset) | 277 | void uv_teardown_irq(unsigned int irq) |
75 | { | 278 | { |
76 | arch_disable_uv_irq(mmr_blade, mmr_offset); | 279 | struct uv_irq_2_mmr_pnode *e; |
280 | struct rb_node *n; | ||
281 | unsigned long irqflags; | ||
282 | |||
283 | spin_lock_irqsave(&uv_irq_lock, irqflags); | ||
284 | n = uv_irq_root.rb_node; | ||
285 | while (n) { | ||
286 | e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); | ||
287 | if (e->irq == irq) { | ||
288 | arch_disable_uv_irq(e->pnode, e->offset); | ||
289 | rb_erase(n, &uv_irq_root); | ||
290 | kfree(e); | ||
291 | break; | ||
292 | } | ||
293 | if (irq < e->irq) | ||
294 | n = n->rb_left; | ||
295 | else | ||
296 | n = n->rb_right; | ||
297 | } | ||
298 | spin_unlock_irqrestore(&uv_irq_lock, irqflags); | ||
77 | destroy_irq(irq); | 299 | destroy_irq(irq); |
78 | } | 300 | } |
79 | EXPORT_SYMBOL_GPL(uv_teardown_irq); | 301 | EXPORT_SYMBOL_GPL(uv_teardown_irq); |
diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/kernel/uv_time.c index 583f11d5c480..3c84aa001c11 100644 --- a/arch/x86/kernel/uv_time.c +++ b/arch/x86/kernel/uv_time.c | |||
@@ -74,7 +74,7 @@ struct uv_rtc_timer_head { | |||
74 | */ | 74 | */ |
75 | static struct uv_rtc_timer_head **blade_info __read_mostly; | 75 | static struct uv_rtc_timer_head **blade_info __read_mostly; |
76 | 76 | ||
77 | static int uv_rtc_enable; | 77 | static int uv_rtc_evt_enable; |
78 | 78 | ||
79 | /* | 79 | /* |
80 | * Hardware interface routines | 80 | * Hardware interface routines |
@@ -90,7 +90,7 @@ static void uv_rtc_send_IPI(int cpu) | |||
90 | pnode = uv_apicid_to_pnode(apicid); | 90 | pnode = uv_apicid_to_pnode(apicid); |
91 | val = (1UL << UVH_IPI_INT_SEND_SHFT) | | 91 | val = (1UL << UVH_IPI_INT_SEND_SHFT) | |
92 | (apicid << UVH_IPI_INT_APIC_ID_SHFT) | | 92 | (apicid << UVH_IPI_INT_APIC_ID_SHFT) | |
93 | (GENERIC_INTERRUPT_VECTOR << UVH_IPI_INT_VECTOR_SHFT); | 93 | (X86_PLATFORM_IPI_VECTOR << UVH_IPI_INT_VECTOR_SHFT); |
94 | 94 | ||
95 | uv_write_global_mmr64(pnode, UVH_IPI_INT, val); | 95 | uv_write_global_mmr64(pnode, UVH_IPI_INT, val); |
96 | } | 96 | } |
@@ -115,7 +115,7 @@ static int uv_setup_intr(int cpu, u64 expires) | |||
115 | uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS, | 115 | uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS, |
116 | UVH_EVENT_OCCURRED0_RTC1_MASK); | 116 | UVH_EVENT_OCCURRED0_RTC1_MASK); |
117 | 117 | ||
118 | val = (GENERIC_INTERRUPT_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) | | 118 | val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) | |
119 | ((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); | 119 | ((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); |
120 | 120 | ||
121 | /* Set configuration */ | 121 | /* Set configuration */ |
@@ -123,7 +123,10 @@ static int uv_setup_intr(int cpu, u64 expires) | |||
123 | /* Initialize comparator value */ | 123 | /* Initialize comparator value */ |
124 | uv_write_global_mmr64(pnode, UVH_INT_CMPB, expires); | 124 | uv_write_global_mmr64(pnode, UVH_INT_CMPB, expires); |
125 | 125 | ||
126 | return (expires < uv_read_rtc(NULL) && !uv_intr_pending(pnode)); | 126 | if (uv_read_rtc(NULL) <= expires) |
127 | return 0; | ||
128 | |||
129 | return !uv_intr_pending(pnode); | ||
127 | } | 130 | } |
128 | 131 | ||
129 | /* | 132 | /* |
@@ -223,6 +226,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires) | |||
223 | 226 | ||
224 | next_cpu = head->next_cpu; | 227 | next_cpu = head->next_cpu; |
225 | *t = expires; | 228 | *t = expires; |
229 | |||
226 | /* Will this one be next to go off? */ | 230 | /* Will this one be next to go off? */ |
227 | if (next_cpu < 0 || bcpu == next_cpu || | 231 | if (next_cpu < 0 || bcpu == next_cpu || |
228 | expires < head->cpu[next_cpu].expires) { | 232 | expires < head->cpu[next_cpu].expires) { |
@@ -231,7 +235,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires) | |||
231 | *t = ULLONG_MAX; | 235 | *t = ULLONG_MAX; |
232 | uv_rtc_find_next_timer(head, pnode); | 236 | uv_rtc_find_next_timer(head, pnode); |
233 | spin_unlock_irqrestore(&head->lock, flags); | 237 | spin_unlock_irqrestore(&head->lock, flags); |
234 | return 1; | 238 | return -ETIME; |
235 | } | 239 | } |
236 | } | 240 | } |
237 | 241 | ||
@@ -244,7 +248,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires) | |||
244 | * | 248 | * |
245 | * Returns 1 if this timer was pending. | 249 | * Returns 1 if this timer was pending. |
246 | */ | 250 | */ |
247 | static int uv_rtc_unset_timer(int cpu) | 251 | static int uv_rtc_unset_timer(int cpu, int force) |
248 | { | 252 | { |
249 | int pnode = uv_cpu_to_pnode(cpu); | 253 | int pnode = uv_cpu_to_pnode(cpu); |
250 | int bid = uv_cpu_to_blade_id(cpu); | 254 | int bid = uv_cpu_to_blade_id(cpu); |
@@ -256,14 +260,15 @@ static int uv_rtc_unset_timer(int cpu) | |||
256 | 260 | ||
257 | spin_lock_irqsave(&head->lock, flags); | 261 | spin_lock_irqsave(&head->lock, flags); |
258 | 262 | ||
259 | if (head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) | 263 | if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force) |
260 | rc = 1; | 264 | rc = 1; |
261 | 265 | ||
262 | *t = ULLONG_MAX; | 266 | if (rc) { |
263 | 267 | *t = ULLONG_MAX; | |
264 | /* Was the hardware setup for this timer? */ | 268 | /* Was the hardware setup for this timer? */ |
265 | if (head->next_cpu == bcpu) | 269 | if (head->next_cpu == bcpu) |
266 | uv_rtc_find_next_timer(head, pnode); | 270 | uv_rtc_find_next_timer(head, pnode); |
271 | } | ||
267 | 272 | ||
268 | spin_unlock_irqrestore(&head->lock, flags); | 273 | spin_unlock_irqrestore(&head->lock, flags); |
269 | 274 | ||
@@ -310,32 +315,32 @@ static void uv_rtc_timer_setup(enum clock_event_mode mode, | |||
310 | break; | 315 | break; |
311 | case CLOCK_EVT_MODE_UNUSED: | 316 | case CLOCK_EVT_MODE_UNUSED: |
312 | case CLOCK_EVT_MODE_SHUTDOWN: | 317 | case CLOCK_EVT_MODE_SHUTDOWN: |
313 | uv_rtc_unset_timer(ced_cpu); | 318 | uv_rtc_unset_timer(ced_cpu, 1); |
314 | break; | 319 | break; |
315 | } | 320 | } |
316 | } | 321 | } |
317 | 322 | ||
318 | static void uv_rtc_interrupt(void) | 323 | static void uv_rtc_interrupt(void) |
319 | { | 324 | { |
320 | struct clock_event_device *ced = &__get_cpu_var(cpu_ced); | ||
321 | int cpu = smp_processor_id(); | 325 | int cpu = smp_processor_id(); |
326 | struct clock_event_device *ced = &per_cpu(cpu_ced, cpu); | ||
322 | 327 | ||
323 | if (!ced || !ced->event_handler) | 328 | if (!ced || !ced->event_handler) |
324 | return; | 329 | return; |
325 | 330 | ||
326 | if (uv_rtc_unset_timer(cpu) != 1) | 331 | if (uv_rtc_unset_timer(cpu, 0) != 1) |
327 | return; | 332 | return; |
328 | 333 | ||
329 | ced->event_handler(ced); | 334 | ced->event_handler(ced); |
330 | } | 335 | } |
331 | 336 | ||
332 | static int __init uv_enable_rtc(char *str) | 337 | static int __init uv_enable_evt_rtc(char *str) |
333 | { | 338 | { |
334 | uv_rtc_enable = 1; | 339 | uv_rtc_evt_enable = 1; |
335 | 340 | ||
336 | return 1; | 341 | return 1; |
337 | } | 342 | } |
338 | __setup("uvrtc", uv_enable_rtc); | 343 | __setup("uvrtcevt", uv_enable_evt_rtc); |
339 | 344 | ||
340 | static __init void uv_rtc_register_clockevents(struct work_struct *dummy) | 345 | static __init void uv_rtc_register_clockevents(struct work_struct *dummy) |
341 | { | 346 | { |
@@ -350,27 +355,32 @@ static __init int uv_rtc_setup_clock(void) | |||
350 | { | 355 | { |
351 | int rc; | 356 | int rc; |
352 | 357 | ||
353 | if (!uv_rtc_enable || !is_uv_system() || generic_interrupt_extension) | 358 | if (!is_uv_system()) |
354 | return -ENODEV; | 359 | return -ENODEV; |
355 | 360 | ||
356 | generic_interrupt_extension = uv_rtc_interrupt; | ||
357 | |||
358 | clocksource_uv.mult = clocksource_hz2mult(sn_rtc_cycles_per_second, | 361 | clocksource_uv.mult = clocksource_hz2mult(sn_rtc_cycles_per_second, |
359 | clocksource_uv.shift); | 362 | clocksource_uv.shift); |
360 | 363 | ||
364 | /* If single blade, prefer tsc */ | ||
365 | if (uv_num_possible_blades() == 1) | ||
366 | clocksource_uv.rating = 250; | ||
367 | |||
361 | rc = clocksource_register(&clocksource_uv); | 368 | rc = clocksource_register(&clocksource_uv); |
362 | if (rc) { | 369 | if (rc) |
363 | generic_interrupt_extension = NULL; | 370 | printk(KERN_INFO "UV RTC clocksource failed rc %d\n", rc); |
371 | else | ||
372 | printk(KERN_INFO "UV RTC clocksource registered freq %lu MHz\n", | ||
373 | sn_rtc_cycles_per_second/(unsigned long)1E6); | ||
374 | |||
375 | if (rc || !uv_rtc_evt_enable || x86_platform_ipi_callback) | ||
364 | return rc; | 376 | return rc; |
365 | } | ||
366 | 377 | ||
367 | /* Setup and register clockevents */ | 378 | /* Setup and register clockevents */ |
368 | rc = uv_rtc_allocate_timers(); | 379 | rc = uv_rtc_allocate_timers(); |
369 | if (rc) { | 380 | if (rc) |
370 | clocksource_unregister(&clocksource_uv); | 381 | goto error; |
371 | generic_interrupt_extension = NULL; | 382 | |
372 | return rc; | 383 | x86_platform_ipi_callback = uv_rtc_interrupt; |
373 | } | ||
374 | 384 | ||
375 | clock_event_device_uv.mult = div_sc(sn_rtc_cycles_per_second, | 385 | clock_event_device_uv.mult = div_sc(sn_rtc_cycles_per_second, |
376 | NSEC_PER_SEC, clock_event_device_uv.shift); | 386 | NSEC_PER_SEC, clock_event_device_uv.shift); |
@@ -383,11 +393,19 @@ static __init int uv_rtc_setup_clock(void) | |||
383 | 393 | ||
384 | rc = schedule_on_each_cpu(uv_rtc_register_clockevents); | 394 | rc = schedule_on_each_cpu(uv_rtc_register_clockevents); |
385 | if (rc) { | 395 | if (rc) { |
386 | clocksource_unregister(&clocksource_uv); | 396 | x86_platform_ipi_callback = NULL; |
387 | generic_interrupt_extension = NULL; | ||
388 | uv_rtc_deallocate_timers(); | 397 | uv_rtc_deallocate_timers(); |
398 | goto error; | ||
389 | } | 399 | } |
390 | 400 | ||
401 | printk(KERN_INFO "UV RTC clockevents registered\n"); | ||
402 | |||
403 | return 0; | ||
404 | |||
405 | error: | ||
406 | clocksource_unregister(&clocksource_uv); | ||
407 | printk(KERN_INFO "UV RTC clockevents failed rc %d\n", rc); | ||
408 | |||
391 | return rc; | 409 | return rc; |
392 | } | 410 | } |
393 | arch_initcall(uv_rtc_setup_clock); | 411 | arch_initcall(uv_rtc_setup_clock); |
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index f068553a1b17..34a279a7471d 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c | |||
@@ -183,7 +183,7 @@ static void __init MP_processor_info(struct mpc_cpu *m) | |||
183 | return; | 183 | return; |
184 | } | 184 | } |
185 | 185 | ||
186 | apic_cpus = apic->apicid_to_cpu_present(m->apicid); | 186 | apic->apicid_to_cpu_present(m->apicid, &apic_cpus); |
187 | physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus); | 187 | physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus); |
188 | /* | 188 | /* |
189 | * Validate version | 189 | * Validate version |
@@ -197,7 +197,7 @@ static void __init MP_processor_info(struct mpc_cpu *m) | |||
197 | apic_version[m->apicid] = ver; | 197 | apic_version[m->apicid] = ver; |
198 | } | 198 | } |
199 | 199 | ||
200 | static void __init visws_find_smp_config(unsigned int reserve) | 200 | static void __init visws_find_smp_config(void) |
201 | { | 201 | { |
202 | struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS); | 202 | struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS); |
203 | unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); | 203 | unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); |
@@ -486,7 +486,7 @@ static void end_cobalt_irq(unsigned int irq) | |||
486 | } | 486 | } |
487 | 487 | ||
488 | static struct irq_chip cobalt_irq_type = { | 488 | static struct irq_chip cobalt_irq_type = { |
489 | .typename = "Cobalt-APIC", | 489 | .name = "Cobalt-APIC", |
490 | .startup = startup_cobalt_irq, | 490 | .startup = startup_cobalt_irq, |
491 | .shutdown = disable_cobalt_irq, | 491 | .shutdown = disable_cobalt_irq, |
492 | .enable = enable_cobalt_irq, | 492 | .enable = enable_cobalt_irq, |
@@ -523,7 +523,7 @@ static void end_piix4_master_irq(unsigned int irq) | |||
523 | } | 523 | } |
524 | 524 | ||
525 | static struct irq_chip piix4_master_irq_type = { | 525 | static struct irq_chip piix4_master_irq_type = { |
526 | .typename = "PIIX4-master", | 526 | .name = "PIIX4-master", |
527 | .startup = startup_piix4_master_irq, | 527 | .startup = startup_piix4_master_irq, |
528 | .ack = ack_cobalt_irq, | 528 | .ack = ack_cobalt_irq, |
529 | .end = end_piix4_master_irq, | 529 | .end = end_piix4_master_irq, |
@@ -531,7 +531,7 @@ static struct irq_chip piix4_master_irq_type = { | |||
531 | 531 | ||
532 | 532 | ||
533 | static struct irq_chip piix4_virtual_irq_type = { | 533 | static struct irq_chip piix4_virtual_irq_type = { |
534 | .typename = "PIIX4-virtual", | 534 | .name = "PIIX4-virtual", |
535 | .shutdown = disable_8259A_irq, | 535 | .shutdown = disable_8259A_irq, |
536 | .enable = enable_8259A_irq, | 536 | .enable = enable_8259A_irq, |
537 | .disable = disable_8259A_irq, | 537 | .disable = disable_8259A_irq, |
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 9c4e62539058..5ffb5622f793 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c | |||
@@ -197,9 +197,8 @@ out: | |||
197 | static int do_vm86_irq_handling(int subfunction, int irqnumber); | 197 | static int do_vm86_irq_handling(int subfunction, int irqnumber); |
198 | static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); | 198 | static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); |
199 | 199 | ||
200 | int sys_vm86old(struct pt_regs *regs) | 200 | int sys_vm86old(struct vm86_struct __user *v86, struct pt_regs *regs) |
201 | { | 201 | { |
202 | struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs->bx; | ||
203 | struct kernel_vm86_struct info; /* declare this _on top_, | 202 | struct kernel_vm86_struct info; /* declare this _on top_, |
204 | * this avoids wasting of stack space. | 203 | * this avoids wasting of stack space. |
205 | * This remains on the stack until we | 204 | * This remains on the stack until we |
@@ -227,7 +226,7 @@ out: | |||
227 | } | 226 | } |
228 | 227 | ||
229 | 228 | ||
230 | int sys_vm86(struct pt_regs *regs) | 229 | int sys_vm86(unsigned long cmd, unsigned long arg, struct pt_regs *regs) |
231 | { | 230 | { |
232 | struct kernel_vm86_struct info; /* declare this _on top_, | 231 | struct kernel_vm86_struct info; /* declare this _on top_, |
233 | * this avoids wasting of stack space. | 232 | * this avoids wasting of stack space. |
@@ -239,12 +238,12 @@ int sys_vm86(struct pt_regs *regs) | |||
239 | struct vm86plus_struct __user *v86; | 238 | struct vm86plus_struct __user *v86; |
240 | 239 | ||
241 | tsk = current; | 240 | tsk = current; |
242 | switch (regs->bx) { | 241 | switch (cmd) { |
243 | case VM86_REQUEST_IRQ: | 242 | case VM86_REQUEST_IRQ: |
244 | case VM86_FREE_IRQ: | 243 | case VM86_FREE_IRQ: |
245 | case VM86_GET_IRQ_BITS: | 244 | case VM86_GET_IRQ_BITS: |
246 | case VM86_GET_AND_RESET_IRQ: | 245 | case VM86_GET_AND_RESET_IRQ: |
247 | ret = do_vm86_irq_handling(regs->bx, (int)regs->cx); | 246 | ret = do_vm86_irq_handling(cmd, (int)arg); |
248 | goto out; | 247 | goto out; |
249 | case VM86_PLUS_INSTALL_CHECK: | 248 | case VM86_PLUS_INSTALL_CHECK: |
250 | /* | 249 | /* |
@@ -261,7 +260,7 @@ int sys_vm86(struct pt_regs *regs) | |||
261 | ret = -EPERM; | 260 | ret = -EPERM; |
262 | if (tsk->thread.saved_sp0) | 261 | if (tsk->thread.saved_sp0) |
263 | goto out; | 262 | goto out; |
264 | v86 = (struct vm86plus_struct __user *)regs->cx; | 263 | v86 = (struct vm86plus_struct __user *)arg; |
265 | tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, | 264 | tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, |
266 | offsetof(struct kernel_vm86_struct, regs32) - | 265 | offsetof(struct kernel_vm86_struct, regs32) - |
267 | sizeof(info.regs)); | 266 | sizeof(info.regs)); |
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 31e6f6cfe53e..d430e4c30193 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c | |||
@@ -648,7 +648,7 @@ static inline int __init activate_vmi(void) | |||
648 | 648 | ||
649 | pv_info.paravirt_enabled = 1; | 649 | pv_info.paravirt_enabled = 1; |
650 | pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK; | 650 | pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK; |
651 | pv_info.name = "vmi"; | 651 | pv_info.name = "vmi [deprecated]"; |
652 | 652 | ||
653 | pv_init_ops.patch = vmi_patch; | 653 | pv_init_ops.patch = vmi_patch; |
654 | 654 | ||
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index 611b9e2360d3..74c92bb194df 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c | |||
@@ -226,7 +226,7 @@ static void __devinit vmi_time_init_clockevent(void) | |||
226 | evt->min_delta_ns = clockevent_delta2ns(1, evt); | 226 | evt->min_delta_ns = clockevent_delta2ns(1, evt); |
227 | evt->cpumask = cpumask_of(cpu); | 227 | evt->cpumask = cpumask_of(cpu); |
228 | 228 | ||
229 | printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n", | 229 | printk(KERN_WARNING "vmi: registering clock event %s. mult=%u shift=%u\n", |
230 | evt->name, evt->mult, evt->shift); | 230 | evt->name, evt->mult, evt->shift); |
231 | clockevents_register_device(evt); | 231 | clockevents_register_device(evt); |
232 | } | 232 | } |
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index ecb92717c412..44879df55696 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -41,6 +41,32 @@ ENTRY(phys_startup_64) | |||
41 | jiffies_64 = jiffies; | 41 | jiffies_64 = jiffies; |
42 | #endif | 42 | #endif |
43 | 43 | ||
44 | #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) | ||
45 | /* | ||
46 | * On 64-bit, align RODATA to 2MB so that even with CONFIG_DEBUG_RODATA | ||
47 | * we retain large page mappings for boundaries spanning kernel text, rodata | ||
48 | * and data sections. | ||
49 | * | ||
50 | * However, kernel identity mappings will have different RWX permissions | ||
51 | * to the pages mapping to text and to the pages padding (which are freed) the | ||
52 | * text section. Hence kernel identity mappings will be broken to smaller | ||
53 | * pages. For 64-bit, kernel text and kernel identity mappings are different, | ||
54 | * so we can enable protection checks that come with CONFIG_DEBUG_RODATA, | ||
55 | * as well as retain 2MB large page mappings for kernel text. | ||
56 | */ | ||
57 | #define X64_ALIGN_DEBUG_RODATA_BEGIN . = ALIGN(HPAGE_SIZE); | ||
58 | |||
59 | #define X64_ALIGN_DEBUG_RODATA_END \ | ||
60 | . = ALIGN(HPAGE_SIZE); \ | ||
61 | __end_rodata_hpage_align = .; | ||
62 | |||
63 | #else | ||
64 | |||
65 | #define X64_ALIGN_DEBUG_RODATA_BEGIN | ||
66 | #define X64_ALIGN_DEBUG_RODATA_END | ||
67 | |||
68 | #endif | ||
69 | |||
44 | PHDRS { | 70 | PHDRS { |
45 | text PT_LOAD FLAGS(5); /* R_E */ | 71 | text PT_LOAD FLAGS(5); /* R_E */ |
46 | data PT_LOAD FLAGS(7); /* RWE */ | 72 | data PT_LOAD FLAGS(7); /* RWE */ |
@@ -90,7 +116,9 @@ SECTIONS | |||
90 | 116 | ||
91 | EXCEPTION_TABLE(16) :text = 0x9090 | 117 | EXCEPTION_TABLE(16) :text = 0x9090 |
92 | 118 | ||
119 | X64_ALIGN_DEBUG_RODATA_BEGIN | ||
93 | RO_DATA(PAGE_SIZE) | 120 | RO_DATA(PAGE_SIZE) |
121 | X64_ALIGN_DEBUG_RODATA_END | ||
94 | 122 | ||
95 | /* Data */ | 123 | /* Data */ |
96 | .data : AT(ADDR(.data) - LOAD_OFFSET) { | 124 | .data : AT(ADDR(.data) - LOAD_OFFSET) { |
@@ -107,13 +135,13 @@ SECTIONS | |||
107 | 135 | ||
108 | PAGE_ALIGNED_DATA(PAGE_SIZE) | 136 | PAGE_ALIGNED_DATA(PAGE_SIZE) |
109 | 137 | ||
110 | CACHELINE_ALIGNED_DATA(CONFIG_X86_L1_CACHE_BYTES) | 138 | CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES) |
111 | 139 | ||
112 | DATA_DATA | 140 | DATA_DATA |
113 | CONSTRUCTORS | 141 | CONSTRUCTORS |
114 | 142 | ||
115 | /* rarely changed data like cpu maps */ | 143 | /* rarely changed data like cpu maps */ |
116 | READ_MOSTLY_DATA(CONFIG_X86_INTERNODE_CACHE_BYTES) | 144 | READ_MOSTLY_DATA(INTERNODE_CACHE_BYTES) |
117 | 145 | ||
118 | /* End of data section */ | 146 | /* End of data section */ |
119 | _edata = .; | 147 | _edata = .; |
@@ -137,12 +165,12 @@ SECTIONS | |||
137 | *(.vsyscall_0) | 165 | *(.vsyscall_0) |
138 | } :user | 166 | } :user |
139 | 167 | ||
140 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | 168 | . = ALIGN(L1_CACHE_BYTES); |
141 | .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { | 169 | .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { |
142 | *(.vsyscall_fn) | 170 | *(.vsyscall_fn) |
143 | } | 171 | } |
144 | 172 | ||
145 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | 173 | . = ALIGN(L1_CACHE_BYTES); |
146 | .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) { | 174 | .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) { |
147 | *(.vsyscall_gtod_data) | 175 | *(.vsyscall_gtod_data) |
148 | } | 176 | } |
@@ -166,7 +194,7 @@ SECTIONS | |||
166 | } | 194 | } |
167 | vgetcpu_mode = VVIRT(.vgetcpu_mode); | 195 | vgetcpu_mode = VVIRT(.vgetcpu_mode); |
168 | 196 | ||
169 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | 197 | . = ALIGN(L1_CACHE_BYTES); |
170 | .jiffies : AT(VLOAD(.jiffies)) { | 198 | .jiffies : AT(VLOAD(.jiffies)) { |
171 | *(.jiffies) | 199 | *(.jiffies) |
172 | } | 200 | } |
@@ -291,9 +319,7 @@ SECTIONS | |||
291 | __brk_limit = .; | 319 | __brk_limit = .; |
292 | } | 320 | } |
293 | 321 | ||
294 | .end : AT(ADDR(.end) - LOAD_OFFSET) { | 322 | _end = .; |
295 | _end = .; | ||
296 | } | ||
297 | 323 | ||
298 | STABS_DEBUG | 324 | STABS_DEBUG |
299 | DWARF_DEBUG | 325 | DWARF_DEBUG |
@@ -305,6 +331,9 @@ SECTIONS | |||
305 | 331 | ||
306 | 332 | ||
307 | #ifdef CONFIG_X86_32 | 333 | #ifdef CONFIG_X86_32 |
334 | /* | ||
335 | * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility: | ||
336 | */ | ||
308 | . = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), | 337 | . = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), |
309 | "kernel image bigger than KERNEL_IMAGE_SIZE"); | 338 | "kernel image bigger than KERNEL_IMAGE_SIZE"); |
310 | #else | 339 | #else |
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 8cb4974ff599..9055e5872ff0 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -73,7 +73,8 @@ void update_vsyscall_tz(void) | |||
73 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); | 73 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); |
74 | } | 74 | } |
75 | 75 | ||
76 | void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) | 76 | void update_vsyscall(struct timespec *wall_time, struct clocksource *clock, |
77 | u32 mult) | ||
77 | { | 78 | { |
78 | unsigned long flags; | 79 | unsigned long flags; |
79 | 80 | ||
@@ -82,7 +83,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) | |||
82 | vsyscall_gtod_data.clock.vread = clock->vread; | 83 | vsyscall_gtod_data.clock.vread = clock->vread; |
83 | vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; | 84 | vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; |
84 | vsyscall_gtod_data.clock.mask = clock->mask; | 85 | vsyscall_gtod_data.clock.mask = clock->mask; |
85 | vsyscall_gtod_data.clock.mult = clock->mult; | 86 | vsyscall_gtod_data.clock.mult = mult; |
86 | vsyscall_gtod_data.clock.shift = clock->shift; | 87 | vsyscall_gtod_data.clock.shift = clock->shift; |
87 | vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; | 88 | vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; |
88 | vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; | 89 | vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; |
@@ -237,7 +238,7 @@ static ctl_table kernel_table2[] = { | |||
237 | }; | 238 | }; |
238 | 239 | ||
239 | static ctl_table kernel_root_table2[] = { | 240 | static ctl_table kernel_root_table2[] = { |
240 | { .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555, | 241 | { .procname = "kernel", .mode = 0555, |
241 | .child = kernel_table2 }, | 242 | .child = kernel_table2 }, |
242 | {} | 243 | {} |
243 | }; | 244 | }; |
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 3909e3ba5ce3..619f7f88b8cc 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c | |||
@@ -17,8 +17,6 @@ | |||
17 | EXPORT_SYMBOL(mcount); | 17 | EXPORT_SYMBOL(mcount); |
18 | #endif | 18 | #endif |
19 | 19 | ||
20 | EXPORT_SYMBOL(kernel_thread); | ||
21 | |||
22 | EXPORT_SYMBOL(__get_user_1); | 20 | EXPORT_SYMBOL(__get_user_1); |
23 | EXPORT_SYMBOL(__get_user_2); | 21 | EXPORT_SYMBOL(__get_user_2); |
24 | EXPORT_SYMBOL(__get_user_4); | 22 | EXPORT_SYMBOL(__get_user_4); |
@@ -30,9 +28,8 @@ EXPORT_SYMBOL(__put_user_8); | |||
30 | 28 | ||
31 | EXPORT_SYMBOL(copy_user_generic); | 29 | EXPORT_SYMBOL(copy_user_generic); |
32 | EXPORT_SYMBOL(__copy_user_nocache); | 30 | EXPORT_SYMBOL(__copy_user_nocache); |
33 | EXPORT_SYMBOL(copy_from_user); | 31 | EXPORT_SYMBOL(_copy_from_user); |
34 | EXPORT_SYMBOL(copy_to_user); | 32 | EXPORT_SYMBOL(_copy_to_user); |
35 | EXPORT_SYMBOL(__copy_from_user_inatomic); | ||
36 | 33 | ||
37 | EXPORT_SYMBOL(copy_page); | 34 | EXPORT_SYMBOL(copy_page); |
38 | EXPORT_SYMBOL(clear_page); | 35 | EXPORT_SYMBOL(clear_page); |
@@ -57,4 +54,6 @@ EXPORT_SYMBOL(__memcpy); | |||
57 | 54 | ||
58 | EXPORT_SYMBOL(empty_zero_page); | 55 | EXPORT_SYMBOL(empty_zero_page); |
59 | EXPORT_SYMBOL(init_level4_pgt); | 56 | EXPORT_SYMBOL(init_level4_pgt); |
60 | EXPORT_SYMBOL(load_gs_index); | 57 | #ifndef CONFIG_PARAVIRT |
58 | EXPORT_SYMBOL(native_load_gs_index); | ||
59 | #endif | ||
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 4449a4a2c2ed..ccd179dec36e 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c | |||
@@ -13,11 +13,15 @@ | |||
13 | #include <asm/e820.h> | 13 | #include <asm/e820.h> |
14 | #include <asm/time.h> | 14 | #include <asm/time.h> |
15 | #include <asm/irq.h> | 15 | #include <asm/irq.h> |
16 | #include <asm/pat.h> | ||
16 | #include <asm/tsc.h> | 17 | #include <asm/tsc.h> |
18 | #include <asm/iommu.h> | ||
17 | 19 | ||
18 | void __cpuinit x86_init_noop(void) { } | 20 | void __cpuinit x86_init_noop(void) { } |
19 | void __init x86_init_uint_noop(unsigned int unused) { } | 21 | void __init x86_init_uint_noop(unsigned int unused) { } |
20 | void __init x86_init_pgd_noop(pgd_t *unused) { } | 22 | void __init x86_init_pgd_noop(pgd_t *unused) { } |
23 | int __init iommu_init_noop(void) { return 0; } | ||
24 | void iommu_shutdown_noop(void) { } | ||
21 | 25 | ||
22 | /* | 26 | /* |
23 | * The platform setup functions are preset with the default functions | 27 | * The platform setup functions are preset with the default functions |
@@ -62,6 +66,10 @@ struct x86_init_ops x86_init __initdata = { | |||
62 | .tsc_pre_init = x86_init_noop, | 66 | .tsc_pre_init = x86_init_noop, |
63 | .timer_init = hpet_time_init, | 67 | .timer_init = hpet_time_init, |
64 | }, | 68 | }, |
69 | |||
70 | .iommu = { | ||
71 | .iommu_init = iommu_init_noop, | ||
72 | }, | ||
65 | }; | 73 | }; |
66 | 74 | ||
67 | struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { | 75 | struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { |
@@ -72,4 +80,6 @@ struct x86_platform_ops x86_platform = { | |||
72 | .calibrate_tsc = native_calibrate_tsc, | 80 | .calibrate_tsc = native_calibrate_tsc, |
73 | .get_wallclock = mach_get_cmos_time, | 81 | .get_wallclock = mach_get_cmos_time, |
74 | .set_wallclock = mach_set_rtc_mmss, | 82 | .set_wallclock = mach_set_rtc_mmss, |
83 | .iommu_shutdown = iommu_shutdown_noop, | ||
84 | .is_untracked_pat_range = is_ISA_range, | ||
75 | }; | 85 | }; |
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index b84e571f4175..4cd498332466 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
@@ -28,6 +28,7 @@ config KVM | |||
28 | select HAVE_KVM_IRQCHIP | 28 | select HAVE_KVM_IRQCHIP |
29 | select HAVE_KVM_EVENTFD | 29 | select HAVE_KVM_EVENTFD |
30 | select KVM_APIC_ARCHITECTURE | 30 | select KVM_APIC_ARCHITECTURE |
31 | select USER_RETURN_NOTIFIER | ||
31 | ---help--- | 32 | ---help--- |
32 | Support hosting fully virtualized guest machines using hardware | 33 | Support hosting fully virtualized guest machines using hardware |
33 | virtualization extensions. You will need a fairly recent | 34 | virtualization extensions. You will need a fairly recent |
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 0e7fe78d0f74..31a7035c4bd9 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile | |||
@@ -6,7 +6,8 @@ CFLAGS_svm.o := -I. | |||
6 | CFLAGS_vmx.o := -I. | 6 | CFLAGS_vmx.o := -I. |
7 | 7 | ||
8 | kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ | 8 | kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ |
9 | coalesced_mmio.o irq_comm.o eventfd.o) | 9 | coalesced_mmio.o irq_comm.o eventfd.o \ |
10 | assigned-dev.o) | ||
10 | kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o) | 11 | kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o) |
11 | 12 | ||
12 | kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ | 13 | kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ |
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 1be5cd640e93..7e8faea4651e 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -75,6 +75,8 @@ | |||
75 | #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ | 75 | #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ |
76 | #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ | 76 | #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ |
77 | #define GroupMask 0xff /* Group number stored in bits 0:7 */ | 77 | #define GroupMask 0xff /* Group number stored in bits 0:7 */ |
78 | /* Misc flags */ | ||
79 | #define No64 (1<<28) | ||
78 | /* Source 2 operand type */ | 80 | /* Source 2 operand type */ |
79 | #define Src2None (0<<29) | 81 | #define Src2None (0<<29) |
80 | #define Src2CL (1<<29) | 82 | #define Src2CL (1<<29) |
@@ -92,19 +94,23 @@ static u32 opcode_table[256] = { | |||
92 | /* 0x00 - 0x07 */ | 94 | /* 0x00 - 0x07 */ |
93 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 95 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
94 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 96 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
95 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0, | 97 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, |
98 | ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, | ||
96 | /* 0x08 - 0x0F */ | 99 | /* 0x08 - 0x0F */ |
97 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 100 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
98 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 101 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
99 | 0, 0, 0, 0, | 102 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, |
103 | ImplicitOps | Stack | No64, 0, | ||
100 | /* 0x10 - 0x17 */ | 104 | /* 0x10 - 0x17 */ |
101 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 105 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
102 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 106 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
103 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0, | 107 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, |
108 | ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, | ||
104 | /* 0x18 - 0x1F */ | 109 | /* 0x18 - 0x1F */ |
105 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 110 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
106 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 111 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
107 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0, | 112 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, |
113 | ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, | ||
108 | /* 0x20 - 0x27 */ | 114 | /* 0x20 - 0x27 */ |
109 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 115 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
110 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 116 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
@@ -133,7 +139,8 @@ static u32 opcode_table[256] = { | |||
133 | DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack, | 139 | DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack, |
134 | DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack, | 140 | DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack, |
135 | /* 0x60 - 0x67 */ | 141 | /* 0x60 - 0x67 */ |
136 | 0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ , | 142 | ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, |
143 | 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ , | ||
137 | 0, 0, 0, 0, | 144 | 0, 0, 0, 0, |
138 | /* 0x68 - 0x6F */ | 145 | /* 0x68 - 0x6F */ |
139 | SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0, | 146 | SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0, |
@@ -158,7 +165,7 @@ static u32 opcode_table[256] = { | |||
158 | /* 0x90 - 0x97 */ | 165 | /* 0x90 - 0x97 */ |
159 | DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, | 166 | DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, |
160 | /* 0x98 - 0x9F */ | 167 | /* 0x98 - 0x9F */ |
161 | 0, 0, SrcImm | Src2Imm16, 0, | 168 | 0, 0, SrcImm | Src2Imm16 | No64, 0, |
162 | ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, | 169 | ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, |
163 | /* 0xA0 - 0xA7 */ | 170 | /* 0xA0 - 0xA7 */ |
164 | ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, | 171 | ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, |
@@ -185,7 +192,7 @@ static u32 opcode_table[256] = { | |||
185 | ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov, | 192 | ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov, |
186 | /* 0xC8 - 0xCF */ | 193 | /* 0xC8 - 0xCF */ |
187 | 0, 0, 0, ImplicitOps | Stack, | 194 | 0, 0, 0, ImplicitOps | Stack, |
188 | ImplicitOps, SrcImmByte, ImplicitOps, ImplicitOps, | 195 | ImplicitOps, SrcImmByte, ImplicitOps | No64, ImplicitOps, |
189 | /* 0xD0 - 0xD7 */ | 196 | /* 0xD0 - 0xD7 */ |
190 | ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, | 197 | ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, |
191 | ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, | 198 | ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, |
@@ -198,7 +205,7 @@ static u32 opcode_table[256] = { | |||
198 | ByteOp | SrcImmUByte, SrcImmUByte, | 205 | ByteOp | SrcImmUByte, SrcImmUByte, |
199 | /* 0xE8 - 0xEF */ | 206 | /* 0xE8 - 0xEF */ |
200 | SrcImm | Stack, SrcImm | ImplicitOps, | 207 | SrcImm | Stack, SrcImm | ImplicitOps, |
201 | SrcImmU | Src2Imm16, SrcImmByte | ImplicitOps, | 208 | SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps, |
202 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | 209 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, |
203 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | 210 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, |
204 | /* 0xF0 - 0xF7 */ | 211 | /* 0xF0 - 0xF7 */ |
@@ -244,11 +251,13 @@ static u32 twobyte_table[256] = { | |||
244 | /* 0x90 - 0x9F */ | 251 | /* 0x90 - 0x9F */ |
245 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 252 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
246 | /* 0xA0 - 0xA7 */ | 253 | /* 0xA0 - 0xA7 */ |
247 | 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, | 254 | ImplicitOps | Stack, ImplicitOps | Stack, |
255 | 0, DstMem | SrcReg | ModRM | BitOp, | ||
248 | DstMem | SrcReg | Src2ImmByte | ModRM, | 256 | DstMem | SrcReg | Src2ImmByte | ModRM, |
249 | DstMem | SrcReg | Src2CL | ModRM, 0, 0, | 257 | DstMem | SrcReg | Src2CL | ModRM, 0, 0, |
250 | /* 0xA8 - 0xAF */ | 258 | /* 0xA8 - 0xAF */ |
251 | 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, | 259 | ImplicitOps | Stack, ImplicitOps | Stack, |
260 | 0, DstMem | SrcReg | ModRM | BitOp, | ||
252 | DstMem | SrcReg | Src2ImmByte | ModRM, | 261 | DstMem | SrcReg | Src2ImmByte | ModRM, |
253 | DstMem | SrcReg | Src2CL | ModRM, | 262 | DstMem | SrcReg | Src2CL | ModRM, |
254 | ModRM, 0, | 263 | ModRM, 0, |
@@ -613,6 +622,9 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, | |||
613 | { | 622 | { |
614 | int rc = 0; | 623 | int rc = 0; |
615 | 624 | ||
625 | /* x86 instructions are limited to 15 bytes. */ | ||
626 | if (eip + size - ctxt->decode.eip_orig > 15) | ||
627 | return X86EMUL_UNHANDLEABLE; | ||
616 | eip += ctxt->cs_base; | 628 | eip += ctxt->cs_base; |
617 | while (size--) { | 629 | while (size--) { |
618 | rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++); | 630 | rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++); |
@@ -871,7 +883,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
871 | /* Shadow copy of register state. Committed on successful emulation. */ | 883 | /* Shadow copy of register state. Committed on successful emulation. */ |
872 | 884 | ||
873 | memset(c, 0, sizeof(struct decode_cache)); | 885 | memset(c, 0, sizeof(struct decode_cache)); |
874 | c->eip = kvm_rip_read(ctxt->vcpu); | 886 | c->eip = c->eip_orig = kvm_rip_read(ctxt->vcpu); |
875 | ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); | 887 | ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); |
876 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | 888 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); |
877 | 889 | ||
@@ -962,6 +974,11 @@ done_prefixes: | |||
962 | } | 974 | } |
963 | } | 975 | } |
964 | 976 | ||
977 | if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { | ||
978 | kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction");; | ||
979 | return -1; | ||
980 | } | ||
981 | |||
965 | if (c->d & Group) { | 982 | if (c->d & Group) { |
966 | group = c->d & GroupMask; | 983 | group = c->d & GroupMask; |
967 | c->modrm = insn_fetch(u8, 1, c->eip); | 984 | c->modrm = insn_fetch(u8, 1, c->eip); |
@@ -1186,6 +1203,69 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt, | |||
1186 | return rc; | 1203 | return rc; |
1187 | } | 1204 | } |
1188 | 1205 | ||
1206 | static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg) | ||
1207 | { | ||
1208 | struct decode_cache *c = &ctxt->decode; | ||
1209 | struct kvm_segment segment; | ||
1210 | |||
1211 | kvm_x86_ops->get_segment(ctxt->vcpu, &segment, seg); | ||
1212 | |||
1213 | c->src.val = segment.selector; | ||
1214 | emulate_push(ctxt); | ||
1215 | } | ||
1216 | |||
1217 | static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, | ||
1218 | struct x86_emulate_ops *ops, int seg) | ||
1219 | { | ||
1220 | struct decode_cache *c = &ctxt->decode; | ||
1221 | unsigned long selector; | ||
1222 | int rc; | ||
1223 | |||
1224 | rc = emulate_pop(ctxt, ops, &selector, c->op_bytes); | ||
1225 | if (rc != 0) | ||
1226 | return rc; | ||
1227 | |||
1228 | rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, 1, seg); | ||
1229 | return rc; | ||
1230 | } | ||
1231 | |||
1232 | static void emulate_pusha(struct x86_emulate_ctxt *ctxt) | ||
1233 | { | ||
1234 | struct decode_cache *c = &ctxt->decode; | ||
1235 | unsigned long old_esp = c->regs[VCPU_REGS_RSP]; | ||
1236 | int reg = VCPU_REGS_RAX; | ||
1237 | |||
1238 | while (reg <= VCPU_REGS_RDI) { | ||
1239 | (reg == VCPU_REGS_RSP) ? | ||
1240 | (c->src.val = old_esp) : (c->src.val = c->regs[reg]); | ||
1241 | |||
1242 | emulate_push(ctxt); | ||
1243 | ++reg; | ||
1244 | } | ||
1245 | } | ||
1246 | |||
1247 | static int emulate_popa(struct x86_emulate_ctxt *ctxt, | ||
1248 | struct x86_emulate_ops *ops) | ||
1249 | { | ||
1250 | struct decode_cache *c = &ctxt->decode; | ||
1251 | int rc = 0; | ||
1252 | int reg = VCPU_REGS_RDI; | ||
1253 | |||
1254 | while (reg >= VCPU_REGS_RAX) { | ||
1255 | if (reg == VCPU_REGS_RSP) { | ||
1256 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], | ||
1257 | c->op_bytes); | ||
1258 | --reg; | ||
1259 | } | ||
1260 | |||
1261 | rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes); | ||
1262 | if (rc != 0) | ||
1263 | break; | ||
1264 | --reg; | ||
1265 | } | ||
1266 | return rc; | ||
1267 | } | ||
1268 | |||
1189 | static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, | 1269 | static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, |
1190 | struct x86_emulate_ops *ops) | 1270 | struct x86_emulate_ops *ops) |
1191 | { | 1271 | { |
@@ -1707,18 +1787,45 @@ special_insn: | |||
1707 | add: /* add */ | 1787 | add: /* add */ |
1708 | emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); | 1788 | emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); |
1709 | break; | 1789 | break; |
1790 | case 0x06: /* push es */ | ||
1791 | emulate_push_sreg(ctxt, VCPU_SREG_ES); | ||
1792 | break; | ||
1793 | case 0x07: /* pop es */ | ||
1794 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); | ||
1795 | if (rc != 0) | ||
1796 | goto done; | ||
1797 | break; | ||
1710 | case 0x08 ... 0x0d: | 1798 | case 0x08 ... 0x0d: |
1711 | or: /* or */ | 1799 | or: /* or */ |
1712 | emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); | 1800 | emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); |
1713 | break; | 1801 | break; |
1802 | case 0x0e: /* push cs */ | ||
1803 | emulate_push_sreg(ctxt, VCPU_SREG_CS); | ||
1804 | break; | ||
1714 | case 0x10 ... 0x15: | 1805 | case 0x10 ... 0x15: |
1715 | adc: /* adc */ | 1806 | adc: /* adc */ |
1716 | emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); | 1807 | emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); |
1717 | break; | 1808 | break; |
1809 | case 0x16: /* push ss */ | ||
1810 | emulate_push_sreg(ctxt, VCPU_SREG_SS); | ||
1811 | break; | ||
1812 | case 0x17: /* pop ss */ | ||
1813 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); | ||
1814 | if (rc != 0) | ||
1815 | goto done; | ||
1816 | break; | ||
1718 | case 0x18 ... 0x1d: | 1817 | case 0x18 ... 0x1d: |
1719 | sbb: /* sbb */ | 1818 | sbb: /* sbb */ |
1720 | emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); | 1819 | emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); |
1721 | break; | 1820 | break; |
1821 | case 0x1e: /* push ds */ | ||
1822 | emulate_push_sreg(ctxt, VCPU_SREG_DS); | ||
1823 | break; | ||
1824 | case 0x1f: /* pop ds */ | ||
1825 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); | ||
1826 | if (rc != 0) | ||
1827 | goto done; | ||
1828 | break; | ||
1722 | case 0x20 ... 0x25: | 1829 | case 0x20 ... 0x25: |
1723 | and: /* and */ | 1830 | and: /* and */ |
1724 | emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags); | 1831 | emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags); |
@@ -1750,6 +1857,14 @@ special_insn: | |||
1750 | if (rc != 0) | 1857 | if (rc != 0) |
1751 | goto done; | 1858 | goto done; |
1752 | break; | 1859 | break; |
1860 | case 0x60: /* pusha */ | ||
1861 | emulate_pusha(ctxt); | ||
1862 | break; | ||
1863 | case 0x61: /* popa */ | ||
1864 | rc = emulate_popa(ctxt, ops); | ||
1865 | if (rc != 0) | ||
1866 | goto done; | ||
1867 | break; | ||
1753 | case 0x63: /* movsxd */ | 1868 | case 0x63: /* movsxd */ |
1754 | if (ctxt->mode != X86EMUL_MODE_PROT64) | 1869 | if (ctxt->mode != X86EMUL_MODE_PROT64) |
1755 | goto cannot_emulate; | 1870 | goto cannot_emulate; |
@@ -1761,7 +1876,7 @@ special_insn: | |||
1761 | break; | 1876 | break; |
1762 | case 0x6c: /* insb */ | 1877 | case 0x6c: /* insb */ |
1763 | case 0x6d: /* insw/insd */ | 1878 | case 0x6d: /* insw/insd */ |
1764 | if (kvm_emulate_pio_string(ctxt->vcpu, NULL, | 1879 | if (kvm_emulate_pio_string(ctxt->vcpu, |
1765 | 1, | 1880 | 1, |
1766 | (c->d & ByteOp) ? 1 : c->op_bytes, | 1881 | (c->d & ByteOp) ? 1 : c->op_bytes, |
1767 | c->rep_prefix ? | 1882 | c->rep_prefix ? |
@@ -1777,7 +1892,7 @@ special_insn: | |||
1777 | return 0; | 1892 | return 0; |
1778 | case 0x6e: /* outsb */ | 1893 | case 0x6e: /* outsb */ |
1779 | case 0x6f: /* outsw/outsd */ | 1894 | case 0x6f: /* outsw/outsd */ |
1780 | if (kvm_emulate_pio_string(ctxt->vcpu, NULL, | 1895 | if (kvm_emulate_pio_string(ctxt->vcpu, |
1781 | 0, | 1896 | 0, |
1782 | (c->d & ByteOp) ? 1 : c->op_bytes, | 1897 | (c->d & ByteOp) ? 1 : c->op_bytes, |
1783 | c->rep_prefix ? | 1898 | c->rep_prefix ? |
@@ -2070,7 +2185,7 @@ special_insn: | |||
2070 | case 0xef: /* out (e/r)ax,dx */ | 2185 | case 0xef: /* out (e/r)ax,dx */ |
2071 | port = c->regs[VCPU_REGS_RDX]; | 2186 | port = c->regs[VCPU_REGS_RDX]; |
2072 | io_dir_in = 0; | 2187 | io_dir_in = 0; |
2073 | do_io: if (kvm_emulate_pio(ctxt->vcpu, NULL, io_dir_in, | 2188 | do_io: if (kvm_emulate_pio(ctxt->vcpu, io_dir_in, |
2074 | (c->d & ByteOp) ? 1 : c->op_bytes, | 2189 | (c->d & ByteOp) ? 1 : c->op_bytes, |
2075 | port) != 0) { | 2190 | port) != 0) { |
2076 | c->eip = saved_eip; | 2191 | c->eip = saved_eip; |
@@ -2297,6 +2412,14 @@ twobyte_insn: | |||
2297 | jmp_rel(c, c->src.val); | 2412 | jmp_rel(c, c->src.val); |
2298 | c->dst.type = OP_NONE; | 2413 | c->dst.type = OP_NONE; |
2299 | break; | 2414 | break; |
2415 | case 0xa0: /* push fs */ | ||
2416 | emulate_push_sreg(ctxt, VCPU_SREG_FS); | ||
2417 | break; | ||
2418 | case 0xa1: /* pop fs */ | ||
2419 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); | ||
2420 | if (rc != 0) | ||
2421 | goto done; | ||
2422 | break; | ||
2300 | case 0xa3: | 2423 | case 0xa3: |
2301 | bt: /* bt */ | 2424 | bt: /* bt */ |
2302 | c->dst.type = OP_NONE; | 2425 | c->dst.type = OP_NONE; |
@@ -2308,6 +2431,14 @@ twobyte_insn: | |||
2308 | case 0xa5: /* shld cl, r, r/m */ | 2431 | case 0xa5: /* shld cl, r, r/m */ |
2309 | emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); | 2432 | emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); |
2310 | break; | 2433 | break; |
2434 | case 0xa8: /* push gs */ | ||
2435 | emulate_push_sreg(ctxt, VCPU_SREG_GS); | ||
2436 | break; | ||
2437 | case 0xa9: /* pop gs */ | ||
2438 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); | ||
2439 | if (rc != 0) | ||
2440 | goto done; | ||
2441 | break; | ||
2311 | case 0xab: | 2442 | case 0xab: |
2312 | bts: /* bts */ | 2443 | bts: /* bts */ |
2313 | /* only subword offset */ | 2444 | /* only subword offset */ |
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 82ad523b4901..296aba49472a 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
@@ -29,6 +29,8 @@ | |||
29 | * Based on QEMU and Xen. | 29 | * Based on QEMU and Xen. |
30 | */ | 30 | */ |
31 | 31 | ||
32 | #define pr_fmt(fmt) "pit: " fmt | ||
33 | |||
32 | #include <linux/kvm_host.h> | 34 | #include <linux/kvm_host.h> |
33 | 35 | ||
34 | #include "irq.h" | 36 | #include "irq.h" |
@@ -116,7 +118,7 @@ static s64 __kpit_elapsed(struct kvm *kvm) | |||
116 | * itself with the initial count and continues counting | 118 | * itself with the initial count and continues counting |
117 | * from there. | 119 | * from there. |
118 | */ | 120 | */ |
119 | remaining = hrtimer_expires_remaining(&ps->pit_timer.timer); | 121 | remaining = hrtimer_get_remaining(&ps->pit_timer.timer); |
120 | elapsed = ps->pit_timer.period - ktime_to_ns(remaining); | 122 | elapsed = ps->pit_timer.period - ktime_to_ns(remaining); |
121 | elapsed = mod_64(elapsed, ps->pit_timer.period); | 123 | elapsed = mod_64(elapsed, ps->pit_timer.period); |
122 | 124 | ||
@@ -262,7 +264,7 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) | |||
262 | 264 | ||
263 | static void destroy_pit_timer(struct kvm_timer *pt) | 265 | static void destroy_pit_timer(struct kvm_timer *pt) |
264 | { | 266 | { |
265 | pr_debug("pit: execute del timer!\n"); | 267 | pr_debug("execute del timer!\n"); |
266 | hrtimer_cancel(&pt->timer); | 268 | hrtimer_cancel(&pt->timer); |
267 | } | 269 | } |
268 | 270 | ||
@@ -284,7 +286,7 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) | |||
284 | 286 | ||
285 | interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); | 287 | interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); |
286 | 288 | ||
287 | pr_debug("pit: create pit timer, interval is %llu nsec\n", interval); | 289 | pr_debug("create pit timer, interval is %llu nsec\n", interval); |
288 | 290 | ||
289 | /* TODO The new value only affected after the retriggered */ | 291 | /* TODO The new value only affected after the retriggered */ |
290 | hrtimer_cancel(&pt->timer); | 292 | hrtimer_cancel(&pt->timer); |
@@ -309,7 +311,7 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val) | |||
309 | 311 | ||
310 | WARN_ON(!mutex_is_locked(&ps->lock)); | 312 | WARN_ON(!mutex_is_locked(&ps->lock)); |
311 | 313 | ||
312 | pr_debug("pit: load_count val is %d, channel is %d\n", val, channel); | 314 | pr_debug("load_count val is %d, channel is %d\n", val, channel); |
313 | 315 | ||
314 | /* | 316 | /* |
315 | * The largest possible initial count is 0; this is equivalent | 317 | * The largest possible initial count is 0; this is equivalent |
@@ -395,8 +397,8 @@ static int pit_ioport_write(struct kvm_io_device *this, | |||
395 | mutex_lock(&pit_state->lock); | 397 | mutex_lock(&pit_state->lock); |
396 | 398 | ||
397 | if (val != 0) | 399 | if (val != 0) |
398 | pr_debug("pit: write addr is 0x%x, len is %d, val is 0x%x\n", | 400 | pr_debug("write addr is 0x%x, len is %d, val is 0x%x\n", |
399 | (unsigned int)addr, len, val); | 401 | (unsigned int)addr, len, val); |
400 | 402 | ||
401 | if (addr == 3) { | 403 | if (addr == 3) { |
402 | channel = val >> 6; | 404 | channel = val >> 6; |
@@ -688,10 +690,8 @@ static void __inject_pit_timer_intr(struct kvm *kvm) | |||
688 | struct kvm_vcpu *vcpu; | 690 | struct kvm_vcpu *vcpu; |
689 | int i; | 691 | int i; |
690 | 692 | ||
691 | mutex_lock(&kvm->irq_lock); | ||
692 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); | 693 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); |
693 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); | 694 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); |
694 | mutex_unlock(&kvm->irq_lock); | ||
695 | 695 | ||
696 | /* | 696 | /* |
697 | * Provides NMI watchdog support via Virtual Wire mode. | 697 | * Provides NMI watchdog support via Virtual Wire mode. |
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 01f151682802..d057c0cbd245 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
@@ -38,7 +38,15 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq) | |||
38 | s->isr_ack |= (1 << irq); | 38 | s->isr_ack |= (1 << irq); |
39 | if (s != &s->pics_state->pics[0]) | 39 | if (s != &s->pics_state->pics[0]) |
40 | irq += 8; | 40 | irq += 8; |
41 | /* | ||
42 | * We are dropping lock while calling ack notifiers since ack | ||
43 | * notifier callbacks for assigned devices call into PIC recursively. | ||
44 | * Other interrupt may be delivered to PIC while lock is dropped but | ||
45 | * it should be safe since PIC state is already updated at this stage. | ||
46 | */ | ||
47 | spin_unlock(&s->pics_state->lock); | ||
41 | kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); | 48 | kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); |
49 | spin_lock(&s->pics_state->lock); | ||
42 | } | 50 | } |
43 | 51 | ||
44 | void kvm_pic_clear_isr_ack(struct kvm *kvm) | 52 | void kvm_pic_clear_isr_ack(struct kvm *kvm) |
@@ -176,16 +184,18 @@ int kvm_pic_set_irq(void *opaque, int irq, int level) | |||
176 | static inline void pic_intack(struct kvm_kpic_state *s, int irq) | 184 | static inline void pic_intack(struct kvm_kpic_state *s, int irq) |
177 | { | 185 | { |
178 | s->isr |= 1 << irq; | 186 | s->isr |= 1 << irq; |
179 | if (s->auto_eoi) { | ||
180 | if (s->rotate_on_auto_eoi) | ||
181 | s->priority_add = (irq + 1) & 7; | ||
182 | pic_clear_isr(s, irq); | ||
183 | } | ||
184 | /* | 187 | /* |
185 | * We don't clear a level sensitive interrupt here | 188 | * We don't clear a level sensitive interrupt here |
186 | */ | 189 | */ |
187 | if (!(s->elcr & (1 << irq))) | 190 | if (!(s->elcr & (1 << irq))) |
188 | s->irr &= ~(1 << irq); | 191 | s->irr &= ~(1 << irq); |
192 | |||
193 | if (s->auto_eoi) { | ||
194 | if (s->rotate_on_auto_eoi) | ||
195 | s->priority_add = (irq + 1) & 7; | ||
196 | pic_clear_isr(s, irq); | ||
197 | } | ||
198 | |||
189 | } | 199 | } |
190 | 200 | ||
191 | int kvm_pic_read_irq(struct kvm *kvm) | 201 | int kvm_pic_read_irq(struct kvm *kvm) |
@@ -225,22 +235,11 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
225 | 235 | ||
226 | void kvm_pic_reset(struct kvm_kpic_state *s) | 236 | void kvm_pic_reset(struct kvm_kpic_state *s) |
227 | { | 237 | { |
228 | int irq, irqbase, n; | 238 | int irq; |
229 | struct kvm *kvm = s->pics_state->irq_request_opaque; | 239 | struct kvm *kvm = s->pics_state->irq_request_opaque; |
230 | struct kvm_vcpu *vcpu0 = kvm->bsp_vcpu; | 240 | struct kvm_vcpu *vcpu0 = kvm->bsp_vcpu; |
241 | u8 irr = s->irr, isr = s->imr; | ||
231 | 242 | ||
232 | if (s == &s->pics_state->pics[0]) | ||
233 | irqbase = 0; | ||
234 | else | ||
235 | irqbase = 8; | ||
236 | |||
237 | for (irq = 0; irq < PIC_NUM_PINS/2; irq++) { | ||
238 | if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0)) | ||
239 | if (s->irr & (1 << irq) || s->isr & (1 << irq)) { | ||
240 | n = irq + irqbase; | ||
241 | kvm_notify_acked_irq(kvm, SELECT_PIC(n), n); | ||
242 | } | ||
243 | } | ||
244 | s->last_irr = 0; | 243 | s->last_irr = 0; |
245 | s->irr = 0; | 244 | s->irr = 0; |
246 | s->imr = 0; | 245 | s->imr = 0; |
@@ -256,6 +255,13 @@ void kvm_pic_reset(struct kvm_kpic_state *s) | |||
256 | s->rotate_on_auto_eoi = 0; | 255 | s->rotate_on_auto_eoi = 0; |
257 | s->special_fully_nested_mode = 0; | 256 | s->special_fully_nested_mode = 0; |
258 | s->init4 = 0; | 257 | s->init4 = 0; |
258 | |||
259 | for (irq = 0; irq < PIC_NUM_PINS/2; irq++) { | ||
260 | if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0)) | ||
261 | if (irr & (1 << irq) || isr & (1 << irq)) { | ||
262 | pic_clear_isr(s, irq); | ||
263 | } | ||
264 | } | ||
259 | } | 265 | } |
260 | 266 | ||
261 | static void pic_ioport_write(void *opaque, u32 addr, u32 val) | 267 | static void pic_ioport_write(void *opaque, u32 addr, u32 val) |
@@ -298,9 +304,9 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val) | |||
298 | priority = get_priority(s, s->isr); | 304 | priority = get_priority(s, s->isr); |
299 | if (priority != 8) { | 305 | if (priority != 8) { |
300 | irq = (priority + s->priority_add) & 7; | 306 | irq = (priority + s->priority_add) & 7; |
301 | pic_clear_isr(s, irq); | ||
302 | if (cmd == 5) | 307 | if (cmd == 5) |
303 | s->priority_add = (irq + 1) & 7; | 308 | s->priority_add = (irq + 1) & 7; |
309 | pic_clear_isr(s, irq); | ||
304 | pic_update_irq(s->pics_state); | 310 | pic_update_irq(s->pics_state); |
305 | } | 311 | } |
306 | break; | 312 | break; |
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 7d6058a2fd38..be399e207d57 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
@@ -71,6 +71,7 @@ struct kvm_pic { | |||
71 | int output; /* intr from master PIC */ | 71 | int output; /* intr from master PIC */ |
72 | struct kvm_io_device dev; | 72 | struct kvm_io_device dev; |
73 | void (*ack_notifier)(void *opaque, int irq); | 73 | void (*ack_notifier)(void *opaque, int irq); |
74 | unsigned long irq_states[16]; | ||
74 | }; | 75 | }; |
75 | 76 | ||
76 | struct kvm_pic *kvm_create_pic(struct kvm *kvm); | 77 | struct kvm_pic *kvm_create_pic(struct kvm *kvm); |
@@ -85,7 +86,11 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) | |||
85 | 86 | ||
86 | static inline int irqchip_in_kernel(struct kvm *kvm) | 87 | static inline int irqchip_in_kernel(struct kvm *kvm) |
87 | { | 88 | { |
88 | return pic_irqchip(kvm) != NULL; | 89 | int ret; |
90 | |||
91 | ret = (pic_irqchip(kvm) != NULL); | ||
92 | smp_rmb(); | ||
93 | return ret; | ||
89 | } | 94 | } |
90 | 95 | ||
91 | void kvm_pic_reset(struct kvm_kpic_state *s); | 96 | void kvm_pic_reset(struct kvm_kpic_state *s); |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 1ae5ceba7eb2..3063a0c4858b 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -32,7 +32,6 @@ | |||
32 | #include <asm/current.h> | 32 | #include <asm/current.h> |
33 | #include <asm/apicdef.h> | 33 | #include <asm/apicdef.h> |
34 | #include <asm/atomic.h> | 34 | #include <asm/atomic.h> |
35 | #include <asm/apicdef.h> | ||
36 | #include "kvm_cache_regs.h" | 35 | #include "kvm_cache_regs.h" |
37 | #include "irq.h" | 36 | #include "irq.h" |
38 | #include "trace.h" | 37 | #include "trace.h" |
@@ -471,11 +470,8 @@ static void apic_set_eoi(struct kvm_lapic *apic) | |||
471 | trigger_mode = IOAPIC_LEVEL_TRIG; | 470 | trigger_mode = IOAPIC_LEVEL_TRIG; |
472 | else | 471 | else |
473 | trigger_mode = IOAPIC_EDGE_TRIG; | 472 | trigger_mode = IOAPIC_EDGE_TRIG; |
474 | if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) { | 473 | if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) |
475 | mutex_lock(&apic->vcpu->kvm->irq_lock); | ||
476 | kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); | 474 | kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); |
477 | mutex_unlock(&apic->vcpu->kvm->irq_lock); | ||
478 | } | ||
479 | } | 475 | } |
480 | 476 | ||
481 | static void apic_send_ipi(struct kvm_lapic *apic) | 477 | static void apic_send_ipi(struct kvm_lapic *apic) |
@@ -504,9 +500,7 @@ static void apic_send_ipi(struct kvm_lapic *apic) | |||
504 | irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode, | 500 | irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode, |
505 | irq.vector); | 501 | irq.vector); |
506 | 502 | ||
507 | mutex_lock(&apic->vcpu->kvm->irq_lock); | ||
508 | kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq); | 503 | kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq); |
509 | mutex_unlock(&apic->vcpu->kvm->irq_lock); | ||
510 | } | 504 | } |
511 | 505 | ||
512 | static u32 apic_get_tmcct(struct kvm_lapic *apic) | 506 | static u32 apic_get_tmcct(struct kvm_lapic *apic) |
@@ -521,7 +515,7 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic) | |||
521 | if (apic_get_reg(apic, APIC_TMICT) == 0) | 515 | if (apic_get_reg(apic, APIC_TMICT) == 0) |
522 | return 0; | 516 | return 0; |
523 | 517 | ||
524 | remaining = hrtimer_expires_remaining(&apic->lapic_timer.timer); | 518 | remaining = hrtimer_get_remaining(&apic->lapic_timer.timer); |
525 | if (ktime_to_ns(remaining) < 0) | 519 | if (ktime_to_ns(remaining) < 0) |
526 | remaining = ktime_set(0, 0); | 520 | remaining = ktime_set(0, 0); |
527 | 521 | ||
@@ -664,7 +658,7 @@ static void start_apic_timer(struct kvm_lapic *apic) | |||
664 | { | 658 | { |
665 | ktime_t now = apic->lapic_timer.timer.base->get_time(); | 659 | ktime_t now = apic->lapic_timer.timer.base->get_time(); |
666 | 660 | ||
667 | apic->lapic_timer.period = apic_get_reg(apic, APIC_TMICT) * | 661 | apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) * |
668 | APIC_BUS_CYCLE_NS * apic->divide_count; | 662 | APIC_BUS_CYCLE_NS * apic->divide_count; |
669 | atomic_set(&apic->lapic_timer.pending, 0); | 663 | atomic_set(&apic->lapic_timer.pending, 0); |
670 | 664 | ||
@@ -1156,6 +1150,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu) | |||
1156 | hrtimer_cancel(&apic->lapic_timer.timer); | 1150 | hrtimer_cancel(&apic->lapic_timer.timer); |
1157 | update_divide_count(apic); | 1151 | update_divide_count(apic); |
1158 | start_apic_timer(apic); | 1152 | start_apic_timer(apic); |
1153 | apic->irr_pending = true; | ||
1159 | } | 1154 | } |
1160 | 1155 | ||
1161 | void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) | 1156 | void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index eca41ae9f453..4c3e5b2314cb 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -156,6 +156,8 @@ module_param(oos_shadow, bool, 0644); | |||
156 | #define CREATE_TRACE_POINTS | 156 | #define CREATE_TRACE_POINTS |
157 | #include "mmutrace.h" | 157 | #include "mmutrace.h" |
158 | 158 | ||
159 | #define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) | ||
160 | |||
159 | #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) | 161 | #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) |
160 | 162 | ||
161 | struct kvm_rmap_desc { | 163 | struct kvm_rmap_desc { |
@@ -634,9 +636,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) | |||
634 | if (*spte & shadow_accessed_mask) | 636 | if (*spte & shadow_accessed_mask) |
635 | kvm_set_pfn_accessed(pfn); | 637 | kvm_set_pfn_accessed(pfn); |
636 | if (is_writeble_pte(*spte)) | 638 | if (is_writeble_pte(*spte)) |
637 | kvm_release_pfn_dirty(pfn); | 639 | kvm_set_pfn_dirty(pfn); |
638 | else | ||
639 | kvm_release_pfn_clean(pfn); | ||
640 | rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level); | 640 | rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level); |
641 | if (!*rmapp) { | 641 | if (!*rmapp) { |
642 | printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); | 642 | printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); |
@@ -748,7 +748,8 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
748 | return write_protected; | 748 | return write_protected; |
749 | } | 749 | } |
750 | 750 | ||
751 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) | 751 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, |
752 | unsigned long data) | ||
752 | { | 753 | { |
753 | u64 *spte; | 754 | u64 *spte; |
754 | int need_tlb_flush = 0; | 755 | int need_tlb_flush = 0; |
@@ -763,8 +764,47 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) | |||
763 | return need_tlb_flush; | 764 | return need_tlb_flush; |
764 | } | 765 | } |
765 | 766 | ||
767 | static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, | ||
768 | unsigned long data) | ||
769 | { | ||
770 | int need_flush = 0; | ||
771 | u64 *spte, new_spte; | ||
772 | pte_t *ptep = (pte_t *)data; | ||
773 | pfn_t new_pfn; | ||
774 | |||
775 | WARN_ON(pte_huge(*ptep)); | ||
776 | new_pfn = pte_pfn(*ptep); | ||
777 | spte = rmap_next(kvm, rmapp, NULL); | ||
778 | while (spte) { | ||
779 | BUG_ON(!is_shadow_present_pte(*spte)); | ||
780 | rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte); | ||
781 | need_flush = 1; | ||
782 | if (pte_write(*ptep)) { | ||
783 | rmap_remove(kvm, spte); | ||
784 | __set_spte(spte, shadow_trap_nonpresent_pte); | ||
785 | spte = rmap_next(kvm, rmapp, NULL); | ||
786 | } else { | ||
787 | new_spte = *spte &~ (PT64_BASE_ADDR_MASK); | ||
788 | new_spte |= (u64)new_pfn << PAGE_SHIFT; | ||
789 | |||
790 | new_spte &= ~PT_WRITABLE_MASK; | ||
791 | new_spte &= ~SPTE_HOST_WRITEABLE; | ||
792 | if (is_writeble_pte(*spte)) | ||
793 | kvm_set_pfn_dirty(spte_to_pfn(*spte)); | ||
794 | __set_spte(spte, new_spte); | ||
795 | spte = rmap_next(kvm, rmapp, spte); | ||
796 | } | ||
797 | } | ||
798 | if (need_flush) | ||
799 | kvm_flush_remote_tlbs(kvm); | ||
800 | |||
801 | return 0; | ||
802 | } | ||
803 | |||
766 | static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | 804 | static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, |
767 | int (*handler)(struct kvm *kvm, unsigned long *rmapp)) | 805 | unsigned long data, |
806 | int (*handler)(struct kvm *kvm, unsigned long *rmapp, | ||
807 | unsigned long data)) | ||
768 | { | 808 | { |
769 | int i, j; | 809 | int i, j; |
770 | int retval = 0; | 810 | int retval = 0; |
@@ -786,13 +826,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | |||
786 | if (hva >= start && hva < end) { | 826 | if (hva >= start && hva < end) { |
787 | gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; | 827 | gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; |
788 | 828 | ||
789 | retval |= handler(kvm, &memslot->rmap[gfn_offset]); | 829 | retval |= handler(kvm, &memslot->rmap[gfn_offset], |
830 | data); | ||
790 | 831 | ||
791 | for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { | 832 | for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { |
792 | int idx = gfn_offset; | 833 | int idx = gfn_offset; |
793 | idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j); | 834 | idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j); |
794 | retval |= handler(kvm, | 835 | retval |= handler(kvm, |
795 | &memslot->lpage_info[j][idx].rmap_pde); | 836 | &memslot->lpage_info[j][idx].rmap_pde, |
837 | data); | ||
796 | } | 838 | } |
797 | } | 839 | } |
798 | } | 840 | } |
@@ -802,10 +844,16 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | |||
802 | 844 | ||
803 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) | 845 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) |
804 | { | 846 | { |
805 | return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); | 847 | return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp); |
806 | } | 848 | } |
807 | 849 | ||
808 | static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp) | 850 | void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) |
851 | { | ||
852 | kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp); | ||
853 | } | ||
854 | |||
855 | static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | ||
856 | unsigned long data) | ||
809 | { | 857 | { |
810 | u64 *spte; | 858 | u64 *spte; |
811 | int young = 0; | 859 | int young = 0; |
@@ -841,13 +889,13 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) | |||
841 | gfn = unalias_gfn(vcpu->kvm, gfn); | 889 | gfn = unalias_gfn(vcpu->kvm, gfn); |
842 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); | 890 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); |
843 | 891 | ||
844 | kvm_unmap_rmapp(vcpu->kvm, rmapp); | 892 | kvm_unmap_rmapp(vcpu->kvm, rmapp, 0); |
845 | kvm_flush_remote_tlbs(vcpu->kvm); | 893 | kvm_flush_remote_tlbs(vcpu->kvm); |
846 | } | 894 | } |
847 | 895 | ||
848 | int kvm_age_hva(struct kvm *kvm, unsigned long hva) | 896 | int kvm_age_hva(struct kvm *kvm, unsigned long hva) |
849 | { | 897 | { |
850 | return kvm_handle_hva(kvm, hva, kvm_age_rmapp); | 898 | return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp); |
851 | } | 899 | } |
852 | 900 | ||
853 | #ifdef MMU_DEBUG | 901 | #ifdef MMU_DEBUG |
@@ -1756,7 +1804,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1756 | unsigned pte_access, int user_fault, | 1804 | unsigned pte_access, int user_fault, |
1757 | int write_fault, int dirty, int level, | 1805 | int write_fault, int dirty, int level, |
1758 | gfn_t gfn, pfn_t pfn, bool speculative, | 1806 | gfn_t gfn, pfn_t pfn, bool speculative, |
1759 | bool can_unsync) | 1807 | bool can_unsync, bool reset_host_protection) |
1760 | { | 1808 | { |
1761 | u64 spte; | 1809 | u64 spte; |
1762 | int ret = 0; | 1810 | int ret = 0; |
@@ -1783,6 +1831,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1783 | spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, | 1831 | spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, |
1784 | kvm_is_mmio_pfn(pfn)); | 1832 | kvm_is_mmio_pfn(pfn)); |
1785 | 1833 | ||
1834 | if (reset_host_protection) | ||
1835 | spte |= SPTE_HOST_WRITEABLE; | ||
1836 | |||
1786 | spte |= (u64)pfn << PAGE_SHIFT; | 1837 | spte |= (u64)pfn << PAGE_SHIFT; |
1787 | 1838 | ||
1788 | if ((pte_access & ACC_WRITE_MASK) | 1839 | if ((pte_access & ACC_WRITE_MASK) |
@@ -1828,7 +1879,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1828 | unsigned pt_access, unsigned pte_access, | 1879 | unsigned pt_access, unsigned pte_access, |
1829 | int user_fault, int write_fault, int dirty, | 1880 | int user_fault, int write_fault, int dirty, |
1830 | int *ptwrite, int level, gfn_t gfn, | 1881 | int *ptwrite, int level, gfn_t gfn, |
1831 | pfn_t pfn, bool speculative) | 1882 | pfn_t pfn, bool speculative, |
1883 | bool reset_host_protection) | ||
1832 | { | 1884 | { |
1833 | int was_rmapped = 0; | 1885 | int was_rmapped = 0; |
1834 | int was_writeble = is_writeble_pte(*sptep); | 1886 | int was_writeble = is_writeble_pte(*sptep); |
@@ -1860,7 +1912,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1860 | } | 1912 | } |
1861 | 1913 | ||
1862 | if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault, | 1914 | if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault, |
1863 | dirty, level, gfn, pfn, speculative, true)) { | 1915 | dirty, level, gfn, pfn, speculative, true, |
1916 | reset_host_protection)) { | ||
1864 | if (write_fault) | 1917 | if (write_fault) |
1865 | *ptwrite = 1; | 1918 | *ptwrite = 1; |
1866 | kvm_x86_ops->tlb_flush(vcpu); | 1919 | kvm_x86_ops->tlb_flush(vcpu); |
@@ -1877,8 +1930,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1877 | page_header_update_slot(vcpu->kvm, sptep, gfn); | 1930 | page_header_update_slot(vcpu->kvm, sptep, gfn); |
1878 | if (!was_rmapped) { | 1931 | if (!was_rmapped) { |
1879 | rmap_count = rmap_add(vcpu, sptep, gfn); | 1932 | rmap_count = rmap_add(vcpu, sptep, gfn); |
1880 | if (!is_rmap_spte(*sptep)) | 1933 | kvm_release_pfn_clean(pfn); |
1881 | kvm_release_pfn_clean(pfn); | ||
1882 | if (rmap_count > RMAP_RECYCLE_THRESHOLD) | 1934 | if (rmap_count > RMAP_RECYCLE_THRESHOLD) |
1883 | rmap_recycle(vcpu, sptep, gfn); | 1935 | rmap_recycle(vcpu, sptep, gfn); |
1884 | } else { | 1936 | } else { |
@@ -1909,7 +1961,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
1909 | if (iterator.level == level) { | 1961 | if (iterator.level == level) { |
1910 | mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, | 1962 | mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, |
1911 | 0, write, 1, &pt_write, | 1963 | 0, write, 1, &pt_write, |
1912 | level, gfn, pfn, false); | 1964 | level, gfn, pfn, false, true); |
1913 | ++vcpu->stat.pf_fixed; | 1965 | ++vcpu->stat.pf_fixed; |
1914 | break; | 1966 | break; |
1915 | } | 1967 | } |
@@ -2737,7 +2789,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) | |||
2737 | if (r) | 2789 | if (r) |
2738 | goto out; | 2790 | goto out; |
2739 | 2791 | ||
2740 | er = emulate_instruction(vcpu, vcpu->run, cr2, error_code, 0); | 2792 | er = emulate_instruction(vcpu, cr2, error_code, 0); |
2741 | 2793 | ||
2742 | switch (er) { | 2794 | switch (er) { |
2743 | case EMULATE_DONE: | 2795 | case EMULATE_DONE: |
@@ -2748,6 +2800,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) | |||
2748 | case EMULATE_FAIL: | 2800 | case EMULATE_FAIL: |
2749 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | 2801 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |
2750 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | 2802 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; |
2803 | vcpu->run->internal.ndata = 0; | ||
2751 | return 0; | 2804 | return 0; |
2752 | default: | 2805 | default: |
2753 | BUG(); | 2806 | BUG(); |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index d2fec9c12d22..58a0f1e88596 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -273,9 +273,13 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
273 | if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq)) | 273 | if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq)) |
274 | return; | 274 | return; |
275 | kvm_get_pfn(pfn); | 275 | kvm_get_pfn(pfn); |
276 | /* | ||
277 | * we call mmu_set_spte() with reset_host_protection = true beacuse that | ||
278 | * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1). | ||
279 | */ | ||
276 | mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, | 280 | mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, |
277 | gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL, | 281 | gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL, |
278 | gpte_to_gfn(gpte), pfn, true); | 282 | gpte_to_gfn(gpte), pfn, true, true); |
279 | } | 283 | } |
280 | 284 | ||
281 | /* | 285 | /* |
@@ -308,7 +312,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
308 | user_fault, write_fault, | 312 | user_fault, write_fault, |
309 | gw->ptes[gw->level-1] & PT_DIRTY_MASK, | 313 | gw->ptes[gw->level-1] & PT_DIRTY_MASK, |
310 | ptwrite, level, | 314 | ptwrite, level, |
311 | gw->gfn, pfn, false); | 315 | gw->gfn, pfn, false, true); |
312 | break; | 316 | break; |
313 | } | 317 | } |
314 | 318 | ||
@@ -451,8 +455,6 @@ out_unlock: | |||
451 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | 455 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) |
452 | { | 456 | { |
453 | struct kvm_shadow_walk_iterator iterator; | 457 | struct kvm_shadow_walk_iterator iterator; |
454 | pt_element_t gpte; | ||
455 | gpa_t pte_gpa = -1; | ||
456 | int level; | 458 | int level; |
457 | u64 *sptep; | 459 | u64 *sptep; |
458 | int need_flush = 0; | 460 | int need_flush = 0; |
@@ -463,14 +465,9 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
463 | level = iterator.level; | 465 | level = iterator.level; |
464 | sptep = iterator.sptep; | 466 | sptep = iterator.sptep; |
465 | 467 | ||
466 | /* FIXME: properly handle invlpg on large guest pages */ | ||
467 | if (level == PT_PAGE_TABLE_LEVEL || | 468 | if (level == PT_PAGE_TABLE_LEVEL || |
468 | ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) || | 469 | ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) || |
469 | ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) { | 470 | ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) { |
470 | struct kvm_mmu_page *sp = page_header(__pa(sptep)); | ||
471 | |||
472 | pte_gpa = (sp->gfn << PAGE_SHIFT); | ||
473 | pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); | ||
474 | 471 | ||
475 | if (is_shadow_present_pte(*sptep)) { | 472 | if (is_shadow_present_pte(*sptep)) { |
476 | rmap_remove(vcpu->kvm, sptep); | 473 | rmap_remove(vcpu->kvm, sptep); |
@@ -489,18 +486,6 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
489 | if (need_flush) | 486 | if (need_flush) |
490 | kvm_flush_remote_tlbs(vcpu->kvm); | 487 | kvm_flush_remote_tlbs(vcpu->kvm); |
491 | spin_unlock(&vcpu->kvm->mmu_lock); | 488 | spin_unlock(&vcpu->kvm->mmu_lock); |
492 | |||
493 | if (pte_gpa == -1) | ||
494 | return; | ||
495 | if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte, | ||
496 | sizeof(pt_element_t))) | ||
497 | return; | ||
498 | if (is_present_gpte(gpte) && (gpte & PT_ACCESSED_MASK)) { | ||
499 | if (mmu_topup_memory_caches(vcpu)) | ||
500 | return; | ||
501 | kvm_mmu_pte_write(vcpu, pte_gpa, (const u8 *)&gpte, | ||
502 | sizeof(pt_element_t), 0); | ||
503 | } | ||
504 | } | 489 | } |
505 | 490 | ||
506 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) | 491 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) |
@@ -558,6 +543,7 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu, | |||
558 | static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 543 | static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) |
559 | { | 544 | { |
560 | int i, offset, nr_present; | 545 | int i, offset, nr_present; |
546 | bool reset_host_protection; | ||
561 | 547 | ||
562 | offset = nr_present = 0; | 548 | offset = nr_present = 0; |
563 | 549 | ||
@@ -595,9 +581,16 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
595 | 581 | ||
596 | nr_present++; | 582 | nr_present++; |
597 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); | 583 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); |
584 | if (!(sp->spt[i] & SPTE_HOST_WRITEABLE)) { | ||
585 | pte_access &= ~ACC_WRITE_MASK; | ||
586 | reset_host_protection = 0; | ||
587 | } else { | ||
588 | reset_host_protection = 1; | ||
589 | } | ||
598 | set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, | 590 | set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, |
599 | is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn, | 591 | is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn, |
600 | spte_to_pfn(sp->spt[i]), true, false); | 592 | spte_to_pfn(sp->spt[i]), true, false, |
593 | reset_host_protection); | ||
601 | } | 594 | } |
602 | 595 | ||
603 | return !nr_present; | 596 | return !nr_present; |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 6c79a14a3b6f..1d9b33843c80 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -46,6 +46,7 @@ MODULE_LICENSE("GPL"); | |||
46 | #define SVM_FEATURE_NPT (1 << 0) | 46 | #define SVM_FEATURE_NPT (1 << 0) |
47 | #define SVM_FEATURE_LBRV (1 << 1) | 47 | #define SVM_FEATURE_LBRV (1 << 1) |
48 | #define SVM_FEATURE_SVML (1 << 2) | 48 | #define SVM_FEATURE_SVML (1 << 2) |
49 | #define SVM_FEATURE_PAUSE_FILTER (1 << 10) | ||
49 | 50 | ||
50 | #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ | 51 | #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ |
51 | #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ | 52 | #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ |
@@ -53,15 +54,6 @@ MODULE_LICENSE("GPL"); | |||
53 | 54 | ||
54 | #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) | 55 | #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) |
55 | 56 | ||
56 | /* Turn on to get debugging output*/ | ||
57 | /* #define NESTED_DEBUG */ | ||
58 | |||
59 | #ifdef NESTED_DEBUG | ||
60 | #define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args) | ||
61 | #else | ||
62 | #define nsvm_printk(fmt, args...) do {} while(0) | ||
63 | #endif | ||
64 | |||
65 | static const u32 host_save_user_msrs[] = { | 57 | static const u32 host_save_user_msrs[] = { |
66 | #ifdef CONFIG_X86_64 | 58 | #ifdef CONFIG_X86_64 |
67 | MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE, | 59 | MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE, |
@@ -85,6 +77,9 @@ struct nested_state { | |||
85 | /* gpa pointers to the real vectors */ | 77 | /* gpa pointers to the real vectors */ |
86 | u64 vmcb_msrpm; | 78 | u64 vmcb_msrpm; |
87 | 79 | ||
80 | /* A VMEXIT is required but not yet emulated */ | ||
81 | bool exit_required; | ||
82 | |||
88 | /* cache for intercepts of the guest */ | 83 | /* cache for intercepts of the guest */ |
89 | u16 intercept_cr_read; | 84 | u16 intercept_cr_read; |
90 | u16 intercept_cr_write; | 85 | u16 intercept_cr_write; |
@@ -112,6 +107,8 @@ struct vcpu_svm { | |||
112 | u32 *msrpm; | 107 | u32 *msrpm; |
113 | 108 | ||
114 | struct nested_state nested; | 109 | struct nested_state nested; |
110 | |||
111 | bool nmi_singlestep; | ||
115 | }; | 112 | }; |
116 | 113 | ||
117 | /* enable NPT for AMD64 and X86 with PAE */ | 114 | /* enable NPT for AMD64 and X86 with PAE */ |
@@ -286,7 +283,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
286 | struct vcpu_svm *svm = to_svm(vcpu); | 283 | struct vcpu_svm *svm = to_svm(vcpu); |
287 | 284 | ||
288 | if (!svm->next_rip) { | 285 | if (!svm->next_rip) { |
289 | if (emulate_instruction(vcpu, vcpu->run, 0, 0, EMULTYPE_SKIP) != | 286 | if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) != |
290 | EMULATE_DONE) | 287 | EMULATE_DONE) |
291 | printk(KERN_DEBUG "%s: NOP\n", __func__); | 288 | printk(KERN_DEBUG "%s: NOP\n", __func__); |
292 | return; | 289 | return; |
@@ -316,7 +313,7 @@ static void svm_hardware_disable(void *garbage) | |||
316 | cpu_svm_disable(); | 313 | cpu_svm_disable(); |
317 | } | 314 | } |
318 | 315 | ||
319 | static void svm_hardware_enable(void *garbage) | 316 | static int svm_hardware_enable(void *garbage) |
320 | { | 317 | { |
321 | 318 | ||
322 | struct svm_cpu_data *sd; | 319 | struct svm_cpu_data *sd; |
@@ -325,16 +322,21 @@ static void svm_hardware_enable(void *garbage) | |||
325 | struct desc_struct *gdt; | 322 | struct desc_struct *gdt; |
326 | int me = raw_smp_processor_id(); | 323 | int me = raw_smp_processor_id(); |
327 | 324 | ||
325 | rdmsrl(MSR_EFER, efer); | ||
326 | if (efer & EFER_SVME) | ||
327 | return -EBUSY; | ||
328 | |||
328 | if (!has_svm()) { | 329 | if (!has_svm()) { |
329 | printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me); | 330 | printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n", |
330 | return; | 331 | me); |
332 | return -EINVAL; | ||
331 | } | 333 | } |
332 | sd = per_cpu(svm_data, me); | 334 | sd = per_cpu(svm_data, me); |
333 | 335 | ||
334 | if (!sd) { | 336 | if (!sd) { |
335 | printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n", | 337 | printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n", |
336 | me); | 338 | me); |
337 | return; | 339 | return -EINVAL; |
338 | } | 340 | } |
339 | 341 | ||
340 | sd->asid_generation = 1; | 342 | sd->asid_generation = 1; |
@@ -345,11 +347,11 @@ static void svm_hardware_enable(void *garbage) | |||
345 | gdt = (struct desc_struct *)gdt_descr.base; | 347 | gdt = (struct desc_struct *)gdt_descr.base; |
346 | sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); | 348 | sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); |
347 | 349 | ||
348 | rdmsrl(MSR_EFER, efer); | ||
349 | wrmsrl(MSR_EFER, efer | EFER_SVME); | 350 | wrmsrl(MSR_EFER, efer | EFER_SVME); |
350 | 351 | ||
351 | wrmsrl(MSR_VM_HSAVE_PA, | 352 | wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT); |
352 | page_to_pfn(sd->save_area) << PAGE_SHIFT); | 353 | |
354 | return 0; | ||
353 | } | 355 | } |
354 | 356 | ||
355 | static void svm_cpu_uninit(int cpu) | 357 | static void svm_cpu_uninit(int cpu) |
@@ -475,7 +477,7 @@ static __init int svm_hardware_setup(void) | |||
475 | kvm_enable_efer_bits(EFER_SVME); | 477 | kvm_enable_efer_bits(EFER_SVME); |
476 | } | 478 | } |
477 | 479 | ||
478 | for_each_online_cpu(cpu) { | 480 | for_each_possible_cpu(cpu) { |
479 | r = svm_cpu_init(cpu); | 481 | r = svm_cpu_init(cpu); |
480 | if (r) | 482 | if (r) |
481 | goto err; | 483 | goto err; |
@@ -509,7 +511,7 @@ static __exit void svm_hardware_unsetup(void) | |||
509 | { | 511 | { |
510 | int cpu; | 512 | int cpu; |
511 | 513 | ||
512 | for_each_online_cpu(cpu) | 514 | for_each_possible_cpu(cpu) |
513 | svm_cpu_uninit(cpu); | 515 | svm_cpu_uninit(cpu); |
514 | 516 | ||
515 | __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER); | 517 | __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER); |
@@ -624,11 +626,12 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
624 | save->rip = 0x0000fff0; | 626 | save->rip = 0x0000fff0; |
625 | svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; | 627 | svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; |
626 | 628 | ||
627 | /* | 629 | /* This is the guest-visible cr0 value. |
628 | * cr0 val on cpu init should be 0x60000010, we enable cpu | 630 | * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. |
629 | * cache by default. the orderly way is to enable cache in bios. | ||
630 | */ | 631 | */ |
631 | save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP; | 632 | svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; |
633 | kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0); | ||
634 | |||
632 | save->cr4 = X86_CR4_PAE; | 635 | save->cr4 = X86_CR4_PAE; |
633 | /* rdx = ?? */ | 636 | /* rdx = ?? */ |
634 | 637 | ||
@@ -643,8 +646,6 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
643 | control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK| | 646 | control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK| |
644 | INTERCEPT_CR3_MASK); | 647 | INTERCEPT_CR3_MASK); |
645 | save->g_pat = 0x0007040600070406ULL; | 648 | save->g_pat = 0x0007040600070406ULL; |
646 | /* enable caching because the QEMU Bios doesn't enable it */ | ||
647 | save->cr0 = X86_CR0_ET; | ||
648 | save->cr3 = 0; | 649 | save->cr3 = 0; |
649 | save->cr4 = 0; | 650 | save->cr4 = 0; |
650 | } | 651 | } |
@@ -653,6 +654,11 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
653 | svm->nested.vmcb = 0; | 654 | svm->nested.vmcb = 0; |
654 | svm->vcpu.arch.hflags = 0; | 655 | svm->vcpu.arch.hflags = 0; |
655 | 656 | ||
657 | if (svm_has(SVM_FEATURE_PAUSE_FILTER)) { | ||
658 | control->pause_filter_count = 3000; | ||
659 | control->intercept |= (1ULL << INTERCEPT_PAUSE); | ||
660 | } | ||
661 | |||
656 | enable_gif(svm); | 662 | enable_gif(svm); |
657 | } | 663 | } |
658 | 664 | ||
@@ -757,15 +763,16 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
757 | int i; | 763 | int i; |
758 | 764 | ||
759 | if (unlikely(cpu != vcpu->cpu)) { | 765 | if (unlikely(cpu != vcpu->cpu)) { |
760 | u64 tsc_this, delta; | 766 | u64 delta; |
761 | 767 | ||
762 | /* | 768 | /* |
763 | * Make sure that the guest sees a monotonically | 769 | * Make sure that the guest sees a monotonically |
764 | * increasing TSC. | 770 | * increasing TSC. |
765 | */ | 771 | */ |
766 | rdtscll(tsc_this); | 772 | delta = vcpu->arch.host_tsc - native_read_tsc(); |
767 | delta = vcpu->arch.host_tsc - tsc_this; | ||
768 | svm->vmcb->control.tsc_offset += delta; | 773 | svm->vmcb->control.tsc_offset += delta; |
774 | if (is_nested(svm)) | ||
775 | svm->nested.hsave->control.tsc_offset += delta; | ||
769 | vcpu->cpu = cpu; | 776 | vcpu->cpu = cpu; |
770 | kvm_migrate_timers(vcpu); | 777 | kvm_migrate_timers(vcpu); |
771 | svm->asid_generation = 0; | 778 | svm->asid_generation = 0; |
@@ -784,7 +791,7 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) | |||
784 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) | 791 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) |
785 | wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); | 792 | wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); |
786 | 793 | ||
787 | rdtscll(vcpu->arch.host_tsc); | 794 | vcpu->arch.host_tsc = native_read_tsc(); |
788 | } | 795 | } |
789 | 796 | ||
790 | static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) | 797 | static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) |
@@ -1042,7 +1049,7 @@ static void update_db_intercept(struct kvm_vcpu *vcpu) | |||
1042 | svm->vmcb->control.intercept_exceptions &= | 1049 | svm->vmcb->control.intercept_exceptions &= |
1043 | ~((1 << DB_VECTOR) | (1 << BP_VECTOR)); | 1050 | ~((1 << DB_VECTOR) | (1 << BP_VECTOR)); |
1044 | 1051 | ||
1045 | if (vcpu->arch.singlestep) | 1052 | if (svm->nmi_singlestep) |
1046 | svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR); | 1053 | svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR); |
1047 | 1054 | ||
1048 | if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { | 1055 | if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { |
@@ -1057,26 +1064,16 @@ static void update_db_intercept(struct kvm_vcpu *vcpu) | |||
1057 | vcpu->guest_debug = 0; | 1064 | vcpu->guest_debug = 0; |
1058 | } | 1065 | } |
1059 | 1066 | ||
1060 | static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) | 1067 | static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) |
1061 | { | 1068 | { |
1062 | int old_debug = vcpu->guest_debug; | ||
1063 | struct vcpu_svm *svm = to_svm(vcpu); | 1069 | struct vcpu_svm *svm = to_svm(vcpu); |
1064 | 1070 | ||
1065 | vcpu->guest_debug = dbg->control; | ||
1066 | |||
1067 | update_db_intercept(vcpu); | ||
1068 | |||
1069 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) | 1071 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) |
1070 | svm->vmcb->save.dr7 = dbg->arch.debugreg[7]; | 1072 | svm->vmcb->save.dr7 = dbg->arch.debugreg[7]; |
1071 | else | 1073 | else |
1072 | svm->vmcb->save.dr7 = vcpu->arch.dr7; | 1074 | svm->vmcb->save.dr7 = vcpu->arch.dr7; |
1073 | 1075 | ||
1074 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | 1076 | update_db_intercept(vcpu); |
1075 | svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF; | ||
1076 | else if (old_debug & KVM_GUESTDBG_SINGLESTEP) | ||
1077 | svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); | ||
1078 | |||
1079 | return 0; | ||
1080 | } | 1077 | } |
1081 | 1078 | ||
1082 | static void load_host_msrs(struct kvm_vcpu *vcpu) | 1079 | static void load_host_msrs(struct kvm_vcpu *vcpu) |
@@ -1177,7 +1174,7 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, | |||
1177 | } | 1174 | } |
1178 | } | 1175 | } |
1179 | 1176 | ||
1180 | static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1177 | static int pf_interception(struct vcpu_svm *svm) |
1181 | { | 1178 | { |
1182 | u64 fault_address; | 1179 | u64 fault_address; |
1183 | u32 error_code; | 1180 | u32 error_code; |
@@ -1191,17 +1188,19 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1191 | return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); | 1188 | return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); |
1192 | } | 1189 | } |
1193 | 1190 | ||
1194 | static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1191 | static int db_interception(struct vcpu_svm *svm) |
1195 | { | 1192 | { |
1193 | struct kvm_run *kvm_run = svm->vcpu.run; | ||
1194 | |||
1196 | if (!(svm->vcpu.guest_debug & | 1195 | if (!(svm->vcpu.guest_debug & |
1197 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) && | 1196 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) && |
1198 | !svm->vcpu.arch.singlestep) { | 1197 | !svm->nmi_singlestep) { |
1199 | kvm_queue_exception(&svm->vcpu, DB_VECTOR); | 1198 | kvm_queue_exception(&svm->vcpu, DB_VECTOR); |
1200 | return 1; | 1199 | return 1; |
1201 | } | 1200 | } |
1202 | 1201 | ||
1203 | if (svm->vcpu.arch.singlestep) { | 1202 | if (svm->nmi_singlestep) { |
1204 | svm->vcpu.arch.singlestep = false; | 1203 | svm->nmi_singlestep = false; |
1205 | if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) | 1204 | if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) |
1206 | svm->vmcb->save.rflags &= | 1205 | svm->vmcb->save.rflags &= |
1207 | ~(X86_EFLAGS_TF | X86_EFLAGS_RF); | 1206 | ~(X86_EFLAGS_TF | X86_EFLAGS_RF); |
@@ -1220,25 +1219,27 @@ static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1220 | return 1; | 1219 | return 1; |
1221 | } | 1220 | } |
1222 | 1221 | ||
1223 | static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1222 | static int bp_interception(struct vcpu_svm *svm) |
1224 | { | 1223 | { |
1224 | struct kvm_run *kvm_run = svm->vcpu.run; | ||
1225 | |||
1225 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | 1226 | kvm_run->exit_reason = KVM_EXIT_DEBUG; |
1226 | kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip; | 1227 | kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip; |
1227 | kvm_run->debug.arch.exception = BP_VECTOR; | 1228 | kvm_run->debug.arch.exception = BP_VECTOR; |
1228 | return 0; | 1229 | return 0; |
1229 | } | 1230 | } |
1230 | 1231 | ||
1231 | static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1232 | static int ud_interception(struct vcpu_svm *svm) |
1232 | { | 1233 | { |
1233 | int er; | 1234 | int er; |
1234 | 1235 | ||
1235 | er = emulate_instruction(&svm->vcpu, kvm_run, 0, 0, EMULTYPE_TRAP_UD); | 1236 | er = emulate_instruction(&svm->vcpu, 0, 0, EMULTYPE_TRAP_UD); |
1236 | if (er != EMULATE_DONE) | 1237 | if (er != EMULATE_DONE) |
1237 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); | 1238 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); |
1238 | return 1; | 1239 | return 1; |
1239 | } | 1240 | } |
1240 | 1241 | ||
1241 | static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1242 | static int nm_interception(struct vcpu_svm *svm) |
1242 | { | 1243 | { |
1243 | svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); | 1244 | svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); |
1244 | if (!(svm->vcpu.arch.cr0 & X86_CR0_TS)) | 1245 | if (!(svm->vcpu.arch.cr0 & X86_CR0_TS)) |
@@ -1248,7 +1249,7 @@ static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1248 | return 1; | 1249 | return 1; |
1249 | } | 1250 | } |
1250 | 1251 | ||
1251 | static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1252 | static int mc_interception(struct vcpu_svm *svm) |
1252 | { | 1253 | { |
1253 | /* | 1254 | /* |
1254 | * On an #MC intercept the MCE handler is not called automatically in | 1255 | * On an #MC intercept the MCE handler is not called automatically in |
@@ -1261,8 +1262,10 @@ static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1261 | return 1; | 1262 | return 1; |
1262 | } | 1263 | } |
1263 | 1264 | ||
1264 | static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1265 | static int shutdown_interception(struct vcpu_svm *svm) |
1265 | { | 1266 | { |
1267 | struct kvm_run *kvm_run = svm->vcpu.run; | ||
1268 | |||
1266 | /* | 1269 | /* |
1267 | * VMCB is undefined after a SHUTDOWN intercept | 1270 | * VMCB is undefined after a SHUTDOWN intercept |
1268 | * so reinitialize it. | 1271 | * so reinitialize it. |
@@ -1274,7 +1277,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1274 | return 0; | 1277 | return 0; |
1275 | } | 1278 | } |
1276 | 1279 | ||
1277 | static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1280 | static int io_interception(struct vcpu_svm *svm) |
1278 | { | 1281 | { |
1279 | u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ | 1282 | u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ |
1280 | int size, in, string; | 1283 | int size, in, string; |
@@ -1288,7 +1291,7 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1288 | 1291 | ||
1289 | if (string) { | 1292 | if (string) { |
1290 | if (emulate_instruction(&svm->vcpu, | 1293 | if (emulate_instruction(&svm->vcpu, |
1291 | kvm_run, 0, 0, 0) == EMULATE_DO_MMIO) | 1294 | 0, 0, 0) == EMULATE_DO_MMIO) |
1292 | return 0; | 1295 | return 0; |
1293 | return 1; | 1296 | return 1; |
1294 | } | 1297 | } |
@@ -1298,33 +1301,33 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1298 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; | 1301 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; |
1299 | 1302 | ||
1300 | skip_emulated_instruction(&svm->vcpu); | 1303 | skip_emulated_instruction(&svm->vcpu); |
1301 | return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port); | 1304 | return kvm_emulate_pio(&svm->vcpu, in, size, port); |
1302 | } | 1305 | } |
1303 | 1306 | ||
1304 | static int nmi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1307 | static int nmi_interception(struct vcpu_svm *svm) |
1305 | { | 1308 | { |
1306 | return 1; | 1309 | return 1; |
1307 | } | 1310 | } |
1308 | 1311 | ||
1309 | static int intr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1312 | static int intr_interception(struct vcpu_svm *svm) |
1310 | { | 1313 | { |
1311 | ++svm->vcpu.stat.irq_exits; | 1314 | ++svm->vcpu.stat.irq_exits; |
1312 | return 1; | 1315 | return 1; |
1313 | } | 1316 | } |
1314 | 1317 | ||
1315 | static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1318 | static int nop_on_interception(struct vcpu_svm *svm) |
1316 | { | 1319 | { |
1317 | return 1; | 1320 | return 1; |
1318 | } | 1321 | } |
1319 | 1322 | ||
1320 | static int halt_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1323 | static int halt_interception(struct vcpu_svm *svm) |
1321 | { | 1324 | { |
1322 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 1; | 1325 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 1; |
1323 | skip_emulated_instruction(&svm->vcpu); | 1326 | skip_emulated_instruction(&svm->vcpu); |
1324 | return kvm_emulate_halt(&svm->vcpu); | 1327 | return kvm_emulate_halt(&svm->vcpu); |
1325 | } | 1328 | } |
1326 | 1329 | ||
1327 | static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1330 | static int vmmcall_interception(struct vcpu_svm *svm) |
1328 | { | 1331 | { |
1329 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | 1332 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; |
1330 | skip_emulated_instruction(&svm->vcpu); | 1333 | skip_emulated_instruction(&svm->vcpu); |
@@ -1375,8 +1378,15 @@ static inline int nested_svm_intr(struct vcpu_svm *svm) | |||
1375 | 1378 | ||
1376 | svm->vmcb->control.exit_code = SVM_EXIT_INTR; | 1379 | svm->vmcb->control.exit_code = SVM_EXIT_INTR; |
1377 | 1380 | ||
1378 | if (nested_svm_exit_handled(svm)) { | 1381 | if (svm->nested.intercept & 1ULL) { |
1379 | nsvm_printk("VMexit -> INTR\n"); | 1382 | /* |
1383 | * The #vmexit can't be emulated here directly because this | ||
1384 | * code path runs with irqs and preemtion disabled. A | ||
1385 | * #vmexit emulation might sleep. Only signal request for | ||
1386 | * the #vmexit here. | ||
1387 | */ | ||
1388 | svm->nested.exit_required = true; | ||
1389 | trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); | ||
1380 | return 1; | 1390 | return 1; |
1381 | } | 1391 | } |
1382 | 1392 | ||
@@ -1387,10 +1397,7 @@ static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx) | |||
1387 | { | 1397 | { |
1388 | struct page *page; | 1398 | struct page *page; |
1389 | 1399 | ||
1390 | down_read(¤t->mm->mmap_sem); | ||
1391 | page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT); | 1400 | page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT); |
1392 | up_read(¤t->mm->mmap_sem); | ||
1393 | |||
1394 | if (is_error_page(page)) | 1401 | if (is_error_page(page)) |
1395 | goto error; | 1402 | goto error; |
1396 | 1403 | ||
@@ -1529,14 +1536,12 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm) | |||
1529 | } | 1536 | } |
1530 | default: { | 1537 | default: { |
1531 | u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); | 1538 | u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); |
1532 | nsvm_printk("exit code: 0x%x\n", exit_code); | ||
1533 | if (svm->nested.intercept & exit_bits) | 1539 | if (svm->nested.intercept & exit_bits) |
1534 | vmexit = NESTED_EXIT_DONE; | 1540 | vmexit = NESTED_EXIT_DONE; |
1535 | } | 1541 | } |
1536 | } | 1542 | } |
1537 | 1543 | ||
1538 | if (vmexit == NESTED_EXIT_DONE) { | 1544 | if (vmexit == NESTED_EXIT_DONE) { |
1539 | nsvm_printk("#VMEXIT reason=%04x\n", exit_code); | ||
1540 | nested_svm_vmexit(svm); | 1545 | nested_svm_vmexit(svm); |
1541 | } | 1546 | } |
1542 | 1547 | ||
@@ -1581,6 +1586,12 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1581 | struct vmcb *hsave = svm->nested.hsave; | 1586 | struct vmcb *hsave = svm->nested.hsave; |
1582 | struct vmcb *vmcb = svm->vmcb; | 1587 | struct vmcb *vmcb = svm->vmcb; |
1583 | 1588 | ||
1589 | trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, | ||
1590 | vmcb->control.exit_info_1, | ||
1591 | vmcb->control.exit_info_2, | ||
1592 | vmcb->control.exit_int_info, | ||
1593 | vmcb->control.exit_int_info_err); | ||
1594 | |||
1584 | nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0); | 1595 | nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0); |
1585 | if (!nested_vmcb) | 1596 | if (!nested_vmcb) |
1586 | return 1; | 1597 | return 1; |
@@ -1614,6 +1625,22 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1614 | nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2; | 1625 | nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2; |
1615 | nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info; | 1626 | nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info; |
1616 | nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err; | 1627 | nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err; |
1628 | |||
1629 | /* | ||
1630 | * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have | ||
1631 | * to make sure that we do not lose injected events. So check event_inj | ||
1632 | * here and copy it to exit_int_info if it is valid. | ||
1633 | * Exit_int_info and event_inj can't be both valid because the case | ||
1634 | * below only happens on a VMRUN instruction intercept which has | ||
1635 | * no valid exit_int_info set. | ||
1636 | */ | ||
1637 | if (vmcb->control.event_inj & SVM_EVTINJ_VALID) { | ||
1638 | struct vmcb_control_area *nc = &nested_vmcb->control; | ||
1639 | |||
1640 | nc->exit_int_info = vmcb->control.event_inj; | ||
1641 | nc->exit_int_info_err = vmcb->control.event_inj_err; | ||
1642 | } | ||
1643 | |||
1617 | nested_vmcb->control.tlb_ctl = 0; | 1644 | nested_vmcb->control.tlb_ctl = 0; |
1618 | nested_vmcb->control.event_inj = 0; | 1645 | nested_vmcb->control.event_inj = 0; |
1619 | nested_vmcb->control.event_inj_err = 0; | 1646 | nested_vmcb->control.event_inj_err = 0; |
@@ -1625,10 +1652,6 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1625 | /* Restore the original control entries */ | 1652 | /* Restore the original control entries */ |
1626 | copy_vmcb_control_area(vmcb, hsave); | 1653 | copy_vmcb_control_area(vmcb, hsave); |
1627 | 1654 | ||
1628 | /* Kill any pending exceptions */ | ||
1629 | if (svm->vcpu.arch.exception.pending == true) | ||
1630 | nsvm_printk("WARNING: Pending Exception\n"); | ||
1631 | |||
1632 | kvm_clear_exception_queue(&svm->vcpu); | 1655 | kvm_clear_exception_queue(&svm->vcpu); |
1633 | kvm_clear_interrupt_queue(&svm->vcpu); | 1656 | kvm_clear_interrupt_queue(&svm->vcpu); |
1634 | 1657 | ||
@@ -1699,6 +1722,12 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
1699 | /* nested_vmcb is our indicator if nested SVM is activated */ | 1722 | /* nested_vmcb is our indicator if nested SVM is activated */ |
1700 | svm->nested.vmcb = svm->vmcb->save.rax; | 1723 | svm->nested.vmcb = svm->vmcb->save.rax; |
1701 | 1724 | ||
1725 | trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb, | ||
1726 | nested_vmcb->save.rip, | ||
1727 | nested_vmcb->control.int_ctl, | ||
1728 | nested_vmcb->control.event_inj, | ||
1729 | nested_vmcb->control.nested_ctl); | ||
1730 | |||
1702 | /* Clear internal status */ | 1731 | /* Clear internal status */ |
1703 | kvm_clear_exception_queue(&svm->vcpu); | 1732 | kvm_clear_exception_queue(&svm->vcpu); |
1704 | kvm_clear_interrupt_queue(&svm->vcpu); | 1733 | kvm_clear_interrupt_queue(&svm->vcpu); |
@@ -1786,28 +1815,15 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
1786 | svm->nested.intercept = nested_vmcb->control.intercept; | 1815 | svm->nested.intercept = nested_vmcb->control.intercept; |
1787 | 1816 | ||
1788 | force_new_asid(&svm->vcpu); | 1817 | force_new_asid(&svm->vcpu); |
1789 | svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info; | ||
1790 | svm->vmcb->control.exit_int_info_err = nested_vmcb->control.exit_int_info_err; | ||
1791 | svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; | 1818 | svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; |
1792 | if (nested_vmcb->control.int_ctl & V_IRQ_MASK) { | ||
1793 | nsvm_printk("nSVM Injecting Interrupt: 0x%x\n", | ||
1794 | nested_vmcb->control.int_ctl); | ||
1795 | } | ||
1796 | if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK) | 1819 | if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK) |
1797 | svm->vcpu.arch.hflags |= HF_VINTR_MASK; | 1820 | svm->vcpu.arch.hflags |= HF_VINTR_MASK; |
1798 | else | 1821 | else |
1799 | svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; | 1822 | svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; |
1800 | 1823 | ||
1801 | nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n", | ||
1802 | nested_vmcb->control.exit_int_info, | ||
1803 | nested_vmcb->control.int_state); | ||
1804 | |||
1805 | svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; | 1824 | svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; |
1806 | svm->vmcb->control.int_state = nested_vmcb->control.int_state; | 1825 | svm->vmcb->control.int_state = nested_vmcb->control.int_state; |
1807 | svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; | 1826 | svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; |
1808 | if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID) | ||
1809 | nsvm_printk("Injecting Event: 0x%x\n", | ||
1810 | nested_vmcb->control.event_inj); | ||
1811 | svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; | 1827 | svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; |
1812 | svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; | 1828 | svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; |
1813 | 1829 | ||
@@ -1834,7 +1850,7 @@ static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb) | |||
1834 | to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip; | 1850 | to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip; |
1835 | } | 1851 | } |
1836 | 1852 | ||
1837 | static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1853 | static int vmload_interception(struct vcpu_svm *svm) |
1838 | { | 1854 | { |
1839 | struct vmcb *nested_vmcb; | 1855 | struct vmcb *nested_vmcb; |
1840 | 1856 | ||
@@ -1854,7 +1870,7 @@ static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1854 | return 1; | 1870 | return 1; |
1855 | } | 1871 | } |
1856 | 1872 | ||
1857 | static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1873 | static int vmsave_interception(struct vcpu_svm *svm) |
1858 | { | 1874 | { |
1859 | struct vmcb *nested_vmcb; | 1875 | struct vmcb *nested_vmcb; |
1860 | 1876 | ||
@@ -1874,10 +1890,8 @@ static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1874 | return 1; | 1890 | return 1; |
1875 | } | 1891 | } |
1876 | 1892 | ||
1877 | static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1893 | static int vmrun_interception(struct vcpu_svm *svm) |
1878 | { | 1894 | { |
1879 | nsvm_printk("VMrun\n"); | ||
1880 | |||
1881 | if (nested_svm_check_permissions(svm)) | 1895 | if (nested_svm_check_permissions(svm)) |
1882 | return 1; | 1896 | return 1; |
1883 | 1897 | ||
@@ -1904,7 +1918,7 @@ failed: | |||
1904 | return 1; | 1918 | return 1; |
1905 | } | 1919 | } |
1906 | 1920 | ||
1907 | static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1921 | static int stgi_interception(struct vcpu_svm *svm) |
1908 | { | 1922 | { |
1909 | if (nested_svm_check_permissions(svm)) | 1923 | if (nested_svm_check_permissions(svm)) |
1910 | return 1; | 1924 | return 1; |
@@ -1917,7 +1931,7 @@ static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1917 | return 1; | 1931 | return 1; |
1918 | } | 1932 | } |
1919 | 1933 | ||
1920 | static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1934 | static int clgi_interception(struct vcpu_svm *svm) |
1921 | { | 1935 | { |
1922 | if (nested_svm_check_permissions(svm)) | 1936 | if (nested_svm_check_permissions(svm)) |
1923 | return 1; | 1937 | return 1; |
@@ -1934,10 +1948,12 @@ static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1934 | return 1; | 1948 | return 1; |
1935 | } | 1949 | } |
1936 | 1950 | ||
1937 | static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1951 | static int invlpga_interception(struct vcpu_svm *svm) |
1938 | { | 1952 | { |
1939 | struct kvm_vcpu *vcpu = &svm->vcpu; | 1953 | struct kvm_vcpu *vcpu = &svm->vcpu; |
1940 | nsvm_printk("INVLPGA\n"); | 1954 | |
1955 | trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX], | ||
1956 | vcpu->arch.regs[VCPU_REGS_RAX]); | ||
1941 | 1957 | ||
1942 | /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */ | 1958 | /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */ |
1943 | kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]); | 1959 | kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]); |
@@ -1947,15 +1963,21 @@ static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1947 | return 1; | 1963 | return 1; |
1948 | } | 1964 | } |
1949 | 1965 | ||
1950 | static int invalid_op_interception(struct vcpu_svm *svm, | 1966 | static int skinit_interception(struct vcpu_svm *svm) |
1951 | struct kvm_run *kvm_run) | ||
1952 | { | 1967 | { |
1968 | trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]); | ||
1969 | |||
1953 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); | 1970 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); |
1954 | return 1; | 1971 | return 1; |
1955 | } | 1972 | } |
1956 | 1973 | ||
1957 | static int task_switch_interception(struct vcpu_svm *svm, | 1974 | static int invalid_op_interception(struct vcpu_svm *svm) |
1958 | struct kvm_run *kvm_run) | 1975 | { |
1976 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); | ||
1977 | return 1; | ||
1978 | } | ||
1979 | |||
1980 | static int task_switch_interception(struct vcpu_svm *svm) | ||
1959 | { | 1981 | { |
1960 | u16 tss_selector; | 1982 | u16 tss_selector; |
1961 | int reason; | 1983 | int reason; |
@@ -2005,14 +2027,14 @@ static int task_switch_interception(struct vcpu_svm *svm, | |||
2005 | return kvm_task_switch(&svm->vcpu, tss_selector, reason); | 2027 | return kvm_task_switch(&svm->vcpu, tss_selector, reason); |
2006 | } | 2028 | } |
2007 | 2029 | ||
2008 | static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 2030 | static int cpuid_interception(struct vcpu_svm *svm) |
2009 | { | 2031 | { |
2010 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; | 2032 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; |
2011 | kvm_emulate_cpuid(&svm->vcpu); | 2033 | kvm_emulate_cpuid(&svm->vcpu); |
2012 | return 1; | 2034 | return 1; |
2013 | } | 2035 | } |
2014 | 2036 | ||
2015 | static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 2037 | static int iret_interception(struct vcpu_svm *svm) |
2016 | { | 2038 | { |
2017 | ++svm->vcpu.stat.nmi_window_exits; | 2039 | ++svm->vcpu.stat.nmi_window_exits; |
2018 | svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); | 2040 | svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); |
@@ -2020,26 +2042,27 @@ static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
2020 | return 1; | 2042 | return 1; |
2021 | } | 2043 | } |
2022 | 2044 | ||
2023 | static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 2045 | static int invlpg_interception(struct vcpu_svm *svm) |
2024 | { | 2046 | { |
2025 | if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE) | 2047 | if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE) |
2026 | pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); | 2048 | pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); |
2027 | return 1; | 2049 | return 1; |
2028 | } | 2050 | } |
2029 | 2051 | ||
2030 | static int emulate_on_interception(struct vcpu_svm *svm, | 2052 | static int emulate_on_interception(struct vcpu_svm *svm) |
2031 | struct kvm_run *kvm_run) | ||
2032 | { | 2053 | { |
2033 | if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE) | 2054 | if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE) |
2034 | pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); | 2055 | pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); |
2035 | return 1; | 2056 | return 1; |
2036 | } | 2057 | } |
2037 | 2058 | ||
2038 | static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 2059 | static int cr8_write_interception(struct vcpu_svm *svm) |
2039 | { | 2060 | { |
2061 | struct kvm_run *kvm_run = svm->vcpu.run; | ||
2062 | |||
2040 | u8 cr8_prev = kvm_get_cr8(&svm->vcpu); | 2063 | u8 cr8_prev = kvm_get_cr8(&svm->vcpu); |
2041 | /* instruction emulation calls kvm_set_cr8() */ | 2064 | /* instruction emulation calls kvm_set_cr8() */ |
2042 | emulate_instruction(&svm->vcpu, NULL, 0, 0, 0); | 2065 | emulate_instruction(&svm->vcpu, 0, 0, 0); |
2043 | if (irqchip_in_kernel(svm->vcpu.kvm)) { | 2066 | if (irqchip_in_kernel(svm->vcpu.kvm)) { |
2044 | svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; | 2067 | svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; |
2045 | return 1; | 2068 | return 1; |
@@ -2056,10 +2079,14 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | |||
2056 | 2079 | ||
2057 | switch (ecx) { | 2080 | switch (ecx) { |
2058 | case MSR_IA32_TSC: { | 2081 | case MSR_IA32_TSC: { |
2059 | u64 tsc; | 2082 | u64 tsc_offset; |
2060 | 2083 | ||
2061 | rdtscll(tsc); | 2084 | if (is_nested(svm)) |
2062 | *data = svm->vmcb->control.tsc_offset + tsc; | 2085 | tsc_offset = svm->nested.hsave->control.tsc_offset; |
2086 | else | ||
2087 | tsc_offset = svm->vmcb->control.tsc_offset; | ||
2088 | |||
2089 | *data = tsc_offset + native_read_tsc(); | ||
2063 | break; | 2090 | break; |
2064 | } | 2091 | } |
2065 | case MSR_K6_STAR: | 2092 | case MSR_K6_STAR: |
@@ -2121,7 +2148,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | |||
2121 | return 0; | 2148 | return 0; |
2122 | } | 2149 | } |
2123 | 2150 | ||
2124 | static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 2151 | static int rdmsr_interception(struct vcpu_svm *svm) |
2125 | { | 2152 | { |
2126 | u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; | 2153 | u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; |
2127 | u64 data; | 2154 | u64 data; |
@@ -2145,10 +2172,17 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | |||
2145 | 2172 | ||
2146 | switch (ecx) { | 2173 | switch (ecx) { |
2147 | case MSR_IA32_TSC: { | 2174 | case MSR_IA32_TSC: { |
2148 | u64 tsc; | 2175 | u64 tsc_offset = data - native_read_tsc(); |
2176 | u64 g_tsc_offset = 0; | ||
2177 | |||
2178 | if (is_nested(svm)) { | ||
2179 | g_tsc_offset = svm->vmcb->control.tsc_offset - | ||
2180 | svm->nested.hsave->control.tsc_offset; | ||
2181 | svm->nested.hsave->control.tsc_offset = tsc_offset; | ||
2182 | } | ||
2183 | |||
2184 | svm->vmcb->control.tsc_offset = tsc_offset + g_tsc_offset; | ||
2149 | 2185 | ||
2150 | rdtscll(tsc); | ||
2151 | svm->vmcb->control.tsc_offset = data - tsc; | ||
2152 | break; | 2186 | break; |
2153 | } | 2187 | } |
2154 | case MSR_K6_STAR: | 2188 | case MSR_K6_STAR: |
@@ -2207,7 +2241,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | |||
2207 | return 0; | 2241 | return 0; |
2208 | } | 2242 | } |
2209 | 2243 | ||
2210 | static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 2244 | static int wrmsr_interception(struct vcpu_svm *svm) |
2211 | { | 2245 | { |
2212 | u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; | 2246 | u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; |
2213 | u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u) | 2247 | u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u) |
@@ -2223,17 +2257,18 @@ static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
2223 | return 1; | 2257 | return 1; |
2224 | } | 2258 | } |
2225 | 2259 | ||
2226 | static int msr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 2260 | static int msr_interception(struct vcpu_svm *svm) |
2227 | { | 2261 | { |
2228 | if (svm->vmcb->control.exit_info_1) | 2262 | if (svm->vmcb->control.exit_info_1) |
2229 | return wrmsr_interception(svm, kvm_run); | 2263 | return wrmsr_interception(svm); |
2230 | else | 2264 | else |
2231 | return rdmsr_interception(svm, kvm_run); | 2265 | return rdmsr_interception(svm); |
2232 | } | 2266 | } |
2233 | 2267 | ||
2234 | static int interrupt_window_interception(struct vcpu_svm *svm, | 2268 | static int interrupt_window_interception(struct vcpu_svm *svm) |
2235 | struct kvm_run *kvm_run) | ||
2236 | { | 2269 | { |
2270 | struct kvm_run *kvm_run = svm->vcpu.run; | ||
2271 | |||
2237 | svm_clear_vintr(svm); | 2272 | svm_clear_vintr(svm); |
2238 | svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; | 2273 | svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; |
2239 | /* | 2274 | /* |
@@ -2251,8 +2286,13 @@ static int interrupt_window_interception(struct vcpu_svm *svm, | |||
2251 | return 1; | 2286 | return 1; |
2252 | } | 2287 | } |
2253 | 2288 | ||
2254 | static int (*svm_exit_handlers[])(struct vcpu_svm *svm, | 2289 | static int pause_interception(struct vcpu_svm *svm) |
2255 | struct kvm_run *kvm_run) = { | 2290 | { |
2291 | kvm_vcpu_on_spin(&(svm->vcpu)); | ||
2292 | return 1; | ||
2293 | } | ||
2294 | |||
2295 | static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | ||
2256 | [SVM_EXIT_READ_CR0] = emulate_on_interception, | 2296 | [SVM_EXIT_READ_CR0] = emulate_on_interception, |
2257 | [SVM_EXIT_READ_CR3] = emulate_on_interception, | 2297 | [SVM_EXIT_READ_CR3] = emulate_on_interception, |
2258 | [SVM_EXIT_READ_CR4] = emulate_on_interception, | 2298 | [SVM_EXIT_READ_CR4] = emulate_on_interception, |
@@ -2287,6 +2327,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, | |||
2287 | [SVM_EXIT_CPUID] = cpuid_interception, | 2327 | [SVM_EXIT_CPUID] = cpuid_interception, |
2288 | [SVM_EXIT_IRET] = iret_interception, | 2328 | [SVM_EXIT_IRET] = iret_interception, |
2289 | [SVM_EXIT_INVD] = emulate_on_interception, | 2329 | [SVM_EXIT_INVD] = emulate_on_interception, |
2330 | [SVM_EXIT_PAUSE] = pause_interception, | ||
2290 | [SVM_EXIT_HLT] = halt_interception, | 2331 | [SVM_EXIT_HLT] = halt_interception, |
2291 | [SVM_EXIT_INVLPG] = invlpg_interception, | 2332 | [SVM_EXIT_INVLPG] = invlpg_interception, |
2292 | [SVM_EXIT_INVLPGA] = invlpga_interception, | 2333 | [SVM_EXIT_INVLPGA] = invlpga_interception, |
@@ -2300,26 +2341,36 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, | |||
2300 | [SVM_EXIT_VMSAVE] = vmsave_interception, | 2341 | [SVM_EXIT_VMSAVE] = vmsave_interception, |
2301 | [SVM_EXIT_STGI] = stgi_interception, | 2342 | [SVM_EXIT_STGI] = stgi_interception, |
2302 | [SVM_EXIT_CLGI] = clgi_interception, | 2343 | [SVM_EXIT_CLGI] = clgi_interception, |
2303 | [SVM_EXIT_SKINIT] = invalid_op_interception, | 2344 | [SVM_EXIT_SKINIT] = skinit_interception, |
2304 | [SVM_EXIT_WBINVD] = emulate_on_interception, | 2345 | [SVM_EXIT_WBINVD] = emulate_on_interception, |
2305 | [SVM_EXIT_MONITOR] = invalid_op_interception, | 2346 | [SVM_EXIT_MONITOR] = invalid_op_interception, |
2306 | [SVM_EXIT_MWAIT] = invalid_op_interception, | 2347 | [SVM_EXIT_MWAIT] = invalid_op_interception, |
2307 | [SVM_EXIT_NPF] = pf_interception, | 2348 | [SVM_EXIT_NPF] = pf_interception, |
2308 | }; | 2349 | }; |
2309 | 2350 | ||
2310 | static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | 2351 | static int handle_exit(struct kvm_vcpu *vcpu) |
2311 | { | 2352 | { |
2312 | struct vcpu_svm *svm = to_svm(vcpu); | 2353 | struct vcpu_svm *svm = to_svm(vcpu); |
2354 | struct kvm_run *kvm_run = vcpu->run; | ||
2313 | u32 exit_code = svm->vmcb->control.exit_code; | 2355 | u32 exit_code = svm->vmcb->control.exit_code; |
2314 | 2356 | ||
2315 | trace_kvm_exit(exit_code, svm->vmcb->save.rip); | 2357 | trace_kvm_exit(exit_code, svm->vmcb->save.rip); |
2316 | 2358 | ||
2359 | if (unlikely(svm->nested.exit_required)) { | ||
2360 | nested_svm_vmexit(svm); | ||
2361 | svm->nested.exit_required = false; | ||
2362 | |||
2363 | return 1; | ||
2364 | } | ||
2365 | |||
2317 | if (is_nested(svm)) { | 2366 | if (is_nested(svm)) { |
2318 | int vmexit; | 2367 | int vmexit; |
2319 | 2368 | ||
2320 | nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n", | 2369 | trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code, |
2321 | exit_code, svm->vmcb->control.exit_info_1, | 2370 | svm->vmcb->control.exit_info_1, |
2322 | svm->vmcb->control.exit_info_2, svm->vmcb->save.rip); | 2371 | svm->vmcb->control.exit_info_2, |
2372 | svm->vmcb->control.exit_int_info, | ||
2373 | svm->vmcb->control.exit_int_info_err); | ||
2323 | 2374 | ||
2324 | vmexit = nested_svm_exit_special(svm); | 2375 | vmexit = nested_svm_exit_special(svm); |
2325 | 2376 | ||
@@ -2369,7 +2420,7 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
2369 | return 0; | 2420 | return 0; |
2370 | } | 2421 | } |
2371 | 2422 | ||
2372 | return svm_exit_handlers[exit_code](svm, kvm_run); | 2423 | return svm_exit_handlers[exit_code](svm); |
2373 | } | 2424 | } |
2374 | 2425 | ||
2375 | static void reload_tss(struct kvm_vcpu *vcpu) | 2426 | static void reload_tss(struct kvm_vcpu *vcpu) |
@@ -2446,20 +2497,47 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu) | |||
2446 | !(svm->vcpu.arch.hflags & HF_NMI_MASK); | 2497 | !(svm->vcpu.arch.hflags & HF_NMI_MASK); |
2447 | } | 2498 | } |
2448 | 2499 | ||
2500 | static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) | ||
2501 | { | ||
2502 | struct vcpu_svm *svm = to_svm(vcpu); | ||
2503 | |||
2504 | return !!(svm->vcpu.arch.hflags & HF_NMI_MASK); | ||
2505 | } | ||
2506 | |||
2507 | static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) | ||
2508 | { | ||
2509 | struct vcpu_svm *svm = to_svm(vcpu); | ||
2510 | |||
2511 | if (masked) { | ||
2512 | svm->vcpu.arch.hflags |= HF_NMI_MASK; | ||
2513 | svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET); | ||
2514 | } else { | ||
2515 | svm->vcpu.arch.hflags &= ~HF_NMI_MASK; | ||
2516 | svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); | ||
2517 | } | ||
2518 | } | ||
2519 | |||
2449 | static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) | 2520 | static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) |
2450 | { | 2521 | { |
2451 | struct vcpu_svm *svm = to_svm(vcpu); | 2522 | struct vcpu_svm *svm = to_svm(vcpu); |
2452 | struct vmcb *vmcb = svm->vmcb; | 2523 | struct vmcb *vmcb = svm->vmcb; |
2453 | return (vmcb->save.rflags & X86_EFLAGS_IF) && | 2524 | int ret; |
2454 | !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && | 2525 | |
2455 | gif_set(svm) && | 2526 | if (!gif_set(svm) || |
2456 | !(is_nested(svm) && (svm->vcpu.arch.hflags & HF_VINTR_MASK)); | 2527 | (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)) |
2528 | return 0; | ||
2529 | |||
2530 | ret = !!(vmcb->save.rflags & X86_EFLAGS_IF); | ||
2531 | |||
2532 | if (is_nested(svm)) | ||
2533 | return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK); | ||
2534 | |||
2535 | return ret; | ||
2457 | } | 2536 | } |
2458 | 2537 | ||
2459 | static void enable_irq_window(struct kvm_vcpu *vcpu) | 2538 | static void enable_irq_window(struct kvm_vcpu *vcpu) |
2460 | { | 2539 | { |
2461 | struct vcpu_svm *svm = to_svm(vcpu); | 2540 | struct vcpu_svm *svm = to_svm(vcpu); |
2462 | nsvm_printk("Trying to open IRQ window\n"); | ||
2463 | 2541 | ||
2464 | nested_svm_intr(svm); | 2542 | nested_svm_intr(svm); |
2465 | 2543 | ||
@@ -2484,7 +2562,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) | |||
2484 | /* Something prevents NMI from been injected. Single step over | 2562 | /* Something prevents NMI from been injected. Single step over |
2485 | possible problem (IRET or exception injection or interrupt | 2563 | possible problem (IRET or exception injection or interrupt |
2486 | shadow) */ | 2564 | shadow) */ |
2487 | vcpu->arch.singlestep = true; | 2565 | svm->nmi_singlestep = true; |
2488 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); | 2566 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); |
2489 | update_db_intercept(vcpu); | 2567 | update_db_intercept(vcpu); |
2490 | } | 2568 | } |
@@ -2574,13 +2652,20 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) | |||
2574 | #define R "e" | 2652 | #define R "e" |
2575 | #endif | 2653 | #endif |
2576 | 2654 | ||
2577 | static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2655 | static void svm_vcpu_run(struct kvm_vcpu *vcpu) |
2578 | { | 2656 | { |
2579 | struct vcpu_svm *svm = to_svm(vcpu); | 2657 | struct vcpu_svm *svm = to_svm(vcpu); |
2580 | u16 fs_selector; | 2658 | u16 fs_selector; |
2581 | u16 gs_selector; | 2659 | u16 gs_selector; |
2582 | u16 ldt_selector; | 2660 | u16 ldt_selector; |
2583 | 2661 | ||
2662 | /* | ||
2663 | * A vmexit emulation is required before the vcpu can be executed | ||
2664 | * again. | ||
2665 | */ | ||
2666 | if (unlikely(svm->nested.exit_required)) | ||
2667 | return; | ||
2668 | |||
2584 | svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; | 2669 | svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; |
2585 | svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; | 2670 | svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; |
2586 | svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; | 2671 | svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; |
@@ -2879,6 +2964,8 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
2879 | .queue_exception = svm_queue_exception, | 2964 | .queue_exception = svm_queue_exception, |
2880 | .interrupt_allowed = svm_interrupt_allowed, | 2965 | .interrupt_allowed = svm_interrupt_allowed, |
2881 | .nmi_allowed = svm_nmi_allowed, | 2966 | .nmi_allowed = svm_nmi_allowed, |
2967 | .get_nmi_mask = svm_get_nmi_mask, | ||
2968 | .set_nmi_mask = svm_set_nmi_mask, | ||
2882 | .enable_nmi_window = enable_nmi_window, | 2969 | .enable_nmi_window = enable_nmi_window, |
2883 | .enable_irq_window = enable_irq_window, | 2970 | .enable_irq_window = enable_irq_window, |
2884 | .update_cr8_intercept = update_cr8_intercept, | 2971 | .update_cr8_intercept = update_cr8_intercept, |
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 0d480e77eacf..816e0449db0b 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h | |||
@@ -349,6 +349,171 @@ TRACE_EVENT(kvm_apic_accept_irq, | |||
349 | __entry->coalesced ? " (coalesced)" : "") | 349 | __entry->coalesced ? " (coalesced)" : "") |
350 | ); | 350 | ); |
351 | 351 | ||
352 | /* | ||
353 | * Tracepoint for nested VMRUN | ||
354 | */ | ||
355 | TRACE_EVENT(kvm_nested_vmrun, | ||
356 | TP_PROTO(__u64 rip, __u64 vmcb, __u64 nested_rip, __u32 int_ctl, | ||
357 | __u32 event_inj, bool npt), | ||
358 | TP_ARGS(rip, vmcb, nested_rip, int_ctl, event_inj, npt), | ||
359 | |||
360 | TP_STRUCT__entry( | ||
361 | __field( __u64, rip ) | ||
362 | __field( __u64, vmcb ) | ||
363 | __field( __u64, nested_rip ) | ||
364 | __field( __u32, int_ctl ) | ||
365 | __field( __u32, event_inj ) | ||
366 | __field( bool, npt ) | ||
367 | ), | ||
368 | |||
369 | TP_fast_assign( | ||
370 | __entry->rip = rip; | ||
371 | __entry->vmcb = vmcb; | ||
372 | __entry->nested_rip = nested_rip; | ||
373 | __entry->int_ctl = int_ctl; | ||
374 | __entry->event_inj = event_inj; | ||
375 | __entry->npt = npt; | ||
376 | ), | ||
377 | |||
378 | TP_printk("rip: 0x%016llx vmcb: 0x%016llx nrip: 0x%016llx int_ctl: 0x%08x " | ||
379 | "event_inj: 0x%08x npt: %s\n", | ||
380 | __entry->rip, __entry->vmcb, __entry->nested_rip, | ||
381 | __entry->int_ctl, __entry->event_inj, | ||
382 | __entry->npt ? "on" : "off") | ||
383 | ); | ||
384 | |||
385 | /* | ||
386 | * Tracepoint for #VMEXIT while nested | ||
387 | */ | ||
388 | TRACE_EVENT(kvm_nested_vmexit, | ||
389 | TP_PROTO(__u64 rip, __u32 exit_code, | ||
390 | __u64 exit_info1, __u64 exit_info2, | ||
391 | __u32 exit_int_info, __u32 exit_int_info_err), | ||
392 | TP_ARGS(rip, exit_code, exit_info1, exit_info2, | ||
393 | exit_int_info, exit_int_info_err), | ||
394 | |||
395 | TP_STRUCT__entry( | ||
396 | __field( __u64, rip ) | ||
397 | __field( __u32, exit_code ) | ||
398 | __field( __u64, exit_info1 ) | ||
399 | __field( __u64, exit_info2 ) | ||
400 | __field( __u32, exit_int_info ) | ||
401 | __field( __u32, exit_int_info_err ) | ||
402 | ), | ||
403 | |||
404 | TP_fast_assign( | ||
405 | __entry->rip = rip; | ||
406 | __entry->exit_code = exit_code; | ||
407 | __entry->exit_info1 = exit_info1; | ||
408 | __entry->exit_info2 = exit_info2; | ||
409 | __entry->exit_int_info = exit_int_info; | ||
410 | __entry->exit_int_info_err = exit_int_info_err; | ||
411 | ), | ||
412 | TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx " | ||
413 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n", | ||
414 | __entry->rip, | ||
415 | ftrace_print_symbols_seq(p, __entry->exit_code, | ||
416 | kvm_x86_ops->exit_reasons_str), | ||
417 | __entry->exit_info1, __entry->exit_info2, | ||
418 | __entry->exit_int_info, __entry->exit_int_info_err) | ||
419 | ); | ||
420 | |||
421 | /* | ||
422 | * Tracepoint for #VMEXIT reinjected to the guest | ||
423 | */ | ||
424 | TRACE_EVENT(kvm_nested_vmexit_inject, | ||
425 | TP_PROTO(__u32 exit_code, | ||
426 | __u64 exit_info1, __u64 exit_info2, | ||
427 | __u32 exit_int_info, __u32 exit_int_info_err), | ||
428 | TP_ARGS(exit_code, exit_info1, exit_info2, | ||
429 | exit_int_info, exit_int_info_err), | ||
430 | |||
431 | TP_STRUCT__entry( | ||
432 | __field( __u32, exit_code ) | ||
433 | __field( __u64, exit_info1 ) | ||
434 | __field( __u64, exit_info2 ) | ||
435 | __field( __u32, exit_int_info ) | ||
436 | __field( __u32, exit_int_info_err ) | ||
437 | ), | ||
438 | |||
439 | TP_fast_assign( | ||
440 | __entry->exit_code = exit_code; | ||
441 | __entry->exit_info1 = exit_info1; | ||
442 | __entry->exit_info2 = exit_info2; | ||
443 | __entry->exit_int_info = exit_int_info; | ||
444 | __entry->exit_int_info_err = exit_int_info_err; | ||
445 | ), | ||
446 | |||
447 | TP_printk("reason: %s ext_inf1: 0x%016llx " | ||
448 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n", | ||
449 | ftrace_print_symbols_seq(p, __entry->exit_code, | ||
450 | kvm_x86_ops->exit_reasons_str), | ||
451 | __entry->exit_info1, __entry->exit_info2, | ||
452 | __entry->exit_int_info, __entry->exit_int_info_err) | ||
453 | ); | ||
454 | |||
455 | /* | ||
456 | * Tracepoint for nested #vmexit because of interrupt pending | ||
457 | */ | ||
458 | TRACE_EVENT(kvm_nested_intr_vmexit, | ||
459 | TP_PROTO(__u64 rip), | ||
460 | TP_ARGS(rip), | ||
461 | |||
462 | TP_STRUCT__entry( | ||
463 | __field( __u64, rip ) | ||
464 | ), | ||
465 | |||
466 | TP_fast_assign( | ||
467 | __entry->rip = rip | ||
468 | ), | ||
469 | |||
470 | TP_printk("rip: 0x%016llx\n", __entry->rip) | ||
471 | ); | ||
472 | |||
473 | /* | ||
474 | * Tracepoint for nested #vmexit because of interrupt pending | ||
475 | */ | ||
476 | TRACE_EVENT(kvm_invlpga, | ||
477 | TP_PROTO(__u64 rip, int asid, u64 address), | ||
478 | TP_ARGS(rip, asid, address), | ||
479 | |||
480 | TP_STRUCT__entry( | ||
481 | __field( __u64, rip ) | ||
482 | __field( int, asid ) | ||
483 | __field( __u64, address ) | ||
484 | ), | ||
485 | |||
486 | TP_fast_assign( | ||
487 | __entry->rip = rip; | ||
488 | __entry->asid = asid; | ||
489 | __entry->address = address; | ||
490 | ), | ||
491 | |||
492 | TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx\n", | ||
493 | __entry->rip, __entry->asid, __entry->address) | ||
494 | ); | ||
495 | |||
496 | /* | ||
497 | * Tracepoint for nested #vmexit because of interrupt pending | ||
498 | */ | ||
499 | TRACE_EVENT(kvm_skinit, | ||
500 | TP_PROTO(__u64 rip, __u32 slb), | ||
501 | TP_ARGS(rip, slb), | ||
502 | |||
503 | TP_STRUCT__entry( | ||
504 | __field( __u64, rip ) | ||
505 | __field( __u32, slb ) | ||
506 | ), | ||
507 | |||
508 | TP_fast_assign( | ||
509 | __entry->rip = rip; | ||
510 | __entry->slb = slb; | ||
511 | ), | ||
512 | |||
513 | TP_printk("rip: 0x%016llx slb: 0x%08x\n", | ||
514 | __entry->rip, __entry->slb) | ||
515 | ); | ||
516 | |||
352 | #endif /* _TRACE_KVM_H */ | 517 | #endif /* _TRACE_KVM_H */ |
353 | 518 | ||
354 | /* This part must be outside protection */ | 519 | /* This part must be outside protection */ |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f3812014bd0b..d4918d6fc924 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -61,12 +61,37 @@ module_param_named(unrestricted_guest, | |||
61 | static int __read_mostly emulate_invalid_guest_state = 0; | 61 | static int __read_mostly emulate_invalid_guest_state = 0; |
62 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); | 62 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); |
63 | 63 | ||
64 | /* | ||
65 | * These 2 parameters are used to config the controls for Pause-Loop Exiting: | ||
66 | * ple_gap: upper bound on the amount of time between two successive | ||
67 | * executions of PAUSE in a loop. Also indicate if ple enabled. | ||
68 | * According to test, this time is usually small than 41 cycles. | ||
69 | * ple_window: upper bound on the amount of time a guest is allowed to execute | ||
70 | * in a PAUSE loop. Tests indicate that most spinlocks are held for | ||
71 | * less than 2^12 cycles | ||
72 | * Time is measured based on a counter that runs at the same rate as the TSC, | ||
73 | * refer SDM volume 3b section 21.6.13 & 22.1.3. | ||
74 | */ | ||
75 | #define KVM_VMX_DEFAULT_PLE_GAP 41 | ||
76 | #define KVM_VMX_DEFAULT_PLE_WINDOW 4096 | ||
77 | static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP; | ||
78 | module_param(ple_gap, int, S_IRUGO); | ||
79 | |||
80 | static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; | ||
81 | module_param(ple_window, int, S_IRUGO); | ||
82 | |||
64 | struct vmcs { | 83 | struct vmcs { |
65 | u32 revision_id; | 84 | u32 revision_id; |
66 | u32 abort; | 85 | u32 abort; |
67 | char data[0]; | 86 | char data[0]; |
68 | }; | 87 | }; |
69 | 88 | ||
89 | struct shared_msr_entry { | ||
90 | unsigned index; | ||
91 | u64 data; | ||
92 | u64 mask; | ||
93 | }; | ||
94 | |||
70 | struct vcpu_vmx { | 95 | struct vcpu_vmx { |
71 | struct kvm_vcpu vcpu; | 96 | struct kvm_vcpu vcpu; |
72 | struct list_head local_vcpus_link; | 97 | struct list_head local_vcpus_link; |
@@ -74,13 +99,12 @@ struct vcpu_vmx { | |||
74 | int launched; | 99 | int launched; |
75 | u8 fail; | 100 | u8 fail; |
76 | u32 idt_vectoring_info; | 101 | u32 idt_vectoring_info; |
77 | struct kvm_msr_entry *guest_msrs; | 102 | struct shared_msr_entry *guest_msrs; |
78 | struct kvm_msr_entry *host_msrs; | ||
79 | int nmsrs; | 103 | int nmsrs; |
80 | int save_nmsrs; | 104 | int save_nmsrs; |
81 | int msr_offset_efer; | ||
82 | #ifdef CONFIG_X86_64 | 105 | #ifdef CONFIG_X86_64 |
83 | int msr_offset_kernel_gs_base; | 106 | u64 msr_host_kernel_gs_base; |
107 | u64 msr_guest_kernel_gs_base; | ||
84 | #endif | 108 | #endif |
85 | struct vmcs *vmcs; | 109 | struct vmcs *vmcs; |
86 | struct { | 110 | struct { |
@@ -88,7 +112,6 @@ struct vcpu_vmx { | |||
88 | u16 fs_sel, gs_sel, ldt_sel; | 112 | u16 fs_sel, gs_sel, ldt_sel; |
89 | int gs_ldt_reload_needed; | 113 | int gs_ldt_reload_needed; |
90 | int fs_reload_needed; | 114 | int fs_reload_needed; |
91 | int guest_efer_loaded; | ||
92 | } host_state; | 115 | } host_state; |
93 | struct { | 116 | struct { |
94 | int vm86_active; | 117 | int vm86_active; |
@@ -107,7 +130,6 @@ struct vcpu_vmx { | |||
107 | } rmode; | 130 | } rmode; |
108 | int vpid; | 131 | int vpid; |
109 | bool emulation_required; | 132 | bool emulation_required; |
110 | enum emulation_result invalid_state_emulation_result; | ||
111 | 133 | ||
112 | /* Support for vnmi-less CPUs */ | 134 | /* Support for vnmi-less CPUs */ |
113 | int soft_vnmi_blocked; | 135 | int soft_vnmi_blocked; |
@@ -176,6 +198,8 @@ static struct kvm_vmx_segment_field { | |||
176 | VMX_SEGMENT_FIELD(LDTR), | 198 | VMX_SEGMENT_FIELD(LDTR), |
177 | }; | 199 | }; |
178 | 200 | ||
201 | static u64 host_efer; | ||
202 | |||
179 | static void ept_save_pdptrs(struct kvm_vcpu *vcpu); | 203 | static void ept_save_pdptrs(struct kvm_vcpu *vcpu); |
180 | 204 | ||
181 | /* | 205 | /* |
@@ -184,28 +208,12 @@ static void ept_save_pdptrs(struct kvm_vcpu *vcpu); | |||
184 | */ | 208 | */ |
185 | static const u32 vmx_msr_index[] = { | 209 | static const u32 vmx_msr_index[] = { |
186 | #ifdef CONFIG_X86_64 | 210 | #ifdef CONFIG_X86_64 |
187 | MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, MSR_KERNEL_GS_BASE, | 211 | MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, |
188 | #endif | 212 | #endif |
189 | MSR_EFER, MSR_K6_STAR, | 213 | MSR_EFER, MSR_K6_STAR, |
190 | }; | 214 | }; |
191 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) | 215 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) |
192 | 216 | ||
193 | static void load_msrs(struct kvm_msr_entry *e, int n) | ||
194 | { | ||
195 | int i; | ||
196 | |||
197 | for (i = 0; i < n; ++i) | ||
198 | wrmsrl(e[i].index, e[i].data); | ||
199 | } | ||
200 | |||
201 | static void save_msrs(struct kvm_msr_entry *e, int n) | ||
202 | { | ||
203 | int i; | ||
204 | |||
205 | for (i = 0; i < n; ++i) | ||
206 | rdmsrl(e[i].index, e[i].data); | ||
207 | } | ||
208 | |||
209 | static inline int is_page_fault(u32 intr_info) | 217 | static inline int is_page_fault(u32 intr_info) |
210 | { | 218 | { |
211 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 219 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | |
@@ -320,6 +328,12 @@ static inline int cpu_has_vmx_unrestricted_guest(void) | |||
320 | SECONDARY_EXEC_UNRESTRICTED_GUEST; | 328 | SECONDARY_EXEC_UNRESTRICTED_GUEST; |
321 | } | 329 | } |
322 | 330 | ||
331 | static inline int cpu_has_vmx_ple(void) | ||
332 | { | ||
333 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
334 | SECONDARY_EXEC_PAUSE_LOOP_EXITING; | ||
335 | } | ||
336 | |||
323 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) | 337 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) |
324 | { | 338 | { |
325 | return flexpriority_enabled && | 339 | return flexpriority_enabled && |
@@ -348,7 +362,7 @@ static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) | |||
348 | int i; | 362 | int i; |
349 | 363 | ||
350 | for (i = 0; i < vmx->nmsrs; ++i) | 364 | for (i = 0; i < vmx->nmsrs; ++i) |
351 | if (vmx->guest_msrs[i].index == msr) | 365 | if (vmx_msr_index[vmx->guest_msrs[i].index] == msr) |
352 | return i; | 366 | return i; |
353 | return -1; | 367 | return -1; |
354 | } | 368 | } |
@@ -379,7 +393,7 @@ static inline void __invept(int ext, u64 eptp, gpa_t gpa) | |||
379 | : : "a" (&operand), "c" (ext) : "cc", "memory"); | 393 | : : "a" (&operand), "c" (ext) : "cc", "memory"); |
380 | } | 394 | } |
381 | 395 | ||
382 | static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) | 396 | static struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) |
383 | { | 397 | { |
384 | int i; | 398 | int i; |
385 | 399 | ||
@@ -570,17 +584,12 @@ static void reload_tss(void) | |||
570 | load_TR_desc(); | 584 | load_TR_desc(); |
571 | } | 585 | } |
572 | 586 | ||
573 | static void load_transition_efer(struct vcpu_vmx *vmx) | 587 | static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) |
574 | { | 588 | { |
575 | int efer_offset = vmx->msr_offset_efer; | ||
576 | u64 host_efer; | ||
577 | u64 guest_efer; | 589 | u64 guest_efer; |
578 | u64 ignore_bits; | 590 | u64 ignore_bits; |
579 | 591 | ||
580 | if (efer_offset < 0) | 592 | guest_efer = vmx->vcpu.arch.shadow_efer; |
581 | return; | ||
582 | host_efer = vmx->host_msrs[efer_offset].data; | ||
583 | guest_efer = vmx->guest_msrs[efer_offset].data; | ||
584 | 593 | ||
585 | /* | 594 | /* |
586 | * NX is emulated; LMA and LME handled by hardware; SCE meaninless | 595 | * NX is emulated; LMA and LME handled by hardware; SCE meaninless |
@@ -593,27 +602,17 @@ static void load_transition_efer(struct vcpu_vmx *vmx) | |||
593 | if (guest_efer & EFER_LMA) | 602 | if (guest_efer & EFER_LMA) |
594 | ignore_bits &= ~(u64)EFER_SCE; | 603 | ignore_bits &= ~(u64)EFER_SCE; |
595 | #endif | 604 | #endif |
596 | if ((guest_efer & ~ignore_bits) == (host_efer & ~ignore_bits)) | ||
597 | return; | ||
598 | |||
599 | vmx->host_state.guest_efer_loaded = 1; | ||
600 | guest_efer &= ~ignore_bits; | 605 | guest_efer &= ~ignore_bits; |
601 | guest_efer |= host_efer & ignore_bits; | 606 | guest_efer |= host_efer & ignore_bits; |
602 | wrmsrl(MSR_EFER, guest_efer); | 607 | vmx->guest_msrs[efer_offset].data = guest_efer; |
603 | vmx->vcpu.stat.efer_reload++; | 608 | vmx->guest_msrs[efer_offset].mask = ~ignore_bits; |
604 | } | 609 | return true; |
605 | |||
606 | static void reload_host_efer(struct vcpu_vmx *vmx) | ||
607 | { | ||
608 | if (vmx->host_state.guest_efer_loaded) { | ||
609 | vmx->host_state.guest_efer_loaded = 0; | ||
610 | load_msrs(vmx->host_msrs + vmx->msr_offset_efer, 1); | ||
611 | } | ||
612 | } | 610 | } |
613 | 611 | ||
614 | static void vmx_save_host_state(struct kvm_vcpu *vcpu) | 612 | static void vmx_save_host_state(struct kvm_vcpu *vcpu) |
615 | { | 613 | { |
616 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 614 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
615 | int i; | ||
617 | 616 | ||
618 | if (vmx->host_state.loaded) | 617 | if (vmx->host_state.loaded) |
619 | return; | 618 | return; |
@@ -650,13 +649,15 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) | |||
650 | #endif | 649 | #endif |
651 | 650 | ||
652 | #ifdef CONFIG_X86_64 | 651 | #ifdef CONFIG_X86_64 |
653 | if (is_long_mode(&vmx->vcpu)) | 652 | if (is_long_mode(&vmx->vcpu)) { |
654 | save_msrs(vmx->host_msrs + | 653 | rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); |
655 | vmx->msr_offset_kernel_gs_base, 1); | 654 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); |
656 | 655 | } | |
657 | #endif | 656 | #endif |
658 | load_msrs(vmx->guest_msrs, vmx->save_nmsrs); | 657 | for (i = 0; i < vmx->save_nmsrs; ++i) |
659 | load_transition_efer(vmx); | 658 | kvm_set_shared_msr(vmx->guest_msrs[i].index, |
659 | vmx->guest_msrs[i].data, | ||
660 | vmx->guest_msrs[i].mask); | ||
660 | } | 661 | } |
661 | 662 | ||
662 | static void __vmx_load_host_state(struct vcpu_vmx *vmx) | 663 | static void __vmx_load_host_state(struct vcpu_vmx *vmx) |
@@ -684,9 +685,12 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) | |||
684 | local_irq_restore(flags); | 685 | local_irq_restore(flags); |
685 | } | 686 | } |
686 | reload_tss(); | 687 | reload_tss(); |
687 | save_msrs(vmx->guest_msrs, vmx->save_nmsrs); | 688 | #ifdef CONFIG_X86_64 |
688 | load_msrs(vmx->host_msrs, vmx->save_nmsrs); | 689 | if (is_long_mode(&vmx->vcpu)) { |
689 | reload_host_efer(vmx); | 690 | rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); |
691 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); | ||
692 | } | ||
693 | #endif | ||
690 | } | 694 | } |
691 | 695 | ||
692 | static void vmx_load_host_state(struct vcpu_vmx *vmx) | 696 | static void vmx_load_host_state(struct vcpu_vmx *vmx) |
@@ -709,7 +713,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
709 | if (vcpu->cpu != cpu) { | 713 | if (vcpu->cpu != cpu) { |
710 | vcpu_clear(vmx); | 714 | vcpu_clear(vmx); |
711 | kvm_migrate_timers(vcpu); | 715 | kvm_migrate_timers(vcpu); |
712 | vpid_sync_vcpu_all(vmx); | 716 | set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests); |
713 | local_irq_disable(); | 717 | local_irq_disable(); |
714 | list_add(&vmx->local_vcpus_link, | 718 | list_add(&vmx->local_vcpus_link, |
715 | &per_cpu(vcpus_on_cpu, cpu)); | 719 | &per_cpu(vcpus_on_cpu, cpu)); |
@@ -877,19 +881,14 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | |||
877 | /* | 881 | /* |
878 | * Swap MSR entry in host/guest MSR entry array. | 882 | * Swap MSR entry in host/guest MSR entry array. |
879 | */ | 883 | */ |
880 | #ifdef CONFIG_X86_64 | ||
881 | static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) | 884 | static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) |
882 | { | 885 | { |
883 | struct kvm_msr_entry tmp; | 886 | struct shared_msr_entry tmp; |
884 | 887 | ||
885 | tmp = vmx->guest_msrs[to]; | 888 | tmp = vmx->guest_msrs[to]; |
886 | vmx->guest_msrs[to] = vmx->guest_msrs[from]; | 889 | vmx->guest_msrs[to] = vmx->guest_msrs[from]; |
887 | vmx->guest_msrs[from] = tmp; | 890 | vmx->guest_msrs[from] = tmp; |
888 | tmp = vmx->host_msrs[to]; | ||
889 | vmx->host_msrs[to] = vmx->host_msrs[from]; | ||
890 | vmx->host_msrs[from] = tmp; | ||
891 | } | 891 | } |
892 | #endif | ||
893 | 892 | ||
894 | /* | 893 | /* |
895 | * Set up the vmcs to automatically save and restore system | 894 | * Set up the vmcs to automatically save and restore system |
@@ -898,15 +897,13 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) | |||
898 | */ | 897 | */ |
899 | static void setup_msrs(struct vcpu_vmx *vmx) | 898 | static void setup_msrs(struct vcpu_vmx *vmx) |
900 | { | 899 | { |
901 | int save_nmsrs; | 900 | int save_nmsrs, index; |
902 | unsigned long *msr_bitmap; | 901 | unsigned long *msr_bitmap; |
903 | 902 | ||
904 | vmx_load_host_state(vmx); | 903 | vmx_load_host_state(vmx); |
905 | save_nmsrs = 0; | 904 | save_nmsrs = 0; |
906 | #ifdef CONFIG_X86_64 | 905 | #ifdef CONFIG_X86_64 |
907 | if (is_long_mode(&vmx->vcpu)) { | 906 | if (is_long_mode(&vmx->vcpu)) { |
908 | int index; | ||
909 | |||
910 | index = __find_msr_index(vmx, MSR_SYSCALL_MASK); | 907 | index = __find_msr_index(vmx, MSR_SYSCALL_MASK); |
911 | if (index >= 0) | 908 | if (index >= 0) |
912 | move_msr_up(vmx, index, save_nmsrs++); | 909 | move_msr_up(vmx, index, save_nmsrs++); |
@@ -916,9 +913,6 @@ static void setup_msrs(struct vcpu_vmx *vmx) | |||
916 | index = __find_msr_index(vmx, MSR_CSTAR); | 913 | index = __find_msr_index(vmx, MSR_CSTAR); |
917 | if (index >= 0) | 914 | if (index >= 0) |
918 | move_msr_up(vmx, index, save_nmsrs++); | 915 | move_msr_up(vmx, index, save_nmsrs++); |
919 | index = __find_msr_index(vmx, MSR_KERNEL_GS_BASE); | ||
920 | if (index >= 0) | ||
921 | move_msr_up(vmx, index, save_nmsrs++); | ||
922 | /* | 916 | /* |
923 | * MSR_K6_STAR is only needed on long mode guests, and only | 917 | * MSR_K6_STAR is only needed on long mode guests, and only |
924 | * if efer.sce is enabled. | 918 | * if efer.sce is enabled. |
@@ -928,13 +922,11 @@ static void setup_msrs(struct vcpu_vmx *vmx) | |||
928 | move_msr_up(vmx, index, save_nmsrs++); | 922 | move_msr_up(vmx, index, save_nmsrs++); |
929 | } | 923 | } |
930 | #endif | 924 | #endif |
931 | vmx->save_nmsrs = save_nmsrs; | 925 | index = __find_msr_index(vmx, MSR_EFER); |
926 | if (index >= 0 && update_transition_efer(vmx, index)) | ||
927 | move_msr_up(vmx, index, save_nmsrs++); | ||
932 | 928 | ||
933 | #ifdef CONFIG_X86_64 | 929 | vmx->save_nmsrs = save_nmsrs; |
934 | vmx->msr_offset_kernel_gs_base = | ||
935 | __find_msr_index(vmx, MSR_KERNEL_GS_BASE); | ||
936 | #endif | ||
937 | vmx->msr_offset_efer = __find_msr_index(vmx, MSR_EFER); | ||
938 | 930 | ||
939 | if (cpu_has_vmx_msr_bitmap()) { | 931 | if (cpu_has_vmx_msr_bitmap()) { |
940 | if (is_long_mode(&vmx->vcpu)) | 932 | if (is_long_mode(&vmx->vcpu)) |
@@ -976,7 +968,7 @@ static void guest_write_tsc(u64 guest_tsc, u64 host_tsc) | |||
976 | static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | 968 | static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) |
977 | { | 969 | { |
978 | u64 data; | 970 | u64 data; |
979 | struct kvm_msr_entry *msr; | 971 | struct shared_msr_entry *msr; |
980 | 972 | ||
981 | if (!pdata) { | 973 | if (!pdata) { |
982 | printk(KERN_ERR "BUG: get_msr called with NULL pdata\n"); | 974 | printk(KERN_ERR "BUG: get_msr called with NULL pdata\n"); |
@@ -991,9 +983,13 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
991 | case MSR_GS_BASE: | 983 | case MSR_GS_BASE: |
992 | data = vmcs_readl(GUEST_GS_BASE); | 984 | data = vmcs_readl(GUEST_GS_BASE); |
993 | break; | 985 | break; |
986 | case MSR_KERNEL_GS_BASE: | ||
987 | vmx_load_host_state(to_vmx(vcpu)); | ||
988 | data = to_vmx(vcpu)->msr_guest_kernel_gs_base; | ||
989 | break; | ||
990 | #endif | ||
994 | case MSR_EFER: | 991 | case MSR_EFER: |
995 | return kvm_get_msr_common(vcpu, msr_index, pdata); | 992 | return kvm_get_msr_common(vcpu, msr_index, pdata); |
996 | #endif | ||
997 | case MSR_IA32_TSC: | 993 | case MSR_IA32_TSC: |
998 | data = guest_read_tsc(); | 994 | data = guest_read_tsc(); |
999 | break; | 995 | break; |
@@ -1007,6 +1003,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
1007 | data = vmcs_readl(GUEST_SYSENTER_ESP); | 1003 | data = vmcs_readl(GUEST_SYSENTER_ESP); |
1008 | break; | 1004 | break; |
1009 | default: | 1005 | default: |
1006 | vmx_load_host_state(to_vmx(vcpu)); | ||
1010 | msr = find_msr_entry(to_vmx(vcpu), msr_index); | 1007 | msr = find_msr_entry(to_vmx(vcpu), msr_index); |
1011 | if (msr) { | 1008 | if (msr) { |
1012 | vmx_load_host_state(to_vmx(vcpu)); | 1009 | vmx_load_host_state(to_vmx(vcpu)); |
@@ -1028,7 +1025,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
1028 | static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | 1025 | static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) |
1029 | { | 1026 | { |
1030 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 1027 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
1031 | struct kvm_msr_entry *msr; | 1028 | struct shared_msr_entry *msr; |
1032 | u64 host_tsc; | 1029 | u64 host_tsc; |
1033 | int ret = 0; | 1030 | int ret = 0; |
1034 | 1031 | ||
@@ -1044,6 +1041,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
1044 | case MSR_GS_BASE: | 1041 | case MSR_GS_BASE: |
1045 | vmcs_writel(GUEST_GS_BASE, data); | 1042 | vmcs_writel(GUEST_GS_BASE, data); |
1046 | break; | 1043 | break; |
1044 | case MSR_KERNEL_GS_BASE: | ||
1045 | vmx_load_host_state(vmx); | ||
1046 | vmx->msr_guest_kernel_gs_base = data; | ||
1047 | break; | ||
1047 | #endif | 1048 | #endif |
1048 | case MSR_IA32_SYSENTER_CS: | 1049 | case MSR_IA32_SYSENTER_CS: |
1049 | vmcs_write32(GUEST_SYSENTER_CS, data); | 1050 | vmcs_write32(GUEST_SYSENTER_CS, data); |
@@ -1097,30 +1098,14 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) | |||
1097 | } | 1098 | } |
1098 | } | 1099 | } |
1099 | 1100 | ||
1100 | static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) | 1101 | static void set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) |
1101 | { | 1102 | { |
1102 | int old_debug = vcpu->guest_debug; | ||
1103 | unsigned long flags; | ||
1104 | |||
1105 | vcpu->guest_debug = dbg->control; | ||
1106 | if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE)) | ||
1107 | vcpu->guest_debug = 0; | ||
1108 | |||
1109 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) | 1103 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) |
1110 | vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]); | 1104 | vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]); |
1111 | else | 1105 | else |
1112 | vmcs_writel(GUEST_DR7, vcpu->arch.dr7); | 1106 | vmcs_writel(GUEST_DR7, vcpu->arch.dr7); |
1113 | 1107 | ||
1114 | flags = vmcs_readl(GUEST_RFLAGS); | ||
1115 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | ||
1116 | flags |= X86_EFLAGS_TF | X86_EFLAGS_RF; | ||
1117 | else if (old_debug & KVM_GUESTDBG_SINGLESTEP) | ||
1118 | flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); | ||
1119 | vmcs_writel(GUEST_RFLAGS, flags); | ||
1120 | |||
1121 | update_exception_bitmap(vcpu); | 1108 | update_exception_bitmap(vcpu); |
1122 | |||
1123 | return 0; | ||
1124 | } | 1109 | } |
1125 | 1110 | ||
1126 | static __init int cpu_has_kvm_support(void) | 1111 | static __init int cpu_has_kvm_support(void) |
@@ -1139,12 +1124,15 @@ static __init int vmx_disabled_by_bios(void) | |||
1139 | /* locked but not enabled */ | 1124 | /* locked but not enabled */ |
1140 | } | 1125 | } |
1141 | 1126 | ||
1142 | static void hardware_enable(void *garbage) | 1127 | static int hardware_enable(void *garbage) |
1143 | { | 1128 | { |
1144 | int cpu = raw_smp_processor_id(); | 1129 | int cpu = raw_smp_processor_id(); |
1145 | u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); | 1130 | u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); |
1146 | u64 old; | 1131 | u64 old; |
1147 | 1132 | ||
1133 | if (read_cr4() & X86_CR4_VMXE) | ||
1134 | return -EBUSY; | ||
1135 | |||
1148 | INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); | 1136 | INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); |
1149 | rdmsrl(MSR_IA32_FEATURE_CONTROL, old); | 1137 | rdmsrl(MSR_IA32_FEATURE_CONTROL, old); |
1150 | if ((old & (FEATURE_CONTROL_LOCKED | | 1138 | if ((old & (FEATURE_CONTROL_LOCKED | |
@@ -1159,6 +1147,10 @@ static void hardware_enable(void *garbage) | |||
1159 | asm volatile (ASM_VMX_VMXON_RAX | 1147 | asm volatile (ASM_VMX_VMXON_RAX |
1160 | : : "a"(&phys_addr), "m"(phys_addr) | 1148 | : : "a"(&phys_addr), "m"(phys_addr) |
1161 | : "memory", "cc"); | 1149 | : "memory", "cc"); |
1150 | |||
1151 | ept_sync_global(); | ||
1152 | |||
1153 | return 0; | ||
1162 | } | 1154 | } |
1163 | 1155 | ||
1164 | static void vmclear_local_vcpus(void) | 1156 | static void vmclear_local_vcpus(void) |
@@ -1250,7 +1242,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
1250 | SECONDARY_EXEC_WBINVD_EXITING | | 1242 | SECONDARY_EXEC_WBINVD_EXITING | |
1251 | SECONDARY_EXEC_ENABLE_VPID | | 1243 | SECONDARY_EXEC_ENABLE_VPID | |
1252 | SECONDARY_EXEC_ENABLE_EPT | | 1244 | SECONDARY_EXEC_ENABLE_EPT | |
1253 | SECONDARY_EXEC_UNRESTRICTED_GUEST; | 1245 | SECONDARY_EXEC_UNRESTRICTED_GUEST | |
1246 | SECONDARY_EXEC_PAUSE_LOOP_EXITING; | ||
1254 | if (adjust_vmx_controls(min2, opt2, | 1247 | if (adjust_vmx_controls(min2, opt2, |
1255 | MSR_IA32_VMX_PROCBASED_CTLS2, | 1248 | MSR_IA32_VMX_PROCBASED_CTLS2, |
1256 | &_cpu_based_2nd_exec_control) < 0) | 1249 | &_cpu_based_2nd_exec_control) < 0) |
@@ -1344,15 +1337,17 @@ static void free_kvm_area(void) | |||
1344 | { | 1337 | { |
1345 | int cpu; | 1338 | int cpu; |
1346 | 1339 | ||
1347 | for_each_online_cpu(cpu) | 1340 | for_each_possible_cpu(cpu) { |
1348 | free_vmcs(per_cpu(vmxarea, cpu)); | 1341 | free_vmcs(per_cpu(vmxarea, cpu)); |
1342 | per_cpu(vmxarea, cpu) = NULL; | ||
1343 | } | ||
1349 | } | 1344 | } |
1350 | 1345 | ||
1351 | static __init int alloc_kvm_area(void) | 1346 | static __init int alloc_kvm_area(void) |
1352 | { | 1347 | { |
1353 | int cpu; | 1348 | int cpu; |
1354 | 1349 | ||
1355 | for_each_online_cpu(cpu) { | 1350 | for_each_possible_cpu(cpu) { |
1356 | struct vmcs *vmcs; | 1351 | struct vmcs *vmcs; |
1357 | 1352 | ||
1358 | vmcs = alloc_vmcs_cpu(cpu); | 1353 | vmcs = alloc_vmcs_cpu(cpu); |
@@ -1394,6 +1389,9 @@ static __init int hardware_setup(void) | |||
1394 | if (enable_ept && !cpu_has_vmx_ept_2m_page()) | 1389 | if (enable_ept && !cpu_has_vmx_ept_2m_page()) |
1395 | kvm_disable_largepages(); | 1390 | kvm_disable_largepages(); |
1396 | 1391 | ||
1392 | if (!cpu_has_vmx_ple()) | ||
1393 | ple_gap = 0; | ||
1394 | |||
1397 | return alloc_kvm_area(); | 1395 | return alloc_kvm_area(); |
1398 | } | 1396 | } |
1399 | 1397 | ||
@@ -1536,8 +1534,16 @@ continue_rmode: | |||
1536 | static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) | 1534 | static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) |
1537 | { | 1535 | { |
1538 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 1536 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
1539 | struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); | 1537 | struct shared_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); |
1538 | |||
1539 | if (!msr) | ||
1540 | return; | ||
1540 | 1541 | ||
1542 | /* | ||
1543 | * Force kernel_gs_base reloading before EFER changes, as control | ||
1544 | * of this msr depends on is_long_mode(). | ||
1545 | */ | ||
1546 | vmx_load_host_state(to_vmx(vcpu)); | ||
1541 | vcpu->arch.shadow_efer = efer; | 1547 | vcpu->arch.shadow_efer = efer; |
1542 | if (!msr) | 1548 | if (!msr) |
1543 | return; | 1549 | return; |
@@ -1727,6 +1733,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
1727 | vmcs_write64(EPT_POINTER, eptp); | 1733 | vmcs_write64(EPT_POINTER, eptp); |
1728 | guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 : | 1734 | guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 : |
1729 | vcpu->kvm->arch.ept_identity_map_addr; | 1735 | vcpu->kvm->arch.ept_identity_map_addr; |
1736 | ept_load_pdptrs(vcpu); | ||
1730 | } | 1737 | } |
1731 | 1738 | ||
1732 | vmx_flush_tlb(vcpu); | 1739 | vmx_flush_tlb(vcpu); |
@@ -2302,13 +2309,22 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2302 | ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | 2309 | ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; |
2303 | if (vmx->vpid == 0) | 2310 | if (vmx->vpid == 0) |
2304 | exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; | 2311 | exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; |
2305 | if (!enable_ept) | 2312 | if (!enable_ept) { |
2306 | exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; | 2313 | exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; |
2314 | enable_unrestricted_guest = 0; | ||
2315 | } | ||
2307 | if (!enable_unrestricted_guest) | 2316 | if (!enable_unrestricted_guest) |
2308 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; | 2317 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; |
2318 | if (!ple_gap) | ||
2319 | exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; | ||
2309 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | 2320 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); |
2310 | } | 2321 | } |
2311 | 2322 | ||
2323 | if (ple_gap) { | ||
2324 | vmcs_write32(PLE_GAP, ple_gap); | ||
2325 | vmcs_write32(PLE_WINDOW, ple_window); | ||
2326 | } | ||
2327 | |||
2312 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, !!bypass_guest_pf); | 2328 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, !!bypass_guest_pf); |
2313 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf); | 2329 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf); |
2314 | vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ | 2330 | vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ |
@@ -2376,10 +2392,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2376 | if (wrmsr_safe(index, data_low, data_high) < 0) | 2392 | if (wrmsr_safe(index, data_low, data_high) < 0) |
2377 | continue; | 2393 | continue; |
2378 | data = data_low | ((u64)data_high << 32); | 2394 | data = data_low | ((u64)data_high << 32); |
2379 | vmx->host_msrs[j].index = index; | 2395 | vmx->guest_msrs[j].index = i; |
2380 | vmx->host_msrs[j].reserved = 0; | 2396 | vmx->guest_msrs[j].data = 0; |
2381 | vmx->host_msrs[j].data = data; | 2397 | vmx->guest_msrs[j].mask = -1ull; |
2382 | vmx->guest_msrs[j] = vmx->host_msrs[j]; | ||
2383 | ++vmx->nmsrs; | 2398 | ++vmx->nmsrs; |
2384 | } | 2399 | } |
2385 | 2400 | ||
@@ -2510,7 +2525,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2510 | if (vmx->vpid != 0) | 2525 | if (vmx->vpid != 0) |
2511 | vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); | 2526 | vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); |
2512 | 2527 | ||
2513 | vmx->vcpu.arch.cr0 = 0x60000010; | 2528 | vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; |
2514 | vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */ | 2529 | vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */ |
2515 | vmx_set_cr4(&vmx->vcpu, 0); | 2530 | vmx_set_cr4(&vmx->vcpu, 0); |
2516 | vmx_set_efer(&vmx->vcpu, 0); | 2531 | vmx_set_efer(&vmx->vcpu, 0); |
@@ -2627,6 +2642,34 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) | |||
2627 | GUEST_INTR_STATE_NMI)); | 2642 | GUEST_INTR_STATE_NMI)); |
2628 | } | 2643 | } |
2629 | 2644 | ||
2645 | static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) | ||
2646 | { | ||
2647 | if (!cpu_has_virtual_nmis()) | ||
2648 | return to_vmx(vcpu)->soft_vnmi_blocked; | ||
2649 | else | ||
2650 | return !!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & | ||
2651 | GUEST_INTR_STATE_NMI); | ||
2652 | } | ||
2653 | |||
2654 | static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) | ||
2655 | { | ||
2656 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
2657 | |||
2658 | if (!cpu_has_virtual_nmis()) { | ||
2659 | if (vmx->soft_vnmi_blocked != masked) { | ||
2660 | vmx->soft_vnmi_blocked = masked; | ||
2661 | vmx->vnmi_blocked_time = 0; | ||
2662 | } | ||
2663 | } else { | ||
2664 | if (masked) | ||
2665 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, | ||
2666 | GUEST_INTR_STATE_NMI); | ||
2667 | else | ||
2668 | vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, | ||
2669 | GUEST_INTR_STATE_NMI); | ||
2670 | } | ||
2671 | } | ||
2672 | |||
2630 | static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) | 2673 | static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) |
2631 | { | 2674 | { |
2632 | return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && | 2675 | return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && |
@@ -2659,7 +2702,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, | |||
2659 | * Cause the #SS fault with 0 error code in VM86 mode. | 2702 | * Cause the #SS fault with 0 error code in VM86 mode. |
2660 | */ | 2703 | */ |
2661 | if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) | 2704 | if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) |
2662 | if (emulate_instruction(vcpu, NULL, 0, 0, 0) == EMULATE_DONE) | 2705 | if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE) |
2663 | return 1; | 2706 | return 1; |
2664 | /* | 2707 | /* |
2665 | * Forward all other exceptions that are valid in real mode. | 2708 | * Forward all other exceptions that are valid in real mode. |
@@ -2710,15 +2753,16 @@ static void kvm_machine_check(void) | |||
2710 | #endif | 2753 | #endif |
2711 | } | 2754 | } |
2712 | 2755 | ||
2713 | static int handle_machine_check(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2756 | static int handle_machine_check(struct kvm_vcpu *vcpu) |
2714 | { | 2757 | { |
2715 | /* already handled by vcpu_run */ | 2758 | /* already handled by vcpu_run */ |
2716 | return 1; | 2759 | return 1; |
2717 | } | 2760 | } |
2718 | 2761 | ||
2719 | static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2762 | static int handle_exception(struct kvm_vcpu *vcpu) |
2720 | { | 2763 | { |
2721 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2764 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
2765 | struct kvm_run *kvm_run = vcpu->run; | ||
2722 | u32 intr_info, ex_no, error_code; | 2766 | u32 intr_info, ex_no, error_code; |
2723 | unsigned long cr2, rip, dr6; | 2767 | unsigned long cr2, rip, dr6; |
2724 | u32 vect_info; | 2768 | u32 vect_info; |
@@ -2728,12 +2772,17 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2728 | intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | 2772 | intr_info = vmcs_read32(VM_EXIT_INTR_INFO); |
2729 | 2773 | ||
2730 | if (is_machine_check(intr_info)) | 2774 | if (is_machine_check(intr_info)) |
2731 | return handle_machine_check(vcpu, kvm_run); | 2775 | return handle_machine_check(vcpu); |
2732 | 2776 | ||
2733 | if ((vect_info & VECTORING_INFO_VALID_MASK) && | 2777 | if ((vect_info & VECTORING_INFO_VALID_MASK) && |
2734 | !is_page_fault(intr_info)) | 2778 | !is_page_fault(intr_info)) { |
2735 | printk(KERN_ERR "%s: unexpected, vectoring info 0x%x " | 2779 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |
2736 | "intr info 0x%x\n", __func__, vect_info, intr_info); | 2780 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX; |
2781 | vcpu->run->internal.ndata = 2; | ||
2782 | vcpu->run->internal.data[0] = vect_info; | ||
2783 | vcpu->run->internal.data[1] = intr_info; | ||
2784 | return 0; | ||
2785 | } | ||
2737 | 2786 | ||
2738 | if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) | 2787 | if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) |
2739 | return 1; /* already handled by vmx_vcpu_run() */ | 2788 | return 1; /* already handled by vmx_vcpu_run() */ |
@@ -2744,7 +2793,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2744 | } | 2793 | } |
2745 | 2794 | ||
2746 | if (is_invalid_opcode(intr_info)) { | 2795 | if (is_invalid_opcode(intr_info)) { |
2747 | er = emulate_instruction(vcpu, kvm_run, 0, 0, EMULTYPE_TRAP_UD); | 2796 | er = emulate_instruction(vcpu, 0, 0, EMULTYPE_TRAP_UD); |
2748 | if (er != EMULATE_DONE) | 2797 | if (er != EMULATE_DONE) |
2749 | kvm_queue_exception(vcpu, UD_VECTOR); | 2798 | kvm_queue_exception(vcpu, UD_VECTOR); |
2750 | return 1; | 2799 | return 1; |
@@ -2803,20 +2852,19 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2803 | return 0; | 2852 | return 0; |
2804 | } | 2853 | } |
2805 | 2854 | ||
2806 | static int handle_external_interrupt(struct kvm_vcpu *vcpu, | 2855 | static int handle_external_interrupt(struct kvm_vcpu *vcpu) |
2807 | struct kvm_run *kvm_run) | ||
2808 | { | 2856 | { |
2809 | ++vcpu->stat.irq_exits; | 2857 | ++vcpu->stat.irq_exits; |
2810 | return 1; | 2858 | return 1; |
2811 | } | 2859 | } |
2812 | 2860 | ||
2813 | static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2861 | static int handle_triple_fault(struct kvm_vcpu *vcpu) |
2814 | { | 2862 | { |
2815 | kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; | 2863 | vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; |
2816 | return 0; | 2864 | return 0; |
2817 | } | 2865 | } |
2818 | 2866 | ||
2819 | static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2867 | static int handle_io(struct kvm_vcpu *vcpu) |
2820 | { | 2868 | { |
2821 | unsigned long exit_qualification; | 2869 | unsigned long exit_qualification; |
2822 | int size, in, string; | 2870 | int size, in, string; |
@@ -2827,8 +2875,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2827 | string = (exit_qualification & 16) != 0; | 2875 | string = (exit_qualification & 16) != 0; |
2828 | 2876 | ||
2829 | if (string) { | 2877 | if (string) { |
2830 | if (emulate_instruction(vcpu, | 2878 | if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO) |
2831 | kvm_run, 0, 0, 0) == EMULATE_DO_MMIO) | ||
2832 | return 0; | 2879 | return 0; |
2833 | return 1; | 2880 | return 1; |
2834 | } | 2881 | } |
@@ -2838,7 +2885,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2838 | port = exit_qualification >> 16; | 2885 | port = exit_qualification >> 16; |
2839 | 2886 | ||
2840 | skip_emulated_instruction(vcpu); | 2887 | skip_emulated_instruction(vcpu); |
2841 | return kvm_emulate_pio(vcpu, kvm_run, in, size, port); | 2888 | return kvm_emulate_pio(vcpu, in, size, port); |
2842 | } | 2889 | } |
2843 | 2890 | ||
2844 | static void | 2891 | static void |
@@ -2852,7 +2899,7 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) | |||
2852 | hypercall[2] = 0xc1; | 2899 | hypercall[2] = 0xc1; |
2853 | } | 2900 | } |
2854 | 2901 | ||
2855 | static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2902 | static int handle_cr(struct kvm_vcpu *vcpu) |
2856 | { | 2903 | { |
2857 | unsigned long exit_qualification, val; | 2904 | unsigned long exit_qualification, val; |
2858 | int cr; | 2905 | int cr; |
@@ -2887,7 +2934,7 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2887 | return 1; | 2934 | return 1; |
2888 | if (cr8_prev <= cr8) | 2935 | if (cr8_prev <= cr8) |
2889 | return 1; | 2936 | return 1; |
2890 | kvm_run->exit_reason = KVM_EXIT_SET_TPR; | 2937 | vcpu->run->exit_reason = KVM_EXIT_SET_TPR; |
2891 | return 0; | 2938 | return 0; |
2892 | } | 2939 | } |
2893 | }; | 2940 | }; |
@@ -2922,13 +2969,13 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2922 | default: | 2969 | default: |
2923 | break; | 2970 | break; |
2924 | } | 2971 | } |
2925 | kvm_run->exit_reason = 0; | 2972 | vcpu->run->exit_reason = 0; |
2926 | pr_unimpl(vcpu, "unhandled control register: op %d cr %d\n", | 2973 | pr_unimpl(vcpu, "unhandled control register: op %d cr %d\n", |
2927 | (int)(exit_qualification >> 4) & 3, cr); | 2974 | (int)(exit_qualification >> 4) & 3, cr); |
2928 | return 0; | 2975 | return 0; |
2929 | } | 2976 | } |
2930 | 2977 | ||
2931 | static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2978 | static int handle_dr(struct kvm_vcpu *vcpu) |
2932 | { | 2979 | { |
2933 | unsigned long exit_qualification; | 2980 | unsigned long exit_qualification; |
2934 | unsigned long val; | 2981 | unsigned long val; |
@@ -2944,13 +2991,13 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2944 | * guest debugging itself. | 2991 | * guest debugging itself. |
2945 | */ | 2992 | */ |
2946 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { | 2993 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { |
2947 | kvm_run->debug.arch.dr6 = vcpu->arch.dr6; | 2994 | vcpu->run->debug.arch.dr6 = vcpu->arch.dr6; |
2948 | kvm_run->debug.arch.dr7 = dr; | 2995 | vcpu->run->debug.arch.dr7 = dr; |
2949 | kvm_run->debug.arch.pc = | 2996 | vcpu->run->debug.arch.pc = |
2950 | vmcs_readl(GUEST_CS_BASE) + | 2997 | vmcs_readl(GUEST_CS_BASE) + |
2951 | vmcs_readl(GUEST_RIP); | 2998 | vmcs_readl(GUEST_RIP); |
2952 | kvm_run->debug.arch.exception = DB_VECTOR; | 2999 | vcpu->run->debug.arch.exception = DB_VECTOR; |
2953 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | 3000 | vcpu->run->exit_reason = KVM_EXIT_DEBUG; |
2954 | return 0; | 3001 | return 0; |
2955 | } else { | 3002 | } else { |
2956 | vcpu->arch.dr7 &= ~DR7_GD; | 3003 | vcpu->arch.dr7 &= ~DR7_GD; |
@@ -3016,13 +3063,13 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3016 | return 1; | 3063 | return 1; |
3017 | } | 3064 | } |
3018 | 3065 | ||
3019 | static int handle_cpuid(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3066 | static int handle_cpuid(struct kvm_vcpu *vcpu) |
3020 | { | 3067 | { |
3021 | kvm_emulate_cpuid(vcpu); | 3068 | kvm_emulate_cpuid(vcpu); |
3022 | return 1; | 3069 | return 1; |
3023 | } | 3070 | } |
3024 | 3071 | ||
3025 | static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3072 | static int handle_rdmsr(struct kvm_vcpu *vcpu) |
3026 | { | 3073 | { |
3027 | u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; | 3074 | u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; |
3028 | u64 data; | 3075 | u64 data; |
@@ -3041,7 +3088,7 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3041 | return 1; | 3088 | return 1; |
3042 | } | 3089 | } |
3043 | 3090 | ||
3044 | static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3091 | static int handle_wrmsr(struct kvm_vcpu *vcpu) |
3045 | { | 3092 | { |
3046 | u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; | 3093 | u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; |
3047 | u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) | 3094 | u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) |
@@ -3058,14 +3105,12 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3058 | return 1; | 3105 | return 1; |
3059 | } | 3106 | } |
3060 | 3107 | ||
3061 | static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu, | 3108 | static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu) |
3062 | struct kvm_run *kvm_run) | ||
3063 | { | 3109 | { |
3064 | return 1; | 3110 | return 1; |
3065 | } | 3111 | } |
3066 | 3112 | ||
3067 | static int handle_interrupt_window(struct kvm_vcpu *vcpu, | 3113 | static int handle_interrupt_window(struct kvm_vcpu *vcpu) |
3068 | struct kvm_run *kvm_run) | ||
3069 | { | 3114 | { |
3070 | u32 cpu_based_vm_exec_control; | 3115 | u32 cpu_based_vm_exec_control; |
3071 | 3116 | ||
@@ -3081,34 +3126,34 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, | |||
3081 | * possible | 3126 | * possible |
3082 | */ | 3127 | */ |
3083 | if (!irqchip_in_kernel(vcpu->kvm) && | 3128 | if (!irqchip_in_kernel(vcpu->kvm) && |
3084 | kvm_run->request_interrupt_window && | 3129 | vcpu->run->request_interrupt_window && |
3085 | !kvm_cpu_has_interrupt(vcpu)) { | 3130 | !kvm_cpu_has_interrupt(vcpu)) { |
3086 | kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; | 3131 | vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; |
3087 | return 0; | 3132 | return 0; |
3088 | } | 3133 | } |
3089 | return 1; | 3134 | return 1; |
3090 | } | 3135 | } |
3091 | 3136 | ||
3092 | static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3137 | static int handle_halt(struct kvm_vcpu *vcpu) |
3093 | { | 3138 | { |
3094 | skip_emulated_instruction(vcpu); | 3139 | skip_emulated_instruction(vcpu); |
3095 | return kvm_emulate_halt(vcpu); | 3140 | return kvm_emulate_halt(vcpu); |
3096 | } | 3141 | } |
3097 | 3142 | ||
3098 | static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3143 | static int handle_vmcall(struct kvm_vcpu *vcpu) |
3099 | { | 3144 | { |
3100 | skip_emulated_instruction(vcpu); | 3145 | skip_emulated_instruction(vcpu); |
3101 | kvm_emulate_hypercall(vcpu); | 3146 | kvm_emulate_hypercall(vcpu); |
3102 | return 1; | 3147 | return 1; |
3103 | } | 3148 | } |
3104 | 3149 | ||
3105 | static int handle_vmx_insn(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3150 | static int handle_vmx_insn(struct kvm_vcpu *vcpu) |
3106 | { | 3151 | { |
3107 | kvm_queue_exception(vcpu, UD_VECTOR); | 3152 | kvm_queue_exception(vcpu, UD_VECTOR); |
3108 | return 1; | 3153 | return 1; |
3109 | } | 3154 | } |
3110 | 3155 | ||
3111 | static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3156 | static int handle_invlpg(struct kvm_vcpu *vcpu) |
3112 | { | 3157 | { |
3113 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 3158 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
3114 | 3159 | ||
@@ -3117,14 +3162,14 @@ static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3117 | return 1; | 3162 | return 1; |
3118 | } | 3163 | } |
3119 | 3164 | ||
3120 | static int handle_wbinvd(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3165 | static int handle_wbinvd(struct kvm_vcpu *vcpu) |
3121 | { | 3166 | { |
3122 | skip_emulated_instruction(vcpu); | 3167 | skip_emulated_instruction(vcpu); |
3123 | /* TODO: Add support for VT-d/pass-through device */ | 3168 | /* TODO: Add support for VT-d/pass-through device */ |
3124 | return 1; | 3169 | return 1; |
3125 | } | 3170 | } |
3126 | 3171 | ||
3127 | static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3172 | static int handle_apic_access(struct kvm_vcpu *vcpu) |
3128 | { | 3173 | { |
3129 | unsigned long exit_qualification; | 3174 | unsigned long exit_qualification; |
3130 | enum emulation_result er; | 3175 | enum emulation_result er; |
@@ -3133,7 +3178,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3133 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 3178 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
3134 | offset = exit_qualification & 0xffful; | 3179 | offset = exit_qualification & 0xffful; |
3135 | 3180 | ||
3136 | er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); | 3181 | er = emulate_instruction(vcpu, 0, 0, 0); |
3137 | 3182 | ||
3138 | if (er != EMULATE_DONE) { | 3183 | if (er != EMULATE_DONE) { |
3139 | printk(KERN_ERR | 3184 | printk(KERN_ERR |
@@ -3144,7 +3189,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3144 | return 1; | 3189 | return 1; |
3145 | } | 3190 | } |
3146 | 3191 | ||
3147 | static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3192 | static int handle_task_switch(struct kvm_vcpu *vcpu) |
3148 | { | 3193 | { |
3149 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3194 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3150 | unsigned long exit_qualification; | 3195 | unsigned long exit_qualification; |
@@ -3198,7 +3243,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3198 | return 1; | 3243 | return 1; |
3199 | } | 3244 | } |
3200 | 3245 | ||
3201 | static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3246 | static int handle_ept_violation(struct kvm_vcpu *vcpu) |
3202 | { | 3247 | { |
3203 | unsigned long exit_qualification; | 3248 | unsigned long exit_qualification; |
3204 | gpa_t gpa; | 3249 | gpa_t gpa; |
@@ -3219,8 +3264,8 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3219 | vmcs_readl(GUEST_LINEAR_ADDRESS)); | 3264 | vmcs_readl(GUEST_LINEAR_ADDRESS)); |
3220 | printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", | 3265 | printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", |
3221 | (long unsigned int)exit_qualification); | 3266 | (long unsigned int)exit_qualification); |
3222 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; | 3267 | vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; |
3223 | kvm_run->hw.hardware_exit_reason = EXIT_REASON_EPT_VIOLATION; | 3268 | vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_VIOLATION; |
3224 | return 0; | 3269 | return 0; |
3225 | } | 3270 | } |
3226 | 3271 | ||
@@ -3290,7 +3335,7 @@ static void ept_misconfig_inspect_spte(struct kvm_vcpu *vcpu, u64 spte, | |||
3290 | } | 3335 | } |
3291 | } | 3336 | } |
3292 | 3337 | ||
3293 | static int handle_ept_misconfig(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3338 | static int handle_ept_misconfig(struct kvm_vcpu *vcpu) |
3294 | { | 3339 | { |
3295 | u64 sptes[4]; | 3340 | u64 sptes[4]; |
3296 | int nr_sptes, i; | 3341 | int nr_sptes, i; |
@@ -3306,13 +3351,13 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3306 | for (i = PT64_ROOT_LEVEL; i > PT64_ROOT_LEVEL - nr_sptes; --i) | 3351 | for (i = PT64_ROOT_LEVEL; i > PT64_ROOT_LEVEL - nr_sptes; --i) |
3307 | ept_misconfig_inspect_spte(vcpu, sptes[i-1], i); | 3352 | ept_misconfig_inspect_spte(vcpu, sptes[i-1], i); |
3308 | 3353 | ||
3309 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; | 3354 | vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; |
3310 | kvm_run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG; | 3355 | vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG; |
3311 | 3356 | ||
3312 | return 0; | 3357 | return 0; |
3313 | } | 3358 | } |
3314 | 3359 | ||
3315 | static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3360 | static int handle_nmi_window(struct kvm_vcpu *vcpu) |
3316 | { | 3361 | { |
3317 | u32 cpu_based_vm_exec_control; | 3362 | u32 cpu_based_vm_exec_control; |
3318 | 3363 | ||
@@ -3325,36 +3370,50 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3325 | return 1; | 3370 | return 1; |
3326 | } | 3371 | } |
3327 | 3372 | ||
3328 | static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, | 3373 | static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) |
3329 | struct kvm_run *kvm_run) | ||
3330 | { | 3374 | { |
3331 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3375 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3332 | enum emulation_result err = EMULATE_DONE; | 3376 | enum emulation_result err = EMULATE_DONE; |
3333 | 3377 | int ret = 1; | |
3334 | local_irq_enable(); | ||
3335 | preempt_enable(); | ||
3336 | 3378 | ||
3337 | while (!guest_state_valid(vcpu)) { | 3379 | while (!guest_state_valid(vcpu)) { |
3338 | err = emulate_instruction(vcpu, kvm_run, 0, 0, 0); | 3380 | err = emulate_instruction(vcpu, 0, 0, 0); |
3339 | 3381 | ||
3340 | if (err == EMULATE_DO_MMIO) | 3382 | if (err == EMULATE_DO_MMIO) { |
3341 | break; | 3383 | ret = 0; |
3384 | goto out; | ||
3385 | } | ||
3342 | 3386 | ||
3343 | if (err != EMULATE_DONE) { | 3387 | if (err != EMULATE_DONE) { |
3344 | kvm_report_emulation_failure(vcpu, "emulation failure"); | 3388 | kvm_report_emulation_failure(vcpu, "emulation failure"); |
3345 | break; | 3389 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |
3390 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | ||
3391 | vcpu->run->internal.ndata = 0; | ||
3392 | ret = 0; | ||
3393 | goto out; | ||
3346 | } | 3394 | } |
3347 | 3395 | ||
3348 | if (signal_pending(current)) | 3396 | if (signal_pending(current)) |
3349 | break; | 3397 | goto out; |
3350 | if (need_resched()) | 3398 | if (need_resched()) |
3351 | schedule(); | 3399 | schedule(); |
3352 | } | 3400 | } |
3353 | 3401 | ||
3354 | preempt_disable(); | 3402 | vmx->emulation_required = 0; |
3355 | local_irq_disable(); | 3403 | out: |
3404 | return ret; | ||
3405 | } | ||
3356 | 3406 | ||
3357 | vmx->invalid_state_emulation_result = err; | 3407 | /* |
3408 | * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE | ||
3409 | * exiting, so only get here on cpu with PAUSE-Loop-Exiting. | ||
3410 | */ | ||
3411 | static int handle_pause(struct kvm_vcpu *vcpu) | ||
3412 | { | ||
3413 | skip_emulated_instruction(vcpu); | ||
3414 | kvm_vcpu_on_spin(vcpu); | ||
3415 | |||
3416 | return 1; | ||
3358 | } | 3417 | } |
3359 | 3418 | ||
3360 | /* | 3419 | /* |
@@ -3362,8 +3421,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, | |||
3362 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs | 3421 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs |
3363 | * to be done to userspace and return 0. | 3422 | * to be done to userspace and return 0. |
3364 | */ | 3423 | */ |
3365 | static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, | 3424 | static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { |
3366 | struct kvm_run *kvm_run) = { | ||
3367 | [EXIT_REASON_EXCEPTION_NMI] = handle_exception, | 3425 | [EXIT_REASON_EXCEPTION_NMI] = handle_exception, |
3368 | [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, | 3426 | [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, |
3369 | [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, | 3427 | [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, |
@@ -3394,6 +3452,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, | |||
3394 | [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, | 3452 | [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, |
3395 | [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, | 3453 | [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, |
3396 | [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, | 3454 | [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, |
3455 | [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, | ||
3397 | }; | 3456 | }; |
3398 | 3457 | ||
3399 | static const int kvm_vmx_max_exit_handlers = | 3458 | static const int kvm_vmx_max_exit_handlers = |
@@ -3403,7 +3462,7 @@ static const int kvm_vmx_max_exit_handlers = | |||
3403 | * The guest has exited. See if we can fix it or if we need userspace | 3462 | * The guest has exited. See if we can fix it or if we need userspace |
3404 | * assistance. | 3463 | * assistance. |
3405 | */ | 3464 | */ |
3406 | static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | 3465 | static int vmx_handle_exit(struct kvm_vcpu *vcpu) |
3407 | { | 3466 | { |
3408 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3467 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3409 | u32 exit_reason = vmx->exit_reason; | 3468 | u32 exit_reason = vmx->exit_reason; |
@@ -3411,13 +3470,9 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
3411 | 3470 | ||
3412 | trace_kvm_exit(exit_reason, kvm_rip_read(vcpu)); | 3471 | trace_kvm_exit(exit_reason, kvm_rip_read(vcpu)); |
3413 | 3472 | ||
3414 | /* If we need to emulate an MMIO from handle_invalid_guest_state | 3473 | /* If guest state is invalid, start emulating */ |
3415 | * we just return 0 */ | 3474 | if (vmx->emulation_required && emulate_invalid_guest_state) |
3416 | if (vmx->emulation_required && emulate_invalid_guest_state) { | 3475 | return handle_invalid_guest_state(vcpu); |
3417 | if (guest_state_valid(vcpu)) | ||
3418 | vmx->emulation_required = 0; | ||
3419 | return vmx->invalid_state_emulation_result != EMULATE_DO_MMIO; | ||
3420 | } | ||
3421 | 3476 | ||
3422 | /* Access CR3 don't cause VMExit in paging mode, so we need | 3477 | /* Access CR3 don't cause VMExit in paging mode, so we need |
3423 | * to sync with guest real CR3. */ | 3478 | * to sync with guest real CR3. */ |
@@ -3425,8 +3480,8 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
3425 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); | 3480 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); |
3426 | 3481 | ||
3427 | if (unlikely(vmx->fail)) { | 3482 | if (unlikely(vmx->fail)) { |
3428 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; | 3483 | vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
3429 | kvm_run->fail_entry.hardware_entry_failure_reason | 3484 | vcpu->run->fail_entry.hardware_entry_failure_reason |
3430 | = vmcs_read32(VM_INSTRUCTION_ERROR); | 3485 | = vmcs_read32(VM_INSTRUCTION_ERROR); |
3431 | return 0; | 3486 | return 0; |
3432 | } | 3487 | } |
@@ -3459,10 +3514,10 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
3459 | 3514 | ||
3460 | if (exit_reason < kvm_vmx_max_exit_handlers | 3515 | if (exit_reason < kvm_vmx_max_exit_handlers |
3461 | && kvm_vmx_exit_handlers[exit_reason]) | 3516 | && kvm_vmx_exit_handlers[exit_reason]) |
3462 | return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); | 3517 | return kvm_vmx_exit_handlers[exit_reason](vcpu); |
3463 | else { | 3518 | else { |
3464 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; | 3519 | vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; |
3465 | kvm_run->hw.hardware_exit_reason = exit_reason; | 3520 | vcpu->run->hw.hardware_exit_reason = exit_reason; |
3466 | } | 3521 | } |
3467 | return 0; | 3522 | return 0; |
3468 | } | 3523 | } |
@@ -3600,23 +3655,18 @@ static void fixup_rmode_irq(struct vcpu_vmx *vmx) | |||
3600 | #define Q "l" | 3655 | #define Q "l" |
3601 | #endif | 3656 | #endif |
3602 | 3657 | ||
3603 | static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3658 | static void vmx_vcpu_run(struct kvm_vcpu *vcpu) |
3604 | { | 3659 | { |
3605 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3660 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3606 | 3661 | ||
3607 | if (enable_ept && is_paging(vcpu)) { | ||
3608 | vmcs_writel(GUEST_CR3, vcpu->arch.cr3); | ||
3609 | ept_load_pdptrs(vcpu); | ||
3610 | } | ||
3611 | /* Record the guest's net vcpu time for enforced NMI injections. */ | 3662 | /* Record the guest's net vcpu time for enforced NMI injections. */ |
3612 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) | 3663 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) |
3613 | vmx->entry_time = ktime_get(); | 3664 | vmx->entry_time = ktime_get(); |
3614 | 3665 | ||
3615 | /* Handle invalid guest state instead of entering VMX */ | 3666 | /* Don't enter VMX if guest state is invalid, let the exit handler |
3616 | if (vmx->emulation_required && emulate_invalid_guest_state) { | 3667 | start emulation until we arrive back to a valid state */ |
3617 | handle_invalid_guest_state(vcpu, kvm_run); | 3668 | if (vmx->emulation_required && emulate_invalid_guest_state) |
3618 | return; | 3669 | return; |
3619 | } | ||
3620 | 3670 | ||
3621 | if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) | 3671 | if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) |
3622 | vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); | 3672 | vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); |
@@ -3775,7 +3825,6 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) | |||
3775 | __clear_bit(vmx->vpid, vmx_vpid_bitmap); | 3825 | __clear_bit(vmx->vpid, vmx_vpid_bitmap); |
3776 | spin_unlock(&vmx_vpid_lock); | 3826 | spin_unlock(&vmx_vpid_lock); |
3777 | vmx_free_vmcs(vcpu); | 3827 | vmx_free_vmcs(vcpu); |
3778 | kfree(vmx->host_msrs); | ||
3779 | kfree(vmx->guest_msrs); | 3828 | kfree(vmx->guest_msrs); |
3780 | kvm_vcpu_uninit(vcpu); | 3829 | kvm_vcpu_uninit(vcpu); |
3781 | kmem_cache_free(kvm_vcpu_cache, vmx); | 3830 | kmem_cache_free(kvm_vcpu_cache, vmx); |
@@ -3802,10 +3851,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
3802 | goto uninit_vcpu; | 3851 | goto uninit_vcpu; |
3803 | } | 3852 | } |
3804 | 3853 | ||
3805 | vmx->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); | ||
3806 | if (!vmx->host_msrs) | ||
3807 | goto free_guest_msrs; | ||
3808 | |||
3809 | vmx->vmcs = alloc_vmcs(); | 3854 | vmx->vmcs = alloc_vmcs(); |
3810 | if (!vmx->vmcs) | 3855 | if (!vmx->vmcs) |
3811 | goto free_msrs; | 3856 | goto free_msrs; |
@@ -3836,8 +3881,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
3836 | free_vmcs: | 3881 | free_vmcs: |
3837 | free_vmcs(vmx->vmcs); | 3882 | free_vmcs(vmx->vmcs); |
3838 | free_msrs: | 3883 | free_msrs: |
3839 | kfree(vmx->host_msrs); | ||
3840 | free_guest_msrs: | ||
3841 | kfree(vmx->guest_msrs); | 3884 | kfree(vmx->guest_msrs); |
3842 | uninit_vcpu: | 3885 | uninit_vcpu: |
3843 | kvm_vcpu_uninit(&vmx->vcpu); | 3886 | kvm_vcpu_uninit(&vmx->vcpu); |
@@ -3973,6 +4016,8 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
3973 | .queue_exception = vmx_queue_exception, | 4016 | .queue_exception = vmx_queue_exception, |
3974 | .interrupt_allowed = vmx_interrupt_allowed, | 4017 | .interrupt_allowed = vmx_interrupt_allowed, |
3975 | .nmi_allowed = vmx_nmi_allowed, | 4018 | .nmi_allowed = vmx_nmi_allowed, |
4019 | .get_nmi_mask = vmx_get_nmi_mask, | ||
4020 | .set_nmi_mask = vmx_set_nmi_mask, | ||
3976 | .enable_nmi_window = enable_nmi_window, | 4021 | .enable_nmi_window = enable_nmi_window, |
3977 | .enable_irq_window = enable_irq_window, | 4022 | .enable_irq_window = enable_irq_window, |
3978 | .update_cr8_intercept = update_cr8_intercept, | 4023 | .update_cr8_intercept = update_cr8_intercept, |
@@ -3987,7 +4032,12 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
3987 | 4032 | ||
3988 | static int __init vmx_init(void) | 4033 | static int __init vmx_init(void) |
3989 | { | 4034 | { |
3990 | int r; | 4035 | int r, i; |
4036 | |||
4037 | rdmsrl_safe(MSR_EFER, &host_efer); | ||
4038 | |||
4039 | for (i = 0; i < NR_VMX_MSR; ++i) | ||
4040 | kvm_define_shared_msr(i, vmx_msr_index[i]); | ||
3991 | 4041 | ||
3992 | vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL); | 4042 | vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL); |
3993 | if (!vmx_io_bitmap_a) | 4043 | if (!vmx_io_bitmap_a) |
@@ -4049,8 +4099,6 @@ static int __init vmx_init(void) | |||
4049 | if (bypass_guest_pf) | 4099 | if (bypass_guest_pf) |
4050 | kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); | 4100 | kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); |
4051 | 4101 | ||
4052 | ept_sync_global(); | ||
4053 | |||
4054 | return 0; | 4102 | return 0; |
4055 | 4103 | ||
4056 | out3: | 4104 | out3: |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index be451ee44249..6651dbf58675 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -37,11 +37,13 @@ | |||
37 | #include <linux/iommu.h> | 37 | #include <linux/iommu.h> |
38 | #include <linux/intel-iommu.h> | 38 | #include <linux/intel-iommu.h> |
39 | #include <linux/cpufreq.h> | 39 | #include <linux/cpufreq.h> |
40 | #include <linux/user-return-notifier.h> | ||
40 | #include <trace/events/kvm.h> | 41 | #include <trace/events/kvm.h> |
41 | #undef TRACE_INCLUDE_FILE | 42 | #undef TRACE_INCLUDE_FILE |
42 | #define CREATE_TRACE_POINTS | 43 | #define CREATE_TRACE_POINTS |
43 | #include "trace.h" | 44 | #include "trace.h" |
44 | 45 | ||
46 | #include <asm/debugreg.h> | ||
45 | #include <asm/uaccess.h> | 47 | #include <asm/uaccess.h> |
46 | #include <asm/msr.h> | 48 | #include <asm/msr.h> |
47 | #include <asm/desc.h> | 49 | #include <asm/desc.h> |
@@ -87,6 +89,25 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops); | |||
87 | int ignore_msrs = 0; | 89 | int ignore_msrs = 0; |
88 | module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); | 90 | module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); |
89 | 91 | ||
92 | #define KVM_NR_SHARED_MSRS 16 | ||
93 | |||
94 | struct kvm_shared_msrs_global { | ||
95 | int nr; | ||
96 | struct kvm_shared_msr { | ||
97 | u32 msr; | ||
98 | u64 value; | ||
99 | } msrs[KVM_NR_SHARED_MSRS]; | ||
100 | }; | ||
101 | |||
102 | struct kvm_shared_msrs { | ||
103 | struct user_return_notifier urn; | ||
104 | bool registered; | ||
105 | u64 current_value[KVM_NR_SHARED_MSRS]; | ||
106 | }; | ||
107 | |||
108 | static struct kvm_shared_msrs_global __read_mostly shared_msrs_global; | ||
109 | static DEFINE_PER_CPU(struct kvm_shared_msrs, shared_msrs); | ||
110 | |||
90 | struct kvm_stats_debugfs_item debugfs_entries[] = { | 111 | struct kvm_stats_debugfs_item debugfs_entries[] = { |
91 | { "pf_fixed", VCPU_STAT(pf_fixed) }, | 112 | { "pf_fixed", VCPU_STAT(pf_fixed) }, |
92 | { "pf_guest", VCPU_STAT(pf_guest) }, | 113 | { "pf_guest", VCPU_STAT(pf_guest) }, |
@@ -123,6 +144,72 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
123 | { NULL } | 144 | { NULL } |
124 | }; | 145 | }; |
125 | 146 | ||
147 | static void kvm_on_user_return(struct user_return_notifier *urn) | ||
148 | { | ||
149 | unsigned slot; | ||
150 | struct kvm_shared_msr *global; | ||
151 | struct kvm_shared_msrs *locals | ||
152 | = container_of(urn, struct kvm_shared_msrs, urn); | ||
153 | |||
154 | for (slot = 0; slot < shared_msrs_global.nr; ++slot) { | ||
155 | global = &shared_msrs_global.msrs[slot]; | ||
156 | if (global->value != locals->current_value[slot]) { | ||
157 | wrmsrl(global->msr, global->value); | ||
158 | locals->current_value[slot] = global->value; | ||
159 | } | ||
160 | } | ||
161 | locals->registered = false; | ||
162 | user_return_notifier_unregister(urn); | ||
163 | } | ||
164 | |||
165 | void kvm_define_shared_msr(unsigned slot, u32 msr) | ||
166 | { | ||
167 | int cpu; | ||
168 | u64 value; | ||
169 | |||
170 | if (slot >= shared_msrs_global.nr) | ||
171 | shared_msrs_global.nr = slot + 1; | ||
172 | shared_msrs_global.msrs[slot].msr = msr; | ||
173 | rdmsrl_safe(msr, &value); | ||
174 | shared_msrs_global.msrs[slot].value = value; | ||
175 | for_each_online_cpu(cpu) | ||
176 | per_cpu(shared_msrs, cpu).current_value[slot] = value; | ||
177 | } | ||
178 | EXPORT_SYMBOL_GPL(kvm_define_shared_msr); | ||
179 | |||
180 | static void kvm_shared_msr_cpu_online(void) | ||
181 | { | ||
182 | unsigned i; | ||
183 | struct kvm_shared_msrs *locals = &__get_cpu_var(shared_msrs); | ||
184 | |||
185 | for (i = 0; i < shared_msrs_global.nr; ++i) | ||
186 | locals->current_value[i] = shared_msrs_global.msrs[i].value; | ||
187 | } | ||
188 | |||
189 | void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) | ||
190 | { | ||
191 | struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs); | ||
192 | |||
193 | if (((value ^ smsr->current_value[slot]) & mask) == 0) | ||
194 | return; | ||
195 | smsr->current_value[slot] = value; | ||
196 | wrmsrl(shared_msrs_global.msrs[slot].msr, value); | ||
197 | if (!smsr->registered) { | ||
198 | smsr->urn.on_user_return = kvm_on_user_return; | ||
199 | user_return_notifier_register(&smsr->urn); | ||
200 | smsr->registered = true; | ||
201 | } | ||
202 | } | ||
203 | EXPORT_SYMBOL_GPL(kvm_set_shared_msr); | ||
204 | |||
205 | static void drop_user_return_notifiers(void *ignore) | ||
206 | { | ||
207 | struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs); | ||
208 | |||
209 | if (smsr->registered) | ||
210 | kvm_on_user_return(&smsr->urn); | ||
211 | } | ||
212 | |||
126 | unsigned long segment_base(u16 selector) | 213 | unsigned long segment_base(u16 selector) |
127 | { | 214 | { |
128 | struct descriptor_table gdt; | 215 | struct descriptor_table gdt; |
@@ -484,16 +571,19 @@ static inline u32 bit(int bitno) | |||
484 | * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. | 571 | * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. |
485 | * | 572 | * |
486 | * This list is modified at module load time to reflect the | 573 | * This list is modified at module load time to reflect the |
487 | * capabilities of the host cpu. | 574 | * capabilities of the host cpu. This capabilities test skips MSRs that are |
575 | * kvm-specific. Those are put in the beginning of the list. | ||
488 | */ | 576 | */ |
577 | |||
578 | #define KVM_SAVE_MSRS_BEGIN 2 | ||
489 | static u32 msrs_to_save[] = { | 579 | static u32 msrs_to_save[] = { |
580 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, | ||
490 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, | 581 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, |
491 | MSR_K6_STAR, | 582 | MSR_K6_STAR, |
492 | #ifdef CONFIG_X86_64 | 583 | #ifdef CONFIG_X86_64 |
493 | MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, | 584 | MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, |
494 | #endif | 585 | #endif |
495 | MSR_IA32_TSC, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, | 586 | MSR_IA32_TSC, MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA |
496 | MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA | ||
497 | }; | 587 | }; |
498 | 588 | ||
499 | static unsigned num_msrs_to_save; | 589 | static unsigned num_msrs_to_save; |
@@ -677,7 +767,8 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) | |||
677 | /* With all the info we got, fill in the values */ | 767 | /* With all the info we got, fill in the values */ |
678 | 768 | ||
679 | vcpu->hv_clock.system_time = ts.tv_nsec + | 769 | vcpu->hv_clock.system_time = ts.tv_nsec + |
680 | (NSEC_PER_SEC * (u64)ts.tv_sec); | 770 | (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset; |
771 | |||
681 | /* | 772 | /* |
682 | * The interface expects us to write an even number signaling that the | 773 | * The interface expects us to write an even number signaling that the |
683 | * update is finished. Since the guest won't see the intermediate | 774 | * update is finished. Since the guest won't see the intermediate |
@@ -835,6 +926,38 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
835 | return 0; | 926 | return 0; |
836 | } | 927 | } |
837 | 928 | ||
929 | static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data) | ||
930 | { | ||
931 | struct kvm *kvm = vcpu->kvm; | ||
932 | int lm = is_long_mode(vcpu); | ||
933 | u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64 | ||
934 | : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32; | ||
935 | u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64 | ||
936 | : kvm->arch.xen_hvm_config.blob_size_32; | ||
937 | u32 page_num = data & ~PAGE_MASK; | ||
938 | u64 page_addr = data & PAGE_MASK; | ||
939 | u8 *page; | ||
940 | int r; | ||
941 | |||
942 | r = -E2BIG; | ||
943 | if (page_num >= blob_size) | ||
944 | goto out; | ||
945 | r = -ENOMEM; | ||
946 | page = kzalloc(PAGE_SIZE, GFP_KERNEL); | ||
947 | if (!page) | ||
948 | goto out; | ||
949 | r = -EFAULT; | ||
950 | if (copy_from_user(page, blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE)) | ||
951 | goto out_free; | ||
952 | if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE)) | ||
953 | goto out_free; | ||
954 | r = 0; | ||
955 | out_free: | ||
956 | kfree(page); | ||
957 | out: | ||
958 | return r; | ||
959 | } | ||
960 | |||
838 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | 961 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) |
839 | { | 962 | { |
840 | switch (msr) { | 963 | switch (msr) { |
@@ -950,6 +1073,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
950 | "0x%x data 0x%llx\n", msr, data); | 1073 | "0x%x data 0x%llx\n", msr, data); |
951 | break; | 1074 | break; |
952 | default: | 1075 | default: |
1076 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) | ||
1077 | return xen_hvm_config(vcpu, data); | ||
953 | if (!ignore_msrs) { | 1078 | if (!ignore_msrs) { |
954 | pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", | 1079 | pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", |
955 | msr, data); | 1080 | msr, data); |
@@ -1224,6 +1349,9 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
1224 | case KVM_CAP_PIT2: | 1349 | case KVM_CAP_PIT2: |
1225 | case KVM_CAP_PIT_STATE2: | 1350 | case KVM_CAP_PIT_STATE2: |
1226 | case KVM_CAP_SET_IDENTITY_MAP_ADDR: | 1351 | case KVM_CAP_SET_IDENTITY_MAP_ADDR: |
1352 | case KVM_CAP_XEN_HVM: | ||
1353 | case KVM_CAP_ADJUST_CLOCK: | ||
1354 | case KVM_CAP_VCPU_EVENTS: | ||
1227 | r = 1; | 1355 | r = 1; |
1228 | break; | 1356 | break; |
1229 | case KVM_CAP_COALESCED_MMIO: | 1357 | case KVM_CAP_COALESCED_MMIO: |
@@ -1238,8 +1366,8 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
1238 | case KVM_CAP_NR_MEMSLOTS: | 1366 | case KVM_CAP_NR_MEMSLOTS: |
1239 | r = KVM_MEMORY_SLOTS; | 1367 | r = KVM_MEMORY_SLOTS; |
1240 | break; | 1368 | break; |
1241 | case KVM_CAP_PV_MMU: | 1369 | case KVM_CAP_PV_MMU: /* obsolete */ |
1242 | r = !tdp_enabled; | 1370 | r = 0; |
1243 | break; | 1371 | break; |
1244 | case KVM_CAP_IOMMU: | 1372 | case KVM_CAP_IOMMU: |
1245 | r = iommu_found(); | 1373 | r = iommu_found(); |
@@ -1326,6 +1454,12 @@ out: | |||
1326 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | 1454 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
1327 | { | 1455 | { |
1328 | kvm_x86_ops->vcpu_load(vcpu, cpu); | 1456 | kvm_x86_ops->vcpu_load(vcpu, cpu); |
1457 | if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) { | ||
1458 | unsigned long khz = cpufreq_quick_get(cpu); | ||
1459 | if (!khz) | ||
1460 | khz = tsc_khz; | ||
1461 | per_cpu(cpu_tsc_khz, cpu) = khz; | ||
1462 | } | ||
1329 | kvm_request_guest_time_update(vcpu); | 1463 | kvm_request_guest_time_update(vcpu); |
1330 | } | 1464 | } |
1331 | 1465 | ||
@@ -1591,6 +1725,8 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, | |||
1591 | 1725 | ||
1592 | if (cpuid->nent < 1) | 1726 | if (cpuid->nent < 1) |
1593 | goto out; | 1727 | goto out; |
1728 | if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) | ||
1729 | cpuid->nent = KVM_MAX_CPUID_ENTRIES; | ||
1594 | r = -ENOMEM; | 1730 | r = -ENOMEM; |
1595 | cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); | 1731 | cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); |
1596 | if (!cpuid_entries) | 1732 | if (!cpuid_entries) |
@@ -1690,7 +1826,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, | |||
1690 | unsigned bank_num = mcg_cap & 0xff, bank; | 1826 | unsigned bank_num = mcg_cap & 0xff, bank; |
1691 | 1827 | ||
1692 | r = -EINVAL; | 1828 | r = -EINVAL; |
1693 | if (!bank_num) | 1829 | if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) |
1694 | goto out; | 1830 | goto out; |
1695 | if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000)) | 1831 | if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000)) |
1696 | goto out; | 1832 | goto out; |
@@ -1757,6 +1893,65 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, | |||
1757 | return 0; | 1893 | return 0; |
1758 | } | 1894 | } |
1759 | 1895 | ||
1896 | static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | ||
1897 | struct kvm_vcpu_events *events) | ||
1898 | { | ||
1899 | vcpu_load(vcpu); | ||
1900 | |||
1901 | events->exception.injected = vcpu->arch.exception.pending; | ||
1902 | events->exception.nr = vcpu->arch.exception.nr; | ||
1903 | events->exception.has_error_code = vcpu->arch.exception.has_error_code; | ||
1904 | events->exception.error_code = vcpu->arch.exception.error_code; | ||
1905 | |||
1906 | events->interrupt.injected = vcpu->arch.interrupt.pending; | ||
1907 | events->interrupt.nr = vcpu->arch.interrupt.nr; | ||
1908 | events->interrupt.soft = vcpu->arch.interrupt.soft; | ||
1909 | |||
1910 | events->nmi.injected = vcpu->arch.nmi_injected; | ||
1911 | events->nmi.pending = vcpu->arch.nmi_pending; | ||
1912 | events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); | ||
1913 | |||
1914 | events->sipi_vector = vcpu->arch.sipi_vector; | ||
1915 | |||
1916 | events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING | ||
1917 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR); | ||
1918 | |||
1919 | vcpu_put(vcpu); | ||
1920 | } | ||
1921 | |||
1922 | static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | ||
1923 | struct kvm_vcpu_events *events) | ||
1924 | { | ||
1925 | if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING | ||
1926 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR)) | ||
1927 | return -EINVAL; | ||
1928 | |||
1929 | vcpu_load(vcpu); | ||
1930 | |||
1931 | vcpu->arch.exception.pending = events->exception.injected; | ||
1932 | vcpu->arch.exception.nr = events->exception.nr; | ||
1933 | vcpu->arch.exception.has_error_code = events->exception.has_error_code; | ||
1934 | vcpu->arch.exception.error_code = events->exception.error_code; | ||
1935 | |||
1936 | vcpu->arch.interrupt.pending = events->interrupt.injected; | ||
1937 | vcpu->arch.interrupt.nr = events->interrupt.nr; | ||
1938 | vcpu->arch.interrupt.soft = events->interrupt.soft; | ||
1939 | if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) | ||
1940 | kvm_pic_clear_isr_ack(vcpu->kvm); | ||
1941 | |||
1942 | vcpu->arch.nmi_injected = events->nmi.injected; | ||
1943 | if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) | ||
1944 | vcpu->arch.nmi_pending = events->nmi.pending; | ||
1945 | kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); | ||
1946 | |||
1947 | if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR) | ||
1948 | vcpu->arch.sipi_vector = events->sipi_vector; | ||
1949 | |||
1950 | vcpu_put(vcpu); | ||
1951 | |||
1952 | return 0; | ||
1953 | } | ||
1954 | |||
1760 | long kvm_arch_vcpu_ioctl(struct file *filp, | 1955 | long kvm_arch_vcpu_ioctl(struct file *filp, |
1761 | unsigned int ioctl, unsigned long arg) | 1956 | unsigned int ioctl, unsigned long arg) |
1762 | { | 1957 | { |
@@ -1767,6 +1962,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
1767 | 1962 | ||
1768 | switch (ioctl) { | 1963 | switch (ioctl) { |
1769 | case KVM_GET_LAPIC: { | 1964 | case KVM_GET_LAPIC: { |
1965 | r = -EINVAL; | ||
1966 | if (!vcpu->arch.apic) | ||
1967 | goto out; | ||
1770 | lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); | 1968 | lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); |
1771 | 1969 | ||
1772 | r = -ENOMEM; | 1970 | r = -ENOMEM; |
@@ -1782,6 +1980,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
1782 | break; | 1980 | break; |
1783 | } | 1981 | } |
1784 | case KVM_SET_LAPIC: { | 1982 | case KVM_SET_LAPIC: { |
1983 | r = -EINVAL; | ||
1984 | if (!vcpu->arch.apic) | ||
1985 | goto out; | ||
1785 | lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); | 1986 | lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); |
1786 | r = -ENOMEM; | 1987 | r = -ENOMEM; |
1787 | if (!lapic) | 1988 | if (!lapic) |
@@ -1908,6 +2109,27 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
1908 | r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); | 2109 | r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); |
1909 | break; | 2110 | break; |
1910 | } | 2111 | } |
2112 | case KVM_GET_VCPU_EVENTS: { | ||
2113 | struct kvm_vcpu_events events; | ||
2114 | |||
2115 | kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events); | ||
2116 | |||
2117 | r = -EFAULT; | ||
2118 | if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events))) | ||
2119 | break; | ||
2120 | r = 0; | ||
2121 | break; | ||
2122 | } | ||
2123 | case KVM_SET_VCPU_EVENTS: { | ||
2124 | struct kvm_vcpu_events events; | ||
2125 | |||
2126 | r = -EFAULT; | ||
2127 | if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events))) | ||
2128 | break; | ||
2129 | |||
2130 | r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events); | ||
2131 | break; | ||
2132 | } | ||
1911 | default: | 2133 | default: |
1912 | r = -EINVAL; | 2134 | r = -EINVAL; |
1913 | } | 2135 | } |
@@ -2036,9 +2258,7 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) | |||
2036 | sizeof(struct kvm_pic_state)); | 2258 | sizeof(struct kvm_pic_state)); |
2037 | break; | 2259 | break; |
2038 | case KVM_IRQCHIP_IOAPIC: | 2260 | case KVM_IRQCHIP_IOAPIC: |
2039 | memcpy(&chip->chip.ioapic, | 2261 | r = kvm_get_ioapic(kvm, &chip->chip.ioapic); |
2040 | ioapic_irqchip(kvm), | ||
2041 | sizeof(struct kvm_ioapic_state)); | ||
2042 | break; | 2262 | break; |
2043 | default: | 2263 | default: |
2044 | r = -EINVAL; | 2264 | r = -EINVAL; |
@@ -2068,11 +2288,7 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) | |||
2068 | spin_unlock(&pic_irqchip(kvm)->lock); | 2288 | spin_unlock(&pic_irqchip(kvm)->lock); |
2069 | break; | 2289 | break; |
2070 | case KVM_IRQCHIP_IOAPIC: | 2290 | case KVM_IRQCHIP_IOAPIC: |
2071 | mutex_lock(&kvm->irq_lock); | 2291 | r = kvm_set_ioapic(kvm, &chip->chip.ioapic); |
2072 | memcpy(ioapic_irqchip(kvm), | ||
2073 | &chip->chip.ioapic, | ||
2074 | sizeof(struct kvm_ioapic_state)); | ||
2075 | mutex_unlock(&kvm->irq_lock); | ||
2076 | break; | 2292 | break; |
2077 | default: | 2293 | default: |
2078 | r = -EINVAL; | 2294 | r = -EINVAL; |
@@ -2180,7 +2396,7 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2180 | { | 2396 | { |
2181 | struct kvm *kvm = filp->private_data; | 2397 | struct kvm *kvm = filp->private_data; |
2182 | void __user *argp = (void __user *)arg; | 2398 | void __user *argp = (void __user *)arg; |
2183 | int r = -EINVAL; | 2399 | int r = -ENOTTY; |
2184 | /* | 2400 | /* |
2185 | * This union makes it completely explicit to gcc-3.x | 2401 | * This union makes it completely explicit to gcc-3.x |
2186 | * that these two variables' stack usage should be | 2402 | * that these two variables' stack usage should be |
@@ -2242,25 +2458,39 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2242 | if (r) | 2458 | if (r) |
2243 | goto out; | 2459 | goto out; |
2244 | break; | 2460 | break; |
2245 | case KVM_CREATE_IRQCHIP: | 2461 | case KVM_CREATE_IRQCHIP: { |
2462 | struct kvm_pic *vpic; | ||
2463 | |||
2464 | mutex_lock(&kvm->lock); | ||
2465 | r = -EEXIST; | ||
2466 | if (kvm->arch.vpic) | ||
2467 | goto create_irqchip_unlock; | ||
2246 | r = -ENOMEM; | 2468 | r = -ENOMEM; |
2247 | kvm->arch.vpic = kvm_create_pic(kvm); | 2469 | vpic = kvm_create_pic(kvm); |
2248 | if (kvm->arch.vpic) { | 2470 | if (vpic) { |
2249 | r = kvm_ioapic_init(kvm); | 2471 | r = kvm_ioapic_init(kvm); |
2250 | if (r) { | 2472 | if (r) { |
2251 | kfree(kvm->arch.vpic); | 2473 | kfree(vpic); |
2252 | kvm->arch.vpic = NULL; | 2474 | goto create_irqchip_unlock; |
2253 | goto out; | ||
2254 | } | 2475 | } |
2255 | } else | 2476 | } else |
2256 | goto out; | 2477 | goto create_irqchip_unlock; |
2478 | smp_wmb(); | ||
2479 | kvm->arch.vpic = vpic; | ||
2480 | smp_wmb(); | ||
2257 | r = kvm_setup_default_irq_routing(kvm); | 2481 | r = kvm_setup_default_irq_routing(kvm); |
2258 | if (r) { | 2482 | if (r) { |
2483 | mutex_lock(&kvm->irq_lock); | ||
2259 | kfree(kvm->arch.vpic); | 2484 | kfree(kvm->arch.vpic); |
2260 | kfree(kvm->arch.vioapic); | 2485 | kfree(kvm->arch.vioapic); |
2261 | goto out; | 2486 | kvm->arch.vpic = NULL; |
2487 | kvm->arch.vioapic = NULL; | ||
2488 | mutex_unlock(&kvm->irq_lock); | ||
2262 | } | 2489 | } |
2490 | create_irqchip_unlock: | ||
2491 | mutex_unlock(&kvm->lock); | ||
2263 | break; | 2492 | break; |
2493 | } | ||
2264 | case KVM_CREATE_PIT: | 2494 | case KVM_CREATE_PIT: |
2265 | u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY; | 2495 | u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY; |
2266 | goto create_pit; | 2496 | goto create_pit; |
@@ -2290,10 +2520,8 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2290 | goto out; | 2520 | goto out; |
2291 | if (irqchip_in_kernel(kvm)) { | 2521 | if (irqchip_in_kernel(kvm)) { |
2292 | __s32 status; | 2522 | __s32 status; |
2293 | mutex_lock(&kvm->irq_lock); | ||
2294 | status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, | 2523 | status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, |
2295 | irq_event.irq, irq_event.level); | 2524 | irq_event.irq, irq_event.level); |
2296 | mutex_unlock(&kvm->irq_lock); | ||
2297 | if (ioctl == KVM_IRQ_LINE_STATUS) { | 2525 | if (ioctl == KVM_IRQ_LINE_STATUS) { |
2298 | irq_event.status = status; | 2526 | irq_event.status = status; |
2299 | if (copy_to_user(argp, &irq_event, | 2527 | if (copy_to_user(argp, &irq_event, |
@@ -2419,6 +2647,55 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2419 | r = 0; | 2647 | r = 0; |
2420 | break; | 2648 | break; |
2421 | } | 2649 | } |
2650 | case KVM_XEN_HVM_CONFIG: { | ||
2651 | r = -EFAULT; | ||
2652 | if (copy_from_user(&kvm->arch.xen_hvm_config, argp, | ||
2653 | sizeof(struct kvm_xen_hvm_config))) | ||
2654 | goto out; | ||
2655 | r = -EINVAL; | ||
2656 | if (kvm->arch.xen_hvm_config.flags) | ||
2657 | goto out; | ||
2658 | r = 0; | ||
2659 | break; | ||
2660 | } | ||
2661 | case KVM_SET_CLOCK: { | ||
2662 | struct timespec now; | ||
2663 | struct kvm_clock_data user_ns; | ||
2664 | u64 now_ns; | ||
2665 | s64 delta; | ||
2666 | |||
2667 | r = -EFAULT; | ||
2668 | if (copy_from_user(&user_ns, argp, sizeof(user_ns))) | ||
2669 | goto out; | ||
2670 | |||
2671 | r = -EINVAL; | ||
2672 | if (user_ns.flags) | ||
2673 | goto out; | ||
2674 | |||
2675 | r = 0; | ||
2676 | ktime_get_ts(&now); | ||
2677 | now_ns = timespec_to_ns(&now); | ||
2678 | delta = user_ns.clock - now_ns; | ||
2679 | kvm->arch.kvmclock_offset = delta; | ||
2680 | break; | ||
2681 | } | ||
2682 | case KVM_GET_CLOCK: { | ||
2683 | struct timespec now; | ||
2684 | struct kvm_clock_data user_ns; | ||
2685 | u64 now_ns; | ||
2686 | |||
2687 | ktime_get_ts(&now); | ||
2688 | now_ns = timespec_to_ns(&now); | ||
2689 | user_ns.clock = kvm->arch.kvmclock_offset + now_ns; | ||
2690 | user_ns.flags = 0; | ||
2691 | |||
2692 | r = -EFAULT; | ||
2693 | if (copy_to_user(argp, &user_ns, sizeof(user_ns))) | ||
2694 | goto out; | ||
2695 | r = 0; | ||
2696 | break; | ||
2697 | } | ||
2698 | |||
2422 | default: | 2699 | default: |
2423 | ; | 2700 | ; |
2424 | } | 2701 | } |
@@ -2431,7 +2708,8 @@ static void kvm_init_msr_list(void) | |||
2431 | u32 dummy[2]; | 2708 | u32 dummy[2]; |
2432 | unsigned i, j; | 2709 | unsigned i, j; |
2433 | 2710 | ||
2434 | for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) { | 2711 | /* skip the first msrs in the list. KVM-specific */ |
2712 | for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) { | ||
2435 | if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0) | 2713 | if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0) |
2436 | continue; | 2714 | continue; |
2437 | if (j < i) | 2715 | if (j < i) |
@@ -2755,13 +3033,13 @@ static void cache_all_regs(struct kvm_vcpu *vcpu) | |||
2755 | } | 3033 | } |
2756 | 3034 | ||
2757 | int emulate_instruction(struct kvm_vcpu *vcpu, | 3035 | int emulate_instruction(struct kvm_vcpu *vcpu, |
2758 | struct kvm_run *run, | ||
2759 | unsigned long cr2, | 3036 | unsigned long cr2, |
2760 | u16 error_code, | 3037 | u16 error_code, |
2761 | int emulation_type) | 3038 | int emulation_type) |
2762 | { | 3039 | { |
2763 | int r, shadow_mask; | 3040 | int r, shadow_mask; |
2764 | struct decode_cache *c; | 3041 | struct decode_cache *c; |
3042 | struct kvm_run *run = vcpu->run; | ||
2765 | 3043 | ||
2766 | kvm_clear_exception_queue(vcpu); | 3044 | kvm_clear_exception_queue(vcpu); |
2767 | vcpu->arch.mmio_fault_cr2 = cr2; | 3045 | vcpu->arch.mmio_fault_cr2 = cr2; |
@@ -2781,7 +3059,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
2781 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | 3059 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
2782 | 3060 | ||
2783 | vcpu->arch.emulate_ctxt.vcpu = vcpu; | 3061 | vcpu->arch.emulate_ctxt.vcpu = vcpu; |
2784 | vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); | 3062 | vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); |
2785 | vcpu->arch.emulate_ctxt.mode = | 3063 | vcpu->arch.emulate_ctxt.mode = |
2786 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) | 3064 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) |
2787 | ? X86EMUL_MODE_REAL : cs_l | 3065 | ? X86EMUL_MODE_REAL : cs_l |
@@ -2859,7 +3137,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
2859 | return EMULATE_DO_MMIO; | 3137 | return EMULATE_DO_MMIO; |
2860 | } | 3138 | } |
2861 | 3139 | ||
2862 | kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); | 3140 | kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); |
2863 | 3141 | ||
2864 | if (vcpu->mmio_is_write) { | 3142 | if (vcpu->mmio_is_write) { |
2865 | vcpu->mmio_needed = 0; | 3143 | vcpu->mmio_needed = 0; |
@@ -2967,8 +3245,7 @@ static int pio_string_write(struct kvm_vcpu *vcpu) | |||
2967 | return r; | 3245 | return r; |
2968 | } | 3246 | } |
2969 | 3247 | ||
2970 | int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | 3248 | int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port) |
2971 | int size, unsigned port) | ||
2972 | { | 3249 | { |
2973 | unsigned long val; | 3250 | unsigned long val; |
2974 | 3251 | ||
@@ -2997,7 +3274,7 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | |||
2997 | } | 3274 | } |
2998 | EXPORT_SYMBOL_GPL(kvm_emulate_pio); | 3275 | EXPORT_SYMBOL_GPL(kvm_emulate_pio); |
2999 | 3276 | ||
3000 | int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | 3277 | int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, |
3001 | int size, unsigned long count, int down, | 3278 | int size, unsigned long count, int down, |
3002 | gva_t address, int rep, unsigned port) | 3279 | gva_t address, int rep, unsigned port) |
3003 | { | 3280 | { |
@@ -3070,9 +3347,6 @@ static void bounce_off(void *info) | |||
3070 | /* nothing */ | 3347 | /* nothing */ |
3071 | } | 3348 | } |
3072 | 3349 | ||
3073 | static unsigned int ref_freq; | ||
3074 | static unsigned long tsc_khz_ref; | ||
3075 | |||
3076 | static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | 3350 | static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, |
3077 | void *data) | 3351 | void *data) |
3078 | { | 3352 | { |
@@ -3081,14 +3355,11 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va | |||
3081 | struct kvm_vcpu *vcpu; | 3355 | struct kvm_vcpu *vcpu; |
3082 | int i, send_ipi = 0; | 3356 | int i, send_ipi = 0; |
3083 | 3357 | ||
3084 | if (!ref_freq) | ||
3085 | ref_freq = freq->old; | ||
3086 | |||
3087 | if (val == CPUFREQ_PRECHANGE && freq->old > freq->new) | 3358 | if (val == CPUFREQ_PRECHANGE && freq->old > freq->new) |
3088 | return 0; | 3359 | return 0; |
3089 | if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new) | 3360 | if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new) |
3090 | return 0; | 3361 | return 0; |
3091 | per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); | 3362 | per_cpu(cpu_tsc_khz, freq->cpu) = freq->new; |
3092 | 3363 | ||
3093 | spin_lock(&kvm_lock); | 3364 | spin_lock(&kvm_lock); |
3094 | list_for_each_entry(kvm, &vm_list, vm_list) { | 3365 | list_for_each_entry(kvm, &vm_list, vm_list) { |
@@ -3125,9 +3396,28 @@ static struct notifier_block kvmclock_cpufreq_notifier_block = { | |||
3125 | .notifier_call = kvmclock_cpufreq_notifier | 3396 | .notifier_call = kvmclock_cpufreq_notifier |
3126 | }; | 3397 | }; |
3127 | 3398 | ||
3399 | static void kvm_timer_init(void) | ||
3400 | { | ||
3401 | int cpu; | ||
3402 | |||
3403 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { | ||
3404 | cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, | ||
3405 | CPUFREQ_TRANSITION_NOTIFIER); | ||
3406 | for_each_online_cpu(cpu) { | ||
3407 | unsigned long khz = cpufreq_get(cpu); | ||
3408 | if (!khz) | ||
3409 | khz = tsc_khz; | ||
3410 | per_cpu(cpu_tsc_khz, cpu) = khz; | ||
3411 | } | ||
3412 | } else { | ||
3413 | for_each_possible_cpu(cpu) | ||
3414 | per_cpu(cpu_tsc_khz, cpu) = tsc_khz; | ||
3415 | } | ||
3416 | } | ||
3417 | |||
3128 | int kvm_arch_init(void *opaque) | 3418 | int kvm_arch_init(void *opaque) |
3129 | { | 3419 | { |
3130 | int r, cpu; | 3420 | int r; |
3131 | struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque; | 3421 | struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque; |
3132 | 3422 | ||
3133 | if (kvm_x86_ops) { | 3423 | if (kvm_x86_ops) { |
@@ -3159,13 +3449,7 @@ int kvm_arch_init(void *opaque) | |||
3159 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, | 3449 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, |
3160 | PT_DIRTY_MASK, PT64_NX_MASK, 0); | 3450 | PT_DIRTY_MASK, PT64_NX_MASK, 0); |
3161 | 3451 | ||
3162 | for_each_possible_cpu(cpu) | 3452 | kvm_timer_init(); |
3163 | per_cpu(cpu_tsc_khz, cpu) = tsc_khz; | ||
3164 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { | ||
3165 | tsc_khz_ref = tsc_khz; | ||
3166 | cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, | ||
3167 | CPUFREQ_TRANSITION_NOTIFIER); | ||
3168 | } | ||
3169 | 3453 | ||
3170 | return 0; | 3454 | return 0; |
3171 | 3455 | ||
@@ -3293,7 +3577,7 @@ void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, | |||
3293 | unsigned long *rflags) | 3577 | unsigned long *rflags) |
3294 | { | 3578 | { |
3295 | kvm_lmsw(vcpu, msw); | 3579 | kvm_lmsw(vcpu, msw); |
3296 | *rflags = kvm_x86_ops->get_rflags(vcpu); | 3580 | *rflags = kvm_get_rflags(vcpu); |
3297 | } | 3581 | } |
3298 | 3582 | ||
3299 | unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) | 3583 | unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) |
@@ -3331,7 +3615,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, | |||
3331 | switch (cr) { | 3615 | switch (cr) { |
3332 | case 0: | 3616 | case 0: |
3333 | kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); | 3617 | kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); |
3334 | *rflags = kvm_x86_ops->get_rflags(vcpu); | 3618 | *rflags = kvm_get_rflags(vcpu); |
3335 | break; | 3619 | break; |
3336 | case 2: | 3620 | case 2: |
3337 | vcpu->arch.cr2 = val; | 3621 | vcpu->arch.cr2 = val; |
@@ -3451,18 +3735,18 @@ EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); | |||
3451 | * | 3735 | * |
3452 | * No need to exit to userspace if we already have an interrupt queued. | 3736 | * No need to exit to userspace if we already have an interrupt queued. |
3453 | */ | 3737 | */ |
3454 | static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, | 3738 | static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu) |
3455 | struct kvm_run *kvm_run) | ||
3456 | { | 3739 | { |
3457 | return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) && | 3740 | return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) && |
3458 | kvm_run->request_interrupt_window && | 3741 | vcpu->run->request_interrupt_window && |
3459 | kvm_arch_interrupt_allowed(vcpu)); | 3742 | kvm_arch_interrupt_allowed(vcpu)); |
3460 | } | 3743 | } |
3461 | 3744 | ||
3462 | static void post_kvm_run_save(struct kvm_vcpu *vcpu, | 3745 | static void post_kvm_run_save(struct kvm_vcpu *vcpu) |
3463 | struct kvm_run *kvm_run) | ||
3464 | { | 3746 | { |
3465 | kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0; | 3747 | struct kvm_run *kvm_run = vcpu->run; |
3748 | |||
3749 | kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0; | ||
3466 | kvm_run->cr8 = kvm_get_cr8(vcpu); | 3750 | kvm_run->cr8 = kvm_get_cr8(vcpu); |
3467 | kvm_run->apic_base = kvm_get_apic_base(vcpu); | 3751 | kvm_run->apic_base = kvm_get_apic_base(vcpu); |
3468 | if (irqchip_in_kernel(vcpu->kvm)) | 3752 | if (irqchip_in_kernel(vcpu->kvm)) |
@@ -3523,7 +3807,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu) | |||
3523 | kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr); | 3807 | kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr); |
3524 | } | 3808 | } |
3525 | 3809 | ||
3526 | static void inject_pending_event(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3810 | static void inject_pending_event(struct kvm_vcpu *vcpu) |
3527 | { | 3811 | { |
3528 | /* try to reinject previous events if any */ | 3812 | /* try to reinject previous events if any */ |
3529 | if (vcpu->arch.exception.pending) { | 3813 | if (vcpu->arch.exception.pending) { |
@@ -3559,11 +3843,11 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3559 | } | 3843 | } |
3560 | } | 3844 | } |
3561 | 3845 | ||
3562 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3846 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) |
3563 | { | 3847 | { |
3564 | int r; | 3848 | int r; |
3565 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && | 3849 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && |
3566 | kvm_run->request_interrupt_window; | 3850 | vcpu->run->request_interrupt_window; |
3567 | 3851 | ||
3568 | if (vcpu->requests) | 3852 | if (vcpu->requests) |
3569 | if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) | 3853 | if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) |
@@ -3584,12 +3868,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3584 | kvm_x86_ops->tlb_flush(vcpu); | 3868 | kvm_x86_ops->tlb_flush(vcpu); |
3585 | if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, | 3869 | if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, |
3586 | &vcpu->requests)) { | 3870 | &vcpu->requests)) { |
3587 | kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS; | 3871 | vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; |
3588 | r = 0; | 3872 | r = 0; |
3589 | goto out; | 3873 | goto out; |
3590 | } | 3874 | } |
3591 | if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) { | 3875 | if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) { |
3592 | kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; | 3876 | vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; |
3593 | r = 0; | 3877 | r = 0; |
3594 | goto out; | 3878 | goto out; |
3595 | } | 3879 | } |
@@ -3613,7 +3897,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3613 | goto out; | 3897 | goto out; |
3614 | } | 3898 | } |
3615 | 3899 | ||
3616 | inject_pending_event(vcpu, kvm_run); | 3900 | inject_pending_event(vcpu); |
3617 | 3901 | ||
3618 | /* enable NMI/IRQ window open exits if needed */ | 3902 | /* enable NMI/IRQ window open exits if needed */ |
3619 | if (vcpu->arch.nmi_pending) | 3903 | if (vcpu->arch.nmi_pending) |
@@ -3639,16 +3923,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3639 | } | 3923 | } |
3640 | 3924 | ||
3641 | trace_kvm_entry(vcpu->vcpu_id); | 3925 | trace_kvm_entry(vcpu->vcpu_id); |
3642 | kvm_x86_ops->run(vcpu, kvm_run); | 3926 | kvm_x86_ops->run(vcpu); |
3643 | 3927 | ||
3644 | if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) { | 3928 | /* |
3645 | set_debugreg(current->thread.debugreg0, 0); | 3929 | * If the guest has used debug registers, at least dr7 |
3646 | set_debugreg(current->thread.debugreg1, 1); | 3930 | * will be disabled while returning to the host. |
3647 | set_debugreg(current->thread.debugreg2, 2); | 3931 | * If we don't have active breakpoints in the host, we don't |
3648 | set_debugreg(current->thread.debugreg3, 3); | 3932 | * care about the messed up debug address registers. But if |
3649 | set_debugreg(current->thread.debugreg6, 6); | 3933 | * we have some of them active, restore the old state. |
3650 | set_debugreg(current->thread.debugreg7, 7); | 3934 | */ |
3651 | } | 3935 | if (hw_breakpoint_active()) |
3936 | hw_breakpoint_restore(); | ||
3652 | 3937 | ||
3653 | set_bit(KVM_REQ_KICK, &vcpu->requests); | 3938 | set_bit(KVM_REQ_KICK, &vcpu->requests); |
3654 | local_irq_enable(); | 3939 | local_irq_enable(); |
@@ -3680,13 +3965,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3680 | 3965 | ||
3681 | kvm_lapic_sync_from_vapic(vcpu); | 3966 | kvm_lapic_sync_from_vapic(vcpu); |
3682 | 3967 | ||
3683 | r = kvm_x86_ops->handle_exit(kvm_run, vcpu); | 3968 | r = kvm_x86_ops->handle_exit(vcpu); |
3684 | out: | 3969 | out: |
3685 | return r; | 3970 | return r; |
3686 | } | 3971 | } |
3687 | 3972 | ||
3688 | 3973 | ||
3689 | static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3974 | static int __vcpu_run(struct kvm_vcpu *vcpu) |
3690 | { | 3975 | { |
3691 | int r; | 3976 | int r; |
3692 | 3977 | ||
@@ -3706,7 +3991,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3706 | r = 1; | 3991 | r = 1; |
3707 | while (r > 0) { | 3992 | while (r > 0) { |
3708 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) | 3993 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) |
3709 | r = vcpu_enter_guest(vcpu, kvm_run); | 3994 | r = vcpu_enter_guest(vcpu); |
3710 | else { | 3995 | else { |
3711 | up_read(&vcpu->kvm->slots_lock); | 3996 | up_read(&vcpu->kvm->slots_lock); |
3712 | kvm_vcpu_block(vcpu); | 3997 | kvm_vcpu_block(vcpu); |
@@ -3734,14 +4019,14 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3734 | if (kvm_cpu_has_pending_timer(vcpu)) | 4019 | if (kvm_cpu_has_pending_timer(vcpu)) |
3735 | kvm_inject_pending_timer_irqs(vcpu); | 4020 | kvm_inject_pending_timer_irqs(vcpu); |
3736 | 4021 | ||
3737 | if (dm_request_for_irq_injection(vcpu, kvm_run)) { | 4022 | if (dm_request_for_irq_injection(vcpu)) { |
3738 | r = -EINTR; | 4023 | r = -EINTR; |
3739 | kvm_run->exit_reason = KVM_EXIT_INTR; | 4024 | vcpu->run->exit_reason = KVM_EXIT_INTR; |
3740 | ++vcpu->stat.request_irq_exits; | 4025 | ++vcpu->stat.request_irq_exits; |
3741 | } | 4026 | } |
3742 | if (signal_pending(current)) { | 4027 | if (signal_pending(current)) { |
3743 | r = -EINTR; | 4028 | r = -EINTR; |
3744 | kvm_run->exit_reason = KVM_EXIT_INTR; | 4029 | vcpu->run->exit_reason = KVM_EXIT_INTR; |
3745 | ++vcpu->stat.signal_exits; | 4030 | ++vcpu->stat.signal_exits; |
3746 | } | 4031 | } |
3747 | if (need_resched()) { | 4032 | if (need_resched()) { |
@@ -3752,7 +4037,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3752 | } | 4037 | } |
3753 | 4038 | ||
3754 | up_read(&vcpu->kvm->slots_lock); | 4039 | up_read(&vcpu->kvm->slots_lock); |
3755 | post_kvm_run_save(vcpu, kvm_run); | 4040 | post_kvm_run_save(vcpu); |
3756 | 4041 | ||
3757 | vapic_exit(vcpu); | 4042 | vapic_exit(vcpu); |
3758 | 4043 | ||
@@ -3785,15 +4070,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3785 | if (r) | 4070 | if (r) |
3786 | goto out; | 4071 | goto out; |
3787 | } | 4072 | } |
3788 | #if CONFIG_HAS_IOMEM | ||
3789 | if (vcpu->mmio_needed) { | 4073 | if (vcpu->mmio_needed) { |
3790 | memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); | 4074 | memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); |
3791 | vcpu->mmio_read_completed = 1; | 4075 | vcpu->mmio_read_completed = 1; |
3792 | vcpu->mmio_needed = 0; | 4076 | vcpu->mmio_needed = 0; |
3793 | 4077 | ||
3794 | down_read(&vcpu->kvm->slots_lock); | 4078 | down_read(&vcpu->kvm->slots_lock); |
3795 | r = emulate_instruction(vcpu, kvm_run, | 4079 | r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0, |
3796 | vcpu->arch.mmio_fault_cr2, 0, | ||
3797 | EMULTYPE_NO_DECODE); | 4080 | EMULTYPE_NO_DECODE); |
3798 | up_read(&vcpu->kvm->slots_lock); | 4081 | up_read(&vcpu->kvm->slots_lock); |
3799 | if (r == EMULATE_DO_MMIO) { | 4082 | if (r == EMULATE_DO_MMIO) { |
@@ -3804,12 +4087,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3804 | goto out; | 4087 | goto out; |
3805 | } | 4088 | } |
3806 | } | 4089 | } |
3807 | #endif | ||
3808 | if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) | 4090 | if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) |
3809 | kvm_register_write(vcpu, VCPU_REGS_RAX, | 4091 | kvm_register_write(vcpu, VCPU_REGS_RAX, |
3810 | kvm_run->hypercall.ret); | 4092 | kvm_run->hypercall.ret); |
3811 | 4093 | ||
3812 | r = __vcpu_run(vcpu, kvm_run); | 4094 | r = __vcpu_run(vcpu); |
3813 | 4095 | ||
3814 | out: | 4096 | out: |
3815 | if (vcpu->sigset_active) | 4097 | if (vcpu->sigset_active) |
@@ -3843,13 +4125,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
3843 | #endif | 4125 | #endif |
3844 | 4126 | ||
3845 | regs->rip = kvm_rip_read(vcpu); | 4127 | regs->rip = kvm_rip_read(vcpu); |
3846 | regs->rflags = kvm_x86_ops->get_rflags(vcpu); | 4128 | regs->rflags = kvm_get_rflags(vcpu); |
3847 | |||
3848 | /* | ||
3849 | * Don't leak debug flags in case they were set for guest debugging | ||
3850 | */ | ||
3851 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | ||
3852 | regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); | ||
3853 | 4129 | ||
3854 | vcpu_put(vcpu); | 4130 | vcpu_put(vcpu); |
3855 | 4131 | ||
@@ -3877,12 +4153,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
3877 | kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13); | 4153 | kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13); |
3878 | kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14); | 4154 | kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14); |
3879 | kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15); | 4155 | kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15); |
3880 | |||
3881 | #endif | 4156 | #endif |
3882 | 4157 | ||
3883 | kvm_rip_write(vcpu, regs->rip); | 4158 | kvm_rip_write(vcpu, regs->rip); |
3884 | kvm_x86_ops->set_rflags(vcpu, regs->rflags); | 4159 | kvm_set_rflags(vcpu, regs->rflags); |
3885 | |||
3886 | 4160 | ||
3887 | vcpu->arch.exception.pending = false; | 4161 | vcpu->arch.exception.pending = false; |
3888 | 4162 | ||
@@ -4049,7 +4323,7 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | |||
4049 | return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu); | 4323 | return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu); |
4050 | } | 4324 | } |
4051 | 4325 | ||
4052 | static u32 get_tss_base_addr(struct kvm_vcpu *vcpu, | 4326 | static gpa_t get_tss_base_addr(struct kvm_vcpu *vcpu, |
4053 | struct desc_struct *seg_desc) | 4327 | struct desc_struct *seg_desc) |
4054 | { | 4328 | { |
4055 | u32 base_addr = get_desc_base(seg_desc); | 4329 | u32 base_addr = get_desc_base(seg_desc); |
@@ -4101,7 +4375,7 @@ static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg) | |||
4101 | { | 4375 | { |
4102 | return (seg != VCPU_SREG_LDTR) && | 4376 | return (seg != VCPU_SREG_LDTR) && |
4103 | (seg != VCPU_SREG_TR) && | 4377 | (seg != VCPU_SREG_TR) && |
4104 | (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_VM); | 4378 | (kvm_get_rflags(vcpu) & X86_EFLAGS_VM); |
4105 | } | 4379 | } |
4106 | 4380 | ||
4107 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | 4381 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, |
@@ -4129,7 +4403,7 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu, | |||
4129 | { | 4403 | { |
4130 | tss->cr3 = vcpu->arch.cr3; | 4404 | tss->cr3 = vcpu->arch.cr3; |
4131 | tss->eip = kvm_rip_read(vcpu); | 4405 | tss->eip = kvm_rip_read(vcpu); |
4132 | tss->eflags = kvm_x86_ops->get_rflags(vcpu); | 4406 | tss->eflags = kvm_get_rflags(vcpu); |
4133 | tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX); | 4407 | tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX); |
4134 | tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); | 4408 | tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); |
4135 | tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX); | 4409 | tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX); |
@@ -4153,7 +4427,7 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu, | |||
4153 | kvm_set_cr3(vcpu, tss->cr3); | 4427 | kvm_set_cr3(vcpu, tss->cr3); |
4154 | 4428 | ||
4155 | kvm_rip_write(vcpu, tss->eip); | 4429 | kvm_rip_write(vcpu, tss->eip); |
4156 | kvm_x86_ops->set_rflags(vcpu, tss->eflags | 2); | 4430 | kvm_set_rflags(vcpu, tss->eflags | 2); |
4157 | 4431 | ||
4158 | kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax); | 4432 | kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax); |
4159 | kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx); | 4433 | kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx); |
@@ -4191,7 +4465,7 @@ static void save_state_to_tss16(struct kvm_vcpu *vcpu, | |||
4191 | struct tss_segment_16 *tss) | 4465 | struct tss_segment_16 *tss) |
4192 | { | 4466 | { |
4193 | tss->ip = kvm_rip_read(vcpu); | 4467 | tss->ip = kvm_rip_read(vcpu); |
4194 | tss->flag = kvm_x86_ops->get_rflags(vcpu); | 4468 | tss->flag = kvm_get_rflags(vcpu); |
4195 | tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX); | 4469 | tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX); |
4196 | tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX); | 4470 | tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX); |
4197 | tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX); | 4471 | tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX); |
@@ -4206,14 +4480,13 @@ static void save_state_to_tss16(struct kvm_vcpu *vcpu, | |||
4206 | tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); | 4480 | tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); |
4207 | tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); | 4481 | tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); |
4208 | tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR); | 4482 | tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR); |
4209 | tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR); | ||
4210 | } | 4483 | } |
4211 | 4484 | ||
4212 | static int load_state_from_tss16(struct kvm_vcpu *vcpu, | 4485 | static int load_state_from_tss16(struct kvm_vcpu *vcpu, |
4213 | struct tss_segment_16 *tss) | 4486 | struct tss_segment_16 *tss) |
4214 | { | 4487 | { |
4215 | kvm_rip_write(vcpu, tss->ip); | 4488 | kvm_rip_write(vcpu, tss->ip); |
4216 | kvm_x86_ops->set_rflags(vcpu, tss->flag | 2); | 4489 | kvm_set_rflags(vcpu, tss->flag | 2); |
4217 | kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax); | 4490 | kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax); |
4218 | kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx); | 4491 | kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx); |
4219 | kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx); | 4492 | kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx); |
@@ -4359,8 +4632,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
4359 | } | 4632 | } |
4360 | 4633 | ||
4361 | if (reason == TASK_SWITCH_IRET) { | 4634 | if (reason == TASK_SWITCH_IRET) { |
4362 | u32 eflags = kvm_x86_ops->get_rflags(vcpu); | 4635 | u32 eflags = kvm_get_rflags(vcpu); |
4363 | kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); | 4636 | kvm_set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); |
4364 | } | 4637 | } |
4365 | 4638 | ||
4366 | /* set back link to prev task only if NT bit is set in eflags | 4639 | /* set back link to prev task only if NT bit is set in eflags |
@@ -4368,11 +4641,6 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
4368 | if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) | 4641 | if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) |
4369 | old_tss_sel = 0xffff; | 4642 | old_tss_sel = 0xffff; |
4370 | 4643 | ||
4371 | /* set back link to prev task only if NT bit is set in eflags | ||
4372 | note that old_tss_sel is not used afetr this point */ | ||
4373 | if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) | ||
4374 | old_tss_sel = 0xffff; | ||
4375 | |||
4376 | if (nseg_desc.type & 8) | 4644 | if (nseg_desc.type & 8) |
4377 | ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel, | 4645 | ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel, |
4378 | old_tss_base, &nseg_desc); | 4646 | old_tss_base, &nseg_desc); |
@@ -4381,8 +4649,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
4381 | old_tss_base, &nseg_desc); | 4649 | old_tss_base, &nseg_desc); |
4382 | 4650 | ||
4383 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { | 4651 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { |
4384 | u32 eflags = kvm_x86_ops->get_rflags(vcpu); | 4652 | u32 eflags = kvm_get_rflags(vcpu); |
4385 | kvm_x86_ops->set_rflags(vcpu, eflags | X86_EFLAGS_NT); | 4653 | kvm_set_rflags(vcpu, eflags | X86_EFLAGS_NT); |
4386 | } | 4654 | } |
4387 | 4655 | ||
4388 | if (reason != TASK_SWITCH_IRET) { | 4656 | if (reason != TASK_SWITCH_IRET) { |
@@ -4434,8 +4702,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
4434 | 4702 | ||
4435 | mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4; | 4703 | mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4; |
4436 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); | 4704 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); |
4437 | if (!is_long_mode(vcpu) && is_pae(vcpu)) | 4705 | if (!is_long_mode(vcpu) && is_pae(vcpu)) { |
4438 | load_pdptrs(vcpu, vcpu->arch.cr3); | 4706 | load_pdptrs(vcpu, vcpu->arch.cr3); |
4707 | mmu_reset_needed = 1; | ||
4708 | } | ||
4439 | 4709 | ||
4440 | if (mmu_reset_needed) | 4710 | if (mmu_reset_needed) |
4441 | kvm_mmu_reset_context(vcpu); | 4711 | kvm_mmu_reset_context(vcpu); |
@@ -4476,12 +4746,32 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
4476 | int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | 4746 | int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, |
4477 | struct kvm_guest_debug *dbg) | 4747 | struct kvm_guest_debug *dbg) |
4478 | { | 4748 | { |
4749 | unsigned long rflags; | ||
4479 | int i, r; | 4750 | int i, r; |
4480 | 4751 | ||
4481 | vcpu_load(vcpu); | 4752 | vcpu_load(vcpu); |
4482 | 4753 | ||
4483 | if ((dbg->control & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) == | 4754 | if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) { |
4484 | (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) { | 4755 | r = -EBUSY; |
4756 | if (vcpu->arch.exception.pending) | ||
4757 | goto unlock_out; | ||
4758 | if (dbg->control & KVM_GUESTDBG_INJECT_DB) | ||
4759 | kvm_queue_exception(vcpu, DB_VECTOR); | ||
4760 | else | ||
4761 | kvm_queue_exception(vcpu, BP_VECTOR); | ||
4762 | } | ||
4763 | |||
4764 | /* | ||
4765 | * Read rflags as long as potentially injected trace flags are still | ||
4766 | * filtered out. | ||
4767 | */ | ||
4768 | rflags = kvm_get_rflags(vcpu); | ||
4769 | |||
4770 | vcpu->guest_debug = dbg->control; | ||
4771 | if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE)) | ||
4772 | vcpu->guest_debug = 0; | ||
4773 | |||
4774 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { | ||
4485 | for (i = 0; i < KVM_NR_DB_REGS; ++i) | 4775 | for (i = 0; i < KVM_NR_DB_REGS; ++i) |
4486 | vcpu->arch.eff_db[i] = dbg->arch.debugreg[i]; | 4776 | vcpu->arch.eff_db[i] = dbg->arch.debugreg[i]; |
4487 | vcpu->arch.switch_db_regs = | 4777 | vcpu->arch.switch_db_regs = |
@@ -4492,13 +4782,23 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | |||
4492 | vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); | 4782 | vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); |
4493 | } | 4783 | } |
4494 | 4784 | ||
4495 | r = kvm_x86_ops->set_guest_debug(vcpu, dbg); | 4785 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { |
4786 | vcpu->arch.singlestep_cs = | ||
4787 | get_segment_selector(vcpu, VCPU_SREG_CS); | ||
4788 | vcpu->arch.singlestep_rip = kvm_rip_read(vcpu); | ||
4789 | } | ||
4790 | |||
4791 | /* | ||
4792 | * Trigger an rflags update that will inject or remove the trace | ||
4793 | * flags. | ||
4794 | */ | ||
4795 | kvm_set_rflags(vcpu, rflags); | ||
4796 | |||
4797 | kvm_x86_ops->set_guest_debug(vcpu, dbg); | ||
4496 | 4798 | ||
4497 | if (dbg->control & KVM_GUESTDBG_INJECT_DB) | 4799 | r = 0; |
4498 | kvm_queue_exception(vcpu, DB_VECTOR); | ||
4499 | else if (dbg->control & KVM_GUESTDBG_INJECT_BP) | ||
4500 | kvm_queue_exception(vcpu, BP_VECTOR); | ||
4501 | 4800 | ||
4801 | unlock_out: | ||
4502 | vcpu_put(vcpu); | 4802 | vcpu_put(vcpu); |
4503 | 4803 | ||
4504 | return r; | 4804 | return r; |
@@ -4699,14 +4999,26 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) | |||
4699 | return kvm_x86_ops->vcpu_reset(vcpu); | 4999 | return kvm_x86_ops->vcpu_reset(vcpu); |
4700 | } | 5000 | } |
4701 | 5001 | ||
4702 | void kvm_arch_hardware_enable(void *garbage) | 5002 | int kvm_arch_hardware_enable(void *garbage) |
4703 | { | 5003 | { |
4704 | kvm_x86_ops->hardware_enable(garbage); | 5004 | /* |
5005 | * Since this may be called from a hotplug notifcation, | ||
5006 | * we can't get the CPU frequency directly. | ||
5007 | */ | ||
5008 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { | ||
5009 | int cpu = raw_smp_processor_id(); | ||
5010 | per_cpu(cpu_tsc_khz, cpu) = 0; | ||
5011 | } | ||
5012 | |||
5013 | kvm_shared_msr_cpu_online(); | ||
5014 | |||
5015 | return kvm_x86_ops->hardware_enable(garbage); | ||
4705 | } | 5016 | } |
4706 | 5017 | ||
4707 | void kvm_arch_hardware_disable(void *garbage) | 5018 | void kvm_arch_hardware_disable(void *garbage) |
4708 | { | 5019 | { |
4709 | kvm_x86_ops->hardware_disable(garbage); | 5020 | kvm_x86_ops->hardware_disable(garbage); |
5021 | drop_user_return_notifiers(garbage); | ||
4710 | } | 5022 | } |
4711 | 5023 | ||
4712 | int kvm_arch_hardware_setup(void) | 5024 | int kvm_arch_hardware_setup(void) |
@@ -4944,8 +5256,36 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) | |||
4944 | return kvm_x86_ops->interrupt_allowed(vcpu); | 5256 | return kvm_x86_ops->interrupt_allowed(vcpu); |
4945 | } | 5257 | } |
4946 | 5258 | ||
5259 | unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) | ||
5260 | { | ||
5261 | unsigned long rflags; | ||
5262 | |||
5263 | rflags = kvm_x86_ops->get_rflags(vcpu); | ||
5264 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | ||
5265 | rflags &= ~(unsigned long)(X86_EFLAGS_TF | X86_EFLAGS_RF); | ||
5266 | return rflags; | ||
5267 | } | ||
5268 | EXPORT_SYMBOL_GPL(kvm_get_rflags); | ||
5269 | |||
5270 | void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | ||
5271 | { | ||
5272 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && | ||
5273 | vcpu->arch.singlestep_cs == | ||
5274 | get_segment_selector(vcpu, VCPU_SREG_CS) && | ||
5275 | vcpu->arch.singlestep_rip == kvm_rip_read(vcpu)) | ||
5276 | rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF; | ||
5277 | kvm_x86_ops->set_rflags(vcpu, rflags); | ||
5278 | } | ||
5279 | EXPORT_SYMBOL_GPL(kvm_set_rflags); | ||
5280 | |||
4947 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); | 5281 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); |
4948 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); | 5282 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); |
4949 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); | 5283 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); |
4950 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr); | 5284 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr); |
4951 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr); | 5285 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr); |
5286 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun); | ||
5287 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit); | ||
5288 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject); | ||
5289 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); | ||
5290 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); | ||
5291 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); | ||
diff --git a/arch/x86/lib/.gitignore b/arch/x86/lib/.gitignore new file mode 100644 index 000000000000..8df89f0a3fe6 --- /dev/null +++ b/arch/x86/lib/.gitignore | |||
@@ -0,0 +1 @@ | |||
inat-tables.c | |||
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 3e549b8ec8c9..cffd754f3039 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile | |||
@@ -2,21 +2,36 @@ | |||
2 | # Makefile for x86 specific library files. | 2 | # Makefile for x86 specific library files. |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_SMP) := msr.o | 5 | inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk |
6 | inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt | ||
7 | quiet_cmd_inat_tables = GEN $@ | ||
8 | cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@ | ||
9 | |||
10 | $(obj)/inat-tables.c: $(inat_tables_script) $(inat_tables_maps) | ||
11 | $(call cmd,inat_tables) | ||
12 | |||
13 | $(obj)/inat.o: $(obj)/inat-tables.c | ||
14 | |||
15 | clean-files := inat-tables.c | ||
16 | |||
17 | obj-$(CONFIG_SMP) += msr-smp.o | ||
6 | 18 | ||
7 | lib-y := delay.o | 19 | lib-y := delay.o |
8 | lib-y += thunk_$(BITS).o | 20 | lib-y += thunk_$(BITS).o |
9 | lib-y += usercopy_$(BITS).o getuser.o putuser.o | 21 | lib-y += usercopy_$(BITS).o getuser.o putuser.o |
10 | lib-y += memcpy_$(BITS).o | 22 | lib-y += memcpy_$(BITS).o |
23 | lib-$(CONFIG_KPROBES) += insn.o inat.o | ||
11 | 24 | ||
12 | obj-y += msr-reg.o msr-reg-export.o | 25 | obj-y += msr.o msr-reg.o msr-reg-export.o |
13 | 26 | ||
14 | ifeq ($(CONFIG_X86_32),y) | 27 | ifeq ($(CONFIG_X86_32),y) |
15 | obj-y += atomic64_32.o | 28 | obj-y += atomic64_32.o |
16 | lib-y += checksum_32.o | 29 | lib-y += checksum_32.o |
17 | lib-y += strstr_32.o | 30 | lib-y += strstr_32.o |
18 | lib-y += semaphore_32.o string_32.o cmpxchg8b_emu.o | 31 | lib-y += semaphore_32.o string_32.o |
19 | 32 | ifneq ($(CONFIG_X86_CMPXCHG64),y) | |
33 | lib-y += cmpxchg8b_emu.o | ||
34 | endif | ||
20 | lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o | 35 | lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o |
21 | else | 36 | else |
22 | obj-y += io_64.o iomap_copy_64.o | 37 | obj-y += io_64.o iomap_copy_64.o |
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 6ba0f7bb85ea..cf889d4e076a 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S | |||
@@ -65,7 +65,7 @@ | |||
65 | .endm | 65 | .endm |
66 | 66 | ||
67 | /* Standard copy_to_user with segment limit checking */ | 67 | /* Standard copy_to_user with segment limit checking */ |
68 | ENTRY(copy_to_user) | 68 | ENTRY(_copy_to_user) |
69 | CFI_STARTPROC | 69 | CFI_STARTPROC |
70 | GET_THREAD_INFO(%rax) | 70 | GET_THREAD_INFO(%rax) |
71 | movq %rdi,%rcx | 71 | movq %rdi,%rcx |
@@ -75,10 +75,10 @@ ENTRY(copy_to_user) | |||
75 | jae bad_to_user | 75 | jae bad_to_user |
76 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string | 76 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string |
77 | CFI_ENDPROC | 77 | CFI_ENDPROC |
78 | ENDPROC(copy_to_user) | 78 | ENDPROC(_copy_to_user) |
79 | 79 | ||
80 | /* Standard copy_from_user with segment limit checking */ | 80 | /* Standard copy_from_user with segment limit checking */ |
81 | ENTRY(copy_from_user) | 81 | ENTRY(_copy_from_user) |
82 | CFI_STARTPROC | 82 | CFI_STARTPROC |
83 | GET_THREAD_INFO(%rax) | 83 | GET_THREAD_INFO(%rax) |
84 | movq %rsi,%rcx | 84 | movq %rsi,%rcx |
@@ -88,7 +88,7 @@ ENTRY(copy_from_user) | |||
88 | jae bad_from_user | 88 | jae bad_from_user |
89 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string | 89 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string |
90 | CFI_ENDPROC | 90 | CFI_ENDPROC |
91 | ENDPROC(copy_from_user) | 91 | ENDPROC(_copy_from_user) |
92 | 92 | ||
93 | ENTRY(copy_user_generic) | 93 | ENTRY(copy_user_generic) |
94 | CFI_STARTPROC | 94 | CFI_STARTPROC |
@@ -96,12 +96,6 @@ ENTRY(copy_user_generic) | |||
96 | CFI_ENDPROC | 96 | CFI_ENDPROC |
97 | ENDPROC(copy_user_generic) | 97 | ENDPROC(copy_user_generic) |
98 | 98 | ||
99 | ENTRY(__copy_from_user_inatomic) | ||
100 | CFI_STARTPROC | ||
101 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string | ||
102 | CFI_ENDPROC | ||
103 | ENDPROC(__copy_from_user_inatomic) | ||
104 | |||
105 | .section .fixup,"ax" | 99 | .section .fixup,"ax" |
106 | /* must zero dest */ | 100 | /* must zero dest */ |
107 | ENTRY(bad_from_user) | 101 | ENTRY(bad_from_user) |
diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c new file mode 100644 index 000000000000..46fc4ee09fc4 --- /dev/null +++ b/arch/x86/lib/inat.c | |||
@@ -0,0 +1,90 @@ | |||
1 | /* | ||
2 | * x86 instruction attribute tables | ||
3 | * | ||
4 | * Written by Masami Hiramatsu <mhiramat@redhat.com> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
19 | * | ||
20 | */ | ||
21 | #include <asm/insn.h> | ||
22 | |||
23 | /* Attribute tables are generated from opcode map */ | ||
24 | #include "inat-tables.c" | ||
25 | |||
26 | /* Attribute search APIs */ | ||
27 | insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode) | ||
28 | { | ||
29 | return inat_primary_table[opcode]; | ||
30 | } | ||
31 | |||
32 | insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, insn_byte_t last_pfx, | ||
33 | insn_attr_t esc_attr) | ||
34 | { | ||
35 | const insn_attr_t *table; | ||
36 | insn_attr_t lpfx_attr; | ||
37 | int n, m = 0; | ||
38 | |||
39 | n = inat_escape_id(esc_attr); | ||
40 | if (last_pfx) { | ||
41 | lpfx_attr = inat_get_opcode_attribute(last_pfx); | ||
42 | m = inat_last_prefix_id(lpfx_attr); | ||
43 | } | ||
44 | table = inat_escape_tables[n][0]; | ||
45 | if (!table) | ||
46 | return 0; | ||
47 | if (inat_has_variant(table[opcode]) && m) { | ||
48 | table = inat_escape_tables[n][m]; | ||
49 | if (!table) | ||
50 | return 0; | ||
51 | } | ||
52 | return table[opcode]; | ||
53 | } | ||
54 | |||
55 | insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx, | ||
56 | insn_attr_t grp_attr) | ||
57 | { | ||
58 | const insn_attr_t *table; | ||
59 | insn_attr_t lpfx_attr; | ||
60 | int n, m = 0; | ||
61 | |||
62 | n = inat_group_id(grp_attr); | ||
63 | if (last_pfx) { | ||
64 | lpfx_attr = inat_get_opcode_attribute(last_pfx); | ||
65 | m = inat_last_prefix_id(lpfx_attr); | ||
66 | } | ||
67 | table = inat_group_tables[n][0]; | ||
68 | if (!table) | ||
69 | return inat_group_common_attribute(grp_attr); | ||
70 | if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && m) { | ||
71 | table = inat_group_tables[n][m]; | ||
72 | if (!table) | ||
73 | return inat_group_common_attribute(grp_attr); | ||
74 | } | ||
75 | return table[X86_MODRM_REG(modrm)] | | ||
76 | inat_group_common_attribute(grp_attr); | ||
77 | } | ||
78 | |||
79 | insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, | ||
80 | insn_byte_t vex_p) | ||
81 | { | ||
82 | const insn_attr_t *table; | ||
83 | if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) | ||
84 | return 0; | ||
85 | table = inat_avx_tables[vex_m][vex_p]; | ||
86 | if (!table) | ||
87 | return 0; | ||
88 | return table[opcode]; | ||
89 | } | ||
90 | |||
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c new file mode 100644 index 000000000000..9f33b984d0ef --- /dev/null +++ b/arch/x86/lib/insn.c | |||
@@ -0,0 +1,516 @@ | |||
1 | /* | ||
2 | * x86 instruction analysis | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright (C) IBM Corporation, 2002, 2004, 2009 | ||
19 | */ | ||
20 | |||
21 | #include <linux/string.h> | ||
22 | #include <asm/inat.h> | ||
23 | #include <asm/insn.h> | ||
24 | |||
25 | #define get_next(t, insn) \ | ||
26 | ({t r; r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) | ||
27 | |||
28 | #define peek_next(t, insn) \ | ||
29 | ({t r; r = *(t*)insn->next_byte; r; }) | ||
30 | |||
31 | #define peek_nbyte_next(t, insn, n) \ | ||
32 | ({t r; r = *(t*)((insn)->next_byte + n); r; }) | ||
33 | |||
34 | /** | ||
35 | * insn_init() - initialize struct insn | ||
36 | * @insn: &struct insn to be initialized | ||
37 | * @kaddr: address (in kernel memory) of instruction (or copy thereof) | ||
38 | * @x86_64: !0 for 64-bit kernel or 64-bit app | ||
39 | */ | ||
40 | void insn_init(struct insn *insn, const void *kaddr, int x86_64) | ||
41 | { | ||
42 | memset(insn, 0, sizeof(*insn)); | ||
43 | insn->kaddr = kaddr; | ||
44 | insn->next_byte = kaddr; | ||
45 | insn->x86_64 = x86_64 ? 1 : 0; | ||
46 | insn->opnd_bytes = 4; | ||
47 | if (x86_64) | ||
48 | insn->addr_bytes = 8; | ||
49 | else | ||
50 | insn->addr_bytes = 4; | ||
51 | } | ||
52 | |||
53 | /** | ||
54 | * insn_get_prefixes - scan x86 instruction prefix bytes | ||
55 | * @insn: &struct insn containing instruction | ||
56 | * | ||
57 | * Populates the @insn->prefixes bitmap, and updates @insn->next_byte | ||
58 | * to point to the (first) opcode. No effect if @insn->prefixes.got | ||
59 | * is already set. | ||
60 | */ | ||
61 | void insn_get_prefixes(struct insn *insn) | ||
62 | { | ||
63 | struct insn_field *prefixes = &insn->prefixes; | ||
64 | insn_attr_t attr; | ||
65 | insn_byte_t b, lb; | ||
66 | int i, nb; | ||
67 | |||
68 | if (prefixes->got) | ||
69 | return; | ||
70 | |||
71 | nb = 0; | ||
72 | lb = 0; | ||
73 | b = peek_next(insn_byte_t, insn); | ||
74 | attr = inat_get_opcode_attribute(b); | ||
75 | while (inat_is_legacy_prefix(attr)) { | ||
76 | /* Skip if same prefix */ | ||
77 | for (i = 0; i < nb; i++) | ||
78 | if (prefixes->bytes[i] == b) | ||
79 | goto found; | ||
80 | if (nb == 4) | ||
81 | /* Invalid instruction */ | ||
82 | break; | ||
83 | prefixes->bytes[nb++] = b; | ||
84 | if (inat_is_address_size_prefix(attr)) { | ||
85 | /* address size switches 2/4 or 4/8 */ | ||
86 | if (insn->x86_64) | ||
87 | insn->addr_bytes ^= 12; | ||
88 | else | ||
89 | insn->addr_bytes ^= 6; | ||
90 | } else if (inat_is_operand_size_prefix(attr)) { | ||
91 | /* oprand size switches 2/4 */ | ||
92 | insn->opnd_bytes ^= 6; | ||
93 | } | ||
94 | found: | ||
95 | prefixes->nbytes++; | ||
96 | insn->next_byte++; | ||
97 | lb = b; | ||
98 | b = peek_next(insn_byte_t, insn); | ||
99 | attr = inat_get_opcode_attribute(b); | ||
100 | } | ||
101 | /* Set the last prefix */ | ||
102 | if (lb && lb != insn->prefixes.bytes[3]) { | ||
103 | if (unlikely(insn->prefixes.bytes[3])) { | ||
104 | /* Swap the last prefix */ | ||
105 | b = insn->prefixes.bytes[3]; | ||
106 | for (i = 0; i < nb; i++) | ||
107 | if (prefixes->bytes[i] == lb) | ||
108 | prefixes->bytes[i] = b; | ||
109 | } | ||
110 | insn->prefixes.bytes[3] = lb; | ||
111 | } | ||
112 | |||
113 | /* Decode REX prefix */ | ||
114 | if (insn->x86_64) { | ||
115 | b = peek_next(insn_byte_t, insn); | ||
116 | attr = inat_get_opcode_attribute(b); | ||
117 | if (inat_is_rex_prefix(attr)) { | ||
118 | insn->rex_prefix.value = b; | ||
119 | insn->rex_prefix.nbytes = 1; | ||
120 | insn->next_byte++; | ||
121 | if (X86_REX_W(b)) | ||
122 | /* REX.W overrides opnd_size */ | ||
123 | insn->opnd_bytes = 8; | ||
124 | } | ||
125 | } | ||
126 | insn->rex_prefix.got = 1; | ||
127 | |||
128 | /* Decode VEX prefix */ | ||
129 | b = peek_next(insn_byte_t, insn); | ||
130 | attr = inat_get_opcode_attribute(b); | ||
131 | if (inat_is_vex_prefix(attr)) { | ||
132 | insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1); | ||
133 | if (!insn->x86_64) { | ||
134 | /* | ||
135 | * In 32-bits mode, if the [7:6] bits (mod bits of | ||
136 | * ModRM) on the second byte are not 11b, it is | ||
137 | * LDS or LES. | ||
138 | */ | ||
139 | if (X86_MODRM_MOD(b2) != 3) | ||
140 | goto vex_end; | ||
141 | } | ||
142 | insn->vex_prefix.bytes[0] = b; | ||
143 | insn->vex_prefix.bytes[1] = b2; | ||
144 | if (inat_is_vex3_prefix(attr)) { | ||
145 | b2 = peek_nbyte_next(insn_byte_t, insn, 2); | ||
146 | insn->vex_prefix.bytes[2] = b2; | ||
147 | insn->vex_prefix.nbytes = 3; | ||
148 | insn->next_byte += 3; | ||
149 | if (insn->x86_64 && X86_VEX_W(b2)) | ||
150 | /* VEX.W overrides opnd_size */ | ||
151 | insn->opnd_bytes = 8; | ||
152 | } else { | ||
153 | insn->vex_prefix.nbytes = 2; | ||
154 | insn->next_byte += 2; | ||
155 | } | ||
156 | } | ||
157 | vex_end: | ||
158 | insn->vex_prefix.got = 1; | ||
159 | |||
160 | prefixes->got = 1; | ||
161 | return; | ||
162 | } | ||
163 | |||
164 | /** | ||
165 | * insn_get_opcode - collect opcode(s) | ||
166 | * @insn: &struct insn containing instruction | ||
167 | * | ||
168 | * Populates @insn->opcode, updates @insn->next_byte to point past the | ||
169 | * opcode byte(s), and set @insn->attr (except for groups). | ||
170 | * If necessary, first collects any preceding (prefix) bytes. | ||
171 | * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got | ||
172 | * is already 1. | ||
173 | */ | ||
174 | void insn_get_opcode(struct insn *insn) | ||
175 | { | ||
176 | struct insn_field *opcode = &insn->opcode; | ||
177 | insn_byte_t op, pfx; | ||
178 | if (opcode->got) | ||
179 | return; | ||
180 | if (!insn->prefixes.got) | ||
181 | insn_get_prefixes(insn); | ||
182 | |||
183 | /* Get first opcode */ | ||
184 | op = get_next(insn_byte_t, insn); | ||
185 | opcode->bytes[0] = op; | ||
186 | opcode->nbytes = 1; | ||
187 | |||
188 | /* Check if there is VEX prefix or not */ | ||
189 | if (insn_is_avx(insn)) { | ||
190 | insn_byte_t m, p; | ||
191 | m = insn_vex_m_bits(insn); | ||
192 | p = insn_vex_p_bits(insn); | ||
193 | insn->attr = inat_get_avx_attribute(op, m, p); | ||
194 | if (!inat_accept_vex(insn->attr)) | ||
195 | insn->attr = 0; /* This instruction is bad */ | ||
196 | goto end; /* VEX has only 1 byte for opcode */ | ||
197 | } | ||
198 | |||
199 | insn->attr = inat_get_opcode_attribute(op); | ||
200 | while (inat_is_escape(insn->attr)) { | ||
201 | /* Get escaped opcode */ | ||
202 | op = get_next(insn_byte_t, insn); | ||
203 | opcode->bytes[opcode->nbytes++] = op; | ||
204 | pfx = insn_last_prefix(insn); | ||
205 | insn->attr = inat_get_escape_attribute(op, pfx, insn->attr); | ||
206 | } | ||
207 | if (inat_must_vex(insn->attr)) | ||
208 | insn->attr = 0; /* This instruction is bad */ | ||
209 | end: | ||
210 | opcode->got = 1; | ||
211 | } | ||
212 | |||
213 | /** | ||
214 | * insn_get_modrm - collect ModRM byte, if any | ||
215 | * @insn: &struct insn containing instruction | ||
216 | * | ||
217 | * Populates @insn->modrm and updates @insn->next_byte to point past the | ||
218 | * ModRM byte, if any. If necessary, first collects the preceding bytes | ||
219 | * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. | ||
220 | */ | ||
221 | void insn_get_modrm(struct insn *insn) | ||
222 | { | ||
223 | struct insn_field *modrm = &insn->modrm; | ||
224 | insn_byte_t pfx, mod; | ||
225 | if (modrm->got) | ||
226 | return; | ||
227 | if (!insn->opcode.got) | ||
228 | insn_get_opcode(insn); | ||
229 | |||
230 | if (inat_has_modrm(insn->attr)) { | ||
231 | mod = get_next(insn_byte_t, insn); | ||
232 | modrm->value = mod; | ||
233 | modrm->nbytes = 1; | ||
234 | if (inat_is_group(insn->attr)) { | ||
235 | pfx = insn_last_prefix(insn); | ||
236 | insn->attr = inat_get_group_attribute(mod, pfx, | ||
237 | insn->attr); | ||
238 | } | ||
239 | } | ||
240 | |||
241 | if (insn->x86_64 && inat_is_force64(insn->attr)) | ||
242 | insn->opnd_bytes = 8; | ||
243 | modrm->got = 1; | ||
244 | } | ||
245 | |||
246 | |||
247 | /** | ||
248 | * insn_rip_relative() - Does instruction use RIP-relative addressing mode? | ||
249 | * @insn: &struct insn containing instruction | ||
250 | * | ||
251 | * If necessary, first collects the instruction up to and including the | ||
252 | * ModRM byte. No effect if @insn->x86_64 is 0. | ||
253 | */ | ||
254 | int insn_rip_relative(struct insn *insn) | ||
255 | { | ||
256 | struct insn_field *modrm = &insn->modrm; | ||
257 | |||
258 | if (!insn->x86_64) | ||
259 | return 0; | ||
260 | if (!modrm->got) | ||
261 | insn_get_modrm(insn); | ||
262 | /* | ||
263 | * For rip-relative instructions, the mod field (top 2 bits) | ||
264 | * is zero and the r/m field (bottom 3 bits) is 0x5. | ||
265 | */ | ||
266 | return (modrm->nbytes && (modrm->value & 0xc7) == 0x5); | ||
267 | } | ||
268 | |||
269 | /** | ||
270 | * insn_get_sib() - Get the SIB byte of instruction | ||
271 | * @insn: &struct insn containing instruction | ||
272 | * | ||
273 | * If necessary, first collects the instruction up to and including the | ||
274 | * ModRM byte. | ||
275 | */ | ||
276 | void insn_get_sib(struct insn *insn) | ||
277 | { | ||
278 | insn_byte_t modrm; | ||
279 | |||
280 | if (insn->sib.got) | ||
281 | return; | ||
282 | if (!insn->modrm.got) | ||
283 | insn_get_modrm(insn); | ||
284 | if (insn->modrm.nbytes) { | ||
285 | modrm = (insn_byte_t)insn->modrm.value; | ||
286 | if (insn->addr_bytes != 2 && | ||
287 | X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { | ||
288 | insn->sib.value = get_next(insn_byte_t, insn); | ||
289 | insn->sib.nbytes = 1; | ||
290 | } | ||
291 | } | ||
292 | insn->sib.got = 1; | ||
293 | } | ||
294 | |||
295 | |||
296 | /** | ||
297 | * insn_get_displacement() - Get the displacement of instruction | ||
298 | * @insn: &struct insn containing instruction | ||
299 | * | ||
300 | * If necessary, first collects the instruction up to and including the | ||
301 | * SIB byte. | ||
302 | * Displacement value is sign-expanded. | ||
303 | */ | ||
304 | void insn_get_displacement(struct insn *insn) | ||
305 | { | ||
306 | insn_byte_t mod, rm, base; | ||
307 | |||
308 | if (insn->displacement.got) | ||
309 | return; | ||
310 | if (!insn->sib.got) | ||
311 | insn_get_sib(insn); | ||
312 | if (insn->modrm.nbytes) { | ||
313 | /* | ||
314 | * Interpreting the modrm byte: | ||
315 | * mod = 00 - no displacement fields (exceptions below) | ||
316 | * mod = 01 - 1-byte displacement field | ||
317 | * mod = 10 - displacement field is 4 bytes, or 2 bytes if | ||
318 | * address size = 2 (0x67 prefix in 32-bit mode) | ||
319 | * mod = 11 - no memory operand | ||
320 | * | ||
321 | * If address size = 2... | ||
322 | * mod = 00, r/m = 110 - displacement field is 2 bytes | ||
323 | * | ||
324 | * If address size != 2... | ||
325 | * mod != 11, r/m = 100 - SIB byte exists | ||
326 | * mod = 00, SIB base = 101 - displacement field is 4 bytes | ||
327 | * mod = 00, r/m = 101 - rip-relative addressing, displacement | ||
328 | * field is 4 bytes | ||
329 | */ | ||
330 | mod = X86_MODRM_MOD(insn->modrm.value); | ||
331 | rm = X86_MODRM_RM(insn->modrm.value); | ||
332 | base = X86_SIB_BASE(insn->sib.value); | ||
333 | if (mod == 3) | ||
334 | goto out; | ||
335 | if (mod == 1) { | ||
336 | insn->displacement.value = get_next(char, insn); | ||
337 | insn->displacement.nbytes = 1; | ||
338 | } else if (insn->addr_bytes == 2) { | ||
339 | if ((mod == 0 && rm == 6) || mod == 2) { | ||
340 | insn->displacement.value = | ||
341 | get_next(short, insn); | ||
342 | insn->displacement.nbytes = 2; | ||
343 | } | ||
344 | } else { | ||
345 | if ((mod == 0 && rm == 5) || mod == 2 || | ||
346 | (mod == 0 && base == 5)) { | ||
347 | insn->displacement.value = get_next(int, insn); | ||
348 | insn->displacement.nbytes = 4; | ||
349 | } | ||
350 | } | ||
351 | } | ||
352 | out: | ||
353 | insn->displacement.got = 1; | ||
354 | } | ||
355 | |||
356 | /* Decode moffset16/32/64 */ | ||
357 | static void __get_moffset(struct insn *insn) | ||
358 | { | ||
359 | switch (insn->addr_bytes) { | ||
360 | case 2: | ||
361 | insn->moffset1.value = get_next(short, insn); | ||
362 | insn->moffset1.nbytes = 2; | ||
363 | break; | ||
364 | case 4: | ||
365 | insn->moffset1.value = get_next(int, insn); | ||
366 | insn->moffset1.nbytes = 4; | ||
367 | break; | ||
368 | case 8: | ||
369 | insn->moffset1.value = get_next(int, insn); | ||
370 | insn->moffset1.nbytes = 4; | ||
371 | insn->moffset2.value = get_next(int, insn); | ||
372 | insn->moffset2.nbytes = 4; | ||
373 | break; | ||
374 | } | ||
375 | insn->moffset1.got = insn->moffset2.got = 1; | ||
376 | } | ||
377 | |||
378 | /* Decode imm v32(Iz) */ | ||
379 | static void __get_immv32(struct insn *insn) | ||
380 | { | ||
381 | switch (insn->opnd_bytes) { | ||
382 | case 2: | ||
383 | insn->immediate.value = get_next(short, insn); | ||
384 | insn->immediate.nbytes = 2; | ||
385 | break; | ||
386 | case 4: | ||
387 | case 8: | ||
388 | insn->immediate.value = get_next(int, insn); | ||
389 | insn->immediate.nbytes = 4; | ||
390 | break; | ||
391 | } | ||
392 | } | ||
393 | |||
394 | /* Decode imm v64(Iv/Ov) */ | ||
395 | static void __get_immv(struct insn *insn) | ||
396 | { | ||
397 | switch (insn->opnd_bytes) { | ||
398 | case 2: | ||
399 | insn->immediate1.value = get_next(short, insn); | ||
400 | insn->immediate1.nbytes = 2; | ||
401 | break; | ||
402 | case 4: | ||
403 | insn->immediate1.value = get_next(int, insn); | ||
404 | insn->immediate1.nbytes = 4; | ||
405 | break; | ||
406 | case 8: | ||
407 | insn->immediate1.value = get_next(int, insn); | ||
408 | insn->immediate1.nbytes = 4; | ||
409 | insn->immediate2.value = get_next(int, insn); | ||
410 | insn->immediate2.nbytes = 4; | ||
411 | break; | ||
412 | } | ||
413 | insn->immediate1.got = insn->immediate2.got = 1; | ||
414 | } | ||
415 | |||
416 | /* Decode ptr16:16/32(Ap) */ | ||
417 | static void __get_immptr(struct insn *insn) | ||
418 | { | ||
419 | switch (insn->opnd_bytes) { | ||
420 | case 2: | ||
421 | insn->immediate1.value = get_next(short, insn); | ||
422 | insn->immediate1.nbytes = 2; | ||
423 | break; | ||
424 | case 4: | ||
425 | insn->immediate1.value = get_next(int, insn); | ||
426 | insn->immediate1.nbytes = 4; | ||
427 | break; | ||
428 | case 8: | ||
429 | /* ptr16:64 is not exist (no segment) */ | ||
430 | return; | ||
431 | } | ||
432 | insn->immediate2.value = get_next(unsigned short, insn); | ||
433 | insn->immediate2.nbytes = 2; | ||
434 | insn->immediate1.got = insn->immediate2.got = 1; | ||
435 | } | ||
436 | |||
437 | /** | ||
438 | * insn_get_immediate() - Get the immediates of instruction | ||
439 | * @insn: &struct insn containing instruction | ||
440 | * | ||
441 | * If necessary, first collects the instruction up to and including the | ||
442 | * displacement bytes. | ||
443 | * Basically, most of immediates are sign-expanded. Unsigned-value can be | ||
444 | * get by bit masking with ((1 << (nbytes * 8)) - 1) | ||
445 | */ | ||
446 | void insn_get_immediate(struct insn *insn) | ||
447 | { | ||
448 | if (insn->immediate.got) | ||
449 | return; | ||
450 | if (!insn->displacement.got) | ||
451 | insn_get_displacement(insn); | ||
452 | |||
453 | if (inat_has_moffset(insn->attr)) { | ||
454 | __get_moffset(insn); | ||
455 | goto done; | ||
456 | } | ||
457 | |||
458 | if (!inat_has_immediate(insn->attr)) | ||
459 | /* no immediates */ | ||
460 | goto done; | ||
461 | |||
462 | switch (inat_immediate_size(insn->attr)) { | ||
463 | case INAT_IMM_BYTE: | ||
464 | insn->immediate.value = get_next(char, insn); | ||
465 | insn->immediate.nbytes = 1; | ||
466 | break; | ||
467 | case INAT_IMM_WORD: | ||
468 | insn->immediate.value = get_next(short, insn); | ||
469 | insn->immediate.nbytes = 2; | ||
470 | break; | ||
471 | case INAT_IMM_DWORD: | ||
472 | insn->immediate.value = get_next(int, insn); | ||
473 | insn->immediate.nbytes = 4; | ||
474 | break; | ||
475 | case INAT_IMM_QWORD: | ||
476 | insn->immediate1.value = get_next(int, insn); | ||
477 | insn->immediate1.nbytes = 4; | ||
478 | insn->immediate2.value = get_next(int, insn); | ||
479 | insn->immediate2.nbytes = 4; | ||
480 | break; | ||
481 | case INAT_IMM_PTR: | ||
482 | __get_immptr(insn); | ||
483 | break; | ||
484 | case INAT_IMM_VWORD32: | ||
485 | __get_immv32(insn); | ||
486 | break; | ||
487 | case INAT_IMM_VWORD: | ||
488 | __get_immv(insn); | ||
489 | break; | ||
490 | default: | ||
491 | break; | ||
492 | } | ||
493 | if (inat_has_second_immediate(insn->attr)) { | ||
494 | insn->immediate2.value = get_next(char, insn); | ||
495 | insn->immediate2.nbytes = 1; | ||
496 | } | ||
497 | done: | ||
498 | insn->immediate.got = 1; | ||
499 | } | ||
500 | |||
501 | /** | ||
502 | * insn_get_length() - Get the length of instruction | ||
503 | * @insn: &struct insn containing instruction | ||
504 | * | ||
505 | * If necessary, first collects the instruction up to and including the | ||
506 | * immediates bytes. | ||
507 | */ | ||
508 | void insn_get_length(struct insn *insn) | ||
509 | { | ||
510 | if (insn->length) | ||
511 | return; | ||
512 | if (!insn->immediate.got) | ||
513 | insn_get_immediate(insn); | ||
514 | insn->length = (unsigned char)((unsigned long)insn->next_byte | ||
515 | - (unsigned long)insn->kaddr); | ||
516 | } | ||
diff --git a/arch/x86/lib/msr-smp.c b/arch/x86/lib/msr-smp.c new file mode 100644 index 000000000000..a6b1b86d2253 --- /dev/null +++ b/arch/x86/lib/msr-smp.c | |||
@@ -0,0 +1,204 @@ | |||
1 | #include <linux/module.h> | ||
2 | #include <linux/preempt.h> | ||
3 | #include <linux/smp.h> | ||
4 | #include <asm/msr.h> | ||
5 | |||
6 | static void __rdmsr_on_cpu(void *info) | ||
7 | { | ||
8 | struct msr_info *rv = info; | ||
9 | struct msr *reg; | ||
10 | int this_cpu = raw_smp_processor_id(); | ||
11 | |||
12 | if (rv->msrs) | ||
13 | reg = per_cpu_ptr(rv->msrs, this_cpu); | ||
14 | else | ||
15 | reg = &rv->reg; | ||
16 | |||
17 | rdmsr(rv->msr_no, reg->l, reg->h); | ||
18 | } | ||
19 | |||
20 | static void __wrmsr_on_cpu(void *info) | ||
21 | { | ||
22 | struct msr_info *rv = info; | ||
23 | struct msr *reg; | ||
24 | int this_cpu = raw_smp_processor_id(); | ||
25 | |||
26 | if (rv->msrs) | ||
27 | reg = per_cpu_ptr(rv->msrs, this_cpu); | ||
28 | else | ||
29 | reg = &rv->reg; | ||
30 | |||
31 | wrmsr(rv->msr_no, reg->l, reg->h); | ||
32 | } | ||
33 | |||
34 | int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) | ||
35 | { | ||
36 | int err; | ||
37 | struct msr_info rv; | ||
38 | |||
39 | memset(&rv, 0, sizeof(rv)); | ||
40 | |||
41 | rv.msr_no = msr_no; | ||
42 | err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1); | ||
43 | *l = rv.reg.l; | ||
44 | *h = rv.reg.h; | ||
45 | |||
46 | return err; | ||
47 | } | ||
48 | EXPORT_SYMBOL(rdmsr_on_cpu); | ||
49 | |||
50 | int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) | ||
51 | { | ||
52 | int err; | ||
53 | struct msr_info rv; | ||
54 | |||
55 | memset(&rv, 0, sizeof(rv)); | ||
56 | |||
57 | rv.msr_no = msr_no; | ||
58 | rv.reg.l = l; | ||
59 | rv.reg.h = h; | ||
60 | err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1); | ||
61 | |||
62 | return err; | ||
63 | } | ||
64 | EXPORT_SYMBOL(wrmsr_on_cpu); | ||
65 | |||
66 | static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no, | ||
67 | struct msr *msrs, | ||
68 | void (*msr_func) (void *info)) | ||
69 | { | ||
70 | struct msr_info rv; | ||
71 | int this_cpu; | ||
72 | |||
73 | memset(&rv, 0, sizeof(rv)); | ||
74 | |||
75 | rv.msrs = msrs; | ||
76 | rv.msr_no = msr_no; | ||
77 | |||
78 | this_cpu = get_cpu(); | ||
79 | |||
80 | if (cpumask_test_cpu(this_cpu, mask)) | ||
81 | msr_func(&rv); | ||
82 | |||
83 | smp_call_function_many(mask, msr_func, &rv, 1); | ||
84 | put_cpu(); | ||
85 | } | ||
86 | |||
87 | /* rdmsr on a bunch of CPUs | ||
88 | * | ||
89 | * @mask: which CPUs | ||
90 | * @msr_no: which MSR | ||
91 | * @msrs: array of MSR values | ||
92 | * | ||
93 | */ | ||
94 | void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) | ||
95 | { | ||
96 | __rwmsr_on_cpus(mask, msr_no, msrs, __rdmsr_on_cpu); | ||
97 | } | ||
98 | EXPORT_SYMBOL(rdmsr_on_cpus); | ||
99 | |||
100 | /* | ||
101 | * wrmsr on a bunch of CPUs | ||
102 | * | ||
103 | * @mask: which CPUs | ||
104 | * @msr_no: which MSR | ||
105 | * @msrs: array of MSR values | ||
106 | * | ||
107 | */ | ||
108 | void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) | ||
109 | { | ||
110 | __rwmsr_on_cpus(mask, msr_no, msrs, __wrmsr_on_cpu); | ||
111 | } | ||
112 | EXPORT_SYMBOL(wrmsr_on_cpus); | ||
113 | |||
114 | /* These "safe" variants are slower and should be used when the target MSR | ||
115 | may not actually exist. */ | ||
116 | static void __rdmsr_safe_on_cpu(void *info) | ||
117 | { | ||
118 | struct msr_info *rv = info; | ||
119 | |||
120 | rv->err = rdmsr_safe(rv->msr_no, &rv->reg.l, &rv->reg.h); | ||
121 | } | ||
122 | |||
123 | static void __wrmsr_safe_on_cpu(void *info) | ||
124 | { | ||
125 | struct msr_info *rv = info; | ||
126 | |||
127 | rv->err = wrmsr_safe(rv->msr_no, rv->reg.l, rv->reg.h); | ||
128 | } | ||
129 | |||
130 | int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) | ||
131 | { | ||
132 | int err; | ||
133 | struct msr_info rv; | ||
134 | |||
135 | memset(&rv, 0, sizeof(rv)); | ||
136 | |||
137 | rv.msr_no = msr_no; | ||
138 | err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1); | ||
139 | *l = rv.reg.l; | ||
140 | *h = rv.reg.h; | ||
141 | |||
142 | return err ? err : rv.err; | ||
143 | } | ||
144 | EXPORT_SYMBOL(rdmsr_safe_on_cpu); | ||
145 | |||
146 | int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) | ||
147 | { | ||
148 | int err; | ||
149 | struct msr_info rv; | ||
150 | |||
151 | memset(&rv, 0, sizeof(rv)); | ||
152 | |||
153 | rv.msr_no = msr_no; | ||
154 | rv.reg.l = l; | ||
155 | rv.reg.h = h; | ||
156 | err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1); | ||
157 | |||
158 | return err ? err : rv.err; | ||
159 | } | ||
160 | EXPORT_SYMBOL(wrmsr_safe_on_cpu); | ||
161 | |||
162 | /* | ||
163 | * These variants are significantly slower, but allows control over | ||
164 | * the entire 32-bit GPR set. | ||
165 | */ | ||
166 | static void __rdmsr_safe_regs_on_cpu(void *info) | ||
167 | { | ||
168 | struct msr_regs_info *rv = info; | ||
169 | |||
170 | rv->err = rdmsr_safe_regs(rv->regs); | ||
171 | } | ||
172 | |||
173 | static void __wrmsr_safe_regs_on_cpu(void *info) | ||
174 | { | ||
175 | struct msr_regs_info *rv = info; | ||
176 | |||
177 | rv->err = wrmsr_safe_regs(rv->regs); | ||
178 | } | ||
179 | |||
180 | int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs) | ||
181 | { | ||
182 | int err; | ||
183 | struct msr_regs_info rv; | ||
184 | |||
185 | rv.regs = regs; | ||
186 | rv.err = -EIO; | ||
187 | err = smp_call_function_single(cpu, __rdmsr_safe_regs_on_cpu, &rv, 1); | ||
188 | |||
189 | return err ? err : rv.err; | ||
190 | } | ||
191 | EXPORT_SYMBOL(rdmsr_safe_regs_on_cpu); | ||
192 | |||
193 | int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs) | ||
194 | { | ||
195 | int err; | ||
196 | struct msr_regs_info rv; | ||
197 | |||
198 | rv.regs = regs; | ||
199 | rv.err = -EIO; | ||
200 | err = smp_call_function_single(cpu, __wrmsr_safe_regs_on_cpu, &rv, 1); | ||
201 | |||
202 | return err ? err : rv.err; | ||
203 | } | ||
204 | EXPORT_SYMBOL(wrmsr_safe_regs_on_cpu); | ||
diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c index 33a1e3ca22d8..8f8eebdca7d4 100644 --- a/arch/x86/lib/msr.c +++ b/arch/x86/lib/msr.c | |||
@@ -1,226 +1,23 @@ | |||
1 | #include <linux/module.h> | 1 | #include <linux/module.h> |
2 | #include <linux/preempt.h> | 2 | #include <linux/preempt.h> |
3 | #include <linux/smp.h> | ||
4 | #include <asm/msr.h> | 3 | #include <asm/msr.h> |
5 | 4 | ||
6 | struct msr_info { | 5 | struct msr *msrs_alloc(void) |
7 | u32 msr_no; | ||
8 | struct msr reg; | ||
9 | struct msr *msrs; | ||
10 | int off; | ||
11 | int err; | ||
12 | }; | ||
13 | |||
14 | static void __rdmsr_on_cpu(void *info) | ||
15 | { | ||
16 | struct msr_info *rv = info; | ||
17 | struct msr *reg; | ||
18 | int this_cpu = raw_smp_processor_id(); | ||
19 | |||
20 | if (rv->msrs) | ||
21 | reg = &rv->msrs[this_cpu - rv->off]; | ||
22 | else | ||
23 | reg = &rv->reg; | ||
24 | |||
25 | rdmsr(rv->msr_no, reg->l, reg->h); | ||
26 | } | ||
27 | |||
28 | static void __wrmsr_on_cpu(void *info) | ||
29 | { | ||
30 | struct msr_info *rv = info; | ||
31 | struct msr *reg; | ||
32 | int this_cpu = raw_smp_processor_id(); | ||
33 | |||
34 | if (rv->msrs) | ||
35 | reg = &rv->msrs[this_cpu - rv->off]; | ||
36 | else | ||
37 | reg = &rv->reg; | ||
38 | |||
39 | wrmsr(rv->msr_no, reg->l, reg->h); | ||
40 | } | ||
41 | |||
42 | int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) | ||
43 | { | ||
44 | int err; | ||
45 | struct msr_info rv; | ||
46 | |||
47 | memset(&rv, 0, sizeof(rv)); | ||
48 | |||
49 | rv.msr_no = msr_no; | ||
50 | err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1); | ||
51 | *l = rv.reg.l; | ||
52 | *h = rv.reg.h; | ||
53 | |||
54 | return err; | ||
55 | } | ||
56 | EXPORT_SYMBOL(rdmsr_on_cpu); | ||
57 | |||
58 | int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) | ||
59 | { | ||
60 | int err; | ||
61 | struct msr_info rv; | ||
62 | |||
63 | memset(&rv, 0, sizeof(rv)); | ||
64 | |||
65 | rv.msr_no = msr_no; | ||
66 | rv.reg.l = l; | ||
67 | rv.reg.h = h; | ||
68 | err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1); | ||
69 | |||
70 | return err; | ||
71 | } | ||
72 | EXPORT_SYMBOL(wrmsr_on_cpu); | ||
73 | |||
74 | /* rdmsr on a bunch of CPUs | ||
75 | * | ||
76 | * @mask: which CPUs | ||
77 | * @msr_no: which MSR | ||
78 | * @msrs: array of MSR values | ||
79 | * | ||
80 | */ | ||
81 | void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs) | ||
82 | { | ||
83 | struct msr_info rv; | ||
84 | int this_cpu; | ||
85 | |||
86 | memset(&rv, 0, sizeof(rv)); | ||
87 | |||
88 | rv.off = cpumask_first(mask); | ||
89 | rv.msrs = msrs; | ||
90 | rv.msr_no = msr_no; | ||
91 | |||
92 | this_cpu = get_cpu(); | ||
93 | |||
94 | if (cpumask_test_cpu(this_cpu, mask)) | ||
95 | __rdmsr_on_cpu(&rv); | ||
96 | |||
97 | smp_call_function_many(mask, __rdmsr_on_cpu, &rv, 1); | ||
98 | put_cpu(); | ||
99 | } | ||
100 | EXPORT_SYMBOL(rdmsr_on_cpus); | ||
101 | |||
102 | /* | ||
103 | * wrmsr on a bunch of CPUs | ||
104 | * | ||
105 | * @mask: which CPUs | ||
106 | * @msr_no: which MSR | ||
107 | * @msrs: array of MSR values | ||
108 | * | ||
109 | */ | ||
110 | void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs) | ||
111 | { | ||
112 | struct msr_info rv; | ||
113 | int this_cpu; | ||
114 | |||
115 | memset(&rv, 0, sizeof(rv)); | ||
116 | |||
117 | rv.off = cpumask_first(mask); | ||
118 | rv.msrs = msrs; | ||
119 | rv.msr_no = msr_no; | ||
120 | |||
121 | this_cpu = get_cpu(); | ||
122 | |||
123 | if (cpumask_test_cpu(this_cpu, mask)) | ||
124 | __wrmsr_on_cpu(&rv); | ||
125 | |||
126 | smp_call_function_many(mask, __wrmsr_on_cpu, &rv, 1); | ||
127 | put_cpu(); | ||
128 | } | ||
129 | EXPORT_SYMBOL(wrmsr_on_cpus); | ||
130 | |||
131 | /* These "safe" variants are slower and should be used when the target MSR | ||
132 | may not actually exist. */ | ||
133 | static void __rdmsr_safe_on_cpu(void *info) | ||
134 | { | ||
135 | struct msr_info *rv = info; | ||
136 | |||
137 | rv->err = rdmsr_safe(rv->msr_no, &rv->reg.l, &rv->reg.h); | ||
138 | } | ||
139 | |||
140 | static void __wrmsr_safe_on_cpu(void *info) | ||
141 | { | ||
142 | struct msr_info *rv = info; | ||
143 | |||
144 | rv->err = wrmsr_safe(rv->msr_no, rv->reg.l, rv->reg.h); | ||
145 | } | ||
146 | |||
147 | int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) | ||
148 | { | 6 | { |
149 | int err; | 7 | struct msr *msrs = NULL; |
150 | struct msr_info rv; | ||
151 | 8 | ||
152 | memset(&rv, 0, sizeof(rv)); | 9 | msrs = alloc_percpu(struct msr); |
10 | if (!msrs) { | ||
11 | pr_warning("%s: error allocating msrs\n", __func__); | ||
12 | return NULL; | ||
13 | } | ||
153 | 14 | ||
154 | rv.msr_no = msr_no; | 15 | return msrs; |
155 | err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1); | ||
156 | *l = rv.reg.l; | ||
157 | *h = rv.reg.h; | ||
158 | |||
159 | return err ? err : rv.err; | ||
160 | } | 16 | } |
161 | EXPORT_SYMBOL(rdmsr_safe_on_cpu); | 17 | EXPORT_SYMBOL(msrs_alloc); |
162 | 18 | ||
163 | int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) | 19 | void msrs_free(struct msr *msrs) |
164 | { | 20 | { |
165 | int err; | 21 | free_percpu(msrs); |
166 | struct msr_info rv; | ||
167 | |||
168 | memset(&rv, 0, sizeof(rv)); | ||
169 | |||
170 | rv.msr_no = msr_no; | ||
171 | rv.reg.l = l; | ||
172 | rv.reg.h = h; | ||
173 | err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1); | ||
174 | |||
175 | return err ? err : rv.err; | ||
176 | } | ||
177 | EXPORT_SYMBOL(wrmsr_safe_on_cpu); | ||
178 | |||
179 | /* | ||
180 | * These variants are significantly slower, but allows control over | ||
181 | * the entire 32-bit GPR set. | ||
182 | */ | ||
183 | struct msr_regs_info { | ||
184 | u32 *regs; | ||
185 | int err; | ||
186 | }; | ||
187 | |||
188 | static void __rdmsr_safe_regs_on_cpu(void *info) | ||
189 | { | ||
190 | struct msr_regs_info *rv = info; | ||
191 | |||
192 | rv->err = rdmsr_safe_regs(rv->regs); | ||
193 | } | ||
194 | |||
195 | static void __wrmsr_safe_regs_on_cpu(void *info) | ||
196 | { | ||
197 | struct msr_regs_info *rv = info; | ||
198 | |||
199 | rv->err = wrmsr_safe_regs(rv->regs); | ||
200 | } | ||
201 | |||
202 | int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs) | ||
203 | { | ||
204 | int err; | ||
205 | struct msr_regs_info rv; | ||
206 | |||
207 | rv.regs = regs; | ||
208 | rv.err = -EIO; | ||
209 | err = smp_call_function_single(cpu, __rdmsr_safe_regs_on_cpu, &rv, 1); | ||
210 | |||
211 | return err ? err : rv.err; | ||
212 | } | ||
213 | EXPORT_SYMBOL(rdmsr_safe_regs_on_cpu); | ||
214 | |||
215 | int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs) | ||
216 | { | ||
217 | int err; | ||
218 | struct msr_regs_info rv; | ||
219 | |||
220 | rv.regs = regs; | ||
221 | rv.err = -EIO; | ||
222 | err = smp_call_function_single(cpu, __wrmsr_safe_regs_on_cpu, &rv, 1); | ||
223 | |||
224 | return err ? err : rv.err; | ||
225 | } | 22 | } |
226 | EXPORT_SYMBOL(wrmsr_safe_regs_on_cpu); | 23 | EXPORT_SYMBOL(msrs_free); |
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 1f118d462acc..e218d5df85ff 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c | |||
@@ -874,7 +874,7 @@ EXPORT_SYMBOL(copy_to_user); | |||
874 | * data to the requested size using zero bytes. | 874 | * data to the requested size using zero bytes. |
875 | */ | 875 | */ |
876 | unsigned long | 876 | unsigned long |
877 | copy_from_user(void *to, const void __user *from, unsigned long n) | 877 | _copy_from_user(void *to, const void __user *from, unsigned long n) |
878 | { | 878 | { |
879 | if (access_ok(VERIFY_READ, from, n)) | 879 | if (access_ok(VERIFY_READ, from, n)) |
880 | n = __copy_from_user(to, from, n); | 880 | n = __copy_from_user(to, from, n); |
@@ -882,4 +882,10 @@ copy_from_user(void *to, const void __user *from, unsigned long n) | |||
882 | memset(to, 0, n); | 882 | memset(to, 0, n); |
883 | return n; | 883 | return n; |
884 | } | 884 | } |
885 | EXPORT_SYMBOL(copy_from_user); | 885 | EXPORT_SYMBOL(_copy_from_user); |
886 | |||
887 | void copy_from_user_overflow(void) | ||
888 | { | ||
889 | WARN(1, "Buffer overflow detected!\n"); | ||
890 | } | ||
891 | EXPORT_SYMBOL(copy_from_user_overflow); | ||
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt new file mode 100644 index 000000000000..a793da5e560e --- /dev/null +++ b/arch/x86/lib/x86-opcode-map.txt | |||
@@ -0,0 +1,893 @@ | |||
1 | # x86 Opcode Maps | ||
2 | # | ||
3 | #<Opcode maps> | ||
4 | # Table: table-name | ||
5 | # Referrer: escaped-name | ||
6 | # AVXcode: avx-code | ||
7 | # opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] | ||
8 | # (or) | ||
9 | # opcode: escape # escaped-name | ||
10 | # EndTable | ||
11 | # | ||
12 | #<group maps> | ||
13 | # GrpTable: GrpXXX | ||
14 | # reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] | ||
15 | # EndTable | ||
16 | # | ||
17 | # AVX Superscripts | ||
18 | # (VEX): this opcode can accept VEX prefix. | ||
19 | # (oVEX): this opcode requires VEX prefix. | ||
20 | # (o128): this opcode only supports 128bit VEX. | ||
21 | # (o256): this opcode only supports 256bit VEX. | ||
22 | # | ||
23 | |||
24 | Table: one byte opcode | ||
25 | Referrer: | ||
26 | AVXcode: | ||
27 | # 0x00 - 0x0f | ||
28 | 00: ADD Eb,Gb | ||
29 | 01: ADD Ev,Gv | ||
30 | 02: ADD Gb,Eb | ||
31 | 03: ADD Gv,Ev | ||
32 | 04: ADD AL,Ib | ||
33 | 05: ADD rAX,Iz | ||
34 | 06: PUSH ES (i64) | ||
35 | 07: POP ES (i64) | ||
36 | 08: OR Eb,Gb | ||
37 | 09: OR Ev,Gv | ||
38 | 0a: OR Gb,Eb | ||
39 | 0b: OR Gv,Ev | ||
40 | 0c: OR AL,Ib | ||
41 | 0d: OR rAX,Iz | ||
42 | 0e: PUSH CS (i64) | ||
43 | 0f: escape # 2-byte escape | ||
44 | # 0x10 - 0x1f | ||
45 | 10: ADC Eb,Gb | ||
46 | 11: ADC Ev,Gv | ||
47 | 12: ADC Gb,Eb | ||
48 | 13: ADC Gv,Ev | ||
49 | 14: ADC AL,Ib | ||
50 | 15: ADC rAX,Iz | ||
51 | 16: PUSH SS (i64) | ||
52 | 17: POP SS (i64) | ||
53 | 18: SBB Eb,Gb | ||
54 | 19: SBB Ev,Gv | ||
55 | 1a: SBB Gb,Eb | ||
56 | 1b: SBB Gv,Ev | ||
57 | 1c: SBB AL,Ib | ||
58 | 1d: SBB rAX,Iz | ||
59 | 1e: PUSH DS (i64) | ||
60 | 1f: POP DS (i64) | ||
61 | # 0x20 - 0x2f | ||
62 | 20: AND Eb,Gb | ||
63 | 21: AND Ev,Gv | ||
64 | 22: AND Gb,Eb | ||
65 | 23: AND Gv,Ev | ||
66 | 24: AND AL,Ib | ||
67 | 25: AND rAx,Iz | ||
68 | 26: SEG=ES (Prefix) | ||
69 | 27: DAA (i64) | ||
70 | 28: SUB Eb,Gb | ||
71 | 29: SUB Ev,Gv | ||
72 | 2a: SUB Gb,Eb | ||
73 | 2b: SUB Gv,Ev | ||
74 | 2c: SUB AL,Ib | ||
75 | 2d: SUB rAX,Iz | ||
76 | 2e: SEG=CS (Prefix) | ||
77 | 2f: DAS (i64) | ||
78 | # 0x30 - 0x3f | ||
79 | 30: XOR Eb,Gb | ||
80 | 31: XOR Ev,Gv | ||
81 | 32: XOR Gb,Eb | ||
82 | 33: XOR Gv,Ev | ||
83 | 34: XOR AL,Ib | ||
84 | 35: XOR rAX,Iz | ||
85 | 36: SEG=SS (Prefix) | ||
86 | 37: AAA (i64) | ||
87 | 38: CMP Eb,Gb | ||
88 | 39: CMP Ev,Gv | ||
89 | 3a: CMP Gb,Eb | ||
90 | 3b: CMP Gv,Ev | ||
91 | 3c: CMP AL,Ib | ||
92 | 3d: CMP rAX,Iz | ||
93 | 3e: SEG=DS (Prefix) | ||
94 | 3f: AAS (i64) | ||
95 | # 0x40 - 0x4f | ||
96 | 40: INC eAX (i64) | REX (o64) | ||
97 | 41: INC eCX (i64) | REX.B (o64) | ||
98 | 42: INC eDX (i64) | REX.X (o64) | ||
99 | 43: INC eBX (i64) | REX.XB (o64) | ||
100 | 44: INC eSP (i64) | REX.R (o64) | ||
101 | 45: INC eBP (i64) | REX.RB (o64) | ||
102 | 46: INC eSI (i64) | REX.RX (o64) | ||
103 | 47: INC eDI (i64) | REX.RXB (o64) | ||
104 | 48: DEC eAX (i64) | REX.W (o64) | ||
105 | 49: DEC eCX (i64) | REX.WB (o64) | ||
106 | 4a: DEC eDX (i64) | REX.WX (o64) | ||
107 | 4b: DEC eBX (i64) | REX.WXB (o64) | ||
108 | 4c: DEC eSP (i64) | REX.WR (o64) | ||
109 | 4d: DEC eBP (i64) | REX.WRB (o64) | ||
110 | 4e: DEC eSI (i64) | REX.WRX (o64) | ||
111 | 4f: DEC eDI (i64) | REX.WRXB (o64) | ||
112 | # 0x50 - 0x5f | ||
113 | 50: PUSH rAX/r8 (d64) | ||
114 | 51: PUSH rCX/r9 (d64) | ||
115 | 52: PUSH rDX/r10 (d64) | ||
116 | 53: PUSH rBX/r11 (d64) | ||
117 | 54: PUSH rSP/r12 (d64) | ||
118 | 55: PUSH rBP/r13 (d64) | ||
119 | 56: PUSH rSI/r14 (d64) | ||
120 | 57: PUSH rDI/r15 (d64) | ||
121 | 58: POP rAX/r8 (d64) | ||
122 | 59: POP rCX/r9 (d64) | ||
123 | 5a: POP rDX/r10 (d64) | ||
124 | 5b: POP rBX/r11 (d64) | ||
125 | 5c: POP rSP/r12 (d64) | ||
126 | 5d: POP rBP/r13 (d64) | ||
127 | 5e: POP rSI/r14 (d64) | ||
128 | 5f: POP rDI/r15 (d64) | ||
129 | # 0x60 - 0x6f | ||
130 | 60: PUSHA/PUSHAD (i64) | ||
131 | 61: POPA/POPAD (i64) | ||
132 | 62: BOUND Gv,Ma (i64) | ||
133 | 63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64) | ||
134 | 64: SEG=FS (Prefix) | ||
135 | 65: SEG=GS (Prefix) | ||
136 | 66: Operand-Size (Prefix) | ||
137 | 67: Address-Size (Prefix) | ||
138 | 68: PUSH Iz (d64) | ||
139 | 69: IMUL Gv,Ev,Iz | ||
140 | 6a: PUSH Ib (d64) | ||
141 | 6b: IMUL Gv,Ev,Ib | ||
142 | 6c: INS/INSB Yb,DX | ||
143 | 6d: INS/INSW/INSD Yz,DX | ||
144 | 6e: OUTS/OUTSB DX,Xb | ||
145 | 6f: OUTS/OUTSW/OUTSD DX,Xz | ||
146 | # 0x70 - 0x7f | ||
147 | 70: JO Jb | ||
148 | 71: JNO Jb | ||
149 | 72: JB/JNAE/JC Jb | ||
150 | 73: JNB/JAE/JNC Jb | ||
151 | 74: JZ/JE Jb | ||
152 | 75: JNZ/JNE Jb | ||
153 | 76: JBE/JNA Jb | ||
154 | 77: JNBE/JA Jb | ||
155 | 78: JS Jb | ||
156 | 79: JNS Jb | ||
157 | 7a: JP/JPE Jb | ||
158 | 7b: JNP/JPO Jb | ||
159 | 7c: JL/JNGE Jb | ||
160 | 7d: JNL/JGE Jb | ||
161 | 7e: JLE/JNG Jb | ||
162 | 7f: JNLE/JG Jb | ||
163 | # 0x80 - 0x8f | ||
164 | 80: Grp1 Eb,Ib (1A) | ||
165 | 81: Grp1 Ev,Iz (1A) | ||
166 | 82: Grp1 Eb,Ib (1A),(i64) | ||
167 | 83: Grp1 Ev,Ib (1A) | ||
168 | 84: TEST Eb,Gb | ||
169 | 85: TEST Ev,Gv | ||
170 | 86: XCHG Eb,Gb | ||
171 | 87: XCHG Ev,Gv | ||
172 | 88: MOV Eb,Gb | ||
173 | 89: MOV Ev,Gv | ||
174 | 8a: MOV Gb,Eb | ||
175 | 8b: MOV Gv,Ev | ||
176 | 8c: MOV Ev,Sw | ||
177 | 8d: LEA Gv,M | ||
178 | 8e: MOV Sw,Ew | ||
179 | 8f: Grp1A (1A) | POP Ev (d64) | ||
180 | # 0x90 - 0x9f | ||
181 | 90: NOP | PAUSE (F3) | XCHG r8,rAX | ||
182 | 91: XCHG rCX/r9,rAX | ||
183 | 92: XCHG rDX/r10,rAX | ||
184 | 93: XCHG rBX/r11,rAX | ||
185 | 94: XCHG rSP/r12,rAX | ||
186 | 95: XCHG rBP/r13,rAX | ||
187 | 96: XCHG rSI/r14,rAX | ||
188 | 97: XCHG rDI/r15,rAX | ||
189 | 98: CBW/CWDE/CDQE | ||
190 | 99: CWD/CDQ/CQO | ||
191 | 9a: CALLF Ap (i64) | ||
192 | 9b: FWAIT/WAIT | ||
193 | 9c: PUSHF/D/Q Fv (d64) | ||
194 | 9d: POPF/D/Q Fv (d64) | ||
195 | 9e: SAHF | ||
196 | 9f: LAHF | ||
197 | # 0xa0 - 0xaf | ||
198 | a0: MOV AL,Ob | ||
199 | a1: MOV rAX,Ov | ||
200 | a2: MOV Ob,AL | ||
201 | a3: MOV Ov,rAX | ||
202 | a4: MOVS/B Xb,Yb | ||
203 | a5: MOVS/W/D/Q Xv,Yv | ||
204 | a6: CMPS/B Xb,Yb | ||
205 | a7: CMPS/W/D Xv,Yv | ||
206 | a8: TEST AL,Ib | ||
207 | a9: TEST rAX,Iz | ||
208 | aa: STOS/B Yb,AL | ||
209 | ab: STOS/W/D/Q Yv,rAX | ||
210 | ac: LODS/B AL,Xb | ||
211 | ad: LODS/W/D/Q rAX,Xv | ||
212 | ae: SCAS/B AL,Yb | ||
213 | af: SCAS/W/D/Q rAX,Xv | ||
214 | # 0xb0 - 0xbf | ||
215 | b0: MOV AL/R8L,Ib | ||
216 | b1: MOV CL/R9L,Ib | ||
217 | b2: MOV DL/R10L,Ib | ||
218 | b3: MOV BL/R11L,Ib | ||
219 | b4: MOV AH/R12L,Ib | ||
220 | b5: MOV CH/R13L,Ib | ||
221 | b6: MOV DH/R14L,Ib | ||
222 | b7: MOV BH/R15L,Ib | ||
223 | b8: MOV rAX/r8,Iv | ||
224 | b9: MOV rCX/r9,Iv | ||
225 | ba: MOV rDX/r10,Iv | ||
226 | bb: MOV rBX/r11,Iv | ||
227 | bc: MOV rSP/r12,Iv | ||
228 | bd: MOV rBP/r13,Iv | ||
229 | be: MOV rSI/r14,Iv | ||
230 | bf: MOV rDI/r15,Iv | ||
231 | # 0xc0 - 0xcf | ||
232 | c0: Grp2 Eb,Ib (1A) | ||
233 | c1: Grp2 Ev,Ib (1A) | ||
234 | c2: RETN Iw (f64) | ||
235 | c3: RETN | ||
236 | c4: LES Gz,Mp (i64) | 3bytes-VEX (Prefix) | ||
237 | c5: LDS Gz,Mp (i64) | 2bytes-VEX (Prefix) | ||
238 | c6: Grp11 Eb,Ib (1A) | ||
239 | c7: Grp11 Ev,Iz (1A) | ||
240 | c8: ENTER Iw,Ib | ||
241 | c9: LEAVE (d64) | ||
242 | ca: RETF Iw | ||
243 | cb: RETF | ||
244 | cc: INT3 | ||
245 | cd: INT Ib | ||
246 | ce: INTO (i64) | ||
247 | cf: IRET/D/Q | ||
248 | # 0xd0 - 0xdf | ||
249 | d0: Grp2 Eb,1 (1A) | ||
250 | d1: Grp2 Ev,1 (1A) | ||
251 | d2: Grp2 Eb,CL (1A) | ||
252 | d3: Grp2 Ev,CL (1A) | ||
253 | d4: AAM Ib (i64) | ||
254 | d5: AAD Ib (i64) | ||
255 | d6: | ||
256 | d7: XLAT/XLATB | ||
257 | d8: ESC | ||
258 | d9: ESC | ||
259 | da: ESC | ||
260 | db: ESC | ||
261 | dc: ESC | ||
262 | dd: ESC | ||
263 | de: ESC | ||
264 | df: ESC | ||
265 | # 0xe0 - 0xef | ||
266 | e0: LOOPNE/LOOPNZ Jb (f64) | ||
267 | e1: LOOPE/LOOPZ Jb (f64) | ||
268 | e2: LOOP Jb (f64) | ||
269 | e3: JrCXZ Jb (f64) | ||
270 | e4: IN AL,Ib | ||
271 | e5: IN eAX,Ib | ||
272 | e6: OUT Ib,AL | ||
273 | e7: OUT Ib,eAX | ||
274 | e8: CALL Jz (f64) | ||
275 | e9: JMP-near Jz (f64) | ||
276 | ea: JMP-far Ap (i64) | ||
277 | eb: JMP-short Jb (f64) | ||
278 | ec: IN AL,DX | ||
279 | ed: IN eAX,DX | ||
280 | ee: OUT DX,AL | ||
281 | ef: OUT DX,eAX | ||
282 | # 0xf0 - 0xff | ||
283 | f0: LOCK (Prefix) | ||
284 | f1: | ||
285 | f2: REPNE (Prefix) | ||
286 | f3: REP/REPE (Prefix) | ||
287 | f4: HLT | ||
288 | f5: CMC | ||
289 | f6: Grp3_1 Eb (1A) | ||
290 | f7: Grp3_2 Ev (1A) | ||
291 | f8: CLC | ||
292 | f9: STC | ||
293 | fa: CLI | ||
294 | fb: STI | ||
295 | fc: CLD | ||
296 | fd: STD | ||
297 | fe: Grp4 (1A) | ||
298 | ff: Grp5 (1A) | ||
299 | EndTable | ||
300 | |||
301 | Table: 2-byte opcode (0x0f) | ||
302 | Referrer: 2-byte escape | ||
303 | AVXcode: 1 | ||
304 | # 0x0f 0x00-0x0f | ||
305 | 00: Grp6 (1A) | ||
306 | 01: Grp7 (1A) | ||
307 | 02: LAR Gv,Ew | ||
308 | 03: LSL Gv,Ew | ||
309 | 04: | ||
310 | 05: SYSCALL (o64) | ||
311 | 06: CLTS | ||
312 | 07: SYSRET (o64) | ||
313 | 08: INVD | ||
314 | 09: WBINVD | ||
315 | 0a: | ||
316 | 0b: UD2 (1B) | ||
317 | 0c: | ||
318 | 0d: NOP Ev | GrpP | ||
319 | 0e: FEMMS | ||
320 | # 3DNow! uses the last imm byte as opcode extension. | ||
321 | 0f: 3DNow! Pq,Qq,Ib | ||
322 | # 0x0f 0x10-0x1f | ||
323 | 10: movups Vps,Wps (VEX) | movss Vss,Wss (F3),(VEX),(o128) | movupd Vpd,Wpd (66),(VEX) | movsd Vsd,Wsd (F2),(VEX),(o128) | ||
324 | 11: movups Wps,Vps (VEX) | movss Wss,Vss (F3),(VEX),(o128) | movupd Wpd,Vpd (66),(VEX) | movsd Wsd,Vsd (F2),(VEX),(o128) | ||
325 | 12: movlps Vq,Mq (VEX),(o128) | movlpd Vq,Mq (66),(VEX),(o128) | movhlps Vq,Uq (VEX),(o128) | movddup Vq,Wq (F2),(VEX) | movsldup Vq,Wq (F3),(VEX) | ||
326 | 13: mpvlps Mq,Vq (VEX),(o128) | movlpd Mq,Vq (66),(VEX),(o128) | ||
327 | 14: unpcklps Vps,Wq (VEX) | unpcklpd Vpd,Wq (66),(VEX) | ||
328 | 15: unpckhps Vps,Wq (VEX) | unpckhpd Vpd,Wq (66),(VEX) | ||
329 | 16: movhps Vq,Mq (VEX),(o128) | movhpd Vq,Mq (66),(VEX),(o128) | movlsps Vq,Uq (VEX),(o128) | movshdup Vq,Wq (F3),(VEX) | ||
330 | 17: movhps Mq,Vq (VEX),(o128) | movhpd Mq,Vq (66),(VEX),(o128) | ||
331 | 18: Grp16 (1A) | ||
332 | 19: | ||
333 | 1a: | ||
334 | 1b: | ||
335 | 1c: | ||
336 | 1d: | ||
337 | 1e: | ||
338 | 1f: NOP Ev | ||
339 | # 0x0f 0x20-0x2f | ||
340 | 20: MOV Rd,Cd | ||
341 | 21: MOV Rd,Dd | ||
342 | 22: MOV Cd,Rd | ||
343 | 23: MOV Dd,Rd | ||
344 | 24: | ||
345 | 25: | ||
346 | 26: | ||
347 | 27: | ||
348 | 28: movaps Vps,Wps (VEX) | movapd Vpd,Wpd (66),(VEX) | ||
349 | 29: movaps Wps,Vps (VEX) | movapd Wpd,Vpd (66),(VEX) | ||
350 | 2a: cvtpi2ps Vps,Qpi | cvtsi2ss Vss,Ed/q (F3),(VEX),(o128) | cvtpi2pd Vpd,Qpi (66) | cvtsi2sd Vsd,Ed/q (F2),(VEX),(o128) | ||
351 | 2b: movntps Mps,Vps (VEX) | movntpd Mpd,Vpd (66),(VEX) | ||
352 | 2c: cvttps2pi Ppi,Wps | cvttss2si Gd/q,Wss (F3),(VEX),(o128) | cvttpd2pi Ppi,Wpd (66) | cvttsd2si Gd/q,Wsd (F2),(VEX),(o128) | ||
353 | 2d: cvtps2pi Ppi,Wps | cvtss2si Gd/q,Wss (F3),(VEX),(o128) | cvtpd2pi Qpi,Wpd (66) | cvtsd2si Gd/q,Wsd (F2),(VEX),(o128) | ||
354 | 2e: ucomiss Vss,Wss (VEX),(o128) | ucomisd Vsd,Wsd (66),(VEX),(o128) | ||
355 | 2f: comiss Vss,Wss (VEX),(o128) | comisd Vsd,Wsd (66),(VEX),(o128) | ||
356 | # 0x0f 0x30-0x3f | ||
357 | 30: WRMSR | ||
358 | 31: RDTSC | ||
359 | 32: RDMSR | ||
360 | 33: RDPMC | ||
361 | 34: SYSENTER | ||
362 | 35: SYSEXIT | ||
363 | 36: | ||
364 | 37: GETSEC | ||
365 | 38: escape # 3-byte escape 1 | ||
366 | 39: | ||
367 | 3a: escape # 3-byte escape 2 | ||
368 | 3b: | ||
369 | 3c: | ||
370 | 3d: | ||
371 | 3e: | ||
372 | 3f: | ||
373 | # 0x0f 0x40-0x4f | ||
374 | 40: CMOVO Gv,Ev | ||
375 | 41: CMOVNO Gv,Ev | ||
376 | 42: CMOVB/C/NAE Gv,Ev | ||
377 | 43: CMOVAE/NB/NC Gv,Ev | ||
378 | 44: CMOVE/Z Gv,Ev | ||
379 | 45: CMOVNE/NZ Gv,Ev | ||
380 | 46: CMOVBE/NA Gv,Ev | ||
381 | 47: CMOVA/NBE Gv,Ev | ||
382 | 48: CMOVS Gv,Ev | ||
383 | 49: CMOVNS Gv,Ev | ||
384 | 4a: CMOVP/PE Gv,Ev | ||
385 | 4b: CMOVNP/PO Gv,Ev | ||
386 | 4c: CMOVL/NGE Gv,Ev | ||
387 | 4d: CMOVNL/GE Gv,Ev | ||
388 | 4e: CMOVLE/NG Gv,Ev | ||
389 | 4f: CMOVNLE/G Gv,Ev | ||
390 | # 0x0f 0x50-0x5f | ||
391 | 50: movmskps Gd/q,Ups (VEX) | movmskpd Gd/q,Upd (66),(VEX) | ||
392 | 51: sqrtps Vps,Wps (VEX) | sqrtss Vss,Wss (F3),(VEX),(o128) | sqrtpd Vpd,Wpd (66),(VEX) | sqrtsd Vsd,Wsd (F2),(VEX),(o128) | ||
393 | 52: rsqrtps Vps,Wps (VEX) | rsqrtss Vss,Wss (F3),(VEX),(o128) | ||
394 | 53: rcpps Vps,Wps (VEX) | rcpss Vss,Wss (F3),(VEX),(o128) | ||
395 | 54: andps Vps,Wps (VEX) | andpd Vpd,Wpd (66),(VEX) | ||
396 | 55: andnps Vps,Wps (VEX) | andnpd Vpd,Wpd (66),(VEX) | ||
397 | 56: orps Vps,Wps (VEX) | orpd Vpd,Wpd (66),(VEX) | ||
398 | 57: xorps Vps,Wps (VEX) | xorpd Vpd,Wpd (66),(VEX) | ||
399 | 58: addps Vps,Wps (VEX) | addss Vss,Wss (F3),(VEX),(o128) | addpd Vpd,Wpd (66),(VEX) | addsd Vsd,Wsd (F2),(VEX),(o128) | ||
400 | 59: mulps Vps,Wps (VEX) | mulss Vss,Wss (F3),(VEX),(o128) | mulpd Vpd,Wpd (66),(VEX) | mulsd Vsd,Wsd (F2),(VEX),(o128) | ||
401 | 5a: cvtps2pd Vpd,Wps (VEX) | cvtss2sd Vsd,Wss (F3),(VEX),(o128) | cvtpd2ps Vps,Wpd (66),(VEX) | cvtsd2ss Vsd,Wsd (F2),(VEX),(o128) | ||
402 | 5b: cvtdq2ps Vps,Wdq (VEX) | cvtps2dq Vdq,Wps (66),(VEX) | cvttps2dq Vdq,Wps (F3),(VEX) | ||
403 | 5c: subps Vps,Wps (VEX) | subss Vss,Wss (F3),(VEX),(o128) | subpd Vpd,Wpd (66),(VEX) | subsd Vsd,Wsd (F2),(VEX),(o128) | ||
404 | 5d: minps Vps,Wps (VEX) | minss Vss,Wss (F3),(VEX),(o128) | minpd Vpd,Wpd (66),(VEX) | minsd Vsd,Wsd (F2),(VEX),(o128) | ||
405 | 5e: divps Vps,Wps (VEX) | divss Vss,Wss (F3),(VEX),(o128) | divpd Vpd,Wpd (66),(VEX) | divsd Vsd,Wsd (F2),(VEX),(o128) | ||
406 | 5f: maxps Vps,Wps (VEX) | maxss Vss,Wss (F3),(VEX),(o128) | maxpd Vpd,Wpd (66),(VEX) | maxsd Vsd,Wsd (F2),(VEX),(o128) | ||
407 | # 0x0f 0x60-0x6f | ||
408 | 60: punpcklbw Pq,Qd | punpcklbw Vdq,Wdq (66),(VEX),(o128) | ||
409 | 61: punpcklwd Pq,Qd | punpcklwd Vdq,Wdq (66),(VEX),(o128) | ||
410 | 62: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66),(VEX),(o128) | ||
411 | 63: packsswb Pq,Qq | packsswb Vdq,Wdq (66),(VEX),(o128) | ||
412 | 64: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66),(VEX),(o128) | ||
413 | 65: pcmpgtw Pq,Qq | pcmpgtw Vdq,Wdq (66),(VEX),(o128) | ||
414 | 66: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66),(VEX),(o128) | ||
415 | 67: packuswb Pq,Qq | packuswb Vdq,Wdq (66),(VEX),(o128) | ||
416 | 68: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66),(VEX),(o128) | ||
417 | 69: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66),(VEX),(o128) | ||
418 | 6a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66),(VEX),(o128) | ||
419 | 6b: packssdw Pq,Qd | packssdw Vdq,Wdq (66),(VEX),(o128) | ||
420 | 6c: punpcklqdq Vdq,Wdq (66),(VEX),(o128) | ||
421 | 6d: punpckhqdq Vdq,Wdq (66),(VEX),(o128) | ||
422 | 6e: movd/q/ Pd,Ed/q | movd/q Vdq,Ed/q (66),(VEX),(o128) | ||
423 | 6f: movq Pq,Qq | movdqa Vdq,Wdq (66),(VEX) | movdqu Vdq,Wdq (F3),(VEX) | ||
424 | # 0x0f 0x70-0x7f | ||
425 | 70: pshufw Pq,Qq,Ib | pshufd Vdq,Wdq,Ib (66),(VEX),(o128) | pshufhw Vdq,Wdq,Ib (F3),(VEX),(o128) | pshuflw VdqWdq,Ib (F2),(VEX),(o128) | ||
426 | 71: Grp12 (1A) | ||
427 | 72: Grp13 (1A) | ||
428 | 73: Grp14 (1A) | ||
429 | 74: pcmpeqb Pq,Qq | pcmpeqb Vdq,Wdq (66),(VEX),(o128) | ||
430 | 75: pcmpeqw Pq,Qq | pcmpeqw Vdq,Wdq (66),(VEX),(o128) | ||
431 | 76: pcmpeqd Pq,Qq | pcmpeqd Vdq,Wdq (66),(VEX),(o128) | ||
432 | 77: emms/vzeroupper/vzeroall (VEX) | ||
433 | 78: VMREAD Ed/q,Gd/q | ||
434 | 79: VMWRITE Gd/q,Ed/q | ||
435 | 7a: | ||
436 | 7b: | ||
437 | 7c: haddps Vps,Wps (F2),(VEX) | haddpd Vpd,Wpd (66),(VEX) | ||
438 | 7d: hsubps Vps,Wps (F2),(VEX) | hsubpd Vpd,Wpd (66),(VEX) | ||
439 | 7e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66),(VEX),(o128) | movq Vq,Wq (F3),(VEX),(o128) | ||
440 | 7f: movq Qq,Pq | movdqa Wdq,Vdq (66),(VEX) | movdqu Wdq,Vdq (F3),(VEX) | ||
441 | # 0x0f 0x80-0x8f | ||
442 | 80: JO Jz (f64) | ||
443 | 81: JNO Jz (f64) | ||
444 | 82: JB/JNAE/JC Jz (f64) | ||
445 | 83: JNB/JAE/JNC Jz (f64) | ||
446 | 84: JZ/JE Jz (f64) | ||
447 | 85: JNZ/JNE Jz (f64) | ||
448 | 86: JBE/JNA Jz (f64) | ||
449 | 87: JNBE/JA Jz (f64) | ||
450 | 88: JS Jz (f64) | ||
451 | 89: JNS Jz (f64) | ||
452 | 8a: JP/JPE Jz (f64) | ||
453 | 8b: JNP/JPO Jz (f64) | ||
454 | 8c: JL/JNGE Jz (f64) | ||
455 | 8d: JNL/JGE Jz (f64) | ||
456 | 8e: JLE/JNG Jz (f64) | ||
457 | 8f: JNLE/JG Jz (f64) | ||
458 | # 0x0f 0x90-0x9f | ||
459 | 90: SETO Eb | ||
460 | 91: SETNO Eb | ||
461 | 92: SETB/C/NAE Eb | ||
462 | 93: SETAE/NB/NC Eb | ||
463 | 94: SETE/Z Eb | ||
464 | 95: SETNE/NZ Eb | ||
465 | 96: SETBE/NA Eb | ||
466 | 97: SETA/NBE Eb | ||
467 | 98: SETS Eb | ||
468 | 99: SETNS Eb | ||
469 | 9a: SETP/PE Eb | ||
470 | 9b: SETNP/PO Eb | ||
471 | 9c: SETL/NGE Eb | ||
472 | 9d: SETNL/GE Eb | ||
473 | 9e: SETLE/NG Eb | ||
474 | 9f: SETNLE/G Eb | ||
475 | # 0x0f 0xa0-0xaf | ||
476 | a0: PUSH FS (d64) | ||
477 | a1: POP FS (d64) | ||
478 | a2: CPUID | ||
479 | a3: BT Ev,Gv | ||
480 | a4: SHLD Ev,Gv,Ib | ||
481 | a5: SHLD Ev,Gv,CL | ||
482 | a6: GrpPDLK | ||
483 | a7: GrpRNG | ||
484 | a8: PUSH GS (d64) | ||
485 | a9: POP GS (d64) | ||
486 | aa: RSM | ||
487 | ab: BTS Ev,Gv | ||
488 | ac: SHRD Ev,Gv,Ib | ||
489 | ad: SHRD Ev,Gv,CL | ||
490 | ae: Grp15 (1A),(1C) | ||
491 | af: IMUL Gv,Ev | ||
492 | # 0x0f 0xb0-0xbf | ||
493 | b0: CMPXCHG Eb,Gb | ||
494 | b1: CMPXCHG Ev,Gv | ||
495 | b2: LSS Gv,Mp | ||
496 | b3: BTR Ev,Gv | ||
497 | b4: LFS Gv,Mp | ||
498 | b5: LGS Gv,Mp | ||
499 | b6: MOVZX Gv,Eb | ||
500 | b7: MOVZX Gv,Ew | ||
501 | b8: JMPE | POPCNT Gv,Ev (F3) | ||
502 | b9: Grp10 (1A) | ||
503 | ba: Grp8 Ev,Ib (1A) | ||
504 | bb: BTC Ev,Gv | ||
505 | bc: BSF Gv,Ev | ||
506 | bd: BSR Gv,Ev | ||
507 | be: MOVSX Gv,Eb | ||
508 | bf: MOVSX Gv,Ew | ||
509 | # 0x0f 0xc0-0xcf | ||
510 | c0: XADD Eb,Gb | ||
511 | c1: XADD Ev,Gv | ||
512 | c2: cmpps Vps,Wps,Ib (VEX) | cmpss Vss,Wss,Ib (F3),(VEX),(o128) | cmppd Vpd,Wpd,Ib (66),(VEX) | cmpsd Vsd,Wsd,Ib (F2),(VEX) | ||
513 | c3: movnti Md/q,Gd/q | ||
514 | c4: pinsrw Pq,Rd/q/Mw,Ib | pinsrw Vdq,Rd/q/Mw,Ib (66),(VEX),(o128) | ||
515 | c5: pextrw Gd,Nq,Ib | pextrw Gd,Udq,Ib (66),(VEX),(o128) | ||
516 | c6: shufps Vps,Wps,Ib (VEX) | shufpd Vpd,Wpd,Ib (66),(VEX) | ||
517 | c7: Grp9 (1A) | ||
518 | c8: BSWAP RAX/EAX/R8/R8D | ||
519 | c9: BSWAP RCX/ECX/R9/R9D | ||
520 | ca: BSWAP RDX/EDX/R10/R10D | ||
521 | cb: BSWAP RBX/EBX/R11/R11D | ||
522 | cc: BSWAP RSP/ESP/R12/R12D | ||
523 | cd: BSWAP RBP/EBP/R13/R13D | ||
524 | ce: BSWAP RSI/ESI/R14/R14D | ||
525 | cf: BSWAP RDI/EDI/R15/R15D | ||
526 | # 0x0f 0xd0-0xdf | ||
527 | d0: addsubps Vps,Wps (F2),(VEX) | addsubpd Vpd,Wpd (66),(VEX) | ||
528 | d1: psrlw Pq,Qq | psrlw Vdq,Wdq (66),(VEX),(o128) | ||
529 | d2: psrld Pq,Qq | psrld Vdq,Wdq (66),(VEX),(o128) | ||
530 | d3: psrlq Pq,Qq | psrlq Vdq,Wdq (66),(VEX),(o128) | ||
531 | d4: paddq Pq,Qq | paddq Vdq,Wdq (66),(VEX),(o128) | ||
532 | d5: pmullw Pq,Qq | pmullw Vdq,Wdq (66),(VEX),(o128) | ||
533 | d6: movq Wq,Vq (66),(VEX),(o128) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) | ||
534 | d7: pmovmskb Gd,Nq | pmovmskb Gd,Udq (66),(VEX),(o128) | ||
535 | d8: psubusb Pq,Qq | psubusb Vdq,Wdq (66),(VEX),(o128) | ||
536 | d9: psubusw Pq,Qq | psubusw Vdq,Wdq (66),(VEX),(o128) | ||
537 | da: pminub Pq,Qq | pminub Vdq,Wdq (66),(VEX),(o128) | ||
538 | db: pand Pq,Qq | pand Vdq,Wdq (66),(VEX),(o128) | ||
539 | dc: paddusb Pq,Qq | paddusb Vdq,Wdq (66),(VEX),(o128) | ||
540 | dd: paddusw Pq,Qq | paddusw Vdq,Wdq (66),(VEX),(o128) | ||
541 | de: pmaxub Pq,Qq | pmaxub Vdq,Wdq (66),(VEX),(o128) | ||
542 | df: pandn Pq,Qq | pandn Vdq,Wdq (66),(VEX),(o128) | ||
543 | # 0x0f 0xe0-0xef | ||
544 | e0: pavgb Pq,Qq | pavgb Vdq,Wdq (66),(VEX),(o128) | ||
545 | e1: psraw Pq,Qq | psraw Vdq,Wdq (66),(VEX),(o128) | ||
546 | e2: psrad Pq,Qq | psrad Vdq,Wdq (66),(VEX),(o128) | ||
547 | e3: pavgw Pq,Qq | pavgw Vdq,Wdq (66),(VEX),(o128) | ||
548 | e4: pmulhuw Pq,Qq | pmulhuw Vdq,Wdq (66),(VEX),(o128) | ||
549 | e5: pmulhw Pq,Qq | pmulhw Vdq,Wdq (66),(VEX),(o128) | ||
550 | e6: cvtpd2dq Vdq,Wpd (F2),(VEX) | cvttpd2dq Vdq,Wpd (66),(VEX) | cvtdq2pd Vpd,Wdq (F3),(VEX) | ||
551 | e7: movntq Mq,Pq | movntdq Mdq,Vdq (66),(VEX) | ||
552 | e8: psubsb Pq,Qq | psubsb Vdq,Wdq (66),(VEX),(o128) | ||
553 | e9: psubsw Pq,Qq | psubsw Vdq,Wdq (66),(VEX),(o128) | ||
554 | ea: pminsw Pq,Qq | pminsw Vdq,Wdq (66),(VEX),(o128) | ||
555 | eb: por Pq,Qq | por Vdq,Wdq (66),(VEX),(o128) | ||
556 | ec: paddsb Pq,Qq | paddsb Vdq,Wdq (66),(VEX),(o128) | ||
557 | ed: paddsw Pq,Qq | paddsw Vdq,Wdq (66),(VEX),(o128) | ||
558 | ee: pmaxsw Pq,Qq | pmaxsw Vdq,Wdq (66),(VEX),(o128) | ||
559 | ef: pxor Pq,Qq | pxor Vdq,Wdq (66),(VEX),(o128) | ||
560 | # 0x0f 0xf0-0xff | ||
561 | f0: lddqu Vdq,Mdq (F2),(VEX) | ||
562 | f1: psllw Pq,Qq | psllw Vdq,Wdq (66),(VEX),(o128) | ||
563 | f2: pslld Pq,Qq | pslld Vdq,Wdq (66),(VEX),(o128) | ||
564 | f3: psllq Pq,Qq | psllq Vdq,Wdq (66),(VEX),(o128) | ||
565 | f4: pmuludq Pq,Qq | pmuludq Vdq,Wdq (66),(VEX),(o128) | ||
566 | f5: pmaddwd Pq,Qq | pmaddwd Vdq,Wdq (66),(VEX),(o128) | ||
567 | f6: psadbw Pq,Qq | psadbw Vdq,Wdq (66),(VEX),(o128) | ||
568 | f7: maskmovq Pq,Nq | maskmovdqu Vdq,Udq (66),(VEX),(o128) | ||
569 | f8: psubb Pq,Qq | psubb Vdq,Wdq (66),(VEX),(o128) | ||
570 | f9: psubw Pq,Qq | psubw Vdq,Wdq (66),(VEX),(o128) | ||
571 | fa: psubd Pq,Qq | psubd Vdq,Wdq (66),(VEX),(o128) | ||
572 | fb: psubq Pq,Qq | psubq Vdq,Wdq (66),(VEX),(o128) | ||
573 | fc: paddb Pq,Qq | paddb Vdq,Wdq (66),(VEX),(o128) | ||
574 | fd: paddw Pq,Qq | paddw Vdq,Wdq (66),(VEX),(o128) | ||
575 | fe: paddd Pq,Qq | paddd Vdq,Wdq (66),(VEX),(o128) | ||
576 | ff: | ||
577 | EndTable | ||
578 | |||
579 | Table: 3-byte opcode 1 (0x0f 0x38) | ||
580 | Referrer: 3-byte escape 1 | ||
581 | AVXcode: 2 | ||
582 | # 0x0f 0x38 0x00-0x0f | ||
583 | 00: pshufb Pq,Qq | pshufb Vdq,Wdq (66),(VEX),(o128) | ||
584 | 01: phaddw Pq,Qq | phaddw Vdq,Wdq (66),(VEX),(o128) | ||
585 | 02: phaddd Pq,Qq | phaddd Vdq,Wdq (66),(VEX),(o128) | ||
586 | 03: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66),(VEX),(o128) | ||
587 | 04: pmaddubsw Pq,Qq | pmaddubsw Vdq,Wdq (66),(VEX),(o128) | ||
588 | 05: phsubw Pq,Qq | phsubw Vdq,Wdq (66),(VEX),(o128) | ||
589 | 06: phsubd Pq,Qq | phsubd Vdq,Wdq (66),(VEX),(o128) | ||
590 | 07: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66),(VEX),(o128) | ||
591 | 08: psignb Pq,Qq | psignb Vdq,Wdq (66),(VEX),(o128) | ||
592 | 09: psignw Pq,Qq | psignw Vdq,Wdq (66),(VEX),(o128) | ||
593 | 0a: psignd Pq,Qq | psignd Vdq,Wdq (66),(VEX),(o128) | ||
594 | 0b: pmulhrsw Pq,Qq | pmulhrsw Vdq,Wdq (66),(VEX),(o128) | ||
595 | 0c: Vpermilps /r (66),(oVEX) | ||
596 | 0d: Vpermilpd /r (66),(oVEX) | ||
597 | 0e: vtestps /r (66),(oVEX) | ||
598 | 0f: vtestpd /r (66),(oVEX) | ||
599 | # 0x0f 0x38 0x10-0x1f | ||
600 | 10: pblendvb Vdq,Wdq (66) | ||
601 | 11: | ||
602 | 12: | ||
603 | 13: | ||
604 | 14: blendvps Vdq,Wdq (66) | ||
605 | 15: blendvpd Vdq,Wdq (66) | ||
606 | 16: | ||
607 | 17: ptest Vdq,Wdq (66),(VEX) | ||
608 | 18: vbroadcastss /r (66),(oVEX) | ||
609 | 19: vbroadcastsd /r (66),(oVEX),(o256) | ||
610 | 1a: vbroadcastf128 /r (66),(oVEX),(o256) | ||
611 | 1b: | ||
612 | 1c: pabsb Pq,Qq | pabsb Vdq,Wdq (66),(VEX),(o128) | ||
613 | 1d: pabsw Pq,Qq | pabsw Vdq,Wdq (66),(VEX),(o128) | ||
614 | 1e: pabsd Pq,Qq | pabsd Vdq,Wdq (66),(VEX),(o128) | ||
615 | 1f: | ||
616 | # 0x0f 0x38 0x20-0x2f | ||
617 | 20: pmovsxbw Vdq,Udq/Mq (66),(VEX),(o128) | ||
618 | 21: pmovsxbd Vdq,Udq/Md (66),(VEX),(o128) | ||
619 | 22: pmovsxbq Vdq,Udq/Mw (66),(VEX),(o128) | ||
620 | 23: pmovsxwd Vdq,Udq/Mq (66),(VEX),(o128) | ||
621 | 24: pmovsxwq Vdq,Udq/Md (66),(VEX),(o128) | ||
622 | 25: pmovsxdq Vdq,Udq/Mq (66),(VEX),(o128) | ||
623 | 26: | ||
624 | 27: | ||
625 | 28: pmuldq Vdq,Wdq (66),(VEX),(o128) | ||
626 | 29: pcmpeqq Vdq,Wdq (66),(VEX),(o128) | ||
627 | 2a: movntdqa Vdq,Mdq (66),(VEX),(o128) | ||
628 | 2b: packusdw Vdq,Wdq (66),(VEX),(o128) | ||
629 | 2c: vmaskmovps(ld) /r (66),(oVEX) | ||
630 | 2d: vmaskmovpd(ld) /r (66),(oVEX) | ||
631 | 2e: vmaskmovps(st) /r (66),(oVEX) | ||
632 | 2f: vmaskmovpd(st) /r (66),(oVEX) | ||
633 | # 0x0f 0x38 0x30-0x3f | ||
634 | 30: pmovzxbw Vdq,Udq/Mq (66),(VEX),(o128) | ||
635 | 31: pmovzxbd Vdq,Udq/Md (66),(VEX),(o128) | ||
636 | 32: pmovzxbq Vdq,Udq/Mw (66),(VEX),(o128) | ||
637 | 33: pmovzxwd Vdq,Udq/Mq (66),(VEX),(o128) | ||
638 | 34: pmovzxwq Vdq,Udq/Md (66),(VEX),(o128) | ||
639 | 35: pmovzxdq Vdq,Udq/Mq (66),(VEX),(o128) | ||
640 | 36: | ||
641 | 37: pcmpgtq Vdq,Wdq (66),(VEX),(o128) | ||
642 | 38: pminsb Vdq,Wdq (66),(VEX),(o128) | ||
643 | 39: pminsd Vdq,Wdq (66),(VEX),(o128) | ||
644 | 3a: pminuw Vdq,Wdq (66),(VEX),(o128) | ||
645 | 3b: pminud Vdq,Wdq (66),(VEX),(o128) | ||
646 | 3c: pmaxsb Vdq,Wdq (66),(VEX),(o128) | ||
647 | 3d: pmaxsd Vdq,Wdq (66),(VEX),(o128) | ||
648 | 3e: pmaxuw Vdq,Wdq (66),(VEX),(o128) | ||
649 | 3f: pmaxud Vdq,Wdq (66),(VEX),(o128) | ||
650 | # 0x0f 0x38 0x40-0x8f | ||
651 | 40: pmulld Vdq,Wdq (66),(VEX),(o128) | ||
652 | 41: phminposuw Vdq,Wdq (66),(VEX),(o128) | ||
653 | 80: INVEPT Gd/q,Mdq (66) | ||
654 | 81: INVPID Gd/q,Mdq (66) | ||
655 | # 0x0f 0x38 0x90-0xbf (FMA) | ||
656 | 96: vfmaddsub132pd/ps /r (66),(VEX) | ||
657 | 97: vfmsubadd132pd/ps /r (66),(VEX) | ||
658 | 98: vfmadd132pd/ps /r (66),(VEX) | ||
659 | 99: vfmadd132sd/ss /r (66),(VEX),(o128) | ||
660 | 9a: vfmsub132pd/ps /r (66),(VEX) | ||
661 | 9b: vfmsub132sd/ss /r (66),(VEX),(o128) | ||
662 | 9c: vfnmadd132pd/ps /r (66),(VEX) | ||
663 | 9d: vfnmadd132sd/ss /r (66),(VEX),(o128) | ||
664 | 9e: vfnmsub132pd/ps /r (66),(VEX) | ||
665 | 9f: vfnmsub132sd/ss /r (66),(VEX),(o128) | ||
666 | a6: vfmaddsub213pd/ps /r (66),(VEX) | ||
667 | a7: vfmsubadd213pd/ps /r (66),(VEX) | ||
668 | a8: vfmadd213pd/ps /r (66),(VEX) | ||
669 | a9: vfmadd213sd/ss /r (66),(VEX),(o128) | ||
670 | aa: vfmsub213pd/ps /r (66),(VEX) | ||
671 | ab: vfmsub213sd/ss /r (66),(VEX),(o128) | ||
672 | ac: vfnmadd213pd/ps /r (66),(VEX) | ||
673 | ad: vfnmadd213sd/ss /r (66),(VEX),(o128) | ||
674 | ae: vfnmsub213pd/ps /r (66),(VEX) | ||
675 | af: vfnmsub213sd/ss /r (66),(VEX),(o128) | ||
676 | b6: vfmaddsub231pd/ps /r (66),(VEX) | ||
677 | b7: vfmsubadd231pd/ps /r (66),(VEX) | ||
678 | b8: vfmadd231pd/ps /r (66),(VEX) | ||
679 | b9: vfmadd231sd/ss /r (66),(VEX),(o128) | ||
680 | ba: vfmsub231pd/ps /r (66),(VEX) | ||
681 | bb: vfmsub231sd/ss /r (66),(VEX),(o128) | ||
682 | bc: vfnmadd231pd/ps /r (66),(VEX) | ||
683 | bd: vfnmadd231sd/ss /r (66),(VEX),(o128) | ||
684 | be: vfnmsub231pd/ps /r (66),(VEX) | ||
685 | bf: vfnmsub231sd/ss /r (66),(VEX),(o128) | ||
686 | # 0x0f 0x38 0xc0-0xff | ||
687 | db: aesimc Vdq,Wdq (66),(VEX),(o128) | ||
688 | dc: aesenc Vdq,Wdq (66),(VEX),(o128) | ||
689 | dd: aesenclast Vdq,Wdq (66),(VEX),(o128) | ||
690 | de: aesdec Vdq,Wdq (66),(VEX),(o128) | ||
691 | df: aesdeclast Vdq,Wdq (66),(VEX),(o128) | ||
692 | f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2) | ||
693 | f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2) | ||
694 | EndTable | ||
695 | |||
696 | Table: 3-byte opcode 2 (0x0f 0x3a) | ||
697 | Referrer: 3-byte escape 2 | ||
698 | AVXcode: 3 | ||
699 | # 0x0f 0x3a 0x00-0xff | ||
700 | 04: vpermilps /r,Ib (66),(oVEX) | ||
701 | 05: vpermilpd /r,Ib (66),(oVEX) | ||
702 | 06: vperm2f128 /r,Ib (66),(oVEX),(o256) | ||
703 | 08: roundps Vdq,Wdq,Ib (66),(VEX) | ||
704 | 09: roundpd Vdq,Wdq,Ib (66),(VEX) | ||
705 | 0a: roundss Vss,Wss,Ib (66),(VEX),(o128) | ||
706 | 0b: roundsd Vsd,Wsd,Ib (66),(VEX),(o128) | ||
707 | 0c: blendps Vdq,Wdq,Ib (66),(VEX) | ||
708 | 0d: blendpd Vdq,Wdq,Ib (66),(VEX) | ||
709 | 0e: pblendw Vdq,Wdq,Ib (66),(VEX),(o128) | ||
710 | 0f: palignr Pq,Qq,Ib | palignr Vdq,Wdq,Ib (66),(VEX),(o128) | ||
711 | 14: pextrb Rd/Mb,Vdq,Ib (66),(VEX),(o128) | ||
712 | 15: pextrw Rd/Mw,Vdq,Ib (66),(VEX),(o128) | ||
713 | 16: pextrd/pextrq Ed/q,Vdq,Ib (66),(VEX),(o128) | ||
714 | 17: extractps Ed,Vdq,Ib (66),(VEX),(o128) | ||
715 | 18: vinsertf128 /r,Ib (66),(oVEX),(o256) | ||
716 | 19: vextractf128 /r,Ib (66),(oVEX),(o256) | ||
717 | 20: pinsrb Vdq,Rd/q/Mb,Ib (66),(VEX),(o128) | ||
718 | 21: insertps Vdq,Udq/Md,Ib (66),(VEX),(o128) | ||
719 | 22: pinsrd/pinsrq Vdq,Ed/q,Ib (66),(VEX),(o128) | ||
720 | 40: dpps Vdq,Wdq,Ib (66),(VEX) | ||
721 | 41: dppd Vdq,Wdq,Ib (66),(VEX),(o128) | ||
722 | 42: mpsadbw Vdq,Wdq,Ib (66),(VEX),(o128) | ||
723 | 44: pclmulq Vdq,Wdq,Ib (66),(VEX),(o128) | ||
724 | 4a: vblendvps /r,Ib (66),(oVEX) | ||
725 | 4b: vblendvpd /r,Ib (66),(oVEX) | ||
726 | 4c: vpblendvb /r,Ib (66),(oVEX),(o128) | ||
727 | 60: pcmpestrm Vdq,Wdq,Ib (66),(VEX),(o128) | ||
728 | 61: pcmpestri Vdq,Wdq,Ib (66),(VEX),(o128) | ||
729 | 62: pcmpistrm Vdq,Wdq,Ib (66),(VEX),(o128) | ||
730 | 63: pcmpistri Vdq,Wdq,Ib (66),(VEX),(o128) | ||
731 | df: aeskeygenassist Vdq,Wdq,Ib (66),(VEX),(o128) | ||
732 | EndTable | ||
733 | |||
734 | GrpTable: Grp1 | ||
735 | 0: ADD | ||
736 | 1: OR | ||
737 | 2: ADC | ||
738 | 3: SBB | ||
739 | 4: AND | ||
740 | 5: SUB | ||
741 | 6: XOR | ||
742 | 7: CMP | ||
743 | EndTable | ||
744 | |||
745 | GrpTable: Grp1A | ||
746 | 0: POP | ||
747 | EndTable | ||
748 | |||
749 | GrpTable: Grp2 | ||
750 | 0: ROL | ||
751 | 1: ROR | ||
752 | 2: RCL | ||
753 | 3: RCR | ||
754 | 4: SHL/SAL | ||
755 | 5: SHR | ||
756 | 6: | ||
757 | 7: SAR | ||
758 | EndTable | ||
759 | |||
760 | GrpTable: Grp3_1 | ||
761 | 0: TEST Eb,Ib | ||
762 | 1: | ||
763 | 2: NOT Eb | ||
764 | 3: NEG Eb | ||
765 | 4: MUL AL,Eb | ||
766 | 5: IMUL AL,Eb | ||
767 | 6: DIV AL,Eb | ||
768 | 7: IDIV AL,Eb | ||
769 | EndTable | ||
770 | |||
771 | GrpTable: Grp3_2 | ||
772 | 0: TEST Ev,Iz | ||
773 | 1: | ||
774 | 2: NOT Ev | ||
775 | 3: NEG Ev | ||
776 | 4: MUL rAX,Ev | ||
777 | 5: IMUL rAX,Ev | ||
778 | 6: DIV rAX,Ev | ||
779 | 7: IDIV rAX,Ev | ||
780 | EndTable | ||
781 | |||
782 | GrpTable: Grp4 | ||
783 | 0: INC Eb | ||
784 | 1: DEC Eb | ||
785 | EndTable | ||
786 | |||
787 | GrpTable: Grp5 | ||
788 | 0: INC Ev | ||
789 | 1: DEC Ev | ||
790 | 2: CALLN Ev (f64) | ||
791 | 3: CALLF Ep | ||
792 | 4: JMPN Ev (f64) | ||
793 | 5: JMPF Ep | ||
794 | 6: PUSH Ev (d64) | ||
795 | 7: | ||
796 | EndTable | ||
797 | |||
798 | GrpTable: Grp6 | ||
799 | 0: SLDT Rv/Mw | ||
800 | 1: STR Rv/Mw | ||
801 | 2: LLDT Ew | ||
802 | 3: LTR Ew | ||
803 | 4: VERR Ew | ||
804 | 5: VERW Ew | ||
805 | EndTable | ||
806 | |||
807 | GrpTable: Grp7 | ||
808 | 0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) | ||
809 | 1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001) | ||
810 | 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | ||
811 | 3: LIDT Ms | ||
812 | 4: SMSW Mw/Rv | ||
813 | 5: | ||
814 | 6: LMSW Ew | ||
815 | 7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) | ||
816 | EndTable | ||
817 | |||
818 | GrpTable: Grp8 | ||
819 | 4: BT | ||
820 | 5: BTS | ||
821 | 6: BTR | ||
822 | 7: BTC | ||
823 | EndTable | ||
824 | |||
825 | GrpTable: Grp9 | ||
826 | 1: CMPXCHG8B/16B Mq/Mdq | ||
827 | 6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | ||
828 | 7: VMPTRST Mq | ||
829 | EndTable | ||
830 | |||
831 | GrpTable: Grp10 | ||
832 | EndTable | ||
833 | |||
834 | GrpTable: Grp11 | ||
835 | 0: MOV | ||
836 | EndTable | ||
837 | |||
838 | GrpTable: Grp12 | ||
839 | 2: psrlw Nq,Ib (11B) | psrlw Udq,Ib (66),(11B),(VEX),(o128) | ||
840 | 4: psraw Nq,Ib (11B) | psraw Udq,Ib (66),(11B),(VEX),(o128) | ||
841 | 6: psllw Nq,Ib (11B) | psllw Udq,Ib (66),(11B),(VEX),(o128) | ||
842 | EndTable | ||
843 | |||
844 | GrpTable: Grp13 | ||
845 | 2: psrld Nq,Ib (11B) | psrld Udq,Ib (66),(11B),(VEX),(o128) | ||
846 | 4: psrad Nq,Ib (11B) | psrad Udq,Ib (66),(11B),(VEX),(o128) | ||
847 | 6: pslld Nq,Ib (11B) | pslld Udq,Ib (66),(11B),(VEX),(o128) | ||
848 | EndTable | ||
849 | |||
850 | GrpTable: Grp14 | ||
851 | 2: psrlq Nq,Ib (11B) | psrlq Udq,Ib (66),(11B),(VEX),(o128) | ||
852 | 3: psrldq Udq,Ib (66),(11B),(VEX),(o128) | ||
853 | 6: psllq Nq,Ib (11B) | psllq Udq,Ib (66),(11B),(VEX),(o128) | ||
854 | 7: pslldq Udq,Ib (66),(11B),(VEX),(o128) | ||
855 | EndTable | ||
856 | |||
857 | GrpTable: Grp15 | ||
858 | 0: fxsave | ||
859 | 1: fxstor | ||
860 | 2: ldmxcsr (VEX) | ||
861 | 3: stmxcsr (VEX) | ||
862 | 4: XSAVE | ||
863 | 5: XRSTOR | lfence (11B) | ||
864 | 6: mfence (11B) | ||
865 | 7: clflush | sfence (11B) | ||
866 | EndTable | ||
867 | |||
868 | GrpTable: Grp16 | ||
869 | 0: prefetch NTA | ||
870 | 1: prefetch T0 | ||
871 | 2: prefetch T1 | ||
872 | 3: prefetch T2 | ||
873 | EndTable | ||
874 | |||
875 | # AMD's Prefetch Group | ||
876 | GrpTable: GrpP | ||
877 | 0: PREFETCH | ||
878 | 1: PREFETCHW | ||
879 | EndTable | ||
880 | |||
881 | GrpTable: GrpPDLK | ||
882 | 0: MONTMUL | ||
883 | 1: XSHA1 | ||
884 | 2: XSHA2 | ||
885 | EndTable | ||
886 | |||
887 | GrpTable: GrpRNG | ||
888 | 0: xstore-rng | ||
889 | 1: xcrypt-ecb | ||
890 | 2: xcrypt-cbc | ||
891 | 4: xcrypt-cfb | ||
892 | 5: xcrypt-ofb | ||
893 | EndTable | ||
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 61b41ca3b5a2..d0474ad2a6e5 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c | |||
@@ -35,34 +35,3 @@ int fixup_exception(struct pt_regs *regs) | |||
35 | 35 | ||
36 | return 0; | 36 | return 0; |
37 | } | 37 | } |
38 | |||
39 | #ifdef CONFIG_X86_64 | ||
40 | /* | ||
41 | * Need to defined our own search_extable on X86_64 to work around | ||
42 | * a B stepping K8 bug. | ||
43 | */ | ||
44 | const struct exception_table_entry * | ||
45 | search_extable(const struct exception_table_entry *first, | ||
46 | const struct exception_table_entry *last, | ||
47 | unsigned long value) | ||
48 | { | ||
49 | /* B stepping K8 bug */ | ||
50 | if ((value >> 32) == 0) | ||
51 | value |= 0xffffffffUL << 32; | ||
52 | |||
53 | while (first <= last) { | ||
54 | const struct exception_table_entry *mid; | ||
55 | long diff; | ||
56 | |||
57 | mid = (last - first) / 2 + first; | ||
58 | diff = mid->insn - value; | ||
59 | if (diff == 0) | ||
60 | return mid; | ||
61 | else if (diff < 0) | ||
62 | first = mid+1; | ||
63 | else | ||
64 | last = mid-1; | ||
65 | } | ||
66 | return NULL; | ||
67 | } | ||
68 | #endif | ||
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index f4cee9028cf0..f62777940dfb 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -38,7 +38,8 @@ enum x86_pf_error_code { | |||
38 | * Returns 0 if mmiotrace is disabled, or if the fault is not | 38 | * Returns 0 if mmiotrace is disabled, or if the fault is not |
39 | * handled by mmiotrace: | 39 | * handled by mmiotrace: |
40 | */ | 40 | */ |
41 | static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) | 41 | static inline int __kprobes |
42 | kmmio_fault(struct pt_regs *regs, unsigned long addr) | ||
42 | { | 43 | { |
43 | if (unlikely(is_kmmio_active())) | 44 | if (unlikely(is_kmmio_active())) |
44 | if (kmmio_handler(regs, addr) == 1) | 45 | if (kmmio_handler(regs, addr) == 1) |
@@ -46,7 +47,7 @@ static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) | |||
46 | return 0; | 47 | return 0; |
47 | } | 48 | } |
48 | 49 | ||
49 | static inline int notify_page_fault(struct pt_regs *regs) | 50 | static inline int __kprobes notify_page_fault(struct pt_regs *regs) |
50 | { | 51 | { |
51 | int ret = 0; | 52 | int ret = 0; |
52 | 53 | ||
@@ -240,7 +241,7 @@ void vmalloc_sync_all(void) | |||
240 | * | 241 | * |
241 | * Handle a fault on the vmalloc or module mapping area | 242 | * Handle a fault on the vmalloc or module mapping area |
242 | */ | 243 | */ |
243 | static noinline int vmalloc_fault(unsigned long address) | 244 | static noinline __kprobes int vmalloc_fault(unsigned long address) |
244 | { | 245 | { |
245 | unsigned long pgd_paddr; | 246 | unsigned long pgd_paddr; |
246 | pmd_t *pmd_k; | 247 | pmd_t *pmd_k; |
@@ -357,7 +358,7 @@ void vmalloc_sync_all(void) | |||
357 | * | 358 | * |
358 | * This assumes no large pages in there. | 359 | * This assumes no large pages in there. |
359 | */ | 360 | */ |
360 | static noinline int vmalloc_fault(unsigned long address) | 361 | static noinline __kprobes int vmalloc_fault(unsigned long address) |
361 | { | 362 | { |
362 | pgd_t *pgd, *pgd_ref; | 363 | pgd_t *pgd, *pgd_ref; |
363 | pud_t *pud, *pud_ref; | 364 | pud_t *pud, *pud_ref; |
@@ -658,7 +659,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, | |||
658 | show_fault_oops(regs, error_code, address); | 659 | show_fault_oops(regs, error_code, address); |
659 | 660 | ||
660 | stackend = end_of_stack(tsk); | 661 | stackend = end_of_stack(tsk); |
661 | if (*stackend != STACK_END_MAGIC) | 662 | if (tsk != &init_task && *stackend != STACK_END_MAGIC) |
662 | printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); | 663 | printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); |
663 | 664 | ||
664 | tsk->thread.cr2 = address; | 665 | tsk->thread.cr2 = address; |
@@ -860,7 +861,7 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte) | |||
860 | * There are no security implications to leaving a stale TLB when | 861 | * There are no security implications to leaving a stale TLB when |
861 | * increasing the permissions on a page. | 862 | * increasing the permissions on a page. |
862 | */ | 863 | */ |
863 | static noinline int | 864 | static noinline __kprobes int |
864 | spurious_fault(unsigned long error_code, unsigned long address) | 865 | spurious_fault(unsigned long error_code, unsigned long address) |
865 | { | 866 | { |
866 | pgd_t *pgd; | 867 | pgd_t *pgd; |
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 73ffd5536f62..d406c5239019 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -146,10 +146,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
146 | use_gbpages = direct_gbpages; | 146 | use_gbpages = direct_gbpages; |
147 | #endif | 147 | #endif |
148 | 148 | ||
149 | set_nx(); | ||
150 | if (nx_enabled) | ||
151 | printk(KERN_INFO "NX (Execute Disable) protection: active\n"); | ||
152 | |||
153 | /* Enable PSE if available */ | 149 | /* Enable PSE if available */ |
154 | if (cpu_has_pse) | 150 | if (cpu_has_pse) |
155 | set_in_cr4(X86_CR4_PSE); | 151 | set_in_cr4(X86_CR4_PSE); |
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 30938c1d8d5d..c973f8e2a6cf 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -412,7 +412,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base) | |||
412 | pkmap_page_table = pte; | 412 | pkmap_page_table = pte; |
413 | } | 413 | } |
414 | 414 | ||
415 | static void __init add_one_highpage_init(struct page *page, int pfn) | 415 | static void __init add_one_highpage_init(struct page *page) |
416 | { | 416 | { |
417 | ClearPageReserved(page); | 417 | ClearPageReserved(page); |
418 | init_page_count(page); | 418 | init_page_count(page); |
@@ -445,7 +445,7 @@ static int __init add_highpages_work_fn(unsigned long start_pfn, | |||
445 | if (!pfn_valid(node_pfn)) | 445 | if (!pfn_valid(node_pfn)) |
446 | continue; | 446 | continue; |
447 | page = pfn_to_page(node_pfn); | 447 | page = pfn_to_page(node_pfn); |
448 | add_one_highpage_init(page, node_pfn); | 448 | add_one_highpage_init(page); |
449 | } | 449 | } |
450 | 450 | ||
451 | return 0; | 451 | return 0; |
@@ -703,8 +703,8 @@ void __init find_low_pfn_range(void) | |||
703 | } | 703 | } |
704 | 704 | ||
705 | #ifndef CONFIG_NEED_MULTIPLE_NODES | 705 | #ifndef CONFIG_NEED_MULTIPLE_NODES |
706 | void __init initmem_init(unsigned long start_pfn, | 706 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, |
707 | unsigned long end_pfn) | 707 | int acpi, int k8) |
708 | { | 708 | { |
709 | #ifdef CONFIG_HIGHMEM | 709 | #ifdef CONFIG_HIGHMEM |
710 | highstart_pfn = highend_pfn = max_pfn; | 710 | highstart_pfn = highend_pfn = max_pfn; |
@@ -997,7 +997,7 @@ static noinline int do_test_wp_bit(void) | |||
997 | const int rodata_test_data = 0xC3; | 997 | const int rodata_test_data = 0xC3; |
998 | EXPORT_SYMBOL_GPL(rodata_test_data); | 998 | EXPORT_SYMBOL_GPL(rodata_test_data); |
999 | 999 | ||
1000 | static int kernel_set_to_readonly; | 1000 | int kernel_set_to_readonly __read_mostly; |
1001 | 1001 | ||
1002 | void set_kernel_text_rw(void) | 1002 | void set_kernel_text_rw(void) |
1003 | { | 1003 | { |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 5a4398a6006b..5198b9bb34ef 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -568,7 +568,8 @@ kernel_physical_mapping_init(unsigned long start, | |||
568 | } | 568 | } |
569 | 569 | ||
570 | #ifndef CONFIG_NUMA | 570 | #ifndef CONFIG_NUMA |
571 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn) | 571 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, |
572 | int acpi, int k8) | ||
572 | { | 573 | { |
573 | unsigned long bootmap_size, bootmap; | 574 | unsigned long bootmap_size, bootmap; |
574 | 575 | ||
@@ -694,12 +695,12 @@ void __init mem_init(void) | |||
694 | const int rodata_test_data = 0xC3; | 695 | const int rodata_test_data = 0xC3; |
695 | EXPORT_SYMBOL_GPL(rodata_test_data); | 696 | EXPORT_SYMBOL_GPL(rodata_test_data); |
696 | 697 | ||
697 | static int kernel_set_to_readonly; | 698 | int kernel_set_to_readonly; |
698 | 699 | ||
699 | void set_kernel_text_rw(void) | 700 | void set_kernel_text_rw(void) |
700 | { | 701 | { |
701 | unsigned long start = PFN_ALIGN(_stext); | 702 | unsigned long start = PFN_ALIGN(_text); |
702 | unsigned long end = PFN_ALIGN(__start_rodata); | 703 | unsigned long end = PFN_ALIGN(__stop___ex_table); |
703 | 704 | ||
704 | if (!kernel_set_to_readonly) | 705 | if (!kernel_set_to_readonly) |
705 | return; | 706 | return; |
@@ -707,13 +708,18 @@ void set_kernel_text_rw(void) | |||
707 | pr_debug("Set kernel text: %lx - %lx for read write\n", | 708 | pr_debug("Set kernel text: %lx - %lx for read write\n", |
708 | start, end); | 709 | start, end); |
709 | 710 | ||
711 | /* | ||
712 | * Make the kernel identity mapping for text RW. Kernel text | ||
713 | * mapping will always be RO. Refer to the comment in | ||
714 | * static_protections() in pageattr.c | ||
715 | */ | ||
710 | set_memory_rw(start, (end - start) >> PAGE_SHIFT); | 716 | set_memory_rw(start, (end - start) >> PAGE_SHIFT); |
711 | } | 717 | } |
712 | 718 | ||
713 | void set_kernel_text_ro(void) | 719 | void set_kernel_text_ro(void) |
714 | { | 720 | { |
715 | unsigned long start = PFN_ALIGN(_stext); | 721 | unsigned long start = PFN_ALIGN(_text); |
716 | unsigned long end = PFN_ALIGN(__start_rodata); | 722 | unsigned long end = PFN_ALIGN(__stop___ex_table); |
717 | 723 | ||
718 | if (!kernel_set_to_readonly) | 724 | if (!kernel_set_to_readonly) |
719 | return; | 725 | return; |
@@ -721,14 +727,21 @@ void set_kernel_text_ro(void) | |||
721 | pr_debug("Set kernel text: %lx - %lx for read only\n", | 727 | pr_debug("Set kernel text: %lx - %lx for read only\n", |
722 | start, end); | 728 | start, end); |
723 | 729 | ||
730 | /* | ||
731 | * Set the kernel identity mapping for text RO. | ||
732 | */ | ||
724 | set_memory_ro(start, (end - start) >> PAGE_SHIFT); | 733 | set_memory_ro(start, (end - start) >> PAGE_SHIFT); |
725 | } | 734 | } |
726 | 735 | ||
727 | void mark_rodata_ro(void) | 736 | void mark_rodata_ro(void) |
728 | { | 737 | { |
729 | unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata); | 738 | unsigned long start = PFN_ALIGN(_text); |
730 | unsigned long rodata_start = | 739 | unsigned long rodata_start = |
731 | ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; | 740 | ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; |
741 | unsigned long end = (unsigned long) &__end_rodata_hpage_align; | ||
742 | unsigned long text_end = PAGE_ALIGN((unsigned long) &__stop___ex_table); | ||
743 | unsigned long rodata_end = PAGE_ALIGN((unsigned long) &__end_rodata); | ||
744 | unsigned long data_start = (unsigned long) &_sdata; | ||
732 | 745 | ||
733 | printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", | 746 | printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", |
734 | (end - start) >> 10); | 747 | (end - start) >> 10); |
@@ -751,6 +764,14 @@ void mark_rodata_ro(void) | |||
751 | printk(KERN_INFO "Testing CPA: again\n"); | 764 | printk(KERN_INFO "Testing CPA: again\n"); |
752 | set_memory_ro(start, (end-start) >> PAGE_SHIFT); | 765 | set_memory_ro(start, (end-start) >> PAGE_SHIFT); |
753 | #endif | 766 | #endif |
767 | |||
768 | free_init_pages("unused kernel memory", | ||
769 | (unsigned long) page_address(virt_to_page(text_end)), | ||
770 | (unsigned long) | ||
771 | page_address(virt_to_page(rodata_start))); | ||
772 | free_init_pages("unused kernel memory", | ||
773 | (unsigned long) page_address(virt_to_page(rodata_end)), | ||
774 | (unsigned long) page_address(virt_to_page(data_start))); | ||
754 | } | 775 | } |
755 | 776 | ||
756 | #endif | 777 | #endif |
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 334e63ca7b2b..c246d259822d 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
@@ -170,8 +170,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, | |||
170 | (unsigned long long)phys_addr, | 170 | (unsigned long long)phys_addr, |
171 | (unsigned long long)(phys_addr + size), | 171 | (unsigned long long)(phys_addr + size), |
172 | prot_val, new_prot_val); | 172 | prot_val, new_prot_val); |
173 | free_memtype(phys_addr, phys_addr + size); | 173 | goto err_free_memtype; |
174 | return NULL; | ||
175 | } | 174 | } |
176 | prot_val = new_prot_val; | 175 | prot_val = new_prot_val; |
177 | } | 176 | } |
@@ -197,26 +196,25 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, | |||
197 | */ | 196 | */ |
198 | area = get_vm_area_caller(size, VM_IOREMAP, caller); | 197 | area = get_vm_area_caller(size, VM_IOREMAP, caller); |
199 | if (!area) | 198 | if (!area) |
200 | return NULL; | 199 | goto err_free_memtype; |
201 | area->phys_addr = phys_addr; | 200 | area->phys_addr = phys_addr; |
202 | vaddr = (unsigned long) area->addr; | 201 | vaddr = (unsigned long) area->addr; |
203 | 202 | ||
204 | if (kernel_map_sync_memtype(phys_addr, size, prot_val)) { | 203 | if (kernel_map_sync_memtype(phys_addr, size, prot_val)) |
205 | free_memtype(phys_addr, phys_addr + size); | 204 | goto err_free_area; |
206 | free_vm_area(area); | ||
207 | return NULL; | ||
208 | } | ||
209 | 205 | ||
210 | if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) { | 206 | if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) |
211 | free_memtype(phys_addr, phys_addr + size); | 207 | goto err_free_area; |
212 | free_vm_area(area); | ||
213 | return NULL; | ||
214 | } | ||
215 | 208 | ||
216 | ret_addr = (void __iomem *) (vaddr + offset); | 209 | ret_addr = (void __iomem *) (vaddr + offset); |
217 | mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr); | 210 | mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr); |
218 | 211 | ||
219 | return ret_addr; | 212 | return ret_addr; |
213 | err_free_area: | ||
214 | free_vm_area(area); | ||
215 | err_free_memtype: | ||
216 | free_memtype(phys_addr, phys_addr + size); | ||
217 | return NULL; | ||
220 | } | 218 | } |
221 | 219 | ||
222 | /** | 220 | /** |
@@ -283,30 +281,6 @@ void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size) | |||
283 | } | 281 | } |
284 | EXPORT_SYMBOL(ioremap_cache); | 282 | EXPORT_SYMBOL(ioremap_cache); |
285 | 283 | ||
286 | static void __iomem *ioremap_default(resource_size_t phys_addr, | ||
287 | unsigned long size) | ||
288 | { | ||
289 | unsigned long flags; | ||
290 | void __iomem *ret; | ||
291 | int err; | ||
292 | |||
293 | /* | ||
294 | * - WB for WB-able memory and no other conflicting mappings | ||
295 | * - UC_MINUS for non-WB-able memory with no other conflicting mappings | ||
296 | * - Inherit from confliting mappings otherwise | ||
297 | */ | ||
298 | err = reserve_memtype(phys_addr, phys_addr + size, | ||
299 | _PAGE_CACHE_WB, &flags); | ||
300 | if (err < 0) | ||
301 | return NULL; | ||
302 | |||
303 | ret = __ioremap_caller(phys_addr, size, flags, | ||
304 | __builtin_return_address(0)); | ||
305 | |||
306 | free_memtype(phys_addr, phys_addr + size); | ||
307 | return ret; | ||
308 | } | ||
309 | |||
310 | void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size, | 284 | void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size, |
311 | unsigned long prot_val) | 285 | unsigned long prot_val) |
312 | { | 286 | { |
@@ -382,7 +356,7 @@ void *xlate_dev_mem_ptr(unsigned long phys) | |||
382 | if (page_is_ram(start >> PAGE_SHIFT)) | 356 | if (page_is_ram(start >> PAGE_SHIFT)) |
383 | return __va(phys); | 357 | return __va(phys); |
384 | 358 | ||
385 | addr = (void __force *)ioremap_default(start, PAGE_SIZE); | 359 | addr = (void __force *)ioremap_cache(start, PAGE_SIZE); |
386 | if (addr) | 360 | if (addr) |
387 | addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK)); | 361 | addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK)); |
388 | 362 | ||
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c index 268f8255280f..970ed579d4e4 100644 --- a/arch/x86/mm/k8topology_64.c +++ b/arch/x86/mm/k8topology_64.c | |||
@@ -24,6 +24,9 @@ | |||
24 | #include <asm/apic.h> | 24 | #include <asm/apic.h> |
25 | #include <asm/k8.h> | 25 | #include <asm/k8.h> |
26 | 26 | ||
27 | static struct bootnode __initdata nodes[8]; | ||
28 | static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE; | ||
29 | |||
27 | static __init int find_northbridge(void) | 30 | static __init int find_northbridge(void) |
28 | { | 31 | { |
29 | int num; | 32 | int num; |
@@ -54,18 +57,6 @@ static __init void early_get_boot_cpu_id(void) | |||
54 | * need to get boot_cpu_id so can use that to create apicid_to_node | 57 | * need to get boot_cpu_id so can use that to create apicid_to_node |
55 | * in k8_scan_nodes() | 58 | * in k8_scan_nodes() |
56 | */ | 59 | */ |
57 | /* | ||
58 | * Find possible boot-time SMP configuration: | ||
59 | */ | ||
60 | #ifdef CONFIG_X86_MPPARSE | ||
61 | early_find_smp_config(); | ||
62 | #endif | ||
63 | #ifdef CONFIG_ACPI | ||
64 | /* | ||
65 | * Read APIC information from ACPI tables. | ||
66 | */ | ||
67 | early_acpi_boot_init(); | ||
68 | #endif | ||
69 | #ifdef CONFIG_X86_MPPARSE | 60 | #ifdef CONFIG_X86_MPPARSE |
70 | /* | 61 | /* |
71 | * get boot-time SMP configuration: | 62 | * get boot-time SMP configuration: |
@@ -76,12 +67,26 @@ static __init void early_get_boot_cpu_id(void) | |||
76 | early_init_lapic_mapping(); | 67 | early_init_lapic_mapping(); |
77 | } | 68 | } |
78 | 69 | ||
79 | int __init k8_scan_nodes(unsigned long start, unsigned long end) | 70 | int __init k8_get_nodes(struct bootnode *physnodes) |
80 | { | 71 | { |
81 | unsigned numnodes, cores, bits, apicid_base; | 72 | int i; |
73 | int ret = 0; | ||
74 | |||
75 | for_each_node_mask(i, nodes_parsed) { | ||
76 | physnodes[ret].start = nodes[i].start; | ||
77 | physnodes[ret].end = nodes[i].end; | ||
78 | ret++; | ||
79 | } | ||
80 | return ret; | ||
81 | } | ||
82 | |||
83 | int __init k8_numa_init(unsigned long start_pfn, unsigned long end_pfn) | ||
84 | { | ||
85 | unsigned long start = PFN_PHYS(start_pfn); | ||
86 | unsigned long end = PFN_PHYS(end_pfn); | ||
87 | unsigned numnodes; | ||
82 | unsigned long prevbase; | 88 | unsigned long prevbase; |
83 | struct bootnode nodes[8]; | 89 | int i, nb, found = 0; |
84 | int i, j, nb, found = 0; | ||
85 | u32 nodeid, reg; | 90 | u32 nodeid, reg; |
86 | 91 | ||
87 | if (!early_pci_allowed()) | 92 | if (!early_pci_allowed()) |
@@ -91,16 +96,15 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
91 | if (nb < 0) | 96 | if (nb < 0) |
92 | return nb; | 97 | return nb; |
93 | 98 | ||
94 | printk(KERN_INFO "Scanning NUMA topology in Northbridge %d\n", nb); | 99 | pr_info("Scanning NUMA topology in Northbridge %d\n", nb); |
95 | 100 | ||
96 | reg = read_pci_config(0, nb, 0, 0x60); | 101 | reg = read_pci_config(0, nb, 0, 0x60); |
97 | numnodes = ((reg >> 4) & 0xF) + 1; | 102 | numnodes = ((reg >> 4) & 0xF) + 1; |
98 | if (numnodes <= 1) | 103 | if (numnodes <= 1) |
99 | return -1; | 104 | return -1; |
100 | 105 | ||
101 | printk(KERN_INFO "Number of nodes %d\n", numnodes); | 106 | pr_info("Number of physical nodes %d\n", numnodes); |
102 | 107 | ||
103 | memset(&nodes, 0, sizeof(nodes)); | ||
104 | prevbase = 0; | 108 | prevbase = 0; |
105 | for (i = 0; i < 8; i++) { | 109 | for (i = 0; i < 8; i++) { |
106 | unsigned long base, limit; | 110 | unsigned long base, limit; |
@@ -111,28 +115,28 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
111 | nodeid = limit & 7; | 115 | nodeid = limit & 7; |
112 | if ((base & 3) == 0) { | 116 | if ((base & 3) == 0) { |
113 | if (i < numnodes) | 117 | if (i < numnodes) |
114 | printk("Skipping disabled node %d\n", i); | 118 | pr_info("Skipping disabled node %d\n", i); |
115 | continue; | 119 | continue; |
116 | } | 120 | } |
117 | if (nodeid >= numnodes) { | 121 | if (nodeid >= numnodes) { |
118 | printk("Ignoring excess node %d (%lx:%lx)\n", nodeid, | 122 | pr_info("Ignoring excess node %d (%lx:%lx)\n", nodeid, |
119 | base, limit); | 123 | base, limit); |
120 | continue; | 124 | continue; |
121 | } | 125 | } |
122 | 126 | ||
123 | if (!limit) { | 127 | if (!limit) { |
124 | printk(KERN_INFO "Skipping node entry %d (base %lx)\n", | 128 | pr_info("Skipping node entry %d (base %lx)\n", |
125 | i, base); | 129 | i, base); |
126 | continue; | 130 | continue; |
127 | } | 131 | } |
128 | if ((base >> 8) & 3 || (limit >> 8) & 3) { | 132 | if ((base >> 8) & 3 || (limit >> 8) & 3) { |
129 | printk(KERN_ERR "Node %d using interleaving mode %lx/%lx\n", | 133 | pr_err("Node %d using interleaving mode %lx/%lx\n", |
130 | nodeid, (base>>8)&3, (limit>>8) & 3); | 134 | nodeid, (base >> 8) & 3, (limit >> 8) & 3); |
131 | return -1; | 135 | return -1; |
132 | } | 136 | } |
133 | if (node_isset(nodeid, node_possible_map)) { | 137 | if (node_isset(nodeid, nodes_parsed)) { |
134 | printk(KERN_INFO "Node %d already present. Skipping\n", | 138 | pr_info("Node %d already present, skipping\n", |
135 | nodeid); | 139 | nodeid); |
136 | continue; | 140 | continue; |
137 | } | 141 | } |
138 | 142 | ||
@@ -141,8 +145,8 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
141 | limit |= (1<<24)-1; | 145 | limit |= (1<<24)-1; |
142 | limit++; | 146 | limit++; |
143 | 147 | ||
144 | if (limit > max_pfn << PAGE_SHIFT) | 148 | if (limit > end) |
145 | limit = max_pfn << PAGE_SHIFT; | 149 | limit = end; |
146 | if (limit <= base) | 150 | if (limit <= base) |
147 | continue; | 151 | continue; |
148 | 152 | ||
@@ -154,24 +158,24 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
154 | if (limit > end) | 158 | if (limit > end) |
155 | limit = end; | 159 | limit = end; |
156 | if (limit == base) { | 160 | if (limit == base) { |
157 | printk(KERN_ERR "Empty node %d\n", nodeid); | 161 | pr_err("Empty node %d\n", nodeid); |
158 | continue; | 162 | continue; |
159 | } | 163 | } |
160 | if (limit < base) { | 164 | if (limit < base) { |
161 | printk(KERN_ERR "Node %d bogus settings %lx-%lx.\n", | 165 | pr_err("Node %d bogus settings %lx-%lx.\n", |
162 | nodeid, base, limit); | 166 | nodeid, base, limit); |
163 | continue; | 167 | continue; |
164 | } | 168 | } |
165 | 169 | ||
166 | /* Could sort here, but pun for now. Should not happen anyroads. */ | 170 | /* Could sort here, but pun for now. Should not happen anyroads. */ |
167 | if (prevbase > base) { | 171 | if (prevbase > base) { |
168 | printk(KERN_ERR "Node map not sorted %lx,%lx\n", | 172 | pr_err("Node map not sorted %lx,%lx\n", |
169 | prevbase, base); | 173 | prevbase, base); |
170 | return -1; | 174 | return -1; |
171 | } | 175 | } |
172 | 176 | ||
173 | printk(KERN_INFO "Node %d MemBase %016lx Limit %016lx\n", | 177 | pr_info("Node %d MemBase %016lx Limit %016lx\n", |
174 | nodeid, base, limit); | 178 | nodeid, base, limit); |
175 | 179 | ||
176 | found++; | 180 | found++; |
177 | 181 | ||
@@ -180,18 +184,29 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
180 | 184 | ||
181 | prevbase = base; | 185 | prevbase = base; |
182 | 186 | ||
183 | node_set(nodeid, node_possible_map); | 187 | node_set(nodeid, nodes_parsed); |
184 | } | 188 | } |
185 | 189 | ||
186 | if (!found) | 190 | if (!found) |
187 | return -1; | 191 | return -1; |
192 | return 0; | ||
193 | } | ||
194 | |||
195 | int __init k8_scan_nodes(void) | ||
196 | { | ||
197 | unsigned int bits; | ||
198 | unsigned int cores; | ||
199 | unsigned int apicid_base; | ||
200 | int i; | ||
188 | 201 | ||
202 | BUG_ON(nodes_empty(nodes_parsed)); | ||
203 | node_possible_map = nodes_parsed; | ||
189 | memnode_shift = compute_hash_shift(nodes, 8, NULL); | 204 | memnode_shift = compute_hash_shift(nodes, 8, NULL); |
190 | if (memnode_shift < 0) { | 205 | if (memnode_shift < 0) { |
191 | printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n"); | 206 | pr_err("No NUMA node hash function found. Contact maintainer\n"); |
192 | return -1; | 207 | return -1; |
193 | } | 208 | } |
194 | printk(KERN_INFO "Using node hash shift of %d\n", memnode_shift); | 209 | pr_info("Using node hash shift of %d\n", memnode_shift); |
195 | 210 | ||
196 | /* use the coreid bits from early_identify_cpu */ | 211 | /* use the coreid bits from early_identify_cpu */ |
197 | bits = boot_cpu_data.x86_coreid_bits; | 212 | bits = boot_cpu_data.x86_coreid_bits; |
@@ -200,14 +215,12 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
200 | /* need to get boot_cpu_id early for system with apicid lifting */ | 215 | /* need to get boot_cpu_id early for system with apicid lifting */ |
201 | early_get_boot_cpu_id(); | 216 | early_get_boot_cpu_id(); |
202 | if (boot_cpu_physical_apicid > 0) { | 217 | if (boot_cpu_physical_apicid > 0) { |
203 | printk(KERN_INFO "BSP APIC ID: %02x\n", | 218 | pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid); |
204 | boot_cpu_physical_apicid); | ||
205 | apicid_base = boot_cpu_physical_apicid; | 219 | apicid_base = boot_cpu_physical_apicid; |
206 | } | 220 | } |
207 | 221 | ||
208 | for (i = 0; i < 8; i++) { | 222 | for_each_node_mask(i, node_possible_map) { |
209 | if (nodes[i].start == nodes[i].end) | 223 | int j; |
210 | continue; | ||
211 | 224 | ||
212 | e820_register_active_regions(i, | 225 | e820_register_active_regions(i, |
213 | nodes[i].start >> PAGE_SHIFT, | 226 | nodes[i].start >> PAGE_SHIFT, |
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c index 4901d0dafda6..af3b6c8a436f 100644 --- a/arch/x86/mm/kmemcheck/error.c +++ b/arch/x86/mm/kmemcheck/error.c | |||
@@ -106,26 +106,25 @@ void kmemcheck_error_recall(void) | |||
106 | 106 | ||
107 | switch (e->type) { | 107 | switch (e->type) { |
108 | case KMEMCHECK_ERROR_INVALID_ACCESS: | 108 | case KMEMCHECK_ERROR_INVALID_ACCESS: |
109 | printk(KERN_ERR "WARNING: kmemcheck: Caught %d-bit read " | 109 | printk(KERN_WARNING "WARNING: kmemcheck: Caught %d-bit read from %s memory (%p)\n", |
110 | "from %s memory (%p)\n", | ||
111 | 8 * e->size, e->state < ARRAY_SIZE(desc) ? | 110 | 8 * e->size, e->state < ARRAY_SIZE(desc) ? |
112 | desc[e->state] : "(invalid shadow state)", | 111 | desc[e->state] : "(invalid shadow state)", |
113 | (void *) e->address); | 112 | (void *) e->address); |
114 | 113 | ||
115 | printk(KERN_INFO); | 114 | printk(KERN_WARNING); |
116 | for (i = 0; i < SHADOW_COPY_SIZE; ++i) | 115 | for (i = 0; i < SHADOW_COPY_SIZE; ++i) |
117 | printk("%02x", e->memory_copy[i]); | 116 | printk(KERN_CONT "%02x", e->memory_copy[i]); |
118 | printk("\n"); | 117 | printk(KERN_CONT "\n"); |
119 | 118 | ||
120 | printk(KERN_INFO); | 119 | printk(KERN_WARNING); |
121 | for (i = 0; i < SHADOW_COPY_SIZE; ++i) { | 120 | for (i = 0; i < SHADOW_COPY_SIZE; ++i) { |
122 | if (e->shadow_copy[i] < ARRAY_SIZE(short_desc)) | 121 | if (e->shadow_copy[i] < ARRAY_SIZE(short_desc)) |
123 | printk(" %c", short_desc[e->shadow_copy[i]]); | 122 | printk(KERN_CONT " %c", short_desc[e->shadow_copy[i]]); |
124 | else | 123 | else |
125 | printk(" ?"); | 124 | printk(KERN_CONT " ?"); |
126 | } | 125 | } |
127 | printk("\n"); | 126 | printk(KERN_CONT "\n"); |
128 | printk(KERN_INFO "%*c\n", 2 + 2 | 127 | printk(KERN_WARNING "%*c\n", 2 + 2 |
129 | * (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^'); | 128 | * (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^'); |
130 | break; | 129 | break; |
131 | case KMEMCHECK_ERROR_BUG: | 130 | case KMEMCHECK_ERROR_BUG: |
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 16ccbd77917f..c0f6198565eb 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c | |||
@@ -5,6 +5,8 @@ | |||
5 | * 2008 Pekka Paalanen <pq@iki.fi> | 5 | * 2008 Pekka Paalanen <pq@iki.fi> |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
9 | |||
8 | #include <linux/list.h> | 10 | #include <linux/list.h> |
9 | #include <linux/rculist.h> | 11 | #include <linux/rculist.h> |
10 | #include <linux/spinlock.h> | 12 | #include <linux/spinlock.h> |
@@ -136,7 +138,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear) | |||
136 | pte_t *pte = lookup_address(f->page, &level); | 138 | pte_t *pte = lookup_address(f->page, &level); |
137 | 139 | ||
138 | if (!pte) { | 140 | if (!pte) { |
139 | pr_err("kmmio: no pte for page 0x%08lx\n", f->page); | 141 | pr_err("no pte for page 0x%08lx\n", f->page); |
140 | return -1; | 142 | return -1; |
141 | } | 143 | } |
142 | 144 | ||
@@ -148,7 +150,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear) | |||
148 | clear_pte_presence(pte, clear, &f->old_presence); | 150 | clear_pte_presence(pte, clear, &f->old_presence); |
149 | break; | 151 | break; |
150 | default: | 152 | default: |
151 | pr_err("kmmio: unexpected page level 0x%x.\n", level); | 153 | pr_err("unexpected page level 0x%x.\n", level); |
152 | return -1; | 154 | return -1; |
153 | } | 155 | } |
154 | 156 | ||
@@ -170,13 +172,14 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear) | |||
170 | static int arm_kmmio_fault_page(struct kmmio_fault_page *f) | 172 | static int arm_kmmio_fault_page(struct kmmio_fault_page *f) |
171 | { | 173 | { |
172 | int ret; | 174 | int ret; |
173 | WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n"); | 175 | WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n")); |
174 | if (f->armed) { | 176 | if (f->armed) { |
175 | pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n", | 177 | pr_warning("double-arm: page 0x%08lx, ref %d, old %d\n", |
176 | f->page, f->count, !!f->old_presence); | 178 | f->page, f->count, !!f->old_presence); |
177 | } | 179 | } |
178 | ret = clear_page_presence(f, true); | 180 | ret = clear_page_presence(f, true); |
179 | WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page); | 181 | WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming 0x%08lx failed.\n"), |
182 | f->page); | ||
180 | f->armed = true; | 183 | f->armed = true; |
181 | return ret; | 184 | return ret; |
182 | } | 185 | } |
@@ -203,7 +206,7 @@ static void disarm_kmmio_fault_page(struct kmmio_fault_page *f) | |||
203 | */ | 206 | */ |
204 | /* | 207 | /* |
205 | * Interrupts are disabled on entry as trap3 is an interrupt gate | 208 | * Interrupts are disabled on entry as trap3 is an interrupt gate |
206 | * and they remain disabled thorough out this function. | 209 | * and they remain disabled throughout this function. |
207 | */ | 210 | */ |
208 | int kmmio_handler(struct pt_regs *regs, unsigned long addr) | 211 | int kmmio_handler(struct pt_regs *regs, unsigned long addr) |
209 | { | 212 | { |
@@ -240,24 +243,21 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) | |||
240 | * condition needs handling by do_page_fault(), the | 243 | * condition needs handling by do_page_fault(), the |
241 | * page really not being present is the most common. | 244 | * page really not being present is the most common. |
242 | */ | 245 | */ |
243 | pr_debug("kmmio: secondary hit for 0x%08lx CPU %d.\n", | 246 | pr_debug("secondary hit for 0x%08lx CPU %d.\n", |
244 | addr, smp_processor_id()); | 247 | addr, smp_processor_id()); |
245 | 248 | ||
246 | if (!faultpage->old_presence) | 249 | if (!faultpage->old_presence) |
247 | pr_info("kmmio: unexpected secondary hit for " | 250 | pr_info("unexpected secondary hit for address 0x%08lx on CPU %d.\n", |
248 | "address 0x%08lx on CPU %d.\n", addr, | 251 | addr, smp_processor_id()); |
249 | smp_processor_id()); | ||
250 | } else { | 252 | } else { |
251 | /* | 253 | /* |
252 | * Prevent overwriting already in-flight context. | 254 | * Prevent overwriting already in-flight context. |
253 | * This should not happen, let's hope disarming at | 255 | * This should not happen, let's hope disarming at |
254 | * least prevents a panic. | 256 | * least prevents a panic. |
255 | */ | 257 | */ |
256 | pr_emerg("kmmio: recursive probe hit on CPU %d, " | 258 | pr_emerg("recursive probe hit on CPU %d, for address 0x%08lx. Ignoring.\n", |
257 | "for address 0x%08lx. Ignoring.\n", | 259 | smp_processor_id(), addr); |
258 | smp_processor_id(), addr); | 260 | pr_emerg("previous hit was at 0x%08lx.\n", ctx->addr); |
259 | pr_emerg("kmmio: previous hit was at 0x%08lx.\n", | ||
260 | ctx->addr); | ||
261 | disarm_kmmio_fault_page(faultpage); | 261 | disarm_kmmio_fault_page(faultpage); |
262 | } | 262 | } |
263 | goto no_kmmio_ctx; | 263 | goto no_kmmio_ctx; |
@@ -302,7 +302,7 @@ no_kmmio: | |||
302 | 302 | ||
303 | /* | 303 | /* |
304 | * Interrupts are disabled on entry as trap1 is an interrupt gate | 304 | * Interrupts are disabled on entry as trap1 is an interrupt gate |
305 | * and they remain disabled thorough out this function. | 305 | * and they remain disabled throughout this function. |
306 | * This must always get called as the pair to kmmio_handler(). | 306 | * This must always get called as the pair to kmmio_handler(). |
307 | */ | 307 | */ |
308 | static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) | 308 | static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) |
@@ -316,8 +316,8 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) | |||
316 | * something external causing them (f.e. using a debugger while | 316 | * something external causing them (f.e. using a debugger while |
317 | * mmio tracing enabled), or erroneous behaviour | 317 | * mmio tracing enabled), or erroneous behaviour |
318 | */ | 318 | */ |
319 | pr_warning("kmmio: unexpected debug trap on CPU %d.\n", | 319 | pr_warning("unexpected debug trap on CPU %d.\n", |
320 | smp_processor_id()); | 320 | smp_processor_id()); |
321 | goto out; | 321 | goto out; |
322 | } | 322 | } |
323 | 323 | ||
@@ -425,7 +425,7 @@ int register_kmmio_probe(struct kmmio_probe *p) | |||
425 | list_add_rcu(&p->list, &kmmio_probes); | 425 | list_add_rcu(&p->list, &kmmio_probes); |
426 | while (size < size_lim) { | 426 | while (size < size_lim) { |
427 | if (add_kmmio_fault_page(p->addr + size)) | 427 | if (add_kmmio_fault_page(p->addr + size)) |
428 | pr_err("kmmio: Unable to set page fault.\n"); | 428 | pr_err("Unable to set page fault.\n"); |
429 | size += PAGE_SIZE; | 429 | size += PAGE_SIZE; |
430 | } | 430 | } |
431 | out: | 431 | out: |
@@ -490,7 +490,7 @@ static void remove_kmmio_fault_pages(struct rcu_head *head) | |||
490 | * 2. remove_kmmio_fault_pages() | 490 | * 2. remove_kmmio_fault_pages() |
491 | * Remove the pages from kmmio_page_table. | 491 | * Remove the pages from kmmio_page_table. |
492 | * 3. rcu_free_kmmio_fault_pages() | 492 | * 3. rcu_free_kmmio_fault_pages() |
493 | * Actally free the kmmio_fault_page structs as with RCU. | 493 | * Actually free the kmmio_fault_page structs as with RCU. |
494 | */ | 494 | */ |
495 | void unregister_kmmio_probe(struct kmmio_probe *p) | 495 | void unregister_kmmio_probe(struct kmmio_probe *p) |
496 | { | 496 | { |
@@ -511,7 +511,7 @@ void unregister_kmmio_probe(struct kmmio_probe *p) | |||
511 | 511 | ||
512 | drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC); | 512 | drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC); |
513 | if (!drelease) { | 513 | if (!drelease) { |
514 | pr_crit("kmmio: leaking kmmio_fault_page objects.\n"); | 514 | pr_crit("leaking kmmio_fault_page objects.\n"); |
515 | return; | 515 | return; |
516 | } | 516 | } |
517 | drelease->release_list = release_list; | 517 | drelease->release_list = release_list; |
@@ -540,8 +540,14 @@ kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args) | |||
540 | struct die_args *arg = args; | 540 | struct die_args *arg = args; |
541 | 541 | ||
542 | if (val == DIE_DEBUG && (arg->err & DR_STEP)) | 542 | if (val == DIE_DEBUG && (arg->err & DR_STEP)) |
543 | if (post_kmmio_handler(arg->err, arg->regs) == 1) | 543 | if (post_kmmio_handler(arg->err, arg->regs) == 1) { |
544 | /* | ||
545 | * Reset the BS bit in dr6 (pointed by args->err) to | ||
546 | * denote completion of processing | ||
547 | */ | ||
548 | (*(unsigned long *)ERR_PTR(arg->err)) &= ~DR_STEP; | ||
544 | return NOTIFY_STOP; | 549 | return NOTIFY_STOP; |
550 | } | ||
545 | 551 | ||
546 | return NOTIFY_DONE; | 552 | return NOTIFY_DONE; |
547 | } | 553 | } |
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 132772a8ec57..34a3291ca103 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c | |||
@@ -19,6 +19,9 @@ | |||
19 | * | 19 | * |
20 | * Derived from the read-mod example from relay-examples by Tom Zanussi. | 20 | * Derived from the read-mod example from relay-examples by Tom Zanussi. |
21 | */ | 21 | */ |
22 | |||
23 | #define pr_fmt(fmt) "mmiotrace: " fmt | ||
24 | |||
22 | #define DEBUG 1 | 25 | #define DEBUG 1 |
23 | 26 | ||
24 | #include <linux/module.h> | 27 | #include <linux/module.h> |
@@ -36,8 +39,6 @@ | |||
36 | 39 | ||
37 | #include "pf_in.h" | 40 | #include "pf_in.h" |
38 | 41 | ||
39 | #define NAME "mmiotrace: " | ||
40 | |||
41 | struct trap_reason { | 42 | struct trap_reason { |
42 | unsigned long addr; | 43 | unsigned long addr; |
43 | unsigned long ip; | 44 | unsigned long ip; |
@@ -96,17 +97,18 @@ static void print_pte(unsigned long address) | |||
96 | pte_t *pte = lookup_address(address, &level); | 97 | pte_t *pte = lookup_address(address, &level); |
97 | 98 | ||
98 | if (!pte) { | 99 | if (!pte) { |
99 | pr_err(NAME "Error in %s: no pte for page 0x%08lx\n", | 100 | pr_err("Error in %s: no pte for page 0x%08lx\n", |
100 | __func__, address); | 101 | __func__, address); |
101 | return; | 102 | return; |
102 | } | 103 | } |
103 | 104 | ||
104 | if (level == PG_LEVEL_2M) { | 105 | if (level == PG_LEVEL_2M) { |
105 | pr_emerg(NAME "4MB pages are not currently supported: " | 106 | pr_emerg("4MB pages are not currently supported: 0x%08lx\n", |
106 | "0x%08lx\n", address); | 107 | address); |
107 | BUG(); | 108 | BUG(); |
108 | } | 109 | } |
109 | pr_info(NAME "pte for 0x%lx: 0x%llx 0x%llx\n", address, | 110 | pr_info("pte for 0x%lx: 0x%llx 0x%llx\n", |
111 | address, | ||
110 | (unsigned long long)pte_val(*pte), | 112 | (unsigned long long)pte_val(*pte), |
111 | (unsigned long long)pte_val(*pte) & _PAGE_PRESENT); | 113 | (unsigned long long)pte_val(*pte) & _PAGE_PRESENT); |
112 | } | 114 | } |
@@ -118,22 +120,21 @@ static void print_pte(unsigned long address) | |||
118 | static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) | 120 | static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) |
119 | { | 121 | { |
120 | const struct trap_reason *my_reason = &get_cpu_var(pf_reason); | 122 | const struct trap_reason *my_reason = &get_cpu_var(pf_reason); |
121 | pr_emerg(NAME "unexpected fault for address: 0x%08lx, " | 123 | pr_emerg("unexpected fault for address: 0x%08lx, last fault for address: 0x%08lx\n", |
122 | "last fault for address: 0x%08lx\n", | 124 | addr, my_reason->addr); |
123 | addr, my_reason->addr); | ||
124 | print_pte(addr); | 125 | print_pte(addr); |
125 | print_symbol(KERN_EMERG "faulting IP is at %s\n", regs->ip); | 126 | print_symbol(KERN_EMERG "faulting IP is at %s\n", regs->ip); |
126 | print_symbol(KERN_EMERG "last faulting IP was at %s\n", my_reason->ip); | 127 | print_symbol(KERN_EMERG "last faulting IP was at %s\n", my_reason->ip); |
127 | #ifdef __i386__ | 128 | #ifdef __i386__ |
128 | pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", | 129 | pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", |
129 | regs->ax, regs->bx, regs->cx, regs->dx); | 130 | regs->ax, regs->bx, regs->cx, regs->dx); |
130 | pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", | 131 | pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", |
131 | regs->si, regs->di, regs->bp, regs->sp); | 132 | regs->si, regs->di, regs->bp, regs->sp); |
132 | #else | 133 | #else |
133 | pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n", | 134 | pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n", |
134 | regs->ax, regs->cx, regs->dx); | 135 | regs->ax, regs->cx, regs->dx); |
135 | pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n", | 136 | pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n", |
136 | regs->si, regs->di, regs->bp, regs->sp); | 137 | regs->si, regs->di, regs->bp, regs->sp); |
137 | #endif | 138 | #endif |
138 | put_cpu_var(pf_reason); | 139 | put_cpu_var(pf_reason); |
139 | BUG(); | 140 | BUG(); |
@@ -213,7 +214,7 @@ static void post(struct kmmio_probe *p, unsigned long condition, | |||
213 | /* this should always return the active_trace count to 0 */ | 214 | /* this should always return the active_trace count to 0 */ |
214 | my_reason->active_traces--; | 215 | my_reason->active_traces--; |
215 | if (my_reason->active_traces) { | 216 | if (my_reason->active_traces) { |
216 | pr_emerg(NAME "unexpected post handler"); | 217 | pr_emerg("unexpected post handler"); |
217 | BUG(); | 218 | BUG(); |
218 | } | 219 | } |
219 | 220 | ||
@@ -244,7 +245,7 @@ static void ioremap_trace_core(resource_size_t offset, unsigned long size, | |||
244 | }; | 245 | }; |
245 | 246 | ||
246 | if (!trace) { | 247 | if (!trace) { |
247 | pr_err(NAME "kmalloc failed in ioremap\n"); | 248 | pr_err("kmalloc failed in ioremap\n"); |
248 | return; | 249 | return; |
249 | } | 250 | } |
250 | 251 | ||
@@ -282,8 +283,8 @@ void mmiotrace_ioremap(resource_size_t offset, unsigned long size, | |||
282 | if (!is_enabled()) /* recheck and proper locking in *_core() */ | 283 | if (!is_enabled()) /* recheck and proper locking in *_core() */ |
283 | return; | 284 | return; |
284 | 285 | ||
285 | pr_debug(NAME "ioremap_*(0x%llx, 0x%lx) = %p\n", | 286 | pr_debug("ioremap_*(0x%llx, 0x%lx) = %p\n", |
286 | (unsigned long long)offset, size, addr); | 287 | (unsigned long long)offset, size, addr); |
287 | if ((filter_offset) && (offset != filter_offset)) | 288 | if ((filter_offset) && (offset != filter_offset)) |
288 | return; | 289 | return; |
289 | ioremap_trace_core(offset, size, addr); | 290 | ioremap_trace_core(offset, size, addr); |
@@ -301,7 +302,7 @@ static void iounmap_trace_core(volatile void __iomem *addr) | |||
301 | struct remap_trace *tmp; | 302 | struct remap_trace *tmp; |
302 | struct remap_trace *found_trace = NULL; | 303 | struct remap_trace *found_trace = NULL; |
303 | 304 | ||
304 | pr_debug(NAME "Unmapping %p.\n", addr); | 305 | pr_debug("Unmapping %p.\n", addr); |
305 | 306 | ||
306 | spin_lock_irq(&trace_lock); | 307 | spin_lock_irq(&trace_lock); |
307 | if (!is_enabled()) | 308 | if (!is_enabled()) |
@@ -363,9 +364,8 @@ static void clear_trace_list(void) | |||
363 | * Caller also ensures is_enabled() cannot change. | 364 | * Caller also ensures is_enabled() cannot change. |
364 | */ | 365 | */ |
365 | list_for_each_entry(trace, &trace_list, list) { | 366 | list_for_each_entry(trace, &trace_list, list) { |
366 | pr_notice(NAME "purging non-iounmapped " | 367 | pr_notice("purging non-iounmapped trace @0x%08lx, size 0x%lx.\n", |
367 | "trace @0x%08lx, size 0x%lx.\n", | 368 | trace->probe.addr, trace->probe.len); |
368 | trace->probe.addr, trace->probe.len); | ||
369 | if (!nommiotrace) | 369 | if (!nommiotrace) |
370 | unregister_kmmio_probe(&trace->probe); | 370 | unregister_kmmio_probe(&trace->probe); |
371 | } | 371 | } |
@@ -387,7 +387,7 @@ static void enter_uniprocessor(void) | |||
387 | 387 | ||
388 | if (downed_cpus == NULL && | 388 | if (downed_cpus == NULL && |
389 | !alloc_cpumask_var(&downed_cpus, GFP_KERNEL)) { | 389 | !alloc_cpumask_var(&downed_cpus, GFP_KERNEL)) { |
390 | pr_notice(NAME "Failed to allocate mask\n"); | 390 | pr_notice("Failed to allocate mask\n"); |
391 | goto out; | 391 | goto out; |
392 | } | 392 | } |
393 | 393 | ||
@@ -395,20 +395,19 @@ static void enter_uniprocessor(void) | |||
395 | cpumask_copy(downed_cpus, cpu_online_mask); | 395 | cpumask_copy(downed_cpus, cpu_online_mask); |
396 | cpumask_clear_cpu(cpumask_first(cpu_online_mask), downed_cpus); | 396 | cpumask_clear_cpu(cpumask_first(cpu_online_mask), downed_cpus); |
397 | if (num_online_cpus() > 1) | 397 | if (num_online_cpus() > 1) |
398 | pr_notice(NAME "Disabling non-boot CPUs...\n"); | 398 | pr_notice("Disabling non-boot CPUs...\n"); |
399 | put_online_cpus(); | 399 | put_online_cpus(); |
400 | 400 | ||
401 | for_each_cpu(cpu, downed_cpus) { | 401 | for_each_cpu(cpu, downed_cpus) { |
402 | err = cpu_down(cpu); | 402 | err = cpu_down(cpu); |
403 | if (!err) | 403 | if (!err) |
404 | pr_info(NAME "CPU%d is down.\n", cpu); | 404 | pr_info("CPU%d is down.\n", cpu); |
405 | else | 405 | else |
406 | pr_err(NAME "Error taking CPU%d down: %d\n", cpu, err); | 406 | pr_err("Error taking CPU%d down: %d\n", cpu, err); |
407 | } | 407 | } |
408 | out: | 408 | out: |
409 | if (num_online_cpus() > 1) | 409 | if (num_online_cpus() > 1) |
410 | pr_warning(NAME "multiple CPUs still online, " | 410 | pr_warning("multiple CPUs still online, may miss events.\n"); |
411 | "may miss events.\n"); | ||
412 | } | 411 | } |
413 | 412 | ||
414 | /* __ref because leave_uniprocessor calls cpu_up which is __cpuinit, | 413 | /* __ref because leave_uniprocessor calls cpu_up which is __cpuinit, |
@@ -420,13 +419,13 @@ static void __ref leave_uniprocessor(void) | |||
420 | 419 | ||
421 | if (downed_cpus == NULL || cpumask_weight(downed_cpus) == 0) | 420 | if (downed_cpus == NULL || cpumask_weight(downed_cpus) == 0) |
422 | return; | 421 | return; |
423 | pr_notice(NAME "Re-enabling CPUs...\n"); | 422 | pr_notice("Re-enabling CPUs...\n"); |
424 | for_each_cpu(cpu, downed_cpus) { | 423 | for_each_cpu(cpu, downed_cpus) { |
425 | err = cpu_up(cpu); | 424 | err = cpu_up(cpu); |
426 | if (!err) | 425 | if (!err) |
427 | pr_info(NAME "enabled CPU%d.\n", cpu); | 426 | pr_info("enabled CPU%d.\n", cpu); |
428 | else | 427 | else |
429 | pr_err(NAME "cannot re-enable CPU%d: %d\n", cpu, err); | 428 | pr_err("cannot re-enable CPU%d: %d\n", cpu, err); |
430 | } | 429 | } |
431 | } | 430 | } |
432 | 431 | ||
@@ -434,8 +433,8 @@ static void __ref leave_uniprocessor(void) | |||
434 | static void enter_uniprocessor(void) | 433 | static void enter_uniprocessor(void) |
435 | { | 434 | { |
436 | if (num_online_cpus() > 1) | 435 | if (num_online_cpus() > 1) |
437 | pr_warning(NAME "multiple CPUs are online, may miss events. " | 436 | pr_warning("multiple CPUs are online, may miss events. " |
438 | "Suggest booting with maxcpus=1 kernel argument.\n"); | 437 | "Suggest booting with maxcpus=1 kernel argument.\n"); |
439 | } | 438 | } |
440 | 439 | ||
441 | static void leave_uniprocessor(void) | 440 | static void leave_uniprocessor(void) |
@@ -450,13 +449,13 @@ void enable_mmiotrace(void) | |||
450 | goto out; | 449 | goto out; |
451 | 450 | ||
452 | if (nommiotrace) | 451 | if (nommiotrace) |
453 | pr_info(NAME "MMIO tracing disabled.\n"); | 452 | pr_info("MMIO tracing disabled.\n"); |
454 | kmmio_init(); | 453 | kmmio_init(); |
455 | enter_uniprocessor(); | 454 | enter_uniprocessor(); |
456 | spin_lock_irq(&trace_lock); | 455 | spin_lock_irq(&trace_lock); |
457 | atomic_inc(&mmiotrace_enabled); | 456 | atomic_inc(&mmiotrace_enabled); |
458 | spin_unlock_irq(&trace_lock); | 457 | spin_unlock_irq(&trace_lock); |
459 | pr_info(NAME "enabled.\n"); | 458 | pr_info("enabled.\n"); |
460 | out: | 459 | out: |
461 | mutex_unlock(&mmiotrace_mutex); | 460 | mutex_unlock(&mmiotrace_mutex); |
462 | } | 461 | } |
@@ -475,7 +474,7 @@ void disable_mmiotrace(void) | |||
475 | clear_trace_list(); /* guarantees: no more kmmio callbacks */ | 474 | clear_trace_list(); /* guarantees: no more kmmio callbacks */ |
476 | leave_uniprocessor(); | 475 | leave_uniprocessor(); |
477 | kmmio_cleanup(); | 476 | kmmio_cleanup(); |
478 | pr_info(NAME "disabled.\n"); | 477 | pr_info("disabled.\n"); |
479 | out: | 478 | out: |
480 | mutex_unlock(&mmiotrace_mutex); | 479 | mutex_unlock(&mmiotrace_mutex); |
481 | } | 480 | } |
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index d2530062fe00..b20760ca7244 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c | |||
@@ -347,8 +347,8 @@ static void init_remap_allocator(int nid) | |||
347 | (ulong) node_remap_end_vaddr[nid]); | 347 | (ulong) node_remap_end_vaddr[nid]); |
348 | } | 348 | } |
349 | 349 | ||
350 | void __init initmem_init(unsigned long start_pfn, | 350 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, |
351 | unsigned long end_pfn) | 351 | int acpi, int k8) |
352 | { | 352 | { |
353 | int nid; | 353 | int nid; |
354 | long kva_target_pfn; | 354 | long kva_target_pfn; |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 459913beac71..83bbc70d11bb 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -239,8 +239,14 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
239 | bootmap = early_node_mem(nodeid, bootmap_start, end, | 239 | bootmap = early_node_mem(nodeid, bootmap_start, end, |
240 | bootmap_pages<<PAGE_SHIFT, PAGE_SIZE); | 240 | bootmap_pages<<PAGE_SHIFT, PAGE_SIZE); |
241 | if (bootmap == NULL) { | 241 | if (bootmap == NULL) { |
242 | if (nodedata_phys < start || nodedata_phys >= end) | 242 | if (nodedata_phys < start || nodedata_phys >= end) { |
243 | free_bootmem(nodedata_phys, pgdat_size); | 243 | /* |
244 | * only need to free it if it is from other node | ||
245 | * bootmem | ||
246 | */ | ||
247 | if (nid != nodeid) | ||
248 | free_bootmem(nodedata_phys, pgdat_size); | ||
249 | } | ||
244 | node_data[nodeid] = NULL; | 250 | node_data[nodeid] = NULL; |
245 | return; | 251 | return; |
246 | } | 252 | } |
@@ -306,8 +312,71 @@ void __init numa_init_array(void) | |||
306 | 312 | ||
307 | #ifdef CONFIG_NUMA_EMU | 313 | #ifdef CONFIG_NUMA_EMU |
308 | /* Numa emulation */ | 314 | /* Numa emulation */ |
315 | static struct bootnode nodes[MAX_NUMNODES] __initdata; | ||
316 | static struct bootnode physnodes[MAX_NUMNODES] __initdata; | ||
309 | static char *cmdline __initdata; | 317 | static char *cmdline __initdata; |
310 | 318 | ||
319 | static int __init setup_physnodes(unsigned long start, unsigned long end, | ||
320 | int acpi, int k8) | ||
321 | { | ||
322 | int nr_nodes = 0; | ||
323 | int ret = 0; | ||
324 | int i; | ||
325 | |||
326 | #ifdef CONFIG_ACPI_NUMA | ||
327 | if (acpi) | ||
328 | nr_nodes = acpi_get_nodes(physnodes); | ||
329 | #endif | ||
330 | #ifdef CONFIG_K8_NUMA | ||
331 | if (k8) | ||
332 | nr_nodes = k8_get_nodes(physnodes); | ||
333 | #endif | ||
334 | /* | ||
335 | * Basic sanity checking on the physical node map: there may be errors | ||
336 | * if the SRAT or K8 incorrectly reported the topology or the mem= | ||
337 | * kernel parameter is used. | ||
338 | */ | ||
339 | for (i = 0; i < nr_nodes; i++) { | ||
340 | if (physnodes[i].start == physnodes[i].end) | ||
341 | continue; | ||
342 | if (physnodes[i].start > end) { | ||
343 | physnodes[i].end = physnodes[i].start; | ||
344 | continue; | ||
345 | } | ||
346 | if (physnodes[i].end < start) { | ||
347 | physnodes[i].start = physnodes[i].end; | ||
348 | continue; | ||
349 | } | ||
350 | if (physnodes[i].start < start) | ||
351 | physnodes[i].start = start; | ||
352 | if (physnodes[i].end > end) | ||
353 | physnodes[i].end = end; | ||
354 | } | ||
355 | |||
356 | /* | ||
357 | * Remove all nodes that have no memory or were truncated because of the | ||
358 | * limited address range. | ||
359 | */ | ||
360 | for (i = 0; i < nr_nodes; i++) { | ||
361 | if (physnodes[i].start == physnodes[i].end) | ||
362 | continue; | ||
363 | physnodes[ret].start = physnodes[i].start; | ||
364 | physnodes[ret].end = physnodes[i].end; | ||
365 | ret++; | ||
366 | } | ||
367 | |||
368 | /* | ||
369 | * If no physical topology was detected, a single node is faked to cover | ||
370 | * the entire address space. | ||
371 | */ | ||
372 | if (!ret) { | ||
373 | physnodes[ret].start = start; | ||
374 | physnodes[ret].end = end; | ||
375 | ret = 1; | ||
376 | } | ||
377 | return ret; | ||
378 | } | ||
379 | |||
311 | /* | 380 | /* |
312 | * Setups up nid to range from addr to addr + size. If the end | 381 | * Setups up nid to range from addr to addr + size. If the end |
313 | * boundary is greater than max_addr, then max_addr is used instead. | 382 | * boundary is greater than max_addr, then max_addr is used instead. |
@@ -315,11 +384,9 @@ static char *cmdline __initdata; | |||
315 | * allocation past addr and -1 otherwise. addr is adjusted to be at | 384 | * allocation past addr and -1 otherwise. addr is adjusted to be at |
316 | * the end of the node. | 385 | * the end of the node. |
317 | */ | 386 | */ |
318 | static int __init setup_node_range(int nid, struct bootnode *nodes, u64 *addr, | 387 | static int __init setup_node_range(int nid, u64 *addr, u64 size, u64 max_addr) |
319 | u64 size, u64 max_addr) | ||
320 | { | 388 | { |
321 | int ret = 0; | 389 | int ret = 0; |
322 | |||
323 | nodes[nid].start = *addr; | 390 | nodes[nid].start = *addr; |
324 | *addr += size; | 391 | *addr += size; |
325 | if (*addr >= max_addr) { | 392 | if (*addr >= max_addr) { |
@@ -335,12 +402,111 @@ static int __init setup_node_range(int nid, struct bootnode *nodes, u64 *addr, | |||
335 | } | 402 | } |
336 | 403 | ||
337 | /* | 404 | /* |
405 | * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr | ||
406 | * to max_addr. The return value is the number of nodes allocated. | ||
407 | */ | ||
408 | static int __init split_nodes_interleave(u64 addr, u64 max_addr, | ||
409 | int nr_phys_nodes, int nr_nodes) | ||
410 | { | ||
411 | nodemask_t physnode_mask = NODE_MASK_NONE; | ||
412 | u64 size; | ||
413 | int big; | ||
414 | int ret = 0; | ||
415 | int i; | ||
416 | |||
417 | if (nr_nodes <= 0) | ||
418 | return -1; | ||
419 | if (nr_nodes > MAX_NUMNODES) { | ||
420 | pr_info("numa=fake=%d too large, reducing to %d\n", | ||
421 | nr_nodes, MAX_NUMNODES); | ||
422 | nr_nodes = MAX_NUMNODES; | ||
423 | } | ||
424 | |||
425 | size = (max_addr - addr - e820_hole_size(addr, max_addr)) / nr_nodes; | ||
426 | /* | ||
427 | * Calculate the number of big nodes that can be allocated as a result | ||
428 | * of consolidating the remainder. | ||
429 | */ | ||
430 | big = ((size & ~FAKE_NODE_MIN_HASH_MASK) & nr_nodes) / | ||
431 | FAKE_NODE_MIN_SIZE; | ||
432 | |||
433 | size &= FAKE_NODE_MIN_HASH_MASK; | ||
434 | if (!size) { | ||
435 | pr_err("Not enough memory for each node. " | ||
436 | "NUMA emulation disabled.\n"); | ||
437 | return -1; | ||
438 | } | ||
439 | |||
440 | for (i = 0; i < nr_phys_nodes; i++) | ||
441 | if (physnodes[i].start != physnodes[i].end) | ||
442 | node_set(i, physnode_mask); | ||
443 | |||
444 | /* | ||
445 | * Continue to fill physical nodes with fake nodes until there is no | ||
446 | * memory left on any of them. | ||
447 | */ | ||
448 | while (nodes_weight(physnode_mask)) { | ||
449 | for_each_node_mask(i, physnode_mask) { | ||
450 | u64 end = physnodes[i].start + size; | ||
451 | u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN); | ||
452 | |||
453 | if (ret < big) | ||
454 | end += FAKE_NODE_MIN_SIZE; | ||
455 | |||
456 | /* | ||
457 | * Continue to add memory to this fake node if its | ||
458 | * non-reserved memory is less than the per-node size. | ||
459 | */ | ||
460 | while (end - physnodes[i].start - | ||
461 | e820_hole_size(physnodes[i].start, end) < size) { | ||
462 | end += FAKE_NODE_MIN_SIZE; | ||
463 | if (end > physnodes[i].end) { | ||
464 | end = physnodes[i].end; | ||
465 | break; | ||
466 | } | ||
467 | } | ||
468 | |||
469 | /* | ||
470 | * If there won't be at least FAKE_NODE_MIN_SIZE of | ||
471 | * non-reserved memory in ZONE_DMA32 for the next node, | ||
472 | * this one must extend to the boundary. | ||
473 | */ | ||
474 | if (end < dma32_end && dma32_end - end - | ||
475 | e820_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) | ||
476 | end = dma32_end; | ||
477 | |||
478 | /* | ||
479 | * If there won't be enough non-reserved memory for the | ||
480 | * next node, this one must extend to the end of the | ||
481 | * physical node. | ||
482 | */ | ||
483 | if (physnodes[i].end - end - | ||
484 | e820_hole_size(end, physnodes[i].end) < size) | ||
485 | end = physnodes[i].end; | ||
486 | |||
487 | /* | ||
488 | * Avoid allocating more nodes than requested, which can | ||
489 | * happen as a result of rounding down each node's size | ||
490 | * to FAKE_NODE_MIN_SIZE. | ||
491 | */ | ||
492 | if (nodes_weight(physnode_mask) + ret >= nr_nodes) | ||
493 | end = physnodes[i].end; | ||
494 | |||
495 | if (setup_node_range(ret++, &physnodes[i].start, | ||
496 | end - physnodes[i].start, | ||
497 | physnodes[i].end) < 0) | ||
498 | node_clear(i, physnode_mask); | ||
499 | } | ||
500 | } | ||
501 | return ret; | ||
502 | } | ||
503 | |||
504 | /* | ||
338 | * Splits num_nodes nodes up equally starting at node_start. The return value | 505 | * Splits num_nodes nodes up equally starting at node_start. The return value |
339 | * is the number of nodes split up and addr is adjusted to be at the end of the | 506 | * is the number of nodes split up and addr is adjusted to be at the end of the |
340 | * last node allocated. | 507 | * last node allocated. |
341 | */ | 508 | */ |
342 | static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr, | 509 | static int __init split_nodes_equally(u64 *addr, u64 max_addr, int node_start, |
343 | u64 max_addr, int node_start, | ||
344 | int num_nodes) | 510 | int num_nodes) |
345 | { | 511 | { |
346 | unsigned int big; | 512 | unsigned int big; |
@@ -388,7 +554,7 @@ static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr, | |||
388 | break; | 554 | break; |
389 | } | 555 | } |
390 | } | 556 | } |
391 | if (setup_node_range(i, nodes, addr, end - *addr, max_addr) < 0) | 557 | if (setup_node_range(i, addr, end - *addr, max_addr) < 0) |
392 | break; | 558 | break; |
393 | } | 559 | } |
394 | return i - node_start + 1; | 560 | return i - node_start + 1; |
@@ -399,12 +565,12 @@ static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr, | |||
399 | * always assigned to a final node and can be asymmetric. Returns the number of | 565 | * always assigned to a final node and can be asymmetric. Returns the number of |
400 | * nodes split. | 566 | * nodes split. |
401 | */ | 567 | */ |
402 | static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr, | 568 | static int __init split_nodes_by_size(u64 *addr, u64 max_addr, int node_start, |
403 | u64 max_addr, int node_start, u64 size) | 569 | u64 size) |
404 | { | 570 | { |
405 | int i = node_start; | 571 | int i = node_start; |
406 | size = (size << 20) & FAKE_NODE_MIN_HASH_MASK; | 572 | size = (size << 20) & FAKE_NODE_MIN_HASH_MASK; |
407 | while (!setup_node_range(i++, nodes, addr, size, max_addr)) | 573 | while (!setup_node_range(i++, addr, size, max_addr)) |
408 | ; | 574 | ; |
409 | return i - node_start; | 575 | return i - node_start; |
410 | } | 576 | } |
@@ -413,15 +579,15 @@ static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr, | |||
413 | * Sets up the system RAM area from start_pfn to last_pfn according to the | 579 | * Sets up the system RAM area from start_pfn to last_pfn according to the |
414 | * numa=fake command-line option. | 580 | * numa=fake command-line option. |
415 | */ | 581 | */ |
416 | static struct bootnode nodes[MAX_NUMNODES] __initdata; | 582 | static int __init numa_emulation(unsigned long start_pfn, |
417 | 583 | unsigned long last_pfn, int acpi, int k8) | |
418 | static int __init numa_emulation(unsigned long start_pfn, unsigned long last_pfn) | ||
419 | { | 584 | { |
420 | u64 size, addr = start_pfn << PAGE_SHIFT; | 585 | u64 size, addr = start_pfn << PAGE_SHIFT; |
421 | u64 max_addr = last_pfn << PAGE_SHIFT; | 586 | u64 max_addr = last_pfn << PAGE_SHIFT; |
422 | int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i; | 587 | int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i; |
588 | int num_phys_nodes; | ||
423 | 589 | ||
424 | memset(&nodes, 0, sizeof(nodes)); | 590 | num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8); |
425 | /* | 591 | /* |
426 | * If the numa=fake command-line is just a single number N, split the | 592 | * If the numa=fake command-line is just a single number N, split the |
427 | * system RAM into N fake nodes. | 593 | * system RAM into N fake nodes. |
@@ -429,7 +595,8 @@ static int __init numa_emulation(unsigned long start_pfn, unsigned long last_pfn | |||
429 | if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) { | 595 | if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) { |
430 | long n = simple_strtol(cmdline, NULL, 0); | 596 | long n = simple_strtol(cmdline, NULL, 0); |
431 | 597 | ||
432 | num_nodes = split_nodes_equally(nodes, &addr, max_addr, 0, n); | 598 | num_nodes = split_nodes_interleave(addr, max_addr, |
599 | num_phys_nodes, n); | ||
433 | if (num_nodes < 0) | 600 | if (num_nodes < 0) |
434 | return num_nodes; | 601 | return num_nodes; |
435 | goto out; | 602 | goto out; |
@@ -456,8 +623,8 @@ static int __init numa_emulation(unsigned long start_pfn, unsigned long last_pfn | |||
456 | size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK; | 623 | size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK; |
457 | if (size) | 624 | if (size) |
458 | for (i = 0; i < coeff; i++, num_nodes++) | 625 | for (i = 0; i < coeff; i++, num_nodes++) |
459 | if (setup_node_range(num_nodes, nodes, | 626 | if (setup_node_range(num_nodes, &addr, |
460 | &addr, size, max_addr) < 0) | 627 | size, max_addr) < 0) |
461 | goto done; | 628 | goto done; |
462 | if (!*cmdline) | 629 | if (!*cmdline) |
463 | break; | 630 | break; |
@@ -473,7 +640,7 @@ done: | |||
473 | if (addr < max_addr) { | 640 | if (addr < max_addr) { |
474 | if (coeff_flag && coeff < 0) { | 641 | if (coeff_flag && coeff < 0) { |
475 | /* Split remaining nodes into num-sized chunks */ | 642 | /* Split remaining nodes into num-sized chunks */ |
476 | num_nodes += split_nodes_by_size(nodes, &addr, max_addr, | 643 | num_nodes += split_nodes_by_size(&addr, max_addr, |
477 | num_nodes, num); | 644 | num_nodes, num); |
478 | goto out; | 645 | goto out; |
479 | } | 646 | } |
@@ -482,7 +649,7 @@ done: | |||
482 | /* Split remaining nodes into coeff chunks */ | 649 | /* Split remaining nodes into coeff chunks */ |
483 | if (coeff <= 0) | 650 | if (coeff <= 0) |
484 | break; | 651 | break; |
485 | num_nodes += split_nodes_equally(nodes, &addr, max_addr, | 652 | num_nodes += split_nodes_equally(&addr, max_addr, |
486 | num_nodes, coeff); | 653 | num_nodes, coeff); |
487 | break; | 654 | break; |
488 | case ',': | 655 | case ',': |
@@ -490,8 +657,8 @@ done: | |||
490 | break; | 657 | break; |
491 | default: | 658 | default: |
492 | /* Give one final node */ | 659 | /* Give one final node */ |
493 | setup_node_range(num_nodes, nodes, &addr, | 660 | setup_node_range(num_nodes, &addr, max_addr - addr, |
494 | max_addr - addr, max_addr); | 661 | max_addr); |
495 | num_nodes++; | 662 | num_nodes++; |
496 | } | 663 | } |
497 | } | 664 | } |
@@ -505,14 +672,10 @@ out: | |||
505 | } | 672 | } |
506 | 673 | ||
507 | /* | 674 | /* |
508 | * We need to vacate all active ranges that may have been registered by | 675 | * We need to vacate all active ranges that may have been registered for |
509 | * SRAT and set acpi_numa to -1 so that srat_disabled() always returns | 676 | * the e820 memory map. |
510 | * true. NUMA emulation has succeeded so we will not scan ACPI nodes. | ||
511 | */ | 677 | */ |
512 | remove_all_active_ranges(); | 678 | remove_all_active_ranges(); |
513 | #ifdef CONFIG_ACPI_NUMA | ||
514 | acpi_numa = -1; | ||
515 | #endif | ||
516 | for_each_node_mask(i, node_possible_map) { | 679 | for_each_node_mask(i, node_possible_map) { |
517 | e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, | 680 | e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, |
518 | nodes[i].end >> PAGE_SHIFT); | 681 | nodes[i].end >> PAGE_SHIFT); |
@@ -524,7 +687,8 @@ out: | |||
524 | } | 687 | } |
525 | #endif /* CONFIG_NUMA_EMU */ | 688 | #endif /* CONFIG_NUMA_EMU */ |
526 | 689 | ||
527 | void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn) | 690 | void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn, |
691 | int acpi, int k8) | ||
528 | { | 692 | { |
529 | int i; | 693 | int i; |
530 | 694 | ||
@@ -532,23 +696,22 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn) | |||
532 | nodes_clear(node_online_map); | 696 | nodes_clear(node_online_map); |
533 | 697 | ||
534 | #ifdef CONFIG_NUMA_EMU | 698 | #ifdef CONFIG_NUMA_EMU |
535 | if (cmdline && !numa_emulation(start_pfn, last_pfn)) | 699 | if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, k8)) |
536 | return; | 700 | return; |
537 | nodes_clear(node_possible_map); | 701 | nodes_clear(node_possible_map); |
538 | nodes_clear(node_online_map); | 702 | nodes_clear(node_online_map); |
539 | #endif | 703 | #endif |
540 | 704 | ||
541 | #ifdef CONFIG_ACPI_NUMA | 705 | #ifdef CONFIG_ACPI_NUMA |
542 | if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT, | 706 | if (!numa_off && acpi && !acpi_scan_nodes(start_pfn << PAGE_SHIFT, |
543 | last_pfn << PAGE_SHIFT)) | 707 | last_pfn << PAGE_SHIFT)) |
544 | return; | 708 | return; |
545 | nodes_clear(node_possible_map); | 709 | nodes_clear(node_possible_map); |
546 | nodes_clear(node_online_map); | 710 | nodes_clear(node_online_map); |
547 | #endif | 711 | #endif |
548 | 712 | ||
549 | #ifdef CONFIG_K8_NUMA | 713 | #ifdef CONFIG_K8_NUMA |
550 | if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, | 714 | if (!numa_off && k8 && !k8_scan_nodes()) |
551 | last_pfn<<PAGE_SHIFT)) | ||
552 | return; | 715 | return; |
553 | nodes_clear(node_possible_map); | 716 | nodes_clear(node_possible_map); |
554 | nodes_clear(node_online_map); | 717 | nodes_clear(node_online_map); |
@@ -601,6 +764,25 @@ static __init int numa_setup(char *opt) | |||
601 | early_param("numa", numa_setup); | 764 | early_param("numa", numa_setup); |
602 | 765 | ||
603 | #ifdef CONFIG_NUMA | 766 | #ifdef CONFIG_NUMA |
767 | |||
768 | static __init int find_near_online_node(int node) | ||
769 | { | ||
770 | int n, val; | ||
771 | int min_val = INT_MAX; | ||
772 | int best_node = -1; | ||
773 | |||
774 | for_each_online_node(n) { | ||
775 | val = node_distance(node, n); | ||
776 | |||
777 | if (val < min_val) { | ||
778 | min_val = val; | ||
779 | best_node = n; | ||
780 | } | ||
781 | } | ||
782 | |||
783 | return best_node; | ||
784 | } | ||
785 | |||
604 | /* | 786 | /* |
605 | * Setup early cpu_to_node. | 787 | * Setup early cpu_to_node. |
606 | * | 788 | * |
@@ -632,7 +814,7 @@ void __init init_cpu_to_node(void) | |||
632 | if (node == NUMA_NO_NODE) | 814 | if (node == NUMA_NO_NODE) |
633 | continue; | 815 | continue; |
634 | if (!node_online(node)) | 816 | if (!node_online(node)) |
635 | continue; | 817 | node = find_near_online_node(node); |
636 | numa_set_node(cpu, node); | 818 | numa_set_node(cpu, node); |
637 | } | 819 | } |
638 | } | 820 | } |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index dd38bfbefd1f..1d4eb93d333c 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -279,6 +279,22 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, | |||
279 | __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) | 279 | __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) |
280 | pgprot_val(forbidden) |= _PAGE_RW; | 280 | pgprot_val(forbidden) |= _PAGE_RW; |
281 | 281 | ||
282 | #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) | ||
283 | /* | ||
284 | * Once the kernel maps the text as RO (kernel_set_to_readonly is set), | ||
285 | * kernel text mappings for the large page aligned text, rodata sections | ||
286 | * will be always read-only. For the kernel identity mappings covering | ||
287 | * the holes caused by this alignment can be anything that user asks. | ||
288 | * | ||
289 | * This will preserve the large page mappings for kernel text/data | ||
290 | * at no extra cost. | ||
291 | */ | ||
292 | if (kernel_set_to_readonly && | ||
293 | within(address, (unsigned long)_text, | ||
294 | (unsigned long)__end_rodata_hpage_align)) | ||
295 | pgprot_val(forbidden) |= _PAGE_RW; | ||
296 | #endif | ||
297 | |||
282 | prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); | 298 | prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); |
283 | 299 | ||
284 | return prot; | 300 | return prot; |
@@ -1069,12 +1085,18 @@ EXPORT_SYMBOL(set_memory_array_wb); | |||
1069 | 1085 | ||
1070 | int set_memory_x(unsigned long addr, int numpages) | 1086 | int set_memory_x(unsigned long addr, int numpages) |
1071 | { | 1087 | { |
1088 | if (!(__supported_pte_mask & _PAGE_NX)) | ||
1089 | return 0; | ||
1090 | |||
1072 | return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0); | 1091 | return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0); |
1073 | } | 1092 | } |
1074 | EXPORT_SYMBOL(set_memory_x); | 1093 | EXPORT_SYMBOL(set_memory_x); |
1075 | 1094 | ||
1076 | int set_memory_nx(unsigned long addr, int numpages) | 1095 | int set_memory_nx(unsigned long addr, int numpages) |
1077 | { | 1096 | { |
1097 | if (!(__supported_pte_mask & _PAGE_NX)) | ||
1098 | return 0; | ||
1099 | |||
1078 | return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0); | 1100 | return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0); |
1079 | } | 1101 | } |
1080 | EXPORT_SYMBOL(set_memory_nx); | 1102 | EXPORT_SYMBOL(set_memory_nx); |
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index e78cd0ec2bcf..ae9648eb1c7f 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <asm/cacheflush.h> | 20 | #include <asm/cacheflush.h> |
21 | #include <asm/processor.h> | 21 | #include <asm/processor.h> |
22 | #include <asm/tlbflush.h> | 22 | #include <asm/tlbflush.h> |
23 | #include <asm/x86_init.h> | ||
23 | #include <asm/pgtable.h> | 24 | #include <asm/pgtable.h> |
24 | #include <asm/fcntl.h> | 25 | #include <asm/fcntl.h> |
25 | #include <asm/e820.h> | 26 | #include <asm/e820.h> |
@@ -355,9 +356,6 @@ static int free_ram_pages_type(u64 start, u64 end) | |||
355 | * - _PAGE_CACHE_UC_MINUS | 356 | * - _PAGE_CACHE_UC_MINUS |
356 | * - _PAGE_CACHE_UC | 357 | * - _PAGE_CACHE_UC |
357 | * | 358 | * |
358 | * req_type will have a special case value '-1', when requester want to inherit | ||
359 | * the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS. | ||
360 | * | ||
361 | * If new_type is NULL, function will return an error if it cannot reserve the | 359 | * If new_type is NULL, function will return an error if it cannot reserve the |
362 | * region with req_type. If new_type is non-NULL, function will return | 360 | * region with req_type. If new_type is non-NULL, function will return |
363 | * available type in new_type in case of no error. In case of any error | 361 | * available type in new_type in case of no error. In case of any error |
@@ -377,9 +375,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
377 | if (!pat_enabled) { | 375 | if (!pat_enabled) { |
378 | /* This is identical to page table setting without PAT */ | 376 | /* This is identical to page table setting without PAT */ |
379 | if (new_type) { | 377 | if (new_type) { |
380 | if (req_type == -1) | 378 | if (req_type == _PAGE_CACHE_WC) |
381 | *new_type = _PAGE_CACHE_WB; | ||
382 | else if (req_type == _PAGE_CACHE_WC) | ||
383 | *new_type = _PAGE_CACHE_UC_MINUS; | 379 | *new_type = _PAGE_CACHE_UC_MINUS; |
384 | else | 380 | else |
385 | *new_type = req_type & _PAGE_CACHE_MASK; | 381 | *new_type = req_type & _PAGE_CACHE_MASK; |
@@ -388,7 +384,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
388 | } | 384 | } |
389 | 385 | ||
390 | /* Low ISA region is always mapped WB in page table. No need to track */ | 386 | /* Low ISA region is always mapped WB in page table. No need to track */ |
391 | if (is_ISA_range(start, end - 1)) { | 387 | if (x86_platform.is_untracked_pat_range(start, end)) { |
392 | if (new_type) | 388 | if (new_type) |
393 | *new_type = _PAGE_CACHE_WB; | 389 | *new_type = _PAGE_CACHE_WB; |
394 | return 0; | 390 | return 0; |
@@ -499,7 +495,7 @@ int free_memtype(u64 start, u64 end) | |||
499 | return 0; | 495 | return 0; |
500 | 496 | ||
501 | /* Low ISA region is always mapped WB. No need to track */ | 497 | /* Low ISA region is always mapped WB. No need to track */ |
502 | if (is_ISA_range(start, end - 1)) | 498 | if (x86_platform.is_untracked_pat_range(start, end)) |
503 | return 0; | 499 | return 0; |
504 | 500 | ||
505 | is_range_ram = pat_pagerange_is_ram(start, end); | 501 | is_range_ram = pat_pagerange_is_ram(start, end); |
@@ -582,7 +578,7 @@ static unsigned long lookup_memtype(u64 paddr) | |||
582 | int rettype = _PAGE_CACHE_WB; | 578 | int rettype = _PAGE_CACHE_WB; |
583 | struct memtype *entry; | 579 | struct memtype *entry; |
584 | 580 | ||
585 | if (is_ISA_range(paddr, paddr + PAGE_SIZE - 1)) | 581 | if (x86_platform.is_untracked_pat_range(paddr, paddr + PAGE_SIZE)) |
586 | return rettype; | 582 | return rettype; |
587 | 583 | ||
588 | if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { | 584 | if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { |
@@ -708,9 +704,8 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, | |||
708 | if (!range_is_allowed(pfn, size)) | 704 | if (!range_is_allowed(pfn, size)) |
709 | return 0; | 705 | return 0; |
710 | 706 | ||
711 | if (file->f_flags & O_SYNC) { | 707 | if (file->f_flags & O_DSYNC) |
712 | flags = _PAGE_CACHE_UC_MINUS; | 708 | flags = _PAGE_CACHE_UC_MINUS; |
713 | } | ||
714 | 709 | ||
715 | #ifdef CONFIG_X86_32 | 710 | #ifdef CONFIG_X86_32 |
716 | /* | 711 | /* |
@@ -1018,8 +1013,10 @@ static const struct file_operations memtype_fops = { | |||
1018 | 1013 | ||
1019 | static int __init pat_memtype_list_init(void) | 1014 | static int __init pat_memtype_list_init(void) |
1020 | { | 1015 | { |
1021 | debugfs_create_file("pat_memtype_list", S_IRUSR, arch_debugfs_dir, | 1016 | if (pat_enabled) { |
1022 | NULL, &memtype_fops); | 1017 | debugfs_create_file("pat_memtype_list", S_IRUSR, |
1018 | arch_debugfs_dir, NULL, &memtype_fops); | ||
1019 | } | ||
1023 | return 0; | 1020 | return 0; |
1024 | } | 1021 | } |
1025 | 1022 | ||
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c index 513d8ed5d2ec..a3250aa34086 100644 --- a/arch/x86/mm/setup_nx.c +++ b/arch/x86/mm/setup_nx.c | |||
@@ -3,10 +3,8 @@ | |||
3 | #include <linux/init.h> | 3 | #include <linux/init.h> |
4 | 4 | ||
5 | #include <asm/pgtable.h> | 5 | #include <asm/pgtable.h> |
6 | #include <asm/proto.h> | ||
6 | 7 | ||
7 | int nx_enabled; | ||
8 | |||
9 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) | ||
10 | static int disable_nx __cpuinitdata; | 8 | static int disable_nx __cpuinitdata; |
11 | 9 | ||
12 | /* | 10 | /* |
@@ -22,48 +20,41 @@ static int __init noexec_setup(char *str) | |||
22 | if (!str) | 20 | if (!str) |
23 | return -EINVAL; | 21 | return -EINVAL; |
24 | if (!strncmp(str, "on", 2)) { | 22 | if (!strncmp(str, "on", 2)) { |
25 | __supported_pte_mask |= _PAGE_NX; | ||
26 | disable_nx = 0; | 23 | disable_nx = 0; |
27 | } else if (!strncmp(str, "off", 3)) { | 24 | } else if (!strncmp(str, "off", 3)) { |
28 | disable_nx = 1; | 25 | disable_nx = 1; |
29 | __supported_pte_mask &= ~_PAGE_NX; | ||
30 | } | 26 | } |
27 | x86_configure_nx(); | ||
31 | return 0; | 28 | return 0; |
32 | } | 29 | } |
33 | early_param("noexec", noexec_setup); | 30 | early_param("noexec", noexec_setup); |
34 | #endif | ||
35 | 31 | ||
36 | #ifdef CONFIG_X86_PAE | 32 | void __cpuinit x86_configure_nx(void) |
37 | void __init set_nx(void) | ||
38 | { | 33 | { |
39 | unsigned int v[4], l, h; | 34 | if (cpu_has_nx && !disable_nx) |
40 | 35 | __supported_pte_mask |= _PAGE_NX; | |
41 | if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { | 36 | else |
42 | cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); | 37 | __supported_pte_mask &= ~_PAGE_NX; |
38 | } | ||
43 | 39 | ||
44 | if ((v[3] & (1 << 20)) && !disable_nx) { | 40 | void __init x86_report_nx(void) |
45 | rdmsr(MSR_EFER, l, h); | 41 | { |
46 | l |= EFER_NX; | 42 | if (!cpu_has_nx) { |
47 | wrmsr(MSR_EFER, l, h); | 43 | printk(KERN_NOTICE "Notice: NX (Execute Disable) protection " |
48 | nx_enabled = 1; | 44 | "missing in CPU or disabled in BIOS!\n"); |
49 | __supported_pte_mask |= _PAGE_NX; | 45 | } else { |
46 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) | ||
47 | if (disable_nx) { | ||
48 | printk(KERN_INFO "NX (Execute Disable) protection: " | ||
49 | "disabled by kernel command line option\n"); | ||
50 | } else { | ||
51 | printk(KERN_INFO "NX (Execute Disable) protection: " | ||
52 | "active\n"); | ||
50 | } | 53 | } |
51 | } | ||
52 | } | ||
53 | #else | 54 | #else |
54 | void set_nx(void) | 55 | /* 32bit non-PAE kernel, NX cannot be used */ |
55 | { | 56 | printk(KERN_NOTICE "Notice: NX (Execute Disable) protection " |
56 | } | 57 | "cannot be enabled: non-PAE kernel!\n"); |
57 | #endif | 58 | #endif |
58 | 59 | } | |
59 | #ifdef CONFIG_X86_64 | ||
60 | void __cpuinit check_efer(void) | ||
61 | { | ||
62 | unsigned long efer; | ||
63 | |||
64 | rdmsrl(MSR_EFER, efer); | ||
65 | if (!(efer & EFER_NX) || disable_nx) | ||
66 | __supported_pte_mask &= ~_PAGE_NX; | ||
67 | } | 60 | } |
68 | #endif | ||
69 | |||
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c index 6f8aa33031c7..9324f13492d5 100644 --- a/arch/x86/mm/srat_32.c +++ b/arch/x86/mm/srat_32.c | |||
@@ -267,6 +267,8 @@ int __init get_memcfg_from_srat(void) | |||
267 | e820_register_active_regions(chunk->nid, chunk->start_pfn, | 267 | e820_register_active_regions(chunk->nid, chunk->start_pfn, |
268 | min(chunk->end_pfn, max_pfn)); | 268 | min(chunk->end_pfn, max_pfn)); |
269 | } | 269 | } |
270 | /* for out of order entries in SRAT */ | ||
271 | sort_node_map(); | ||
270 | 272 | ||
271 | for_each_online_node(nid) { | 273 | for_each_online_node(nid) { |
272 | unsigned long start = node_start_pfn[nid]; | 274 | unsigned long start = node_start_pfn[nid]; |
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index dbb5381f7b3b..a27124185fc1 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -136,7 +136,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) | |||
136 | apicid_to_node[apic_id] = node; | 136 | apicid_to_node[apic_id] = node; |
137 | node_set(node, cpu_nodes_parsed); | 137 | node_set(node, cpu_nodes_parsed); |
138 | acpi_numa = 1; | 138 | acpi_numa = 1; |
139 | printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n", | 139 | printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n", |
140 | pxm, apic_id, node); | 140 | pxm, apic_id, node); |
141 | } | 141 | } |
142 | 142 | ||
@@ -170,7 +170,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) | |||
170 | apicid_to_node[apic_id] = node; | 170 | apicid_to_node[apic_id] = node; |
171 | node_set(node, cpu_nodes_parsed); | 171 | node_set(node, cpu_nodes_parsed); |
172 | acpi_numa = 1; | 172 | acpi_numa = 1; |
173 | printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n", | 173 | printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n", |
174 | pxm, apic_id, node); | 174 | pxm, apic_id, node); |
175 | } | 175 | } |
176 | 176 | ||
@@ -290,8 +290,6 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) | |||
290 | 290 | ||
291 | printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm, | 291 | printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm, |
292 | start, end); | 292 | start, end); |
293 | e820_register_active_regions(node, start >> PAGE_SHIFT, | ||
294 | end >> PAGE_SHIFT); | ||
295 | 293 | ||
296 | if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) { | 294 | if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) { |
297 | update_nodes_add(node, start, end); | 295 | update_nodes_add(node, start, end); |
@@ -319,7 +317,7 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) | |||
319 | unsigned long s = nodes[i].start >> PAGE_SHIFT; | 317 | unsigned long s = nodes[i].start >> PAGE_SHIFT; |
320 | unsigned long e = nodes[i].end >> PAGE_SHIFT; | 318 | unsigned long e = nodes[i].end >> PAGE_SHIFT; |
321 | pxmram += e - s; | 319 | pxmram += e - s; |
322 | pxmram -= absent_pages_in_range(s, e); | 320 | pxmram -= __absent_pages_in_range(i, s, e); |
323 | if ((long)pxmram < 0) | 321 | if ((long)pxmram < 0) |
324 | pxmram = 0; | 322 | pxmram = 0; |
325 | } | 323 | } |
@@ -338,6 +336,19 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) | |||
338 | 336 | ||
339 | void __init acpi_numa_arch_fixup(void) {} | 337 | void __init acpi_numa_arch_fixup(void) {} |
340 | 338 | ||
339 | int __init acpi_get_nodes(struct bootnode *physnodes) | ||
340 | { | ||
341 | int i; | ||
342 | int ret = 0; | ||
343 | |||
344 | for_each_node_mask(i, nodes_parsed) { | ||
345 | physnodes[ret].start = nodes[i].start; | ||
346 | physnodes[ret].end = nodes[i].end; | ||
347 | ret++; | ||
348 | } | ||
349 | return ret; | ||
350 | } | ||
351 | |||
341 | /* Use the information discovered above to actually set up the nodes. */ | 352 | /* Use the information discovered above to actually set up the nodes. */ |
342 | int __init acpi_scan_nodes(unsigned long start, unsigned long end) | 353 | int __init acpi_scan_nodes(unsigned long start, unsigned long end) |
343 | { | 354 | { |
@@ -350,11 +361,6 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
350 | for (i = 0; i < MAX_NUMNODES; i++) | 361 | for (i = 0; i < MAX_NUMNODES; i++) |
351 | cutoff_node(i, start, end); | 362 | cutoff_node(i, start, end); |
352 | 363 | ||
353 | if (!nodes_cover_memory(nodes)) { | ||
354 | bad_srat(); | ||
355 | return -1; | ||
356 | } | ||
357 | |||
358 | memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks, | 364 | memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks, |
359 | memblk_nodeid); | 365 | memblk_nodeid); |
360 | if (memnode_shift < 0) { | 366 | if (memnode_shift < 0) { |
@@ -364,6 +370,16 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
364 | return -1; | 370 | return -1; |
365 | } | 371 | } |
366 | 372 | ||
373 | for_each_node_mask(i, nodes_parsed) | ||
374 | e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, | ||
375 | nodes[i].end >> PAGE_SHIFT); | ||
376 | /* for out of order entries in SRAT */ | ||
377 | sort_node_map(); | ||
378 | if (!nodes_cover_memory(nodes)) { | ||
379 | bad_srat(); | ||
380 | return -1; | ||
381 | } | ||
382 | |||
367 | /* Account for nodes with cpus and no memory */ | 383 | /* Account for nodes with cpus and no memory */ |
368 | nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed); | 384 | nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed); |
369 | 385 | ||
@@ -454,7 +470,6 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes) | |||
454 | for (i = 0; i < num_nodes; i++) | 470 | for (i = 0; i < num_nodes; i++) |
455 | if (fake_nodes[i].start != fake_nodes[i].end) | 471 | if (fake_nodes[i].start != fake_nodes[i].end) |
456 | node_set(i, nodes_parsed); | 472 | node_set(i, nodes_parsed); |
457 | WARN_ON(!nodes_cover_memory(fake_nodes)); | ||
458 | } | 473 | } |
459 | 474 | ||
460 | static int null_slit_node_compare(int a, int b) | 475 | static int null_slit_node_compare(int a, int b) |
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c index 427fd1b56df5..8565d944f7cf 100644 --- a/arch/x86/mm/testmmiotrace.c +++ b/arch/x86/mm/testmmiotrace.c | |||
@@ -1,12 +1,13 @@ | |||
1 | /* | 1 | /* |
2 | * Written by Pekka Paalanen, 2008-2009 <pq@iki.fi> | 2 | * Written by Pekka Paalanen, 2008-2009 <pq@iki.fi> |
3 | */ | 3 | */ |
4 | |||
5 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
6 | |||
4 | #include <linux/module.h> | 7 | #include <linux/module.h> |
5 | #include <linux/io.h> | 8 | #include <linux/io.h> |
6 | #include <linux/mmiotrace.h> | 9 | #include <linux/mmiotrace.h> |
7 | 10 | ||
8 | #define MODULE_NAME "testmmiotrace" | ||
9 | |||
10 | static unsigned long mmio_address; | 11 | static unsigned long mmio_address; |
11 | module_param(mmio_address, ulong, 0); | 12 | module_param(mmio_address, ulong, 0); |
12 | MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB " | 13 | MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB " |
@@ -30,7 +31,7 @@ static unsigned v32(unsigned i) | |||
30 | static void do_write_test(void __iomem *p) | 31 | static void do_write_test(void __iomem *p) |
31 | { | 32 | { |
32 | unsigned int i; | 33 | unsigned int i; |
33 | pr_info(MODULE_NAME ": write test.\n"); | 34 | pr_info("write test.\n"); |
34 | mmiotrace_printk("Write test.\n"); | 35 | mmiotrace_printk("Write test.\n"); |
35 | 36 | ||
36 | for (i = 0; i < 256; i++) | 37 | for (i = 0; i < 256; i++) |
@@ -47,7 +48,7 @@ static void do_read_test(void __iomem *p) | |||
47 | { | 48 | { |
48 | unsigned int i; | 49 | unsigned int i; |
49 | unsigned errs[3] = { 0 }; | 50 | unsigned errs[3] = { 0 }; |
50 | pr_info(MODULE_NAME ": read test.\n"); | 51 | pr_info("read test.\n"); |
51 | mmiotrace_printk("Read test.\n"); | 52 | mmiotrace_printk("Read test.\n"); |
52 | 53 | ||
53 | for (i = 0; i < 256; i++) | 54 | for (i = 0; i < 256; i++) |
@@ -68,7 +69,7 @@ static void do_read_test(void __iomem *p) | |||
68 | 69 | ||
69 | static void do_read_far_test(void __iomem *p) | 70 | static void do_read_far_test(void __iomem *p) |
70 | { | 71 | { |
71 | pr_info(MODULE_NAME ": read far test.\n"); | 72 | pr_info("read far test.\n"); |
72 | mmiotrace_printk("Read far test.\n"); | 73 | mmiotrace_printk("Read far test.\n"); |
73 | 74 | ||
74 | ioread32(p + read_far); | 75 | ioread32(p + read_far); |
@@ -78,7 +79,7 @@ static void do_test(unsigned long size) | |||
78 | { | 79 | { |
79 | void __iomem *p = ioremap_nocache(mmio_address, size); | 80 | void __iomem *p = ioremap_nocache(mmio_address, size); |
80 | if (!p) { | 81 | if (!p) { |
81 | pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); | 82 | pr_err("could not ioremap, aborting.\n"); |
82 | return; | 83 | return; |
83 | } | 84 | } |
84 | mmiotrace_printk("ioremap returned %p.\n", p); | 85 | mmiotrace_printk("ioremap returned %p.\n", p); |
@@ -94,24 +95,22 @@ static int __init init(void) | |||
94 | unsigned long size = (read_far) ? (8 << 20) : (16 << 10); | 95 | unsigned long size = (read_far) ? (8 << 20) : (16 << 10); |
95 | 96 | ||
96 | if (mmio_address == 0) { | 97 | if (mmio_address == 0) { |
97 | pr_err(MODULE_NAME ": you have to use the module argument " | 98 | pr_err("you have to use the module argument mmio_address.\n"); |
98 | "mmio_address.\n"); | 99 | pr_err("DO NOT LOAD THIS MODULE UNLESS YOU REALLY KNOW WHAT YOU ARE DOING!\n"); |
99 | pr_err(MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS" | ||
100 | " YOU REALLY KNOW WHAT YOU ARE DOING!\n"); | ||
101 | return -ENXIO; | 100 | return -ENXIO; |
102 | } | 101 | } |
103 | 102 | ||
104 | pr_warning(MODULE_NAME ": WARNING: mapping %lu kB @ 0x%08lx in PCI " | 103 | pr_warning("WARNING: mapping %lu kB @ 0x%08lx in PCI address space, " |
105 | "address space, and writing 16 kB of rubbish in there.\n", | 104 | "and writing 16 kB of rubbish in there.\n", |
106 | size >> 10, mmio_address); | 105 | size >> 10, mmio_address); |
107 | do_test(size); | 106 | do_test(size); |
108 | pr_info(MODULE_NAME ": All done.\n"); | 107 | pr_info("All done.\n"); |
109 | return 0; | 108 | return 0; |
110 | } | 109 | } |
111 | 110 | ||
112 | static void __exit cleanup(void) | 111 | static void __exit cleanup(void) |
113 | { | 112 | { |
114 | pr_debug(MODULE_NAME ": unloaded.\n"); | 113 | pr_debug("unloaded.\n"); |
115 | } | 114 | } |
116 | 115 | ||
117 | module_init(init); | 116 | module_init(init); |
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 36fe08eeb5c3..65b58e4b0b8b 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c | |||
@@ -8,6 +8,7 @@ | |||
8 | 8 | ||
9 | #include <asm/tlbflush.h> | 9 | #include <asm/tlbflush.h> |
10 | #include <asm/mmu_context.h> | 10 | #include <asm/mmu_context.h> |
11 | #include <asm/cache.h> | ||
11 | #include <asm/apic.h> | 12 | #include <asm/apic.h> |
12 | #include <asm/uv/uv.h> | 13 | #include <asm/uv/uv.h> |
13 | 14 | ||
@@ -43,7 +44,7 @@ union smp_flush_state { | |||
43 | spinlock_t tlbstate_lock; | 44 | spinlock_t tlbstate_lock; |
44 | DECLARE_BITMAP(flush_cpumask, NR_CPUS); | 45 | DECLARE_BITMAP(flush_cpumask, NR_CPUS); |
45 | }; | 46 | }; |
46 | char pad[CONFIG_X86_INTERNODE_CACHE_BYTES]; | 47 | char pad[INTERNODE_CACHE_BYTES]; |
47 | } ____cacheline_internodealigned_in_smp; | 48 | } ____cacheline_internodealigned_in_smp; |
48 | 49 | ||
49 | /* State is put into the per CPU data section, but padded | 50 | /* State is put into the per CPU data section, but padded |
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index 044897be021f..3855096c59b8 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c | |||
@@ -41,10 +41,11 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) | |||
41 | } | 41 | } |
42 | 42 | ||
43 | static struct stacktrace_ops backtrace_ops = { | 43 | static struct stacktrace_ops backtrace_ops = { |
44 | .warning = backtrace_warning, | 44 | .warning = backtrace_warning, |
45 | .warning_symbol = backtrace_warning_symbol, | 45 | .warning_symbol = backtrace_warning_symbol, |
46 | .stack = backtrace_stack, | 46 | .stack = backtrace_stack, |
47 | .address = backtrace_address, | 47 | .address = backtrace_address, |
48 | .walk_stack = print_context_stack, | ||
48 | }; | 49 | }; |
49 | 50 | ||
50 | struct frame_head { | 51 | struct frame_head { |
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index d49202e740ea..564b008a51c7 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile | |||
@@ -15,3 +15,8 @@ obj-$(CONFIG_X86_NUMAQ) += numaq_32.o | |||
15 | 15 | ||
16 | obj-y += common.o early.o | 16 | obj-y += common.o early.o |
17 | obj-y += amd_bus.o | 17 | obj-y += amd_bus.o |
18 | obj-$(CONFIG_X86_64) += bus_numa.o intel_bus.o | ||
19 | |||
20 | ifeq ($(CONFIG_PCI_DEBUG),y) | ||
21 | EXTRA_CFLAGS += -DDEBUG | ||
22 | endif | ||
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 1014eb4bfc37..959e548a7039 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <asm/pci_x86.h> | 7 | #include <asm/pci_x86.h> |
8 | 8 | ||
9 | struct pci_root_info { | 9 | struct pci_root_info { |
10 | struct acpi_device *bridge; | ||
10 | char *name; | 11 | char *name; |
11 | unsigned int res_num; | 12 | unsigned int res_num; |
12 | struct resource *res; | 13 | struct resource *res; |
@@ -58,6 +59,30 @@ bus_has_transparent_bridge(struct pci_bus *bus) | |||
58 | return false; | 59 | return false; |
59 | } | 60 | } |
60 | 61 | ||
62 | static void | ||
63 | align_resource(struct acpi_device *bridge, struct resource *res) | ||
64 | { | ||
65 | int align = (res->flags & IORESOURCE_MEM) ? 16 : 4; | ||
66 | |||
67 | /* | ||
68 | * Host bridge windows are not BARs, but the decoders on the PCI side | ||
69 | * that claim this address space have starting alignment and length | ||
70 | * constraints, so fix any obvious BIOS goofs. | ||
71 | */ | ||
72 | if (!IS_ALIGNED(res->start, align)) { | ||
73 | dev_printk(KERN_DEBUG, &bridge->dev, | ||
74 | "host bridge window %pR invalid; " | ||
75 | "aligning start to %d-byte boundary\n", res, align); | ||
76 | res->start &= ~(align - 1); | ||
77 | } | ||
78 | if (!IS_ALIGNED(res->end + 1, align)) { | ||
79 | dev_printk(KERN_DEBUG, &bridge->dev, | ||
80 | "host bridge window %pR invalid; " | ||
81 | "aligning end to %d-byte boundary\n", res, align); | ||
82 | res->end = ALIGN(res->end, align) - 1; | ||
83 | } | ||
84 | } | ||
85 | |||
61 | static acpi_status | 86 | static acpi_status |
62 | setup_resource(struct acpi_resource *acpi_res, void *data) | 87 | setup_resource(struct acpi_resource *acpi_res, void *data) |
63 | { | 88 | { |
@@ -91,11 +116,12 @@ setup_resource(struct acpi_resource *acpi_res, void *data) | |||
91 | start = addr.minimum + addr.translation_offset; | 116 | start = addr.minimum + addr.translation_offset; |
92 | end = start + addr.address_length - 1; | 117 | end = start + addr.address_length - 1; |
93 | if (info->res_num >= max_root_bus_resources) { | 118 | if (info->res_num >= max_root_bus_resources) { |
94 | printk(KERN_WARNING "PCI: Failed to allocate 0x%lx-0x%lx " | 119 | if (pci_probe & PCI_USE__CRS) |
95 | "from %s for %s due to _CRS returning more than " | 120 | printk(KERN_WARNING "PCI: Failed to allocate " |
96 | "%d resource descriptors\n", (unsigned long) start, | 121 | "0x%lx-0x%lx from %s for %s due to _CRS " |
97 | (unsigned long) end, root->name, info->name, | 122 | "returning more than %d resource descriptors\n", |
98 | max_root_bus_resources); | 123 | (unsigned long) start, (unsigned long) end, |
124 | root->name, info->name, max_root_bus_resources); | ||
99 | return AE_OK; | 125 | return AE_OK; |
100 | } | 126 | } |
101 | 127 | ||
@@ -105,14 +131,28 @@ setup_resource(struct acpi_resource *acpi_res, void *data) | |||
105 | res->start = start; | 131 | res->start = start; |
106 | res->end = end; | 132 | res->end = end; |
107 | res->child = NULL; | 133 | res->child = NULL; |
134 | align_resource(info->bridge, res); | ||
135 | |||
136 | if (!(pci_probe & PCI_USE__CRS)) { | ||
137 | dev_printk(KERN_DEBUG, &info->bridge->dev, | ||
138 | "host bridge window %pR (ignored)\n", res); | ||
139 | return AE_OK; | ||
140 | } | ||
108 | 141 | ||
109 | if (insert_resource(root, res)) { | 142 | if (insert_resource(root, res)) { |
110 | printk(KERN_ERR "PCI: Failed to allocate 0x%lx-0x%lx " | 143 | dev_err(&info->bridge->dev, |
111 | "from %s for %s\n", (unsigned long) res->start, | 144 | "can't allocate host bridge window %pR\n", res); |
112 | (unsigned long) res->end, root->name, info->name); | ||
113 | } else { | 145 | } else { |
114 | info->bus->resource[info->res_num] = res; | 146 | info->bus->resource[info->res_num] = res; |
115 | info->res_num++; | 147 | info->res_num++; |
148 | if (addr.translation_offset) | ||
149 | dev_info(&info->bridge->dev, "host bridge window %pR " | ||
150 | "(PCI address [%#llx-%#llx])\n", | ||
151 | res, res->start - addr.translation_offset, | ||
152 | res->end - addr.translation_offset); | ||
153 | else | ||
154 | dev_info(&info->bridge->dev, | ||
155 | "host bridge window %pR\n", res); | ||
116 | } | 156 | } |
117 | return AE_OK; | 157 | return AE_OK; |
118 | } | 158 | } |
@@ -124,6 +164,12 @@ get_current_resources(struct acpi_device *device, int busnum, | |||
124 | struct pci_root_info info; | 164 | struct pci_root_info info; |
125 | size_t size; | 165 | size_t size; |
126 | 166 | ||
167 | if (!(pci_probe & PCI_USE__CRS)) | ||
168 | dev_info(&device->dev, | ||
169 | "ignoring host bridge windows from ACPI; " | ||
170 | "boot with \"pci=use_crs\" to use them\n"); | ||
171 | |||
172 | info.bridge = device; | ||
127 | info.bus = bus; | 173 | info.bus = bus; |
128 | info.res_num = 0; | 174 | info.res_num = 0; |
129 | acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_resource, | 175 | acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_resource, |
@@ -163,8 +209,9 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do | |||
163 | #endif | 209 | #endif |
164 | 210 | ||
165 | if (domain && !pci_domains_supported) { | 211 | if (domain && !pci_domains_supported) { |
166 | printk(KERN_WARNING "PCI: Multiple domains not supported " | 212 | printk(KERN_WARNING "pci_bus %04x:%02x: " |
167 | "(dom %d, bus %d)\n", domain, busnum); | 213 | "ignored (multiple domains not supported)\n", |
214 | domain, busnum); | ||
168 | return NULL; | 215 | return NULL; |
169 | } | 216 | } |
170 | 217 | ||
@@ -188,7 +235,8 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do | |||
188 | */ | 235 | */ |
189 | sd = kzalloc(sizeof(*sd), GFP_KERNEL); | 236 | sd = kzalloc(sizeof(*sd), GFP_KERNEL); |
190 | if (!sd) { | 237 | if (!sd) { |
191 | printk(KERN_ERR "PCI: OOM, not probing PCI bus %02x\n", busnum); | 238 | printk(KERN_WARNING "pci_bus %04x:%02x: " |
239 | "ignored (out of memory)\n", domain, busnum); | ||
192 | return NULL; | 240 | return NULL; |
193 | } | 241 | } |
194 | 242 | ||
@@ -209,9 +257,7 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do | |||
209 | } else { | 257 | } else { |
210 | bus = pci_create_bus(NULL, busnum, &pci_root_ops, sd); | 258 | bus = pci_create_bus(NULL, busnum, &pci_root_ops, sd); |
211 | if (bus) { | 259 | if (bus) { |
212 | if (pci_probe & PCI_USE__CRS) | 260 | get_current_resources(device, busnum, domain, bus); |
213 | get_current_resources(device, busnum, domain, | ||
214 | bus); | ||
215 | bus->subordinate = pci_scan_child_bus(bus); | 261 | bus->subordinate = pci_scan_child_bus(bus); |
216 | } | 262 | } |
217 | } | 263 | } |
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 572ee9782f2a..95ecbd495955 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c | |||
@@ -6,10 +6,10 @@ | |||
6 | 6 | ||
7 | #ifdef CONFIG_X86_64 | 7 | #ifdef CONFIG_X86_64 |
8 | #include <asm/pci-direct.h> | 8 | #include <asm/pci-direct.h> |
9 | #include <asm/mpspec.h> | ||
10 | #include <linux/cpumask.h> | ||
11 | #endif | 9 | #endif |
12 | 10 | ||
11 | #include "bus_numa.h" | ||
12 | |||
13 | /* | 13 | /* |
14 | * This discovers the pcibus <-> node mapping on AMD K8. | 14 | * This discovers the pcibus <-> node mapping on AMD K8. |
15 | * also get peer root bus resource for io,mmio | 15 | * also get peer root bus resource for io,mmio |
@@ -17,67 +17,6 @@ | |||
17 | 17 | ||
18 | #ifdef CONFIG_X86_64 | 18 | #ifdef CONFIG_X86_64 |
19 | 19 | ||
20 | /* | ||
21 | * sub bus (transparent) will use entres from 3 to store extra from root, | ||
22 | * so need to make sure have enought slot there, increase PCI_BUS_NUM_RESOURCES? | ||
23 | */ | ||
24 | #define RES_NUM 16 | ||
25 | struct pci_root_info { | ||
26 | char name[12]; | ||
27 | unsigned int res_num; | ||
28 | struct resource res[RES_NUM]; | ||
29 | int bus_min; | ||
30 | int bus_max; | ||
31 | int node; | ||
32 | int link; | ||
33 | }; | ||
34 | |||
35 | /* 4 at this time, it may become to 32 */ | ||
36 | #define PCI_ROOT_NR 4 | ||
37 | static int pci_root_num; | ||
38 | static struct pci_root_info pci_root_info[PCI_ROOT_NR]; | ||
39 | |||
40 | void x86_pci_root_bus_res_quirks(struct pci_bus *b) | ||
41 | { | ||
42 | int i; | ||
43 | int j; | ||
44 | struct pci_root_info *info; | ||
45 | |||
46 | /* don't go for it if _CRS is used already */ | ||
47 | if (b->resource[0] != &ioport_resource || | ||
48 | b->resource[1] != &iomem_resource) | ||
49 | return; | ||
50 | |||
51 | /* if only one root bus, don't need to anything */ | ||
52 | if (pci_root_num < 2) | ||
53 | return; | ||
54 | |||
55 | for (i = 0; i < pci_root_num; i++) { | ||
56 | if (pci_root_info[i].bus_min == b->number) | ||
57 | break; | ||
58 | } | ||
59 | |||
60 | if (i == pci_root_num) | ||
61 | return; | ||
62 | |||
63 | printk(KERN_DEBUG "PCI: peer root bus %02x res updated from pci conf\n", | ||
64 | b->number); | ||
65 | |||
66 | info = &pci_root_info[i]; | ||
67 | for (j = 0; j < info->res_num; j++) { | ||
68 | struct resource *res; | ||
69 | struct resource *root; | ||
70 | |||
71 | res = &info->res[j]; | ||
72 | b->resource[j] = res; | ||
73 | if (res->flags & IORESOURCE_IO) | ||
74 | root = &ioport_resource; | ||
75 | else | ||
76 | root = &iomem_resource; | ||
77 | insert_resource(root, res); | ||
78 | } | ||
79 | } | ||
80 | |||
81 | #define RANGE_NUM 16 | 20 | #define RANGE_NUM 16 |
82 | 21 | ||
83 | struct res_range { | 22 | struct res_range { |
@@ -130,52 +69,6 @@ static void __init update_range(struct res_range *range, size_t start, | |||
130 | } | 69 | } |
131 | } | 70 | } |
132 | 71 | ||
133 | static void __init update_res(struct pci_root_info *info, size_t start, | ||
134 | size_t end, unsigned long flags, int merge) | ||
135 | { | ||
136 | int i; | ||
137 | struct resource *res; | ||
138 | |||
139 | if (!merge) | ||
140 | goto addit; | ||
141 | |||
142 | /* try to merge it with old one */ | ||
143 | for (i = 0; i < info->res_num; i++) { | ||
144 | size_t final_start, final_end; | ||
145 | size_t common_start, common_end; | ||
146 | |||
147 | res = &info->res[i]; | ||
148 | if (res->flags != flags) | ||
149 | continue; | ||
150 | |||
151 | common_start = max((size_t)res->start, start); | ||
152 | common_end = min((size_t)res->end, end); | ||
153 | if (common_start > common_end + 1) | ||
154 | continue; | ||
155 | |||
156 | final_start = min((size_t)res->start, start); | ||
157 | final_end = max((size_t)res->end, end); | ||
158 | |||
159 | res->start = final_start; | ||
160 | res->end = final_end; | ||
161 | return; | ||
162 | } | ||
163 | |||
164 | addit: | ||
165 | |||
166 | /* need to add that */ | ||
167 | if (info->res_num >= RES_NUM) | ||
168 | return; | ||
169 | |||
170 | res = &info->res[info->res_num]; | ||
171 | res->name = info->name; | ||
172 | res->flags = flags; | ||
173 | res->start = start; | ||
174 | res->end = end; | ||
175 | res->child = NULL; | ||
176 | info->res_num++; | ||
177 | } | ||
178 | |||
179 | struct pci_hostbridge_probe { | 72 | struct pci_hostbridge_probe { |
180 | u32 bus; | 73 | u32 bus; |
181 | u32 slot; | 74 | u32 slot; |
@@ -230,7 +123,6 @@ static int __init early_fill_mp_bus_info(void) | |||
230 | int j; | 123 | int j; |
231 | unsigned bus; | 124 | unsigned bus; |
232 | unsigned slot; | 125 | unsigned slot; |
233 | int found; | ||
234 | int node; | 126 | int node; |
235 | int link; | 127 | int link; |
236 | int def_node; | 128 | int def_node; |
@@ -247,7 +139,7 @@ static int __init early_fill_mp_bus_info(void) | |||
247 | if (!early_pci_allowed()) | 139 | if (!early_pci_allowed()) |
248 | return -1; | 140 | return -1; |
249 | 141 | ||
250 | found = 0; | 142 | found_all_numa_early = 0; |
251 | for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { | 143 | for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { |
252 | u32 id; | 144 | u32 id; |
253 | u16 device; | 145 | u16 device; |
@@ -261,12 +153,12 @@ static int __init early_fill_mp_bus_info(void) | |||
261 | device = (id>>16) & 0xffff; | 153 | device = (id>>16) & 0xffff; |
262 | if (pci_probes[i].vendor == vendor && | 154 | if (pci_probes[i].vendor == vendor && |
263 | pci_probes[i].device == device) { | 155 | pci_probes[i].device == device) { |
264 | found = 1; | 156 | found_all_numa_early = 1; |
265 | break; | 157 | break; |
266 | } | 158 | } |
267 | } | 159 | } |
268 | 160 | ||
269 | if (!found) | 161 | if (!found_all_numa_early) |
270 | return 0; | 162 | return 0; |
271 | 163 | ||
272 | pci_root_num = 0; | 164 | pci_root_num = 0; |
@@ -488,7 +380,7 @@ static int __init early_fill_mp_bus_info(void) | |||
488 | info = &pci_root_info[i]; | 380 | info = &pci_root_info[i]; |
489 | res_num = info->res_num; | 381 | res_num = info->res_num; |
490 | busnum = info->bus_min; | 382 | busnum = info->bus_min; |
491 | printk(KERN_DEBUG "bus: [%02x,%02x] on node %x link %x\n", | 383 | printk(KERN_DEBUG "bus: [%02x, %02x] on node %x link %x\n", |
492 | info->bus_min, info->bus_max, info->node, info->link); | 384 | info->bus_min, info->bus_max, info->node, info->link); |
493 | for (j = 0; j < res_num; j++) { | 385 | for (j = 0; j < res_num; j++) { |
494 | res = &info->res[j]; | 386 | res = &info->res[j]; |
diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c new file mode 100644 index 000000000000..f939d603adfa --- /dev/null +++ b/arch/x86/pci/bus_numa.c | |||
@@ -0,0 +1,101 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <linux/pci.h> | ||
3 | |||
4 | #include "bus_numa.h" | ||
5 | |||
6 | int pci_root_num; | ||
7 | struct pci_root_info pci_root_info[PCI_ROOT_NR]; | ||
8 | int found_all_numa_early; | ||
9 | |||
10 | void x86_pci_root_bus_res_quirks(struct pci_bus *b) | ||
11 | { | ||
12 | int i; | ||
13 | int j; | ||
14 | struct pci_root_info *info; | ||
15 | |||
16 | /* don't go for it if _CRS is used already */ | ||
17 | if (b->resource[0] != &ioport_resource || | ||
18 | b->resource[1] != &iomem_resource) | ||
19 | return; | ||
20 | |||
21 | if (!pci_root_num) | ||
22 | return; | ||
23 | |||
24 | /* for amd, if only one root bus, don't need to do anything */ | ||
25 | if (pci_root_num < 2 && found_all_numa_early) | ||
26 | return; | ||
27 | |||
28 | for (i = 0; i < pci_root_num; i++) { | ||
29 | if (pci_root_info[i].bus_min == b->number) | ||
30 | break; | ||
31 | } | ||
32 | |||
33 | if (i == pci_root_num) | ||
34 | return; | ||
35 | |||
36 | printk(KERN_DEBUG "PCI: peer root bus %02x res updated from pci conf\n", | ||
37 | b->number); | ||
38 | |||
39 | info = &pci_root_info[i]; | ||
40 | for (j = 0; j < info->res_num; j++) { | ||
41 | struct resource *res; | ||
42 | struct resource *root; | ||
43 | |||
44 | res = &info->res[j]; | ||
45 | b->resource[j] = res; | ||
46 | if (res->flags & IORESOURCE_IO) | ||
47 | root = &ioport_resource; | ||
48 | else | ||
49 | root = &iomem_resource; | ||
50 | insert_resource(root, res); | ||
51 | } | ||
52 | } | ||
53 | |||
54 | void __devinit update_res(struct pci_root_info *info, size_t start, | ||
55 | size_t end, unsigned long flags, int merge) | ||
56 | { | ||
57 | int i; | ||
58 | struct resource *res; | ||
59 | |||
60 | if (start > end) | ||
61 | return; | ||
62 | |||
63 | if (!merge) | ||
64 | goto addit; | ||
65 | |||
66 | /* try to merge it with old one */ | ||
67 | for (i = 0; i < info->res_num; i++) { | ||
68 | size_t final_start, final_end; | ||
69 | size_t common_start, common_end; | ||
70 | |||
71 | res = &info->res[i]; | ||
72 | if (res->flags != flags) | ||
73 | continue; | ||
74 | |||
75 | common_start = max((size_t)res->start, start); | ||
76 | common_end = min((size_t)res->end, end); | ||
77 | if (common_start > common_end + 1) | ||
78 | continue; | ||
79 | |||
80 | final_start = min((size_t)res->start, start); | ||
81 | final_end = max((size_t)res->end, end); | ||
82 | |||
83 | res->start = final_start; | ||
84 | res->end = final_end; | ||
85 | return; | ||
86 | } | ||
87 | |||
88 | addit: | ||
89 | |||
90 | /* need to add that */ | ||
91 | if (info->res_num >= RES_NUM) | ||
92 | return; | ||
93 | |||
94 | res = &info->res[info->res_num]; | ||
95 | res->name = info->name; | ||
96 | res->flags = flags; | ||
97 | res->start = start; | ||
98 | res->end = end; | ||
99 | res->child = NULL; | ||
100 | info->res_num++; | ||
101 | } | ||
diff --git a/arch/x86/pci/bus_numa.h b/arch/x86/pci/bus_numa.h new file mode 100644 index 000000000000..adbc23fe82ac --- /dev/null +++ b/arch/x86/pci/bus_numa.h | |||
@@ -0,0 +1,27 @@ | |||
1 | #ifdef CONFIG_X86_64 | ||
2 | |||
3 | /* | ||
4 | * sub bus (transparent) will use entres from 3 to store extra from | ||
5 | * root, so need to make sure we have enough slot there, Should we | ||
6 | * increase PCI_BUS_NUM_RESOURCES? | ||
7 | */ | ||
8 | #define RES_NUM 16 | ||
9 | struct pci_root_info { | ||
10 | char name[12]; | ||
11 | unsigned int res_num; | ||
12 | struct resource res[RES_NUM]; | ||
13 | int bus_min; | ||
14 | int bus_max; | ||
15 | int node; | ||
16 | int link; | ||
17 | }; | ||
18 | |||
19 | /* 4 at this time, it may become to 32 */ | ||
20 | #define PCI_ROOT_NR 4 | ||
21 | extern int pci_root_num; | ||
22 | extern struct pci_root_info pci_root_info[PCI_ROOT_NR]; | ||
23 | extern int found_all_numa_early; | ||
24 | |||
25 | extern void update_res(struct pci_root_info *info, size_t start, | ||
26 | size_t end, unsigned long flags, int merge); | ||
27 | #endif | ||
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 1331fcf26143..d2552c68e94d 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c | |||
@@ -410,8 +410,6 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum) | |||
410 | return bus; | 410 | return bus; |
411 | } | 411 | } |
412 | 412 | ||
413 | extern u8 pci_cache_line_size; | ||
414 | |||
415 | int __init pcibios_init(void) | 413 | int __init pcibios_init(void) |
416 | { | 414 | { |
417 | struct cpuinfo_x86 *c = &boot_cpu_data; | 415 | struct cpuinfo_x86 *c = &boot_cpu_data; |
@@ -422,15 +420,19 @@ int __init pcibios_init(void) | |||
422 | } | 420 | } |
423 | 421 | ||
424 | /* | 422 | /* |
425 | * Assume PCI cacheline size of 32 bytes for all x86s except K7/K8 | 423 | * Set PCI cacheline size to that of the CPU if the CPU has reported it. |
426 | * and P4. It's also good for 386/486s (which actually have 16) | 424 | * (For older CPUs that don't support cpuid, we se it to 32 bytes |
425 | * It's also good for 386/486s (which actually have 16) | ||
427 | * as quite a few PCI devices do not support smaller values. | 426 | * as quite a few PCI devices do not support smaller values. |
428 | */ | 427 | */ |
429 | pci_cache_line_size = 32 >> 2; | 428 | if (c->x86_clflush_size > 0) { |
430 | if (c->x86 >= 6 && c->x86_vendor == X86_VENDOR_AMD) | 429 | pci_dfl_cache_line_size = c->x86_clflush_size >> 2; |
431 | pci_cache_line_size = 64 >> 2; /* K7 & K8 */ | 430 | printk(KERN_DEBUG "PCI: pci_cache_line_size set to %d bytes\n", |
432 | else if (c->x86 > 6 && c->x86_vendor == X86_VENDOR_INTEL) | 431 | pci_dfl_cache_line_size << 2); |
433 | pci_cache_line_size = 128 >> 2; /* P4 */ | 432 | } else { |
433 | pci_dfl_cache_line_size = 32 >> 2; | ||
434 | printk(KERN_DEBUG "PCI: Unknown cacheline size. Setting to 32 bytes\n"); | ||
435 | } | ||
434 | 436 | ||
435 | pcibios_resource_survey(); | 437 | pcibios_resource_survey(); |
436 | 438 | ||
diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c index aaf26ae58cd5..d1067d539bee 100644 --- a/arch/x86/pci/early.c +++ b/arch/x86/pci/early.c | |||
@@ -12,8 +12,6 @@ u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset) | |||
12 | u32 v; | 12 | u32 v; |
13 | outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); | 13 | outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); |
14 | v = inl(0xcfc); | 14 | v = inl(0xcfc); |
15 | if (v != 0xffffffff) | ||
16 | pr_debug("%x reading 4 from %x: %x\n", slot, offset, v); | ||
17 | return v; | 15 | return v; |
18 | } | 16 | } |
19 | 17 | ||
@@ -22,7 +20,6 @@ u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset) | |||
22 | u8 v; | 20 | u8 v; |
23 | outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); | 21 | outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); |
24 | v = inb(0xcfc + (offset&3)); | 22 | v = inb(0xcfc + (offset&3)); |
25 | pr_debug("%x reading 1 from %x: %x\n", slot, offset, v); | ||
26 | return v; | 23 | return v; |
27 | } | 24 | } |
28 | 25 | ||
@@ -31,28 +28,24 @@ u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset) | |||
31 | u16 v; | 28 | u16 v; |
32 | outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); | 29 | outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); |
33 | v = inw(0xcfc + (offset&2)); | 30 | v = inw(0xcfc + (offset&2)); |
34 | pr_debug("%x reading 2 from %x: %x\n", slot, offset, v); | ||
35 | return v; | 31 | return v; |
36 | } | 32 | } |
37 | 33 | ||
38 | void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, | 34 | void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, |
39 | u32 val) | 35 | u32 val) |
40 | { | 36 | { |
41 | pr_debug("%x writing to %x: %x\n", slot, offset, val); | ||
42 | outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); | 37 | outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); |
43 | outl(val, 0xcfc); | 38 | outl(val, 0xcfc); |
44 | } | 39 | } |
45 | 40 | ||
46 | void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val) | 41 | void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val) |
47 | { | 42 | { |
48 | pr_debug("%x writing to %x: %x\n", slot, offset, val); | ||
49 | outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); | 43 | outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); |
50 | outb(val, 0xcfc + (offset&3)); | 44 | outb(val, 0xcfc + (offset&3)); |
51 | } | 45 | } |
52 | 46 | ||
53 | void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val) | 47 | void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val) |
54 | { | 48 | { |
55 | pr_debug("%x writing to %x: %x\n", slot, offset, val); | ||
56 | outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); | 49 | outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); |
57 | outw(val, 0xcfc + (offset&2)); | 50 | outw(val, 0xcfc + (offset&2)); |
58 | } | 51 | } |
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index b22d13b0c71d..5dc9e8c63fcd 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c | |||
@@ -129,7 +129,9 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) | |||
129 | continue; | 129 | continue; |
130 | if (!r->start || | 130 | if (!r->start || |
131 | pci_claim_resource(dev, idx) < 0) { | 131 | pci_claim_resource(dev, idx) < 0) { |
132 | dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx); | 132 | dev_info(&dev->dev, |
133 | "can't reserve window %pR\n", | ||
134 | r); | ||
133 | /* | 135 | /* |
134 | * Something is wrong with the region. | 136 | * Something is wrong with the region. |
135 | * Invalidate the resource to prevent | 137 | * Invalidate the resource to prevent |
@@ -144,16 +146,29 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) | |||
144 | } | 146 | } |
145 | } | 147 | } |
146 | 148 | ||
149 | struct pci_check_idx_range { | ||
150 | int start; | ||
151 | int end; | ||
152 | }; | ||
153 | |||
147 | static void __init pcibios_allocate_resources(int pass) | 154 | static void __init pcibios_allocate_resources(int pass) |
148 | { | 155 | { |
149 | struct pci_dev *dev = NULL; | 156 | struct pci_dev *dev = NULL; |
150 | int idx, disabled; | 157 | int idx, disabled, i; |
151 | u16 command; | 158 | u16 command; |
152 | struct resource *r; | 159 | struct resource *r; |
153 | 160 | ||
161 | struct pci_check_idx_range idx_range[] = { | ||
162 | { PCI_STD_RESOURCES, PCI_STD_RESOURCE_END }, | ||
163 | #ifdef CONFIG_PCI_IOV | ||
164 | { PCI_IOV_RESOURCES, PCI_IOV_RESOURCE_END }, | ||
165 | #endif | ||
166 | }; | ||
167 | |||
154 | for_each_pci_dev(dev) { | 168 | for_each_pci_dev(dev) { |
155 | pci_read_config_word(dev, PCI_COMMAND, &command); | 169 | pci_read_config_word(dev, PCI_COMMAND, &command); |
156 | for (idx = 0; idx < PCI_ROM_RESOURCE; idx++) { | 170 | for (i = 0; i < ARRAY_SIZE(idx_range); i++) |
171 | for (idx = idx_range[i].start; idx <= idx_range[i].end; idx++) { | ||
157 | r = &dev->resource[idx]; | 172 | r = &dev->resource[idx]; |
158 | if (r->parent) /* Already allocated */ | 173 | if (r->parent) /* Already allocated */ |
159 | continue; | 174 | continue; |
@@ -164,12 +179,12 @@ static void __init pcibios_allocate_resources(int pass) | |||
164 | else | 179 | else |
165 | disabled = !(command & PCI_COMMAND_MEMORY); | 180 | disabled = !(command & PCI_COMMAND_MEMORY); |
166 | if (pass == disabled) { | 181 | if (pass == disabled) { |
167 | dev_dbg(&dev->dev, "resource %#08llx-%#08llx (f=%lx, d=%d, p=%d)\n", | 182 | dev_dbg(&dev->dev, |
168 | (unsigned long long) r->start, | 183 | "BAR %d: reserving %pr (d=%d, p=%d)\n", |
169 | (unsigned long long) r->end, | 184 | idx, r, disabled, pass); |
170 | r->flags, disabled, pass); | ||
171 | if (pci_claim_resource(dev, idx) < 0) { | 185 | if (pci_claim_resource(dev, idx) < 0) { |
172 | dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx); | 186 | dev_info(&dev->dev, |
187 | "can't reserve %pR\n", r); | ||
173 | /* We'll assign a new address later */ | 188 | /* We'll assign a new address later */ |
174 | r->end -= r->start; | 189 | r->end -= r->start; |
175 | r->start = 0; | 190 | r->start = 0; |
@@ -182,7 +197,7 @@ static void __init pcibios_allocate_resources(int pass) | |||
182 | /* Turn the ROM off, leave the resource region, | 197 | /* Turn the ROM off, leave the resource region, |
183 | * but keep it unregistered. */ | 198 | * but keep it unregistered. */ |
184 | u32 reg; | 199 | u32 reg; |
185 | dev_dbg(&dev->dev, "disabling ROM\n"); | 200 | dev_dbg(&dev->dev, "disabling ROM %pR\n", r); |
186 | r->flags &= ~IORESOURCE_ROM_ENABLE; | 201 | r->flags &= ~IORESOURCE_ROM_ENABLE; |
187 | pci_read_config_dword(dev, | 202 | pci_read_config_dword(dev, |
188 | dev->rom_base_reg, ®); | 203 | dev->rom_base_reg, ®); |
@@ -282,6 +297,15 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, | |||
282 | return -EINVAL; | 297 | return -EINVAL; |
283 | 298 | ||
284 | prot = pgprot_val(vma->vm_page_prot); | 299 | prot = pgprot_val(vma->vm_page_prot); |
300 | |||
301 | /* | ||
302 | * Return error if pat is not enabled and write_combine is requested. | ||
303 | * Caller can followup with UC MINUS request and add a WC mtrr if there | ||
304 | * is a free mtrr slot. | ||
305 | */ | ||
306 | if (!pat_enabled && write_combine) | ||
307 | return -EINVAL; | ||
308 | |||
285 | if (pat_enabled && write_combine) | 309 | if (pat_enabled && write_combine) |
286 | prot |= _PAGE_CACHE_WC; | 310 | prot |= _PAGE_CACHE_WC; |
287 | else if (pat_enabled || boot_cpu_data.x86 > 3) | 311 | else if (pat_enabled || boot_cpu_data.x86 > 3) |
diff --git a/arch/x86/pci/intel_bus.c b/arch/x86/pci/intel_bus.c new file mode 100644 index 000000000000..b7a55dc55d13 --- /dev/null +++ b/arch/x86/pci/intel_bus.c | |||
@@ -0,0 +1,90 @@ | |||
1 | /* | ||
2 | * to read io range from IOH pci conf, need to do it after mmconfig is there | ||
3 | */ | ||
4 | |||
5 | #include <linux/delay.h> | ||
6 | #include <linux/dmi.h> | ||
7 | #include <linux/pci.h> | ||
8 | #include <linux/init.h> | ||
9 | #include <asm/pci_x86.h> | ||
10 | |||
11 | #include "bus_numa.h" | ||
12 | |||
13 | static inline void print_ioh_resources(struct pci_root_info *info) | ||
14 | { | ||
15 | int res_num; | ||
16 | int busnum; | ||
17 | int i; | ||
18 | |||
19 | printk(KERN_DEBUG "IOH bus: [%02x, %02x]\n", | ||
20 | info->bus_min, info->bus_max); | ||
21 | res_num = info->res_num; | ||
22 | busnum = info->bus_min; | ||
23 | for (i = 0; i < res_num; i++) { | ||
24 | struct resource *res; | ||
25 | |||
26 | res = &info->res[i]; | ||
27 | printk(KERN_DEBUG "IOH bus: %02x index %x %s: [%llx, %llx]\n", | ||
28 | busnum, i, | ||
29 | (res->flags & IORESOURCE_IO) ? "io port" : | ||
30 | "mmio", | ||
31 | res->start, res->end); | ||
32 | } | ||
33 | } | ||
34 | |||
35 | #define IOH_LIO 0x108 | ||
36 | #define IOH_LMMIOL 0x10c | ||
37 | #define IOH_LMMIOH 0x110 | ||
38 | #define IOH_LMMIOH_BASEU 0x114 | ||
39 | #define IOH_LMMIOH_LIMITU 0x118 | ||
40 | #define IOH_LCFGBUS 0x11c | ||
41 | |||
42 | static void __devinit pci_root_bus_res(struct pci_dev *dev) | ||
43 | { | ||
44 | u16 word; | ||
45 | u32 dword; | ||
46 | struct pci_root_info *info; | ||
47 | u16 io_base, io_end; | ||
48 | u32 mmiol_base, mmiol_end; | ||
49 | u64 mmioh_base, mmioh_end; | ||
50 | int bus_base, bus_end; | ||
51 | |||
52 | if (pci_root_num >= PCI_ROOT_NR) { | ||
53 | printk(KERN_DEBUG "intel_bus.c: PCI_ROOT_NR is too small\n"); | ||
54 | return; | ||
55 | } | ||
56 | |||
57 | info = &pci_root_info[pci_root_num]; | ||
58 | pci_root_num++; | ||
59 | |||
60 | pci_read_config_word(dev, IOH_LCFGBUS, &word); | ||
61 | bus_base = (word & 0xff); | ||
62 | bus_end = (word & 0xff00) >> 8; | ||
63 | sprintf(info->name, "PCI Bus #%02x", bus_base); | ||
64 | info->bus_min = bus_base; | ||
65 | info->bus_max = bus_end; | ||
66 | |||
67 | pci_read_config_word(dev, IOH_LIO, &word); | ||
68 | io_base = (word & 0xf0) << (12 - 4); | ||
69 | io_end = (word & 0xf000) | 0xfff; | ||
70 | update_res(info, io_base, io_end, IORESOURCE_IO, 0); | ||
71 | |||
72 | pci_read_config_dword(dev, IOH_LMMIOL, &dword); | ||
73 | mmiol_base = (dword & 0xff00) << (24 - 8); | ||
74 | mmiol_end = (dword & 0xff000000) | 0xffffff; | ||
75 | update_res(info, mmiol_base, mmiol_end, IORESOURCE_MEM, 0); | ||
76 | |||
77 | pci_read_config_dword(dev, IOH_LMMIOH, &dword); | ||
78 | mmioh_base = ((u64)(dword & 0xfc00)) << (26 - 10); | ||
79 | mmioh_end = ((u64)(dword & 0xfc000000) | 0x3ffffff); | ||
80 | pci_read_config_dword(dev, IOH_LMMIOH_BASEU, &dword); | ||
81 | mmioh_base |= ((u64)(dword & 0x7ffff)) << 32; | ||
82 | pci_read_config_dword(dev, IOH_LMMIOH_LIMITU, &dword); | ||
83 | mmioh_end |= ((u64)(dword & 0x7ffff)) << 32; | ||
84 | update_res(info, mmioh_base, mmioh_end, IORESOURCE_MEM, 0); | ||
85 | |||
86 | print_ioh_resources(info); | ||
87 | } | ||
88 | |||
89 | /* intel IOH */ | ||
90 | DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x342e, pci_root_bus_res); | ||
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 602c172d3bd5..b19d1e54201e 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c | |||
@@ -15,48 +15,98 @@ | |||
15 | #include <linux/acpi.h> | 15 | #include <linux/acpi.h> |
16 | #include <linux/sfi_acpi.h> | 16 | #include <linux/sfi_acpi.h> |
17 | #include <linux/bitmap.h> | 17 | #include <linux/bitmap.h> |
18 | #include <linux/sort.h> | 18 | #include <linux/dmi.h> |
19 | #include <asm/e820.h> | 19 | #include <asm/e820.h> |
20 | #include <asm/pci_x86.h> | 20 | #include <asm/pci_x86.h> |
21 | #include <asm/acpi.h> | 21 | #include <asm/acpi.h> |
22 | 22 | ||
23 | #define PREFIX "PCI: " | 23 | #define PREFIX "PCI: " |
24 | 24 | ||
25 | /* aperture is up to 256MB but BIOS may reserve less */ | ||
26 | #define MMCONFIG_APER_MIN (2 * 1024*1024) | ||
27 | #define MMCONFIG_APER_MAX (256 * 1024*1024) | ||
28 | |||
29 | /* Indicate if the mmcfg resources have been placed into the resource table. */ | 25 | /* Indicate if the mmcfg resources have been placed into the resource table. */ |
30 | static int __initdata pci_mmcfg_resources_inserted; | 26 | static int __initdata pci_mmcfg_resources_inserted; |
31 | 27 | ||
32 | static __init int extend_mmcfg(int num) | 28 | LIST_HEAD(pci_mmcfg_list); |
29 | |||
30 | static __init void pci_mmconfig_remove(struct pci_mmcfg_region *cfg) | ||
33 | { | 31 | { |
34 | struct acpi_mcfg_allocation *new; | 32 | if (cfg->res.parent) |
35 | int new_num = pci_mmcfg_config_num + num; | 33 | release_resource(&cfg->res); |
34 | list_del(&cfg->list); | ||
35 | kfree(cfg); | ||
36 | } | ||
36 | 37 | ||
37 | new = kzalloc(sizeof(pci_mmcfg_config[0]) * new_num, GFP_KERNEL); | 38 | static __init void free_all_mmcfg(void) |
38 | if (!new) | 39 | { |
39 | return -1; | 40 | struct pci_mmcfg_region *cfg, *tmp; |
40 | 41 | ||
41 | if (pci_mmcfg_config) { | 42 | pci_mmcfg_arch_free(); |
42 | memcpy(new, pci_mmcfg_config, | 43 | list_for_each_entry_safe(cfg, tmp, &pci_mmcfg_list, list) |
43 | sizeof(pci_mmcfg_config[0]) * new_num); | 44 | pci_mmconfig_remove(cfg); |
44 | kfree(pci_mmcfg_config); | 45 | } |
46 | |||
47 | static __init void list_add_sorted(struct pci_mmcfg_region *new) | ||
48 | { | ||
49 | struct pci_mmcfg_region *cfg; | ||
50 | |||
51 | /* keep list sorted by segment and starting bus number */ | ||
52 | list_for_each_entry(cfg, &pci_mmcfg_list, list) { | ||
53 | if (cfg->segment > new->segment || | ||
54 | (cfg->segment == new->segment && | ||
55 | cfg->start_bus >= new->start_bus)) { | ||
56 | list_add_tail(&new->list, &cfg->list); | ||
57 | return; | ||
58 | } | ||
45 | } | 59 | } |
46 | pci_mmcfg_config = new; | 60 | list_add_tail(&new->list, &pci_mmcfg_list); |
61 | } | ||
47 | 62 | ||
48 | return 0; | 63 | static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, |
64 | int end, u64 addr) | ||
65 | { | ||
66 | struct pci_mmcfg_region *new; | ||
67 | int num_buses; | ||
68 | struct resource *res; | ||
69 | |||
70 | if (addr == 0) | ||
71 | return NULL; | ||
72 | |||
73 | new = kzalloc(sizeof(*new), GFP_KERNEL); | ||
74 | if (!new) | ||
75 | return NULL; | ||
76 | |||
77 | new->address = addr; | ||
78 | new->segment = segment; | ||
79 | new->start_bus = start; | ||
80 | new->end_bus = end; | ||
81 | |||
82 | list_add_sorted(new); | ||
83 | |||
84 | num_buses = end - start + 1; | ||
85 | res = &new->res; | ||
86 | res->start = addr + PCI_MMCFG_BUS_OFFSET(start); | ||
87 | res->end = addr + PCI_MMCFG_BUS_OFFSET(num_buses) - 1; | ||
88 | res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; | ||
89 | snprintf(new->name, PCI_MMCFG_RESOURCE_NAME_LEN, | ||
90 | "PCI MMCONFIG %04x [bus %02x-%02x]", segment, start, end); | ||
91 | res->name = new->name; | ||
92 | |||
93 | printk(KERN_INFO PREFIX "MMCONFIG for domain %04x [bus %02x-%02x] at " | ||
94 | "%pR (base %#lx)\n", segment, start, end, &new->res, | ||
95 | (unsigned long) addr); | ||
96 | |||
97 | return new; | ||
49 | } | 98 | } |
50 | 99 | ||
51 | static __init void fill_one_mmcfg(u64 addr, int segment, int start, int end) | 100 | struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus) |
52 | { | 101 | { |
53 | int i = pci_mmcfg_config_num; | 102 | struct pci_mmcfg_region *cfg; |
54 | 103 | ||
55 | pci_mmcfg_config_num++; | 104 | list_for_each_entry(cfg, &pci_mmcfg_list, list) |
56 | pci_mmcfg_config[i].address = addr; | 105 | if (cfg->segment == segment && |
57 | pci_mmcfg_config[i].pci_segment = segment; | 106 | cfg->start_bus <= bus && bus <= cfg->end_bus) |
58 | pci_mmcfg_config[i].start_bus_number = start; | 107 | return cfg; |
59 | pci_mmcfg_config[i].end_bus_number = end; | 108 | |
109 | return NULL; | ||
60 | } | 110 | } |
61 | 111 | ||
62 | static const char __init *pci_mmcfg_e7520(void) | 112 | static const char __init *pci_mmcfg_e7520(void) |
@@ -68,11 +118,9 @@ static const char __init *pci_mmcfg_e7520(void) | |||
68 | if (win == 0x0000 || win == 0xf000) | 118 | if (win == 0x0000 || win == 0xf000) |
69 | return NULL; | 119 | return NULL; |
70 | 120 | ||
71 | if (extend_mmcfg(1) == -1) | 121 | if (pci_mmconfig_add(0, 0, 255, win << 16) == NULL) |
72 | return NULL; | 122 | return NULL; |
73 | 123 | ||
74 | fill_one_mmcfg(win << 16, 0, 0, 255); | ||
75 | |||
76 | return "Intel Corporation E7520 Memory Controller Hub"; | 124 | return "Intel Corporation E7520 Memory Controller Hub"; |
77 | } | 125 | } |
78 | 126 | ||
@@ -114,11 +162,9 @@ static const char __init *pci_mmcfg_intel_945(void) | |||
114 | if ((pciexbar & mask) >= 0xf0000000U) | 162 | if ((pciexbar & mask) >= 0xf0000000U) |
115 | return NULL; | 163 | return NULL; |
116 | 164 | ||
117 | if (extend_mmcfg(1) == -1) | 165 | if (pci_mmconfig_add(0, 0, (len >> 20) - 1, pciexbar & mask) == NULL) |
118 | return NULL; | 166 | return NULL; |
119 | 167 | ||
120 | fill_one_mmcfg(pciexbar & mask, 0, 0, (len >> 20) - 1); | ||
121 | |||
122 | return "Intel Corporation 945G/GZ/P/PL Express Memory Controller Hub"; | 168 | return "Intel Corporation 945G/GZ/P/PL Express Memory Controller Hub"; |
123 | } | 169 | } |
124 | 170 | ||
@@ -127,7 +173,7 @@ static const char __init *pci_mmcfg_amd_fam10h(void) | |||
127 | u32 low, high, address; | 173 | u32 low, high, address; |
128 | u64 base, msr; | 174 | u64 base, msr; |
129 | int i; | 175 | int i; |
130 | unsigned segnbits = 0, busnbits; | 176 | unsigned segnbits = 0, busnbits, end_bus; |
131 | 177 | ||
132 | if (!(pci_probe & PCI_CHECK_ENABLE_AMD_MMCONF)) | 178 | if (!(pci_probe & PCI_CHECK_ENABLE_AMD_MMCONF)) |
133 | return NULL; | 179 | return NULL; |
@@ -161,11 +207,13 @@ static const char __init *pci_mmcfg_amd_fam10h(void) | |||
161 | busnbits = 8; | 207 | busnbits = 8; |
162 | } | 208 | } |
163 | 209 | ||
164 | if (extend_mmcfg(1 << segnbits) == -1) | 210 | end_bus = (1 << busnbits) - 1; |
165 | return NULL; | ||
166 | |||
167 | for (i = 0; i < (1 << segnbits); i++) | 211 | for (i = 0; i < (1 << segnbits); i++) |
168 | fill_one_mmcfg(base + (1<<28) * i, i, 0, (1 << busnbits) - 1); | 212 | if (pci_mmconfig_add(i, 0, end_bus, |
213 | base + (1<<28) * i) == NULL) { | ||
214 | free_all_mmcfg(); | ||
215 | return NULL; | ||
216 | } | ||
169 | 217 | ||
170 | return "AMD Family 10h NB"; | 218 | return "AMD Family 10h NB"; |
171 | } | 219 | } |
@@ -190,7 +238,7 @@ static const char __init *pci_mmcfg_nvidia_mcp55(void) | |||
190 | /* | 238 | /* |
191 | * do check if amd fam10h already took over | 239 | * do check if amd fam10h already took over |
192 | */ | 240 | */ |
193 | if (!acpi_disabled || pci_mmcfg_config_num || mcp55_checked) | 241 | if (!acpi_disabled || !list_empty(&pci_mmcfg_list) || mcp55_checked) |
194 | return NULL; | 242 | return NULL; |
195 | 243 | ||
196 | mcp55_checked = true; | 244 | mcp55_checked = true; |
@@ -213,16 +261,14 @@ static const char __init *pci_mmcfg_nvidia_mcp55(void) | |||
213 | if (!(extcfg & extcfg_enable_mask)) | 261 | if (!(extcfg & extcfg_enable_mask)) |
214 | continue; | 262 | continue; |
215 | 263 | ||
216 | if (extend_mmcfg(1) == -1) | ||
217 | continue; | ||
218 | |||
219 | size_index = (extcfg & extcfg_size_mask) >> extcfg_size_shift; | 264 | size_index = (extcfg & extcfg_size_mask) >> extcfg_size_shift; |
220 | base = extcfg & extcfg_base_mask[size_index]; | 265 | base = extcfg & extcfg_base_mask[size_index]; |
221 | /* base could > 4G */ | 266 | /* base could > 4G */ |
222 | base <<= extcfg_base_lshift; | 267 | base <<= extcfg_base_lshift; |
223 | start = (extcfg & extcfg_start_mask) >> extcfg_start_shift; | 268 | start = (extcfg & extcfg_start_mask) >> extcfg_start_shift; |
224 | end = start + extcfg_sizebus[size_index] - 1; | 269 | end = start + extcfg_sizebus[size_index] - 1; |
225 | fill_one_mmcfg(base, 0, start, end); | 270 | if (pci_mmconfig_add(0, start, end, base) == NULL) |
271 | continue; | ||
226 | mcp55_mmconf_found++; | 272 | mcp55_mmconf_found++; |
227 | } | 273 | } |
228 | 274 | ||
@@ -253,45 +299,27 @@ static struct pci_mmcfg_hostbridge_probe pci_mmcfg_probes[] __initdata = { | |||
253 | 0x0369, pci_mmcfg_nvidia_mcp55 }, | 299 | 0x0369, pci_mmcfg_nvidia_mcp55 }, |
254 | }; | 300 | }; |
255 | 301 | ||
256 | static int __init cmp_mmcfg(const void *x1, const void *x2) | ||
257 | { | ||
258 | const typeof(pci_mmcfg_config[0]) *m1 = x1; | ||
259 | const typeof(pci_mmcfg_config[0]) *m2 = x2; | ||
260 | int start1, start2; | ||
261 | |||
262 | start1 = m1->start_bus_number; | ||
263 | start2 = m2->start_bus_number; | ||
264 | |||
265 | return start1 - start2; | ||
266 | } | ||
267 | |||
268 | static void __init pci_mmcfg_check_end_bus_number(void) | 302 | static void __init pci_mmcfg_check_end_bus_number(void) |
269 | { | 303 | { |
270 | int i; | 304 | struct pci_mmcfg_region *cfg, *cfgx; |
271 | typeof(pci_mmcfg_config[0]) *cfg, *cfgx; | ||
272 | |||
273 | /* sort them at first */ | ||
274 | sort(pci_mmcfg_config, pci_mmcfg_config_num, | ||
275 | sizeof(pci_mmcfg_config[0]), cmp_mmcfg, NULL); | ||
276 | 305 | ||
277 | /* last one*/ | 306 | /* last one*/ |
278 | if (pci_mmcfg_config_num > 0) { | 307 | cfg = list_entry(pci_mmcfg_list.prev, typeof(*cfg), list); |
279 | i = pci_mmcfg_config_num - 1; | 308 | if (cfg) |
280 | cfg = &pci_mmcfg_config[i]; | 309 | if (cfg->end_bus < cfg->start_bus) |
281 | if (cfg->end_bus_number < cfg->start_bus_number) | 310 | cfg->end_bus = 255; |
282 | cfg->end_bus_number = 255; | ||
283 | } | ||
284 | 311 | ||
285 | /* don't overlap please */ | 312 | if (list_is_singular(&pci_mmcfg_list)) |
286 | for (i = 0; i < pci_mmcfg_config_num - 1; i++) { | 313 | return; |
287 | cfg = &pci_mmcfg_config[i]; | ||
288 | cfgx = &pci_mmcfg_config[i+1]; | ||
289 | 314 | ||
290 | if (cfg->end_bus_number < cfg->start_bus_number) | 315 | /* don't overlap please */ |
291 | cfg->end_bus_number = 255; | 316 | list_for_each_entry(cfg, &pci_mmcfg_list, list) { |
317 | if (cfg->end_bus < cfg->start_bus) | ||
318 | cfg->end_bus = 255; | ||
292 | 319 | ||
293 | if (cfg->end_bus_number >= cfgx->start_bus_number) | 320 | cfgx = list_entry(cfg->list.next, typeof(*cfg), list); |
294 | cfg->end_bus_number = cfgx->start_bus_number - 1; | 321 | if (cfg != cfgx && cfg->end_bus >= cfgx->start_bus) |
322 | cfg->end_bus = cfgx->start_bus - 1; | ||
295 | } | 323 | } |
296 | } | 324 | } |
297 | 325 | ||
@@ -306,8 +334,7 @@ static int __init pci_mmcfg_check_hostbridge(void) | |||
306 | if (!raw_pci_ops) | 334 | if (!raw_pci_ops) |
307 | return 0; | 335 | return 0; |
308 | 336 | ||
309 | pci_mmcfg_config_num = 0; | 337 | free_all_mmcfg(); |
310 | pci_mmcfg_config = NULL; | ||
311 | 338 | ||
312 | for (i = 0; i < ARRAY_SIZE(pci_mmcfg_probes); i++) { | 339 | for (i = 0; i < ARRAY_SIZE(pci_mmcfg_probes); i++) { |
313 | bus = pci_mmcfg_probes[i].bus; | 340 | bus = pci_mmcfg_probes[i].bus; |
@@ -322,45 +349,22 @@ static int __init pci_mmcfg_check_hostbridge(void) | |||
322 | name = pci_mmcfg_probes[i].probe(); | 349 | name = pci_mmcfg_probes[i].probe(); |
323 | 350 | ||
324 | if (name) | 351 | if (name) |
325 | printk(KERN_INFO "PCI: Found %s with MMCONFIG support.\n", | 352 | printk(KERN_INFO PREFIX "%s with MMCONFIG support\n", |
326 | name); | 353 | name); |
327 | } | 354 | } |
328 | 355 | ||
329 | /* some end_bus_number is crazy, fix it */ | 356 | /* some end_bus_number is crazy, fix it */ |
330 | pci_mmcfg_check_end_bus_number(); | 357 | pci_mmcfg_check_end_bus_number(); |
331 | 358 | ||
332 | return pci_mmcfg_config_num != 0; | 359 | return !list_empty(&pci_mmcfg_list); |
333 | } | 360 | } |
334 | 361 | ||
335 | static void __init pci_mmcfg_insert_resources(void) | 362 | static void __init pci_mmcfg_insert_resources(void) |
336 | { | 363 | { |
337 | #define PCI_MMCFG_RESOURCE_NAME_LEN 24 | 364 | struct pci_mmcfg_region *cfg; |
338 | int i; | ||
339 | struct resource *res; | ||
340 | char *names; | ||
341 | unsigned num_buses; | ||
342 | |||
343 | res = kcalloc(PCI_MMCFG_RESOURCE_NAME_LEN + sizeof(*res), | ||
344 | pci_mmcfg_config_num, GFP_KERNEL); | ||
345 | if (!res) { | ||
346 | printk(KERN_ERR "PCI: Unable to allocate MMCONFIG resources\n"); | ||
347 | return; | ||
348 | } | ||
349 | 365 | ||
350 | names = (void *)&res[pci_mmcfg_config_num]; | 366 | list_for_each_entry(cfg, &pci_mmcfg_list, list) |
351 | for (i = 0; i < pci_mmcfg_config_num; i++, res++) { | 367 | insert_resource(&iomem_resource, &cfg->res); |
352 | struct acpi_mcfg_allocation *cfg = &pci_mmcfg_config[i]; | ||
353 | num_buses = cfg->end_bus_number - cfg->start_bus_number + 1; | ||
354 | res->name = names; | ||
355 | snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, | ||
356 | "PCI MMCONFIG %u [%02x-%02x]", cfg->pci_segment, | ||
357 | cfg->start_bus_number, cfg->end_bus_number); | ||
358 | res->start = cfg->address + (cfg->start_bus_number << 20); | ||
359 | res->end = res->start + (num_buses << 20) - 1; | ||
360 | res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; | ||
361 | insert_resource(&iomem_resource, res); | ||
362 | names += PCI_MMCFG_RESOURCE_NAME_LEN; | ||
363 | } | ||
364 | 368 | ||
365 | /* Mark that the resources have been inserted. */ | 369 | /* Mark that the resources have been inserted. */ |
366 | pci_mmcfg_resources_inserted = 1; | 370 | pci_mmcfg_resources_inserted = 1; |
@@ -437,11 +441,12 @@ static int __init is_acpi_reserved(u64 start, u64 end, unsigned not_used) | |||
437 | typedef int (*check_reserved_t)(u64 start, u64 end, unsigned type); | 441 | typedef int (*check_reserved_t)(u64 start, u64 end, unsigned type); |
438 | 442 | ||
439 | static int __init is_mmconf_reserved(check_reserved_t is_reserved, | 443 | static int __init is_mmconf_reserved(check_reserved_t is_reserved, |
440 | u64 addr, u64 size, int i, | 444 | struct pci_mmcfg_region *cfg, int with_e820) |
441 | typeof(pci_mmcfg_config[0]) *cfg, int with_e820) | ||
442 | { | 445 | { |
446 | u64 addr = cfg->res.start; | ||
447 | u64 size = resource_size(&cfg->res); | ||
443 | u64 old_size = size; | 448 | u64 old_size = size; |
444 | int valid = 0; | 449 | int valid = 0, num_buses; |
445 | 450 | ||
446 | while (!is_reserved(addr, addr + size, E820_RESERVED)) { | 451 | while (!is_reserved(addr, addr + size, E820_RESERVED)) { |
447 | size >>= 1; | 452 | size >>= 1; |
@@ -450,19 +455,25 @@ static int __init is_mmconf_reserved(check_reserved_t is_reserved, | |||
450 | } | 455 | } |
451 | 456 | ||
452 | if (size >= (16UL<<20) || size == old_size) { | 457 | if (size >= (16UL<<20) || size == old_size) { |
453 | printk(KERN_NOTICE | 458 | printk(KERN_INFO PREFIX "MMCONFIG at %pR reserved in %s\n", |
454 | "PCI: MCFG area at %Lx reserved in %s\n", | 459 | &cfg->res, |
455 | addr, with_e820?"E820":"ACPI motherboard resources"); | 460 | with_e820 ? "E820" : "ACPI motherboard resources"); |
456 | valid = 1; | 461 | valid = 1; |
457 | 462 | ||
458 | if (old_size != size) { | 463 | if (old_size != size) { |
459 | /* update end_bus_number */ | 464 | /* update end_bus */ |
460 | cfg->end_bus_number = cfg->start_bus_number + ((size>>20) - 1); | 465 | cfg->end_bus = cfg->start_bus + ((size>>20) - 1); |
461 | printk(KERN_NOTICE "PCI: updated MCFG configuration %d: base %lx " | 466 | num_buses = cfg->end_bus - cfg->start_bus + 1; |
462 | "segment %hu buses %u - %u\n", | 467 | cfg->res.end = cfg->res.start + |
463 | i, (unsigned long)cfg->address, cfg->pci_segment, | 468 | PCI_MMCFG_BUS_OFFSET(num_buses) - 1; |
464 | (unsigned int)cfg->start_bus_number, | 469 | snprintf(cfg->name, PCI_MMCFG_RESOURCE_NAME_LEN, |
465 | (unsigned int)cfg->end_bus_number); | 470 | "PCI MMCONFIG %04x [bus %02x-%02x]", |
471 | cfg->segment, cfg->start_bus, cfg->end_bus); | ||
472 | printk(KERN_INFO PREFIX | ||
473 | "MMCONFIG for %04x [bus%02x-%02x] " | ||
474 | "at %pR (base %#lx) (size reduced!)\n", | ||
475 | cfg->segment, cfg->start_bus, cfg->end_bus, | ||
476 | &cfg->res, (unsigned long) cfg->address); | ||
466 | } | 477 | } |
467 | } | 478 | } |
468 | 479 | ||
@@ -471,45 +482,26 @@ static int __init is_mmconf_reserved(check_reserved_t is_reserved, | |||
471 | 482 | ||
472 | static void __init pci_mmcfg_reject_broken(int early) | 483 | static void __init pci_mmcfg_reject_broken(int early) |
473 | { | 484 | { |
474 | typeof(pci_mmcfg_config[0]) *cfg; | 485 | struct pci_mmcfg_region *cfg; |
475 | int i; | ||
476 | 486 | ||
477 | if ((pci_mmcfg_config_num == 0) || | 487 | list_for_each_entry(cfg, &pci_mmcfg_list, list) { |
478 | (pci_mmcfg_config == NULL) || | ||
479 | (pci_mmcfg_config[0].address == 0)) | ||
480 | return; | ||
481 | |||
482 | for (i = 0; i < pci_mmcfg_config_num; i++) { | ||
483 | int valid = 0; | 488 | int valid = 0; |
484 | u64 addr, size; | ||
485 | |||
486 | cfg = &pci_mmcfg_config[i]; | ||
487 | addr = cfg->start_bus_number; | ||
488 | addr <<= 20; | ||
489 | addr += cfg->address; | ||
490 | size = cfg->end_bus_number + 1 - cfg->start_bus_number; | ||
491 | size <<= 20; | ||
492 | printk(KERN_NOTICE "PCI: MCFG configuration %d: base %lx " | ||
493 | "segment %hu buses %u - %u\n", | ||
494 | i, (unsigned long)cfg->address, cfg->pci_segment, | ||
495 | (unsigned int)cfg->start_bus_number, | ||
496 | (unsigned int)cfg->end_bus_number); | ||
497 | 489 | ||
498 | if (!early && !acpi_disabled) | 490 | if (!early && !acpi_disabled) |
499 | valid = is_mmconf_reserved(is_acpi_reserved, addr, size, i, cfg, 0); | 491 | valid = is_mmconf_reserved(is_acpi_reserved, cfg, 0); |
500 | 492 | ||
501 | if (valid) | 493 | if (valid) |
502 | continue; | 494 | continue; |
503 | 495 | ||
504 | if (!early) | 496 | if (!early) |
505 | printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %Lx is not" | 497 | printk(KERN_ERR FW_BUG PREFIX |
506 | " reserved in ACPI motherboard resources\n", | 498 | "MMCONFIG at %pR not reserved in " |
507 | cfg->address); | 499 | "ACPI motherboard resources\n", &cfg->res); |
508 | 500 | ||
509 | /* Don't try to do this check unless configuration | 501 | /* Don't try to do this check unless configuration |
510 | type 1 is available. how about type 2 ?*/ | 502 | type 1 is available. how about type 2 ?*/ |
511 | if (raw_pci_ops) | 503 | if (raw_pci_ops) |
512 | valid = is_mmconf_reserved(e820_all_mapped, addr, size, i, cfg, 1); | 504 | valid = is_mmconf_reserved(e820_all_mapped, cfg, 1); |
513 | 505 | ||
514 | if (!valid) | 506 | if (!valid) |
515 | goto reject; | 507 | goto reject; |
@@ -518,34 +510,41 @@ static void __init pci_mmcfg_reject_broken(int early) | |||
518 | return; | 510 | return; |
519 | 511 | ||
520 | reject: | 512 | reject: |
521 | printk(KERN_INFO "PCI: Not using MMCONFIG.\n"); | 513 | printk(KERN_INFO PREFIX "not using MMCONFIG\n"); |
522 | pci_mmcfg_arch_free(); | 514 | free_all_mmcfg(); |
523 | kfree(pci_mmcfg_config); | ||
524 | pci_mmcfg_config = NULL; | ||
525 | pci_mmcfg_config_num = 0; | ||
526 | } | 515 | } |
527 | 516 | ||
528 | static int __initdata known_bridge; | 517 | static int __initdata known_bridge; |
529 | 518 | ||
530 | static int acpi_mcfg_64bit_base_addr __initdata = FALSE; | 519 | static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg, |
520 | struct acpi_mcfg_allocation *cfg) | ||
521 | { | ||
522 | int year; | ||
531 | 523 | ||
532 | /* The physical address of the MMCONFIG aperture. Set from ACPI tables. */ | 524 | if (cfg->address < 0xFFFFFFFF) |
533 | struct acpi_mcfg_allocation *pci_mmcfg_config; | 525 | return 0; |
534 | int pci_mmcfg_config_num; | ||
535 | 526 | ||
536 | static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg) | ||
537 | { | ||
538 | if (!strcmp(mcfg->header.oem_id, "SGI")) | 527 | if (!strcmp(mcfg->header.oem_id, "SGI")) |
539 | acpi_mcfg_64bit_base_addr = TRUE; | 528 | return 0; |
540 | 529 | ||
541 | return 0; | 530 | if (mcfg->header.revision >= 1) { |
531 | if (dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL) && | ||
532 | year >= 2010) | ||
533 | return 0; | ||
534 | } | ||
535 | |||
536 | printk(KERN_ERR PREFIX "MCFG region for %04x [bus %02x-%02x] at %#llx " | ||
537 | "is above 4GB, ignored\n", cfg->pci_segment, | ||
538 | cfg->start_bus_number, cfg->end_bus_number, cfg->address); | ||
539 | return -EINVAL; | ||
542 | } | 540 | } |
543 | 541 | ||
544 | static int __init pci_parse_mcfg(struct acpi_table_header *header) | 542 | static int __init pci_parse_mcfg(struct acpi_table_header *header) |
545 | { | 543 | { |
546 | struct acpi_table_mcfg *mcfg; | 544 | struct acpi_table_mcfg *mcfg; |
545 | struct acpi_mcfg_allocation *cfg_table, *cfg; | ||
547 | unsigned long i; | 546 | unsigned long i; |
548 | int config_size; | 547 | int entries; |
549 | 548 | ||
550 | if (!header) | 549 | if (!header) |
551 | return -EINVAL; | 550 | return -EINVAL; |
@@ -553,38 +552,33 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) | |||
553 | mcfg = (struct acpi_table_mcfg *)header; | 552 | mcfg = (struct acpi_table_mcfg *)header; |
554 | 553 | ||
555 | /* how many config structures do we have */ | 554 | /* how many config structures do we have */ |
556 | pci_mmcfg_config_num = 0; | 555 | free_all_mmcfg(); |
556 | entries = 0; | ||
557 | i = header->length - sizeof(struct acpi_table_mcfg); | 557 | i = header->length - sizeof(struct acpi_table_mcfg); |
558 | while (i >= sizeof(struct acpi_mcfg_allocation)) { | 558 | while (i >= sizeof(struct acpi_mcfg_allocation)) { |
559 | ++pci_mmcfg_config_num; | 559 | entries++; |
560 | i -= sizeof(struct acpi_mcfg_allocation); | 560 | i -= sizeof(struct acpi_mcfg_allocation); |
561 | }; | 561 | }; |
562 | if (pci_mmcfg_config_num == 0) { | 562 | if (entries == 0) { |
563 | printk(KERN_ERR PREFIX "MMCONFIG has no entries\n"); | 563 | printk(KERN_ERR PREFIX "MMCONFIG has no entries\n"); |
564 | return -ENODEV; | 564 | return -ENODEV; |
565 | } | 565 | } |
566 | 566 | ||
567 | config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config); | 567 | cfg_table = (struct acpi_mcfg_allocation *) &mcfg[1]; |
568 | pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL); | 568 | for (i = 0; i < entries; i++) { |
569 | if (!pci_mmcfg_config) { | 569 | cfg = &cfg_table[i]; |
570 | printk(KERN_WARNING PREFIX | 570 | if (acpi_mcfg_check_entry(mcfg, cfg)) { |
571 | "No memory for MCFG config tables\n"); | 571 | free_all_mmcfg(); |
572 | return -ENOMEM; | ||
573 | } | ||
574 | |||
575 | memcpy(pci_mmcfg_config, &mcfg[1], config_size); | ||
576 | |||
577 | acpi_mcfg_oem_check(mcfg); | ||
578 | |||
579 | for (i = 0; i < pci_mmcfg_config_num; ++i) { | ||
580 | if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) && | ||
581 | !acpi_mcfg_64bit_base_addr) { | ||
582 | printk(KERN_ERR PREFIX | ||
583 | "MMCONFIG not in low 4GB of memory\n"); | ||
584 | kfree(pci_mmcfg_config); | ||
585 | pci_mmcfg_config_num = 0; | ||
586 | return -ENODEV; | 572 | return -ENODEV; |
587 | } | 573 | } |
574 | |||
575 | if (pci_mmconfig_add(cfg->pci_segment, cfg->start_bus_number, | ||
576 | cfg->end_bus_number, cfg->address) == NULL) { | ||
577 | printk(KERN_WARNING PREFIX | ||
578 | "no memory for MCFG entries\n"); | ||
579 | free_all_mmcfg(); | ||
580 | return -ENOMEM; | ||
581 | } | ||
588 | } | 582 | } |
589 | 583 | ||
590 | return 0; | 584 | return 0; |
@@ -614,9 +608,7 @@ static void __init __pci_mmcfg_init(int early) | |||
614 | 608 | ||
615 | pci_mmcfg_reject_broken(early); | 609 | pci_mmcfg_reject_broken(early); |
616 | 610 | ||
617 | if ((pci_mmcfg_config_num == 0) || | 611 | if (list_empty(&pci_mmcfg_list)) |
618 | (pci_mmcfg_config == NULL) || | ||
619 | (pci_mmcfg_config[0].address == 0)) | ||
620 | return; | 612 | return; |
621 | 613 | ||
622 | if (pci_mmcfg_arch_init()) | 614 | if (pci_mmcfg_arch_init()) |
@@ -648,9 +640,7 @@ static int __init pci_mmcfg_late_insert_resources(void) | |||
648 | */ | 640 | */ |
649 | if ((pci_mmcfg_resources_inserted == 1) || | 641 | if ((pci_mmcfg_resources_inserted == 1) || |
650 | (pci_probe & PCI_PROBE_MMCONF) == 0 || | 642 | (pci_probe & PCI_PROBE_MMCONF) == 0 || |
651 | (pci_mmcfg_config_num == 0) || | 643 | list_empty(&pci_mmcfg_list)) |
652 | (pci_mmcfg_config == NULL) || | ||
653 | (pci_mmcfg_config[0].address == 0)) | ||
654 | return 1; | 644 | return 1; |
655 | 645 | ||
656 | /* | 646 | /* |
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index f10a7e94a84c..90d5fd476ed4 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c | |||
@@ -27,18 +27,10 @@ static int mmcfg_last_accessed_cpu; | |||
27 | */ | 27 | */ |
28 | static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn) | 28 | static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn) |
29 | { | 29 | { |
30 | struct acpi_mcfg_allocation *cfg; | 30 | struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus); |
31 | int cfg_num; | ||
32 | |||
33 | for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) { | ||
34 | cfg = &pci_mmcfg_config[cfg_num]; | ||
35 | if (cfg->pci_segment == seg && | ||
36 | (cfg->start_bus_number <= bus) && | ||
37 | (cfg->end_bus_number >= bus)) | ||
38 | return cfg->address; | ||
39 | } | ||
40 | 31 | ||
41 | /* Fall back to type 0 */ | 32 | if (cfg) |
33 | return cfg->address; | ||
42 | return 0; | 34 | return 0; |
43 | } | 35 | } |
44 | 36 | ||
@@ -47,7 +39,7 @@ static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn) | |||
47 | */ | 39 | */ |
48 | static void pci_exp_set_dev_base(unsigned int base, int bus, int devfn) | 40 | static void pci_exp_set_dev_base(unsigned int base, int bus, int devfn) |
49 | { | 41 | { |
50 | u32 dev_base = base | (bus << 20) | (devfn << 12); | 42 | u32 dev_base = base | PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12); |
51 | int cpu = smp_processor_id(); | 43 | int cpu = smp_processor_id(); |
52 | if (dev_base != mmcfg_last_accessed_device || | 44 | if (dev_base != mmcfg_last_accessed_device || |
53 | cpu != mmcfg_last_accessed_cpu) { | 45 | cpu != mmcfg_last_accessed_cpu) { |
diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index 94349f8b2f96..e783841bd1d7 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c | |||
@@ -12,38 +12,15 @@ | |||
12 | #include <asm/e820.h> | 12 | #include <asm/e820.h> |
13 | #include <asm/pci_x86.h> | 13 | #include <asm/pci_x86.h> |
14 | 14 | ||
15 | /* Static virtual mapping of the MMCONFIG aperture */ | 15 | #define PREFIX "PCI: " |
16 | struct mmcfg_virt { | ||
17 | struct acpi_mcfg_allocation *cfg; | ||
18 | char __iomem *virt; | ||
19 | }; | ||
20 | static struct mmcfg_virt *pci_mmcfg_virt; | ||
21 | |||
22 | static char __iomem *get_virt(unsigned int seg, unsigned bus) | ||
23 | { | ||
24 | struct acpi_mcfg_allocation *cfg; | ||
25 | int cfg_num; | ||
26 | |||
27 | for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) { | ||
28 | cfg = pci_mmcfg_virt[cfg_num].cfg; | ||
29 | if (cfg->pci_segment == seg && | ||
30 | (cfg->start_bus_number <= bus) && | ||
31 | (cfg->end_bus_number >= bus)) | ||
32 | return pci_mmcfg_virt[cfg_num].virt; | ||
33 | } | ||
34 | |||
35 | /* Fall back to type 0 */ | ||
36 | return NULL; | ||
37 | } | ||
38 | 16 | ||
39 | static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) | 17 | static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) |
40 | { | 18 | { |
41 | char __iomem *addr; | 19 | struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus); |
42 | 20 | ||
43 | addr = get_virt(seg, bus); | 21 | if (cfg && cfg->virt) |
44 | if (!addr) | 22 | return cfg->virt + (PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12)); |
45 | return NULL; | 23 | return NULL; |
46 | return addr + ((bus << 20) | (devfn << 12)); | ||
47 | } | 24 | } |
48 | 25 | ||
49 | static int pci_mmcfg_read(unsigned int seg, unsigned int bus, | 26 | static int pci_mmcfg_read(unsigned int seg, unsigned int bus, |
@@ -109,42 +86,30 @@ static struct pci_raw_ops pci_mmcfg = { | |||
109 | .write = pci_mmcfg_write, | 86 | .write = pci_mmcfg_write, |
110 | }; | 87 | }; |
111 | 88 | ||
112 | static void __iomem * __init mcfg_ioremap(struct acpi_mcfg_allocation *cfg) | 89 | static void __iomem * __init mcfg_ioremap(struct pci_mmcfg_region *cfg) |
113 | { | 90 | { |
114 | void __iomem *addr; | 91 | void __iomem *addr; |
115 | u64 start, size; | 92 | u64 start, size; |
93 | int num_buses; | ||
116 | 94 | ||
117 | start = cfg->start_bus_number; | 95 | start = cfg->address + PCI_MMCFG_BUS_OFFSET(cfg->start_bus); |
118 | start <<= 20; | 96 | num_buses = cfg->end_bus - cfg->start_bus + 1; |
119 | start += cfg->address; | 97 | size = PCI_MMCFG_BUS_OFFSET(num_buses); |
120 | size = cfg->end_bus_number + 1 - cfg->start_bus_number; | ||
121 | size <<= 20; | ||
122 | addr = ioremap_nocache(start, size); | 98 | addr = ioremap_nocache(start, size); |
123 | if (addr) { | 99 | if (addr) |
124 | printk(KERN_INFO "PCI: Using MMCONFIG at %Lx - %Lx\n", | 100 | addr -= PCI_MMCFG_BUS_OFFSET(cfg->start_bus); |
125 | start, start + size - 1); | ||
126 | addr -= cfg->start_bus_number << 20; | ||
127 | } | ||
128 | return addr; | 101 | return addr; |
129 | } | 102 | } |
130 | 103 | ||
131 | int __init pci_mmcfg_arch_init(void) | 104 | int __init pci_mmcfg_arch_init(void) |
132 | { | 105 | { |
133 | int i; | 106 | struct pci_mmcfg_region *cfg; |
134 | pci_mmcfg_virt = kzalloc(sizeof(*pci_mmcfg_virt) * | ||
135 | pci_mmcfg_config_num, GFP_KERNEL); | ||
136 | if (pci_mmcfg_virt == NULL) { | ||
137 | printk(KERN_ERR "PCI: Can not allocate memory for mmconfig structures\n"); | ||
138 | return 0; | ||
139 | } | ||
140 | 107 | ||
141 | for (i = 0; i < pci_mmcfg_config_num; ++i) { | 108 | list_for_each_entry(cfg, &pci_mmcfg_list, list) { |
142 | pci_mmcfg_virt[i].cfg = &pci_mmcfg_config[i]; | 109 | cfg->virt = mcfg_ioremap(cfg); |
143 | pci_mmcfg_virt[i].virt = mcfg_ioremap(&pci_mmcfg_config[i]); | 110 | if (!cfg->virt) { |
144 | if (!pci_mmcfg_virt[i].virt) { | 111 | printk(KERN_ERR PREFIX "can't map MMCONFIG at %pR\n", |
145 | printk(KERN_ERR "PCI: Cannot map mmconfig aperture for " | 112 | &cfg->res); |
146 | "segment %d\n", | ||
147 | pci_mmcfg_config[i].pci_segment); | ||
148 | pci_mmcfg_arch_free(); | 113 | pci_mmcfg_arch_free(); |
149 | return 0; | 114 | return 0; |
150 | } | 115 | } |
@@ -155,19 +120,12 @@ int __init pci_mmcfg_arch_init(void) | |||
155 | 120 | ||
156 | void __init pci_mmcfg_arch_free(void) | 121 | void __init pci_mmcfg_arch_free(void) |
157 | { | 122 | { |
158 | int i; | 123 | struct pci_mmcfg_region *cfg; |
159 | |||
160 | if (pci_mmcfg_virt == NULL) | ||
161 | return; | ||
162 | 124 | ||
163 | for (i = 0; i < pci_mmcfg_config_num; ++i) { | 125 | list_for_each_entry(cfg, &pci_mmcfg_list, list) { |
164 | if (pci_mmcfg_virt[i].virt) { | 126 | if (cfg->virt) { |
165 | iounmap(pci_mmcfg_virt[i].virt + (pci_mmcfg_virt[i].cfg->start_bus_number << 20)); | 127 | iounmap(cfg->virt + PCI_MMCFG_BUS_OFFSET(cfg->start_bus)); |
166 | pci_mmcfg_virt[i].virt = NULL; | 128 | cfg->virt = NULL; |
167 | pci_mmcfg_virt[i].cfg = NULL; | ||
168 | } | 129 | } |
169 | } | 130 | } |
170 | |||
171 | kfree(pci_mmcfg_virt); | ||
172 | pci_mmcfg_virt = NULL; | ||
173 | } | 131 | } |
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 8aa85f17667e..0a979f3e5b8a 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <asm/mce.h> | 18 | #include <asm/mce.h> |
19 | #include <asm/xcr.h> | 19 | #include <asm/xcr.h> |
20 | #include <asm/suspend.h> | 20 | #include <asm/suspend.h> |
21 | #include <asm/debugreg.h> | ||
21 | 22 | ||
22 | #ifdef CONFIG_X86_32 | 23 | #ifdef CONFIG_X86_32 |
23 | static struct saved_context saved_context; | 24 | static struct saved_context saved_context; |
@@ -142,31 +143,6 @@ static void fix_processor_context(void) | |||
142 | #endif | 143 | #endif |
143 | load_TR_desc(); /* This does ltr */ | 144 | load_TR_desc(); /* This does ltr */ |
144 | load_LDT(¤t->active_mm->context); /* This does lldt */ | 145 | load_LDT(¤t->active_mm->context); /* This does lldt */ |
145 | |||
146 | /* | ||
147 | * Now maybe reload the debug registers | ||
148 | */ | ||
149 | if (current->thread.debugreg7) { | ||
150 | #ifdef CONFIG_X86_32 | ||
151 | set_debugreg(current->thread.debugreg0, 0); | ||
152 | set_debugreg(current->thread.debugreg1, 1); | ||
153 | set_debugreg(current->thread.debugreg2, 2); | ||
154 | set_debugreg(current->thread.debugreg3, 3); | ||
155 | /* no 4 and 5 */ | ||
156 | set_debugreg(current->thread.debugreg6, 6); | ||
157 | set_debugreg(current->thread.debugreg7, 7); | ||
158 | #else | ||
159 | /* CONFIG_X86_64 */ | ||
160 | loaddebug(¤t->thread, 0); | ||
161 | loaddebug(¤t->thread, 1); | ||
162 | loaddebug(¤t->thread, 2); | ||
163 | loaddebug(¤t->thread, 3); | ||
164 | /* no 4 and 5 */ | ||
165 | loaddebug(¤t->thread, 6); | ||
166 | loaddebug(¤t->thread, 7); | ||
167 | #endif | ||
168 | } | ||
169 | |||
170 | } | 146 | } |
171 | 147 | ||
172 | /** | 148 | /** |
diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile new file mode 100644 index 000000000000..f82082677337 --- /dev/null +++ b/arch/x86/tools/Makefile | |||
@@ -0,0 +1,31 @@ | |||
1 | PHONY += posttest | ||
2 | |||
3 | ifeq ($(KBUILD_VERBOSE),1) | ||
4 | posttest_verbose = -v | ||
5 | else | ||
6 | posttest_verbose = | ||
7 | endif | ||
8 | |||
9 | ifeq ($(CONFIG_64BIT),y) | ||
10 | posttest_64bit = -y | ||
11 | else | ||
12 | posttest_64bit = -n | ||
13 | endif | ||
14 | |||
15 | distill_awk = $(srctree)/arch/x86/tools/distill.awk | ||
16 | chkobjdump = $(srctree)/arch/x86/tools/chkobjdump.awk | ||
17 | |||
18 | quiet_cmd_posttest = TEST $@ | ||
19 | cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(distill_awk) | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose) | ||
20 | |||
21 | posttest: $(obj)/test_get_len vmlinux | ||
22 | $(call cmd,posttest) | ||
23 | |||
24 | hostprogs-y := test_get_len | ||
25 | |||
26 | # -I needed for generated C source and C source which in the kernel tree. | ||
27 | HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ | ||
28 | |||
29 | # Dependencies are also needed. | ||
30 | $(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c | ||
31 | |||
diff --git a/arch/x86/tools/chkobjdump.awk b/arch/x86/tools/chkobjdump.awk new file mode 100644 index 000000000000..fd1ab80be0de --- /dev/null +++ b/arch/x86/tools/chkobjdump.awk | |||
@@ -0,0 +1,33 @@ | |||
1 | # GNU objdump version checker | ||
2 | # | ||
3 | # Usage: | ||
4 | # objdump -v | awk -f chkobjdump.awk | ||
5 | BEGIN { | ||
6 | # objdump version 2.19 or later is OK for the test. | ||
7 | od_ver = 2; | ||
8 | od_sver = 19; | ||
9 | } | ||
10 | |||
11 | /^GNU objdump/ { | ||
12 | verstr = "" | ||
13 | for (i = 3; i <= NF; i++) | ||
14 | if (match($(i), "^[0-9]")) { | ||
15 | verstr = $(i); | ||
16 | break; | ||
17 | } | ||
18 | if (verstr == "") { | ||
19 | printf("Warning: Failed to find objdump version number.\n"); | ||
20 | exit 0; | ||
21 | } | ||
22 | split(verstr, ver, "."); | ||
23 | if (ver[1] > od_ver || | ||
24 | (ver[1] == od_ver && ver[2] >= od_sver)) { | ||
25 | exit 1; | ||
26 | } else { | ||
27 | printf("Warning: objdump version %s is older than %d.%d\n", | ||
28 | verstr, od_ver, od_sver); | ||
29 | print("Warning: Skipping posttest."); | ||
30 | # Logic is inverted, because we just skip test without error. | ||
31 | exit 0; | ||
32 | } | ||
33 | } | ||
diff --git a/arch/x86/tools/distill.awk b/arch/x86/tools/distill.awk new file mode 100644 index 000000000000..c13c0ee48ab4 --- /dev/null +++ b/arch/x86/tools/distill.awk | |||
@@ -0,0 +1,47 @@ | |||
1 | #!/bin/awk -f | ||
2 | # Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len | ||
3 | # Distills the disassembly as follows: | ||
4 | # - Removes all lines except the disassembled instructions. | ||
5 | # - For instructions that exceed 1 line (7 bytes), crams all the hex bytes | ||
6 | # into a single line. | ||
7 | # - Remove bad(or prefix only) instructions | ||
8 | |||
9 | BEGIN { | ||
10 | prev_addr = "" | ||
11 | prev_hex = "" | ||
12 | prev_mnemonic = "" | ||
13 | bad_expr = "(\\(bad\\)|^rex|^.byte|^rep(z|nz)$|^lock$|^es$|^cs$|^ss$|^ds$|^fs$|^gs$|^data(16|32)$|^addr(16|32|64))" | ||
14 | fwait_expr = "^9b " | ||
15 | fwait_str="9b\tfwait" | ||
16 | } | ||
17 | |||
18 | /^ *[0-9a-f]+ <[^>]*>:/ { | ||
19 | # Symbol entry | ||
20 | printf("%s%s\n", $2, $1) | ||
21 | } | ||
22 | |||
23 | /^ *[0-9a-f]+:/ { | ||
24 | if (split($0, field, "\t") < 3) { | ||
25 | # This is a continuation of the same insn. | ||
26 | prev_hex = prev_hex field[2] | ||
27 | } else { | ||
28 | # Skip bad instructions | ||
29 | if (match(prev_mnemonic, bad_expr)) | ||
30 | prev_addr = "" | ||
31 | # Split fwait from other f* instructions | ||
32 | if (match(prev_hex, fwait_expr) && prev_mnemonic != "fwait") { | ||
33 | printf "%s\t%s\n", prev_addr, fwait_str | ||
34 | sub(fwait_expr, "", prev_hex) | ||
35 | } | ||
36 | if (prev_addr != "") | ||
37 | printf "%s\t%s\t%s\n", prev_addr, prev_hex, prev_mnemonic | ||
38 | prev_addr = field[1] | ||
39 | prev_hex = field[2] | ||
40 | prev_mnemonic = field[3] | ||
41 | } | ||
42 | } | ||
43 | |||
44 | END { | ||
45 | if (prev_addr != "") | ||
46 | printf "%s\t%s\t%s\n", prev_addr, prev_hex, prev_mnemonic | ||
47 | } | ||
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk new file mode 100644 index 000000000000..eaf11f52fc0b --- /dev/null +++ b/arch/x86/tools/gen-insn-attr-x86.awk | |||
@@ -0,0 +1,378 @@ | |||
1 | #!/bin/awk -f | ||
2 | # gen-insn-attr-x86.awk: Instruction attribute table generator | ||
3 | # Written by Masami Hiramatsu <mhiramat@redhat.com> | ||
4 | # | ||
5 | # Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c | ||
6 | |||
7 | # Awk implementation sanity check | ||
8 | function check_awk_implement() { | ||
9 | if (sprintf("%x", 0) != "0") | ||
10 | return "Your awk has a printf-format problem." | ||
11 | return "" | ||
12 | } | ||
13 | |||
14 | # Clear working vars | ||
15 | function clear_vars() { | ||
16 | delete table | ||
17 | delete lptable2 | ||
18 | delete lptable1 | ||
19 | delete lptable3 | ||
20 | eid = -1 # escape id | ||
21 | gid = -1 # group id | ||
22 | aid = -1 # AVX id | ||
23 | tname = "" | ||
24 | } | ||
25 | |||
26 | BEGIN { | ||
27 | # Implementation error checking | ||
28 | awkchecked = check_awk_implement() | ||
29 | if (awkchecked != "") { | ||
30 | print "Error: " awkchecked > "/dev/stderr" | ||
31 | print "Please try to use gawk." > "/dev/stderr" | ||
32 | exit 1 | ||
33 | } | ||
34 | |||
35 | # Setup generating tables | ||
36 | print "/* x86 opcode map generated from x86-opcode-map.txt */" | ||
37 | print "/* Do not change this code. */\n" | ||
38 | ggid = 1 | ||
39 | geid = 1 | ||
40 | gaid = 0 | ||
41 | delete etable | ||
42 | delete gtable | ||
43 | delete atable | ||
44 | |||
45 | opnd_expr = "^[A-Za-z/]" | ||
46 | ext_expr = "^\\(" | ||
47 | sep_expr = "^\\|$" | ||
48 | group_expr = "^Grp[0-9A-Za-z]+" | ||
49 | |||
50 | imm_expr = "^[IJAO][a-z]" | ||
51 | imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" | ||
52 | imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" | ||
53 | imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)" | ||
54 | imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)" | ||
55 | imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)" | ||
56 | imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)" | ||
57 | imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" | ||
58 | imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" | ||
59 | imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)" | ||
60 | imm_flag["Ob"] = "INAT_MOFFSET" | ||
61 | imm_flag["Ov"] = "INAT_MOFFSET" | ||
62 | |||
63 | modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])" | ||
64 | force64_expr = "\\([df]64\\)" | ||
65 | rex_expr = "^REX(\\.[XRWB]+)*" | ||
66 | fpu_expr = "^ESC" # TODO | ||
67 | |||
68 | lprefix1_expr = "\\(66\\)" | ||
69 | lprefix2_expr = "\\(F3\\)" | ||
70 | lprefix3_expr = "\\(F2\\)" | ||
71 | max_lprefix = 4 | ||
72 | |||
73 | vexok_expr = "\\(VEX\\)" | ||
74 | vexonly_expr = "\\(oVEX\\)" | ||
75 | |||
76 | prefix_expr = "\\(Prefix\\)" | ||
77 | prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" | ||
78 | prefix_num["REPNE"] = "INAT_PFX_REPNE" | ||
79 | prefix_num["REP/REPE"] = "INAT_PFX_REPE" | ||
80 | prefix_num["LOCK"] = "INAT_PFX_LOCK" | ||
81 | prefix_num["SEG=CS"] = "INAT_PFX_CS" | ||
82 | prefix_num["SEG=DS"] = "INAT_PFX_DS" | ||
83 | prefix_num["SEG=ES"] = "INAT_PFX_ES" | ||
84 | prefix_num["SEG=FS"] = "INAT_PFX_FS" | ||
85 | prefix_num["SEG=GS"] = "INAT_PFX_GS" | ||
86 | prefix_num["SEG=SS"] = "INAT_PFX_SS" | ||
87 | prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" | ||
88 | prefix_num["2bytes-VEX"] = "INAT_PFX_VEX2" | ||
89 | prefix_num["3bytes-VEX"] = "INAT_PFX_VEX3" | ||
90 | |||
91 | clear_vars() | ||
92 | } | ||
93 | |||
94 | function semantic_error(msg) { | ||
95 | print "Semantic error at " NR ": " msg > "/dev/stderr" | ||
96 | exit 1 | ||
97 | } | ||
98 | |||
99 | function debug(msg) { | ||
100 | print "DEBUG: " msg | ||
101 | } | ||
102 | |||
103 | function array_size(arr, i,c) { | ||
104 | c = 0 | ||
105 | for (i in arr) | ||
106 | c++ | ||
107 | return c | ||
108 | } | ||
109 | |||
110 | /^Table:/ { | ||
111 | print "/* " $0 " */" | ||
112 | if (tname != "") | ||
113 | semantic_error("Hit Table: before EndTable:."); | ||
114 | } | ||
115 | |||
116 | /^Referrer:/ { | ||
117 | if (NF != 1) { | ||
118 | # escape opcode table | ||
119 | ref = "" | ||
120 | for (i = 2; i <= NF; i++) | ||
121 | ref = ref $i | ||
122 | eid = escape[ref] | ||
123 | tname = sprintf("inat_escape_table_%d", eid) | ||
124 | } | ||
125 | } | ||
126 | |||
127 | /^AVXcode:/ { | ||
128 | if (NF != 1) { | ||
129 | # AVX/escape opcode table | ||
130 | aid = $2 | ||
131 | if (gaid <= aid) | ||
132 | gaid = aid + 1 | ||
133 | if (tname == "") # AVX only opcode table | ||
134 | tname = sprintf("inat_avx_table_%d", $2) | ||
135 | } | ||
136 | if (aid == -1 && eid == -1) # primary opcode table | ||
137 | tname = "inat_primary_table" | ||
138 | } | ||
139 | |||
140 | /^GrpTable:/ { | ||
141 | print "/* " $0 " */" | ||
142 | if (!($2 in group)) | ||
143 | semantic_error("No group: " $2 ) | ||
144 | gid = group[$2] | ||
145 | tname = "inat_group_table_" gid | ||
146 | } | ||
147 | |||
148 | function print_table(tbl,name,fmt,n) | ||
149 | { | ||
150 | print "const insn_attr_t " name " = {" | ||
151 | for (i = 0; i < n; i++) { | ||
152 | id = sprintf(fmt, i) | ||
153 | if (tbl[id]) | ||
154 | print " [" id "] = " tbl[id] "," | ||
155 | } | ||
156 | print "};" | ||
157 | } | ||
158 | |||
159 | /^EndTable/ { | ||
160 | if (gid != -1) { | ||
161 | # print group tables | ||
162 | if (array_size(table) != 0) { | ||
163 | print_table(table, tname "[INAT_GROUP_TABLE_SIZE]", | ||
164 | "0x%x", 8) | ||
165 | gtable[gid,0] = tname | ||
166 | } | ||
167 | if (array_size(lptable1) != 0) { | ||
168 | print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]", | ||
169 | "0x%x", 8) | ||
170 | gtable[gid,1] = tname "_1" | ||
171 | } | ||
172 | if (array_size(lptable2) != 0) { | ||
173 | print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]", | ||
174 | "0x%x", 8) | ||
175 | gtable[gid,2] = tname "_2" | ||
176 | } | ||
177 | if (array_size(lptable3) != 0) { | ||
178 | print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]", | ||
179 | "0x%x", 8) | ||
180 | gtable[gid,3] = tname "_3" | ||
181 | } | ||
182 | } else { | ||
183 | # print primary/escaped tables | ||
184 | if (array_size(table) != 0) { | ||
185 | print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]", | ||
186 | "0x%02x", 256) | ||
187 | etable[eid,0] = tname | ||
188 | if (aid >= 0) | ||
189 | atable[aid,0] = tname | ||
190 | } | ||
191 | if (array_size(lptable1) != 0) { | ||
192 | print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]", | ||
193 | "0x%02x", 256) | ||
194 | etable[eid,1] = tname "_1" | ||
195 | if (aid >= 0) | ||
196 | atable[aid,1] = tname "_1" | ||
197 | } | ||
198 | if (array_size(lptable2) != 0) { | ||
199 | print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]", | ||
200 | "0x%02x", 256) | ||
201 | etable[eid,2] = tname "_2" | ||
202 | if (aid >= 0) | ||
203 | atable[aid,2] = tname "_2" | ||
204 | } | ||
205 | if (array_size(lptable3) != 0) { | ||
206 | print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]", | ||
207 | "0x%02x", 256) | ||
208 | etable[eid,3] = tname "_3" | ||
209 | if (aid >= 0) | ||
210 | atable[aid,3] = tname "_3" | ||
211 | } | ||
212 | } | ||
213 | print "" | ||
214 | clear_vars() | ||
215 | } | ||
216 | |||
217 | function add_flags(old,new) { | ||
218 | if (old && new) | ||
219 | return old " | " new | ||
220 | else if (old) | ||
221 | return old | ||
222 | else | ||
223 | return new | ||
224 | } | ||
225 | |||
226 | # convert operands to flags. | ||
227 | function convert_operands(count,opnd, i,j,imm,mod) | ||
228 | { | ||
229 | imm = null | ||
230 | mod = null | ||
231 | for (j = 1; j <= count; j++) { | ||
232 | i = opnd[j] | ||
233 | if (match(i, imm_expr) == 1) { | ||
234 | if (!imm_flag[i]) | ||
235 | semantic_error("Unknown imm opnd: " i) | ||
236 | if (imm) { | ||
237 | if (i != "Ib") | ||
238 | semantic_error("Second IMM error") | ||
239 | imm = add_flags(imm, "INAT_SCNDIMM") | ||
240 | } else | ||
241 | imm = imm_flag[i] | ||
242 | } else if (match(i, modrm_expr)) | ||
243 | mod = "INAT_MODRM" | ||
244 | } | ||
245 | return add_flags(imm, mod) | ||
246 | } | ||
247 | |||
248 | /^[0-9a-f]+\:/ { | ||
249 | if (NR == 1) | ||
250 | next | ||
251 | # get index | ||
252 | idx = "0x" substr($1, 1, index($1,":") - 1) | ||
253 | if (idx in table) | ||
254 | semantic_error("Redefine " idx " in " tname) | ||
255 | |||
256 | # check if escaped opcode | ||
257 | if ("escape" == $2) { | ||
258 | if ($3 != "#") | ||
259 | semantic_error("No escaped name") | ||
260 | ref = "" | ||
261 | for (i = 4; i <= NF; i++) | ||
262 | ref = ref $i | ||
263 | if (ref in escape) | ||
264 | semantic_error("Redefine escape (" ref ")") | ||
265 | escape[ref] = geid | ||
266 | geid++ | ||
267 | table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")" | ||
268 | next | ||
269 | } | ||
270 | |||
271 | variant = null | ||
272 | # converts | ||
273 | i = 2 | ||
274 | while (i <= NF) { | ||
275 | opcode = $(i++) | ||
276 | delete opnds | ||
277 | ext = null | ||
278 | flags = null | ||
279 | opnd = null | ||
280 | # parse one opcode | ||
281 | if (match($i, opnd_expr)) { | ||
282 | opnd = $i | ||
283 | count = split($(i++), opnds, ",") | ||
284 | flags = convert_operands(count, opnds) | ||
285 | } | ||
286 | if (match($i, ext_expr)) | ||
287 | ext = $(i++) | ||
288 | if (match($i, sep_expr)) | ||
289 | i++ | ||
290 | else if (i < NF) | ||
291 | semantic_error($i " is not a separator") | ||
292 | |||
293 | # check if group opcode | ||
294 | if (match(opcode, group_expr)) { | ||
295 | if (!(opcode in group)) { | ||
296 | group[opcode] = ggid | ||
297 | ggid++ | ||
298 | } | ||
299 | flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")") | ||
300 | } | ||
301 | # check force(or default) 64bit | ||
302 | if (match(ext, force64_expr)) | ||
303 | flags = add_flags(flags, "INAT_FORCE64") | ||
304 | |||
305 | # check REX prefix | ||
306 | if (match(opcode, rex_expr)) | ||
307 | flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)") | ||
308 | |||
309 | # check coprocessor escape : TODO | ||
310 | if (match(opcode, fpu_expr)) | ||
311 | flags = add_flags(flags, "INAT_MODRM") | ||
312 | |||
313 | # check VEX only code | ||
314 | if (match(ext, vexonly_expr)) | ||
315 | flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") | ||
316 | |||
317 | # check VEX only code | ||
318 | if (match(ext, vexok_expr)) | ||
319 | flags = add_flags(flags, "INAT_VEXOK") | ||
320 | |||
321 | # check prefixes | ||
322 | if (match(ext, prefix_expr)) { | ||
323 | if (!prefix_num[opcode]) | ||
324 | semantic_error("Unknown prefix: " opcode) | ||
325 | flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")") | ||
326 | } | ||
327 | if (length(flags) == 0) | ||
328 | continue | ||
329 | # check if last prefix | ||
330 | if (match(ext, lprefix1_expr)) { | ||
331 | lptable1[idx] = add_flags(lptable1[idx],flags) | ||
332 | variant = "INAT_VARIANT" | ||
333 | } else if (match(ext, lprefix2_expr)) { | ||
334 | lptable2[idx] = add_flags(lptable2[idx],flags) | ||
335 | variant = "INAT_VARIANT" | ||
336 | } else if (match(ext, lprefix3_expr)) { | ||
337 | lptable3[idx] = add_flags(lptable3[idx],flags) | ||
338 | variant = "INAT_VARIANT" | ||
339 | } else { | ||
340 | table[idx] = add_flags(table[idx],flags) | ||
341 | } | ||
342 | } | ||
343 | if (variant) | ||
344 | table[idx] = add_flags(table[idx],variant) | ||
345 | } | ||
346 | |||
347 | END { | ||
348 | if (awkchecked != "") | ||
349 | exit 1 | ||
350 | # print escape opcode map's array | ||
351 | print "/* Escape opcode map array */" | ||
352 | print "const insn_attr_t const *inat_escape_tables[INAT_ESC_MAX + 1]" \ | ||
353 | "[INAT_LSTPFX_MAX + 1] = {" | ||
354 | for (i = 0; i < geid; i++) | ||
355 | for (j = 0; j < max_lprefix; j++) | ||
356 | if (etable[i,j]) | ||
357 | print " ["i"]["j"] = "etable[i,j]"," | ||
358 | print "};\n" | ||
359 | # print group opcode map's array | ||
360 | print "/* Group opcode map array */" | ||
361 | print "const insn_attr_t const *inat_group_tables[INAT_GRP_MAX + 1]"\ | ||
362 | "[INAT_LSTPFX_MAX + 1] = {" | ||
363 | for (i = 0; i < ggid; i++) | ||
364 | for (j = 0; j < max_lprefix; j++) | ||
365 | if (gtable[i,j]) | ||
366 | print " ["i"]["j"] = "gtable[i,j]"," | ||
367 | print "};\n" | ||
368 | # print AVX opcode map's array | ||
369 | print "/* AVX opcode map array */" | ||
370 | print "const insn_attr_t const *inat_avx_tables[X86_VEX_M_MAX + 1]"\ | ||
371 | "[INAT_LSTPFX_MAX + 1] = {" | ||
372 | for (i = 0; i < gaid; i++) | ||
373 | for (j = 0; j < max_lprefix; j++) | ||
374 | if (atable[i,j]) | ||
375 | print " ["i"]["j"] = "atable[i,j]"," | ||
376 | print "};" | ||
377 | } | ||
378 | |||
diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c new file mode 100644 index 000000000000..bee8d6ac2691 --- /dev/null +++ b/arch/x86/tools/test_get_len.c | |||
@@ -0,0 +1,173 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License as published by | ||
4 | * the Free Software Foundation; either version 2 of the License, or | ||
5 | * (at your option) any later version. | ||
6 | * | ||
7 | * This program is distributed in the hope that it will be useful, | ||
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
10 | * GNU General Public License for more details. | ||
11 | * | ||
12 | * You should have received a copy of the GNU General Public License | ||
13 | * along with this program; if not, write to the Free Software | ||
14 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
15 | * | ||
16 | * Copyright (C) IBM Corporation, 2009 | ||
17 | */ | ||
18 | |||
19 | #include <stdlib.h> | ||
20 | #include <stdio.h> | ||
21 | #include <string.h> | ||
22 | #include <assert.h> | ||
23 | #include <unistd.h> | ||
24 | |||
25 | #define unlikely(cond) (cond) | ||
26 | |||
27 | #include <asm/insn.h> | ||
28 | #include <inat.c> | ||
29 | #include <insn.c> | ||
30 | |||
31 | /* | ||
32 | * Test of instruction analysis in general and insn_get_length() in | ||
33 | * particular. See if insn_get_length() and the disassembler agree | ||
34 | * on the length of each instruction in an elf disassembly. | ||
35 | * | ||
36 | * Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len | ||
37 | */ | ||
38 | |||
39 | const char *prog; | ||
40 | static int verbose; | ||
41 | static int x86_64; | ||
42 | |||
43 | static void usage(void) | ||
44 | { | ||
45 | fprintf(stderr, "Usage: objdump -d a.out | awk -f distill.awk |" | ||
46 | " %s [-y|-n] [-v] \n", prog); | ||
47 | fprintf(stderr, "\t-y 64bit mode\n"); | ||
48 | fprintf(stderr, "\t-n 32bit mode\n"); | ||
49 | fprintf(stderr, "\t-v verbose mode\n"); | ||
50 | exit(1); | ||
51 | } | ||
52 | |||
53 | static void malformed_line(const char *line, int line_nr) | ||
54 | { | ||
55 | fprintf(stderr, "%s: malformed line %d:\n%s", prog, line_nr, line); | ||
56 | exit(3); | ||
57 | } | ||
58 | |||
59 | static void dump_field(FILE *fp, const char *name, const char *indent, | ||
60 | struct insn_field *field) | ||
61 | { | ||
62 | fprintf(fp, "%s.%s = {\n", indent, name); | ||
63 | fprintf(fp, "%s\t.value = %d, bytes[] = {%x, %x, %x, %x},\n", | ||
64 | indent, field->value, field->bytes[0], field->bytes[1], | ||
65 | field->bytes[2], field->bytes[3]); | ||
66 | fprintf(fp, "%s\t.got = %d, .nbytes = %d},\n", indent, | ||
67 | field->got, field->nbytes); | ||
68 | } | ||
69 | |||
70 | static void dump_insn(FILE *fp, struct insn *insn) | ||
71 | { | ||
72 | fprintf(fp, "Instruction = { \n"); | ||
73 | dump_field(fp, "prefixes", "\t", &insn->prefixes); | ||
74 | dump_field(fp, "rex_prefix", "\t", &insn->rex_prefix); | ||
75 | dump_field(fp, "vex_prefix", "\t", &insn->vex_prefix); | ||
76 | dump_field(fp, "opcode", "\t", &insn->opcode); | ||
77 | dump_field(fp, "modrm", "\t", &insn->modrm); | ||
78 | dump_field(fp, "sib", "\t", &insn->sib); | ||
79 | dump_field(fp, "displacement", "\t", &insn->displacement); | ||
80 | dump_field(fp, "immediate1", "\t", &insn->immediate1); | ||
81 | dump_field(fp, "immediate2", "\t", &insn->immediate2); | ||
82 | fprintf(fp, "\t.attr = %x, .opnd_bytes = %d, .addr_bytes = %d,\n", | ||
83 | insn->attr, insn->opnd_bytes, insn->addr_bytes); | ||
84 | fprintf(fp, "\t.length = %d, .x86_64 = %d, .kaddr = %p}\n", | ||
85 | insn->length, insn->x86_64, insn->kaddr); | ||
86 | } | ||
87 | |||
88 | static void parse_args(int argc, char **argv) | ||
89 | { | ||
90 | int c; | ||
91 | prog = argv[0]; | ||
92 | while ((c = getopt(argc, argv, "ynv")) != -1) { | ||
93 | switch (c) { | ||
94 | case 'y': | ||
95 | x86_64 = 1; | ||
96 | break; | ||
97 | case 'n': | ||
98 | x86_64 = 0; | ||
99 | break; | ||
100 | case 'v': | ||
101 | verbose = 1; | ||
102 | break; | ||
103 | default: | ||
104 | usage(); | ||
105 | } | ||
106 | } | ||
107 | } | ||
108 | |||
109 | #define BUFSIZE 256 | ||
110 | |||
111 | int main(int argc, char **argv) | ||
112 | { | ||
113 | char line[BUFSIZE], sym[BUFSIZE] = "<unknown>"; | ||
114 | unsigned char insn_buf[16]; | ||
115 | struct insn insn; | ||
116 | int insns = 0; | ||
117 | int warnings = 0; | ||
118 | |||
119 | parse_args(argc, argv); | ||
120 | |||
121 | while (fgets(line, BUFSIZE, stdin)) { | ||
122 | char copy[BUFSIZE], *s, *tab1, *tab2; | ||
123 | int nb = 0; | ||
124 | unsigned int b; | ||
125 | |||
126 | if (line[0] == '<') { | ||
127 | /* Symbol line */ | ||
128 | strcpy(sym, line); | ||
129 | continue; | ||
130 | } | ||
131 | |||
132 | insns++; | ||
133 | memset(insn_buf, 0, 16); | ||
134 | strcpy(copy, line); | ||
135 | tab1 = strchr(copy, '\t'); | ||
136 | if (!tab1) | ||
137 | malformed_line(line, insns); | ||
138 | s = tab1 + 1; | ||
139 | s += strspn(s, " "); | ||
140 | tab2 = strchr(s, '\t'); | ||
141 | if (!tab2) | ||
142 | malformed_line(line, insns); | ||
143 | *tab2 = '\0'; /* Characters beyond tab2 aren't examined */ | ||
144 | while (s < tab2) { | ||
145 | if (sscanf(s, "%x", &b) == 1) { | ||
146 | insn_buf[nb++] = (unsigned char) b; | ||
147 | s += 3; | ||
148 | } else | ||
149 | break; | ||
150 | } | ||
151 | /* Decode an instruction */ | ||
152 | insn_init(&insn, insn_buf, x86_64); | ||
153 | insn_get_length(&insn); | ||
154 | if (insn.length != nb) { | ||
155 | warnings++; | ||
156 | fprintf(stderr, "Warning: %s found difference at %s\n", | ||
157 | prog, sym); | ||
158 | fprintf(stderr, "Warning: %s", line); | ||
159 | fprintf(stderr, "Warning: objdump says %d bytes, but " | ||
160 | "insn_get_length() says %d\n", nb, | ||
161 | insn.length); | ||
162 | if (verbose) | ||
163 | dump_insn(stderr, &insn); | ||
164 | } | ||
165 | } | ||
166 | if (warnings) | ||
167 | fprintf(stderr, "Warning: decoded and checked %d" | ||
168 | " instructions with %d warnings\n", insns, warnings); | ||
169 | else | ||
170 | fprintf(stderr, "Succeed: decoded and checked %d" | ||
171 | " instructions\n", insns); | ||
172 | return 0; | ||
173 | } | ||
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 58bc00f68b12..02b442e92007 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c | |||
@@ -393,7 +393,6 @@ static ctl_table abi_table2[] = { | |||
393 | 393 | ||
394 | static ctl_table abi_root_table2[] = { | 394 | static ctl_table abi_root_table2[] = { |
395 | { | 395 | { |
396 | .ctl_name = CTL_ABI, | ||
397 | .procname = "abi", | 396 | .procname = "abi", |
398 | .mode = 0555, | 397 | .mode = 0555, |
399 | .child = abi_table2 | 398 | .child = abi_table2 |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 3439616d69f1..2b26dd5930c6 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -27,7 +27,9 @@ | |||
27 | #include <linux/page-flags.h> | 27 | #include <linux/page-flags.h> |
28 | #include <linux/highmem.h> | 28 | #include <linux/highmem.h> |
29 | #include <linux/console.h> | 29 | #include <linux/console.h> |
30 | #include <linux/pci.h> | ||
30 | 31 | ||
32 | #include <xen/xen.h> | ||
31 | #include <xen/interface/xen.h> | 33 | #include <xen/interface/xen.h> |
32 | #include <xen/interface/version.h> | 34 | #include <xen/interface/version.h> |
33 | #include <xen/interface/physdev.h> | 35 | #include <xen/interface/physdev.h> |
@@ -138,24 +140,23 @@ static void xen_vcpu_setup(int cpu) | |||
138 | */ | 140 | */ |
139 | void xen_vcpu_restore(void) | 141 | void xen_vcpu_restore(void) |
140 | { | 142 | { |
141 | if (have_vcpu_info_placement) { | 143 | int cpu; |
142 | int cpu; | ||
143 | 144 | ||
144 | for_each_online_cpu(cpu) { | 145 | for_each_online_cpu(cpu) { |
145 | bool other_cpu = (cpu != smp_processor_id()); | 146 | bool other_cpu = (cpu != smp_processor_id()); |
146 | 147 | ||
147 | if (other_cpu && | 148 | if (other_cpu && |
148 | HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL)) | 149 | HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL)) |
149 | BUG(); | 150 | BUG(); |
150 | 151 | ||
151 | xen_vcpu_setup(cpu); | 152 | xen_setup_runstate_info(cpu); |
152 | 153 | ||
153 | if (other_cpu && | 154 | if (have_vcpu_info_placement) |
154 | HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) | 155 | xen_vcpu_setup(cpu); |
155 | BUG(); | ||
156 | } | ||
157 | 156 | ||
158 | BUG_ON(!have_vcpu_info_placement); | 157 | if (other_cpu && |
158 | HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) | ||
159 | BUG(); | ||
159 | } | 160 | } |
160 | } | 161 | } |
161 | 162 | ||
@@ -178,6 +179,7 @@ static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0; | |||
178 | static void xen_cpuid(unsigned int *ax, unsigned int *bx, | 179 | static void xen_cpuid(unsigned int *ax, unsigned int *bx, |
179 | unsigned int *cx, unsigned int *dx) | 180 | unsigned int *cx, unsigned int *dx) |
180 | { | 181 | { |
182 | unsigned maskebx = ~0; | ||
181 | unsigned maskecx = ~0; | 183 | unsigned maskecx = ~0; |
182 | unsigned maskedx = ~0; | 184 | unsigned maskedx = ~0; |
183 | 185 | ||
@@ -185,9 +187,16 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, | |||
185 | * Mask out inconvenient features, to try and disable as many | 187 | * Mask out inconvenient features, to try and disable as many |
186 | * unsupported kernel subsystems as possible. | 188 | * unsupported kernel subsystems as possible. |
187 | */ | 189 | */ |
188 | if (*ax == 1) { | 190 | switch (*ax) { |
191 | case 1: | ||
189 | maskecx = cpuid_leaf1_ecx_mask; | 192 | maskecx = cpuid_leaf1_ecx_mask; |
190 | maskedx = cpuid_leaf1_edx_mask; | 193 | maskedx = cpuid_leaf1_edx_mask; |
194 | break; | ||
195 | |||
196 | case 0xb: | ||
197 | /* Suppress extended topology stuff */ | ||
198 | maskebx = 0; | ||
199 | break; | ||
191 | } | 200 | } |
192 | 201 | ||
193 | asm(XEN_EMULATE_PREFIX "cpuid" | 202 | asm(XEN_EMULATE_PREFIX "cpuid" |
@@ -197,6 +206,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, | |||
197 | "=d" (*dx) | 206 | "=d" (*dx) |
198 | : "0" (*ax), "2" (*cx)); | 207 | : "0" (*ax), "2" (*cx)); |
199 | 208 | ||
209 | *bx &= maskebx; | ||
200 | *cx &= maskecx; | 210 | *cx &= maskecx; |
201 | *dx &= maskedx; | 211 | *dx &= maskedx; |
202 | } | 212 | } |
@@ -1075,6 +1085,8 @@ asmlinkage void __init xen_start_kernel(void) | |||
1075 | * Set up some pagetable state before starting to set any ptes. | 1085 | * Set up some pagetable state before starting to set any ptes. |
1076 | */ | 1086 | */ |
1077 | 1087 | ||
1088 | xen_init_mmu_ops(); | ||
1089 | |||
1078 | /* Prevent unwanted bits from being set in PTEs. */ | 1090 | /* Prevent unwanted bits from being set in PTEs. */ |
1079 | __supported_pte_mask &= ~_PAGE_GLOBAL; | 1091 | __supported_pte_mask &= ~_PAGE_GLOBAL; |
1080 | if (!xen_initial_domain()) | 1092 | if (!xen_initial_domain()) |
@@ -1082,10 +1094,8 @@ asmlinkage void __init xen_start_kernel(void) | |||
1082 | 1094 | ||
1083 | __supported_pte_mask |= _PAGE_IOMAP; | 1095 | __supported_pte_mask |= _PAGE_IOMAP; |
1084 | 1096 | ||
1085 | #ifdef CONFIG_X86_64 | ||
1086 | /* Work out if we support NX */ | 1097 | /* Work out if we support NX */ |
1087 | check_efer(); | 1098 | x86_configure_nx(); |
1088 | #endif | ||
1089 | 1099 | ||
1090 | xen_setup_features(); | 1100 | xen_setup_features(); |
1091 | 1101 | ||
@@ -1099,7 +1109,6 @@ asmlinkage void __init xen_start_kernel(void) | |||
1099 | */ | 1109 | */ |
1100 | xen_setup_stackprotector(); | 1110 | xen_setup_stackprotector(); |
1101 | 1111 | ||
1102 | xen_init_mmu_ops(); | ||
1103 | xen_init_irq_ops(); | 1112 | xen_init_irq_ops(); |
1104 | xen_init_cpuid_mask(); | 1113 | xen_init_cpuid_mask(); |
1105 | 1114 | ||
@@ -1168,10 +1177,16 @@ asmlinkage void __init xen_start_kernel(void) | |||
1168 | add_preferred_console("xenboot", 0, NULL); | 1177 | add_preferred_console("xenboot", 0, NULL); |
1169 | add_preferred_console("tty", 0, NULL); | 1178 | add_preferred_console("tty", 0, NULL); |
1170 | add_preferred_console("hvc", 0, NULL); | 1179 | add_preferred_console("hvc", 0, NULL); |
1180 | } else { | ||
1181 | /* Make sure ACS will be enabled */ | ||
1182 | pci_request_acs(); | ||
1171 | } | 1183 | } |
1184 | |||
1172 | 1185 | ||
1173 | xen_raw_console_write("about to get started...\n"); | 1186 | xen_raw_console_write("about to get started...\n"); |
1174 | 1187 | ||
1188 | xen_setup_runstate_info(0); | ||
1189 | |||
1175 | /* Start the world */ | 1190 | /* Start the world */ |
1176 | #ifdef CONFIG_X86_32 | 1191 | #ifdef CONFIG_X86_32 |
1177 | i386_start_kernel(); | 1192 | i386_start_kernel(); |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 3bf7b1d250ce..bf4cd6bfe959 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -185,7 +185,7 @@ static inline unsigned p2m_index(unsigned long pfn) | |||
185 | } | 185 | } |
186 | 186 | ||
187 | /* Build the parallel p2m_top_mfn structures */ | 187 | /* Build the parallel p2m_top_mfn structures */ |
188 | static void __init xen_build_mfn_list_list(void) | 188 | void xen_build_mfn_list_list(void) |
189 | { | 189 | { |
190 | unsigned pfn, idx; | 190 | unsigned pfn, idx; |
191 | 191 | ||
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 1167d9830f5f..563d20504988 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -73,7 +73,7 @@ static __cpuinit void cpu_bringup(void) | |||
73 | 73 | ||
74 | xen_setup_cpu_clockevents(); | 74 | xen_setup_cpu_clockevents(); |
75 | 75 | ||
76 | cpu_set(cpu, cpu_online_map); | 76 | set_cpu_online(cpu, true); |
77 | percpu_write(cpu_state, CPU_ONLINE); | 77 | percpu_write(cpu_state, CPU_ONLINE); |
78 | wmb(); | 78 | wmb(); |
79 | 79 | ||
@@ -296,6 +296,7 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) | |||
296 | (unsigned long)task_stack_page(idle) - | 296 | (unsigned long)task_stack_page(idle) - |
297 | KERNEL_STACK_OFFSET + THREAD_SIZE; | 297 | KERNEL_STACK_OFFSET + THREAD_SIZE; |
298 | #endif | 298 | #endif |
299 | xen_setup_runstate_info(cpu); | ||
299 | xen_setup_timer(cpu); | 300 | xen_setup_timer(cpu); |
300 | xen_init_lock_cpu(cpu); | 301 | xen_init_lock_cpu(cpu); |
301 | 302 | ||
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 36a5141108df..24ded31b5aec 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c | |||
@@ -120,14 +120,14 @@ struct xen_spinlock { | |||
120 | unsigned short spinners; /* count of waiting cpus */ | 120 | unsigned short spinners; /* count of waiting cpus */ |
121 | }; | 121 | }; |
122 | 122 | ||
123 | static int xen_spin_is_locked(struct raw_spinlock *lock) | 123 | static int xen_spin_is_locked(struct arch_spinlock *lock) |
124 | { | 124 | { |
125 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | 125 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; |
126 | 126 | ||
127 | return xl->lock != 0; | 127 | return xl->lock != 0; |
128 | } | 128 | } |
129 | 129 | ||
130 | static int xen_spin_is_contended(struct raw_spinlock *lock) | 130 | static int xen_spin_is_contended(struct arch_spinlock *lock) |
131 | { | 131 | { |
132 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | 132 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; |
133 | 133 | ||
@@ -136,7 +136,7 @@ static int xen_spin_is_contended(struct raw_spinlock *lock) | |||
136 | return xl->spinners != 0; | 136 | return xl->spinners != 0; |
137 | } | 137 | } |
138 | 138 | ||
139 | static int xen_spin_trylock(struct raw_spinlock *lock) | 139 | static int xen_spin_trylock(struct arch_spinlock *lock) |
140 | { | 140 | { |
141 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | 141 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; |
142 | u8 old = 1; | 142 | u8 old = 1; |
@@ -181,7 +181,7 @@ static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock | |||
181 | __get_cpu_var(lock_spinners) = prev; | 181 | __get_cpu_var(lock_spinners) = prev; |
182 | } | 182 | } |
183 | 183 | ||
184 | static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enable) | 184 | static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enable) |
185 | { | 185 | { |
186 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | 186 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; |
187 | struct xen_spinlock *prev; | 187 | struct xen_spinlock *prev; |
@@ -254,7 +254,7 @@ out: | |||
254 | return ret; | 254 | return ret; |
255 | } | 255 | } |
256 | 256 | ||
257 | static inline void __xen_spin_lock(struct raw_spinlock *lock, bool irq_enable) | 257 | static inline void __xen_spin_lock(struct arch_spinlock *lock, bool irq_enable) |
258 | { | 258 | { |
259 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | 259 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; |
260 | unsigned timeout; | 260 | unsigned timeout; |
@@ -291,12 +291,12 @@ static inline void __xen_spin_lock(struct raw_spinlock *lock, bool irq_enable) | |||
291 | spin_time_accum_total(start_spin); | 291 | spin_time_accum_total(start_spin); |
292 | } | 292 | } |
293 | 293 | ||
294 | static void xen_spin_lock(struct raw_spinlock *lock) | 294 | static void xen_spin_lock(struct arch_spinlock *lock) |
295 | { | 295 | { |
296 | __xen_spin_lock(lock, false); | 296 | __xen_spin_lock(lock, false); |
297 | } | 297 | } |
298 | 298 | ||
299 | static void xen_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags) | 299 | static void xen_spin_lock_flags(struct arch_spinlock *lock, unsigned long flags) |
300 | { | 300 | { |
301 | __xen_spin_lock(lock, !raw_irqs_disabled_flags(flags)); | 301 | __xen_spin_lock(lock, !raw_irqs_disabled_flags(flags)); |
302 | } | 302 | } |
@@ -317,7 +317,7 @@ static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl) | |||
317 | } | 317 | } |
318 | } | 318 | } |
319 | 319 | ||
320 | static void xen_spin_unlock(struct raw_spinlock *lock) | 320 | static void xen_spin_unlock(struct arch_spinlock *lock) |
321 | { | 321 | { |
322 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | 322 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; |
323 | 323 | ||
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 95be7b434724..987267f79bf5 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c | |||
@@ -1,4 +1,5 @@ | |||
1 | #include <linux/types.h> | 1 | #include <linux/types.h> |
2 | #include <linux/clockchips.h> | ||
2 | 3 | ||
3 | #include <xen/interface/xen.h> | 4 | #include <xen/interface/xen.h> |
4 | #include <xen/grant_table.h> | 5 | #include <xen/grant_table.h> |
@@ -27,6 +28,8 @@ void xen_pre_suspend(void) | |||
27 | 28 | ||
28 | void xen_post_suspend(int suspend_cancelled) | 29 | void xen_post_suspend(int suspend_cancelled) |
29 | { | 30 | { |
31 | xen_build_mfn_list_list(); | ||
32 | |||
30 | xen_setup_shared_info(); | 33 | xen_setup_shared_info(); |
31 | 34 | ||
32 | if (suspend_cancelled) { | 35 | if (suspend_cancelled) { |
@@ -44,7 +47,19 @@ void xen_post_suspend(int suspend_cancelled) | |||
44 | 47 | ||
45 | } | 48 | } |
46 | 49 | ||
50 | static void xen_vcpu_notify_restore(void *data) | ||
51 | { | ||
52 | unsigned long reason = (unsigned long)data; | ||
53 | |||
54 | /* Boot processor notified via generic timekeeping_resume() */ | ||
55 | if ( smp_processor_id() == 0) | ||
56 | return; | ||
57 | |||
58 | clockevents_notify(reason, NULL); | ||
59 | } | ||
60 | |||
47 | void xen_arch_resume(void) | 61 | void xen_arch_resume(void) |
48 | { | 62 | { |
49 | /* nothing */ | 63 | smp_call_function(xen_vcpu_notify_restore, |
64 | (void *)CLOCK_EVT_NOTIFY_RESUME, 1); | ||
50 | } | 65 | } |
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 26e37b787ad3..0d3f07cd1b5f 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
@@ -100,7 +100,7 @@ bool xen_vcpu_stolen(int vcpu) | |||
100 | return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable; | 100 | return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable; |
101 | } | 101 | } |
102 | 102 | ||
103 | static void setup_runstate_info(int cpu) | 103 | void xen_setup_runstate_info(int cpu) |
104 | { | 104 | { |
105 | struct vcpu_register_runstate_memory_area area; | 105 | struct vcpu_register_runstate_memory_area area; |
106 | 106 | ||
@@ -434,7 +434,7 @@ void xen_setup_timer(int cpu) | |||
434 | name = "<timer kasprintf failed>"; | 434 | name = "<timer kasprintf failed>"; |
435 | 435 | ||
436 | irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt, | 436 | irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt, |
437 | IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, | 437 | IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER, |
438 | name, NULL); | 438 | name, NULL); |
439 | 439 | ||
440 | evt = &per_cpu(xen_clock_events, cpu); | 440 | evt = &per_cpu(xen_clock_events, cpu); |
@@ -442,8 +442,6 @@ void xen_setup_timer(int cpu) | |||
442 | 442 | ||
443 | evt->cpumask = cpumask_of(cpu); | 443 | evt->cpumask = cpumask_of(cpu); |
444 | evt->irq = irq; | 444 | evt->irq = irq; |
445 | |||
446 | setup_runstate_info(cpu); | ||
447 | } | 445 | } |
448 | 446 | ||
449 | void xen_teardown_timer(int cpu) | 447 | void xen_teardown_timer(int cpu) |
@@ -494,6 +492,7 @@ __init void xen_time_init(void) | |||
494 | 492 | ||
495 | setup_force_cpu_cap(X86_FEATURE_TSC); | 493 | setup_force_cpu_cap(X86_FEATURE_TSC); |
496 | 494 | ||
495 | xen_setup_runstate_info(cpu); | ||
497 | xen_setup_timer(cpu); | 496 | xen_setup_timer(cpu); |
498 | xen_setup_cpu_clockevents(); | 497 | xen_setup_cpu_clockevents(); |
499 | } | 498 | } |
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 02f496a8dbaa..53adefda4275 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S | |||
@@ -96,7 +96,7 @@ ENTRY(xen_sysret32) | |||
96 | pushq $__USER32_CS | 96 | pushq $__USER32_CS |
97 | pushq %rcx | 97 | pushq %rcx |
98 | 98 | ||
99 | pushq $VGCF_in_syscall | 99 | pushq $0 |
100 | 1: jmp hypercall_iret | 100 | 1: jmp hypercall_iret |
101 | ENDPATCH(xen_sysret32) | 101 | ENDPATCH(xen_sysret32) |
102 | RELOC(xen_sysret32, 1b+1) | 102 | RELOC(xen_sysret32, 1b+1) |
@@ -151,7 +151,7 @@ ENTRY(xen_syscall32_target) | |||
151 | ENTRY(xen_sysenter_target) | 151 | ENTRY(xen_sysenter_target) |
152 | lea 16(%rsp), %rsp /* strip %rcx, %r11 */ | 152 | lea 16(%rsp), %rsp /* strip %rcx, %r11 */ |
153 | mov $-ENOSYS, %rax | 153 | mov $-ENOSYS, %rax |
154 | pushq $VGCF_in_syscall | 154 | pushq $0 |
155 | jmp hypercall_iret | 155 | jmp hypercall_iret |
156 | ENDPROC(xen_syscall32_target) | 156 | ENDPROC(xen_syscall32_target) |
157 | ENDPROC(xen_sysenter_target) | 157 | ENDPROC(xen_sysenter_target) |
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 355fa6b99c9c..f9153a300bce 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -25,6 +25,7 @@ extern struct shared_info *HYPERVISOR_shared_info; | |||
25 | 25 | ||
26 | void xen_setup_mfn_list_list(void); | 26 | void xen_setup_mfn_list_list(void); |
27 | void xen_setup_shared_info(void); | 27 | void xen_setup_shared_info(void); |
28 | void xen_build_mfn_list_list(void); | ||
28 | void xen_setup_machphys_mapping(void); | 29 | void xen_setup_machphys_mapping(void); |
29 | pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); | 30 | pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); |
30 | void xen_ident_map_ISA(void); | 31 | void xen_ident_map_ISA(void); |
@@ -41,6 +42,7 @@ void __init xen_build_dynamic_phys_to_machine(void); | |||
41 | 42 | ||
42 | void xen_init_irq_ops(void); | 43 | void xen_init_irq_ops(void); |
43 | void xen_setup_timer(int cpu); | 44 | void xen_setup_timer(int cpu); |
45 | void xen_setup_runstate_info(int cpu); | ||
44 | void xen_teardown_timer(int cpu); | 46 | void xen_teardown_timer(int cpu); |
45 | cycle_t xen_clocksource_read(void); | 47 | cycle_t xen_clocksource_read(void); |
46 | void xen_setup_cpu_clockevents(void); | 48 | void xen_setup_cpu_clockevents(void); |