Diffstat (limited to 'arch/x86'): 235 files changed, 11132 insertions(+), 5699 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index efb42949cc09..6c14ecd851d0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -26,6 +26,8 @@ config X86
 	select HAVE_IOREMAP_PROT
 	select HAVE_KPROBES
 	select HAVE_MEMBLOCK
+	select HAVE_MEMBLOCK_NODE_MAP
+	select ARCH_DISCARD_MEMBLOCK
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select ARCH_WANT_FRAME_POINTERS
 	select HAVE_DMA_ATTRS
@@ -58,8 +60,12 @@ config X86
 	select PERF_EVENTS
 	select HAVE_PERF_EVENTS_NMI
 	select ANON_INODES
+	select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386
+	select HAVE_CMPXCHG_LOCAL if !M386
+	select HAVE_CMPXCHG_DOUBLE
 	select HAVE_ARCH_KMEMCHECK
 	select HAVE_USER_RETURN_NOTIFIER
+	select ARCH_BINFMT_ELF_RANDOMIZE_PIE
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_TEXT_POKE_SMP
 	select HAVE_GENERIC_HARDIRQS
@@ -75,6 +81,7 @@ config X86
 	select HAVE_BPF_JIT if (X86_64 && NET)
 	select CLKEVT_I8253
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
+	select GENERIC_IOMAP
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS)
@@ -140,9 +147,6 @@ config NEED_SG_DMA_LENGTH
 config GENERIC_ISA_DMA
 	def_bool ISA_DMA_API
 
-config GENERIC_IOMAP
-	def_bool y
-
 config GENERIC_BUG
 	def_bool y
 	depends on BUG
@@ -204,9 +208,6 @@ config ZONE_DMA32
 	bool
 	default X86_64
 
-config ARCH_POPULATES_NODE_MAP
-	def_bool y
-
 config AUDIT_ARCH
 	bool
 	default X86_64
@@ -343,6 +344,7 @@ config X86_EXTENDED_PLATFORM
 
 	  If you enable this option then you'll be able to select support
 	  for the following (non-PC) 64 bit x86 platforms:
+		Numascale NumaChip
 		ScaleMP vSMP
 		SGI Ultraviolet
 
@@ -351,6 +353,18 @@ config X86_EXTENDED_PLATFORM
 endif
 # This is an alphabetically sorted list of 64 bit extended platforms
 # Please maintain the alphabetic order if and when there are additions
+config X86_NUMACHIP
+	bool "Numascale NumaChip"
+	depends on X86_64
+	depends on X86_EXTENDED_PLATFORM
+	depends on NUMA
+	depends on SMP
+	depends on X86_X2APIC
+	depends on !EDAC_AMD64
+	---help---
+	  Adds support for Numascale NumaChip large-SMP systems. Needed to
+	  enable more than ~168 cores.
+	  If you don't have one of these, you should say N here.
 
 config X86_VSMP
 	bool "ScaleMP vSMP"
@@ -409,12 +423,14 @@ config X86_MRST
 	depends on PCI
 	depends on PCI_GOANY
 	depends on X86_IO_APIC
+	select X86_INTEL_MID
+	select SFI
+	select DW_APB_TIMER
 	select APB_TIMER
 	select I2C
 	select SPI
 	select INTEL_SCU_IPC
 	select X86_PLATFORM_DEVICES
-	select X86_INTEL_MID
 	---help---
 	  Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin
 	  Internet Device(MID) platform. Moorestown consists of two chips:
@@ -423,6 +439,26 @@ config X86_MRST
 	  nor standard legacy replacement devices/features. e.g. Moorestown does
 	  not contain i8259, i8254, HPET, legacy BIOS, most of the io ports.
 
+config X86_MDFLD
+	bool "Medfield MID platform"
+	depends on PCI
+	depends on PCI_GOANY
+	depends on X86_IO_APIC
+	select X86_INTEL_MID
+	select SFI
+	select DW_APB_TIMER
+	select APB_TIMER
+	select I2C
+	select SPI
+	select INTEL_SCU_IPC
+	select X86_PLATFORM_DEVICES
+	---help---
+	  Medfield is Intel's Low Power Intel Architecture (LPIA) based Moblin
+	  Internet Device(MID) platform.
+	  Unlike standard x86 PCs, Medfield does not have many legacy devices
+	  nor standard legacy replacement devices/features. e.g. Medfield does
+	  not contain i8259, i8254, HPET, legacy BIOS, most of the io ports.
+
 endif
 
 config X86_RDC321X
@@ -620,7 +656,7 @@ config X86_SUMMIT_NUMA
 
 config X86_CYCLONE_TIMER
 	def_bool y
-	depends on X86_32_NON_STANDARD
+	depends on X86_SUMMIT
 
 source "arch/x86/Kconfig.cpu"
 
@@ -648,9 +684,10 @@ config HPET_EMULATE_RTC
 	depends on HPET_TIMER && (RTC=y || RTC=m || RTC_DRV_CMOS=m || RTC_DRV_CMOS=y)
 
 config APB_TIMER
-	def_bool y if MRST
-	prompt "Langwell APB Timer Support" if X86_MRST
+	def_bool y if X86_INTEL_MID
+	prompt "Intel MID APB Timer Support" if X86_INTEL_MID
 	select DW_APB_TIMER
+	depends on X86_INTEL_MID && SFI
 	help
 	  APB timer is the replacement for 8254, HPET on X86 MID platforms.
 	  The APBT provides a stable time base on SMP
@@ -1478,6 +1515,13 @@ config EFI
 	  resultant kernel should continue to boot on existing non-EFI
 	  platforms.
 
+config EFI_STUB
+	bool "EFI stub support"
+	depends on EFI
+	---help---
+	  This kernel feature allows a bzImage to be loaded directly
+	  by EFI firmware without the use of a bootloader.
+
 config SECCOMP
 	def_bool y
 	prompt "Enable seccomp to safely compute untrusted bytecode"
@@ -1730,7 +1774,7 @@ source "drivers/sfi/Kconfig"
 
 config X86_APM_BOOT
 	def_bool y
-	depends on APM || APM_MODULE
+	depends on APM
 
 menuconfig APM
 	tristate "APM (Advanced Power Management) BIOS support"
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index e3ca7e0d858c..3c57033e2211 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -309,12 +309,6 @@ config X86_INTERNODE_CACHE_SHIFT
 config X86_CMPXCHG
 	def_bool X86_64 || (X86_32 && !M386)
 
-config CMPXCHG_LOCAL
-	def_bool X86_64 || (X86_32 && !M386)
-
-config CMPXCHG_DOUBLE
-	def_bool y
-
 config X86_L1_CACHE_SHIFT
 	int
 	default "7" if MPENTIUM4 || MPSC
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index bf56e1793272..e46c2147397f 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -43,9 +43,9 @@ config EARLY_PRINTK
 	  with klogd/syslogd or the X server. You should normally say N here,
 	  unless you want to debug such a crash.
 
-config EARLY_PRINTK_MRST
-	bool "Early printk for MRST platform support"
-	depends on EARLY_PRINTK && X86_MRST
+config EARLY_PRINTK_INTEL_MID
+	bool "Early printk for Intel MID platform support"
+	depends on EARLY_PRINTK && X86_INTEL_MID
 
 config EARLY_PRINTK_DBGP
 	bool "Early printk via EHCI debug port"
@@ -63,8 +63,11 @@ config DEBUG_STACKOVERFLOW
 	bool "Check for stack overflows"
 	depends on DEBUG_KERNEL
 	---help---
-	  This option will cause messages to be printed if free stack space
-	  drops below a certain limit.
+	  Say Y here if you want to check for overflows of the kernel, IRQ
+	  and exception stacks. This option will cause detailed messages
+	  about the stacks to be printed when free stack space drops below
+	  a certain limit.
+	  If in doubt, say "N".
 
 config X86_PTDUMP
 	bool "Export kernel pagetable layout to userspace via debugfs"
@@ -284,4 +287,16 @@ config DEBUG_STRICT_USER_COPY_CHECKS
 
 	  If unsure, or if you run an older (pre 4.4) gcc, say N.
 
+config DEBUG_NMI_SELFTEST
+	bool "NMI Selftest"
+	depends on DEBUG_KERNEL && X86_LOCAL_APIC
+	---help---
+	  Enabling this option turns on a quick NMI selftest to verify
+	  that the NMI behaves correctly.
+
+	  This might help diagnose strange hangs that rely on NMI to
+	  function properly.
+
+	  If unsure, say N.
+
 endmenu
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index b02e509072a7..209ba1294592 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -118,6 +118,12 @@ KBUILD_CFLAGS += $(mflags-y)
 KBUILD_AFLAGS += $(mflags-y)
 
 ###
+# Syscall table generation
+
+archheaders:
+	$(Q)$(MAKE) $(build)=arch/x86/syscalls all
+
+###
 # Kernel objects
 
 head-y := arch/x86/kernel/head_$(BITS).o
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 09664efb9cee..b123b9a8f5b3 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -23,7 +23,15 @@ LDFLAGS_vmlinux := -T
 
 hostprogs-y	:= mkpiggy
 
-$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o $(obj)/piggy.o FORCE
+VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
+	$(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \
+	$(obj)/piggy.o
+
+ifeq ($(CONFIG_EFI_STUB), y)
+	VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o
+endif
+
+$(obj)/vmlinux: $(VMLINUX_OBJS) FORCE
 	$(call if_changed,ld)
 	@:
 
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
new file mode 100644
index 000000000000..fec216f4fbc3
--- /dev/null
+++ b/arch/x86/boot/compressed/eboot.c
@@ -0,0 +1,1022 @@
+/* -----------------------------------------------------------------------
+ *
+ *   Copyright 2011 Intel Corporation; author Matt Fleming
+ *
+ *   This file is part of the Linux kernel, and is made available under
+ *   the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+#include <linux/efi.h>
+#include <asm/efi.h>
+#include <asm/setup.h>
+#include <asm/desc.h>
+
+#include "eboot.h"
+
+static efi_system_table_t *sys_table;
+
+static efi_status_t __get_map(efi_memory_desc_t **map, unsigned long *map_size,
+			      unsigned long *desc_size)
+{
+	efi_memory_desc_t *m = NULL;
+	efi_status_t status;
+	unsigned long key;
+	u32 desc_version;
+
+	*map_size = sizeof(*m) * 32;
+again:
+	/*
+	 * Add an additional efi_memory_desc_t because we're doing an
+	 * allocation which may be in a new descriptor region.
+	 */
+	*map_size += sizeof(*m);
+	status = efi_call_phys3(sys_table->boottime->allocate_pool,
+				EFI_LOADER_DATA, *map_size, (void **)&m);
+	if (status != EFI_SUCCESS)
+		goto fail;
+
+	status = efi_call_phys5(sys_table->boottime->get_memory_map, map_size,
+				m, &key, desc_size, &desc_version);
+	if (status == EFI_BUFFER_TOO_SMALL) {
+		efi_call_phys1(sys_table->boottime->free_pool, m);
+		goto again;
+	}
+
+	if (status != EFI_SUCCESS)
+		efi_call_phys1(sys_table->boottime->free_pool, m);
+
+fail:
+	*map = m;
+	return status;
+}
+
+/*
+ * Allocate at the highest possible address that is not above 'max'.
+ */
+static efi_status_t high_alloc(unsigned long size, unsigned long align,
+			       unsigned long *addr, unsigned long max)
+{
+	unsigned long map_size, desc_size;
+	efi_memory_desc_t *map;
+	efi_status_t status;
+	unsigned long nr_pages;
+	u64 max_addr = 0;
+	int i;
+
+	status = __get_map(&map, &map_size, &desc_size);
+	if (status != EFI_SUCCESS)
+		goto fail;
+
+	nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
+again:
+	for (i = 0; i < map_size / desc_size; i++) {
+		efi_memory_desc_t *desc;
+		unsigned long m = (unsigned long)map;
+		u64 start, end;
+
+		desc = (efi_memory_desc_t *)(m + (i * desc_size));
+		if (desc->type != EFI_CONVENTIONAL_MEMORY)
+			continue;
+
+		if (desc->num_pages < nr_pages)
+			continue;
+
+		start = desc->phys_addr;
+		end = start + desc->num_pages * (1UL << EFI_PAGE_SHIFT);
+
+		if ((start + size) > end || (start + size) > max)
+			continue;
+
+		if (end - size > max)
+			end = max;
+
+		if (round_down(end - size, align) < start)
+			continue;
+
+		start = round_down(end - size, align);
+
+		/*
+		 * Don't allocate at 0x0. It will confuse code that
+		 * checks pointers against NULL.
+		 */
+		if (start == 0x0)
+			continue;
+
+		if (start > max_addr)
+			max_addr = start;
+	}
+
+	if (!max_addr)
+		status = EFI_NOT_FOUND;
+	else {
+		status = efi_call_phys4(sys_table->boottime->allocate_pages,
+					EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA,
+					nr_pages, &max_addr);
+		if (status != EFI_SUCCESS) {
+			max = max_addr;
+			max_addr = 0;
+			goto again;
+		}
+
+		*addr = max_addr;
+	}
+
+free_pool:
+	efi_call_phys1(sys_table->boottime->free_pool, map);
+
+fail:
+	return status;
+}
+
+/*
+ * Allocate at the lowest possible address.
+ */
+static efi_status_t low_alloc(unsigned long size, unsigned long align,
+			      unsigned long *addr)
+{
+	unsigned long map_size, desc_size;
+	efi_memory_desc_t *map;
+	efi_status_t status;
+	unsigned long nr_pages;
+	int i;
+
+	status = __get_map(&map, &map_size, &desc_size);
+	if (status != EFI_SUCCESS)
+		goto fail;
+
+	nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
+	for (i = 0; i < map_size / desc_size; i++) {
+		efi_memory_desc_t *desc;
+		unsigned long m = (unsigned long)map;
+		u64 start, end;
+
+		desc = (efi_memory_desc_t *)(m + (i * desc_size));
+
+		if (desc->type != EFI_CONVENTIONAL_MEMORY)
+			continue;
+
+		if (desc->num_pages < nr_pages)
+			continue;
+
+		start = desc->phys_addr;
+		end = start + desc->num_pages * (1UL << EFI_PAGE_SHIFT);
+
+		/*
+		 * Don't allocate at 0x0. It will confuse code that
+		 * checks pointers against NULL. Skip the first 8
+		 * bytes so we start at a nice even number.
+		 */
+		if (start == 0x0)
+			start += 8;
+
+		start = round_up(start, align);
+		if ((start + size) > end)
+			continue;
+
+		status = efi_call_phys4(sys_table->boottime->allocate_pages,
+					EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA,
+					nr_pages, &start);
+		if (status == EFI_SUCCESS) {
+			*addr = start;
+			break;
+		}
+	}
+
+	if (i == map_size / desc_size)
+		status = EFI_NOT_FOUND;
+
+free_pool:
+	efi_call_phys1(sys_table->boottime->free_pool, map);
+fail:
+	return status;
+}
+
+static void low_free(unsigned long size, unsigned long addr)
+{
+	unsigned long nr_pages;
+
+	nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
+	efi_call_phys2(sys_table->boottime->free_pages, addr, nr_pages);
+}
+
+static void find_bits(unsigned long mask, u8 *pos, u8 *size)
+{
+	u8 first, len;
+
+	first = 0;
+	len = 0;
+
+	if (mask) {
+		while (!(mask & 0x1)) {
+			mask = mask >> 1;
+			first++;
+		}
+
+		while (mask & 0x1) {
+			mask = mask >> 1;
+			len++;
+		}
+	}
+
+	*pos = first;
+	*size = len;
+}
+
+/*
+ * See if we have Graphics Output Protocol
+ */
+static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
+			      unsigned long size)
+{
+	struct efi_graphics_output_protocol *gop, *first_gop;
+	struct efi_pixel_bitmask pixel_info;
+	unsigned long nr_gops;
+	efi_status_t status;
+	void **gop_handle;
+	u16 width, height;
+	u32 fb_base, fb_size;
+	u32 pixels_per_scan_line;
+	int pixel_format;
+	int i;
+
+	status = efi_call_phys3(sys_table->boottime->allocate_pool,
+				EFI_LOADER_DATA, size, &gop_handle);
+	if (status != EFI_SUCCESS)
+		return status;
+
+	status = efi_call_phys5(sys_table->boottime->locate_handle,
+				EFI_LOCATE_BY_PROTOCOL, proto,
+				NULL, &size, gop_handle);
+	if (status != EFI_SUCCESS)
+		goto free_handle;
+
+	first_gop = NULL;
+
+	nr_gops = size / sizeof(void *);
+	for (i = 0; i < nr_gops; i++) {
+		struct efi_graphics_output_mode_info *info;
+		efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID;
+		void *pciio;
+		void *h = gop_handle[i];
+
+		status = efi_call_phys3(sys_table->boottime->handle_protocol,
+					h, proto, &gop);
+		if (status != EFI_SUCCESS)
+			continue;
+
+		efi_call_phys3(sys_table->boottime->handle_protocol,
+			       h, &pciio_proto, &pciio);
+
+		status = efi_call_phys4(gop->query_mode, gop,
+					gop->mode->mode, &size, &info);
+		if (status == EFI_SUCCESS && (!first_gop || pciio)) {
+			/*
+			 * Apple provide GOPs that are not backed by
+			 * real hardware (they're used to handle
+			 * multiple displays). The workaround is to
+			 * search for a GOP implementing the PCIIO
+			 * protocol, and if one isn't found, to just
+			 * fall back to the first GOP.
+			 */
+			width = info->horizontal_resolution;
+			height = info->vertical_resolution;
+			fb_base = gop->mode->frame_buffer_base;
+			fb_size = gop->mode->frame_buffer_size;
+			pixel_format = info->pixel_format;
+			pixel_info = info->pixel_information;
+			pixels_per_scan_line = info->pixels_per_scan_line;
+
+			/*
+			 * Once we've found a GOP supporting PCIIO,
+			 * don't bother looking any further.
+			 */
+			if (pciio)
+				break;
+
+			first_gop = gop;
+		}
+	}
+
+	/* Did we find any GOPs? */
+	if (!first_gop)
+		goto free_handle;
+
+	/* EFI framebuffer */
+	si->orig_video_isVGA = VIDEO_TYPE_EFI;
+
+	si->lfb_width = width;
+	si->lfb_height = height;
+	si->lfb_base = fb_base;
+	si->lfb_size = fb_size;
+	si->pages = 1;
+
+	if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) {
+		si->lfb_depth = 32;
+		si->lfb_linelength = pixels_per_scan_line * 4;
+		si->red_size = 8;
+		si->red_pos = 0;
+		si->green_size = 8;
+		si->green_pos = 8;
+		si->blue_size = 8;
+		si->blue_pos = 16;
+		si->rsvd_size = 8;
+		si->rsvd_pos = 24;
+	} else if (pixel_format == PIXEL_BGR_RESERVED_8BIT_PER_COLOR) {
+		si->lfb_depth = 32;
+		si->lfb_linelength = pixels_per_scan_line * 4;
+		si->red_size = 8;
+		si->red_pos = 16;
+		si->green_size = 8;
+		si->green_pos = 8;
+		si->blue_size = 8;
+		si->blue_pos = 0;
+		si->rsvd_size = 8;
+		si->rsvd_pos = 24;
+	} else if (pixel_format == PIXEL_BIT_MASK) {
+		find_bits(pixel_info.red_mask, &si->red_pos, &si->red_size);
+		find_bits(pixel_info.green_mask, &si->green_pos,
+			  &si->green_size);
+		find_bits(pixel_info.blue_mask, &si->blue_pos, &si->blue_size);
+		find_bits(pixel_info.reserved_mask, &si->rsvd_pos,
+			  &si->rsvd_size);
+		si->lfb_depth = si->red_size + si->green_size +
+			si->blue_size + si->rsvd_size;
+		si->lfb_linelength = (pixels_per_scan_line * si->lfb_depth) / 8;
+	} else {
+		si->lfb_depth = 4;
+		si->lfb_linelength = si->lfb_width / 2;
+		si->red_size = 0;
+		si->red_pos = 0;
+		si->green_size = 0;
+		si->green_pos = 0;
+		si->blue_size = 0;
+		si->blue_pos = 0;
+		si->rsvd_size = 0;
+		si->rsvd_pos = 0;
+	}
+
+free_handle:
+	efi_call_phys1(sys_table->boottime->free_pool, gop_handle);
+	return status;
+}
+
+/*
+ * See if we have Universal Graphics Adapter (UGA) protocol
+ */
+static efi_status_t setup_uga(struct screen_info *si, efi_guid_t *uga_proto,
+			      unsigned long size)
+{
+	struct efi_uga_draw_protocol *uga, *first_uga;
+	unsigned long nr_ugas;
+	efi_status_t status;
+	u32 width, height;
+	void **uga_handle = NULL;
+	int i;
+
+	status = efi_call_phys3(sys_table->boottime->allocate_pool,
+				EFI_LOADER_DATA, size, &uga_handle);
+	if (status != EFI_SUCCESS)
+		return status;
+
+	status = efi_call_phys5(sys_table->boottime->locate_handle,
+				EFI_LOCATE_BY_PROTOCOL, uga_proto,
+				NULL, &size, uga_handle);
+	if (status != EFI_SUCCESS)
+		goto free_handle;
+
+	first_uga = NULL;
+
+	nr_ugas = size / sizeof(void *);
+	for (i = 0; i < nr_ugas; i++) {
+		efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID;
+		void *handle = uga_handle[i];
+		u32 w, h, depth, refresh;
+		void *pciio;
+
+		status = efi_call_phys3(sys_table->boottime->handle_protocol,
+					handle, uga_proto, &uga);
+		if (status != EFI_SUCCESS)
+			continue;
+
+		efi_call_phys3(sys_table->boottime->handle_protocol,
+			       handle, &pciio_proto, &pciio);
+
+		status = efi_call_phys5(uga->get_mode, uga, &w, &h,
+					&depth, &refresh);
+		if (status == EFI_SUCCESS && (!first_uga || pciio)) {
+			width = w;
+			height = h;
+
+			/*
+			 * Once we've found a UGA supporting PCIIO,
+			 * don't bother looking any further.
+			 */
+			if (pciio)
+				break;
+
+			first_uga = uga;
+		}
+	}
+
+	if (!first_uga)
+		goto free_handle;
+
+	/* EFI framebuffer */
+	si->orig_video_isVGA = VIDEO_TYPE_EFI;
+
+	si->lfb_depth = 32;
+	si->lfb_width = width;
+	si->lfb_height = height;
+
+	si->red_size = 8;
+	si->red_pos = 16;
+	si->green_size = 8;
+	si->green_pos = 8;
+	si->blue_size = 8;
+	si->blue_pos = 0;
+	si->rsvd_size = 8;
+	si->rsvd_pos = 24;
+
+
+free_handle:
+	efi_call_phys1(sys_table->boottime->free_pool, uga_handle);
+	return status;
+}
+
+void setup_graphics(struct boot_params *boot_params)
+{
+	efi_guid_t graphics_proto = EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID;
+	struct screen_info *si;
+	efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID;
+	efi_status_t status;
+	unsigned long size;
+	void **gop_handle = NULL;
+	void **uga_handle = NULL;
+
+	si = &boot_params->screen_info;
+	memset(si, 0, sizeof(*si));
+
+	size = 0;
+	status = efi_call_phys5(sys_table->boottime->locate_handle,
+				EFI_LOCATE_BY_PROTOCOL, &graphics_proto,
+				NULL, &size, gop_handle);
+	if (status == EFI_BUFFER_TOO_SMALL)
+		status = setup_gop(si, &graphics_proto, size);
+
+	if (status != EFI_SUCCESS) {
+		size = 0;
+		status = efi_call_phys5(sys_table->boottime->locate_handle,
+					EFI_LOCATE_BY_PROTOCOL, &uga_proto,
+					NULL, &size, uga_handle);
+		if (status == EFI_BUFFER_TOO_SMALL)
+			setup_uga(si, &uga_proto, size);
+	}
+}
+
+struct initrd {
+	efi_file_handle_t *handle;
+	u64 size;
+};
+
+/*
+ * Check the cmdline for LILO-style initrd= arguments.
+ *
+ * We only support loading an initrd from the same filesystem as the
+ * kernel image.
+ */
+static efi_status_t handle_ramdisks(efi_loaded_image_t *image,
+				    struct setup_header *hdr)
+{
+	struct initrd *initrds;
+	unsigned long initrd_addr;
+	efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID;
+	u64 initrd_total;
+	efi_file_io_interface_t *io;
+	efi_file_handle_t *fh;
+	efi_status_t status;
+	int nr_initrds;
+	char *str;
+	int i, j, k;
+
+	initrd_addr = 0;
+	initrd_total = 0;
+
+	str = (char *)(unsigned long)hdr->cmd_line_ptr;
+
+	j = 0;			/* See close_handles */
+
+	if (!str || !*str)
+		return EFI_SUCCESS;
+
+	for (nr_initrds = 0; *str; nr_initrds++) {
+		str = strstr(str, "initrd=");
+		if (!str)
+			break;
+
+		str += 7;
+
+		/* Skip any leading slashes */
+		while (*str == '/' || *str == '\\')
+			str++;
+
+		while (*str && *str != ' ' && *str != '\n')
+			str++;
+	}
+
+	if (!nr_initrds)
+		return EFI_SUCCESS;
+
+	status = efi_call_phys3(sys_table->boottime->allocate_pool,
+				EFI_LOADER_DATA,
+				nr_initrds * sizeof(*initrds),
+				&initrds);
+	if (status != EFI_SUCCESS)
+		goto fail;
+
+	str = (char *)(unsigned long)hdr->cmd_line_ptr;
+	for (i = 0; i < nr_initrds; i++) {
+		struct initrd *initrd;
+		efi_file_handle_t *h;
+		efi_file_info_t *info;
+		efi_char16_t filename[256];
+		unsigned long info_sz;
+		efi_guid_t info_guid = EFI_FILE_INFO_ID;
+		efi_char16_t *p;
+		u64 file_sz;
+
+		str = strstr(str, "initrd=");
+		if (!str)
+			break;
+
+		str += 7;
+
+		initrd = &initrds[i];
+		p = filename;
+
+		/* Skip any leading slashes */
+		while (*str == '/' || *str == '\\')
+			str++;
+
+		while (*str && *str != ' ' && *str != '\n') {
+			if (p >= filename + sizeof(filename))
+				break;
+
+			*p++ = *str++;
+		}
+
+		*p = '\0';
+
+		/* Only open the volume once. */
+		if (!i) {
+			efi_boot_services_t *boottime;
+
+			boottime = sys_table->boottime;
+
+			status = efi_call_phys3(boottime->handle_protocol,
+						image->device_handle, &fs_proto, &io);
+			if (status != EFI_SUCCESS)
+				goto free_initrds;
+
+			status = efi_call_phys2(io->open_volume, io, &fh);
+			if (status != EFI_SUCCESS)
+				goto free_initrds;
+		}
+
+		status = efi_call_phys5(fh->open, fh, &h, filename,
+					EFI_FILE_MODE_READ, (u64)0);
+		if (status != EFI_SUCCESS)
+			goto close_handles;
+
+		initrd->handle = h;
+
+		info_sz = 0;
+		status = efi_call_phys4(h->get_info, h, &info_guid,
+					&info_sz, NULL);
+		if (status != EFI_BUFFER_TOO_SMALL)
+			goto close_handles;
+
+grow:
+		status = efi_call_phys3(sys_table->boottime->allocate_pool,
+					EFI_LOADER_DATA, info_sz, &info);
+		if (status != EFI_SUCCESS)
+			goto close_handles;
+
+		status = efi_call_phys4(h->get_info, h, &info_guid,
+					&info_sz, info);
+		if (status == EFI_BUFFER_TOO_SMALL) {
+			efi_call_phys1(sys_table->boottime->free_pool, info);
+			goto grow;
+		}
+
+		file_sz = info->file_size;
+		efi_call_phys1(sys_table->boottime->free_pool, info);
+
+		if (status != EFI_SUCCESS)
+			goto close_handles;
+
+		initrd->size = file_sz;
+		initrd_total += file_sz;
+	}
+
+	if (initrd_total) {
+		unsigned long addr;
+
+		/*
+		 * Multiple initrds need to be at consecutive
+		 * addresses in memory, so allocate enough memory for
+		 * all the initrds.
+		 */
+		status = high_alloc(initrd_total, 0x1000,
+				    &initrd_addr, hdr->initrd_addr_max);
+		if (status != EFI_SUCCESS)
+			goto close_handles;
+
+		/* We've run out of free low memory. */
+		if (initrd_addr > hdr->initrd_addr_max) {
+			status = EFI_INVALID_PARAMETER;
+			goto free_initrd_total;
+		}
+
+		addr = initrd_addr;
+		for (j = 0; j < nr_initrds; j++) {
+			u64 size;
+
+			size = initrds[j].size;
+			while (size) {
+				u64 chunksize;
+				if (size > EFI_READ_CHUNK_SIZE)
+					chunksize = EFI_READ_CHUNK_SIZE;
+				else
+					chunksize = size;
+				status = efi_call_phys3(fh->read,
+							initrds[j].handle,
+							&chunksize, addr);
+				if (status != EFI_SUCCESS)
+					goto free_initrd_total;
+				addr += chunksize;
+				size -= chunksize;
+			}
+
+			efi_call_phys1(fh->close, initrds[j].handle);
+		}
+
+	}
+
+	efi_call_phys1(sys_table->boottime->free_pool, initrds);
+
+	hdr->ramdisk_image = initrd_addr;
+	hdr->ramdisk_size = initrd_total;
+
+	return status;
+
+free_initrd_total:
+	low_free(initrd_total, initrd_addr);
+
+close_handles:
+	for (k = j; k < nr_initrds; k++)
+		efi_call_phys1(fh->close, initrds[k].handle);
+free_initrds:
+	efi_call_phys1(sys_table->boottime->free_pool, initrds);
+fail:
+	hdr->ramdisk_image = 0;
+	hdr->ramdisk_size = 0;
+
+	return status;
+}
+
+/*
+ * Because the x86 boot code expects to be passed a boot_params we
+ * need to create one ourselves (usually the bootloader would create
+ * one for us).
+ */
+static efi_status_t make_boot_params(struct boot_params *boot_params,
+				     efi_loaded_image_t *image,
+				     void *handle)
+{
+	struct efi_info *efi = &boot_params->efi_info;
+	struct apm_bios_info *bi = &boot_params->apm_bios_info;
+	struct sys_desc_table *sdt = &boot_params->sys_desc_table;
+	struct e820entry *e820_map = &boot_params->e820_map[0];
+	struct e820entry *prev = NULL;
+	struct setup_header *hdr = &boot_params->hdr;
+	unsigned long size, key, desc_size, _size;
+	efi_memory_desc_t *mem_map;
+	void *options = image->load_options;
+	u32 load_options_size = image->load_options_size / 2; /* ASCII */
+	int options_size = 0;
+	efi_status_t status;
+	__u32 desc_version;
+	unsigned long cmdline;
+	u8 nr_entries;
+	u16 *s2;
+	u8 *s1;
+	int i;
+
+	hdr->type_of_loader = 0x21;
+
+	/* Convert unicode cmdline to ascii */
+	cmdline = 0;
+	s2 = (u16 *)options;
+
+	if (s2) {
+		while (*s2 && *s2 != '\n' && options_size < load_options_size) {
+			s2++;
+			options_size++;
+		}
+
+		if (options_size) {
+			if (options_size > hdr->cmdline_size)
+				options_size = hdr->cmdline_size;
+
+			options_size++;	/* NUL termination */
+
+			status = low_alloc(options_size, 1, &cmdline);
+			if (status != EFI_SUCCESS)
+				goto fail;
+
+			s1 = (u8 *)(unsigned long)cmdline;
+			s2 = (u16 *)options;
+
+			for (i = 0; i < options_size - 1; i++)
+				*s1++ = *s2++;
+
+			*s1 = '\0';
+		}
+	}
+
+	hdr->cmd_line_ptr = cmdline;
+
+	hdr->ramdisk_image = 0;
+	hdr->ramdisk_size = 0;
+
+	status = handle_ramdisks(image, hdr);
+	if (status != EFI_SUCCESS)
+		goto free_cmdline;
+
+	setup_graphics(boot_params);
+
+	/* Clear APM BIOS info */
+	memset(bi, 0, sizeof(*bi));
+
+	memset(sdt, 0, sizeof(*sdt));
+
+	memcpy(&efi->efi_loader_signature, EFI_LOADER_SIGNATURE, sizeof(__u32));
+
+	size = sizeof(*mem_map) * 32;
+
+again:
+	size += sizeof(*mem_map);
+	_size = size;
+	status = low_alloc(size, 1, (unsigned long *)&mem_map);
+	if (status != EFI_SUCCESS)
+		goto free_cmdline;
+
+	status = efi_call_phys5(sys_table->boottime->get_memory_map, &size,
+				mem_map, &key, &desc_size, &desc_version);
+	if (status == EFI_BUFFER_TOO_SMALL) {
+		low_free(_size, (unsigned long)mem_map);
+		goto again;
+	}
+
+	if (status != EFI_SUCCESS)
+		goto free_mem_map;
+
+	efi->efi_systab = (unsigned long)sys_table;
+	efi->efi_memdesc_size = desc_size;
+	efi->efi_memdesc_version = desc_version;
+	efi->efi_memmap = (unsigned long)mem_map;
+	efi->efi_memmap_size = size;
+
+#ifdef CONFIG_X86_64
+	efi->efi_systab_hi = (unsigned long)sys_table >> 32;
+	efi->efi_memmap_hi = (unsigned long)mem_map >> 32;
+#endif
+
+	/* Might as well exit boot services now */
+	status = efi_call_phys2(sys_table->boottime->exit_boot_services,
+				handle, key);
+	if (status != EFI_SUCCESS)
+		goto free_mem_map;
+
+	/* Historic? */
+	boot_params->alt_mem_k = 32 * 1024;
+
+	/*
+	 * Convert the EFI memory map to E820.
+	 */
+	nr_entries = 0;
+	for (i = 0; i < size / desc_size; i++) {
+		efi_memory_desc_t *d;
+		unsigned int e820_type = 0;
+		unsigned long m = (unsigned long)mem_map;
+
+		d = (efi_memory_desc_t *)(m + (i * desc_size));
+		switch (d->type) {
+		case EFI_RESERVED_TYPE:
+		case EFI_RUNTIME_SERVICES_CODE:
+		case EFI_RUNTIME_SERVICES_DATA:
+		case EFI_MEMORY_MAPPED_IO:
+		case EFI_MEMORY_MAPPED_IO_PORT_SPACE:
+		case EFI_PAL_CODE:
+			e820_type = E820_RESERVED;
+			break;
+
+		case EFI_UNUSABLE_MEMORY:
+			e820_type = E820_UNUSABLE;
+			break;
+
+		case EFI_ACPI_RECLAIM_MEMORY:
+			e820_type = E820_ACPI;
+			break;
+
+		case EFI_LOADER_CODE:
+		case EFI_LOADER_DATA:
+		case EFI_BOOT_SERVICES_CODE:
+		case EFI_BOOT_SERVICES_DATA:
+		case EFI_CONVENTIONAL_MEMORY:
+			e820_type = E820_RAM;
+			break;
+
+		case EFI_ACPI_MEMORY_NVS:
+			e820_type = E820_NVS;
+			break;
+
+		default:
+			continue;
+		}
+
+		/* Merge adjacent mappings */
+		if (prev && prev->type == e820_type &&
+		    (prev->addr + prev->size) == d->phys_addr)
+			prev->size += d->num_pages << 12;
+		else {
+			e820_map->addr = d->phys_addr;
+			e820_map->size = d->num_pages << 12;
+			e820_map->type = e820_type;
+			prev = e820_map++;
+			nr_entries++;
+		}
+	}
+
+	boot_params->e820_entries = nr_entries;
+
+	return EFI_SUCCESS;
+
+free_mem_map:
+	low_free(_size, (unsigned long)mem_map);
+free_cmdline:
+	if (options_size)
+		low_free(options_size, hdr->cmd_line_ptr);
+fail:
+	return status;
+}
+
+/*
+ * On success we return a pointer to a boot_params structure, and NULL
+ * on failure.
+ */
+struct boot_params *efi_main(void *handle, efi_system_table_t *_table)
+{
+	struct boot_params *boot_params;
+	unsigned long start, nr_pages;
+	struct desc_ptr *gdt, *idt;
+	efi_loaded_image_t *image;
+	struct setup_header *hdr;
+	efi_status_t status;
+	efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID;
+	struct desc_struct *desc;
+
+	sys_table = _table;
+
+	/* Check if we were booted by the EFI firmware */
+	if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
+		goto fail;
+
+	status = efi_call_phys3(sys_table->boottime->handle_protocol,
+				handle, &proto, (void *)&image);
+	if (status != EFI_SUCCESS)
+		goto fail;
+
+	status = low_alloc(0x4000, 1, (unsigned long *)&boot_params);
+	if (status != EFI_SUCCESS)
+		goto fail;
+
+	memset(boot_params, 0x0, 0x4000);
+
+	/* Copy first two sectors to boot_params */
+	memcpy(boot_params, image->image_base, 1024);
+
+	hdr = &boot_params->hdr;
+
+	/*
+	 * The EFI firmware loader could have placed the kernel image
+	 * anywhere in memory, but the kernel has various restrictions
+	 * on the max physical address it can run at. Attempt to move
+	 * the kernel to boot_params.pref_address, or as low as
+	 * possible.
+	 */
+	start = hdr->pref_address;
+	nr_pages = round_up(hdr->init_size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
+
+	status = efi_call_phys4(sys_table->boottime->allocate_pages,
+				EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA,
+				nr_pages, &start);
+	if (status != EFI_SUCCESS) {
+		status = low_alloc(hdr->init_size, hdr->kernel_alignment,
+				   &start);
+		if (status != EFI_SUCCESS)
+			goto fail;
+	}
+
+	hdr->code32_start = (__u32)start;
+	hdr->pref_address = (__u64)(unsigned long)image->image_base;
+
+	memcpy((void *)start, image->image_base, image->image_size);
+
+	status = efi_call_phys3(sys_table->boottime->allocate_pool,
+				EFI_LOADER_DATA, sizeof(*gdt),
+				(void **)&gdt);
+	if (status != EFI_SUCCESS)
+		goto fail;
+
+	gdt->size = 0x800;
+	status = low_alloc(gdt->size, 8, (unsigned long *)&gdt->address);
+	if (status != EFI_SUCCESS)
+		goto fail;
+
+	status = efi_call_phys3(sys_table->boottime->allocate_pool,
+				EFI_LOADER_DATA, sizeof(*idt),
+				(void **)&idt);
+	if (status != EFI_SUCCESS)
+		goto fail;
+
+	idt->size = 0;
+	idt->address = 0;
+
+	status = make_boot_params(boot_params, image, handle);
+	if (status != EFI_SUCCESS)
+		goto fail;
+
+	memset((char *)gdt->address, 0x0, gdt->size);
+	desc = (struct desc_struct *)gdt->address;
+
+	/* The first GDT is a dummy and the second is unused. */
+	desc += 2;
+
+	desc->limit0 = 0xffff;
+	desc->base0 = 0x0000;
+	desc->base1 = 0x0000;
+	desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ;
+	desc->s = DESC_TYPE_CODE_DATA;
+	desc->dpl = 0;
+	desc->p = 1;
+	desc->limit = 0xf;
+	desc->avl = 0;
+	desc->l = 0;
+	desc->d = SEG_OP_SIZE_32BIT;
+	desc->g = SEG_GRANULARITY_4KB;
+	desc->base2 = 0x00;
+
+	desc++;
+	desc->limit0 = 0xffff;
+	desc->base0 = 0x0000;
+	desc->base1 = 0x0000;
+	desc->type = SEG_TYPE_DATA | SEG_TYPE_READ_WRITE;
+	desc->s = DESC_TYPE_CODE_DATA;
+	desc->dpl = 0;
+	desc->p = 1;
+	desc->limit = 0xf;
+	desc->avl = 0;
+	desc->l = 0;
+	desc->d = SEG_OP_SIZE_32BIT;
+	desc->g = SEG_GRANULARITY_4KB;
+	desc->base2 = 0x00;
+
+#ifdef CONFIG_X86_64
+	/* Task segment value */
+	desc++;
+	desc->limit0 = 0x0000;
+	desc->base0 = 0x0000;
+	desc->base1 = 0x0000;
+	desc->type = SEG_TYPE_TSS;
+	desc->s = 0;
+	desc->dpl = 0;
+	desc->p = 1;
+	desc->limit = 0x0;
+	desc->avl = 0;
+	desc->l = 0;
+	desc->d = 0;
+	desc->g = SEG_GRANULARITY_4KB;
+	desc->base2 = 0x00;
+#endif /* CONFIG_X86_64 */
+
+	asm volatile ("lidt %0" : : "m" (*idt));
+	asm volatile ("lgdt %0" : : "m" (*gdt));
+
+	asm volatile("cli");
+
+	return boot_params;
+fail:
+	return NULL;
+}
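
The find_bits() helper above derives the bit position and width that screen_info needs from a contiguous pixel mask. A minimal host-side sketch of the same logic, runnable as a standalone C program (the mask value is just an example of an XRGB8888-style PIXEL_BIT_MASK mode, not taken from the patch):

#include <stdio.h>
#include <stdint.h>

/* Same algorithm as find_bits() in eboot.c: count the low zero bits
 * (position), then the run of contiguous one bits (size). */
static void find_bits(unsigned long mask, uint8_t *pos, uint8_t *size)
{
	uint8_t first = 0, len = 0;

	if (mask) {
		while (!(mask & 0x1)) {		/* skip low zeros */
			mask >>= 1;
			first++;
		}
		while (mask & 0x1) {		/* count the ones */
			mask >>= 1;
			len++;
		}
	}
	*pos = first;
	*size = len;
}

int main(void)
{
	uint8_t pos, size;

	find_bits(0x00ff0000, &pos, &size);
	printf("red: pos=%u size=%u\n", pos, size);	/* pos=16 size=8 */
	return 0;
}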
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h
new file mode 100644
index 000000000000..39251663e65b
--- /dev/null
+++ b/arch/x86/boot/compressed/eboot.h
@@ -0,0 +1,61 @@
+#ifndef BOOT_COMPRESSED_EBOOT_H
+#define BOOT_COMPRESSED_EBOOT_H
+
+#define SEG_TYPE_DATA		(0 << 3)
+#define SEG_TYPE_READ_WRITE	(1 << 1)
+#define SEG_TYPE_CODE		(1 << 3)
+#define SEG_TYPE_EXEC_READ	(1 << 1)
+#define SEG_TYPE_TSS		((1 << 3) | (1 << 0))
+#define SEG_OP_SIZE_32BIT	(1 << 0)
+#define SEG_GRANULARITY_4KB	(1 << 0)
+
+#define DESC_TYPE_CODE_DATA	(1 << 0)
+
+#define EFI_PAGE_SIZE		(1UL << EFI_PAGE_SHIFT)
+#define EFI_READ_CHUNK_SIZE	(1024 * 1024)
+
+#define PIXEL_RGB_RESERVED_8BIT_PER_COLOR	0
+#define PIXEL_BGR_RESERVED_8BIT_PER_COLOR	1
+#define PIXEL_BIT_MASK				2
+#define PIXEL_BLT_ONLY				3
+#define PIXEL_FORMAT_MAX			4
+
+struct efi_pixel_bitmask {
+	u32 red_mask;
+	u32 green_mask;
+	u32 blue_mask;
+	u32 reserved_mask;
+};
+
+struct efi_graphics_output_mode_info {
+	u32 version;
+	u32 horizontal_resolution;
+	u32 vertical_resolution;
+	int pixel_format;
+	struct efi_pixel_bitmask pixel_information;
+	u32 pixels_per_scan_line;
+} __packed;
+
+struct efi_graphics_output_protocol_mode {
+	u32 max_mode;
+	u32 mode;
+	unsigned long info;
+	unsigned long size_of_info;
+	u64 frame_buffer_base;
+	unsigned long frame_buffer_size;
+} __packed;
+
+struct efi_graphics_output_protocol {
+	void *query_mode;
+	unsigned long set_mode;
+	unsigned long blt;
+	struct efi_graphics_output_protocol_mode *mode;
+};
+
+struct efi_uga_draw_protocol {
+	void *get_mode;
+	void *set_mode;
+	void *blt;
+};
+
+#endif /* BOOT_COMPRESSED_EBOOT_H */
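
The SEG_* and DESC_TYPE_* values above are the raw field contents that efi_main() pokes into struct desc_struct. As a cross-check, here is a host-side sketch (not stub code) that packs the same fields into an 8-byte GDT entry; with the field values efi_main() uses, the flat 4 GiB code and data segments come out to the familiar 0x00cf9a000000ffff and 0x00cf92000000ffff:

#include <stdio.h>
#include <stdint.h>

/* Pack GDT descriptor fields into their architectural bit positions. */
static uint64_t pack_gdt(uint32_t base, uint32_t limit, unsigned type,
			 unsigned s, unsigned dpl, unsigned p, unsigned avl,
			 unsigned l, unsigned d, unsigned g)
{
	uint64_t desc = 0;

	desc |= limit & 0xffff;				/* limit0      */
	desc |= (uint64_t)(base & 0xffffff) << 16;	/* base0+base1 */
	desc |= (uint64_t)(type & 0xf) << 40;
	desc |= (uint64_t)(s & 1) << 44;
	desc |= (uint64_t)(dpl & 3) << 45;
	desc |= (uint64_t)(p & 1) << 47;
	desc |= (uint64_t)((limit >> 16) & 0xf) << 48;	/* limit high  */
	desc |= (uint64_t)(avl & 1) << 52;
	desc |= (uint64_t)(l & 1) << 53;
	desc |= (uint64_t)(d & 1) << 54;
	desc |= (uint64_t)(g & 1) << 55;
	desc |= (uint64_t)((base >> 24) & 0xff) << 56;	/* base2       */
	return desc;
}

int main(void)
{
	/* type 0xa = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ */
	printf("code: %#018llx\n", (unsigned long long)
	       pack_gdt(0, 0xfffff, 0xa, 1, 0, 1, 0, 0, 1, 1));
	/* type 0x2 = SEG_TYPE_DATA | SEG_TYPE_READ_WRITE */
	printf("data: %#018llx\n", (unsigned long long)
	       pack_gdt(0, 0xfffff, 0x2, 1, 0, 1, 0, 0, 1, 1));
	return 0;
}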
diff --git a/arch/x86/boot/compressed/efi_stub_32.S b/arch/x86/boot/compressed/efi_stub_32.S
new file mode 100644
index 000000000000..a53440e81d52
--- /dev/null
+++ b/arch/x86/boot/compressed/efi_stub_32.S
@@ -0,0 +1,86 @@
+/*
+ * EFI call stub for IA32.
+ *
+ * This stub allows us to make EFI calls in physical mode with interrupts
+ * turned off. Note that this implementation is different from the one in
+ * arch/x86/platform/efi/efi_stub_32.S because we're _already_ in physical
+ * mode at this point.
+ */
+
+#include <linux/linkage.h>
+#include <asm/page_types.h>
+
+/*
+ * efi_call_phys(void *, ...) is a function with variable parameters.
+ * All the callers of this function assure that all the parameters are 4-bytes.
+ */
+
+/*
+ * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save.
+ * So we'd better save all of them at the beginning of this function and restore
+ * at the end no matter how many we use, because we cannot assure EFI runtime
+ * service functions will comply with gcc calling convention, too.
+ */
+
+.text
+ENTRY(efi_call_phys)
+	/*
+	 * 0. The function can only be called in Linux kernel. So CS has been
+	 * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found
+	 * the values of these registers are the same. And, the corresponding
+	 * GDT entries are identical. So I will do nothing about segment reg
+	 * and GDT, but change GDT base register in prolog and epilog.
+	 */
+
+	/*
+	 * 1. Because we haven't been relocated by this point we need to
+	 * use relative addressing.
+	 */
+	call	1f
+1:	popl	%edx
+	subl	$1b, %edx
+
+	/*
+	 * 2. Now on the top of stack is the return
+	 * address in the caller of efi_call_phys(), then parameter 1,
+	 * parameter 2, ..., param n. To make things easy, we save the return
+	 * address of efi_call_phys in a global variable.
+	 */
+	popl	%ecx
+	movl	%ecx, saved_return_addr(%edx)
+	/* get the function pointer into ECX */
+	popl	%ecx
+	movl	%ecx, efi_rt_function_ptr(%edx)
+
+	/*
+	 * 3. Call the physical function.
+	 */
+	call	*%ecx
+
+	/*
+	 * 4. Balance the stack. And because EAX contain the return value,
+	 * we'd better not clobber it. We need to calculate our address
+	 * again because %ecx and %edx are not preserved across EFI function
+	 * calls.
+	 */
+	call	1f
+1:	popl	%edx
+	subl	$1b, %edx
+
+	movl	efi_rt_function_ptr(%edx), %ecx
+	pushl	%ecx
+
+	/*
+	 * 5. Push the saved return address onto the stack and return.
+	 */
+	movl	saved_return_addr(%edx), %ecx
+	pushl	%ecx
+	ret
+ENDPROC(efi_call_phys)
+.previous
+
+.data
+saved_return_addr:
+	.long 0
+efi_rt_function_ptr:
+	.long 0
diff --git a/arch/x86/boot/compressed/efi_stub_64.S b/arch/x86/boot/compressed/efi_stub_64.S
new file mode 100644
index 000000000000..cedc60de86eb
--- /dev/null
+++ b/arch/x86/boot/compressed/efi_stub_64.S
@@ -0,0 +1 @@
+#include "../../platform/efi/efi_stub_64.S"
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 67a655a39ce4..a0559930a180 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -32,6 +32,28 @@
 
 	__HEAD
 ENTRY(startup_32)
+#ifdef CONFIG_EFI_STUB
+	/*
+	 * We don't need the return address, so set up the stack so
+	 * efi_main() can find its arguments.
+	 */
+	add	$0x4, %esp
+
+	call	efi_main
+	cmpl	$0, %eax
+	je	preferred_addr
+	movl	%eax, %esi
+	call	1f
+1:
+	popl	%eax
+	subl	$1b, %eax
+	subl	BP_pref_address(%esi), %eax
+	add	BP_code32_start(%esi), %eax
+	leal	preferred_addr(%eax), %eax
+	jmp	*%eax
+
+preferred_addr:
+#endif
 	cld
 	/*
 	 * Test KEEP_SEGMENTS flag to see if the bootloader is asking
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 35af09d13dc1..558d76ce23bc 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -199,6 +199,26 @@ ENTRY(startup_64)
 	 * an identity mapped page table being provided that maps our
 	 * entire text+data+bss and hopefully all of memory.
 	 */
+#ifdef CONFIG_EFI_STUB
+	pushq	%rsi
+	mov	%rcx, %rdi
+	mov	%rdx, %rsi
+	call	efi_main
+	popq	%rsi
+	cmpq	$0,%rax
+	je	preferred_addr
+	movq	%rax,%rsi
+	call	1f
+1:
+	popq	%rax
+	subq	$1b, %rax
+	subq	BP_pref_address(%rsi), %rax
+	add	BP_code32_start(%esi), %eax
+	leaq	preferred_addr(%rax), %rax
+	jmp	*%rax
+
+preferred_addr:
+#endif
 
 	/* Setup data segments. */
 	xorl	%eax, %eax
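
In both stub entry paths above, the call 1f/pop sequence measures where the stub is actually running, and the BP_pref_address/BP_code32_start arithmetic then redirects execution to the copy of preferred_addr inside the kernel image that efi_main() relocated to boot_params.hdr.code32_start. A C sketch of that fix-up (the addresses in main() are hypothetical, chosen only to illustrate the arithmetic):

#include <stdio.h>
#include <stdint.h>

static uintptr_t efi_stub_jump_target(
	uintptr_t runtime_label,  /* "call 1f; pop" result            */
	uintptr_t linktime_label, /* $1b, the label's link address    */
	uintptr_t pref_address,   /* hdr->pref_address: old image base */
	uintptr_t code32_start,   /* hdr->code32_start: relocated copy */
	uintptr_t preferred_addr) /* link address of preferred_addr   */
{
	/* how far from its link address the stub actually runs */
	uintptr_t delta = runtime_label - linktime_label;

	/* rebase from the firmware-chosen load address to the copy */
	return preferred_addr + delta - pref_address + code32_start;
}

int main(void)
{
	uintptr_t target = efi_stub_jump_target(0x7e00040, 0x40,
						0x7e00000, 0x1000000, 0x80);
	printf("jump to %#lx\n", (unsigned long)target); /* 0x1000080 */
	return 0;
}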
diff --git a/arch/x86/boot/compressed/string.c b/arch/x86/boot/compressed/string.c
index 19b3e693cd72..ffb9c5c9d748 100644
--- a/arch/x86/boot/compressed/string.c
+++ b/arch/x86/boot/compressed/string.c
@@ -1,2 +1,11 @@
 #include "misc.h"
+
+int memcmp(const void *s1, const void *s2, size_t len)
+{
+	u8 diff;
+	asm("repe; cmpsb; setnz %0"
+	    : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
+	return diff;
+}
+
 #include "../string.c"
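
Note that this asm memcmp() only distinguishes equal from not-equal: setnz yields 0 or 1, never the signed ordering the C standard specifies, and with len == 0 the repe loop does not touch the flags, so a stale flag value is sampled. That relaxed contract is all the decompressor needs. A portable sketch of the same equal/not-equal contract (deterministic even for len == 0):

#include <stdio.h>
#include <stddef.h>

/* Returns 0 when the first len bytes match, non-zero otherwise. */
static int memcmp_eq(const void *s1, const void *s2, size_t len)
{
	const unsigned char *a = s1, *b = s2;

	while (len--) {
		if (*a++ != *b++)
			return 1;
	}
	return 0;
}

int main(void)
{
	printf("%d %d\n", memcmp_eq("EFI PART", "EFI PART", 8),
	       memcmp_eq("EFI PART", "EFI    Z", 8));	/* prints: 0 1 */
	return 0;
}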
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index bdb4d458ec8c..f1bbeeb09148 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -45,6 +45,11 @@ SYSSEG = 0x1000 /* historical load address >> 4 */ | |||
45 | 45 | ||
46 | .global bootsect_start | 46 | .global bootsect_start |
47 | bootsect_start: | 47 | bootsect_start: |
48 | #ifdef CONFIG_EFI_STUB | ||
49 | # "MZ", MS-DOS header | ||
50 | .byte 0x4d | ||
51 | .byte 0x5a | ||
52 | #endif | ||
48 | 53 | ||
49 | # Normalize the start address | 54 | # Normalize the start address |
50 | ljmp $BOOTSEG, $start2 | 55 | ljmp $BOOTSEG, $start2 |
@@ -79,6 +84,14 @@ bs_die: | |||
79 | # invoke the BIOS reset code... | 84 | # invoke the BIOS reset code... |
80 | ljmp $0xf000,$0xfff0 | 85 | ljmp $0xf000,$0xfff0 |
81 | 86 | ||
87 | #ifdef CONFIG_EFI_STUB | ||
88 | .org 0x3c | ||
89 | # | ||
90 | # Offset to the PE header. | ||
91 | # | ||
92 | .long pe_header | ||
93 | #endif /* CONFIG_EFI_STUB */ | ||
94 | |||
82 | .section ".bsdata", "a" | 95 | .section ".bsdata", "a" |
83 | bugger_off_msg: | 96 | bugger_off_msg: |
84 | .ascii "Direct booting from floppy is no longer supported.\r\n" | 97 | .ascii "Direct booting from floppy is no longer supported.\r\n" |
@@ -87,6 +100,141 @@ bugger_off_msg: | |||
87 | .ascii "Remove disk and press any key to reboot . . .\r\n" | 100 | .ascii "Remove disk and press any key to reboot . . .\r\n" |
88 | .byte 0 | 101 | .byte 0 |
89 | 102 | ||
103 | #ifdef CONFIG_EFI_STUB | ||
104 | pe_header: | ||
105 | .ascii "PE" | ||
106 | .word 0 | ||
107 | |||
108 | coff_header: | ||
109 | #ifdef CONFIG_X86_32 | ||
110 | .word 0x14c # i386 | ||
111 | #else | ||
112 | .word 0x8664 # x86-64 | ||
113 | #endif | ||
114 | .word 2 # nr_sections | ||
115 | .long 0 # TimeDateStamp | ||
116 | .long 0 # PointerToSymbolTable | ||
117 | .long 1 # NumberOfSymbols | ||
118 | .word section_table - optional_header # SizeOfOptionalHeader | ||
119 | #ifdef CONFIG_X86_32 | ||
120 | .word 0x306 # Characteristics | ||
121 | # IMAGE_FILE_32BIT_MACHINE | | ||
122 | # IMAGE_FILE_DEBUG_STRIPPED | | ||
123 | # IMAGE_FILE_EXECUTABLE_IMAGE | | ||
124 | # IMAGE_FILE_LINE_NUMS_STRIPPED | ||
125 | #else | ||
126 | .word 0x206 # Characteristics | ||
127 | # IMAGE_FILE_DEBUG_STRIPPED | | ||
128 | # IMAGE_FILE_EXECUTABLE_IMAGE | | ||
129 | # IMAGE_FILE_LINE_NUMS_STRIPPED | ||
130 | #endif | ||
131 | |||
132 | optional_header: | ||
133 | #ifdef CONFIG_X86_32 | ||
134 | .word 0x10b # PE32 format | ||
135 | #else | ||
136 | .word 0x20b # PE32+ format | ||
137 | #endif | ||
138 | .byte 0x02 # MajorLinkerVersion | ||
139 | .byte 0x14 # MinorLinkerVersion | ||
140 | |||
141 | # Filled in by build.c | ||
142 | .long 0 # SizeOfCode | ||
143 | |||
144 | .long 0 # SizeOfInitializedData | ||
145 | .long 0 # SizeOfUninitializedData | ||
146 | |||
147 | # Filled in by build.c | ||
148 | .long 0x0000 # AddressOfEntryPoint | ||
149 | |||
150 | .long 0x0000 # BaseOfCode | ||
151 | #ifdef CONFIG_X86_32 | ||
152 | .long 0 # data | ||
153 | #endif | ||
154 | |||
155 | extra_header_fields: | ||
156 | #ifdef CONFIG_X86_32 | ||
157 | .long 0 # ImageBase | ||
158 | #else | ||
159 | .quad 0 # ImageBase | ||
160 | #endif | ||
161 | .long 0x1000 # SectionAlignment | ||
162 | .long 0x200 # FileAlignment | ||
163 | .word 0 # MajorOperatingSystemVersion | ||
164 | .word 0 # MinorOperatingSystemVersion | ||
165 | .word 0 # MajorImageVersion | ||
166 | .word 0 # MinorImageVersion | ||
167 | .word 0 # MajorSubsystemVersion | ||
168 | .word 0 # MinorSubsystemVersion | ||
169 | .long 0 # Win32VersionValue | ||
170 | |||
171 | # | ||
172 | # The size of the bzImage is written in tools/build.c | ||
173 | # | ||
174 | .long 0 # SizeOfImage | ||
175 | |||
176 | .long 0x200 # SizeOfHeaders | ||
177 | .long 0 # CheckSum | ||
178 | .word 0xa # Subsystem (EFI application) | ||
179 | .word 0 # DllCharacteristics | ||
180 | #ifdef CONFIG_X86_32 | ||
181 | .long 0 # SizeOfStackReserve | ||
182 | .long 0 # SizeOfStackCommit | ||
183 | .long 0 # SizeOfHeapReserve | ||
184 | .long 0 # SizeOfHeapCommit | ||
185 | #else | ||
186 | .quad 0 # SizeOfStackReserve | ||
187 | .quad 0 # SizeOfStackCommit | ||
188 | .quad 0 # SizeOfHeapReserve | ||
189 | .quad 0 # SizeOfHeapCommit | ||
190 | #endif | ||
191 | .long 0 # LoaderFlags | ||
192 | .long 0x1 # NumberOfRvaAndSizes | ||
193 | |||
194 | .quad 0 # ExportTable | ||
195 | .quad 0 # ImportTable | ||
196 | .quad 0 # ResourceTable | ||
197 | .quad 0 # ExceptionTable | ||
198 | .quad 0 # CertificateTable | ||
199 | .quad 0 # BaseRelocationTable | ||
200 | |||
201 | # Section table | ||
202 | section_table: | ||
203 | .ascii ".text" | ||
204 | .byte 0 | ||
205 | .byte 0 | ||
206 | .byte 0 | ||
207 | .long 0 | ||
208 | .long 0x0 # startup_{32,64} | ||
209 | .long 0 # Size of initialized data | ||
210 | # on disk | ||
211 | .long 0x0 # startup_{32,64} | ||
212 | .long 0 # PointerToRelocations | ||
213 | .long 0 # PointerToLineNumbers | ||
214 | .word 0 # NumberOfRelocations | ||
215 | .word 0 # NumberOfLineNumbers | ||
216 | .long 0x60500020 # Characteristics (section flags) | ||
217 | |||
218 | # | ||
219 | # The EFI application loader requires a relocation section | ||
220 | # because EFI applications are relocatable and not having | ||
221 | # this section seems to confuse it. But since we don't need | ||
222 | # the loader to fixup any relocs for us just fill it with a | ||
223 | # single dummy reloc. | ||
224 | # | ||
225 | .ascii ".reloc" | ||
226 | .byte 0 | ||
227 | .byte 0 | ||
228 | .long reloc_end - reloc_start | ||
229 | .long reloc_start | ||
230 | .long reloc_end - reloc_start # SizeOfRawData | ||
231 | .long reloc_start # PointerToRawData | ||
232 | .long 0 # PointerToRelocations | ||
233 | .long 0 # PointerToLineNumbers | ||
234 | .word 0 # NumberOfRelocations | ||
235 | .word 0 # NumberOfLineNumbers | ||
236 | .long 0x42100040 # Characteristics (section flags) | ||
237 | #endif /* CONFIG_EFI_STUB */ | ||
90 | 238 | ||
91 | # Kernel attributes; used by setup. This is part 1 of the | 239 | # Kernel attributes; used by setup. This is part 1 of the |
92 | # header, from the old boot sector. | 240 | # header, from the old boot sector. |
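Taken together, the hunks above make the bzImage double as a DOS/PE image: the "MZ" bytes at offset 0 and the pointer planted at offset 0x3c (the e_lfanew field of the DOS header) lead a PE-aware loader to the pe_header block. A sketch of the lookup that a loader, or tools/build.c further down, performs:

    #include <stddef.h>

    /* Locating the PE header from the DOS stub (sketch, not kernel code). */
    static const unsigned char *find_pe_header(const unsigned char *image)
    {
            unsigned int pe_off;

            if (image[0] != 'M' || image[1] != 'Z')
                    return NULL;    /* no DOS stub, not a PE image */

            pe_off = *(const unsigned int *)&image[0x3c];   /* e_lfanew */
            return image + pe_off;  /* points at the "PE\0\0" signature */
    }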
@@ -318,3 +466,13 @@ die: | |||
318 | setup_corrupt: | 466 | setup_corrupt: |
319 | .byte 7 | 467 | .byte 7 |
320 | .string "No setup signature found...\n" | 468 | .string "No setup signature found...\n" |
469 | |||
470 | .data | ||
471 | dummy: .long 0 | ||
472 | |||
473 | .section .reloc | ||
474 | reloc_start: | ||
475 | .long dummy - reloc_start | ||
476 | .long 10 | ||
477 | .word 0 | ||
478 | reloc_end: | ||
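The dummy relocation appended here is a complete, minimal base-relocation block: a page RVA, a block size of 10 bytes, and a single entry of type 0 (IMAGE_REL_BASED_ABSOLUTE, which loaders treat as padding). Expressed as a C struct (a sketch; field names follow the PE/COFF specification):

    #include <stdint.h>

    struct pe_base_reloc_block {
            uint32_t page_rva;      /* .long dummy - reloc_start */
            uint32_t block_size;    /* .long 10: header plus one 2-byte entry */
            uint16_t entry;         /* .word 0: IMAGE_REL_BASED_ABSOLUTE, a no-op */
    } __attribute__((packed));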
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index 3cbc4058dd26..574dedfe2890 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c | |||
@@ -111,3 +111,38 @@ unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int bas | |||
111 | 111 | ||
112 | return result; | 112 | return result; |
113 | } | 113 | } |
114 | |||
115 | /** | ||
116 | * strlen - Find the length of a string | ||
117 | * @s: The string to be sized | ||
118 | */ | ||
119 | size_t strlen(const char *s) | ||
120 | { | ||
121 | const char *sc; | ||
122 | |||
123 | for (sc = s; *sc != '\0'; ++sc) | ||
124 | /* nothing */; | ||
125 | return sc - s; | ||
126 | } | ||
127 | |||
128 | /** | ||
129 | * strstr - Find the first substring in a %NUL terminated string | ||
130 | * @s1: The string to be searched | ||
131 | * @s2: The string to search for | ||
132 | */ | ||
133 | char *strstr(const char *s1, const char *s2) | ||
134 | { | ||
135 | size_t l1, l2; | ||
136 | |||
137 | l2 = strlen(s2); | ||
138 | if (!l2) | ||
139 | return (char *)s1; | ||
140 | l1 = strlen(s1); | ||
141 | while (l1 >= l2) { | ||
142 | l1--; | ||
143 | if (!memcmp(s1, s2, l2)) | ||
144 | return (char *)s1; | ||
145 | s1++; | ||
146 | } | ||
147 | return NULL; | ||
148 | } | ||
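These are straightforward, size-conscious versions of the usual library routines; strstr is the naive O(len(s1) * len(s2)) scan, which is fine for the short strings handled during boot. A usage sketch (hypothetical helper, not a call site from this patch):

    /* Example: scan a kernel command line with the helpers added above. */
    static int cmdline_contains(const char *cmdline, const char *opt)
    {
            return cmdline && strstr(cmdline, opt) != NULL;
    }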
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index fdc60a0b3c20..4e9bd6bcafa6 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c | |||
@@ -135,6 +135,9 @@ static void usage(void) | |||
135 | 135 | ||
136 | int main(int argc, char ** argv) | 136 | int main(int argc, char ** argv) |
137 | { | 137 | { |
138 | #ifdef CONFIG_EFI_STUB | ||
139 | unsigned int file_sz, pe_header; | ||
140 | #endif | ||
138 | unsigned int i, sz, setup_sectors; | 141 | unsigned int i, sz, setup_sectors; |
139 | int c; | 142 | int c; |
140 | u32 sys_size; | 143 | u32 sys_size; |
@@ -194,6 +197,42 @@ int main(int argc, char ** argv) | |||
194 | buf[0x1f6] = sys_size >> 16; | 197 | buf[0x1f6] = sys_size >> 16; |
195 | buf[0x1f7] = sys_size >> 24; | 198 | buf[0x1f7] = sys_size >> 24; |
196 | 199 | ||
200 | #ifdef CONFIG_EFI_STUB | ||
201 | file_sz = sz + i + ((sys_size * 16) - sz); | ||
202 | |||
203 | pe_header = *(unsigned int *)&buf[0x3c]; | ||
204 | |||
205 | /* Size of code */ | ||
206 | *(unsigned int *)&buf[pe_header + 0x1c] = file_sz; | ||
207 | |||
208 | /* Size of image */ | ||
209 | *(unsigned int *)&buf[pe_header + 0x50] = file_sz; | ||
210 | |||
211 | #ifdef CONFIG_X86_32 | ||
212 | /* Address of entry point */ | ||
213 | *(unsigned int *)&buf[pe_header + 0x28] = i; | ||
214 | |||
215 | /* .text size */ | ||
216 | *(unsigned int *)&buf[pe_header + 0xb0] = file_sz; | ||
217 | |||
218 | /* .text size of initialized data */ | ||
219 | *(unsigned int *)&buf[pe_header + 0xb8] = file_sz; | ||
220 | #else | ||
221 | /* | ||
222 | * Address of entry point. startup_32 is at the beginning and | ||
223 | * the 64-bit entry point (startup_64) is always 512 bytes | ||
224 | * after. | ||
225 | */ | ||
226 | *(unsigned int *)&buf[pe_header + 0x28] = i + 512; | ||
227 | |||
228 | /* .text size */ | ||
229 | *(unsigned int *)&buf[pe_header + 0xc0] = file_sz; | ||
230 | |||
231 | /* .text size of initialized data */ | ||
232 | *(unsigned int *)&buf[pe_header + 0xc8] = file_sz; | ||
233 | #endif /* CONFIG_X86_32 */ | ||
234 | #endif /* CONFIG_EFI_STUB */ | ||
235 | |||
197 | crc = partial_crc32(buf, i, crc); | 236 | crc = partial_crc32(buf, i, crc); |
198 | if (fwrite(buf, 1, i, stdout) != i) | 237 | if (fwrite(buf, 1, i, stdout) != i) |
199 | die("Writing setup failed"); | 238 | die("Writing setup failed"); |
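The raw offsets patched here index straight into the headers emitted by header.S: 0x1c is SizeOfCode and 0x28 is AddressOfEntryPoint among the optional header's standard fields, 0x50 is SizeOfImage among its Windows-specific fields, and 0xb0/0xb8 (PE32) or 0xc0/0xc8 (PE32+) are the VirtualSize and SizeOfRawData fields of the .text section-table entry, whose position shifts because SizeOfOptionalHeader differs between the two formats. Named constants make the correspondence explicit (hypothetical names; build.c itself keeps the literals):

    /* Offsets patched above, relative to pe_header (sketch only). */
    #define PE_OPT_SIZE_OF_CODE     0x1c    /* optional header: SizeOfCode */
    #define PE_OPT_ENTRY_POINT      0x28    /* optional header: AddressOfEntryPoint */
    #define PE_OPT_SIZE_OF_IMAGE    0x50    /* optional header: SizeOfImage */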
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 3537d4b91f74..2b0b9631474b 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile | |||
@@ -5,12 +5,14 @@ | |||
5 | obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o | 5 | obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o |
6 | obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o | 6 | obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o |
7 | obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o | 7 | obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o |
8 | obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o | ||
8 | 9 | ||
9 | obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o | 10 | obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o |
10 | obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o | 11 | obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o |
11 | obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o | 12 | obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o |
12 | obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o | 13 | obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o |
13 | obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o | 14 | obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o |
15 | obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o | ||
14 | obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o | 16 | obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o |
15 | obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o | 17 | obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o |
16 | 18 | ||
@@ -20,12 +22,14 @@ obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o | |||
20 | aes-i586-y := aes-i586-asm_32.o aes_glue.o | 22 | aes-i586-y := aes-i586-asm_32.o aes_glue.o |
21 | twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o | 23 | twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o |
22 | salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o | 24 | salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o |
25 | serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o | ||
23 | 26 | ||
24 | aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o | 27 | aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o |
25 | blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o | 28 | blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o |
26 | twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o | 29 | twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o |
27 | twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o | 30 | twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o |
28 | salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o | 31 | salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o |
32 | serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o | ||
29 | 33 | ||
30 | aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o | 34 | aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o |
31 | 35 | ||
diff --git a/arch/x86/crypto/serpent-sse2-i586-asm_32.S b/arch/x86/crypto/serpent-sse2-i586-asm_32.S new file mode 100644 index 000000000000..4e37677ca851 --- /dev/null +++ b/arch/x86/crypto/serpent-sse2-i586-asm_32.S | |||
@@ -0,0 +1,638 @@ | |||
1 | /* | ||
2 | * Serpent Cipher 4-way parallel algorithm (i586/SSE2) | ||
3 | * | ||
4 | * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * Based on crypto/serpent.c by | ||
7 | * Copyright (C) 2002 Dag Arne Osvik <osvik@ii.uib.no> | ||
8 | * 2003 Herbert Valerio Riedel <hvr@gnu.org> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
23 | * USA | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | .file "serpent-sse2-i586-asm_32.S" | ||
28 | .text | ||
29 | |||
30 | #define arg_ctx 4 | ||
31 | #define arg_dst 8 | ||
32 | #define arg_src 12 | ||
33 | #define arg_xor 16 | ||
34 | |||
35 | /********************************************************************** | ||
36 | 4-way SSE2 serpent | ||
37 | **********************************************************************/ | ||
38 | #define CTX %edx | ||
39 | |||
40 | #define RA %xmm0 | ||
41 | #define RB %xmm1 | ||
42 | #define RC %xmm2 | ||
43 | #define RD %xmm3 | ||
44 | #define RE %xmm4 | ||
45 | |||
46 | #define RT0 %xmm5 | ||
47 | #define RT1 %xmm6 | ||
48 | |||
49 | #define RNOT %xmm7 | ||
50 | |||
51 | #define get_key(i, j, t) \ | ||
52 | movd (4*(i)+(j))*4(CTX), t; \ | ||
53 | pshufd $0, t, t; | ||
54 | |||
55 | #define K(x0, x1, x2, x3, x4, i) \ | ||
56 | get_key(i, 0, x4); \ | ||
57 | get_key(i, 1, RT0); \ | ||
58 | get_key(i, 2, RT1); \ | ||
59 | pxor x4, x0; \ | ||
60 | pxor RT0, x1; \ | ||
61 | pxor RT1, x2; \ | ||
62 | get_key(i, 3, x4); \ | ||
63 | pxor x4, x3; | ||
64 | |||
65 | #define LK(x0, x1, x2, x3, x4, i) \ | ||
66 | movdqa x0, x4; \ | ||
67 | pslld $13, x0; \ | ||
68 | psrld $(32 - 13), x4; \ | ||
69 | por x4, x0; \ | ||
70 | pxor x0, x1; \ | ||
71 | movdqa x2, x4; \ | ||
72 | pslld $3, x2; \ | ||
73 | psrld $(32 - 3), x4; \ | ||
74 | por x4, x2; \ | ||
75 | pxor x2, x1; \ | ||
76 | movdqa x1, x4; \ | ||
77 | pslld $1, x1; \ | ||
78 | psrld $(32 - 1), x4; \ | ||
79 | por x4, x1; \ | ||
80 | movdqa x0, x4; \ | ||
81 | pslld $3, x4; \ | ||
82 | pxor x2, x3; \ | ||
83 | pxor x4, x3; \ | ||
84 | movdqa x3, x4; \ | ||
85 | pslld $7, x3; \ | ||
86 | psrld $(32 - 7), x4; \ | ||
87 | por x4, x3; \ | ||
88 | movdqa x1, x4; \ | ||
89 | pslld $7, x4; \ | ||
90 | pxor x1, x0; \ | ||
91 | pxor x3, x0; \ | ||
92 | pxor x3, x2; \ | ||
93 | pxor x4, x2; \ | ||
94 | movdqa x0, x4; \ | ||
95 | get_key(i, 1, RT0); \ | ||
96 | pxor RT0, x1; \ | ||
97 | get_key(i, 3, RT0); \ | ||
98 | pxor RT0, x3; \ | ||
99 | pslld $5, x0; \ | ||
100 | psrld $(32 - 5), x4; \ | ||
101 | por x4, x0; \ | ||
102 | movdqa x2, x4; \ | ||
103 | pslld $22, x2; \ | ||
104 | psrld $(32 - 22), x4; \ | ||
105 | por x4, x2; \ | ||
106 | get_key(i, 0, RT0); \ | ||
107 | pxor RT0, x0; \ | ||
108 | get_key(i, 2, RT0); \ | ||
109 | pxor RT0, x2; | ||
110 | |||
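Each movdqa/pslld/psrld/por quartet above synthesizes a 32-bit rotate from two shifts and an OR, since SSE2 has no vector rotate instruction. Stripped of that detail and of the interleaved get_key loads, LK is Serpent's linear transformation followed by the next round's key mixing; per 32-bit word the transformation is (a C sketch, with rol32 as in <linux/bitops.h>):

    static void serpent_lt(u32 *x0, u32 *x1, u32 *x2, u32 *x3)
    {
            *x0 = rol32(*x0, 13);
            *x2 = rol32(*x2, 3);
            *x1 ^= *x0 ^ *x2;
            *x3 ^= *x2 ^ (*x0 << 3);
            *x1 = rol32(*x1, 1);
            *x3 = rol32(*x3, 7);
            *x0 ^= *x1 ^ *x3;
            *x2 ^= *x3 ^ (*x1 << 7);
            *x0 = rol32(*x0, 5);
            *x2 = rol32(*x2, 22);
    }

KL below applies the round key first and then runs the same transformation in reverse, which is what the decryption path needs.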
111 | #define KL(x0, x1, x2, x3, x4, i) \ | ||
112 | K(x0, x1, x2, x3, x4, i); \ | ||
113 | movdqa x0, x4; \ | ||
114 | psrld $5, x0; \ | ||
115 | pslld $(32 - 5), x4; \ | ||
116 | por x4, x0; \ | ||
117 | movdqa x2, x4; \ | ||
118 | psrld $22, x2; \ | ||
119 | pslld $(32 - 22), x4; \ | ||
120 | por x4, x2; \ | ||
121 | pxor x3, x2; \ | ||
122 | pxor x3, x0; \ | ||
123 | movdqa x1, x4; \ | ||
124 | pslld $7, x4; \ | ||
125 | pxor x1, x0; \ | ||
126 | pxor x4, x2; \ | ||
127 | movdqa x1, x4; \ | ||
128 | psrld $1, x1; \ | ||
129 | pslld $(32 - 1), x4; \ | ||
130 | por x4, x1; \ | ||
131 | movdqa x3, x4; \ | ||
132 | psrld $7, x3; \ | ||
133 | pslld $(32 - 7), x4; \ | ||
134 | por x4, x3; \ | ||
135 | pxor x0, x1; \ | ||
136 | movdqa x0, x4; \ | ||
137 | pslld $3, x4; \ | ||
138 | pxor x4, x3; \ | ||
139 | movdqa x0, x4; \ | ||
140 | psrld $13, x0; \ | ||
141 | pslld $(32 - 13), x4; \ | ||
142 | por x4, x0; \ | ||
143 | pxor x2, x1; \ | ||
144 | pxor x2, x3; \ | ||
145 | movdqa x2, x4; \ | ||
146 | psrld $3, x2; \ | ||
147 | pslld $(32 - 3), x4; \ | ||
148 | por x4, x2; | ||
149 | |||
150 | #define S0(x0, x1, x2, x3, x4) \ | ||
151 | movdqa x3, x4; \ | ||
152 | por x0, x3; \ | ||
153 | pxor x4, x0; \ | ||
154 | pxor x2, x4; \ | ||
155 | pxor RNOT, x4; \ | ||
156 | pxor x1, x3; \ | ||
157 | pand x0, x1; \ | ||
158 | pxor x4, x1; \ | ||
159 | pxor x0, x2; \ | ||
160 | pxor x3, x0; \ | ||
161 | por x0, x4; \ | ||
162 | pxor x2, x0; \ | ||
163 | pand x1, x2; \ | ||
164 | pxor x2, x3; \ | ||
165 | pxor RNOT, x1; \ | ||
166 | pxor x4, x2; \ | ||
167 | pxor x2, x1; | ||
168 | |||
169 | #define S1(x0, x1, x2, x3, x4) \ | ||
170 | movdqa x1, x4; \ | ||
171 | pxor x0, x1; \ | ||
172 | pxor x3, x0; \ | ||
173 | pxor RNOT, x3; \ | ||
174 | pand x1, x4; \ | ||
175 | por x1, x0; \ | ||
176 | pxor x2, x3; \ | ||
177 | pxor x3, x0; \ | ||
178 | pxor x3, x1; \ | ||
179 | pxor x4, x3; \ | ||
180 | por x4, x1; \ | ||
181 | pxor x2, x4; \ | ||
182 | pand x0, x2; \ | ||
183 | pxor x1, x2; \ | ||
184 | por x0, x1; \ | ||
185 | pxor RNOT, x0; \ | ||
186 | pxor x2, x0; \ | ||
187 | pxor x1, x4; | ||
188 | |||
189 | #define S2(x0, x1, x2, x3, x4) \ | ||
190 | pxor RNOT, x3; \ | ||
191 | pxor x0, x1; \ | ||
192 | movdqa x0, x4; \ | ||
193 | pand x2, x0; \ | ||
194 | pxor x3, x0; \ | ||
195 | por x4, x3; \ | ||
196 | pxor x1, x2; \ | ||
197 | pxor x1, x3; \ | ||
198 | pand x0, x1; \ | ||
199 | pxor x2, x0; \ | ||
200 | pand x3, x2; \ | ||
201 | por x1, x3; \ | ||
202 | pxor RNOT, x0; \ | ||
203 | pxor x0, x3; \ | ||
204 | pxor x0, x4; \ | ||
205 | pxor x2, x0; \ | ||
206 | por x2, x1; | ||
207 | |||
208 | #define S3(x0, x1, x2, x3, x4) \ | ||
209 | movdqa x1, x4; \ | ||
210 | pxor x3, x1; \ | ||
211 | por x0, x3; \ | ||
212 | pand x0, x4; \ | ||
213 | pxor x2, x0; \ | ||
214 | pxor x1, x2; \ | ||
215 | pand x3, x1; \ | ||
216 | pxor x3, x2; \ | ||
217 | por x4, x0; \ | ||
218 | pxor x3, x4; \ | ||
219 | pxor x0, x1; \ | ||
220 | pand x3, x0; \ | ||
221 | pand x4, x3; \ | ||
222 | pxor x2, x3; \ | ||
223 | por x1, x4; \ | ||
224 | pand x1, x2; \ | ||
225 | pxor x3, x4; \ | ||
226 | pxor x3, x0; \ | ||
227 | pxor x2, x3; | ||
228 | |||
229 | #define S4(x0, x1, x2, x3, x4) \ | ||
230 | movdqa x3, x4; \ | ||
231 | pand x0, x3; \ | ||
232 | pxor x4, x0; \ | ||
233 | pxor x2, x3; \ | ||
234 | por x4, x2; \ | ||
235 | pxor x1, x0; \ | ||
236 | pxor x3, x4; \ | ||
237 | por x0, x2; \ | ||
238 | pxor x1, x2; \ | ||
239 | pand x0, x1; \ | ||
240 | pxor x4, x1; \ | ||
241 | pand x2, x4; \ | ||
242 | pxor x3, x2; \ | ||
243 | pxor x0, x4; \ | ||
244 | por x1, x3; \ | ||
245 | pxor RNOT, x1; \ | ||
246 | pxor x0, x3; | ||
247 | |||
248 | #define S5(x0, x1, x2, x3, x4) \ | ||
249 | movdqa x1, x4; \ | ||
250 | por x0, x1; \ | ||
251 | pxor x1, x2; \ | ||
252 | pxor RNOT, x3; \ | ||
253 | pxor x0, x4; \ | ||
254 | pxor x2, x0; \ | ||
255 | pand x4, x1; \ | ||
256 | por x3, x4; \ | ||
257 | pxor x0, x4; \ | ||
258 | pand x3, x0; \ | ||
259 | pxor x3, x1; \ | ||
260 | pxor x2, x3; \ | ||
261 | pxor x1, x0; \ | ||
262 | pand x4, x2; \ | ||
263 | pxor x2, x1; \ | ||
264 | pand x0, x2; \ | ||
265 | pxor x2, x3; | ||
266 | |||
267 | #define S6(x0, x1, x2, x3, x4) \ | ||
268 | movdqa x1, x4; \ | ||
269 | pxor x0, x3; \ | ||
270 | pxor x2, x1; \ | ||
271 | pxor x0, x2; \ | ||
272 | pand x3, x0; \ | ||
273 | por x3, x1; \ | ||
274 | pxor RNOT, x4; \ | ||
275 | pxor x1, x0; \ | ||
276 | pxor x2, x1; \ | ||
277 | pxor x4, x3; \ | ||
278 | pxor x0, x4; \ | ||
279 | pand x0, x2; \ | ||
280 | pxor x1, x4; \ | ||
281 | pxor x3, x2; \ | ||
282 | pand x1, x3; \ | ||
283 | pxor x0, x3; \ | ||
284 | pxor x2, x1; | ||
285 | |||
286 | #define S7(x0, x1, x2, x3, x4) \ | ||
287 | pxor RNOT, x1; \ | ||
288 | movdqa x1, x4; \ | ||
289 | pxor RNOT, x0; \ | ||
290 | pand x2, x1; \ | ||
291 | pxor x3, x1; \ | ||
292 | por x4, x3; \ | ||
293 | pxor x2, x4; \ | ||
294 | pxor x3, x2; \ | ||
295 | pxor x0, x3; \ | ||
296 | por x1, x0; \ | ||
297 | pand x0, x2; \ | ||
298 | pxor x4, x0; \ | ||
299 | pxor x3, x4; \ | ||
300 | pand x0, x3; \ | ||
301 | pxor x1, x4; \ | ||
302 | pxor x4, x2; \ | ||
303 | pxor x1, x3; \ | ||
304 | por x0, x4; \ | ||
305 | pxor x1, x4; | ||
306 | |||
307 | #define SI0(x0, x1, x2, x3, x4) \ | ||
308 | movdqa x3, x4; \ | ||
309 | pxor x0, x1; \ | ||
310 | por x1, x3; \ | ||
311 | pxor x1, x4; \ | ||
312 | pxor RNOT, x0; \ | ||
313 | pxor x3, x2; \ | ||
314 | pxor x0, x3; \ | ||
315 | pand x1, x0; \ | ||
316 | pxor x2, x0; \ | ||
317 | pand x3, x2; \ | ||
318 | pxor x4, x3; \ | ||
319 | pxor x3, x2; \ | ||
320 | pxor x3, x1; \ | ||
321 | pand x0, x3; \ | ||
322 | pxor x0, x1; \ | ||
323 | pxor x2, x0; \ | ||
324 | pxor x3, x4; | ||
325 | |||
326 | #define SI1(x0, x1, x2, x3, x4) \ | ||
327 | pxor x3, x1; \ | ||
328 | movdqa x0, x4; \ | ||
329 | pxor x2, x0; \ | ||
330 | pxor RNOT, x2; \ | ||
331 | por x1, x4; \ | ||
332 | pxor x3, x4; \ | ||
333 | pand x1, x3; \ | ||
334 | pxor x2, x1; \ | ||
335 | pand x4, x2; \ | ||
336 | pxor x1, x4; \ | ||
337 | por x3, x1; \ | ||
338 | pxor x0, x3; \ | ||
339 | pxor x0, x2; \ | ||
340 | por x4, x0; \ | ||
341 | pxor x4, x2; \ | ||
342 | pxor x0, x1; \ | ||
343 | pxor x1, x4; | ||
344 | |||
345 | #define SI2(x0, x1, x2, x3, x4) \ | ||
346 | pxor x1, x2; \ | ||
347 | movdqa x3, x4; \ | ||
348 | pxor RNOT, x3; \ | ||
349 | por x2, x3; \ | ||
350 | pxor x4, x2; \ | ||
351 | pxor x0, x4; \ | ||
352 | pxor x1, x3; \ | ||
353 | por x2, x1; \ | ||
354 | pxor x0, x2; \ | ||
355 | pxor x4, x1; \ | ||
356 | por x3, x4; \ | ||
357 | pxor x3, x2; \ | ||
358 | pxor x2, x4; \ | ||
359 | pand x1, x2; \ | ||
360 | pxor x3, x2; \ | ||
361 | pxor x4, x3; \ | ||
362 | pxor x0, x4; | ||
363 | |||
364 | #define SI3(x0, x1, x2, x3, x4) \ | ||
365 | pxor x1, x2; \ | ||
366 | movdqa x1, x4; \ | ||
367 | pand x2, x1; \ | ||
368 | pxor x0, x1; \ | ||
369 | por x4, x0; \ | ||
370 | pxor x3, x4; \ | ||
371 | pxor x3, x0; \ | ||
372 | por x1, x3; \ | ||
373 | pxor x2, x1; \ | ||
374 | pxor x3, x1; \ | ||
375 | pxor x2, x0; \ | ||
376 | pxor x3, x2; \ | ||
377 | pand x1, x3; \ | ||
378 | pxor x0, x1; \ | ||
379 | pand x2, x0; \ | ||
380 | pxor x3, x4; \ | ||
381 | pxor x0, x3; \ | ||
382 | pxor x1, x0; | ||
383 | |||
384 | #define SI4(x0, x1, x2, x3, x4) \ | ||
385 | pxor x3, x2; \ | ||
386 | movdqa x0, x4; \ | ||
387 | pand x1, x0; \ | ||
388 | pxor x2, x0; \ | ||
389 | por x3, x2; \ | ||
390 | pxor RNOT, x4; \ | ||
391 | pxor x0, x1; \ | ||
392 | pxor x2, x0; \ | ||
393 | pand x4, x2; \ | ||
394 | pxor x0, x2; \ | ||
395 | por x4, x0; \ | ||
396 | pxor x3, x0; \ | ||
397 | pand x2, x3; \ | ||
398 | pxor x3, x4; \ | ||
399 | pxor x1, x3; \ | ||
400 | pand x0, x1; \ | ||
401 | pxor x1, x4; \ | ||
402 | pxor x3, x0; | ||
403 | |||
404 | #define SI5(x0, x1, x2, x3, x4) \ | ||
405 | movdqa x1, x4; \ | ||
406 | por x2, x1; \ | ||
407 | pxor x4, x2; \ | ||
408 | pxor x3, x1; \ | ||
409 | pand x4, x3; \ | ||
410 | pxor x3, x2; \ | ||
411 | por x0, x3; \ | ||
412 | pxor RNOT, x0; \ | ||
413 | pxor x2, x3; \ | ||
414 | por x0, x2; \ | ||
415 | pxor x1, x4; \ | ||
416 | pxor x4, x2; \ | ||
417 | pand x0, x4; \ | ||
418 | pxor x1, x0; \ | ||
419 | pxor x3, x1; \ | ||
420 | pand x2, x0; \ | ||
421 | pxor x3, x2; \ | ||
422 | pxor x2, x0; \ | ||
423 | pxor x4, x2; \ | ||
424 | pxor x3, x4; | ||
425 | |||
426 | #define SI6(x0, x1, x2, x3, x4) \ | ||
427 | pxor x2, x0; \ | ||
428 | movdqa x0, x4; \ | ||
429 | pand x3, x0; \ | ||
430 | pxor x3, x2; \ | ||
431 | pxor x2, x0; \ | ||
432 | pxor x1, x3; \ | ||
433 | por x4, x2; \ | ||
434 | pxor x3, x2; \ | ||
435 | pand x0, x3; \ | ||
436 | pxor RNOT, x0; \ | ||
437 | pxor x1, x3; \ | ||
438 | pand x2, x1; \ | ||
439 | pxor x0, x4; \ | ||
440 | pxor x4, x3; \ | ||
441 | pxor x2, x4; \ | ||
442 | pxor x1, x0; \ | ||
443 | pxor x0, x2; | ||
444 | |||
445 | #define SI7(x0, x1, x2, x3, x4) \ | ||
446 | movdqa x3, x4; \ | ||
447 | pand x0, x3; \ | ||
448 | pxor x2, x0; \ | ||
449 | por x4, x2; \ | ||
450 | pxor x1, x4; \ | ||
451 | pxor RNOT, x0; \ | ||
452 | por x3, x1; \ | ||
453 | pxor x0, x4; \ | ||
454 | pand x2, x0; \ | ||
455 | pxor x1, x0; \ | ||
456 | pand x2, x1; \ | ||
457 | pxor x2, x3; \ | ||
458 | pxor x3, x4; \ | ||
459 | pand x3, x2; \ | ||
460 | por x0, x3; \ | ||
461 | pxor x4, x1; \ | ||
462 | pxor x4, x3; \ | ||
463 | pand x0, x4; \ | ||
464 | pxor x2, x4; | ||
465 | |||
466 | #define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \ | ||
467 | movdqa x2, t3; \ | ||
468 | movdqa x0, t1; \ | ||
469 | unpcklps x3, t3; \ | ||
470 | movdqa x0, t2; \ | ||
471 | unpcklps x1, t1; \ | ||
472 | unpckhps x1, t2; \ | ||
473 | movdqa t3, x1; \ | ||
474 | unpckhps x3, x2; \ | ||
475 | movdqa t1, x0; \ | ||
476 | movhlps t1, x1; \ | ||
477 | movdqa t2, t1; \ | ||
478 | movlhps t3, x0; \ | ||
479 | movlhps x2, t1; \ | ||
480 | movhlps t2, x2; \ | ||
481 | movdqa x2, x3; \ | ||
482 | movdqa t1, x2; | ||
483 | |||
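transpose_4x4 rearranges four 16-byte blocks so that each xmm register holds the same 32-bit word from all four blocks; the S-box macros can then process four blocks in lockstep with purely vertical SIMD operations. In scalar terms the data movement is a plain 4x4 word transpose (sketch):

    /* What read_blocks/write_blocks do around the cipher (sketch):
     * in[b][w] is word w of block b; out[w][b] is lane b of vector w. */
    static void transpose_4x4_c(const u32 in[4][4], u32 out[4][4])
    {
            int b, w;

            for (b = 0; b < 4; b++)
                    for (w = 0; w < 4; w++)
                            out[w][b] = in[b][w];
    }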
484 | #define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \ | ||
485 | movdqu (0*4*4)(in), x0; \ | ||
486 | movdqu (1*4*4)(in), x1; \ | ||
487 | movdqu (2*4*4)(in), x2; \ | ||
488 | movdqu (3*4*4)(in), x3; \ | ||
489 | \ | ||
490 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) | ||
491 | |||
492 | #define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ | ||
493 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
494 | \ | ||
495 | movdqu x0, (0*4*4)(out); \ | ||
496 | movdqu x1, (1*4*4)(out); \ | ||
497 | movdqu x2, (2*4*4)(out); \ | ||
498 | movdqu x3, (3*4*4)(out); | ||
499 | |||
500 | #define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ | ||
501 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
502 | \ | ||
503 | movdqu (0*4*4)(out), t0; \ | ||
504 | pxor t0, x0; \ | ||
505 | movdqu x0, (0*4*4)(out); \ | ||
506 | movdqu (1*4*4)(out), t0; \ | ||
507 | pxor t0, x1; \ | ||
508 | movdqu x1, (1*4*4)(out); \ | ||
509 | movdqu (2*4*4)(out), t0; \ | ||
510 | pxor t0, x2; \ | ||
511 | movdqu x2, (2*4*4)(out); \ | ||
512 | movdqu (3*4*4)(out), t0; \ | ||
513 | pxor t0, x3; \ | ||
514 | movdqu x3, (3*4*4)(out); | ||
515 | |||
516 | .align 8 | ||
517 | .global __serpent_enc_blk_4way | ||
518 | .type __serpent_enc_blk_4way,@function; | ||
519 | |||
520 | __serpent_enc_blk_4way: | ||
521 | /* input: | ||
522 | * arg_ctx(%esp): ctx, CTX | ||
523 | * arg_dst(%esp): dst | ||
524 | * arg_src(%esp): src | ||
525 | * arg_xor(%esp): bool, if true: xor output | ||
526 | */ | ||
527 | |||
528 | pcmpeqd RNOT, RNOT; | ||
529 | |||
530 | movl arg_ctx(%esp), CTX; | ||
531 | |||
532 | movl arg_src(%esp), %eax; | ||
533 | read_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); | ||
534 | |||
535 | K(RA, RB, RC, RD, RE, 0); | ||
536 | S0(RA, RB, RC, RD, RE); LK(RC, RB, RD, RA, RE, 1); | ||
537 | S1(RC, RB, RD, RA, RE); LK(RE, RD, RA, RC, RB, 2); | ||
538 | S2(RE, RD, RA, RC, RB); LK(RB, RD, RE, RC, RA, 3); | ||
539 | S3(RB, RD, RE, RC, RA); LK(RC, RA, RD, RB, RE, 4); | ||
540 | S4(RC, RA, RD, RB, RE); LK(RA, RD, RB, RE, RC, 5); | ||
541 | S5(RA, RD, RB, RE, RC); LK(RC, RA, RD, RE, RB, 6); | ||
542 | S6(RC, RA, RD, RE, RB); LK(RD, RB, RA, RE, RC, 7); | ||
543 | S7(RD, RB, RA, RE, RC); LK(RC, RA, RE, RD, RB, 8); | ||
544 | S0(RC, RA, RE, RD, RB); LK(RE, RA, RD, RC, RB, 9); | ||
545 | S1(RE, RA, RD, RC, RB); LK(RB, RD, RC, RE, RA, 10); | ||
546 | S2(RB, RD, RC, RE, RA); LK(RA, RD, RB, RE, RC, 11); | ||
547 | S3(RA, RD, RB, RE, RC); LK(RE, RC, RD, RA, RB, 12); | ||
548 | S4(RE, RC, RD, RA, RB); LK(RC, RD, RA, RB, RE, 13); | ||
549 | S5(RC, RD, RA, RB, RE); LK(RE, RC, RD, RB, RA, 14); | ||
550 | S6(RE, RC, RD, RB, RA); LK(RD, RA, RC, RB, RE, 15); | ||
551 | S7(RD, RA, RC, RB, RE); LK(RE, RC, RB, RD, RA, 16); | ||
552 | S0(RE, RC, RB, RD, RA); LK(RB, RC, RD, RE, RA, 17); | ||
553 | S1(RB, RC, RD, RE, RA); LK(RA, RD, RE, RB, RC, 18); | ||
554 | S2(RA, RD, RE, RB, RC); LK(RC, RD, RA, RB, RE, 19); | ||
555 | S3(RC, RD, RA, RB, RE); LK(RB, RE, RD, RC, RA, 20); | ||
556 | S4(RB, RE, RD, RC, RA); LK(RE, RD, RC, RA, RB, 21); | ||
557 | S5(RE, RD, RC, RA, RB); LK(RB, RE, RD, RA, RC, 22); | ||
558 | S6(RB, RE, RD, RA, RC); LK(RD, RC, RE, RA, RB, 23); | ||
559 | S7(RD, RC, RE, RA, RB); LK(RB, RE, RA, RD, RC, 24); | ||
560 | S0(RB, RE, RA, RD, RC); LK(RA, RE, RD, RB, RC, 25); | ||
561 | S1(RA, RE, RD, RB, RC); LK(RC, RD, RB, RA, RE, 26); | ||
562 | S2(RC, RD, RB, RA, RE); LK(RE, RD, RC, RA, RB, 27); | ||
563 | S3(RE, RD, RC, RA, RB); LK(RA, RB, RD, RE, RC, 28); | ||
564 | S4(RA, RB, RD, RE, RC); LK(RB, RD, RE, RC, RA, 29); | ||
565 | S5(RB, RD, RE, RC, RA); LK(RA, RB, RD, RC, RE, 30); | ||
566 | S6(RA, RB, RD, RC, RE); LK(RD, RE, RB, RC, RA, 31); | ||
567 | S7(RD, RE, RB, RC, RA); K(RA, RB, RC, RD, RE, 32); | ||
568 | |||
569 | movl arg_dst(%esp), %eax; | ||
570 | |||
571 | cmpb $0, arg_xor(%esp); | ||
572 | jnz __enc_xor4; | ||
573 | |||
574 | write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); | ||
575 | |||
576 | ret; | ||
577 | |||
578 | __enc_xor4: | ||
579 | xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); | ||
580 | |||
581 | ret; | ||
582 | |||
583 | .align 8 | ||
584 | .global serpent_dec_blk_4way | ||
585 | .type serpent_dec_blk_4way,@function; | ||
586 | |||
587 | serpent_dec_blk_4way: | ||
588 | /* input: | ||
589 | * arg_ctx(%esp): ctx, CTX | ||
590 | * arg_dst(%esp): dst | ||
591 | * arg_src(%esp): src | ||
592 | */ | ||
593 | |||
594 | pcmpeqd RNOT, RNOT; | ||
595 | |||
596 | movl arg_ctx(%esp), CTX; | ||
597 | |||
598 | movl arg_src(%esp), %eax; | ||
599 | read_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); | ||
600 | |||
601 | K(RA, RB, RC, RD, RE, 32); | ||
602 | SI7(RA, RB, RC, RD, RE); KL(RB, RD, RA, RE, RC, 31); | ||
603 | SI6(RB, RD, RA, RE, RC); KL(RA, RC, RE, RB, RD, 30); | ||
604 | SI5(RA, RC, RE, RB, RD); KL(RC, RD, RA, RE, RB, 29); | ||
605 | SI4(RC, RD, RA, RE, RB); KL(RC, RA, RB, RE, RD, 28); | ||
606 | SI3(RC, RA, RB, RE, RD); KL(RB, RC, RD, RE, RA, 27); | ||
607 | SI2(RB, RC, RD, RE, RA); KL(RC, RA, RE, RD, RB, 26); | ||
608 | SI1(RC, RA, RE, RD, RB); KL(RB, RA, RE, RD, RC, 25); | ||
609 | SI0(RB, RA, RE, RD, RC); KL(RE, RC, RA, RB, RD, 24); | ||
610 | SI7(RE, RC, RA, RB, RD); KL(RC, RB, RE, RD, RA, 23); | ||
611 | SI6(RC, RB, RE, RD, RA); KL(RE, RA, RD, RC, RB, 22); | ||
612 | SI5(RE, RA, RD, RC, RB); KL(RA, RB, RE, RD, RC, 21); | ||
613 | SI4(RA, RB, RE, RD, RC); KL(RA, RE, RC, RD, RB, 20); | ||
614 | SI3(RA, RE, RC, RD, RB); KL(RC, RA, RB, RD, RE, 19); | ||
615 | SI2(RC, RA, RB, RD, RE); KL(RA, RE, RD, RB, RC, 18); | ||
616 | SI1(RA, RE, RD, RB, RC); KL(RC, RE, RD, RB, RA, 17); | ||
617 | SI0(RC, RE, RD, RB, RA); KL(RD, RA, RE, RC, RB, 16); | ||
618 | SI7(RD, RA, RE, RC, RB); KL(RA, RC, RD, RB, RE, 15); | ||
619 | SI6(RA, RC, RD, RB, RE); KL(RD, RE, RB, RA, RC, 14); | ||
620 | SI5(RD, RE, RB, RA, RC); KL(RE, RC, RD, RB, RA, 13); | ||
621 | SI4(RE, RC, RD, RB, RA); KL(RE, RD, RA, RB, RC, 12); | ||
622 | SI3(RE, RD, RA, RB, RC); KL(RA, RE, RC, RB, RD, 11); | ||
623 | SI2(RA, RE, RC, RB, RD); KL(RE, RD, RB, RC, RA, 10); | ||
624 | SI1(RE, RD, RB, RC, RA); KL(RA, RD, RB, RC, RE, 9); | ||
625 | SI0(RA, RD, RB, RC, RE); KL(RB, RE, RD, RA, RC, 8); | ||
626 | SI7(RB, RE, RD, RA, RC); KL(RE, RA, RB, RC, RD, 7); | ||
627 | SI6(RE, RA, RB, RC, RD); KL(RB, RD, RC, RE, RA, 6); | ||
628 | SI5(RB, RD, RC, RE, RA); KL(RD, RA, RB, RC, RE, 5); | ||
629 | SI4(RD, RA, RB, RC, RE); KL(RD, RB, RE, RC, RA, 4); | ||
630 | SI3(RD, RB, RE, RC, RA); KL(RE, RD, RA, RC, RB, 3); | ||
631 | SI2(RE, RD, RA, RC, RB); KL(RD, RB, RC, RA, RE, 2); | ||
632 | SI1(RD, RB, RC, RA, RE); KL(RE, RB, RC, RA, RD, 1); | ||
633 | SI0(RE, RB, RC, RA, RD); K(RC, RD, RB, RE, RA, 0); | ||
634 | |||
635 | movl arg_dst(%esp), %eax; | ||
636 | write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA); | ||
637 | |||
638 | ret; | ||
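Two details worth noting: RNOT is set to all-ones with "pcmpeqd RNOT, RNOT", so "pxor RNOT, x" stands in for the bitwise NOT that SSE2 lacks, and both functions fetch their arguments from the stack, which on x86-32 matches an asmlinkage C declaration. The C-side view of these entry points is roughly (a sketch; the real declarations live in the accompanying serpent header, not shown in this section):

    #include <linux/linkage.h>
    #include <linux/types.h>
    #include <crypto/serpent.h>

    asmlinkage void __serpent_enc_blk_4way(struct serpent_ctx *ctx, u8 *dst,
                                           const u8 *src, bool xor);
    asmlinkage void serpent_dec_blk_4way(struct serpent_ctx *ctx, u8 *dst,
                                         const u8 *src);

The xor flag selects the xor_blocks() exit path, letting one entry point serve both plain encryption and modes such as CTR that XOR keystream into the destination.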
diff --git a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S new file mode 100644 index 000000000000..7f24a1540821 --- /dev/null +++ b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S | |||
@@ -0,0 +1,761 @@ | |||
1 | /* | ||
2 | * Serpent Cipher 8-way parallel algorithm (x86_64/SSE2) | ||
3 | * | ||
4 | * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * Based on crypto/serpent.c by | ||
7 | * Copyright (C) 2002 Dag Arne Osvik <osvik@ii.uib.no> | ||
8 | * 2003 Herbert Valerio Riedel <hvr@gnu.org> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
23 | * USA | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | .file "serpent-sse2-x86_64-asm_64.S" | ||
28 | .text | ||
29 | |||
30 | #define CTX %rdi | ||
31 | |||
32 | /********************************************************************** | ||
33 | 8-way SSE2 serpent | ||
34 | **********************************************************************/ | ||
35 | #define RA1 %xmm0 | ||
36 | #define RB1 %xmm1 | ||
37 | #define RC1 %xmm2 | ||
38 | #define RD1 %xmm3 | ||
39 | #define RE1 %xmm4 | ||
40 | |||
41 | #define RA2 %xmm5 | ||
42 | #define RB2 %xmm6 | ||
43 | #define RC2 %xmm7 | ||
44 | #define RD2 %xmm8 | ||
45 | #define RE2 %xmm9 | ||
46 | |||
47 | #define RNOT %xmm10 | ||
48 | |||
49 | #define RK0 %xmm11 | ||
50 | #define RK1 %xmm12 | ||
51 | #define RK2 %xmm13 | ||
52 | #define RK3 %xmm14 | ||
53 | |||
54 | #define S0_1(x0, x1, x2, x3, x4) \ | ||
55 | movdqa x3, x4; \ | ||
56 | por x0, x3; \ | ||
57 | pxor x4, x0; \ | ||
58 | pxor x2, x4; \ | ||
59 | pxor RNOT, x4; \ | ||
60 | pxor x1, x3; \ | ||
61 | pand x0, x1; \ | ||
62 | pxor x4, x1; \ | ||
63 | pxor x0, x2; | ||
64 | #define S0_2(x0, x1, x2, x3, x4) \ | ||
65 | pxor x3, x0; \ | ||
66 | por x0, x4; \ | ||
67 | pxor x2, x0; \ | ||
68 | pand x1, x2; \ | ||
69 | pxor x2, x3; \ | ||
70 | pxor RNOT, x1; \ | ||
71 | pxor x4, x2; \ | ||
72 | pxor x2, x1; | ||
73 | |||
74 | #define S1_1(x0, x1, x2, x3, x4) \ | ||
75 | movdqa x1, x4; \ | ||
76 | pxor x0, x1; \ | ||
77 | pxor x3, x0; \ | ||
78 | pxor RNOT, x3; \ | ||
79 | pand x1, x4; \ | ||
80 | por x1, x0; \ | ||
81 | pxor x2, x3; \ | ||
82 | pxor x3, x0; \ | ||
83 | pxor x3, x1; | ||
84 | #define S1_2(x0, x1, x2, x3, x4) \ | ||
85 | pxor x4, x3; \ | ||
86 | por x4, x1; \ | ||
87 | pxor x2, x4; \ | ||
88 | pand x0, x2; \ | ||
89 | pxor x1, x2; \ | ||
90 | por x0, x1; \ | ||
91 | pxor RNOT, x0; \ | ||
92 | pxor x2, x0; \ | ||
93 | pxor x1, x4; | ||
94 | |||
95 | #define S2_1(x0, x1, x2, x3, x4) \ | ||
96 | pxor RNOT, x3; \ | ||
97 | pxor x0, x1; \ | ||
98 | movdqa x0, x4; \ | ||
99 | pand x2, x0; \ | ||
100 | pxor x3, x0; \ | ||
101 | por x4, x3; \ | ||
102 | pxor x1, x2; \ | ||
103 | pxor x1, x3; \ | ||
104 | pand x0, x1; | ||
105 | #define S2_2(x0, x1, x2, x3, x4) \ | ||
106 | pxor x2, x0; \ | ||
107 | pand x3, x2; \ | ||
108 | por x1, x3; \ | ||
109 | pxor RNOT, x0; \ | ||
110 | pxor x0, x3; \ | ||
111 | pxor x0, x4; \ | ||
112 | pxor x2, x0; \ | ||
113 | por x2, x1; | ||
114 | |||
115 | #define S3_1(x0, x1, x2, x3, x4) \ | ||
116 | movdqa x1, x4; \ | ||
117 | pxor x3, x1; \ | ||
118 | por x0, x3; \ | ||
119 | pand x0, x4; \ | ||
120 | pxor x2, x0; \ | ||
121 | pxor x1, x2; \ | ||
122 | pand x3, x1; \ | ||
123 | pxor x3, x2; \ | ||
124 | por x4, x0; \ | ||
125 | pxor x3, x4; | ||
126 | #define S3_2(x0, x1, x2, x3, x4) \ | ||
127 | pxor x0, x1; \ | ||
128 | pand x3, x0; \ | ||
129 | pand x4, x3; \ | ||
130 | pxor x2, x3; \ | ||
131 | por x1, x4; \ | ||
132 | pand x1, x2; \ | ||
133 | pxor x3, x4; \ | ||
134 | pxor x3, x0; \ | ||
135 | pxor x2, x3; | ||
136 | |||
137 | #define S4_1(x0, x1, x2, x3, x4) \ | ||
138 | movdqa x3, x4; \ | ||
139 | pand x0, x3; \ | ||
140 | pxor x4, x0; \ | ||
141 | pxor x2, x3; \ | ||
142 | por x4, x2; \ | ||
143 | pxor x1, x0; \ | ||
144 | pxor x3, x4; \ | ||
145 | por x0, x2; \ | ||
146 | pxor x1, x2; | ||
147 | #define S4_2(x0, x1, x2, x3, x4) \ | ||
148 | pand x0, x1; \ | ||
149 | pxor x4, x1; \ | ||
150 | pand x2, x4; \ | ||
151 | pxor x3, x2; \ | ||
152 | pxor x0, x4; \ | ||
153 | por x1, x3; \ | ||
154 | pxor RNOT, x1; \ | ||
155 | pxor x0, x3; | ||
156 | |||
157 | #define S5_1(x0, x1, x2, x3, x4) \ | ||
158 | movdqa x1, x4; \ | ||
159 | por x0, x1; \ | ||
160 | pxor x1, x2; \ | ||
161 | pxor RNOT, x3; \ | ||
162 | pxor x0, x4; \ | ||
163 | pxor x2, x0; \ | ||
164 | pand x4, x1; \ | ||
165 | por x3, x4; \ | ||
166 | pxor x0, x4; | ||
167 | #define S5_2(x0, x1, x2, x3, x4) \ | ||
168 | pand x3, x0; \ | ||
169 | pxor x3, x1; \ | ||
170 | pxor x2, x3; \ | ||
171 | pxor x1, x0; \ | ||
172 | pand x4, x2; \ | ||
173 | pxor x2, x1; \ | ||
174 | pand x0, x2; \ | ||
175 | pxor x2, x3; | ||
176 | |||
177 | #define S6_1(x0, x1, x2, x3, x4) \ | ||
178 | movdqa x1, x4; \ | ||
179 | pxor x0, x3; \ | ||
180 | pxor x2, x1; \ | ||
181 | pxor x0, x2; \ | ||
182 | pand x3, x0; \ | ||
183 | por x3, x1; \ | ||
184 | pxor RNOT, x4; \ | ||
185 | pxor x1, x0; \ | ||
186 | pxor x2, x1; | ||
187 | #define S6_2(x0, x1, x2, x3, x4) \ | ||
188 | pxor x4, x3; \ | ||
189 | pxor x0, x4; \ | ||
190 | pand x0, x2; \ | ||
191 | pxor x1, x4; \ | ||
192 | pxor x3, x2; \ | ||
193 | pand x1, x3; \ | ||
194 | pxor x0, x3; \ | ||
195 | pxor x2, x1; | ||
196 | |||
197 | #define S7_1(x0, x1, x2, x3, x4) \ | ||
198 | pxor RNOT, x1; \ | ||
199 | movdqa x1, x4; \ | ||
200 | pxor RNOT, x0; \ | ||
201 | pand x2, x1; \ | ||
202 | pxor x3, x1; \ | ||
203 | por x4, x3; \ | ||
204 | pxor x2, x4; \ | ||
205 | pxor x3, x2; \ | ||
206 | pxor x0, x3; \ | ||
207 | por x1, x0; | ||
208 | #define S7_2(x0, x1, x2, x3, x4) \ | ||
209 | pand x0, x2; \ | ||
210 | pxor x4, x0; \ | ||
211 | pxor x3, x4; \ | ||
212 | pand x0, x3; \ | ||
213 | pxor x1, x4; \ | ||
214 | pxor x4, x2; \ | ||
215 | pxor x1, x3; \ | ||
216 | por x0, x4; \ | ||
217 | pxor x1, x4; | ||
218 | |||
219 | #define SI0_1(x0, x1, x2, x3, x4) \ | ||
220 | movdqa x3, x4; \ | ||
221 | pxor x0, x1; \ | ||
222 | por x1, x3; \ | ||
223 | pxor x1, x4; \ | ||
224 | pxor RNOT, x0; \ | ||
225 | pxor x3, x2; \ | ||
226 | pxor x0, x3; \ | ||
227 | pand x1, x0; \ | ||
228 | pxor x2, x0; | ||
229 | #define SI0_2(x0, x1, x2, x3, x4) \ | ||
230 | pand x3, x2; \ | ||
231 | pxor x4, x3; \ | ||
232 | pxor x3, x2; \ | ||
233 | pxor x3, x1; \ | ||
234 | pand x0, x3; \ | ||
235 | pxor x0, x1; \ | ||
236 | pxor x2, x0; \ | ||
237 | pxor x3, x4; | ||
238 | |||
239 | #define SI1_1(x0, x1, x2, x3, x4) \ | ||
240 | pxor x3, x1; \ | ||
241 | movdqa x0, x4; \ | ||
242 | pxor x2, x0; \ | ||
243 | pxor RNOT, x2; \ | ||
244 | por x1, x4; \ | ||
245 | pxor x3, x4; \ | ||
246 | pand x1, x3; \ | ||
247 | pxor x2, x1; \ | ||
248 | pand x4, x2; | ||
249 | #define SI1_2(x0, x1, x2, x3, x4) \ | ||
250 | pxor x1, x4; \ | ||
251 | por x3, x1; \ | ||
252 | pxor x0, x3; \ | ||
253 | pxor x0, x2; \ | ||
254 | por x4, x0; \ | ||
255 | pxor x4, x2; \ | ||
256 | pxor x0, x1; \ | ||
257 | pxor x1, x4; | ||
258 | |||
259 | #define SI2_1(x0, x1, x2, x3, x4) \ | ||
260 | pxor x1, x2; \ | ||
261 | movdqa x3, x4; \ | ||
262 | pxor RNOT, x3; \ | ||
263 | por x2, x3; \ | ||
264 | pxor x4, x2; \ | ||
265 | pxor x0, x4; \ | ||
266 | pxor x1, x3; \ | ||
267 | por x2, x1; \ | ||
268 | pxor x0, x2; | ||
269 | #define SI2_2(x0, x1, x2, x3, x4) \ | ||
270 | pxor x4, x1; \ | ||
271 | por x3, x4; \ | ||
272 | pxor x3, x2; \ | ||
273 | pxor x2, x4; \ | ||
274 | pand x1, x2; \ | ||
275 | pxor x3, x2; \ | ||
276 | pxor x4, x3; \ | ||
277 | pxor x0, x4; | ||
278 | |||
279 | #define SI3_1(x0, x1, x2, x3, x4) \ | ||
280 | pxor x1, x2; \ | ||
281 | movdqa x1, x4; \ | ||
282 | pand x2, x1; \ | ||
283 | pxor x0, x1; \ | ||
284 | por x4, x0; \ | ||
285 | pxor x3, x4; \ | ||
286 | pxor x3, x0; \ | ||
287 | por x1, x3; \ | ||
288 | pxor x2, x1; | ||
289 | #define SI3_2(x0, x1, x2, x3, x4) \ | ||
290 | pxor x3, x1; \ | ||
291 | pxor x2, x0; \ | ||
292 | pxor x3, x2; \ | ||
293 | pand x1, x3; \ | ||
294 | pxor x0, x1; \ | ||
295 | pand x2, x0; \ | ||
296 | pxor x3, x4; \ | ||
297 | pxor x0, x3; \ | ||
298 | pxor x1, x0; | ||
299 | |||
300 | #define SI4_1(x0, x1, x2, x3, x4) \ | ||
301 | pxor x3, x2; \ | ||
302 | movdqa x0, x4; \ | ||
303 | pand x1, x0; \ | ||
304 | pxor x2, x0; \ | ||
305 | por x3, x2; \ | ||
306 | pxor RNOT, x4; \ | ||
307 | pxor x0, x1; \ | ||
308 | pxor x2, x0; \ | ||
309 | pand x4, x2; | ||
310 | #define SI4_2(x0, x1, x2, x3, x4) \ | ||
311 | pxor x0, x2; \ | ||
312 | por x4, x0; \ | ||
313 | pxor x3, x0; \ | ||
314 | pand x2, x3; \ | ||
315 | pxor x3, x4; \ | ||
316 | pxor x1, x3; \ | ||
317 | pand x0, x1; \ | ||
318 | pxor x1, x4; \ | ||
319 | pxor x3, x0; | ||
320 | |||
321 | #define SI5_1(x0, x1, x2, x3, x4) \ | ||
322 | movdqa x1, x4; \ | ||
323 | por x2, x1; \ | ||
324 | pxor x4, x2; \ | ||
325 | pxor x3, x1; \ | ||
326 | pand x4, x3; \ | ||
327 | pxor x3, x2; \ | ||
328 | por x0, x3; \ | ||
329 | pxor RNOT, x0; \ | ||
330 | pxor x2, x3; \ | ||
331 | por x0, x2; | ||
332 | #define SI5_2(x0, x1, x2, x3, x4) \ | ||
333 | pxor x1, x4; \ | ||
334 | pxor x4, x2; \ | ||
335 | pand x0, x4; \ | ||
336 | pxor x1, x0; \ | ||
337 | pxor x3, x1; \ | ||
338 | pand x2, x0; \ | ||
339 | pxor x3, x2; \ | ||
340 | pxor x2, x0; \ | ||
341 | pxor x4, x2; \ | ||
342 | pxor x3, x4; | ||
343 | |||
344 | #define SI6_1(x0, x1, x2, x3, x4) \ | ||
345 | pxor x2, x0; \ | ||
346 | movdqa x0, x4; \ | ||
347 | pand x3, x0; \ | ||
348 | pxor x3, x2; \ | ||
349 | pxor x2, x0; \ | ||
350 | pxor x1, x3; \ | ||
351 | por x4, x2; \ | ||
352 | pxor x3, x2; \ | ||
353 | pand x0, x3; | ||
354 | #define SI6_2(x0, x1, x2, x3, x4) \ | ||
355 | pxor RNOT, x0; \ | ||
356 | pxor x1, x3; \ | ||
357 | pand x2, x1; \ | ||
358 | pxor x0, x4; \ | ||
359 | pxor x4, x3; \ | ||
360 | pxor x2, x4; \ | ||
361 | pxor x1, x0; \ | ||
362 | pxor x0, x2; | ||
363 | |||
364 | #define SI7_1(x0, x1, x2, x3, x4) \ | ||
365 | movdqa x3, x4; \ | ||
366 | pand x0, x3; \ | ||
367 | pxor x2, x0; \ | ||
368 | por x4, x2; \ | ||
369 | pxor x1, x4; \ | ||
370 | pxor RNOT, x0; \ | ||
371 | por x3, x1; \ | ||
372 | pxor x0, x4; \ | ||
373 | pand x2, x0; \ | ||
374 | pxor x1, x0; | ||
375 | #define SI7_2(x0, x1, x2, x3, x4) \ | ||
376 | pand x2, x1; \ | ||
377 | pxor x2, x3; \ | ||
378 | pxor x3, x4; \ | ||
379 | pand x3, x2; \ | ||
380 | por x0, x3; \ | ||
381 | pxor x4, x1; \ | ||
382 | pxor x4, x3; \ | ||
383 | pand x0, x4; \ | ||
384 | pxor x2, x4; | ||
385 | |||
386 | #define get_key(i, j, t) \ | ||
387 | movd (4*(i)+(j))*4(CTX), t; \ | ||
388 | pshufd $0, t, t; | ||
389 | |||
390 | #define K2(x0, x1, x2, x3, x4, i) \ | ||
391 | get_key(i, 0, RK0); \ | ||
392 | get_key(i, 1, RK1); \ | ||
393 | get_key(i, 2, RK2); \ | ||
394 | get_key(i, 3, RK3); \ | ||
395 | pxor RK0, x0 ## 1; \ | ||
396 | pxor RK1, x1 ## 1; \ | ||
397 | pxor RK2, x2 ## 1; \ | ||
398 | pxor RK3, x3 ## 1; \ | ||
399 | pxor RK0, x0 ## 2; \ | ||
400 | pxor RK1, x1 ## 2; \ | ||
401 | pxor RK2, x2 ## 2; \ | ||
402 | pxor RK3, x3 ## 2; | ||
403 | |||
404 | #define LK2(x0, x1, x2, x3, x4, i) \ | ||
405 | movdqa x0 ## 1, x4 ## 1; \ | ||
406 | pslld $13, x0 ## 1; \ | ||
407 | psrld $(32 - 13), x4 ## 1; \ | ||
408 | por x4 ## 1, x0 ## 1; \ | ||
409 | pxor x0 ## 1, x1 ## 1; \ | ||
410 | movdqa x2 ## 1, x4 ## 1; \ | ||
411 | pslld $3, x2 ## 1; \ | ||
412 | psrld $(32 - 3), x4 ## 1; \ | ||
413 | por x4 ## 1, x2 ## 1; \ | ||
414 | pxor x2 ## 1, x1 ## 1; \ | ||
415 | movdqa x0 ## 2, x4 ## 2; \ | ||
416 | pslld $13, x0 ## 2; \ | ||
417 | psrld $(32 - 13), x4 ## 2; \ | ||
418 | por x4 ## 2, x0 ## 2; \ | ||
419 | pxor x0 ## 2, x1 ## 2; \ | ||
420 | movdqa x2 ## 2, x4 ## 2; \ | ||
421 | pslld $3, x2 ## 2; \ | ||
422 | psrld $(32 - 3), x4 ## 2; \ | ||
423 | por x4 ## 2, x2 ## 2; \ | ||
424 | pxor x2 ## 2, x1 ## 2; \ | ||
425 | movdqa x1 ## 1, x4 ## 1; \ | ||
426 | pslld $1, x1 ## 1; \ | ||
427 | psrld $(32 - 1), x4 ## 1; \ | ||
428 | por x4 ## 1, x1 ## 1; \ | ||
429 | movdqa x0 ## 1, x4 ## 1; \ | ||
430 | pslld $3, x4 ## 1; \ | ||
431 | pxor x2 ## 1, x3 ## 1; \ | ||
432 | pxor x4 ## 1, x3 ## 1; \ | ||
433 | movdqa x3 ## 1, x4 ## 1; \ | ||
434 | get_key(i, 1, RK1); \ | ||
435 | movdqa x1 ## 2, x4 ## 2; \ | ||
436 | pslld $1, x1 ## 2; \ | ||
437 | psrld $(32 - 1), x4 ## 2; \ | ||
438 | por x4 ## 2, x1 ## 2; \ | ||
439 | movdqa x0 ## 2, x4 ## 2; \ | ||
440 | pslld $3, x4 ## 2; \ | ||
441 | pxor x2 ## 2, x3 ## 2; \ | ||
442 | pxor x4 ## 2, x3 ## 2; \ | ||
443 | movdqa x3 ## 2, x4 ## 2; \ | ||
444 | get_key(i, 3, RK3); \ | ||
445 | pslld $7, x3 ## 1; \ | ||
446 | psrld $(32 - 7), x4 ## 1; \ | ||
447 | por x4 ## 1, x3 ## 1; \ | ||
448 | movdqa x1 ## 1, x4 ## 1; \ | ||
449 | pslld $7, x4 ## 1; \ | ||
450 | pxor x1 ## 1, x0 ## 1; \ | ||
451 | pxor x3 ## 1, x0 ## 1; \ | ||
452 | pxor x3 ## 1, x2 ## 1; \ | ||
453 | pxor x4 ## 1, x2 ## 1; \ | ||
454 | get_key(i, 0, RK0); \ | ||
455 | pslld $7, x3 ## 2; \ | ||
456 | psrld $(32 - 7), x4 ## 2; \ | ||
457 | por x4 ## 2, x3 ## 2; \ | ||
458 | movdqa x1 ## 2, x4 ## 2; \ | ||
459 | pslld $7, x4 ## 2; \ | ||
460 | pxor x1 ## 2, x0 ## 2; \ | ||
461 | pxor x3 ## 2, x0 ## 2; \ | ||
462 | pxor x3 ## 2, x2 ## 2; \ | ||
463 | pxor x4 ## 2, x2 ## 2; \ | ||
464 | get_key(i, 2, RK2); \ | ||
465 | pxor RK1, x1 ## 1; \ | ||
466 | pxor RK3, x3 ## 1; \ | ||
467 | movdqa x0 ## 1, x4 ## 1; \ | ||
468 | pslld $5, x0 ## 1; \ | ||
469 | psrld $(32 - 5), x4 ## 1; \ | ||
470 | por x4 ## 1, x0 ## 1; \ | ||
471 | movdqa x2 ## 1, x4 ## 1; \ | ||
472 | pslld $22, x2 ## 1; \ | ||
473 | psrld $(32 - 22), x4 ## 1; \ | ||
474 | por x4 ## 1, x2 ## 1; \ | ||
475 | pxor RK0, x0 ## 1; \ | ||
476 | pxor RK2, x2 ## 1; \ | ||
477 | pxor RK1, x1 ## 2; \ | ||
478 | pxor RK3, x3 ## 2; \ | ||
479 | movdqa x0 ## 2, x4 ## 2; \ | ||
480 | pslld $5, x0 ## 2; \ | ||
481 | psrld $(32 - 5), x4 ## 2; \ | ||
482 | por x4 ## 2, x0 ## 2; \ | ||
483 | movdqa x2 ## 2, x4 ## 2; \ | ||
484 | pslld $22, x2 ## 2; \ | ||
485 | psrld $(32 - 22), x4 ## 2; \ | ||
486 | por x4 ## 2, x2 ## 2; \ | ||
487 | pxor RK0, x0 ## 2; \ | ||
488 | pxor RK2, x2 ## 2; | ||
489 | |||
490 | #define KL2(x0, x1, x2, x3, x4, i) \ | ||
491 | pxor RK0, x0 ## 1; \ | ||
492 | pxor RK2, x2 ## 1; \ | ||
493 | movdqa x0 ## 1, x4 ## 1; \ | ||
494 | psrld $5, x0 ## 1; \ | ||
495 | pslld $(32 - 5), x4 ## 1; \ | ||
496 | por x4 ## 1, x0 ## 1; \ | ||
497 | pxor RK3, x3 ## 1; \ | ||
498 | pxor RK1, x1 ## 1; \ | ||
499 | movdqa x2 ## 1, x4 ## 1; \ | ||
500 | psrld $22, x2 ## 1; \ | ||
501 | pslld $(32 - 22), x4 ## 1; \ | ||
502 | por x4 ## 1, x2 ## 1; \ | ||
503 | pxor x3 ## 1, x2 ## 1; \ | ||
504 | pxor RK0, x0 ## 2; \ | ||
505 | pxor RK2, x2 ## 2; \ | ||
506 | movdqa x0 ## 2, x4 ## 2; \ | ||
507 | psrld $5, x0 ## 2; \ | ||
508 | pslld $(32 - 5), x4 ## 2; \ | ||
509 | por x4 ## 2, x0 ## 2; \ | ||
510 | pxor RK3, x3 ## 2; \ | ||
511 | pxor RK1, x1 ## 2; \ | ||
512 | movdqa x2 ## 2, x4 ## 2; \ | ||
513 | psrld $22, x2 ## 2; \ | ||
514 | pslld $(32 - 22), x4 ## 2; \ | ||
515 | por x4 ## 2, x2 ## 2; \ | ||
516 | pxor x3 ## 2, x2 ## 2; \ | ||
517 | pxor x3 ## 1, x0 ## 1; \ | ||
518 | movdqa x1 ## 1, x4 ## 1; \ | ||
519 | pslld $7, x4 ## 1; \ | ||
520 | pxor x1 ## 1, x0 ## 1; \ | ||
521 | pxor x4 ## 1, x2 ## 1; \ | ||
522 | movdqa x1 ## 1, x4 ## 1; \ | ||
523 | psrld $1, x1 ## 1; \ | ||
524 | pslld $(32 - 1), x4 ## 1; \ | ||
525 | por x4 ## 1, x1 ## 1; \ | ||
526 | pxor x3 ## 2, x0 ## 2; \ | ||
527 | movdqa x1 ## 2, x4 ## 2; \ | ||
528 | pslld $7, x4 ## 2; \ | ||
529 | pxor x1 ## 2, x0 ## 2; \ | ||
530 | pxor x4 ## 2, x2 ## 2; \ | ||
531 | movdqa x1 ## 2, x4 ## 2; \ | ||
532 | psrld $1, x1 ## 2; \ | ||
533 | pslld $(32 - 1), x4 ## 2; \ | ||
534 | por x4 ## 2, x1 ## 2; \ | ||
535 | movdqa x3 ## 1, x4 ## 1; \ | ||
536 | psrld $7, x3 ## 1; \ | ||
537 | pslld $(32 - 7), x4 ## 1; \ | ||
538 | por x4 ## 1, x3 ## 1; \ | ||
539 | pxor x0 ## 1, x1 ## 1; \ | ||
540 | movdqa x0 ## 1, x4 ## 1; \ | ||
541 | pslld $3, x4 ## 1; \ | ||
542 | pxor x4 ## 1, x3 ## 1; \ | ||
543 | movdqa x0 ## 1, x4 ## 1; \ | ||
544 | movdqa x3 ## 2, x4 ## 2; \ | ||
545 | psrld $7, x3 ## 2; \ | ||
546 | pslld $(32 - 7), x4 ## 2; \ | ||
547 | por x4 ## 2, x3 ## 2; \ | ||
548 | pxor x0 ## 2, x1 ## 2; \ | ||
549 | movdqa x0 ## 2, x4 ## 2; \ | ||
550 | pslld $3, x4 ## 2; \ | ||
551 | pxor x4 ## 2, x3 ## 2; \ | ||
552 | movdqa x0 ## 2, x4 ## 2; \ | ||
553 | psrld $13, x0 ## 1; \ | ||
554 | pslld $(32 - 13), x4 ## 1; \ | ||
555 | por x4 ## 1, x0 ## 1; \ | ||
556 | pxor x2 ## 1, x1 ## 1; \ | ||
557 | pxor x2 ## 1, x3 ## 1; \ | ||
558 | movdqa x2 ## 1, x4 ## 1; \ | ||
559 | psrld $3, x2 ## 1; \ | ||
560 | pslld $(32 - 3), x4 ## 1; \ | ||
561 | por x4 ## 1, x2 ## 1; \ | ||
562 | psrld $13, x0 ## 2; \ | ||
563 | pslld $(32 - 13), x4 ## 2; \ | ||
564 | por x4 ## 2, x0 ## 2; \ | ||
565 | pxor x2 ## 2, x1 ## 2; \ | ||
566 | pxor x2 ## 2, x3 ## 2; \ | ||
567 | movdqa x2 ## 2, x4 ## 2; \ | ||
568 | psrld $3, x2 ## 2; \ | ||
569 | pslld $(32 - 3), x4 ## 2; \ | ||
570 | por x4 ## 2, x2 ## 2; | ||
571 | |||
572 | #define S(SBOX, x0, x1, x2, x3, x4) \ | ||
573 | SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
574 | SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
575 | SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ | ||
576 | SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); | ||
577 | |||
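The x86-64 version runs two 4-block streams side by side (RA1..RE1 and RA2..RE2); the macros take register stems and paste a stream suffix onto them, so each S() or K2() invocation covers all eight blocks, with the two streams' instructions interleaved to hide SSE2 latencies. A preprocessor sketch of the trick:

    #define PASTE(stem, stream)     stem ## stream  /* PASTE(RA, 1) -> RA1 */

    /* With x0 = RA, "x0 ## 1" pastes to RA1 and "x0 ## 2" to RA2, so
     * S(S0, RA, RB, RC, RD, RE) expands to the S0 S-box over both streams:
     *   S0_1(RA1, RB1, RC1, RD1, RE1); S0_2(RA1, RB1, RC1, RD1, RE1);
     *   S0_1(RA2, RB2, RC2, RD2, RE2); S0_2(RA2, RB2, RC2, RD2, RE2);
     */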
578 | #define SP(SBOX, x0, x1, x2, x3, x4, i) \ | ||
579 | get_key(i, 0, RK0); \ | ||
580 | SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
581 | get_key(i, 2, RK2); \ | ||
582 | SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ | ||
583 | get_key(i, 3, RK3); \ | ||
584 | SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
585 | get_key(i, 1, RK1); \ | ||
586 | SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); | ||
587 | |||
588 | #define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \ | ||
589 | movdqa x2, t3; \ | ||
590 | movdqa x0, t1; \ | ||
591 | unpcklps x3, t3; \ | ||
592 | movdqa x0, t2; \ | ||
593 | unpcklps x1, t1; \ | ||
594 | unpckhps x1, t2; \ | ||
595 | movdqa t3, x1; \ | ||
596 | unpckhps x3, x2; \ | ||
597 | movdqa t1, x0; \ | ||
598 | movhlps t1, x1; \ | ||
599 | movdqa t2, t1; \ | ||
600 | movlhps t3, x0; \ | ||
601 | movlhps x2, t1; \ | ||
602 | movhlps t2, x2; \ | ||
603 | movdqa x2, x3; \ | ||
604 | movdqa t1, x2; | ||
605 | |||
606 | #define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \ | ||
607 | movdqu (0*4*4)(in), x0; \ | ||
608 | movdqu (1*4*4)(in), x1; \ | ||
609 | movdqu (2*4*4)(in), x2; \ | ||
610 | movdqu (3*4*4)(in), x3; \ | ||
611 | \ | ||
612 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) | ||
613 | |||
614 | #define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ | ||
615 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
616 | \ | ||
617 | movdqu x0, (0*4*4)(out); \ | ||
618 | movdqu x1, (1*4*4)(out); \ | ||
619 | movdqu x2, (2*4*4)(out); \ | ||
620 | movdqu x3, (3*4*4)(out); | ||
621 | |||
622 | #define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ | ||
623 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
624 | \ | ||
625 | movdqu (0*4*4)(out), t0; \ | ||
626 | pxor t0, x0; \ | ||
627 | movdqu x0, (0*4*4)(out); \ | ||
628 | movdqu (1*4*4)(out), t0; \ | ||
629 | pxor t0, x1; \ | ||
630 | movdqu x1, (1*4*4)(out); \ | ||
631 | movdqu (2*4*4)(out), t0; \ | ||
632 | pxor t0, x2; \ | ||
633 | movdqu x2, (2*4*4)(out); \ | ||
634 | movdqu (3*4*4)(out), t0; \ | ||
635 | pxor t0, x3; \ | ||
636 | movdqu x3, (3*4*4)(out); | ||
637 | |||
638 | .align 8 | ||
639 | .global __serpent_enc_blk_8way | ||
640 | .type __serpent_enc_blk_8way,@function; | ||
641 | |||
642 | __serpent_enc_blk_8way: | ||
643 | /* input: | ||
644 | * %rdi: ctx, CTX | ||
645 | * %rsi: dst | ||
646 | * %rdx: src | ||
647 | * %rcx: bool, if true: xor output | ||
648 | */ | ||
649 | |||
650 | pcmpeqd RNOT, RNOT; | ||
651 | |||
652 | leaq (4*4*4)(%rdx), %rax; | ||
653 | read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
654 | read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
655 | |||
656 | K2(RA, RB, RC, RD, RE, 0); | ||
657 | S(S0, RA, RB, RC, RD, RE); LK2(RC, RB, RD, RA, RE, 1); | ||
658 | S(S1, RC, RB, RD, RA, RE); LK2(RE, RD, RA, RC, RB, 2); | ||
659 | S(S2, RE, RD, RA, RC, RB); LK2(RB, RD, RE, RC, RA, 3); | ||
660 | S(S3, RB, RD, RE, RC, RA); LK2(RC, RA, RD, RB, RE, 4); | ||
661 | S(S4, RC, RA, RD, RB, RE); LK2(RA, RD, RB, RE, RC, 5); | ||
662 | S(S5, RA, RD, RB, RE, RC); LK2(RC, RA, RD, RE, RB, 6); | ||
663 | S(S6, RC, RA, RD, RE, RB); LK2(RD, RB, RA, RE, RC, 7); | ||
664 | S(S7, RD, RB, RA, RE, RC); LK2(RC, RA, RE, RD, RB, 8); | ||
665 | S(S0, RC, RA, RE, RD, RB); LK2(RE, RA, RD, RC, RB, 9); | ||
666 | S(S1, RE, RA, RD, RC, RB); LK2(RB, RD, RC, RE, RA, 10); | ||
667 | S(S2, RB, RD, RC, RE, RA); LK2(RA, RD, RB, RE, RC, 11); | ||
668 | S(S3, RA, RD, RB, RE, RC); LK2(RE, RC, RD, RA, RB, 12); | ||
669 | S(S4, RE, RC, RD, RA, RB); LK2(RC, RD, RA, RB, RE, 13); | ||
670 | S(S5, RC, RD, RA, RB, RE); LK2(RE, RC, RD, RB, RA, 14); | ||
671 | S(S6, RE, RC, RD, RB, RA); LK2(RD, RA, RC, RB, RE, 15); | ||
672 | S(S7, RD, RA, RC, RB, RE); LK2(RE, RC, RB, RD, RA, 16); | ||
673 | S(S0, RE, RC, RB, RD, RA); LK2(RB, RC, RD, RE, RA, 17); | ||
674 | S(S1, RB, RC, RD, RE, RA); LK2(RA, RD, RE, RB, RC, 18); | ||
675 | S(S2, RA, RD, RE, RB, RC); LK2(RC, RD, RA, RB, RE, 19); | ||
676 | S(S3, RC, RD, RA, RB, RE); LK2(RB, RE, RD, RC, RA, 20); | ||
677 | S(S4, RB, RE, RD, RC, RA); LK2(RE, RD, RC, RA, RB, 21); | ||
678 | S(S5, RE, RD, RC, RA, RB); LK2(RB, RE, RD, RA, RC, 22); | ||
679 | S(S6, RB, RE, RD, RA, RC); LK2(RD, RC, RE, RA, RB, 23); | ||
680 | S(S7, RD, RC, RE, RA, RB); LK2(RB, RE, RA, RD, RC, 24); | ||
681 | S(S0, RB, RE, RA, RD, RC); LK2(RA, RE, RD, RB, RC, 25); | ||
682 | S(S1, RA, RE, RD, RB, RC); LK2(RC, RD, RB, RA, RE, 26); | ||
683 | S(S2, RC, RD, RB, RA, RE); LK2(RE, RD, RC, RA, RB, 27); | ||
684 | S(S3, RE, RD, RC, RA, RB); LK2(RA, RB, RD, RE, RC, 28); | ||
685 | S(S4, RA, RB, RD, RE, RC); LK2(RB, RD, RE, RC, RA, 29); | ||
686 | S(S5, RB, RD, RE, RC, RA); LK2(RA, RB, RD, RC, RE, 30); | ||
687 | S(S6, RA, RB, RD, RC, RE); LK2(RD, RE, RB, RC, RA, 31); | ||
688 | S(S7, RD, RE, RB, RC, RA); K2(RA, RB, RC, RD, RE, 32); | ||
689 | |||
690 | leaq (4*4*4)(%rsi), %rax; | ||
691 | |||
692 | testb %cl, %cl; | ||
693 | jnz __enc_xor8; | ||
694 | |||
695 | write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
696 | write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
697 | |||
698 | ret; | ||
699 | |||
700 | __enc_xor8: | ||
701 | xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
702 | xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
703 | |||
704 | ret; | ||
705 | |||
706 | .align 8 | ||
707 | .global serpent_dec_blk_8way | ||
708 | .type serpent_dec_blk_8way,@function; | ||
709 | |||
710 | serpent_dec_blk_8way: | ||
711 | /* input: | ||
712 | * %rdi: ctx, CTX | ||
713 | * %rsi: dst | ||
714 | * %rdx: src | ||
715 | */ | ||
716 | |||
717 | pcmpeqd RNOT, RNOT; | ||
718 | |||
719 | leaq (4*4*4)(%rdx), %rax; | ||
720 | read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
721 | read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
722 | |||
723 | K2(RA, RB, RC, RD, RE, 32); | ||
724 | SP(SI7, RA, RB, RC, RD, RE, 31); KL2(RB, RD, RA, RE, RC, 31); | ||
725 | SP(SI6, RB, RD, RA, RE, RC, 30); KL2(RA, RC, RE, RB, RD, 30); | ||
726 | SP(SI5, RA, RC, RE, RB, RD, 29); KL2(RC, RD, RA, RE, RB, 29); | ||
727 | SP(SI4, RC, RD, RA, RE, RB, 28); KL2(RC, RA, RB, RE, RD, 28); | ||
728 | SP(SI3, RC, RA, RB, RE, RD, 27); KL2(RB, RC, RD, RE, RA, 27); | ||
729 | SP(SI2, RB, RC, RD, RE, RA, 26); KL2(RC, RA, RE, RD, RB, 26); | ||
730 | SP(SI1, RC, RA, RE, RD, RB, 25); KL2(RB, RA, RE, RD, RC, 25); | ||
731 | SP(SI0, RB, RA, RE, RD, RC, 24); KL2(RE, RC, RA, RB, RD, 24); | ||
732 | SP(SI7, RE, RC, RA, RB, RD, 23); KL2(RC, RB, RE, RD, RA, 23); | ||
733 | SP(SI6, RC, RB, RE, RD, RA, 22); KL2(RE, RA, RD, RC, RB, 22); | ||
734 | SP(SI5, RE, RA, RD, RC, RB, 21); KL2(RA, RB, RE, RD, RC, 21); | ||
735 | SP(SI4, RA, RB, RE, RD, RC, 20); KL2(RA, RE, RC, RD, RB, 20); | ||
736 | SP(SI3, RA, RE, RC, RD, RB, 19); KL2(RC, RA, RB, RD, RE, 19); | ||
737 | SP(SI2, RC, RA, RB, RD, RE, 18); KL2(RA, RE, RD, RB, RC, 18); | ||
738 | SP(SI1, RA, RE, RD, RB, RC, 17); KL2(RC, RE, RD, RB, RA, 17); | ||
739 | SP(SI0, RC, RE, RD, RB, RA, 16); KL2(RD, RA, RE, RC, RB, 16); | ||
740 | SP(SI7, RD, RA, RE, RC, RB, 15); KL2(RA, RC, RD, RB, RE, 15); | ||
741 | SP(SI6, RA, RC, RD, RB, RE, 14); KL2(RD, RE, RB, RA, RC, 14); | ||
742 | SP(SI5, RD, RE, RB, RA, RC, 13); KL2(RE, RC, RD, RB, RA, 13); | ||
743 | SP(SI4, RE, RC, RD, RB, RA, 12); KL2(RE, RD, RA, RB, RC, 12); | ||
744 | SP(SI3, RE, RD, RA, RB, RC, 11); KL2(RA, RE, RC, RB, RD, 11); | ||
745 | SP(SI2, RA, RE, RC, RB, RD, 10); KL2(RE, RD, RB, RC, RA, 10); | ||
746 | SP(SI1, RE, RD, RB, RC, RA, 9); KL2(RA, RD, RB, RC, RE, 9); | ||
747 | SP(SI0, RA, RD, RB, RC, RE, 8); KL2(RB, RE, RD, RA, RC, 8); | ||
748 | SP(SI7, RB, RE, RD, RA, RC, 7); KL2(RE, RA, RB, RC, RD, 7); | ||
749 | SP(SI6, RE, RA, RB, RC, RD, 6); KL2(RB, RD, RC, RE, RA, 6); | ||
750 | SP(SI5, RB, RD, RC, RE, RA, 5); KL2(RD, RA, RB, RC, RE, 5); | ||
751 | SP(SI4, RD, RA, RB, RC, RE, 4); KL2(RD, RB, RE, RC, RA, 4); | ||
752 | SP(SI3, RD, RB, RE, RC, RA, 3); KL2(RE, RD, RA, RC, RB, 3); | ||
753 | SP(SI2, RE, RD, RA, RC, RB, 2); KL2(RD, RB, RC, RA, RE, 2); | ||
754 | SP(SI1, RD, RB, RC, RA, RE, 1); KL2(RE, RB, RC, RA, RD, 1); | ||
755 | S(SI0, RE, RB, RC, RA, RD); K2(RC, RD, RB, RE, RA, 0); | ||
756 | |||
757 | leaq (4*4*4)(%rsi), %rax; | ||
758 | write_blocks(%rsi, RC1, RD1, RB1, RE1, RK0, RK1, RK2); | ||
759 | write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2); | ||
760 | |||
761 | ret; | ||
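
Each 8-way call above handles two groups of four 16-byte blocks; the second group sits at byte offset 4*4*4 = 64, which is what the leaq into %rax computes. The encrypt entry additionally takes an xor flag (the testb %cl, %cl seen earlier), selecting whether the keystream overwrites the destination or is XORed into it; the CTR glue below relies on the XOR variant. A plausible sketch of the matching C-side declarations follows — the __serpent_enc_blk_8way name and the inline wrapper bodies are assumptions inferred from the glue code, while the argument-to-register mapping (%rdi, %rsi, %rdx, %cl) is just the SysV x86-64 calling convention used by the assembler above:

	asmlinkage void __serpent_enc_blk_8way(struct serpent_ctx *ctx, u8 *dst,
					       const u8 *src, bool xor);
	asmlinkage void serpent_dec_blk_8way(struct serpent_ctx *ctx, u8 *dst,
					     const u8 *src);

	/* overwrite dst with ciphertext (xor == false) */
	static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst,
						const u8 *src)
	{
		__serpent_enc_blk_8way(ctx, dst, src, false);
	}

	/* xor keystream into dst -- used by the CTR glue (xor == true) */
	static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx,
						    u8 *dst, const u8 *src)
	{
		__serpent_enc_blk_8way(ctx, dst, src, true);
	}
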
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c new file mode 100644 index 000000000000..7955a9b76b91 --- /dev/null +++ b/arch/x86/crypto/serpent_sse2_glue.c | |||
@@ -0,0 +1,1070 @@ | |||
1 | /* | ||
2 | * Glue Code for SSE2 assembler versions of Serpent Cipher | ||
3 | * | ||
4 | * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * Glue code based on aesni-intel_glue.c by: | ||
7 | * Copyright (C) 2008, Intel Corp. | ||
8 | * Author: Huang Ying <ying.huang@intel.com> | ||
9 | * | ||
10 | * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: | ||
11 | * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> | ||
12 | * CTR part based on code (crypto/ctr.c) by: | ||
13 | * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com> | ||
14 | * | ||
15 | * This program is free software; you can redistribute it and/or modify | ||
16 | * it under the terms of the GNU General Public License as published by | ||
17 | * the Free Software Foundation; either version 2 of the License, or | ||
18 | * (at your option) any later version. | ||
19 | * | ||
20 | * This program is distributed in the hope that it will be useful, | ||
21 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
22 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
23 | * GNU General Public License for more details. | ||
24 | * | ||
25 | * You should have received a copy of the GNU General Public License | ||
26 | * along with this program; if not, write to the Free Software | ||
27 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
28 | * USA | ||
29 | * | ||
30 | */ | ||
31 | |||
32 | #include <linux/module.h> | ||
33 | #include <linux/hardirq.h> | ||
34 | #include <linux/types.h> | ||
35 | #include <linux/crypto.h> | ||
36 | #include <linux/err.h> | ||
37 | #include <crypto/algapi.h> | ||
38 | #include <crypto/serpent.h> | ||
39 | #include <crypto/cryptd.h> | ||
40 | #include <crypto/b128ops.h> | ||
41 | #include <crypto/ctr.h> | ||
42 | #include <crypto/lrw.h> | ||
43 | #include <crypto/xts.h> | ||
44 | #include <asm/i387.h> | ||
45 | #include <asm/serpent.h> | ||
46 | #include <crypto/scatterwalk.h> | ||
47 | #include <linux/workqueue.h> | ||
48 | #include <linux/spinlock.h> | ||
49 | |||
50 | struct async_serpent_ctx { | ||
51 | struct cryptd_ablkcipher *cryptd_tfm; | ||
52 | }; | ||
53 | |||
54 | static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) | ||
55 | { | ||
56 | if (fpu_enabled) | ||
57 | return true; | ||
58 | |||
59 | /* SSE2 is only used when the chunk to be processed is large enough, | ||
60 | * so do not enable FPU until it is necessary. | ||
61 | */ | ||
62 | if (nbytes < SERPENT_BLOCK_SIZE * SERPENT_PARALLEL_BLOCKS) | ||
63 | return false; | ||
64 | |||
65 | kernel_fpu_begin(); | ||
66 | return true; | ||
67 | } | ||
68 | |||
69 | static inline void serpent_fpu_end(bool fpu_enabled) | ||
70 | { | ||
71 | if (fpu_enabled) | ||
72 | kernel_fpu_end(); | ||
73 | } | ||
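/* With SERPENT_BLOCK_SIZE = 16 and the 8-way batch used here
 * (SERPENT_PARALLEL_BLOCKS = 8), the threshold works out to 128 bytes:
 * smaller chunks stay on the scalar C implementation and skip the cost of
 * kernel_fpu_begin()/kernel_fpu_end(), which also disables preemption. */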
74 | |||
75 | static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | ||
76 | bool enc) | ||
77 | { | ||
78 | bool fpu_enabled = false; | ||
79 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
80 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
81 | unsigned int nbytes; | ||
82 | int err; | ||
83 | |||
84 | err = blkcipher_walk_virt(desc, walk); | ||
85 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
86 | |||
87 | while ((nbytes = walk->nbytes)) { | ||
88 | u8 *wsrc = walk->src.virt.addr; | ||
89 | u8 *wdst = walk->dst.virt.addr; | ||
90 | |||
91 | fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); | ||
92 | |||
93 | /* Process multi-block batch */ | ||
94 | if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { | ||
95 | do { | ||
96 | if (enc) | ||
97 | serpent_enc_blk_xway(ctx, wdst, wsrc); | ||
98 | else | ||
99 | serpent_dec_blk_xway(ctx, wdst, wsrc); | ||
100 | |||
101 | wsrc += bsize * SERPENT_PARALLEL_BLOCKS; | ||
102 | wdst += bsize * SERPENT_PARALLEL_BLOCKS; | ||
103 | nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; | ||
104 | } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); | ||
105 | |||
106 | if (nbytes < bsize) | ||
107 | goto done; | ||
108 | } | ||
109 | |||
110 | /* Handle leftovers */ | ||
111 | do { | ||
112 | if (enc) | ||
113 | __serpent_encrypt(ctx, wdst, wsrc); | ||
114 | else | ||
115 | __serpent_decrypt(ctx, wdst, wsrc); | ||
116 | |||
117 | wsrc += bsize; | ||
118 | wdst += bsize; | ||
119 | nbytes -= bsize; | ||
120 | } while (nbytes >= bsize); | ||
121 | |||
122 | done: | ||
123 | err = blkcipher_walk_done(desc, walk, nbytes); | ||
124 | } | ||
125 | |||
126 | serpent_fpu_end(fpu_enabled); | ||
127 | return err; | ||
128 | } | ||
129 | |||
130 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
131 | struct scatterlist *src, unsigned int nbytes) | ||
132 | { | ||
133 | struct blkcipher_walk walk; | ||
134 | |||
135 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
136 | return ecb_crypt(desc, &walk, true); | ||
137 | } | ||
138 | |||
139 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
140 | struct scatterlist *src, unsigned int nbytes) | ||
141 | { | ||
142 | struct blkcipher_walk walk; | ||
143 | |||
144 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
145 | return ecb_crypt(desc, &walk, false); | ||
146 | } | ||
147 | |||
148 | static struct crypto_alg blk_ecb_alg = { | ||
149 | .cra_name = "__ecb-serpent-sse2", | ||
150 | .cra_driver_name = "__driver-ecb-serpent-sse2", | ||
151 | .cra_priority = 0, | ||
152 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
153 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
154 | .cra_ctxsize = sizeof(struct serpent_ctx), | ||
155 | .cra_alignmask = 0, | ||
156 | .cra_type = &crypto_blkcipher_type, | ||
157 | .cra_module = THIS_MODULE, | ||
158 | .cra_list = LIST_HEAD_INIT(blk_ecb_alg.cra_list), | ||
159 | .cra_u = { | ||
160 | .blkcipher = { | ||
161 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
162 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
163 | .setkey = serpent_setkey, | ||
164 | .encrypt = ecb_encrypt, | ||
165 | .decrypt = ecb_decrypt, | ||
166 | }, | ||
167 | }, | ||
168 | }; | ||
169 | |||
170 | static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, | ||
171 | struct blkcipher_walk *walk) | ||
172 | { | ||
173 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
174 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
175 | unsigned int nbytes = walk->nbytes; | ||
176 | u128 *src = (u128 *)walk->src.virt.addr; | ||
177 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
178 | u128 *iv = (u128 *)walk->iv; | ||
179 | |||
180 | do { | ||
181 | u128_xor(dst, src, iv); | ||
182 | __serpent_encrypt(ctx, (u8 *)dst, (u8 *)dst); | ||
183 | iv = dst; | ||
184 | |||
185 | src += 1; | ||
186 | dst += 1; | ||
187 | nbytes -= bsize; | ||
188 | } while (nbytes >= bsize); | ||
189 | |||
190 | *(u128 *)walk->iv = *iv; | ||
191 | return nbytes; | ||
192 | } | ||
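/* CBC encryption is inherently serial -- C_i = E(P_i ^ C_{i-1}) chains
 * every block on the previous ciphertext -- so there is no 8-way path
 * here and no FPU use at all.  This is also why ablk_cbc_alg below can
 * point .encrypt straight at __ablk_encrypt, skipping the
 * irq_fpu_usable() check. */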
193 | |||
194 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
195 | struct scatterlist *src, unsigned int nbytes) | ||
196 | { | ||
197 | struct blkcipher_walk walk; | ||
198 | int err; | ||
199 | |||
200 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
201 | err = blkcipher_walk_virt(desc, &walk); | ||
202 | |||
203 | while ((nbytes = walk.nbytes)) { | ||
204 | nbytes = __cbc_encrypt(desc, &walk); | ||
205 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
206 | } | ||
207 | |||
208 | return err; | ||
209 | } | ||
210 | |||
211 | static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, | ||
212 | struct blkcipher_walk *walk) | ||
213 | { | ||
214 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
215 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
216 | unsigned int nbytes = walk->nbytes; | ||
217 | u128 *src = (u128 *)walk->src.virt.addr; | ||
218 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
219 | u128 ivs[SERPENT_PARALLEL_BLOCKS - 1]; | ||
220 | u128 last_iv; | ||
221 | int i; | ||
222 | |||
223 | /* Start of the last block. */ | ||
224 | src += nbytes / bsize - 1; | ||
225 | dst += nbytes / bsize - 1; | ||
226 | |||
227 | last_iv = *src; | ||
228 | |||
229 | /* Process multi-block batch */ | ||
230 | if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { | ||
231 | do { | ||
232 | nbytes -= bsize * (SERPENT_PARALLEL_BLOCKS - 1); | ||
233 | src -= SERPENT_PARALLEL_BLOCKS - 1; | ||
234 | dst -= SERPENT_PARALLEL_BLOCKS - 1; | ||
235 | |||
236 | for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++) | ||
237 | ivs[i] = src[i]; | ||
238 | |||
239 | serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); | ||
240 | |||
241 | for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++) | ||
242 | u128_xor(dst + (i + 1), dst + (i + 1), ivs + i); | ||
243 | |||
244 | nbytes -= bsize; | ||
245 | if (nbytes < bsize) | ||
246 | goto done; | ||
247 | |||
248 | u128_xor(dst, dst, src - 1); | ||
249 | src -= 1; | ||
250 | dst -= 1; | ||
251 | } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); | ||
252 | |||
253 | if (nbytes < bsize) | ||
254 | goto done; | ||
255 | } | ||
256 | |||
257 | /* Handle leftovers */ | ||
258 | for (;;) { | ||
259 | __serpent_decrypt(ctx, (u8 *)dst, (u8 *)src); | ||
260 | |||
261 | nbytes -= bsize; | ||
262 | if (nbytes < bsize) | ||
263 | break; | ||
264 | |||
265 | u128_xor(dst, dst, src - 1); | ||
266 | src -= 1; | ||
267 | dst -= 1; | ||
268 | } | ||
269 | |||
270 | done: | ||
271 | u128_xor(dst, dst, (u128 *)walk->iv); | ||
272 | *(u128 *)walk->iv = last_iv; | ||
273 | |||
274 | return nbytes; | ||
275 | } | ||
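/* Decryption parallelizes because P_i = D(C_i) ^ C_{i-1} depends only on
 * ciphertext.  The walk runs backwards from the last block so that
 * in-place operation (dst == src) never overwrites ciphertext still
 * needed as a chaining value; ivs[] snapshots the blocks consumed by one
 * 8-way call for the same reason. */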
276 | |||
277 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
278 | struct scatterlist *src, unsigned int nbytes) | ||
279 | { | ||
280 | bool fpu_enabled = false; | ||
281 | struct blkcipher_walk walk; | ||
282 | int err; | ||
283 | |||
284 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
285 | err = blkcipher_walk_virt(desc, &walk); | ||
286 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
287 | |||
288 | while ((nbytes = walk.nbytes)) { | ||
289 | fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); | ||
290 | nbytes = __cbc_decrypt(desc, &walk); | ||
291 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
292 | } | ||
293 | |||
294 | serpent_fpu_end(fpu_enabled); | ||
295 | return err; | ||
296 | } | ||
297 | |||
298 | static struct crypto_alg blk_cbc_alg = { | ||
299 | .cra_name = "__cbc-serpent-sse2", | ||
300 | .cra_driver_name = "__driver-cbc-serpent-sse2", | ||
301 | .cra_priority = 0, | ||
302 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
303 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
304 | .cra_ctxsize = sizeof(struct serpent_ctx), | ||
305 | .cra_alignmask = 0, | ||
306 | .cra_type = &crypto_blkcipher_type, | ||
307 | .cra_module = THIS_MODULE, | ||
308 | .cra_list = LIST_HEAD_INIT(blk_cbc_alg.cra_list), | ||
309 | .cra_u = { | ||
310 | .blkcipher = { | ||
311 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
312 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
313 | .setkey = serpent_setkey, | ||
314 | .encrypt = cbc_encrypt, | ||
315 | .decrypt = cbc_decrypt, | ||
316 | }, | ||
317 | }, | ||
318 | }; | ||
319 | |||
320 | static inline void u128_to_be128(be128 *dst, const u128 *src) | ||
321 | { | ||
322 | dst->a = cpu_to_be64(src->a); | ||
323 | dst->b = cpu_to_be64(src->b); | ||
324 | } | ||
325 | |||
326 | static inline void be128_to_u128(u128 *dst, const be128 *src) | ||
327 | { | ||
328 | dst->a = be64_to_cpu(src->a); | ||
329 | dst->b = be64_to_cpu(src->b); | ||
330 | } | ||
331 | |||
332 | static inline void u128_inc(u128 *i) | ||
333 | { | ||
334 | i->b++; | ||
335 | if (!i->b) | ||
336 | i->a++; | ||
337 | } | ||
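/* The counter is kept in native endianness and converted to big endian
 * per block with u128_to_be128().  u128_inc() propagates the carry from
 * the low word into the high word, e.g. { .a = 0, .b = ~0ULL }
 * increments to { .a = 1, .b = 0 }. */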
338 | |||
339 | static void ctr_crypt_final(struct blkcipher_desc *desc, | ||
340 | struct blkcipher_walk *walk) | ||
341 | { | ||
342 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
343 | u8 *ctrblk = walk->iv; | ||
344 | u8 keystream[SERPENT_BLOCK_SIZE]; | ||
345 | u8 *src = walk->src.virt.addr; | ||
346 | u8 *dst = walk->dst.virt.addr; | ||
347 | unsigned int nbytes = walk->nbytes; | ||
348 | |||
349 | __serpent_encrypt(ctx, keystream, ctrblk); | ||
350 | crypto_xor(keystream, src, nbytes); | ||
351 | memcpy(dst, keystream, nbytes); | ||
352 | |||
353 | crypto_inc(ctrblk, SERPENT_BLOCK_SIZE); | ||
354 | } | ||
355 | |||
356 | static unsigned int __ctr_crypt(struct blkcipher_desc *desc, | ||
357 | struct blkcipher_walk *walk) | ||
358 | { | ||
359 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
360 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
361 | unsigned int nbytes = walk->nbytes; | ||
362 | u128 *src = (u128 *)walk->src.virt.addr; | ||
363 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
364 | u128 ctrblk; | ||
365 | be128 ctrblocks[SERPENT_PARALLEL_BLOCKS]; | ||
366 | int i; | ||
367 | |||
368 | be128_to_u128(&ctrblk, (be128 *)walk->iv); | ||
369 | |||
370 | /* Process multi-block batch */ | ||
371 | if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { | ||
372 | do { | ||
373 | /* create ctrblks for parallel encrypt */ | ||
374 | for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) { | ||
375 | if (dst != src) | ||
376 | dst[i] = src[i]; | ||
377 | |||
378 | u128_to_be128(&ctrblocks[i], &ctrblk); | ||
379 | u128_inc(&ctrblk); | ||
380 | } | ||
381 | |||
382 | serpent_enc_blk_xway_xor(ctx, (u8 *)dst, | ||
383 | (u8 *)ctrblocks); | ||
384 | |||
385 | src += SERPENT_PARALLEL_BLOCKS; | ||
386 | dst += SERPENT_PARALLEL_BLOCKS; | ||
387 | nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; | ||
388 | } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); | ||
389 | |||
390 | if (nbytes < bsize) | ||
391 | goto done; | ||
392 | } | ||
393 | |||
394 | /* Handle leftovers */ | ||
395 | do { | ||
396 | if (dst != src) | ||
397 | *dst = *src; | ||
398 | |||
399 | u128_to_be128(&ctrblocks[0], &ctrblk); | ||
400 | u128_inc(&ctrblk); | ||
401 | |||
402 | __serpent_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); | ||
403 | u128_xor(dst, dst, (u128 *)ctrblocks); | ||
404 | |||
405 | src += 1; | ||
406 | dst += 1; | ||
407 | nbytes -= bsize; | ||
408 | } while (nbytes >= bsize); | ||
409 | |||
410 | done: | ||
411 | u128_to_be128((be128 *)walk->iv, &ctrblk); | ||
412 | return nbytes; | ||
413 | } | ||
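/* CTR mode turns the cipher into a keystream generator: the counter
 * blocks are encrypted, eight at a time via serpent_enc_blk_xway_xor(),
 * which XORs the result into dst -- hence the plaintext copy above when
 * dst != src.  Decryption is the identical operation, which is why
 * ablk_ctr_alg below sets .decrypt = ablk_encrypt. */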
414 | |||
415 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
416 | struct scatterlist *src, unsigned int nbytes) | ||
417 | { | ||
418 | bool fpu_enabled = false; | ||
419 | struct blkcipher_walk walk; | ||
420 | int err; | ||
421 | |||
422 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
423 | err = blkcipher_walk_virt_block(desc, &walk, SERPENT_BLOCK_SIZE); | ||
424 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
425 | |||
426 | while ((nbytes = walk.nbytes) >= SERPENT_BLOCK_SIZE) { | ||
427 | fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); | ||
428 | nbytes = __ctr_crypt(desc, &walk); | ||
429 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
430 | } | ||
431 | |||
432 | serpent_fpu_end(fpu_enabled); | ||
433 | |||
434 | if (walk.nbytes) { | ||
435 | ctr_crypt_final(desc, &walk); | ||
436 | err = blkcipher_walk_done(desc, &walk, 0); | ||
437 | } | ||
438 | |||
439 | return err; | ||
440 | } | ||
441 | |||
442 | static struct crypto_alg blk_ctr_alg = { | ||
443 | .cra_name = "__ctr-serpent-sse2", | ||
444 | .cra_driver_name = "__driver-ctr-serpent-sse2", | ||
445 | .cra_priority = 0, | ||
446 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
447 | .cra_blocksize = 1, | ||
448 | .cra_ctxsize = sizeof(struct serpent_ctx), | ||
449 | .cra_alignmask = 0, | ||
450 | .cra_type = &crypto_blkcipher_type, | ||
451 | .cra_module = THIS_MODULE, | ||
452 | .cra_list = LIST_HEAD_INIT(blk_ctr_alg.cra_list), | ||
453 | .cra_u = { | ||
454 | .blkcipher = { | ||
455 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
456 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
457 | .ivsize = SERPENT_BLOCK_SIZE, | ||
458 | .setkey = serpent_setkey, | ||
459 | .encrypt = ctr_crypt, | ||
460 | .decrypt = ctr_crypt, | ||
461 | }, | ||
462 | }, | ||
463 | }; | ||
464 | |||
465 | struct crypt_priv { | ||
466 | struct serpent_ctx *ctx; | ||
467 | bool fpu_enabled; | ||
468 | }; | ||
469 | |||
470 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
471 | { | ||
472 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
473 | struct crypt_priv *ctx = priv; | ||
474 | int i; | ||
475 | |||
476 | ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); | ||
477 | |||
478 | if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { | ||
479 | serpent_enc_blk_xway(ctx->ctx, srcdst, srcdst); | ||
480 | return; | ||
481 | } | ||
482 | |||
483 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
484 | __serpent_encrypt(ctx->ctx, srcdst, srcdst); | ||
485 | } | ||
486 | |||
487 | static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
488 | { | ||
489 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
490 | struct crypt_priv *ctx = priv; | ||
491 | int i; | ||
492 | |||
493 | ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); | ||
494 | |||
495 | if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { | ||
496 | serpent_dec_blk_xway(ctx->ctx, srcdst, srcdst); | ||
497 | return; | ||
498 | } | ||
499 | |||
500 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
501 | __serpent_decrypt(ctx->ctx, srcdst, srcdst); | ||
502 | } | ||
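/* lrw_crypt() and xts_crypt() feed these helpers at most tbuflen bytes
 * at a time (sizeof(buf) = 8 * 16 = 128 in the callers below), so nbytes
 * is either the full eight-block batch or a smaller multiple of the
 * 16-byte block size. */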
503 | |||
504 | struct serpent_lrw_ctx { | ||
505 | struct lrw_table_ctx lrw_table; | ||
506 | struct serpent_ctx serpent_ctx; | ||
507 | }; | ||
508 | |||
509 | static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
510 | unsigned int keylen) | ||
511 | { | ||
512 | struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | ||
513 | int err; | ||
514 | |||
515 | err = __serpent_setkey(&ctx->serpent_ctx, key, keylen - | ||
516 | SERPENT_BLOCK_SIZE); | ||
517 | if (err) | ||
518 | return err; | ||
519 | |||
520 | return lrw_init_table(&ctx->lrw_table, key + keylen - | ||
521 | SERPENT_BLOCK_SIZE); | ||
522 | } | ||
523 | |||
524 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
525 | struct scatterlist *src, unsigned int nbytes) | ||
526 | { | ||
527 | struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
528 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
529 | struct crypt_priv crypt_ctx = { | ||
530 | .ctx = &ctx->serpent_ctx, | ||
531 | .fpu_enabled = false, | ||
532 | }; | ||
533 | struct lrw_crypt_req req = { | ||
534 | .tbuf = buf, | ||
535 | .tbuflen = sizeof(buf), | ||
536 | |||
537 | .table_ctx = &ctx->lrw_table, | ||
538 | .crypt_ctx = &crypt_ctx, | ||
539 | .crypt_fn = encrypt_callback, | ||
540 | }; | ||
541 | int ret; | ||
542 | |||
543 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
544 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
545 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
546 | |||
547 | return ret; | ||
548 | } | ||
549 | |||
550 | static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
551 | struct scatterlist *src, unsigned int nbytes) | ||
552 | { | ||
553 | struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
554 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
555 | struct crypt_priv crypt_ctx = { | ||
556 | .ctx = &ctx->serpent_ctx, | ||
557 | .fpu_enabled = false, | ||
558 | }; | ||
559 | struct lrw_crypt_req req = { | ||
560 | .tbuf = buf, | ||
561 | .tbuflen = sizeof(buf), | ||
562 | |||
563 | .table_ctx = &ctx->lrw_table, | ||
564 | .crypt_ctx = &crypt_ctx, | ||
565 | .crypt_fn = decrypt_callback, | ||
566 | }; | ||
567 | int ret; | ||
568 | |||
569 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
570 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
571 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
572 | |||
573 | return ret; | ||
574 | } | ||
575 | |||
576 | static void lrw_exit_tfm(struct crypto_tfm *tfm) | ||
577 | { | ||
578 | struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | ||
579 | |||
580 | lrw_free_table(&ctx->lrw_table); | ||
581 | } | ||
582 | |||
583 | static struct crypto_alg blk_lrw_alg = { | ||
584 | .cra_name = "__lrw-serpent-sse2", | ||
585 | .cra_driver_name = "__driver-lrw-serpent-sse2", | ||
586 | .cra_priority = 0, | ||
587 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
588 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
589 | .cra_ctxsize = sizeof(struct serpent_lrw_ctx), | ||
590 | .cra_alignmask = 0, | ||
591 | .cra_type = &crypto_blkcipher_type, | ||
592 | .cra_module = THIS_MODULE, | ||
593 | .cra_list = LIST_HEAD_INIT(blk_lrw_alg.cra_list), | ||
594 | .cra_exit = lrw_exit_tfm, | ||
595 | .cra_u = { | ||
596 | .blkcipher = { | ||
597 | .min_keysize = SERPENT_MIN_KEY_SIZE + | ||
598 | SERPENT_BLOCK_SIZE, | ||
599 | .max_keysize = SERPENT_MAX_KEY_SIZE + | ||
600 | SERPENT_BLOCK_SIZE, | ||
601 | .ivsize = SERPENT_BLOCK_SIZE, | ||
602 | .setkey = lrw_serpent_setkey, | ||
603 | .encrypt = lrw_encrypt, | ||
604 | .decrypt = lrw_decrypt, | ||
605 | }, | ||
606 | }, | ||
607 | }; | ||
608 | |||
609 | struct serpent_xts_ctx { | ||
610 | struct serpent_ctx tweak_ctx; | ||
611 | struct serpent_ctx crypt_ctx; | ||
612 | }; | ||
613 | |||
614 | static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
615 | unsigned int keylen) | ||
616 | { | ||
617 | struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm); | ||
618 | u32 *flags = &tfm->crt_flags; | ||
619 | int err; | ||
620 | |||
621 | /* the key consists of two keys of equal size concatenated, | ||
622 | * therefore the length must be even | ||
623 | */ | ||
624 | if (keylen % 2) { | ||
625 | *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
626 | return -EINVAL; | ||
627 | } | ||
628 | |||
629 | /* first half of xts-key is for crypt */ | ||
630 | err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2); | ||
631 | if (err) | ||
632 | return err; | ||
633 | |||
634 | /* second half of xts-key is for tweak */ | ||
635 | return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); | ||
636 | } | ||
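/* Example: a 64-byte xts(serpent) key splits into two independent
 * 32-byte Serpent keys, one for data and one for tweak encryption --
 * matching the doubled min/max keysize in blk_xts_alg below. */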
637 | |||
638 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
639 | struct scatterlist *src, unsigned int nbytes) | ||
640 | { | ||
641 | struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
642 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
643 | struct crypt_priv crypt_ctx = { | ||
644 | .ctx = &ctx->crypt_ctx, | ||
645 | .fpu_enabled = false, | ||
646 | }; | ||
647 | struct xts_crypt_req req = { | ||
648 | .tbuf = buf, | ||
649 | .tbuflen = sizeof(buf), | ||
650 | |||
651 | .tweak_ctx = &ctx->tweak_ctx, | ||
652 | .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt), | ||
653 | .crypt_ctx = &crypt_ctx, | ||
654 | .crypt_fn = encrypt_callback, | ||
655 | }; | ||
656 | int ret; | ||
657 | |||
658 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
659 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
660 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
661 | |||
662 | return ret; | ||
663 | } | ||
664 | |||
665 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
666 | struct scatterlist *src, unsigned int nbytes) | ||
667 | { | ||
668 | struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
669 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
670 | struct crypt_priv crypt_ctx = { | ||
671 | .ctx = &ctx->crypt_ctx, | ||
672 | .fpu_enabled = false, | ||
673 | }; | ||
674 | struct xts_crypt_req req = { | ||
675 | .tbuf = buf, | ||
676 | .tbuflen = sizeof(buf), | ||
677 | |||
678 | .tweak_ctx = &ctx->tweak_ctx, | ||
679 | .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt), | ||
680 | .crypt_ctx = &crypt_ctx, | ||
681 | .crypt_fn = decrypt_callback, | ||
682 | }; | ||
683 | int ret; | ||
684 | |||
685 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
686 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
687 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
688 | |||
689 | return ret; | ||
690 | } | ||
691 | |||
692 | static struct crypto_alg blk_xts_alg = { | ||
693 | .cra_name = "__xts-serpent-sse2", | ||
694 | .cra_driver_name = "__driver-xts-serpent-sse2", | ||
695 | .cra_priority = 0, | ||
696 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
697 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
698 | .cra_ctxsize = sizeof(struct serpent_xts_ctx), | ||
699 | .cra_alignmask = 0, | ||
700 | .cra_type = &crypto_blkcipher_type, | ||
701 | .cra_module = THIS_MODULE, | ||
702 | .cra_list = LIST_HEAD_INIT(blk_xts_alg.cra_list), | ||
703 | .cra_u = { | ||
704 | .blkcipher = { | ||
705 | .min_keysize = SERPENT_MIN_KEY_SIZE * 2, | ||
706 | .max_keysize = SERPENT_MAX_KEY_SIZE * 2, | ||
707 | .ivsize = SERPENT_BLOCK_SIZE, | ||
708 | .setkey = xts_serpent_setkey, | ||
709 | .encrypt = xts_encrypt, | ||
710 | .decrypt = xts_decrypt, | ||
711 | }, | ||
712 | }, | ||
713 | }; | ||
714 | |||
715 | static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, | ||
716 | unsigned int key_len) | ||
717 | { | ||
718 | struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
719 | struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base; | ||
720 | int err; | ||
721 | |||
722 | crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); | ||
723 | crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm) | ||
724 | & CRYPTO_TFM_REQ_MASK); | ||
725 | err = crypto_ablkcipher_setkey(child, key, key_len); | ||
726 | crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child) | ||
727 | & CRYPTO_TFM_RES_MASK); | ||
728 | return err; | ||
729 | } | ||
730 | |||
731 | static int __ablk_encrypt(struct ablkcipher_request *req) | ||
732 | { | ||
733 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); | ||
734 | struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
735 | struct blkcipher_desc desc; | ||
736 | |||
737 | desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); | ||
738 | desc.info = req->info; | ||
739 | desc.flags = 0; | ||
740 | |||
741 | return crypto_blkcipher_crt(desc.tfm)->encrypt( | ||
742 | &desc, req->dst, req->src, req->nbytes); | ||
743 | } | ||
744 | |||
745 | static int ablk_encrypt(struct ablkcipher_request *req) | ||
746 | { | ||
747 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); | ||
748 | struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
749 | |||
750 | if (!irq_fpu_usable()) { | ||
751 | struct ablkcipher_request *cryptd_req = | ||
752 | ablkcipher_request_ctx(req); | ||
753 | |||
754 | memcpy(cryptd_req, req, sizeof(*req)); | ||
755 | ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
756 | |||
757 | return crypto_ablkcipher_encrypt(cryptd_req); | ||
758 | } else { | ||
759 | return __ablk_encrypt(req); | ||
760 | } | ||
761 | } | ||
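/* irq_fpu_usable() is false e.g. in interrupt context on top of a task
 * whose FPU state is live.  In that case the request is bounced to the
 * cryptd worker thread, which runs in process context where
 * kernel_fpu_begin() is always safe; otherwise the synchronous path is
 * taken directly. */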
762 | |||
763 | static int ablk_decrypt(struct ablkcipher_request *req) | ||
764 | { | ||
765 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); | ||
766 | struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
767 | |||
768 | if (!irq_fpu_usable()) { | ||
769 | struct ablkcipher_request *cryptd_req = | ||
770 | ablkcipher_request_ctx(req); | ||
771 | |||
772 | memcpy(cryptd_req, req, sizeof(*req)); | ||
773 | ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
774 | |||
775 | return crypto_ablkcipher_decrypt(cryptd_req); | ||
776 | } else { | ||
777 | struct blkcipher_desc desc; | ||
778 | |||
779 | desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); | ||
780 | desc.info = req->info; | ||
781 | desc.flags = 0; | ||
782 | |||
783 | return crypto_blkcipher_crt(desc.tfm)->decrypt( | ||
784 | &desc, req->dst, req->src, req->nbytes); | ||
785 | } | ||
786 | } | ||
787 | |||
788 | static void ablk_exit(struct crypto_tfm *tfm) | ||
789 | { | ||
790 | struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm); | ||
791 | |||
792 | cryptd_free_ablkcipher(ctx->cryptd_tfm); | ||
793 | } | ||
794 | |||
795 | static void ablk_init_common(struct crypto_tfm *tfm, | ||
796 | struct cryptd_ablkcipher *cryptd_tfm) | ||
797 | { | ||
798 | struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm); | ||
799 | |||
800 | ctx->cryptd_tfm = cryptd_tfm; | ||
801 | tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) + | ||
802 | crypto_ablkcipher_reqsize(&cryptd_tfm->base); | ||
803 | } | ||
804 | |||
805 | static int ablk_ecb_init(struct crypto_tfm *tfm) | ||
806 | { | ||
807 | struct cryptd_ablkcipher *cryptd_tfm; | ||
808 | |||
809 | cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ecb-serpent-sse2", 0, 0); | ||
810 | if (IS_ERR(cryptd_tfm)) | ||
811 | return PTR_ERR(cryptd_tfm); | ||
812 | ablk_init_common(tfm, cryptd_tfm); | ||
813 | return 0; | ||
814 | } | ||
815 | |||
816 | static struct crypto_alg ablk_ecb_alg = { | ||
817 | .cra_name = "ecb(serpent)", | ||
818 | .cra_driver_name = "ecb-serpent-sse2", | ||
819 | .cra_priority = 400, | ||
820 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
821 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
822 | .cra_ctxsize = sizeof(struct async_serpent_ctx), | ||
823 | .cra_alignmask = 0, | ||
824 | .cra_type = &crypto_ablkcipher_type, | ||
825 | .cra_module = THIS_MODULE, | ||
826 | .cra_list = LIST_HEAD_INIT(ablk_ecb_alg.cra_list), | ||
827 | .cra_init = ablk_ecb_init, | ||
828 | .cra_exit = ablk_exit, | ||
829 | .cra_u = { | ||
830 | .ablkcipher = { | ||
831 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
832 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
833 | .setkey = ablk_set_key, | ||
834 | .encrypt = ablk_encrypt, | ||
835 | .decrypt = ablk_decrypt, | ||
836 | }, | ||
837 | }, | ||
838 | }; | ||
839 | |||
840 | static int ablk_cbc_init(struct crypto_tfm *tfm) | ||
841 | { | ||
842 | struct cryptd_ablkcipher *cryptd_tfm; | ||
843 | |||
844 | cryptd_tfm = cryptd_alloc_ablkcipher("__driver-cbc-serpent-sse2", 0, 0); | ||
845 | if (IS_ERR(cryptd_tfm)) | ||
846 | return PTR_ERR(cryptd_tfm); | ||
847 | ablk_init_common(tfm, cryptd_tfm); | ||
848 | return 0; | ||
849 | } | ||
850 | |||
851 | static struct crypto_alg ablk_cbc_alg = { | ||
852 | .cra_name = "cbc(serpent)", | ||
853 | .cra_driver_name = "cbc-serpent-sse2", | ||
854 | .cra_priority = 400, | ||
855 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
856 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
857 | .cra_ctxsize = sizeof(struct async_serpent_ctx), | ||
858 | .cra_alignmask = 0, | ||
859 | .cra_type = &crypto_ablkcipher_type, | ||
860 | .cra_module = THIS_MODULE, | ||
861 | .cra_list = LIST_HEAD_INIT(ablk_cbc_alg.cra_list), | ||
862 | .cra_init = ablk_cbc_init, | ||
863 | .cra_exit = ablk_exit, | ||
864 | .cra_u = { | ||
865 | .ablkcipher = { | ||
866 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
867 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
868 | .ivsize = SERPENT_BLOCK_SIZE, | ||
869 | .setkey = ablk_set_key, | ||
870 | .encrypt = __ablk_encrypt, | ||
871 | .decrypt = ablk_decrypt, | ||
872 | }, | ||
873 | }, | ||
874 | }; | ||
875 | |||
876 | static int ablk_ctr_init(struct crypto_tfm *tfm) | ||
877 | { | ||
878 | struct cryptd_ablkcipher *cryptd_tfm; | ||
879 | |||
880 | cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ctr-serpent-sse2", 0, 0); | ||
881 | if (IS_ERR(cryptd_tfm)) | ||
882 | return PTR_ERR(cryptd_tfm); | ||
883 | ablk_init_common(tfm, cryptd_tfm); | ||
884 | return 0; | ||
885 | } | ||
886 | |||
887 | static struct crypto_alg ablk_ctr_alg = { | ||
888 | .cra_name = "ctr(serpent)", | ||
889 | .cra_driver_name = "ctr-serpent-sse2", | ||
890 | .cra_priority = 400, | ||
891 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
892 | .cra_blocksize = 1, | ||
893 | .cra_ctxsize = sizeof(struct async_serpent_ctx), | ||
894 | .cra_alignmask = 0, | ||
895 | .cra_type = &crypto_ablkcipher_type, | ||
896 | .cra_module = THIS_MODULE, | ||
897 | .cra_list = LIST_HEAD_INIT(ablk_ctr_alg.cra_list), | ||
898 | .cra_init = ablk_ctr_init, | ||
899 | .cra_exit = ablk_exit, | ||
900 | .cra_u = { | ||
901 | .ablkcipher = { | ||
902 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
903 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
904 | .ivsize = SERPENT_BLOCK_SIZE, | ||
905 | .setkey = ablk_set_key, | ||
906 | .encrypt = ablk_encrypt, | ||
907 | .decrypt = ablk_encrypt, | ||
908 | .geniv = "chainiv", | ||
909 | }, | ||
910 | }, | ||
911 | }; | ||
912 | |||
913 | static int ablk_lrw_init(struct crypto_tfm *tfm) | ||
914 | { | ||
915 | struct cryptd_ablkcipher *cryptd_tfm; | ||
916 | |||
917 | cryptd_tfm = cryptd_alloc_ablkcipher("__driver-lrw-serpent-sse2", 0, 0); | ||
918 | if (IS_ERR(cryptd_tfm)) | ||
919 | return PTR_ERR(cryptd_tfm); | ||
920 | ablk_init_common(tfm, cryptd_tfm); | ||
921 | return 0; | ||
922 | } | ||
923 | |||
924 | static struct crypto_alg ablk_lrw_alg = { | ||
925 | .cra_name = "lrw(serpent)", | ||
926 | .cra_driver_name = "lrw-serpent-sse2", | ||
927 | .cra_priority = 400, | ||
928 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
929 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
930 | .cra_ctxsize = sizeof(struct async_serpent_ctx), | ||
931 | .cra_alignmask = 0, | ||
932 | .cra_type = &crypto_ablkcipher_type, | ||
933 | .cra_module = THIS_MODULE, | ||
934 | .cra_list = LIST_HEAD_INIT(ablk_lrw_alg.cra_list), | ||
935 | .cra_init = ablk_lrw_init, | ||
936 | .cra_exit = ablk_exit, | ||
937 | .cra_u = { | ||
938 | .ablkcipher = { | ||
939 | .min_keysize = SERPENT_MIN_KEY_SIZE + | ||
940 | SERPENT_BLOCK_SIZE, | ||
941 | .max_keysize = SERPENT_MAX_KEY_SIZE + | ||
942 | SERPENT_BLOCK_SIZE, | ||
943 | .ivsize = SERPENT_BLOCK_SIZE, | ||
944 | .setkey = ablk_set_key, | ||
945 | .encrypt = ablk_encrypt, | ||
946 | .decrypt = ablk_decrypt, | ||
947 | }, | ||
948 | }, | ||
949 | }; | ||
950 | |||
951 | static int ablk_xts_init(struct crypto_tfm *tfm) | ||
952 | { | ||
953 | struct cryptd_ablkcipher *cryptd_tfm; | ||
954 | |||
955 | cryptd_tfm = cryptd_alloc_ablkcipher("__driver-xts-serpent-sse2", 0, 0); | ||
956 | if (IS_ERR(cryptd_tfm)) | ||
957 | return PTR_ERR(cryptd_tfm); | ||
958 | ablk_init_common(tfm, cryptd_tfm); | ||
959 | return 0; | ||
960 | } | ||
961 | |||
962 | static struct crypto_alg ablk_xts_alg = { | ||
963 | .cra_name = "xts(serpent)", | ||
964 | .cra_driver_name = "xts-serpent-sse2", | ||
965 | .cra_priority = 400, | ||
966 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
967 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
968 | .cra_ctxsize = sizeof(struct async_serpent_ctx), | ||
969 | .cra_alignmask = 0, | ||
970 | .cra_type = &crypto_ablkcipher_type, | ||
971 | .cra_module = THIS_MODULE, | ||
972 | .cra_list = LIST_HEAD_INIT(ablk_xts_alg.cra_list), | ||
973 | .cra_init = ablk_xts_init, | ||
974 | .cra_exit = ablk_exit, | ||
975 | .cra_u = { | ||
976 | .ablkcipher = { | ||
977 | .min_keysize = SERPENT_MIN_KEY_SIZE * 2, | ||
978 | .max_keysize = SERPENT_MAX_KEY_SIZE * 2, | ||
979 | .ivsize = SERPENT_BLOCK_SIZE, | ||
980 | .setkey = ablk_set_key, | ||
981 | .encrypt = ablk_encrypt, | ||
982 | .decrypt = ablk_decrypt, | ||
983 | }, | ||
984 | }, | ||
985 | }; | ||
986 | |||
987 | static int __init serpent_sse2_init(void) | ||
988 | { | ||
989 | int err; | ||
990 | |||
991 | if (!cpu_has_xmm2) { | ||
992 | printk(KERN_INFO "SSE2 instructions are not detected.\n"); | ||
993 | return -ENODEV; | ||
994 | } | ||
995 | |||
996 | err = crypto_register_alg(&blk_ecb_alg); | ||
997 | if (err) | ||
998 | goto blk_ecb_err; | ||
999 | err = crypto_register_alg(&blk_cbc_alg); | ||
1000 | if (err) | ||
1001 | goto blk_cbc_err; | ||
1002 | err = crypto_register_alg(&blk_ctr_alg); | ||
1003 | if (err) | ||
1004 | goto blk_ctr_err; | ||
1005 | err = crypto_register_alg(&ablk_ecb_alg); | ||
1006 | if (err) | ||
1007 | goto ablk_ecb_err; | ||
1008 | err = crypto_register_alg(&ablk_cbc_alg); | ||
1009 | if (err) | ||
1010 | goto ablk_cbc_err; | ||
1011 | err = crypto_register_alg(&ablk_ctr_alg); | ||
1012 | if (err) | ||
1013 | goto ablk_ctr_err; | ||
1014 | err = crypto_register_alg(&blk_lrw_alg); | ||
1015 | if (err) | ||
1016 | goto blk_lrw_err; | ||
1017 | err = crypto_register_alg(&ablk_lrw_alg); | ||
1018 | if (err) | ||
1019 | goto ablk_lrw_err; | ||
1020 | err = crypto_register_alg(&blk_xts_alg); | ||
1021 | if (err) | ||
1022 | goto blk_xts_err; | ||
1023 | err = crypto_register_alg(&ablk_xts_alg); | ||
1024 | if (err) | ||
1025 | goto ablk_xts_err; | ||
1026 | return err; | ||
1027 | |||
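/* not reached on success: the unwind chain below is entered only through
 * its error labels, each of which unregisters everything set up before
 * the failing step */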
1028 | crypto_unregister_alg(&ablk_xts_alg); | ||
1029 | ablk_xts_err: | ||
1030 | crypto_unregister_alg(&blk_xts_alg); | ||
1031 | blk_xts_err: | ||
1032 | crypto_unregister_alg(&ablk_lrw_alg); | ||
1033 | ablk_lrw_err: | ||
1034 | crypto_unregister_alg(&blk_lrw_alg); | ||
1035 | blk_lrw_err: | ||
1036 | crypto_unregister_alg(&ablk_ctr_alg); | ||
1037 | ablk_ctr_err: | ||
1038 | crypto_unregister_alg(&ablk_cbc_alg); | ||
1039 | ablk_cbc_err: | ||
1040 | crypto_unregister_alg(&ablk_ecb_alg); | ||
1041 | ablk_ecb_err: | ||
1042 | crypto_unregister_alg(&blk_ctr_alg); | ||
1043 | blk_ctr_err: | ||
1044 | crypto_unregister_alg(&blk_cbc_alg); | ||
1045 | blk_cbc_err: | ||
1046 | crypto_unregister_alg(&blk_ecb_alg); | ||
1047 | blk_ecb_err: | ||
1048 | return err; | ||
1049 | } | ||
1050 | |||
1051 | static void __exit serpent_sse2_exit(void) | ||
1052 | { | ||
1053 | crypto_unregister_alg(&ablk_xts_alg); | ||
1054 | crypto_unregister_alg(&blk_xts_alg); | ||
1055 | crypto_unregister_alg(&ablk_lrw_alg); | ||
1056 | crypto_unregister_alg(&blk_lrw_alg); | ||
1057 | crypto_unregister_alg(&ablk_ctr_alg); | ||
1058 | crypto_unregister_alg(&ablk_cbc_alg); | ||
1059 | crypto_unregister_alg(&ablk_ecb_alg); | ||
1060 | crypto_unregister_alg(&blk_ctr_alg); | ||
1061 | crypto_unregister_alg(&blk_cbc_alg); | ||
1062 | crypto_unregister_alg(&blk_ecb_alg); | ||
1063 | } | ||
1064 | |||
1065 | module_init(serpent_sse2_init); | ||
1066 | module_exit(serpent_sse2_exit); | ||
1067 | |||
1068 | MODULE_DESCRIPTION("Serpent Cipher Algorithm, SSE2 optimized"); | ||
1069 | MODULE_LICENSE("GPL"); | ||
1070 | MODULE_ALIAS("serpent"); | ||
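
With both the synchronous __driver-* algorithms and their cryptd-backed async wrappers registered at priority 400, a caller asking for e.g. "cbc(serpent)" resolves to the SSE2 implementation ahead of the generic C one. A minimal sketch of such a caller, using the ablkcipher API of this era — use_serpent_example is a hypothetical name, and key setup plus request handling are elided:

	#include <linux/err.h>
	#include <linux/crypto.h>

	static int use_serpent_example(void)
	{
		struct crypto_ablkcipher *tfm;

		/* resolves to "cbc-serpent-sse2" (priority 400) when this
		 * module is loaded, otherwise to a lower-priority fallback */
		tfm = crypto_alloc_ablkcipher("cbc(serpent)", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		/* ... crypto_ablkcipher_setkey(), build an ablkcipher_request,
		 * crypto_ablkcipher_encrypt() ... */

		crypto_free_ablkcipher(tfm);
		return 0;
	}
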
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index 5ede9c444c3e..7fee8c152f93 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c | |||
@@ -32,6 +32,8 @@ | |||
32 | #include <crypto/algapi.h> | 32 | #include <crypto/algapi.h> |
33 | #include <crypto/twofish.h> | 33 | #include <crypto/twofish.h> |
34 | #include <crypto/b128ops.h> | 34 | #include <crypto/b128ops.h> |
35 | #include <crypto/lrw.h> | ||
36 | #include <crypto/xts.h> | ||
35 | 37 | ||
36 | /* regular block cipher functions from twofish_x86_64 module */ | 38 | /* regular block cipher functions from twofish_x86_64 module */ |
37 | asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, | 39 | asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, |
@@ -432,6 +434,209 @@ static struct crypto_alg blk_ctr_alg = { | |||
432 | }, | 434 | }, |
433 | }; | 435 | }; |
434 | 436 | ||
437 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
438 | { | ||
439 | const unsigned int bsize = TF_BLOCK_SIZE; | ||
440 | struct twofish_ctx *ctx = priv; | ||
441 | int i; | ||
442 | |||
443 | if (nbytes == 3 * bsize) { | ||
444 | twofish_enc_blk_3way(ctx, srcdst, srcdst); | ||
445 | return; | ||
446 | } | ||
447 | |||
448 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
449 | twofish_enc_blk(ctx, srcdst, srcdst); | ||
450 | } | ||
451 | |||
452 | static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
453 | { | ||
454 | const unsigned int bsize = TF_BLOCK_SIZE; | ||
455 | struct twofish_ctx *ctx = priv; | ||
456 | int i; | ||
457 | |||
458 | if (nbytes == 3 * bsize) { | ||
459 | twofish_dec_blk_3way(ctx, srcdst, srcdst); | ||
460 | return; | ||
461 | } | ||
462 | |||
463 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
464 | twofish_dec_blk(ctx, srcdst, srcdst); | ||
465 | } | ||
466 | |||
467 | struct twofish_lrw_ctx { | ||
468 | struct lrw_table_ctx lrw_table; | ||
469 | struct twofish_ctx twofish_ctx; | ||
470 | }; | ||
471 | |||
472 | static int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
473 | unsigned int keylen) | ||
474 | { | ||
475 | struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | ||
476 | int err; | ||
477 | |||
478 | err = __twofish_setkey(&ctx->twofish_ctx, key, keylen - TF_BLOCK_SIZE, | ||
479 | &tfm->crt_flags); | ||
480 | if (err) | ||
481 | return err; | ||
482 | |||
483 | return lrw_init_table(&ctx->lrw_table, key + keylen - TF_BLOCK_SIZE); | ||
484 | } | ||
485 | |||
486 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
487 | struct scatterlist *src, unsigned int nbytes) | ||
488 | { | ||
489 | struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
490 | be128 buf[3]; | ||
491 | struct lrw_crypt_req req = { | ||
492 | .tbuf = buf, | ||
493 | .tbuflen = sizeof(buf), | ||
494 | |||
495 | .table_ctx = &ctx->lrw_table, | ||
496 | .crypt_ctx = &ctx->twofish_ctx, | ||
497 | .crypt_fn = encrypt_callback, | ||
498 | }; | ||
499 | |||
500 | return lrw_crypt(desc, dst, src, nbytes, &req); | ||
501 | } | ||
502 | |||
503 | static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
504 | struct scatterlist *src, unsigned int nbytes) | ||
505 | { | ||
506 | struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
507 | be128 buf[3]; | ||
508 | struct lrw_crypt_req req = { | ||
509 | .tbuf = buf, | ||
510 | .tbuflen = sizeof(buf), | ||
511 | |||
512 | .table_ctx = &ctx->lrw_table, | ||
513 | .crypt_ctx = &ctx->twofish_ctx, | ||
514 | .crypt_fn = decrypt_callback, | ||
515 | }; | ||
516 | |||
517 | return lrw_crypt(desc, dst, src, nbytes, &req); | ||
518 | } | ||
519 | |||
520 | static void lrw_exit_tfm(struct crypto_tfm *tfm) | ||
521 | { | ||
522 | struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | ||
523 | |||
524 | lrw_free_table(&ctx->lrw_table); | ||
525 | } | ||
526 | |||
527 | static struct crypto_alg blk_lrw_alg = { | ||
528 | .cra_name = "lrw(twofish)", | ||
529 | .cra_driver_name = "lrw-twofish-3way", | ||
530 | .cra_priority = 300, | ||
531 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
532 | .cra_blocksize = TF_BLOCK_SIZE, | ||
533 | .cra_ctxsize = sizeof(struct twofish_lrw_ctx), | ||
534 | .cra_alignmask = 0, | ||
535 | .cra_type = &crypto_blkcipher_type, | ||
536 | .cra_module = THIS_MODULE, | ||
537 | .cra_list = LIST_HEAD_INIT(blk_lrw_alg.cra_list), | ||
538 | .cra_exit = lrw_exit_tfm, | ||
539 | .cra_u = { | ||
540 | .blkcipher = { | ||
541 | .min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE, | ||
542 | .max_keysize = TF_MAX_KEY_SIZE + TF_BLOCK_SIZE, | ||
543 | .ivsize = TF_BLOCK_SIZE, | ||
544 | .setkey = lrw_twofish_setkey, | ||
545 | .encrypt = lrw_encrypt, | ||
546 | .decrypt = lrw_decrypt, | ||
547 | }, | ||
548 | }, | ||
549 | }; | ||
550 | |||
551 | struct twofish_xts_ctx { | ||
552 | struct twofish_ctx tweak_ctx; | ||
553 | struct twofish_ctx crypt_ctx; | ||
554 | }; | ||
555 | |||
556 | static int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
557 | unsigned int keylen) | ||
558 | { | ||
559 | struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm); | ||
560 | u32 *flags = &tfm->crt_flags; | ||
561 | int err; | ||
562 | |||
563 | /* the key consists of two keys of equal size concatenated, | ||
564 | * therefore the length must be even | ||
565 | */ | ||
566 | if (keylen % 2) { | ||
567 | *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
568 | return -EINVAL; | ||
569 | } | ||
570 | |||
571 | /* first half of xts-key is for crypt */ | ||
572 | err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); | ||
573 | if (err) | ||
574 | return err; | ||
575 | |||
576 | /* second half of xts-key is for tweak */ | ||
577 | return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, | ||
578 | flags); | ||
579 | } | ||
580 | |||
581 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
582 | struct scatterlist *src, unsigned int nbytes) | ||
583 | { | ||
584 | struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
585 | be128 buf[3]; | ||
586 | struct xts_crypt_req req = { | ||
587 | .tbuf = buf, | ||
588 | .tbuflen = sizeof(buf), | ||
589 | |||
590 | .tweak_ctx = &ctx->tweak_ctx, | ||
591 | .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), | ||
592 | .crypt_ctx = &ctx->crypt_ctx, | ||
593 | .crypt_fn = encrypt_callback, | ||
594 | }; | ||
595 | |||
596 | return xts_crypt(desc, dst, src, nbytes, &req); | ||
597 | } | ||
598 | |||
599 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
600 | struct scatterlist *src, unsigned int nbytes) | ||
601 | { | ||
602 | struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
603 | be128 buf[3]; | ||
604 | struct xts_crypt_req req = { | ||
605 | .tbuf = buf, | ||
606 | .tbuflen = sizeof(buf), | ||
607 | |||
608 | .tweak_ctx = &ctx->tweak_ctx, | ||
609 | .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), | ||
610 | .crypt_ctx = &ctx->crypt_ctx, | ||
611 | .crypt_fn = decrypt_callback, | ||
612 | }; | ||
613 | |||
614 | return xts_crypt(desc, dst, src, nbytes, &req); | ||
615 | } | ||
616 | |||
617 | static struct crypto_alg blk_xts_alg = { | ||
618 | .cra_name = "xts(twofish)", | ||
619 | .cra_driver_name = "xts-twofish-3way", | ||
620 | .cra_priority = 300, | ||
621 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
622 | .cra_blocksize = TF_BLOCK_SIZE, | ||
623 | .cra_ctxsize = sizeof(struct twofish_xts_ctx), | ||
624 | .cra_alignmask = 0, | ||
625 | .cra_type = &crypto_blkcipher_type, | ||
626 | .cra_module = THIS_MODULE, | ||
627 | .cra_list = LIST_HEAD_INIT(blk_xts_alg.cra_list), | ||
628 | .cra_u = { | ||
629 | .blkcipher = { | ||
630 | .min_keysize = TF_MIN_KEY_SIZE * 2, | ||
631 | .max_keysize = TF_MAX_KEY_SIZE * 2, | ||
632 | .ivsize = TF_BLOCK_SIZE, | ||
633 | .setkey = xts_twofish_setkey, | ||
634 | .encrypt = xts_encrypt, | ||
635 | .decrypt = xts_decrypt, | ||
636 | }, | ||
637 | }, | ||
638 | }; | ||
639 | |||
435 | int __init init(void) | 640 | int __init init(void) |
436 | { | 641 | { |
437 | int err; | 642 | int err; |
@@ -445,9 +650,20 @@ int __init init(void) | |||
445 | err = crypto_register_alg(&blk_ctr_alg); | 650 | err = crypto_register_alg(&blk_ctr_alg); |
446 | if (err) | 651 | if (err) |
447 | goto ctr_err; | 652 | goto ctr_err; |
653 | err = crypto_register_alg(&blk_lrw_alg); | ||
654 | if (err) | ||
655 | goto blk_lrw_err; | ||
656 | err = crypto_register_alg(&blk_xts_alg); | ||
657 | if (err) | ||
658 | goto blk_xts_err; | ||
448 | 659 | ||
449 | return 0; | 660 | return 0; |
450 | 661 | ||
662 | crypto_unregister_alg(&blk_xts_alg); | ||
663 | blk_xts_err: | ||
664 | crypto_unregister_alg(&blk_lrw_alg); | ||
665 | blk_lrw_err: | ||
666 | crypto_unregister_alg(&blk_ctr_alg); | ||
451 | ctr_err: | 667 | ctr_err: |
452 | crypto_unregister_alg(&blk_cbc_alg); | 668 | crypto_unregister_alg(&blk_cbc_alg); |
453 | cbc_err: | 669 | cbc_err: |
@@ -458,6 +674,8 @@ ecb_err: | |||
458 | 674 | ||
459 | void __exit fini(void) | 675 | void __exit fini(void) |
460 | { | 676 | { |
677 | crypto_unregister_alg(&blk_xts_alg); | ||
678 | crypto_unregister_alg(&blk_lrw_alg); | ||
461 | crypto_unregister_alg(&blk_ctr_alg); | 679 | crypto_unregister_alg(&blk_ctr_alg); |
462 | crypto_unregister_alg(&blk_cbc_alg); | 680 | crypto_unregister_alg(&blk_cbc_alg); |
463 | crypto_unregister_alg(&blk_ecb_alg); | 681 | crypto_unregister_alg(&blk_ecb_alg); |
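
The Twofish counterpart above follows the same LRW/XTS template but needs no FPU bookkeeping: the 3-way assembler implementation uses only general-purpose registers, so the callbacks pass the twofish_ctx directly instead of going through a crypt_priv wrapper. The tweak buffer is sized for the 3-block batch accordingly: tbuflen = 3 * TF_BLOCK_SIZE = 3 * 16 = 48 bytes, against 8 * 16 = 128 bytes for the SSE2 Serpent code.
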
diff --git a/arch/x86/ia32/Makefile b/arch/x86/ia32/Makefile index 52d0ccfcf6ea..455646e0e532 100644 --- a/arch/x86/ia32/Makefile +++ b/arch/x86/ia32/Makefile | |||
@@ -3,6 +3,7 @@ | |||
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o | 5 | obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o |
6 | obj-$(CONFIG_IA32_EMULATION) += nosyscall.o syscall_ia32.o | ||
6 | 7 | ||
7 | sysv-$(CONFIG_SYSVIPC) := ipc32.o | 8 | sysv-$(CONFIG_SYSVIPC) := ipc32.o |
8 | obj-$(CONFIG_IA32_EMULATION) += $(sysv-y) | 9 | obj-$(CONFIG_IA32_EMULATION) += $(sysv-y) |
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index a6253ec1b284..e3e734005e19 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <asm/segment.h> | 14 | #include <asm/segment.h> |
15 | #include <asm/irqflags.h> | 15 | #include <asm/irqflags.h> |
16 | #include <linux/linkage.h> | 16 | #include <linux/linkage.h> |
17 | #include <linux/err.h> | ||
17 | 18 | ||
18 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | 19 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ |
19 | #include <linux/elf-em.h> | 20 | #include <linux/elf-em.h> |
@@ -27,8 +28,6 @@ | |||
27 | 28 | ||
28 | .section .entry.text, "ax" | 29 | .section .entry.text, "ax" |
29 | 30 | ||
30 | #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) | ||
31 | |||
32 | .macro IA32_ARG_FIXUP noebp=0 | 31 | .macro IA32_ARG_FIXUP noebp=0 |
33 | movl %edi,%r8d | 32 | movl %edi,%r8d |
34 | .if \noebp | 33 | .if \noebp |
@@ -134,7 +133,7 @@ ENTRY(ia32_sysenter_target) | |||
134 | CFI_REL_OFFSET rsp,0 | 133 | CFI_REL_OFFSET rsp,0 |
135 | pushfq_cfi | 134 | pushfq_cfi |
136 | /*CFI_REL_OFFSET rflags,0*/ | 135 | /*CFI_REL_OFFSET rflags,0*/ |
137 | movl 8*3-THREAD_SIZE+TI_sysenter_return(%rsp), %r10d | 136 | movl TI_sysenter_return+THREAD_INFO(%rsp,3*8-KERNEL_STACK_OFFSET),%r10d |
138 | CFI_REGISTER rip,r10 | 137 | CFI_REGISTER rip,r10 |
139 | pushq_cfi $__USER32_CS | 138 | pushq_cfi $__USER32_CS |
140 | /*CFI_REL_OFFSET cs,0*/ | 139 | /*CFI_REL_OFFSET cs,0*/ |
@@ -150,9 +149,8 @@ ENTRY(ia32_sysenter_target) | |||
150 | .section __ex_table,"a" | 149 | .section __ex_table,"a" |
151 | .quad 1b,ia32_badarg | 150 | .quad 1b,ia32_badarg |
152 | .previous | 151 | .previous |
153 | GET_THREAD_INFO(%r10) | 152 | orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
154 | orl $TS_COMPAT,TI_status(%r10) | 153 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
155 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) | ||
156 | CFI_REMEMBER_STATE | 154 | CFI_REMEMBER_STATE |
157 | jnz sysenter_tracesys | 155 | jnz sysenter_tracesys |
158 | cmpq $(IA32_NR_syscalls-1),%rax | 156 | cmpq $(IA32_NR_syscalls-1),%rax |
@@ -162,13 +160,12 @@ sysenter_do_call: | |||
162 | sysenter_dispatch: | 160 | sysenter_dispatch: |
163 | call *ia32_sys_call_table(,%rax,8) | 161 | call *ia32_sys_call_table(,%rax,8) |
164 | movq %rax,RAX-ARGOFFSET(%rsp) | 162 | movq %rax,RAX-ARGOFFSET(%rsp) |
165 | GET_THREAD_INFO(%r10) | ||
166 | DISABLE_INTERRUPTS(CLBR_NONE) | 163 | DISABLE_INTERRUPTS(CLBR_NONE) |
167 | TRACE_IRQS_OFF | 164 | TRACE_IRQS_OFF |
168 | testl $_TIF_ALLWORK_MASK,TI_flags(%r10) | 165 | testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
169 | jnz sysexit_audit | 166 | jnz sysexit_audit |
170 | sysexit_from_sys_call: | 167 | sysexit_from_sys_call: |
171 | andl $~TS_COMPAT,TI_status(%r10) | 168 | andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
172 | /* clear IF, that popfq doesn't enable interrupts early */ | 169 | /* clear IF, that popfq doesn't enable interrupts early */ |
173 | andl $~0x200,EFLAGS-R11(%rsp) | 170 | andl $~0x200,EFLAGS-R11(%rsp) |
174 | movl RIP-R11(%rsp),%edx /* User %eip */ | 171 | movl RIP-R11(%rsp),%edx /* User %eip */ |
@@ -193,7 +190,7 @@ sysexit_from_sys_call: | |||
193 | movl %ebx,%edx /* 3rd arg: 1st syscall arg */ | 190 | movl %ebx,%edx /* 3rd arg: 1st syscall arg */ |
194 | movl %eax,%esi /* 2nd arg: syscall number */ | 191 | movl %eax,%esi /* 2nd arg: syscall number */ |
195 | movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */ | 192 | movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */ |
196 | call audit_syscall_entry | 193 | call __audit_syscall_entry |
197 | movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */ | 194 | movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */ |
198 | cmpq $(IA32_NR_syscalls-1),%rax | 195 | cmpq $(IA32_NR_syscalls-1),%rax |
199 | ja ia32_badsys | 196 | ja ia32_badsys |
@@ -205,22 +202,22 @@ sysexit_from_sys_call: | |||
205 | .endm | 202 | .endm |
206 | 203 | ||
207 | .macro auditsys_exit exit | 204 | .macro auditsys_exit exit |
208 | testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) | 205 | testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
209 | jnz ia32_ret_from_sys_call | 206 | jnz ia32_ret_from_sys_call |
210 | TRACE_IRQS_ON | 207 | TRACE_IRQS_ON |
211 | sti | 208 | sti |
212 | movl %eax,%esi /* second arg, syscall return value */ | 209 | movl %eax,%esi /* second arg, syscall return value */ |
213 | cmpl $0,%eax /* is it < 0? */ | 210 | cmpl $-MAX_ERRNO,%eax /* is it an error ? */ |
214 | setl %al /* 1 if so, 0 if not */ | 211 | jbe 1f |
212 | movslq %eax, %rsi /* if error sign extend to 64 bits */ | ||
213 | 1: setbe %al /* 1 if success, 0 if error */ | ||
215 | movzbl %al,%edi /* zero-extend that into %edi */ | 214 | movzbl %al,%edi /* zero-extend that into %edi */ |
216 | inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ | 215 | call __audit_syscall_exit |
217 | call audit_syscall_exit | 216 | movq RAX-ARGOFFSET(%rsp),%rax /* reload syscall return value */ |
218 | GET_THREAD_INFO(%r10) | ||
219 | movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall return value */ | ||
220 | movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi | 217 | movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi |
221 | cli | 218 | cli |
222 | TRACE_IRQS_OFF | 219 | TRACE_IRQS_OFF |
223 | testl %edi,TI_flags(%r10) | 220 | testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
224 | jz \exit | 221 | jz \exit |
225 | CLEAR_RREGS -ARGOFFSET | 222 | CLEAR_RREGS -ARGOFFSET |
226 | jmp int_with_check | 223 | jmp int_with_check |
@@ -238,7 +235,7 @@ sysexit_audit: | |||
238 | 235 | ||
239 | sysenter_tracesys: | 236 | sysenter_tracesys: |
240 | #ifdef CONFIG_AUDITSYSCALL | 237 | #ifdef CONFIG_AUDITSYSCALL |
241 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) | 238 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
242 | jz sysenter_auditsys | 239 | jz sysenter_auditsys |
243 | #endif | 240 | #endif |
244 | SAVE_REST | 241 | SAVE_REST |
@@ -309,9 +306,8 @@ ENTRY(ia32_cstar_target) | |||
309 | .section __ex_table,"a" | 306 | .section __ex_table,"a" |
310 | .quad 1b,ia32_badarg | 307 | .quad 1b,ia32_badarg |
311 | .previous | 308 | .previous |
312 | GET_THREAD_INFO(%r10) | 309 | orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
313 | orl $TS_COMPAT,TI_status(%r10) | 310 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
314 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) | ||
315 | CFI_REMEMBER_STATE | 311 | CFI_REMEMBER_STATE |
316 | jnz cstar_tracesys | 312 | jnz cstar_tracesys |
317 | cmpq $IA32_NR_syscalls-1,%rax | 313 | cmpq $IA32_NR_syscalls-1,%rax |
@@ -321,13 +317,12 @@ cstar_do_call: | |||
321 | cstar_dispatch: | 317 | cstar_dispatch: |
322 | call *ia32_sys_call_table(,%rax,8) | 318 | call *ia32_sys_call_table(,%rax,8) |
323 | movq %rax,RAX-ARGOFFSET(%rsp) | 319 | movq %rax,RAX-ARGOFFSET(%rsp) |
324 | GET_THREAD_INFO(%r10) | ||
325 | DISABLE_INTERRUPTS(CLBR_NONE) | 320 | DISABLE_INTERRUPTS(CLBR_NONE) |
326 | TRACE_IRQS_OFF | 321 | TRACE_IRQS_OFF |
327 | testl $_TIF_ALLWORK_MASK,TI_flags(%r10) | 322 | testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
328 | jnz sysretl_audit | 323 | jnz sysretl_audit |
329 | sysretl_from_sys_call: | 324 | sysretl_from_sys_call: |
330 | andl $~TS_COMPAT,TI_status(%r10) | 325 | andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
331 | RESTORE_ARGS 0,-ARG_SKIP,0,0,0 | 326 | RESTORE_ARGS 0,-ARG_SKIP,0,0,0 |
332 | movl RIP-ARGOFFSET(%rsp),%ecx | 327 | movl RIP-ARGOFFSET(%rsp),%ecx |
333 | CFI_REGISTER rip,rcx | 328 | CFI_REGISTER rip,rcx |
@@ -355,7 +350,7 @@ sysretl_audit: | |||
355 | 350 | ||
356 | cstar_tracesys: | 351 | cstar_tracesys: |
357 | #ifdef CONFIG_AUDITSYSCALL | 352 | #ifdef CONFIG_AUDITSYSCALL |
358 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) | 353 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
359 | jz cstar_auditsys | 354 | jz cstar_auditsys |
360 | #endif | 355 | #endif |
361 | xchgl %r9d,%ebp | 356 | xchgl %r9d,%ebp |
@@ -420,9 +415,8 @@ ENTRY(ia32_syscall) | |||
420 | /* note the registers are not zero extended to the sf. | 415 | /* note the registers are not zero extended to the sf. |
421 | this could be a problem. */ | 416 | this could be a problem. */ |
422 | SAVE_ARGS 0,1,0 | 417 | SAVE_ARGS 0,1,0 |
423 | GET_THREAD_INFO(%r10) | 418 | orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
424 | orl $TS_COMPAT,TI_status(%r10) | 419 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
425 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) | ||
426 | jnz ia32_tracesys | 420 | jnz ia32_tracesys |
427 | cmpq $(IA32_NR_syscalls-1),%rax | 421 | cmpq $(IA32_NR_syscalls-1),%rax |
428 | ja ia32_badsys | 422 | ja ia32_badsys |
@@ -453,14 +447,11 @@ ia32_badsys: | |||
453 | movq $-ENOSYS,%rax | 447 | movq $-ENOSYS,%rax |
454 | jmp ia32_sysret | 448 | jmp ia32_sysret |
455 | 449 | ||
456 | quiet_ni_syscall: | ||
457 | movq $-ENOSYS,%rax | ||
458 | ret | ||
459 | CFI_ENDPROC | 450 | CFI_ENDPROC |
460 | 451 | ||
461 | .macro PTREGSCALL label, func, arg | 452 | .macro PTREGSCALL label, func, arg |
462 | .globl \label | 453 | ALIGN |
463 | \label: | 454 | GLOBAL(\label) |
464 | leaq \func(%rip),%rax | 455 | leaq \func(%rip),%rax |
465 | leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ | 456 | leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ |
466 | jmp ia32_ptregs_common | 457 | jmp ia32_ptregs_common |
@@ -477,7 +468,8 @@ quiet_ni_syscall: | |||
477 | PTREGSCALL stub32_vfork, sys_vfork, %rdi | 468 | PTREGSCALL stub32_vfork, sys_vfork, %rdi |
478 | PTREGSCALL stub32_iopl, sys_iopl, %rsi | 469 | PTREGSCALL stub32_iopl, sys_iopl, %rsi |
479 | 470 | ||
480 | ENTRY(ia32_ptregs_common) | 471 | ALIGN |
472 | ia32_ptregs_common: | ||
481 | popq %r11 | 473 | popq %r11 |
482 | CFI_ENDPROC | 474 | CFI_ENDPROC |
483 | CFI_STARTPROC32 simple | 475 | CFI_STARTPROC32 simple |
@@ -499,357 +491,3 @@ ENTRY(ia32_ptregs_common) | |||
499 | jmp ia32_sysret /* misbalances the return cache */ | 491 | jmp ia32_sysret /* misbalances the return cache */ |
500 | CFI_ENDPROC | 492 | CFI_ENDPROC |
501 | END(ia32_ptregs_common) | 493 | END(ia32_ptregs_common) |
502 | |||
503 | .section .rodata,"a" | ||
504 | .align 8 | ||
505 | ia32_sys_call_table: | ||
506 | .quad sys_restart_syscall | ||
507 | .quad sys_exit | ||
508 | .quad stub32_fork | ||
509 | .quad sys_read | ||
510 | .quad sys_write | ||
511 | .quad compat_sys_open /* 5 */ | ||
512 | .quad sys_close | ||
513 | .quad sys32_waitpid | ||
514 | .quad sys_creat | ||
515 | .quad sys_link | ||
516 | .quad sys_unlink /* 10 */ | ||
517 | .quad stub32_execve | ||
518 | .quad sys_chdir | ||
519 | .quad compat_sys_time | ||
520 | .quad sys_mknod | ||
521 | .quad sys_chmod /* 15 */ | ||
522 | .quad sys_lchown16 | ||
523 | .quad quiet_ni_syscall /* old break syscall holder */ | ||
524 | .quad sys_stat | ||
525 | .quad sys32_lseek | ||
526 | .quad sys_getpid /* 20 */ | ||
527 | .quad compat_sys_mount /* mount */ | ||
528 | .quad sys_oldumount /* old_umount */ | ||
529 | .quad sys_setuid16 | ||
530 | .quad sys_getuid16 | ||
531 | .quad compat_sys_stime /* stime */ /* 25 */ | ||
532 | .quad compat_sys_ptrace /* ptrace */ | ||
533 | .quad sys_alarm | ||
534 | .quad sys_fstat /* (old)fstat */ | ||
535 | .quad sys_pause | ||
536 | .quad compat_sys_utime /* 30 */ | ||
537 | .quad quiet_ni_syscall /* old stty syscall holder */ | ||
538 | .quad quiet_ni_syscall /* old gtty syscall holder */ | ||
539 | .quad sys_access | ||
540 | .quad sys_nice | ||
541 | .quad quiet_ni_syscall /* 35 */ /* old ftime syscall holder */ | ||
542 | .quad sys_sync | ||
543 | .quad sys32_kill | ||
544 | .quad sys_rename | ||
545 | .quad sys_mkdir | ||
546 | .quad sys_rmdir /* 40 */ | ||
547 | .quad sys_dup | ||
548 | .quad sys_pipe | ||
549 | .quad compat_sys_times | ||
550 | .quad quiet_ni_syscall /* old prof syscall holder */ | ||
551 | .quad sys_brk /* 45 */ | ||
552 | .quad sys_setgid16 | ||
553 | .quad sys_getgid16 | ||
554 | .quad sys_signal | ||
555 | .quad sys_geteuid16 | ||
556 | .quad sys_getegid16 /* 50 */ | ||
557 | .quad sys_acct | ||
558 | .quad sys_umount /* new_umount */ | ||
559 | .quad quiet_ni_syscall /* old lock syscall holder */ | ||
560 | .quad compat_sys_ioctl | ||
561 | .quad compat_sys_fcntl64 /* 55 */ | ||
562 | .quad quiet_ni_syscall /* old mpx syscall holder */ | ||
563 | .quad sys_setpgid | ||
564 | .quad quiet_ni_syscall /* old ulimit syscall holder */ | ||
565 | .quad sys_olduname | ||
566 | .quad sys_umask /* 60 */ | ||
567 | .quad sys_chroot | ||
568 | .quad compat_sys_ustat | ||
569 | .quad sys_dup2 | ||
570 | .quad sys_getppid | ||
571 | .quad sys_getpgrp /* 65 */ | ||
572 | .quad sys_setsid | ||
573 | .quad sys32_sigaction | ||
574 | .quad sys_sgetmask | ||
575 | .quad sys_ssetmask | ||
576 | .quad sys_setreuid16 /* 70 */ | ||
577 | .quad sys_setregid16 | ||
578 | .quad sys32_sigsuspend | ||
579 | .quad compat_sys_sigpending | ||
580 | .quad sys_sethostname | ||
581 | .quad compat_sys_setrlimit /* 75 */ | ||
582 | .quad compat_sys_old_getrlimit /* old_getrlimit */ | ||
583 | .quad compat_sys_getrusage | ||
584 | .quad compat_sys_gettimeofday | ||
585 | .quad compat_sys_settimeofday | ||
586 | .quad sys_getgroups16 /* 80 */ | ||
587 | .quad sys_setgroups16 | ||
588 | .quad compat_sys_old_select | ||
589 | .quad sys_symlink | ||
590 | .quad sys_lstat | ||
591 | .quad sys_readlink /* 85 */ | ||
592 | .quad sys_uselib | ||
593 | .quad sys_swapon | ||
594 | .quad sys_reboot | ||
595 | .quad compat_sys_old_readdir | ||
596 | .quad sys32_mmap /* 90 */ | ||
597 | .quad sys_munmap | ||
598 | .quad sys_truncate | ||
599 | .quad sys_ftruncate | ||
600 | .quad sys_fchmod | ||
601 | .quad sys_fchown16 /* 95 */ | ||
602 | .quad sys_getpriority | ||
603 | .quad sys_setpriority | ||
604 | .quad quiet_ni_syscall /* old profil syscall holder */ | ||
605 | .quad compat_sys_statfs | ||
606 | .quad compat_sys_fstatfs /* 100 */ | ||
607 | .quad sys_ioperm | ||
608 | .quad compat_sys_socketcall | ||
609 | .quad sys_syslog | ||
610 | .quad compat_sys_setitimer | ||
611 | .quad compat_sys_getitimer /* 105 */ | ||
612 | .quad compat_sys_newstat | ||
613 | .quad compat_sys_newlstat | ||
614 | .quad compat_sys_newfstat | ||
615 | .quad sys_uname | ||
616 | .quad stub32_iopl /* 110 */ | ||
617 | .quad sys_vhangup | ||
618 | .quad quiet_ni_syscall /* old "idle" system call */ | ||
619 | .quad sys32_vm86_warning /* vm86old */ | ||
620 | .quad compat_sys_wait4 | ||
621 | .quad sys_swapoff /* 115 */ | ||
622 | .quad compat_sys_sysinfo | ||
623 | .quad sys32_ipc | ||
624 | .quad sys_fsync | ||
625 | .quad stub32_sigreturn | ||
626 | .quad stub32_clone /* 120 */ | ||
627 | .quad sys_setdomainname | ||
628 | .quad sys_newuname | ||
629 | .quad sys_modify_ldt | ||
630 | .quad compat_sys_adjtimex | ||
631 | .quad sys32_mprotect /* 125 */ | ||
632 | .quad compat_sys_sigprocmask | ||
633 | .quad quiet_ni_syscall /* create_module */ | ||
634 | .quad sys_init_module | ||
635 | .quad sys_delete_module | ||
636 | .quad quiet_ni_syscall /* 130 get_kernel_syms */ | ||
637 | .quad sys32_quotactl | ||
638 | .quad sys_getpgid | ||
639 | .quad sys_fchdir | ||
640 | .quad quiet_ni_syscall /* bdflush */ | ||
641 | .quad sys_sysfs /* 135 */ | ||
642 | .quad sys_personality | ||
643 | .quad quiet_ni_syscall /* for afs_syscall */ | ||
644 | .quad sys_setfsuid16 | ||
645 | .quad sys_setfsgid16 | ||
646 | .quad sys_llseek /* 140 */ | ||
647 | .quad compat_sys_getdents | ||
648 | .quad compat_sys_select | ||
649 | .quad sys_flock | ||
650 | .quad sys_msync | ||
651 | .quad compat_sys_readv /* 145 */ | ||
652 | .quad compat_sys_writev | ||
653 | .quad sys_getsid | ||
654 | .quad sys_fdatasync | ||
655 | .quad compat_sys_sysctl /* sysctl */ | ||
656 | .quad sys_mlock /* 150 */ | ||
657 | .quad sys_munlock | ||
658 | .quad sys_mlockall | ||
659 | .quad sys_munlockall | ||
660 | .quad sys_sched_setparam | ||
661 | .quad sys_sched_getparam /* 155 */ | ||
662 | .quad sys_sched_setscheduler | ||
663 | .quad sys_sched_getscheduler | ||
664 | .quad sys_sched_yield | ||
665 | .quad sys_sched_get_priority_max | ||
666 | .quad sys_sched_get_priority_min /* 160 */ | ||
667 | .quad sys32_sched_rr_get_interval | ||
668 | .quad compat_sys_nanosleep | ||
669 | .quad sys_mremap | ||
670 | .quad sys_setresuid16 | ||
671 | .quad sys_getresuid16 /* 165 */ | ||
672 | .quad sys32_vm86_warning /* vm86 */ | ||
673 | .quad quiet_ni_syscall /* query_module */ | ||
674 | .quad sys_poll | ||
675 | .quad quiet_ni_syscall /* old nfsservctl */ | ||
676 | .quad sys_setresgid16 /* 170 */ | ||
677 | .quad sys_getresgid16 | ||
678 | .quad sys_prctl | ||
679 | .quad stub32_rt_sigreturn | ||
680 | .quad sys32_rt_sigaction | ||
681 | .quad sys32_rt_sigprocmask /* 175 */ | ||
682 | .quad sys32_rt_sigpending | ||
683 | .quad compat_sys_rt_sigtimedwait | ||
684 | .quad sys32_rt_sigqueueinfo | ||
685 | .quad sys_rt_sigsuspend | ||
686 | .quad sys32_pread /* 180 */ | ||
687 | .quad sys32_pwrite | ||
688 | .quad sys_chown16 | ||
689 | .quad sys_getcwd | ||
690 | .quad sys_capget | ||
691 | .quad sys_capset | ||
692 | .quad stub32_sigaltstack | ||
693 | .quad sys32_sendfile | ||
694 | .quad quiet_ni_syscall /* streams1 */ | ||
695 | .quad quiet_ni_syscall /* streams2 */ | ||
696 | .quad stub32_vfork /* 190 */ | ||
697 | .quad compat_sys_getrlimit | ||
698 | .quad sys_mmap_pgoff | ||
699 | .quad sys32_truncate64 | ||
700 | .quad sys32_ftruncate64 | ||
701 | .quad sys32_stat64 /* 195 */ | ||
702 | .quad sys32_lstat64 | ||
703 | .quad sys32_fstat64 | ||
704 | .quad sys_lchown | ||
705 | .quad sys_getuid | ||
706 | .quad sys_getgid /* 200 */ | ||
707 | .quad sys_geteuid | ||
708 | .quad sys_getegid | ||
709 | .quad sys_setreuid | ||
710 | .quad sys_setregid | ||
711 | .quad sys_getgroups /* 205 */ | ||
712 | .quad sys_setgroups | ||
713 | .quad sys_fchown | ||
714 | .quad sys_setresuid | ||
715 | .quad sys_getresuid | ||
716 | .quad sys_setresgid /* 210 */ | ||
717 | .quad sys_getresgid | ||
718 | .quad sys_chown | ||
719 | .quad sys_setuid | ||
720 | .quad sys_setgid | ||
721 | .quad sys_setfsuid /* 215 */ | ||
722 | .quad sys_setfsgid | ||
723 | .quad sys_pivot_root | ||
724 | .quad sys_mincore | ||
725 | .quad sys_madvise | ||
726 | .quad compat_sys_getdents64 /* 220 getdents64 */ | ||
727 | .quad compat_sys_fcntl64 | ||
728 | .quad quiet_ni_syscall /* tux */ | ||
729 | .quad quiet_ni_syscall /* security */ | ||
730 | .quad sys_gettid | ||
731 | .quad sys32_readahead /* 225 */ | ||
732 | .quad sys_setxattr | ||
733 | .quad sys_lsetxattr | ||
734 | .quad sys_fsetxattr | ||
735 | .quad sys_getxattr | ||
736 | .quad sys_lgetxattr /* 230 */ | ||
737 | .quad sys_fgetxattr | ||
738 | .quad sys_listxattr | ||
739 | .quad sys_llistxattr | ||
740 | .quad sys_flistxattr | ||
741 | .quad sys_removexattr /* 235 */ | ||
742 | .quad sys_lremovexattr | ||
743 | .quad sys_fremovexattr | ||
744 | .quad sys_tkill | ||
745 | .quad sys_sendfile64 | ||
746 | .quad compat_sys_futex /* 240 */ | ||
747 | .quad compat_sys_sched_setaffinity | ||
748 | .quad compat_sys_sched_getaffinity | ||
749 | .quad sys_set_thread_area | ||
750 | .quad sys_get_thread_area | ||
751 | .quad compat_sys_io_setup /* 245 */ | ||
752 | .quad sys_io_destroy | ||
753 | .quad compat_sys_io_getevents | ||
754 | .quad compat_sys_io_submit | ||
755 | .quad sys_io_cancel | ||
756 | .quad sys32_fadvise64 /* 250 */ | ||
757 | .quad quiet_ni_syscall /* free_huge_pages */ | ||
758 | .quad sys_exit_group | ||
759 | .quad sys32_lookup_dcookie | ||
760 | .quad sys_epoll_create | ||
761 | .quad sys_epoll_ctl /* 255 */ | ||
762 | .quad sys_epoll_wait | ||
763 | .quad sys_remap_file_pages | ||
764 | .quad sys_set_tid_address | ||
765 | .quad compat_sys_timer_create | ||
766 | .quad compat_sys_timer_settime /* 260 */ | ||
767 | .quad compat_sys_timer_gettime | ||
768 | .quad sys_timer_getoverrun | ||
769 | .quad sys_timer_delete | ||
770 | .quad compat_sys_clock_settime | ||
771 | .quad compat_sys_clock_gettime /* 265 */ | ||
772 | .quad compat_sys_clock_getres | ||
773 | .quad compat_sys_clock_nanosleep | ||
774 | .quad compat_sys_statfs64 | ||
775 | .quad compat_sys_fstatfs64 | ||
776 | .quad sys_tgkill /* 270 */ | ||
777 | .quad compat_sys_utimes | ||
778 | .quad sys32_fadvise64_64 | ||
779 | .quad quiet_ni_syscall /* sys_vserver */ | ||
780 | .quad sys_mbind | ||
781 | .quad compat_sys_get_mempolicy /* 275 */ | ||
782 | .quad sys_set_mempolicy | ||
783 | .quad compat_sys_mq_open | ||
784 | .quad sys_mq_unlink | ||
785 | .quad compat_sys_mq_timedsend | ||
786 | .quad compat_sys_mq_timedreceive /* 280 */ | ||
787 | .quad compat_sys_mq_notify | ||
788 | .quad compat_sys_mq_getsetattr | ||
789 | .quad compat_sys_kexec_load /* reserved for kexec */ | ||
790 | .quad compat_sys_waitid | ||
791 | .quad quiet_ni_syscall /* 285: sys_altroot */ | ||
792 | .quad sys_add_key | ||
793 | .quad sys_request_key | ||
794 | .quad sys_keyctl | ||
795 | .quad sys_ioprio_set | ||
796 | .quad sys_ioprio_get /* 290 */ | ||
797 | .quad sys_inotify_init | ||
798 | .quad sys_inotify_add_watch | ||
799 | .quad sys_inotify_rm_watch | ||
800 | .quad sys_migrate_pages | ||
801 | .quad compat_sys_openat /* 295 */ | ||
802 | .quad sys_mkdirat | ||
803 | .quad sys_mknodat | ||
804 | .quad sys_fchownat | ||
805 | .quad compat_sys_futimesat | ||
806 | .quad sys32_fstatat /* 300 */ | ||
807 | .quad sys_unlinkat | ||
808 | .quad sys_renameat | ||
809 | .quad sys_linkat | ||
810 | .quad sys_symlinkat | ||
811 | .quad sys_readlinkat /* 305 */ | ||
812 | .quad sys_fchmodat | ||
813 | .quad sys_faccessat | ||
814 | .quad compat_sys_pselect6 | ||
815 | .quad compat_sys_ppoll | ||
816 | .quad sys_unshare /* 310 */ | ||
817 | .quad compat_sys_set_robust_list | ||
818 | .quad compat_sys_get_robust_list | ||
819 | .quad sys_splice | ||
820 | .quad sys32_sync_file_range | ||
821 | .quad sys_tee /* 315 */ | ||
822 | .quad compat_sys_vmsplice | ||
823 | .quad compat_sys_move_pages | ||
824 | .quad sys_getcpu | ||
825 | .quad sys_epoll_pwait | ||
826 | .quad compat_sys_utimensat /* 320 */ | ||
827 | .quad compat_sys_signalfd | ||
828 | .quad sys_timerfd_create | ||
829 | .quad sys_eventfd | ||
830 | .quad sys32_fallocate | ||
831 | .quad compat_sys_timerfd_settime /* 325 */ | ||
832 | .quad compat_sys_timerfd_gettime | ||
833 | .quad compat_sys_signalfd4 | ||
834 | .quad sys_eventfd2 | ||
835 | .quad sys_epoll_create1 | ||
836 | .quad sys_dup3 /* 330 */ | ||
837 | .quad sys_pipe2 | ||
838 | .quad sys_inotify_init1 | ||
839 | .quad compat_sys_preadv | ||
840 | .quad compat_sys_pwritev | ||
841 | .quad compat_sys_rt_tgsigqueueinfo /* 335 */ | ||
842 | .quad sys_perf_event_open | ||
843 | .quad compat_sys_recvmmsg | ||
844 | .quad sys_fanotify_init | ||
845 | .quad sys32_fanotify_mark | ||
846 | .quad sys_prlimit64 /* 340 */ | ||
847 | .quad sys_name_to_handle_at | ||
848 | .quad compat_sys_open_by_handle_at | ||
849 | .quad compat_sys_clock_adjtime | ||
850 | .quad sys_syncfs | ||
851 | .quad compat_sys_sendmmsg /* 345 */ | ||
852 | .quad sys_setns | ||
853 | .quad compat_sys_process_vm_readv | ||
854 | .quad compat_sys_process_vm_writev | ||
855 | ia32_syscall_end: | ||
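
Note: the auditsys_exit rewrite above does two things. First, thread_info is now reached relative to %rsp via the THREAD_INFO() offset macro instead of being cached in %r10, so the GET_THREAD_INFO reloads around C calls disappear. Second, the success/failure test for __audit_syscall_exit() compares against -MAX_ERRNO rather than the sign bit, so only genuine errno-range returns count as failures (a 32-bit mmap() handing back an address above 2 GB is no longer logged as a failed call), and real errors are sign-extended to 64 bits before the call. A small standalone illustration of the errno-range convention, using the same test as the kernel's IS_ERR_VALUE() in include/linux/err.h:

    #include <stdio.h>

    #define MAX_ERRNO 4095
    /* same test as the kernel's IS_ERR_VALUE() */
    #define IS_ERR_VALUE(x) ((unsigned long)(x) >= (unsigned long)-MAX_ERRNO)

    int main(void)
    {
            long big_addr = 0x80000000L;  /* e.g. a high mmap() result */
            long err = -14;               /* -EFAULT */

            /* prints 0 then 1: only the last 4095 values are failures */
            printf("%d %d\n", (int)IS_ERR_VALUE(big_addr), (int)IS_ERR_VALUE(err));
            return 0;
    }
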
diff --git a/arch/x86/ia32/nosyscall.c b/arch/x86/ia32/nosyscall.c new file mode 100644 index 000000000000..51ecd5b4e787 --- /dev/null +++ b/arch/x86/ia32/nosyscall.c | |||
@@ -0,0 +1,7 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/errno.h> | ||
3 | |||
4 | long compat_ni_syscall(void) | ||
5 | { | ||
6 | return -ENOSYS; | ||
7 | } | ||
diff --git a/arch/x86/ia32/syscall_ia32.c b/arch/x86/ia32/syscall_ia32.c new file mode 100644 index 000000000000..4754ba0f5d9f --- /dev/null +++ b/arch/x86/ia32/syscall_ia32.c | |||
@@ -0,0 +1,25 @@ | |||
1 | /* System call table for ia32 emulation. */ | ||
2 | |||
3 | #include <linux/linkage.h> | ||
4 | #include <linux/sys.h> | ||
5 | #include <linux/cache.h> | ||
6 | #include <asm/asm-offsets.h> | ||
7 | |||
8 | #define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void compat(void) ; | ||
9 | #include <asm/syscalls_32.h> | ||
10 | #undef __SYSCALL_I386 | ||
11 | |||
12 | #define __SYSCALL_I386(nr, sym, compat) [nr] = compat, | ||
13 | |||
14 | typedef void (*sys_call_ptr_t)(void); | ||
15 | |||
16 | extern void compat_ni_syscall(void); | ||
17 | |||
18 | const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = { | ||
19 | /* | ||
20 | * Smells like a compiler bug -- it doesn't work | ||
21 | * when the & below is removed. | ||
22 | */ | ||
23 | [0 ... __NR_ia32_syscall_max] = &compat_ni_syscall, | ||
24 | #include <asm/syscalls_32.h> | ||
25 | }; | ||
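
Note: syscall_ia32.c replaces the hand-maintained .quad table that the ia32entry.S hunk above deletes. The generated asm/syscalls_32.h is included twice: the first __SYSCALL_I386 definition emits prototypes, the second emits designated initializers that override the [0 ... max] default of compat_ni_syscall (the new nosyscall.c above). A standalone sketch of the range-initializer pattern (a GCC extension), with illustrative names:

    typedef void (*sys_call_ptr_t)(void);

    static void compat_ni(void) { }     /* stand-in for compat_ni_syscall */
    static void compat_open(void) { }   /* stand-in for a generated entry */

    const sys_call_ptr_t table[16] = {
            [0 ... 15] = &compat_ni,    /* default every slot first */
            [5] = compat_open,          /* generated entries override it */
    };
    /* table[5] == compat_open, every other slot == compat_ni */
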
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 6fa90a845e4c..b57e6a43a37a 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild | |||
@@ -19,7 +19,8 @@ header-y += processor-flags.h | |||
19 | header-y += ptrace-abi.h | 19 | header-y += ptrace-abi.h |
20 | header-y += sigcontext32.h | 20 | header-y += sigcontext32.h |
21 | header-y += ucontext.h | 21 | header-y += ucontext.h |
22 | header-y += unistd_32.h | ||
23 | header-y += unistd_64.h | ||
24 | header-y += vm86.h | 22 | header-y += vm86.h |
25 | header-y += vsyscall.h | 23 | header-y += vsyscall.h |
24 | |||
25 | genhdr-y += unistd_32.h | ||
26 | genhdr-y += unistd_64.h | ||
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index 091508b533b4..952bd0100c5c 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h | |||
@@ -4,10 +4,10 @@ | |||
4 | 4 | ||
5 | #ifdef CONFIG_SMP | 5 | #ifdef CONFIG_SMP |
6 | .macro LOCK_PREFIX | 6 | .macro LOCK_PREFIX |
7 | 1: lock | 7 | 672: lock |
8 | .section .smp_locks,"a" | 8 | .section .smp_locks,"a" |
9 | .balign 4 | 9 | .balign 4 |
10 | .long 1b - . | 10 | .long 672b - . |
11 | .previous | 11 | .previous |
12 | .endm | 12 | .endm |
13 | #else | 13 | #else |
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 8e41071704a5..49ad773f4b9f 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h | |||
@@ -1,6 +1,7 @@ | |||
1 | #ifndef _ASM_X86_AMD_NB_H | 1 | #ifndef _ASM_X86_AMD_NB_H |
2 | #define _ASM_X86_AMD_NB_H | 2 | #define _ASM_X86_AMD_NB_H |
3 | 3 | ||
4 | #include <linux/ioport.h> | ||
4 | #include <linux/pci.h> | 5 | #include <linux/pci.h> |
5 | 6 | ||
6 | struct amd_nb_bus_dev_range { | 7 | struct amd_nb_bus_dev_range { |
@@ -13,6 +14,7 @@ extern const struct pci_device_id amd_nb_misc_ids[]; | |||
13 | extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[]; | 14 | extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[]; |
14 | 15 | ||
15 | extern bool early_is_amd_nb(u32 value); | 16 | extern bool early_is_amd_nb(u32 value); |
17 | extern struct resource *amd_get_mmconfig_range(struct resource *res); | ||
16 | extern int amd_cache_northbridges(void); | 18 | extern int amd_cache_northbridges(void); |
17 | extern void amd_flush_garts(void); | 19 | extern void amd_flush_garts(void); |
18 | extern int amd_numa_init(void); | 20 | extern int amd_numa_init(void); |
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 1a6c09af048f..3ab9bdd87e79 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h | |||
@@ -176,6 +176,7 @@ static inline u64 native_x2apic_icr_read(void) | |||
176 | } | 176 | } |
177 | 177 | ||
178 | extern int x2apic_phys; | 178 | extern int x2apic_phys; |
179 | extern int x2apic_preenabled; | ||
179 | extern void check_x2apic(void); | 180 | extern void check_x2apic(void); |
180 | extern void enable_x2apic(void); | 181 | extern void enable_x2apic(void); |
181 | extern void x2apic_icr_write(u32 low, u32 id); | 182 | extern void x2apic_icr_write(u32 low, u32 id); |
@@ -198,6 +199,9 @@ static inline void x2apic_force_phys(void) | |||
198 | x2apic_phys = 1; | 199 | x2apic_phys = 1; |
199 | } | 200 | } |
200 | #else | 201 | #else |
202 | static inline void disable_x2apic(void) | ||
203 | { | ||
204 | } | ||
201 | static inline void check_x2apic(void) | 205 | static inline void check_x2apic(void) |
202 | { | 206 | { |
203 | } | 207 | } |
@@ -212,6 +216,7 @@ static inline void x2apic_force_phys(void) | |||
212 | { | 216 | { |
213 | } | 217 | } |
214 | 218 | ||
219 | #define nox2apic 0 | ||
215 | #define x2apic_preenabled 0 | 220 | #define x2apic_preenabled 0 |
216 | #define x2apic_supported() 0 | 221 | #define x2apic_supported() 0 |
217 | #endif | 222 | #endif |
@@ -410,6 +415,7 @@ extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); | |||
410 | #endif | 415 | #endif |
411 | 416 | ||
412 | #ifdef CONFIG_X86_LOCAL_APIC | 417 | #ifdef CONFIG_X86_LOCAL_APIC |
418 | |||
413 | static inline u32 apic_read(u32 reg) | 419 | static inline u32 apic_read(u32 reg) |
414 | { | 420 | { |
415 | return apic->read(reg); | 421 | return apic->read(reg); |
diff --git a/arch/x86/include/asm/apic_flat_64.h b/arch/x86/include/asm/apic_flat_64.h new file mode 100644 index 000000000000..a2d312796440 --- /dev/null +++ b/arch/x86/include/asm/apic_flat_64.h | |||
@@ -0,0 +1,7 @@ | |||
1 | #ifndef _ASM_X86_APIC_FLAT_64_H | ||
2 | #define _ASM_X86_APIC_FLAT_64_H | ||
3 | |||
4 | extern void flat_init_apic_ldr(void); | ||
5 | |||
6 | #endif | ||
7 | |||
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 3925d8007864..134bba00df09 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h | |||
@@ -144,6 +144,7 @@ | |||
144 | 144 | ||
145 | #define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) | 145 | #define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) |
146 | #define APIC_BASE_MSR 0x800 | 146 | #define APIC_BASE_MSR 0x800 |
147 | #define XAPIC_ENABLE (1UL << 11) | ||
147 | #define X2APIC_ENABLE (1UL << 10) | 148 | #define X2APIC_ENABLE (1UL << 10) |
148 | 149 | ||
149 | #ifdef CONFIG_X86_32 | 150 | #ifdef CONFIG_X86_32 |
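
Note: XAPIC_ENABLE names bit 11 of the IA32_APIC_BASE MSR (the global APIC enable), alongside the existing bit 10 for x2APIC mode. A minimal sketch of probing both bits, assuming rdmsrl() and MSR_IA32_APICBASE from the usual <asm/msr.h>/<asm/msr-index.h> headers:

    static void report_apic_mode(void)
    {
            u64 msr;

            rdmsrl(MSR_IA32_APICBASE, msr);
            if (!(msr & XAPIC_ENABLE))
                    pr_info("local APIC hardware-disabled\n");
            else if (msr & X2APIC_ENABLE)
                    pr_info("APIC in x2APIC mode\n");
            else
                    pr_info("APIC in legacy xAPIC mode\n");
    }
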
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index 24098aafce0d..fa13f0ec2874 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h | |||
@@ -82,7 +82,7 @@ static inline void atomic64_set(atomic64_t *v, long long i) | |||
82 | * | 82 | * |
83 | * Atomically reads the value of @v and returns it. | 83 | * Atomically reads the value of @v and returns it. |
84 | */ | 84 | */ |
85 | static inline long long atomic64_read(atomic64_t *v) | 85 | static inline long long atomic64_read(const atomic64_t *v) |
86 | { | 86 | { |
87 | long long r; | 87 | long long r; |
88 | asm volatile(ATOMIC64_ALTERNATIVE(read) | 88 | asm volatile(ATOMIC64_ALTERNATIVE(read) |
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 1775d6e5920e..b97596e2b68c 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h | |||
@@ -380,6 +380,8 @@ static inline unsigned long __fls(unsigned long word) | |||
380 | return word; | 380 | return word; |
381 | } | 381 | } |
382 | 382 | ||
383 | #undef ADDR | ||
384 | |||
383 | #ifdef __KERNEL__ | 385 | #ifdef __KERNEL__ |
384 | /** | 386 | /** |
385 | * ffs - find first set bit in word | 387 | * ffs - find first set bit in word |
@@ -395,10 +397,25 @@ static inline unsigned long __fls(unsigned long word) | |||
395 | static inline int ffs(int x) | 397 | static inline int ffs(int x) |
396 | { | 398 | { |
397 | int r; | 399 | int r; |
398 | #ifdef CONFIG_X86_CMOV | 400 | |
401 | #ifdef CONFIG_X86_64 | ||
402 | /* | ||
403 | * AMD64 says BSFL won't clobber the dest reg if x==0; Intel64 says the | ||
404 | * dest reg is undefined if x==0, but their CPU architect says its | ||
405 | * value is written to set it to the same as before, except that the | ||
406 | * top 32 bits will be cleared. | ||
407 | * | ||
408 | * We cannot do this on 32 bits because at the very least some | ||
409 | * 486 CPUs did not behave this way. | ||
410 | */ | ||
411 | long tmp = -1; | ||
412 | asm("bsfl %1,%0" | ||
413 | : "=r" (r) | ||
414 | : "rm" (x), "0" (tmp)); | ||
415 | #elif defined(CONFIG_X86_CMOV) | ||
399 | asm("bsfl %1,%0\n\t" | 416 | asm("bsfl %1,%0\n\t" |
400 | "cmovzl %2,%0" | 417 | "cmovzl %2,%0" |
401 | : "=r" (r) : "rm" (x), "r" (-1)); | 418 | : "=&r" (r) : "rm" (x), "r" (-1)); |
402 | #else | 419 | #else |
403 | asm("bsfl %1,%0\n\t" | 420 | asm("bsfl %1,%0\n\t" |
404 | "jnz 1f\n\t" | 421 | "jnz 1f\n\t" |
@@ -422,7 +439,22 @@ static inline int ffs(int x) | |||
422 | static inline int fls(int x) | 439 | static inline int fls(int x) |
423 | { | 440 | { |
424 | int r; | 441 | int r; |
425 | #ifdef CONFIG_X86_CMOV | 442 | |
443 | #ifdef CONFIG_X86_64 | ||
444 | /* | ||
445 | * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the | ||
446 | * dest reg is undefined if x==0, but their CPU architect says its | ||
447 | * value is written to set it to the same as before, except that the | ||
448 | * top 32 bits will be cleared. | ||
449 | * | ||
450 | * We cannot do this on 32 bits because at the very least some | ||
451 | * 486 CPUs did not behave this way. | ||
452 | */ | ||
453 | long tmp = -1; | ||
454 | asm("bsrl %1,%0" | ||
455 | : "=r" (r) | ||
456 | : "rm" (x), "0" (tmp)); | ||
457 | #elif defined(CONFIG_X86_CMOV) | ||
426 | asm("bsrl %1,%0\n\t" | 458 | asm("bsrl %1,%0\n\t" |
427 | "cmovzl %2,%0" | 459 | "cmovzl %2,%0" |
428 | : "=&r" (r) : "rm" (x), "rm" (-1)); | 460 | : "=&r" (r) : "rm" (x), "rm" (-1)); |
@@ -434,11 +466,35 @@ static inline int fls(int x) | |||
434 | #endif | 466 | #endif |
435 | return r + 1; | 467 | return r + 1; |
436 | } | 468 | } |
437 | #endif /* __KERNEL__ */ | ||
438 | |||
439 | #undef ADDR | ||
440 | 469 | ||
441 | #ifdef __KERNEL__ | 470 | /** |
471 | * fls64 - find last set bit in a 64-bit word | ||
472 | * @x: the word to search | ||
473 | * | ||
474 | * This is defined in a similar way as the libc and compiler builtin | ||
475 | * ffsll, but returns the position of the most significant set bit. | ||
476 | * | ||
477 | * fls64(value) returns 0 if value is 0 or the position of the last | ||
478 | * set bit if value is nonzero. The last (most significant) bit is | ||
479 | * at position 64. | ||
480 | */ | ||
481 | #ifdef CONFIG_X86_64 | ||
482 | static __always_inline int fls64(__u64 x) | ||
483 | { | ||
484 | long bitpos = -1; | ||
485 | /* | ||
486 | * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the | ||
487 | * dest reg is undefined if x==0, but their CPU architect says its | ||
488 | * value is written to set it to the same as before. | ||
489 | */ | ||
490 | asm("bsrq %1,%0" | ||
491 | : "+r" (bitpos) | ||
492 | : "rm" (x)); | ||
493 | return bitpos + 1; | ||
494 | } | ||
495 | #else | ||
496 | #include <asm-generic/bitops/fls64.h> | ||
497 | #endif | ||
442 | 498 | ||
443 | #include <asm-generic/bitops/find.h> | 499 | #include <asm-generic/bitops/find.h> |
444 | 500 | ||
@@ -450,12 +506,6 @@ static inline int fls(int x) | |||
450 | 506 | ||
451 | #include <asm-generic/bitops/const_hweight.h> | 507 | #include <asm-generic/bitops/const_hweight.h> |
452 | 508 | ||
453 | #endif /* __KERNEL__ */ | ||
454 | |||
455 | #include <asm-generic/bitops/fls64.h> | ||
456 | |||
457 | #ifdef __KERNEL__ | ||
458 | |||
459 | #include <asm-generic/bitops/le.h> | 509 | #include <asm-generic/bitops/le.h> |
460 | 510 | ||
461 | #include <asm-generic/bitops/ext2-atomic-setbit.h> | 511 | #include <asm-generic/bitops/ext2-atomic-setbit.h> |
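
Note: the 64-bit ffs()/fls()/fls64() variants above rely on the quoted AMD64 guarantee that BSF/BSR leave the destination unchanged on zero input; seeding the register with -1 therefore makes the +1 correction return 0 for a zero argument without the CMOV or branch the 32-bit paths need. A portable reference with the same contract, handy for spot-checking the bsrq-based fls64():

    /* not kernel code; same results as the new fls64() for all inputs */
    static int fls64_ref(unsigned long long x)
    {
            int r = 0;

            while (x) {
                    r++;
                    x >>= 1;
            }
            /* fls64_ref(0) == 0, fls64_ref(1) == 1,
               fls64_ref(1ULL << 63) == 64 */
            return r;
    }
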
diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h index e020d88ec02d..2f90c51cc49d 100644 --- a/arch/x86/include/asm/bootparam.h +++ b/arch/x86/include/asm/bootparam.h | |||
@@ -64,6 +64,8 @@ struct setup_header { | |||
64 | __u32 payload_offset; | 64 | __u32 payload_offset; |
65 | __u32 payload_length; | 65 | __u32 payload_length; |
66 | __u64 setup_data; | 66 | __u64 setup_data; |
67 | __u64 pref_address; | ||
68 | __u32 init_size; | ||
67 | } __attribute__((packed)); | 69 | } __attribute__((packed)); |
68 | 70 | ||
69 | struct sys_desc_table { | 71 | struct sys_desc_table { |
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index 5d3acdf5a7a6..0c9fa2745f13 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h | |||
@@ -14,6 +14,8 @@ extern void __cmpxchg_wrong_size(void) | |||
14 | __compiletime_error("Bad argument size for cmpxchg"); | 14 | __compiletime_error("Bad argument size for cmpxchg"); |
15 | extern void __xadd_wrong_size(void) | 15 | extern void __xadd_wrong_size(void) |
16 | __compiletime_error("Bad argument size for xadd"); | 16 | __compiletime_error("Bad argument size for xadd"); |
17 | extern void __add_wrong_size(void) | ||
18 | __compiletime_error("Bad argument size for add"); | ||
17 | 19 | ||
18 | /* | 20 | /* |
19 | * Constants for operation sizes. On 32-bit, the 64-bit size is set to | 21 | * Constants for operation sizes. On 32-bit, the 64-bit size is set to |
@@ -31,60 +33,47 @@ extern void __xadd_wrong_size(void) | |||
31 | #define __X86_CASE_Q -1 /* sizeof will never return -1 */ | 33 | #define __X86_CASE_Q -1 /* sizeof will never return -1 */ |
32 | #endif | 34 | #endif |
33 | 35 | ||
36 | /* | ||
37 | * An exchange-type operation, which takes a value and a pointer, and | ||
38 | * returns the old value. | ||
39 | */ | ||
40 | #define __xchg_op(ptr, arg, op, lock) \ | ||
41 | ({ \ | ||
42 | __typeof__ (*(ptr)) __ret = (arg); \ | ||
43 | switch (sizeof(*(ptr))) { \ | ||
44 | case __X86_CASE_B: \ | ||
45 | asm volatile (lock #op "b %b0, %1\n" \ | ||
46 | : "+r" (__ret), "+m" (*(ptr)) \ | ||
47 | : : "memory", "cc"); \ | ||
48 | break; \ | ||
49 | case __X86_CASE_W: \ | ||
50 | asm volatile (lock #op "w %w0, %1\n" \ | ||
51 | : "+r" (__ret), "+m" (*(ptr)) \ | ||
52 | : : "memory", "cc"); \ | ||
53 | break; \ | ||
54 | case __X86_CASE_L: \ | ||
55 | asm volatile (lock #op "l %0, %1\n" \ | ||
56 | : "+r" (__ret), "+m" (*(ptr)) \ | ||
57 | : : "memory", "cc"); \ | ||
58 | break; \ | ||
59 | case __X86_CASE_Q: \ | ||
60 | asm volatile (lock #op "q %q0, %1\n" \ | ||
61 | : "+r" (__ret), "+m" (*(ptr)) \ | ||
62 | : : "memory", "cc"); \ | ||
63 | break; \ | ||
64 | default: \ | ||
65 | __ ## op ## _wrong_size(); \ | ||
66 | } \ | ||
67 | __ret; \ | ||
68 | }) | ||
69 | |||
34 | /* | 70 | /* |
35 | * Note: no "lock" prefix even on SMP: xchg always implies lock anyway. | 71 | * Note: no "lock" prefix even on SMP: xchg always implies lock anyway. |
36 | * Since this is generally used to protect other memory information, we | 72 | * Since this is generally used to protect other memory information, we |
37 | * use "asm volatile" and "memory" clobbers to prevent gcc from moving | 73 | * use "asm volatile" and "memory" clobbers to prevent gcc from moving |
38 | * information around. | 74 | * information around. |
39 | */ | 75 | */ |
40 | #define __xchg(x, ptr, size) \ | 76 | #define xchg(ptr, v) __xchg_op((ptr), (v), xchg, "") |
41 | ({ \ | ||
42 | __typeof(*(ptr)) __x = (x); \ | ||
43 | switch (size) { \ | ||
44 | case __X86_CASE_B: \ | ||
45 | { \ | ||
46 | volatile u8 *__ptr = (volatile u8 *)(ptr); \ | ||
47 | asm volatile("xchgb %0,%1" \ | ||
48 | : "=q" (__x), "+m" (*__ptr) \ | ||
49 | : "0" (__x) \ | ||
50 | : "memory"); \ | ||
51 | break; \ | ||
52 | } \ | ||
53 | case __X86_CASE_W: \ | ||
54 | { \ | ||
55 | volatile u16 *__ptr = (volatile u16 *)(ptr); \ | ||
56 | asm volatile("xchgw %0,%1" \ | ||
57 | : "=r" (__x), "+m" (*__ptr) \ | ||
58 | : "0" (__x) \ | ||
59 | : "memory"); \ | ||
60 | break; \ | ||
61 | } \ | ||
62 | case __X86_CASE_L: \ | ||
63 | { \ | ||
64 | volatile u32 *__ptr = (volatile u32 *)(ptr); \ | ||
65 | asm volatile("xchgl %0,%1" \ | ||
66 | : "=r" (__x), "+m" (*__ptr) \ | ||
67 | : "0" (__x) \ | ||
68 | : "memory"); \ | ||
69 | break; \ | ||
70 | } \ | ||
71 | case __X86_CASE_Q: \ | ||
72 | { \ | ||
73 | volatile u64 *__ptr = (volatile u64 *)(ptr); \ | ||
74 | asm volatile("xchgq %0,%1" \ | ||
75 | : "=r" (__x), "+m" (*__ptr) \ | ||
76 | : "0" (__x) \ | ||
77 | : "memory"); \ | ||
78 | break; \ | ||
79 | } \ | ||
80 | default: \ | ||
81 | __xchg_wrong_size(); \ | ||
82 | } \ | ||
83 | __x; \ | ||
84 | }) | ||
85 | |||
86 | #define xchg(ptr, v) \ | ||
87 | __xchg((v), (ptr), sizeof(*ptr)) | ||
88 | 77 | ||
89 | /* | 78 | /* |
90 | * Atomic compare and exchange. Compare OLD with MEM, if identical, | 79 | * Atomic compare and exchange. Compare OLD with MEM, if identical, |
@@ -165,46 +154,80 @@ extern void __xadd_wrong_size(void) | |||
165 | __cmpxchg_local((ptr), (old), (new), sizeof(*ptr)) | 154 | __cmpxchg_local((ptr), (old), (new), sizeof(*ptr)) |
166 | #endif | 155 | #endif |
167 | 156 | ||
168 | #define __xadd(ptr, inc, lock) \ | 157 | /* |
158 | * xadd() adds "inc" to "*ptr" and atomically returns the previous | ||
159 | * value of "*ptr". | ||
160 | * | ||
161 | * xadd() is locked when multiple CPUs are online | ||
162 | * xadd_sync() is always locked | ||
163 | * xadd_local() is never locked | ||
164 | */ | ||
165 | #define __xadd(ptr, inc, lock) __xchg_op((ptr), (inc), xadd, lock) | ||
166 | #define xadd(ptr, inc) __xadd((ptr), (inc), LOCK_PREFIX) | ||
167 | #define xadd_sync(ptr, inc) __xadd((ptr), (inc), "lock; ") | ||
168 | #define xadd_local(ptr, inc) __xadd((ptr), (inc), "") | ||
169 | |||
170 | #define __add(ptr, inc, lock) \ | ||
169 | ({ \ | 171 | ({ \ |
170 | __typeof__ (*(ptr)) __ret = (inc); \ | 172 | __typeof__ (*(ptr)) __ret = (inc); \ |
171 | switch (sizeof(*(ptr))) { \ | 173 | switch (sizeof(*(ptr))) { \ |
172 | case __X86_CASE_B: \ | 174 | case __X86_CASE_B: \ |
173 | asm volatile (lock "xaddb %b0, %1\n" \ | 175 | asm volatile (lock "addb %b1, %0\n" \ |
174 | : "+r" (__ret), "+m" (*(ptr)) \ | 176 | : "+m" (*(ptr)) : "ri" (inc) \ |
175 | : : "memory", "cc"); \ | 177 | : "memory", "cc"); \ |
176 | break; \ | 178 | break; \ |
177 | case __X86_CASE_W: \ | 179 | case __X86_CASE_W: \ |
178 | asm volatile (lock "xaddw %w0, %1\n" \ | 180 | asm volatile (lock "addw %w1, %0\n" \ |
179 | : "+r" (__ret), "+m" (*(ptr)) \ | 181 | : "+m" (*(ptr)) : "ri" (inc) \ |
180 | : : "memory", "cc"); \ | 182 | : "memory", "cc"); \ |
181 | break; \ | 183 | break; \ |
182 | case __X86_CASE_L: \ | 184 | case __X86_CASE_L: \ |
183 | asm volatile (lock "xaddl %0, %1\n" \ | 185 | asm volatile (lock "addl %1, %0\n" \ |
184 | : "+r" (__ret), "+m" (*(ptr)) \ | 186 | : "+m" (*(ptr)) : "ri" (inc) \ |
185 | : : "memory", "cc"); \ | 187 | : "memory", "cc"); \ |
186 | break; \ | 188 | break; \ |
187 | case __X86_CASE_Q: \ | 189 | case __X86_CASE_Q: \ |
188 | asm volatile (lock "xaddq %q0, %1\n" \ | 190 | asm volatile (lock "addq %1, %0\n" \ |
189 | : "+r" (__ret), "+m" (*(ptr)) \ | 191 | : "+m" (*(ptr)) : "ri" (inc) \ |
190 | : : "memory", "cc"); \ | 192 | : "memory", "cc"); \ |
191 | break; \ | 193 | break; \ |
192 | default: \ | 194 | default: \ |
193 | __xadd_wrong_size(); \ | 195 | __add_wrong_size(); \ |
194 | } \ | 196 | } \ |
195 | __ret; \ | 197 | __ret; \ |
196 | }) | 198 | }) |
197 | 199 | ||
198 | /* | 200 | /* |
199 | * xadd() adds "inc" to "*ptr" and atomically returns the previous | 201 | * add_*() adds "inc" to "*ptr" |
200 | * value of "*ptr". | ||
201 | * | 202 | * |
202 | * xadd() is locked when multiple CPUs are online | 203 | * __add() takes a lock prefix |
203 | * xadd_sync() is always locked | 204 | * add_smp() is locked when multiple CPUs are online |
204 | * xadd_local() is never locked | 205 | * add_sync() is always locked |
205 | */ | 206 | */ |
206 | #define xadd(ptr, inc) __xadd((ptr), (inc), LOCK_PREFIX) | 207 | #define add_smp(ptr, inc) __add((ptr), (inc), LOCK_PREFIX) |
207 | #define xadd_sync(ptr, inc) __xadd((ptr), (inc), "lock; ") | 208 | #define add_sync(ptr, inc) __add((ptr), (inc), "lock; ") |
208 | #define xadd_local(ptr, inc) __xadd((ptr), (inc), "") | 209 | |
210 | #define __cmpxchg_double(pfx, p1, p2, o1, o2, n1, n2) \ | ||
211 | ({ \ | ||
212 | bool __ret; \ | ||
213 | __typeof__(*(p1)) __old1 = (o1), __new1 = (n1); \ | ||
214 | __typeof__(*(p2)) __old2 = (o2), __new2 = (n2); \ | ||
215 | BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \ | ||
216 | BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \ | ||
217 | VM_BUG_ON((unsigned long)(p1) % (2 * sizeof(long))); \ | ||
218 | VM_BUG_ON((unsigned long)((p1) + 1) != (unsigned long)(p2)); \ | ||
219 | asm volatile(pfx "cmpxchg%c4b %2; sete %0" \ | ||
220 | : "=a" (__ret), "+d" (__old2), \ | ||
221 | "+m" (*(p1)), "+m" (*(p2)) \ | ||
222 | : "i" (2 * sizeof(long)), "a" (__old1), \ | ||
223 | "b" (__new1), "c" (__new2)); \ | ||
224 | __ret; \ | ||
225 | }) | ||
226 | |||
227 | #define cmpxchg_double(p1, p2, o1, o2, n1, n2) \ | ||
228 | __cmpxchg_double(LOCK_PREFIX, p1, p2, o1, o2, n1, n2) | ||
229 | |||
230 | #define cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \ | ||
231 | __cmpxchg_double(, p1, p2, o1, o2, n1, n2) | ||
209 | 232 | ||
210 | #endif /* ASM_X86_CMPXCHG_H */ | 233 | #endif /* ASM_X86_CMPXCHG_H */ |
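
Note: cmpxchg.h now derives xchg() and xadd() from the single __xchg_op() template, adds plain add_smp()/add_sync() for callers that do not need the old value, and hoists cmpxchg_double() out of the 32/64-bit headers (see the deletions below). A usage sketch under the stated constraints: callers are expected to gate on system_has_cmpxchg_double(), and the two words must be adjacent and aligned to twice the word size; the struct and names here are illustrative, and bool/alignment helpers come from the usual kernel headers in real use:

    struct pair {
            unsigned long lo;
            unsigned long hi;
    } __attribute__((aligned(2 * sizeof(unsigned long))));

    /* atomically advance both words, or fail if another CPU raced us */
    static bool bump_pair(struct pair *p, unsigned long lo, unsigned long hi)
    {
            return cmpxchg_double(&p->lo, &p->hi, lo, hi, lo + 1, hi + 1);
    }

    /* xadd() returns the pre-increment value: a ticket allocator in one op */
    static unsigned long take_ticket(unsigned long *next)
    {
            return xadd(next, 1UL);
    }
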
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index fbebb07dd80b..53f4b219336b 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h | |||
@@ -166,52 +166,6 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old, | |||
166 | 166 | ||
167 | #endif | 167 | #endif |
168 | 168 | ||
169 | #define cmpxchg8b(ptr, o1, o2, n1, n2) \ | ||
170 | ({ \ | ||
171 | char __ret; \ | ||
172 | __typeof__(o2) __dummy; \ | ||
173 | __typeof__(*(ptr)) __old1 = (o1); \ | ||
174 | __typeof__(o2) __old2 = (o2); \ | ||
175 | __typeof__(*(ptr)) __new1 = (n1); \ | ||
176 | __typeof__(o2) __new2 = (n2); \ | ||
177 | asm volatile(LOCK_PREFIX "cmpxchg8b %2; setz %1" \ | ||
178 | : "=d"(__dummy), "=a" (__ret), "+m" (*ptr)\ | ||
179 | : "a" (__old1), "d"(__old2), \ | ||
180 | "b" (__new1), "c" (__new2) \ | ||
181 | : "memory"); \ | ||
182 | __ret; }) | ||
183 | |||
184 | |||
185 | #define cmpxchg8b_local(ptr, o1, o2, n1, n2) \ | ||
186 | ({ \ | ||
187 | char __ret; \ | ||
188 | __typeof__(o2) __dummy; \ | ||
189 | __typeof__(*(ptr)) __old1 = (o1); \ | ||
190 | __typeof__(o2) __old2 = (o2); \ | ||
191 | __typeof__(*(ptr)) __new1 = (n1); \ | ||
192 | __typeof__(o2) __new2 = (n2); \ | ||
193 | asm volatile("cmpxchg8b %2; setz %1" \ | ||
194 | : "=d"(__dummy), "=a"(__ret), "+m" (*ptr)\ | ||
195 | : "a" (__old), "d"(__old2), \ | ||
196 | "b" (__new1), "c" (__new2), \ | ||
197 | : "memory"); \ | ||
198 | __ret; }) | ||
199 | |||
200 | |||
201 | #define cmpxchg_double(ptr, o1, o2, n1, n2) \ | ||
202 | ({ \ | ||
203 | BUILD_BUG_ON(sizeof(*(ptr)) != 4); \ | ||
204 | VM_BUG_ON((unsigned long)(ptr) % 8); \ | ||
205 | cmpxchg8b((ptr), (o1), (o2), (n1), (n2)); \ | ||
206 | }) | ||
207 | |||
208 | #define cmpxchg_double_local(ptr, o1, o2, n1, n2) \ | ||
209 | ({ \ | ||
210 | BUILD_BUG_ON(sizeof(*(ptr)) != 4); \ | ||
211 | VM_BUG_ON((unsigned long)(ptr) % 8); \ | ||
212 | cmpxchg16b_local((ptr), (o1), (o2), (n1), (n2)); \ | ||
213 | }) | ||
214 | |||
215 | #define system_has_cmpxchg_double() cpu_has_cx8 | 169 | #define system_has_cmpxchg_double() cpu_has_cx8 |
216 | 170 | ||
217 | #endif /* _ASM_X86_CMPXCHG_32_H */ | 171 | #endif /* _ASM_X86_CMPXCHG_32_H */ |
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 285da02c38fa..614be87f1a9b 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h | |||
@@ -20,49 +20,6 @@ static inline void set_64bit(volatile u64 *ptr, u64 val) | |||
20 | cmpxchg_local((ptr), (o), (n)); \ | 20 | cmpxchg_local((ptr), (o), (n)); \ |
21 | }) | 21 | }) |
22 | 22 | ||
23 | #define cmpxchg16b(ptr, o1, o2, n1, n2) \ | ||
24 | ({ \ | ||
25 | char __ret; \ | ||
26 | __typeof__(o2) __junk; \ | ||
27 | __typeof__(*(ptr)) __old1 = (o1); \ | ||
28 | __typeof__(o2) __old2 = (o2); \ | ||
29 | __typeof__(*(ptr)) __new1 = (n1); \ | ||
30 | __typeof__(o2) __new2 = (n2); \ | ||
31 | asm volatile(LOCK_PREFIX "cmpxchg16b %2;setz %1" \ | ||
32 | : "=d"(__junk), "=a"(__ret), "+m" (*ptr) \ | ||
33 | : "b"(__new1), "c"(__new2), \ | ||
34 | "a"(__old1), "d"(__old2)); \ | ||
35 | __ret; }) | ||
36 | |||
37 | |||
38 | #define cmpxchg16b_local(ptr, o1, o2, n1, n2) \ | ||
39 | ({ \ | ||
40 | char __ret; \ | ||
41 | __typeof__(o2) __junk; \ | ||
42 | __typeof__(*(ptr)) __old1 = (o1); \ | ||
43 | __typeof__(o2) __old2 = (o2); \ | ||
44 | __typeof__(*(ptr)) __new1 = (n1); \ | ||
45 | __typeof__(o2) __new2 = (n2); \ | ||
46 | asm volatile("cmpxchg16b %2;setz %1" \ | ||
47 | : "=d"(__junk), "=a"(__ret), "+m" (*ptr) \ | ||
48 | : "b"(__new1), "c"(__new2), \ | ||
49 | "a"(__old1), "d"(__old2)); \ | ||
50 | __ret; }) | ||
51 | |||
52 | #define cmpxchg_double(ptr, o1, o2, n1, n2) \ | ||
53 | ({ \ | ||
54 | BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ | ||
55 | VM_BUG_ON((unsigned long)(ptr) % 16); \ | ||
56 | cmpxchg16b((ptr), (o1), (o2), (n1), (n2)); \ | ||
57 | }) | ||
58 | |||
59 | #define cmpxchg_double_local(ptr, o1, o2, n1, n2) \ | ||
60 | ({ \ | ||
61 | BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ | ||
62 | VM_BUG_ON((unsigned long)(ptr) % 16); \ | ||
63 | cmpxchg16b_local((ptr), (o1), (o2), (n1), (n2)); \ | ||
64 | }) | ||
65 | |||
66 | #define system_has_cmpxchg_double() cpu_has_cx16 | 23 | #define system_has_cmpxchg_double() cpu_has_cx16 |
67 | 24 | ||
68 | #endif /* _ASM_X86_CMPXCHG_64_H */ | 25 | #endif /* _ASM_X86_CMPXCHG_64_H */ |
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index f3444f700f36..17c5d4bdee5e 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
@@ -197,7 +197,10 @@ | |||
197 | 197 | ||
198 | /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ | 198 | /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ |
199 | #define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ | 199 | #define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ |
200 | #define X86_FEATURE_BMI1 (9*32+ 3) /* 1st group bit manipulation extensions */ | ||
201 | #define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */ | ||
200 | #define X86_FEATURE_SMEP (9*32+ 7) /* Supervisor Mode Execution Protection */ | 202 | #define X86_FEATURE_SMEP (9*32+ 7) /* Supervisor Mode Execution Protection */ |
203 | #define X86_FEATURE_BMI2 (9*32+ 8) /* 2nd group bit manipulation extensions */ | ||
201 | #define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */ | 204 | #define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */ |
202 | 205 | ||
203 | #if defined(__KERNEL__) && !defined(__ASSEMBLY__) | 206 | #if defined(__KERNEL__) && !defined(__ASSEMBLY__) |
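
Note: the new word-9 bits land next to SMEP/ERMS and become testable through the regular cpufeature machinery once CPUID leaf 7 has been read into boot_cpu_data. A sketch using the existing boot_cpu_has() helper:

    static bool have_bmi_avx2(void)
    {
            /* CPUID.(EAX=07H,ECX=0):EBX bits, cached by the cpu setup code */
            return boot_cpu_has(X86_FEATURE_BMI1) &&
                   boot_cpu_has(X86_FEATURE_BMI2) &&
                   boot_cpu_has(X86_FEATURE_AVX2);
    }
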
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 078ad0caefc6..b903d5ea3941 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h | |||
@@ -101,6 +101,28 @@ extern void aout_dump_debugregs(struct user *dump); | |||
101 | 101 | ||
102 | extern void hw_breakpoint_restore(void); | 102 | extern void hw_breakpoint_restore(void); |
103 | 103 | ||
104 | #ifdef CONFIG_X86_64 | ||
105 | DECLARE_PER_CPU(int, debug_stack_usage); | ||
106 | static inline void debug_stack_usage_inc(void) | ||
107 | { | ||
108 | __get_cpu_var(debug_stack_usage)++; | ||
109 | } | ||
110 | static inline void debug_stack_usage_dec(void) | ||
111 | { | ||
112 | __get_cpu_var(debug_stack_usage)--; | ||
113 | } | ||
114 | int is_debug_stack(unsigned long addr); | ||
115 | void debug_stack_set_zero(void); | ||
116 | void debug_stack_reset(void); | ||
117 | #else /* !X86_64 */ | ||
118 | static inline int is_debug_stack(unsigned long addr) { return 0; } | ||
119 | static inline void debug_stack_set_zero(void) { } | ||
120 | static inline void debug_stack_reset(void) { } | ||
121 | static inline void debug_stack_usage_inc(void) { } | ||
122 | static inline void debug_stack_usage_dec(void) { } | ||
123 | #endif /* X86_64 */ | ||
124 | |||
125 | |||
104 | #endif /* __KERNEL__ */ | 126 | #endif /* __KERNEL__ */ |
105 | 127 | ||
106 | #endif /* _ASM_X86_DEBUGREG_H */ | 128 | #endif /* _ASM_X86_DEBUGREG_H */ |
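
Note: the debug_stack_usage counter lets the x86-64 NMI path elsewhere in this series detect that the interrupted context was using the IST debug stack, and repoint it via debug_stack_set_zero()/debug_stack_reset() before a nested #DB/#BP can clobber it. A sketch of the intended pairing; the body between inc and dec is illustrative:

    static void handle_debug_like_trap(void)
    {
            debug_stack_usage_inc();
            /* work that may re-enter via #DB/#BP while the IST debug
               stack is live; the NMI code checks the counter */
            debug_stack_usage_dec();
    }
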
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 41935fadfdfc..e95822d683f4 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h | |||
@@ -35,6 +35,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in | |||
35 | 35 | ||
36 | extern struct desc_ptr idt_descr; | 36 | extern struct desc_ptr idt_descr; |
37 | extern gate_desc idt_table[]; | 37 | extern gate_desc idt_table[]; |
38 | extern struct desc_ptr nmi_idt_descr; | ||
39 | extern gate_desc nmi_idt_table[]; | ||
38 | 40 | ||
39 | struct gdt_page { | 41 | struct gdt_page { |
40 | struct desc_struct gdt[GDT_ENTRIES]; | 42 | struct desc_struct gdt[GDT_ENTRIES]; |
@@ -307,6 +309,16 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit) | |||
307 | desc->limit = (limit >> 16) & 0xf; | 309 | desc->limit = (limit >> 16) & 0xf; |
308 | } | 310 | } |
309 | 311 | ||
312 | #ifdef CONFIG_X86_64 | ||
313 | static inline void set_nmi_gate(int gate, void *addr) | ||
314 | { | ||
315 | gate_desc s; | ||
316 | |||
317 | pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS); | ||
318 | write_idt_entry(nmi_idt_table, gate, &s); | ||
319 | } | ||
320 | #endif | ||
321 | |||
310 | static inline void _set_gate(int gate, unsigned type, void *addr, | 322 | static inline void _set_gate(int gate, unsigned type, void *addr, |
311 | unsigned dpl, unsigned ist, unsigned seg) | 323 | unsigned dpl, unsigned ist, unsigned seg) |
312 | { | 324 | { |
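
Note: set_nmi_gate() installs vectors into the new nmi_idt_table, the alternate IDT copy used while handling NMIs; presumably only the debug and breakpoint vectors need this treatment. A sketch of the wiring, where debug/int3 are the asm entry stubs from entry_64.S:

    extern void debug(void);
    extern void int3(void);

    static void __init wire_nmi_idt(void)
    {
            set_nmi_gate(1, &debug);    /* #DB */
            set_nmi_gate(3, &int3);     /* #BP */
    }
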
diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h index 9a2d644c08ef..ced283ac79df 100644 --- a/arch/x86/include/asm/div64.h +++ b/arch/x86/include/asm/div64.h | |||
@@ -4,6 +4,7 @@ | |||
4 | #ifdef CONFIG_X86_32 | 4 | #ifdef CONFIG_X86_32 |
5 | 5 | ||
6 | #include <linux/types.h> | 6 | #include <linux/types.h> |
7 | #include <linux/log2.h> | ||
7 | 8 | ||
8 | /* | 9 | /* |
9 | * do_div() is NOT a C function. It wants to return | 10 | * do_div() is NOT a C function. It wants to return |
@@ -21,15 +22,20 @@ | |||
21 | ({ \ | 22 | ({ \ |
22 | unsigned long __upper, __low, __high, __mod, __base; \ | 23 | unsigned long __upper, __low, __high, __mod, __base; \ |
23 | __base = (base); \ | 24 | __base = (base); \ |
24 | asm("":"=a" (__low), "=d" (__high) : "A" (n)); \ | 25 | if (__builtin_constant_p(__base) && is_power_of_2(__base)) { \ |
25 | __upper = __high; \ | 26 | __mod = n & (__base - 1); \ |
26 | if (__high) { \ | 27 | n >>= ilog2(__base); \ |
27 | __upper = __high % (__base); \ | 28 | } else { \ |
28 | __high = __high / (__base); \ | 29 | asm("" : "=a" (__low), "=d" (__high) : "A" (n));\ |
30 | __upper = __high; \ | ||
31 | if (__high) { \ | ||
32 | __upper = __high % (__base); \ | ||
33 | __high = __high / (__base); \ | ||
34 | } \ | ||
35 | asm("divl %2" : "=a" (__low), "=d" (__mod) \ | ||
36 | : "rm" (__base), "0" (__low), "1" (__upper)); \ | ||
37 | asm("" : "=A" (n) : "a" (__low), "d" (__high)); \ | ||
29 | } \ | 38 | } \ |
30 | asm("divl %2":"=a" (__low), "=d" (__mod) \ | ||
31 | : "rm" (__base), "0" (__low), "1" (__upper)); \ | ||
32 | asm("":"=A" (n) : "a" (__low), "d" (__high)); \ | ||
33 | __mod; \ | 39 | __mod; \ |
34 | }) | 40 | }) |
35 | 41 | ||
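
Note: with the added is_power_of_2() fast path, a 32-bit do_div() by a compile-time-constant power of two no longer issues divl at all: the remainder becomes a mask and the quotient a shift. A small demonstration, assuming the kernel's u32/u64 types:

    static u32 demo(void)
    {
            u64 n = 1000003;
            u32 rem;

            rem = do_div(n, 8); /* folds to: rem = n & 7; n >>= 3; */
            /* here n == 125000 and rem == 3; a non-constant or
               non-power-of-2 base still takes the divl path */
            return rem;
    }
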
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 908b96957d88..37782566af24 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h | |||
@@ -117,7 +117,7 @@ static inline void early_memtest(unsigned long start, unsigned long end) | |||
117 | 117 | ||
118 | extern unsigned long e820_end_of_ram_pfn(void); | 118 | extern unsigned long e820_end_of_ram_pfn(void); |
119 | extern unsigned long e820_end_of_low_ram_pfn(void); | 119 | extern unsigned long e820_end_of_low_ram_pfn(void); |
120 | extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); | 120 | extern u64 early_reserve_e820(u64 sizet, u64 align); |
121 | 121 | ||
122 | void memblock_x86_fill(void); | 122 | void memblock_x86_fill(void); |
123 | void memblock_find_dma_reserve(void); | 123 | void memblock_find_dma_reserve(void); |
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 7093e4a6a0bc..844f735fd63a 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h | |||
@@ -3,6 +3,8 @@ | |||
3 | 3 | ||
4 | #ifdef CONFIG_X86_32 | 4 | #ifdef CONFIG_X86_32 |
5 | 5 | ||
6 | #define EFI_LOADER_SIGNATURE "EL32" | ||
7 | |||
6 | extern unsigned long asmlinkage efi_call_phys(void *, ...); | 8 | extern unsigned long asmlinkage efi_call_phys(void *, ...); |
7 | 9 | ||
8 | #define efi_call_phys0(f) efi_call_phys(f) | 10 | #define efi_call_phys0(f) efi_call_phys(f) |
@@ -37,6 +39,8 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...); | |||
37 | 39 | ||
38 | #else /* !CONFIG_X86_32 */ | 40 | #else /* !CONFIG_X86_32 */ |
39 | 41 | ||
42 | #define EFI_LOADER_SIGNATURE "EL64" | ||
43 | |||
40 | extern u64 efi_call0(void *fp); | 44 | extern u64 efi_call0(void *fp); |
41 | extern u64 efi_call1(void *fp, u64 arg1); | 45 | extern u64 efi_call1(void *fp, u64 arg1); |
42 | extern u64 efi_call2(void *fp, u64 arg1, u64 arg2); | 46 | extern u64 efi_call2(void *fp, u64 arg1, u64 arg2); |
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 460c74e4852c..4da3c0c4c974 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h | |||
@@ -117,7 +117,7 @@ enum fixed_addresses { | |||
117 | #endif | 117 | #endif |
118 | FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */ | 118 | FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */ |
119 | FIX_TEXT_POKE0, /* first page is last, because allocation is backward */ | 119 | FIX_TEXT_POKE0, /* first page is last, because allocation is backward */ |
120 | #ifdef CONFIG_X86_MRST | 120 | #ifdef CONFIG_X86_INTEL_MID |
121 | FIX_LNW_VRTC, | 121 | FIX_LNW_VRTC, |
122 | #endif | 122 | #endif |
123 | __end_of_permanent_fixed_addresses, | 123 | __end_of_permanent_fixed_addresses, |
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 55e4de613f0e..da0b3ca815b7 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h | |||
@@ -11,6 +11,7 @@ typedef struct { | |||
11 | #ifdef CONFIG_X86_LOCAL_APIC | 11 | #ifdef CONFIG_X86_LOCAL_APIC |
12 | unsigned int apic_timer_irqs; /* arch dependent */ | 12 | unsigned int apic_timer_irqs; /* arch dependent */ |
13 | unsigned int irq_spurious_count; | 13 | unsigned int irq_spurious_count; |
14 | unsigned int icr_read_retry_count; | ||
14 | #endif | 15 | #endif |
15 | unsigned int x86_platform_ipis; /* arch dependent */ | 16 | unsigned int x86_platform_ipis; /* arch dependent */ |
16 | unsigned int apic_perf_irqs; | 17 | unsigned int apic_perf_irqs; |
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index c9e09ea05644..6919e936345b 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h | |||
@@ -218,7 +218,7 @@ static inline void fpu_fxsave(struct fpu *fpu) | |||
218 | #ifdef CONFIG_SMP | 218 | #ifdef CONFIG_SMP |
219 | #define safe_address (__per_cpu_offset[0]) | 219 | #define safe_address (__per_cpu_offset[0]) |
220 | #else | 220 | #else |
221 | #define safe_address (kstat_cpu(0).cpustat.user) | 221 | #define safe_address (__get_cpu_var(kernel_cpustat).cpustat[CPUTIME_USER]) |
222 | #endif | 222 | #endif |
223 | 223 | ||
224 | /* | 224 | /* |
diff --git a/arch/x86/include/asm/ia32_unistd.h b/arch/x86/include/asm/ia32_unistd.h index 976f6ecd2ce6..b0d5716ca1e4 100644 --- a/arch/x86/include/asm/ia32_unistd.h +++ b/arch/x86/include/asm/ia32_unistd.h | |||
@@ -2,17 +2,10 @@ | |||
2 | #define _ASM_X86_IA32_UNISTD_H | 2 | #define _ASM_X86_IA32_UNISTD_H |
3 | 3 | ||
4 | /* | 4 | /* |
5 | * This file contains the system call numbers of the ia32 port, | 5 | * This file contains the system call numbers of the ia32 compat ABI, |
6 | * this is for the kernel only. | 6 | * this is for the kernel only. |
7 | * Only add syscalls here where some part of the kernel needs to know | ||
8 | * the number. This should be otherwise in sync with asm-x86/unistd_32.h. -AK | ||
9 | */ | 7 | */ |
10 | 8 | #define __SYSCALL_ia32_NR(x) (x) | |
11 | #define __NR_ia32_restart_syscall 0 | 9 | #include <asm/unistd_32_ia32.h> |
12 | #define __NR_ia32_exit 1 | ||
13 | #define __NR_ia32_read 3 | ||
14 | #define __NR_ia32_write 4 | ||
15 | #define __NR_ia32_sigreturn 119 | ||
16 | #define __NR_ia32_rt_sigreturn 173 | ||
17 | 10 | ||
18 | #endif /* _ASM_X86_IA32_UNISTD_H */ | 11 | #endif /* _ASM_X86_IA32_UNISTD_H */ |
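
Note: the six hand-listed __NR_ia32_* constants give way to the generated asm/unistd_32_ia32.h, so the kernel-internal view now covers every compat syscall number. The generated header is presumably a list of defines routed through the __SYSCALL_ia32_NR() hook defined above, along the lines of (illustrative, not the verbatim generated file):

    #define __NR_ia32_restart_syscall (__SYSCALL_ia32_NR(0))
    #define __NR_ia32_exit            (__SYSCALL_ia32_NR(1))
    #define __NR_ia32_read            (__SYSCALL_ia32_NR(3))
    /* ... one line per entry in the new syscall_32.tbl ... */
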
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h index 8dbe353e41e1..adcc0ae73d09 100644 --- a/arch/x86/include/asm/init.h +++ b/arch/x86/include/asm/init.h | |||
@@ -5,6 +5,8 @@ | |||
5 | extern void __init early_ioremap_page_table_range_init(void); | 5 | extern void __init early_ioremap_page_table_range_init(void); |
6 | #endif | 6 | #endif |
7 | 7 | ||
8 | extern void __init zone_sizes_init(void); | ||
9 | |||
8 | extern unsigned long __init | 10 | extern unsigned long __init |
9 | kernel_physical_mapping_init(unsigned long start, | 11 | kernel_physical_mapping_init(unsigned long start, |
10 | unsigned long end, | 12 | unsigned long end, |
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 88c765e16410..74df3f1eddfd 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h | |||
@@ -137,6 +137,13 @@ static inline int insn_is_avx(struct insn *insn) | |||
137 | return (insn->vex_prefix.value != 0); | 137 | return (insn->vex_prefix.value != 0); |
138 | } | 138 | } |
139 | 139 | ||
140 | /* Ensure this instruction is decoded completely */ | ||
141 | static inline int insn_complete(struct insn *insn) | ||
142 | { | ||
143 | return insn->opcode.got && insn->modrm.got && insn->sib.got && | ||
144 | insn->displacement.got && insn->immediate.got; | ||
145 | } | ||
146 | |||
140 | static inline insn_byte_t insn_vex_m_bits(struct insn *insn) | 147 | static inline insn_byte_t insn_vex_m_bits(struct insn *insn) |
141 | { | 148 | { |
142 | if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ | 149 | if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ |
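
Note: insn_complete() gives decoder users a single check that every stage (opcode, ModRM, SIB, displacement, immediate) was actually reached, which matters when decoding potentially truncated byte sequences. A sketch against the decoder API as it stands in this series (insn_init() takes only a buffer and a 64-bit flag here):

    static bool decode_ok(const void *kaddr)
    {
            struct insn insn;

            insn_init(&insn, kaddr, 1);     /* 1 = decode as 64-bit code */
            insn_get_length(&insn);         /* walks all earlier stages too */
            return insn_complete(&insn);
    }
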
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index 345c99cef152..dffc38ee6255 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h | |||
@@ -5,6 +5,7 @@ extern struct dma_map_ops nommu_dma_ops; | |||
5 | extern int force_iommu, no_iommu; | 5 | extern int force_iommu, no_iommu; |
6 | extern int iommu_detected; | 6 | extern int iommu_detected; |
7 | extern int iommu_pass_through; | 7 | extern int iommu_pass_through; |
8 | extern int iommu_group_mf; | ||
8 | 9 | ||
9 | /* 10 seconds */ | 10 | /* 10 seconds */ |
10 | #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) | 11 | #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) |
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index a026507893e9..ab4092e3214e 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h | |||
@@ -181,6 +181,7 @@ struct x86_emulate_ops { | |||
181 | int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); | 181 | int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); |
182 | int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); | 182 | int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); |
183 | int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); | 183 | int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); |
184 | int (*read_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc, u64 *pdata); | ||
184 | void (*halt)(struct x86_emulate_ctxt *ctxt); | 185 | void (*halt)(struct x86_emulate_ctxt *ctxt); |
185 | void (*wbinvd)(struct x86_emulate_ctxt *ctxt); | 186 | void (*wbinvd)(struct x86_emulate_ctxt *ctxt); |
186 | int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt); | 187 | int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt); |
@@ -364,6 +365,7 @@ enum x86_intercept { | |||
364 | #endif | 365 | #endif |
365 | 366 | ||
366 | int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len); | 367 | int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len); |
368 | bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt); | ||
367 | #define EMULATION_FAILED -1 | 369 | #define EMULATION_FAILED -1 |
368 | #define EMULATION_OK 0 | 370 | #define EMULATION_OK 0 |
369 | #define EMULATION_RESTART 1 | 371 | #define EMULATION_RESTART 1 |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b4973f4dab98..52d6640a5ca1 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -16,10 +16,12 @@ | |||
16 | #include <linux/mmu_notifier.h> | 16 | #include <linux/mmu_notifier.h> |
17 | #include <linux/tracepoint.h> | 17 | #include <linux/tracepoint.h> |
18 | #include <linux/cpumask.h> | 18 | #include <linux/cpumask.h> |
19 | #include <linux/irq_work.h> | ||
19 | 20 | ||
20 | #include <linux/kvm.h> | 21 | #include <linux/kvm.h> |
21 | #include <linux/kvm_para.h> | 22 | #include <linux/kvm_para.h> |
22 | #include <linux/kvm_types.h> | 23 | #include <linux/kvm_types.h> |
24 | #include <linux/perf_event.h> | ||
23 | 25 | ||
24 | #include <asm/pvclock-abi.h> | 26 | #include <asm/pvclock-abi.h> |
25 | #include <asm/desc.h> | 27 | #include <asm/desc.h> |
@@ -31,6 +33,8 @@ | |||
31 | #define KVM_MEMORY_SLOTS 32 | 33 | #define KVM_MEMORY_SLOTS 32 |
32 | /* memory slots that are not exposed to userspace */ | 34 | /* memory slots that are not exposed to userspace */ |
33 | #define KVM_PRIVATE_MEM_SLOTS 4 | 35 | #define KVM_PRIVATE_MEM_SLOTS 4 |
36 | #define KVM_MEM_SLOTS_NUM (KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) | ||
37 | |||
34 | #define KVM_MMIO_SIZE 16 | 38 | #define KVM_MMIO_SIZE 16 |
35 | 39 | ||
36 | #define KVM_PIO_PAGE_OFFSET 1 | 40 | #define KVM_PIO_PAGE_OFFSET 1 |
@@ -228,7 +232,7 @@ struct kvm_mmu_page { | |||
228 | * One bit set per slot which has memory | 232 | * One bit set per slot which has memory |
229 | * in this shadow page. | 233 | * in this shadow page. |
230 | */ | 234 | */ |
231 | DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); | 235 | DECLARE_BITMAP(slot_bitmap, KVM_MEM_SLOTS_NUM); |
232 | bool unsync; | 236 | bool unsync; |
233 | int root_count; /* Currently serving as active root */ | 237 | int root_count; /* Currently serving as active root */ |
234 | unsigned int unsync_children; | 238 | unsigned int unsync_children; |
@@ -239,14 +243,9 @@ struct kvm_mmu_page { | |||
239 | int clear_spte_count; | 243 | int clear_spte_count; |
240 | #endif | 244 | #endif |
241 | 245 | ||
242 | struct rcu_head rcu; | 246 | int write_flooding_count; |
243 | }; | ||
244 | 247 | ||
245 | struct kvm_pv_mmu_op_buffer { | 248 | struct rcu_head rcu; |
246 | void *ptr; | ||
247 | unsigned len; | ||
248 | unsigned processed; | ||
249 | char buf[512] __aligned(sizeof(long)); | ||
250 | }; | 249 | }; |
251 | 250 | ||
252 | struct kvm_pio_request { | 251 | struct kvm_pio_request { |
@@ -294,6 +293,37 @@ struct kvm_mmu { | |||
294 | u64 pdptrs[4]; /* pae */ | 293 | u64 pdptrs[4]; /* pae */ |
295 | }; | 294 | }; |
296 | 295 | ||
296 | enum pmc_type { | ||
297 | KVM_PMC_GP = 0, | ||
298 | KVM_PMC_FIXED, | ||
299 | }; | ||
300 | |||
301 | struct kvm_pmc { | ||
302 | enum pmc_type type; | ||
303 | u8 idx; | ||
304 | u64 counter; | ||
305 | u64 eventsel; | ||
306 | struct perf_event *perf_event; | ||
307 | struct kvm_vcpu *vcpu; | ||
308 | }; | ||
309 | |||
310 | struct kvm_pmu { | ||
311 | unsigned nr_arch_gp_counters; | ||
312 | unsigned nr_arch_fixed_counters; | ||
313 | unsigned available_event_types; | ||
314 | u64 fixed_ctr_ctrl; | ||
315 | u64 global_ctrl; | ||
316 | u64 global_status; | ||
317 | u64 global_ovf_ctrl; | ||
318 | u64 counter_bitmask[2]; | ||
319 | u64 global_ctrl_mask; | ||
320 | u8 version; | ||
321 | struct kvm_pmc gp_counters[X86_PMC_MAX_GENERIC]; | ||
322 | struct kvm_pmc fixed_counters[X86_PMC_MAX_FIXED]; | ||
323 | struct irq_work irq_work; | ||
324 | u64 reprogram_pmi; | ||
325 | }; | ||
326 | |||
297 | struct kvm_vcpu_arch { | 327 | struct kvm_vcpu_arch { |
298 | /* | 328 | /* |
299 | * rip and regs accesses must go through | 329 | * rip and regs accesses must go through |
@@ -345,19 +375,10 @@ struct kvm_vcpu_arch { | |||
345 | */ | 375 | */ |
346 | struct kvm_mmu *walk_mmu; | 376 | struct kvm_mmu *walk_mmu; |
347 | 377 | ||
348 | /* only needed in kvm_pv_mmu_op() path, but it's hot so | ||
349 | * put it here to avoid allocation */ | ||
350 | struct kvm_pv_mmu_op_buffer mmu_op_buffer; | ||
351 | |||
352 | struct kvm_mmu_memory_cache mmu_pte_list_desc_cache; | 378 | struct kvm_mmu_memory_cache mmu_pte_list_desc_cache; |
353 | struct kvm_mmu_memory_cache mmu_page_cache; | 379 | struct kvm_mmu_memory_cache mmu_page_cache; |
354 | struct kvm_mmu_memory_cache mmu_page_header_cache; | 380 | struct kvm_mmu_memory_cache mmu_page_header_cache; |
355 | 381 | ||
356 | gfn_t last_pt_write_gfn; | ||
357 | int last_pt_write_count; | ||
358 | u64 *last_pte_updated; | ||
359 | gfn_t last_pte_gfn; | ||
360 | |||
361 | struct fpu guest_fpu; | 382 | struct fpu guest_fpu; |
362 | u64 xcr0; | 383 | u64 xcr0; |
363 | 384 | ||
@@ -436,6 +457,8 @@ struct kvm_vcpu_arch { | |||
436 | unsigned access; | 457 | unsigned access; |
437 | gfn_t mmio_gfn; | 458 | gfn_t mmio_gfn; |
438 | 459 | ||
460 | struct kvm_pmu pmu; | ||
461 | |||
439 | /* used for guest single stepping over the given code position */ | 462 | /* used for guest single stepping over the given code position */ |
440 | unsigned long singlestep_rip; | 463 | unsigned long singlestep_rip; |
441 | 464 | ||
@@ -444,6 +467,9 @@ struct kvm_vcpu_arch { | |||
444 | 467 | ||
445 | cpumask_var_t wbinvd_dirty_mask; | 468 | cpumask_var_t wbinvd_dirty_mask; |
446 | 469 | ||
470 | unsigned long last_retry_eip; | ||
471 | unsigned long last_retry_addr; | ||
472 | |||
447 | struct { | 473 | struct { |
448 | bool halted; | 474 | bool halted; |
449 | gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)]; | 475 | gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)]; |
@@ -459,7 +485,6 @@ struct kvm_arch { | |||
459 | unsigned int n_requested_mmu_pages; | 485 | unsigned int n_requested_mmu_pages; |
460 | unsigned int n_max_mmu_pages; | 486 | unsigned int n_max_mmu_pages; |
461 | unsigned int indirect_shadow_pages; | 487 | unsigned int indirect_shadow_pages; |
462 | atomic_t invlpg_counter; | ||
463 | struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; | 488 | struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; |
464 | /* | 489 | /* |
465 | * Hash table of struct kvm_mmu_page. | 490 | * Hash table of struct kvm_mmu_page. |
@@ -660,6 +685,8 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | |||
660 | 685 | ||
661 | int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); | 686 | int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); |
662 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); | 687 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); |
688 | int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn, | ||
689 | struct kvm_memory_slot *slot); | ||
663 | void kvm_mmu_zap_all(struct kvm *kvm); | 690 | void kvm_mmu_zap_all(struct kvm *kvm); |
664 | unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); | 691 | unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); |
665 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); | 692 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); |
@@ -668,8 +695,6 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3); | |||
668 | 695 | ||
669 | int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | 696 | int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, |
670 | const void *val, int bytes); | 697 | const void *val, int bytes); |
671 | int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes, | ||
672 | gpa_t addr, unsigned long *ret); | ||
673 | u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); | 698 | u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); |
674 | 699 | ||
675 | extern bool tdp_enabled; | 700 | extern bool tdp_enabled; |
@@ -692,6 +717,7 @@ enum emulation_result { | |||
692 | #define EMULTYPE_NO_DECODE (1 << 0) | 717 | #define EMULTYPE_NO_DECODE (1 << 0) |
693 | #define EMULTYPE_TRAP_UD (1 << 1) | 718 | #define EMULTYPE_TRAP_UD (1 << 1) |
694 | #define EMULTYPE_SKIP (1 << 2) | 719 | #define EMULTYPE_SKIP (1 << 2) |
720 | #define EMULTYPE_RETRY (1 << 3) | ||
695 | int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, | 721 | int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, |
696 | int emulation_type, void *insn, int insn_len); | 722 | int emulation_type, void *insn, int insn_len); |
697 | 723 | ||
@@ -734,6 +760,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); | |||
734 | 760 | ||
735 | unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu); | 761 | unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu); |
736 | void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); | 762 | void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); |
763 | bool kvm_rdpmc(struct kvm_vcpu *vcpu); | ||
737 | 764 | ||
738 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); | 765 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); |
739 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); | 766 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); |
@@ -754,13 +781,14 @@ int fx_init(struct kvm_vcpu *vcpu); | |||
754 | 781 | ||
755 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); | 782 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); |
756 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 783 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
757 | const u8 *new, int bytes, | 784 | const u8 *new, int bytes); |
758 | bool guest_initiated); | 785 | int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); |
759 | int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); | 786 | int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); |
760 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); | 787 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); |
761 | int kvm_mmu_load(struct kvm_vcpu *vcpu); | 788 | int kvm_mmu_load(struct kvm_vcpu *vcpu); |
762 | void kvm_mmu_unload(struct kvm_vcpu *vcpu); | 789 | void kvm_mmu_unload(struct kvm_vcpu *vcpu); |
763 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); | 790 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); |
791 | gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access); | ||
764 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, | 792 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, |
765 | struct x86_exception *exception); | 793 | struct x86_exception *exception); |
766 | gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, | 794 | gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, |
@@ -782,6 +810,11 @@ void kvm_disable_tdp(void); | |||
782 | int complete_pio(struct kvm_vcpu *vcpu); | 810 | int complete_pio(struct kvm_vcpu *vcpu); |
783 | bool kvm_check_iopl(struct kvm_vcpu *vcpu); | 811 | bool kvm_check_iopl(struct kvm_vcpu *vcpu); |
784 | 812 | ||
813 | static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) | ||
814 | { | ||
815 | return gpa; | ||
816 | } | ||
817 | |||
785 | static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) | 818 | static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) |
786 | { | 819 | { |
787 | struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); | 820 | struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); |
@@ -894,4 +927,17 @@ extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); | |||
894 | 927 | ||
895 | void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err); | 928 | void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err); |
896 | 929 | ||
930 | int kvm_is_in_guest(void); | ||
931 | |||
932 | void kvm_pmu_init(struct kvm_vcpu *vcpu); | ||
933 | void kvm_pmu_destroy(struct kvm_vcpu *vcpu); | ||
934 | void kvm_pmu_reset(struct kvm_vcpu *vcpu); | ||
935 | void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu); | ||
936 | bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr); | ||
937 | int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data); | ||
938 | int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data); | ||
939 | int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data); | ||
940 | void kvm_handle_pmu_event(struct kvm_vcpu *vcpu); | ||
941 | void kvm_deliver_pmi(struct kvm_vcpu *vcpu); | ||
942 | |||
897 | #endif /* _ASM_X86_KVM_HOST_H */ | 943 | #endif /* _ASM_X86_KVM_HOST_H */ |
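
A hedged sketch of how the common MSR read path is expected to consult the
new virtual-PMU hooks; the wrapper name is hypothetical, while the kvm_pmu_*
and kvm_get_msr_common() calls are declared in this header:

	static int sketch_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
	{
		/* Let the virtual PMU claim its MSRs first ... */
		if (kvm_pmu_msr(vcpu, msr))
			return kvm_pmu_get_msr(vcpu, msr, data);
		/* ... otherwise fall back to the regular handlers. */
		return kvm_get_msr_common(vcpu, msr, data);
	}
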
diff --git a/arch/x86/include/asm/mach_timer.h b/arch/x86/include/asm/mach_timer.h index 853728519ae9..88d0c3c74c13 100644 --- a/arch/x86/include/asm/mach_timer.h +++ b/arch/x86/include/asm/mach_timer.h | |||
@@ -15,7 +15,7 @@ | |||
15 | 15 | ||
16 | #define CALIBRATE_TIME_MSEC 30 /* 30 msecs */ | 16 | #define CALIBRATE_TIME_MSEC 30 /* 30 msecs */ |
17 | #define CALIBRATE_LATCH \ | 17 | #define CALIBRATE_LATCH \ |
18 | ((CLOCK_TICK_RATE * CALIBRATE_TIME_MSEC + 1000/2)/1000) | 18 | ((PIT_TICK_RATE * CALIBRATE_TIME_MSEC + 1000/2)/1000) |
19 | 19 | ||
20 | static inline void mach_prepare_counter(void) | 20 | static inline void mach_prepare_counter(void) |
21 | { | 21 | { |
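
With PIT_TICK_RATE = 1193182 Hz (the i8253 input clock), the corrected
expression works out to

	CALIBRATE_LATCH = (1193182 * 30 + 500) / 1000 = 35795

so the PIT counts down 35795 ticks over the 30 ms calibration window. On x86,
CLOCK_TICK_RATE was defined as PIT_TICK_RATE, so the value is unchanged; the
rename just uses the symbol that names the actual hardware clock.
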
diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h index 01fdf5674e24..0e8e85bb7c51 100644 --- a/arch/x86/include/asm/mc146818rtc.h +++ b/arch/x86/include/asm/mc146818rtc.h | |||
@@ -81,8 +81,8 @@ static inline unsigned char current_lock_cmos_reg(void) | |||
81 | #else | 81 | #else |
82 | #define lock_cmos_prefix(reg) do {} while (0) | 82 | #define lock_cmos_prefix(reg) do {} while (0) |
83 | #define lock_cmos_suffix(reg) do {} while (0) | 83 | #define lock_cmos_suffix(reg) do {} while (0) |
84 | #define lock_cmos(reg) | 84 | #define lock_cmos(reg) do { } while (0) |
85 | #define unlock_cmos() | 85 | #define unlock_cmos() do { } while (0) |
86 | #define do_i_have_lock_cmos() 0 | 86 | #define do_i_have_lock_cmos() 0 |
87 | #define current_lock_cmos_reg() 0 | 87 | #define current_lock_cmos_reg() 0 |
88 | #endif | 88 | #endif |
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 0e8ae57d3656..6aefb14cbbc5 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
@@ -50,10 +50,11 @@ | |||
50 | #define MCJ_CTX_MASK 3 | 50 | #define MCJ_CTX_MASK 3 |
51 | #define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK) | 51 | #define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK) |
52 | #define MCJ_CTX_RANDOM 0 /* inject context: random */ | 52 | #define MCJ_CTX_RANDOM 0 /* inject context: random */ |
53 | #define MCJ_CTX_PROCESS 1 /* inject context: process */ | 53 | #define MCJ_CTX_PROCESS 0x1 /* inject context: process */ |
54 | #define MCJ_CTX_IRQ 2 /* inject context: IRQ */ | 54 | #define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */ |
55 | #define MCJ_NMI_BROADCAST 4 /* do NMI broadcasting */ | 55 | #define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */ |
56 | #define MCJ_EXCEPTION 8 /* raise as exception */ | 56 | #define MCJ_EXCEPTION 0x8 /* raise as exception */ |
57 | #define MCJ_IRQ_BRAODCAST 0x10 /* do IRQ broadcasting */ | ||
57 | 58 | ||
58 | /* Fields are zero when not available */ | 59 | /* Fields are zero when not available */ |
59 | struct mce { | 60 | struct mce { |
@@ -120,7 +121,8 @@ struct mce_log { | |||
120 | 121 | ||
121 | #ifdef __KERNEL__ | 122 | #ifdef __KERNEL__ |
122 | 123 | ||
123 | extern struct atomic_notifier_head x86_mce_decoder_chain; | 124 | extern void mce_register_decode_chain(struct notifier_block *nb); |
125 | extern void mce_unregister_decode_chain(struct notifier_block *nb); | ||
124 | 126 | ||
125 | #include <linux/percpu.h> | 127 | #include <linux/percpu.h> |
126 | #include <linux/init.h> | 128 | #include <linux/init.h> |
@@ -149,7 +151,7 @@ static inline void enable_p5_mce(void) {} | |||
149 | 151 | ||
150 | void mce_setup(struct mce *m); | 152 | void mce_setup(struct mce *m); |
151 | void mce_log(struct mce *m); | 153 | void mce_log(struct mce *m); |
152 | DECLARE_PER_CPU(struct sys_device, mce_sysdev); | 154 | extern struct device *mce_device[CONFIG_NR_CPUS]; |
153 | 155 | ||
154 | /* | 156 | /* |
155 | * Maximum number of banks. | 157 |
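
A sketch, with illustrative names, of how a decoder (EDAC and similar
consumers) registers against the now-encapsulated notifier chain; the chain
passes the logged struct mce as the notifier data:

	static int sketch_mce_notify(struct notifier_block *nb,
				     unsigned long val, void *data)
	{
		struct mce *m = data;

		/* inspect m->status, m->addr, m->misc here */
		return NOTIFY_OK;
	}

	static struct notifier_block sketch_mce_nb = {
		.notifier_call	= sketch_mce_notify,
	};

	/* module init */
	mce_register_decode_chain(&sketch_mce_nb);
	/* module exit */
	mce_unregister_decode_chain(&sketch_mce_nb);
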
diff --git a/arch/x86/include/asm/memblock.h b/arch/x86/include/asm/memblock.h deleted file mode 100644 index 0cd3800f33b9..000000000000 --- a/arch/x86/include/asm/memblock.h +++ /dev/null | |||
@@ -1,23 +0,0 @@ | |||
1 | #ifndef _X86_MEMBLOCK_H | ||
2 | #define _X86_MEMBLOCK_H | ||
3 | |||
4 | #define ARCH_DISCARD_MEMBLOCK | ||
5 | |||
6 | u64 memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align); | ||
7 | |||
8 | void memblock_x86_reserve_range(u64 start, u64 end, char *name); | ||
9 | void memblock_x86_free_range(u64 start, u64 end); | ||
10 | struct range; | ||
11 | int __get_free_all_memory_range(struct range **range, int nodeid, | ||
12 | unsigned long start_pfn, unsigned long end_pfn); | ||
13 | int get_free_all_memory_range(struct range **rangep, int nodeid); | ||
14 | |||
15 | void memblock_x86_register_active_regions(int nid, unsigned long start_pfn, | ||
16 | unsigned long last_pfn); | ||
17 | u64 memblock_x86_hole_size(u64 start, u64 end); | ||
18 | u64 memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align); | ||
19 | u64 memblock_x86_free_memory_in_range(u64 addr, u64 limit); | ||
20 | u64 memblock_x86_memory_in_range(u64 addr, u64 limit); | ||
21 | bool memblock_x86_check_reserved_size(u64 *addrp, u64 *sizep, u64 align); | ||
22 | |||
23 | #endif | ||
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 24215072d0e1..4ebe157bf73d 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h | |||
@@ -48,6 +48,7 @@ static inline struct microcode_ops * __init init_intel_microcode(void) | |||
48 | 48 | ||
49 | #ifdef CONFIG_MICROCODE_AMD | 49 | #ifdef CONFIG_MICROCODE_AMD |
50 | extern struct microcode_ops * __init init_amd_microcode(void); | 50 | extern struct microcode_ops * __init init_amd_microcode(void); |
51 | extern void __exit exit_amd_microcode(void); | ||
51 | 52 | ||
52 | static inline void get_ucode_data(void *to, const u8 *from, size_t n) | 53 | static inline void get_ucode_data(void *to, const u8 *from, size_t n) |
53 | { | 54 | { |
@@ -59,6 +60,7 @@ static inline struct microcode_ops * __init init_amd_microcode(void) | |||
59 | { | 60 | { |
60 | return NULL; | 61 | return NULL; |
61 | } | 62 | } |
63 | static inline void __exit exit_amd_microcode(void) {} | ||
62 | #endif | 64 | #endif |
63 | 65 | ||
64 | #endif /* _ASM_X86_MICROCODE_H */ | 66 | #endif /* _ASM_X86_MICROCODE_H */ |
diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h index 93f79094c224..0a0a95460434 100644 --- a/arch/x86/include/asm/mrst.h +++ b/arch/x86/include/asm/mrst.h | |||
@@ -67,7 +67,7 @@ extern struct console early_mrst_console; | |||
67 | extern void mrst_early_console_init(void); | 67 | extern void mrst_early_console_init(void); |
68 | 68 | ||
69 | extern struct console early_hsu_console; | 69 | extern struct console early_hsu_console; |
70 | extern void hsu_early_console_init(void); | 70 | extern void hsu_early_console_init(const char *); |
71 | 71 | ||
72 | extern void intel_scu_devices_create(void); | 72 | extern void intel_scu_devices_create(void); |
73 | extern void intel_scu_devices_destroy(void); | 73 | extern void intel_scu_devices_destroy(void); |
diff --git a/arch/x86/include/asm/numachip/numachip_csr.h b/arch/x86/include/asm/numachip/numachip_csr.h new file mode 100644 index 000000000000..660f843df928 --- /dev/null +++ b/arch/x86/include/asm/numachip/numachip_csr.h | |||
@@ -0,0 +1,167 @@ | |||
1 | /* | ||
2 | * This file is subject to the terms and conditions of the GNU General Public | ||
3 | * License. See the file "COPYING" in the main directory of this archive | ||
4 | * for more details. | ||
5 | * | ||
6 | * Numascale NumaConnect-Specific Header file | ||
7 | * | ||
8 | * Copyright (C) 2011 Numascale AS. All rights reserved. | ||
9 | * | ||
10 | * Send feedback to <support@numascale.com> | ||
11 | * | ||
12 | */ | ||
13 | |||
14 | #ifndef _ASM_X86_NUMACHIP_NUMACHIP_CSR_H | ||
15 | #define _ASM_X86_NUMACHIP_NUMACHIP_CSR_H | ||
16 | |||
17 | #include <linux/numa.h> | ||
18 | #include <linux/percpu.h> | ||
19 | #include <linux/io.h> | ||
20 | #include <linux/swab.h> | ||
21 | #include <asm/types.h> | ||
22 | #include <asm/processor.h> | ||
23 | |||
24 | #define CSR_NODE_SHIFT 16 | ||
25 | #define CSR_NODE_BITS(p) (((unsigned long)(p)) << CSR_NODE_SHIFT) | ||
26 | #define CSR_NODE_MASK 0x0fff /* 4K nodes */ | ||
27 | |||
28 | /* 32K CSR space, b15 indicates geo/non-geo */ | ||
29 | #define CSR_OFFSET_MASK 0x7fffUL | ||
30 | |||
31 | /* Global CSR space covers all 4K possible nodes with 64K CSR space per node */ | ||
32 | #define NUMACHIP_GCSR_BASE 0x3fff00000000ULL | ||
33 | #define NUMACHIP_GCSR_LIM 0x3fff0fffffffULL | ||
34 | #define NUMACHIP_GCSR_SIZE (NUMACHIP_GCSR_LIM - NUMACHIP_GCSR_BASE + 1) | ||
35 | |||
36 | /* | ||
37 | * Local CSR space starts in global CSR space with "nodeid" = 0xfff0; however, | ||
38 | * when using the direct mapping on x86_64, both start and size need to be | ||
39 | * aligned with PMD_SIZE, which is 2M | ||
40 | */ | ||
41 | #define NUMACHIP_LCSR_BASE 0x3ffffe000000ULL | ||
42 | #define NUMACHIP_LCSR_LIM 0x3fffffffffffULL | ||
43 | #define NUMACHIP_LCSR_SIZE (NUMACHIP_LCSR_LIM - NUMACHIP_LCSR_BASE + 1) | ||
44 | |||
45 | static inline void *gcsr_address(int node, unsigned long offset) | ||
46 | { | ||
47 | return __va(NUMACHIP_GCSR_BASE | (1UL << 15) | | ||
48 | CSR_NODE_BITS(node & CSR_NODE_MASK) | (offset & CSR_OFFSET_MASK)); | ||
49 | } | ||
50 | |||
51 | static inline void *lcsr_address(unsigned long offset) | ||
52 | { | ||
53 | return __va(NUMACHIP_LCSR_BASE | (1UL << 15) | | ||
54 | CSR_NODE_BITS(0xfff0) | (offset & CSR_OFFSET_MASK)); | ||
55 | } | ||
56 | |||
57 | static inline unsigned int read_gcsr(int node, unsigned long offset) | ||
58 | { | ||
59 | return swab32(readl(gcsr_address(node, offset))); | ||
60 | } | ||
61 | |||
62 | static inline void write_gcsr(int node, unsigned long offset, unsigned int val) | ||
63 | { | ||
64 | writel(swab32(val), gcsr_address(node, offset)); | ||
65 | } | ||
66 | |||
67 | static inline unsigned int read_lcsr(unsigned long offset) | ||
68 | { | ||
69 | return swab32(readl(lcsr_address(offset))); | ||
70 | } | ||
71 | |||
72 | static inline void write_lcsr(unsigned long offset, unsigned int val) | ||
73 | { | ||
74 | writel(swab32(val), lcsr_address(offset)); | ||
75 | } | ||
76 | |||
77 | /* ========================================================================= */ | ||
78 | /* CSR_G0_STATE_CLEAR */ | ||
79 | /* ========================================================================= */ | ||
80 | |||
81 | #define CSR_G0_STATE_CLEAR (0x000 + (0 << 12)) | ||
82 | union numachip_csr_g0_state_clear { | ||
83 | unsigned int v; | ||
84 | struct numachip_csr_g0_state_clear_s { | ||
85 | unsigned int _state:2; | ||
86 | unsigned int _rsvd_2_6:5; | ||
87 | unsigned int _lost:1; | ||
88 | unsigned int _rsvd_8_31:24; | ||
89 | } s; | ||
90 | }; | ||
91 | |||
92 | /* ========================================================================= */ | ||
93 | /* CSR_G0_NODE_IDS */ | ||
94 | /* ========================================================================= */ | ||
95 | |||
96 | #define CSR_G0_NODE_IDS (0x008 + (0 << 12)) | ||
97 | union numachip_csr_g0_node_ids { | ||
98 | unsigned int v; | ||
99 | struct numachip_csr_g0_node_ids_s { | ||
100 | unsigned int _initialid:16; | ||
101 | unsigned int _nodeid:12; | ||
102 | unsigned int _rsvd_28_31:4; | ||
103 | } s; | ||
104 | }; | ||
105 | |||
106 | /* ========================================================================= */ | ||
107 | /* CSR_G3_EXT_IRQ_GEN */ | ||
108 | /* ========================================================================= */ | ||
109 | |||
110 | #define CSR_G3_EXT_IRQ_GEN (0x030 + (3 << 12)) | ||
111 | union numachip_csr_g3_ext_irq_gen { | ||
112 | unsigned int v; | ||
113 | struct numachip_csr_g3_ext_irq_gen_s { | ||
114 | unsigned int _vector:8; | ||
115 | unsigned int _msgtype:3; | ||
116 | unsigned int _index:5; | ||
117 | unsigned int _destination_apic_id:16; | ||
118 | } s; | ||
119 | }; | ||
120 | |||
121 | /* ========================================================================= */ | ||
122 | /* CSR_G3_EXT_IRQ_STATUS */ | ||
123 | /* ========================================================================= */ | ||
124 | |||
125 | #define CSR_G3_EXT_IRQ_STATUS (0x034 + (3 << 12)) | ||
126 | union numachip_csr_g3_ext_irq_status { | ||
127 | unsigned int v; | ||
128 | struct numachip_csr_g3_ext_irq_status_s { | ||
129 | unsigned int _result:32; | ||
130 | } s; | ||
131 | }; | ||
132 | |||
133 | /* ========================================================================= */ | ||
134 | /* CSR_G3_EXT_IRQ_DEST */ | ||
135 | /* ========================================================================= */ | ||
136 | |||
137 | #define CSR_G3_EXT_IRQ_DEST (0x038 + (3 << 12)) | ||
138 | union numachip_csr_g3_ext_irq_dest { | ||
139 | unsigned int v; | ||
140 | struct numachip_csr_g3_ext_irq_dest_s { | ||
141 | unsigned int _irq:8; | ||
142 | unsigned int _rsvd_8_31:24; | ||
143 | } s; | ||
144 | }; | ||
145 | |||
146 | /* ========================================================================= */ | ||
147 | /* CSR_G3_NC_ATT_MAP_SELECT */ | ||
148 | /* ========================================================================= */ | ||
149 | |||
150 | #define CSR_G3_NC_ATT_MAP_SELECT (0x7fc + (3 << 12)) | ||
151 | union numachip_csr_g3_nc_att_map_select { | ||
152 | unsigned int v; | ||
153 | struct numachip_csr_g3_nc_att_map_select_s { | ||
154 | unsigned int _upper_address_bits:4; | ||
155 | unsigned int _select_ram:4; | ||
156 | unsigned int _rsvd_8_31:24; | ||
157 | } s; | ||
158 | }; | ||
159 | |||
160 | /* ========================================================================= */ | ||
161 | /* CSR_G3_NC_ATT_MAP_SELECT_0-255 */ | ||
162 | /* ========================================================================= */ | ||
163 | |||
164 | #define CSR_G3_NC_ATT_MAP_SELECT_0 (0x800 + (3 << 12)) | ||
165 | |||
166 | #endif /* _ASM_X86_NUMACHIP_NUMACHIP_CSR_H */ | ||
167 | |||
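
Illustrative use of the accessors above, reading this node's identity
through the local CSR window:

	union numachip_csr_g0_node_ids ids;

	ids.v = read_lcsr(CSR_G0_NODE_IDS);
	pr_info("NumaChip node %u (initial id %u)\n",
		ids.s._nodeid, ids.s._initialid);
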
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index d498943b906c..df75d07571ce 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h | |||
@@ -112,19 +112,28 @@ static inline void x86_teardown_msi_irq(unsigned int irq) | |||
112 | { | 112 | { |
113 | x86_msi.teardown_msi_irq(irq); | 113 | x86_msi.teardown_msi_irq(irq); |
114 | } | 114 | } |
115 | static inline void x86_restore_msi_irqs(struct pci_dev *dev, int irq) | ||
116 | { | ||
117 | x86_msi.restore_msi_irqs(dev, irq); | ||
118 | } | ||
115 | #define arch_setup_msi_irqs x86_setup_msi_irqs | 119 | #define arch_setup_msi_irqs x86_setup_msi_irqs |
116 | #define arch_teardown_msi_irqs x86_teardown_msi_irqs | 120 | #define arch_teardown_msi_irqs x86_teardown_msi_irqs |
117 | #define arch_teardown_msi_irq x86_teardown_msi_irq | 121 | #define arch_teardown_msi_irq x86_teardown_msi_irq |
122 | #define arch_restore_msi_irqs x86_restore_msi_irqs | ||
118 | /* implemented in arch/x86/kernel/apic/io_apic.c */ | 123 | /* implemented in arch/x86/kernel/apic/io_apic.c */ |
119 | int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); | 124 | int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); |
120 | void native_teardown_msi_irq(unsigned int irq); | 125 | void native_teardown_msi_irq(unsigned int irq); |
126 | void native_restore_msi_irqs(struct pci_dev *dev, int irq); | ||
121 | /* default to the implementation in drivers/lib/msi.c */ | 127 | /* default to the implementation in drivers/lib/msi.c */ |
122 | #define HAVE_DEFAULT_MSI_TEARDOWN_IRQS | 128 | #define HAVE_DEFAULT_MSI_TEARDOWN_IRQS |
129 | #define HAVE_DEFAULT_MSI_RESTORE_IRQS | ||
123 | void default_teardown_msi_irqs(struct pci_dev *dev); | 130 | void default_teardown_msi_irqs(struct pci_dev *dev); |
131 | void default_restore_msi_irqs(struct pci_dev *dev, int irq); | ||
124 | #else | 132 | #else |
125 | #define native_setup_msi_irqs NULL | 133 | #define native_setup_msi_irqs NULL |
126 | #define native_teardown_msi_irq NULL | 134 | #define native_teardown_msi_irq NULL |
127 | #define default_teardown_msi_irqs NULL | 135 | #define default_teardown_msi_irqs NULL |
136 | #define default_restore_msi_irqs NULL | ||
128 | #endif | 137 | #endif |
129 | 138 | ||
130 | #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) | 139 | #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) |
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index e38197806853..b3a531746026 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h | |||
@@ -44,8 +44,6 @@ enum pci_bf_sort_state { | |||
44 | 44 | ||
45 | /* pci-i386.c */ | 45 | /* pci-i386.c */ |
46 | 46 | ||
47 | extern unsigned int pcibios_max_latency; | ||
48 | |||
49 | void pcibios_resource_survey(void); | 47 | void pcibios_resource_survey(void); |
50 | void pcibios_set_cache_line_size(void); | 48 | void pcibios_set_cache_line_size(void); |
51 | 49 | ||
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 3470c9d0ebba..7a11910a63c4 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h | |||
@@ -414,22 +414,6 @@ do { \ | |||
414 | #define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval) | 414 | #define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval) |
415 | #define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval) | 415 | #define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval) |
416 | 416 | ||
417 | #define irqsafe_cpu_add_1(pcp, val) percpu_add_op((pcp), val) | ||
418 | #define irqsafe_cpu_add_2(pcp, val) percpu_add_op((pcp), val) | ||
419 | #define irqsafe_cpu_add_4(pcp, val) percpu_add_op((pcp), val) | ||
420 | #define irqsafe_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) | ||
421 | #define irqsafe_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) | ||
422 | #define irqsafe_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) | ||
423 | #define irqsafe_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val) | ||
424 | #define irqsafe_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val) | ||
425 | #define irqsafe_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val) | ||
426 | #define irqsafe_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val) | ||
427 | #define irqsafe_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) | ||
428 | #define irqsafe_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) | ||
429 | #define irqsafe_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval) | ||
430 | #define irqsafe_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval) | ||
431 | #define irqsafe_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval) | ||
432 | |||
433 | #ifndef CONFIG_M386 | 417 | #ifndef CONFIG_M386 |
434 | #define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) | 418 | #define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) |
435 | #define __this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) | 419 | #define __this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) |
@@ -445,29 +429,22 @@ do { \ | |||
445 | #define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | 429 | #define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) |
446 | #define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | 430 | #define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) |
447 | 431 | ||
448 | #define irqsafe_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
449 | #define irqsafe_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
450 | #define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
451 | #endif /* !CONFIG_M386 */ | 432 | #endif /* !CONFIG_M386 */ |
452 | 433 | ||
453 | #ifdef CONFIG_X86_CMPXCHG64 | 434 | #ifdef CONFIG_X86_CMPXCHG64 |
454 | #define percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) \ | 435 | #define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2) \ |
455 | ({ \ | 436 | ({ \ |
456 | char __ret; \ | 437 | bool __ret; \ |
457 | typeof(o1) __o1 = o1; \ | 438 | typeof(pcp1) __o1 = (o1), __n1 = (n1); \ |
458 | typeof(o1) __n1 = n1; \ | 439 | typeof(pcp2) __o2 = (o2), __n2 = (n2); \ |
459 | typeof(o2) __o2 = o2; \ | ||
460 | typeof(o2) __n2 = n2; \ | ||
461 | typeof(o2) __dummy = n2; \ | ||
462 | asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t" \ | 440 | asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t" \ |
463 | : "=a"(__ret), "=m" (pcp1), "=d"(__dummy) \ | 441 | : "=a" (__ret), "+m" (pcp1), "+m" (pcp2), "+d" (__o2) \ |
464 | : "b"(__n1), "c"(__n2), "a"(__o1), "d"(__o2)); \ | 442 | : "b" (__n1), "c" (__n2), "a" (__o1)); \ |
465 | __ret; \ | 443 | __ret; \ |
466 | }) | 444 | }) |
467 | 445 | ||
468 | #define __this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) | 446 | #define __this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double |
469 | #define this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) | 447 | #define this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double |
470 | #define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) | ||
471 | #endif /* CONFIG_X86_CMPXCHG64 */ | 448 | #endif /* CONFIG_X86_CMPXCHG64 */ |
472 | 449 | ||
473 | /* | 450 | /* |
@@ -495,44 +472,28 @@ do { \ | |||
495 | #define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) | 472 | #define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) |
496 | #define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | 473 | #define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) |
497 | 474 | ||
498 | #define irqsafe_cpu_add_8(pcp, val) percpu_add_op((pcp), val) | ||
499 | #define irqsafe_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) | ||
500 | #define irqsafe_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) | ||
501 | #define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) | ||
502 | #define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) | ||
503 | #define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
504 | |||
505 | /* | 475 | /* |
506 | * Pretty complex macro to generate the cmpxchg16b instruction. The instruction | 476 | * Pretty complex macro to generate the cmpxchg16b instruction. The instruction |
507 | * is not supported on early AMD64 processors so we must be able to emulate | 477 | * is not supported on early AMD64 processors so we must be able to emulate |
508 | * it in software. The address used in the cmpxchg16b instruction must be | 478 | * it in software. The address used in the cmpxchg16b instruction must be |
509 | * aligned to a 16 byte boundary. | 479 | * aligned to a 16 byte boundary. |
510 | */ | 480 | */ |
511 | #ifdef CONFIG_SMP | 481 | #define percpu_cmpxchg16b_double(pcp1, pcp2, o1, o2, n1, n2) \ |
512 | #define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP3 | ||
513 | #else | ||
514 | #define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP2 | ||
515 | #endif | ||
516 | #define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) \ | ||
517 | ({ \ | 482 | ({ \ |
518 | char __ret; \ | 483 | bool __ret; \ |
519 | typeof(o1) __o1 = o1; \ | 484 | typeof(pcp1) __o1 = (o1), __n1 = (n1); \ |
520 | typeof(o1) __n1 = n1; \ | 485 | typeof(pcp2) __o2 = (o2), __n2 = (n2); \ |
521 | typeof(o2) __o2 = o2; \ | 486 | alternative_io("leaq %P1,%%rsi\n\tcall this_cpu_cmpxchg16b_emu\n\t", \ |
522 | typeof(o2) __n2 = n2; \ | 487 | "cmpxchg16b " __percpu_arg(1) "\n\tsetz %0\n\t", \ |
523 | typeof(o2) __dummy; \ | ||
524 | alternative_io(CMPXCHG16B_EMU_CALL, \ | ||
525 | "cmpxchg16b " __percpu_prefix "(%%rsi)\n\tsetz %0\n\t", \ | ||
526 | X86_FEATURE_CX16, \ | 488 | X86_FEATURE_CX16, \ |
527 | ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \ | 489 | ASM_OUTPUT2("=a" (__ret), "+m" (pcp1), \ |
528 | "S" (&pcp1), "b"(__n1), "c"(__n2), \ | 490 | "+m" (pcp2), "+d" (__o2)), \ |
529 | "a"(__o1), "d"(__o2) : "memory"); \ | 491 | "b" (__n1), "c" (__n2), "a" (__o1) : "rsi"); \ |
530 | __ret; \ | 492 | __ret; \ |
531 | }) | 493 | }) |
532 | 494 | ||
533 | #define __this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) | 495 | #define __this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double |
534 | #define this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) | 496 | #define this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double |
535 | #define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) | ||
536 | 497 | ||
537 | #endif | 498 | #endif |
538 | 499 | ||
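
A minimal sketch of a caller of the double-word primitive these macros back;
the two per-cpu words must be adjacent and aligned to twice the word size
(callers such as SLUB guarantee this by layout), and every name below is
illustrative:

	struct sketch_pair {
		void *ptr;
		unsigned long seq;
	} __aligned(2 * sizeof(void *));

	static DEFINE_PER_CPU(struct sketch_pair, sketch_pcp);

	static bool sketch_update(void *oldp, unsigned long oldseq, void *newp)
	{
		/* Replace both words atomically, or fail if either changed. */
		return this_cpu_cmpxchg_double(sketch_pcp.ptr, sketch_pcp.seq,
					       oldp, oldseq,
					       newp, oldseq + 1);
	}
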
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index f61c62f7d5d8..096c975e099f 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h | |||
@@ -57,6 +57,7 @@ | |||
57 | (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) | 57 | (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) |
58 | 58 | ||
59 | #define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 | 59 | #define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 |
60 | #define ARCH_PERFMON_EVENTS_COUNT 7 | ||
60 | 61 | ||
61 | /* | 62 | /* |
62 | * Intel "Architectural Performance Monitoring" CPUID | 63 | * Intel "Architectural Performance Monitoring" CPUID |
@@ -72,6 +73,19 @@ union cpuid10_eax { | |||
72 | unsigned int full; | 73 | unsigned int full; |
73 | }; | 74 | }; |
74 | 75 | ||
76 | union cpuid10_ebx { | ||
77 | struct { | ||
78 | unsigned int no_unhalted_core_cycles:1; | ||
79 | unsigned int no_instructions_retired:1; | ||
80 | unsigned int no_unhalted_reference_cycles:1; | ||
81 | unsigned int no_llc_reference:1; | ||
82 | unsigned int no_llc_misses:1; | ||
83 | unsigned int no_branch_instruction_retired:1; | ||
84 | unsigned int no_branch_misses_retired:1; | ||
85 | } split; | ||
86 | unsigned int full; | ||
87 | }; | ||
88 | |||
75 | union cpuid10_edx { | 89 | union cpuid10_edx { |
76 | struct { | 90 | struct { |
77 | unsigned int num_counters_fixed:5; | 91 | unsigned int num_counters_fixed:5; |
@@ -81,6 +95,15 @@ union cpuid10_edx { | |||
81 | unsigned int full; | 95 | unsigned int full; |
82 | }; | 96 | }; |
83 | 97 | ||
98 | struct x86_pmu_capability { | ||
99 | int version; | ||
100 | int num_counters_gp; | ||
101 | int num_counters_fixed; | ||
102 | int bit_width_gp; | ||
103 | int bit_width_fixed; | ||
104 | unsigned int events_mask; | ||
105 | int events_mask_len; | ||
106 | }; | ||
84 | 107 | ||
85 | /* | 108 | /* |
86 | * Fixed-purpose performance events: | 109 | * Fixed-purpose performance events: |
@@ -89,23 +112,24 @@ union cpuid10_edx { | |||
89 | /* | 112 | /* |
90 | * All 3 fixed-mode PMCs are configured via this single MSR: | 113 | * All 3 fixed-mode PMCs are configured via this single MSR: |
91 | */ | 114 | */ |
92 | #define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d | 115 | #define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d |
93 | 116 | ||
94 | /* | 117 | /* |
95 | * The counts are available in three separate MSRs: | 118 | * The counts are available in three separate MSRs: |
96 | */ | 119 | */ |
97 | 120 | ||
98 | /* Instr_Retired.Any: */ | 121 | /* Instr_Retired.Any: */ |
99 | #define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 | 122 | #define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 |
100 | #define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0) | 123 | #define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0) |
101 | 124 | ||
102 | /* CPU_CLK_Unhalted.Core: */ | 125 | /* CPU_CLK_Unhalted.Core: */ |
103 | #define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a | 126 | #define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a |
104 | #define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1) | 127 | #define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1) |
105 | 128 | ||
106 | /* CPU_CLK_Unhalted.Ref: */ | 129 | /* CPU_CLK_Unhalted.Ref: */ |
107 | #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b | 130 | #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b |
108 | #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) | 131 | #define X86_PMC_IDX_FIXED_REF_CYCLES (X86_PMC_IDX_FIXED + 2) |
132 | #define X86_PMC_MSK_FIXED_REF_CYCLES (1ULL << X86_PMC_IDX_FIXED_REF_CYCLES) | ||
109 | 133 | ||
110 | /* | 134 | /* |
111 | * We model BTS tracing as another fixed-mode PMC. | 135 | * We model BTS tracing as another fixed-mode PMC. |
@@ -202,6 +226,7 @@ struct perf_guest_switch_msr { | |||
202 | }; | 226 | }; |
203 | 227 | ||
204 | extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr); | 228 | extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr); |
229 | extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap); | ||
205 | #else | 230 | #else |
206 | static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr) | 231 | static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr) |
207 | { | 232 | { |
@@ -209,6 +234,11 @@ static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr) | |||
209 | return NULL; | 234 | return NULL; |
210 | } | 235 | } |
211 | 236 | ||
237 | static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) | ||
238 | { | ||
239 | memset(cap, 0, sizeof(*cap)); | ||
240 | } | ||
241 | |||
212 | static inline void perf_events_lapic_init(void) { } | 242 | static inline void perf_events_lapic_init(void) { } |
213 | #endif | 243 | #endif |
214 | 244 | ||
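
Sketch of a consumer probing the host PMU through the new helper (KVM's PMU
virtualization is the intended user; the printout is illustrative):

	struct x86_pmu_capability cap;

	perf_get_x86_pmu_capability(&cap);
	pr_info("arch PMU v%d: %d GP / %d fixed counters, GP width %d bits\n",
		cap.version, cap.num_counters_gp,
		cap.num_counters_fixed, cap.bit_width_gp);
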
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 18601c86fab1..49afb3f41eb6 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h | |||
@@ -703,7 +703,7 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, | |||
703 | pte_update(mm, addr, ptep); | 703 | pte_update(mm, addr, ptep); |
704 | } | 704 | } |
705 | 705 | ||
706 | #define flush_tlb_fix_spurious_fault(vma, address) | 706 | #define flush_tlb_fix_spurious_fault(vma, address) do { } while (0) |
707 | 707 | ||
708 | #define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) | 708 | #define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) |
709 | 709 | ||
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h index 2dddb317bb39..f8ab3eaad128 100644 --- a/arch/x86/include/asm/processor-flags.h +++ b/arch/x86/include/asm/processor-flags.h | |||
@@ -6,6 +6,7 @@ | |||
6 | * EFLAGS bits | 6 | * EFLAGS bits |
7 | */ | 7 | */ |
8 | #define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ | 8 | #define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ |
9 | #define X86_EFLAGS_BIT1 0x00000002 /* Bit 1 - always on */ | ||
9 | #define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ | 10 | #define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ |
10 | #define X86_EFLAGS_AF 0x00000010 /* Auxiliary carry Flag */ | 11 | #define X86_EFLAGS_AF 0x00000010 /* Auxiliary carry Flag */ |
11 | #define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ | 12 | #define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index b650435ffb53..aa9088c26931 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -99,7 +99,6 @@ struct cpuinfo_x86 { | |||
99 | u16 apicid; | 99 | u16 apicid; |
100 | u16 initial_apicid; | 100 | u16 initial_apicid; |
101 | u16 x86_clflush_size; | 101 | u16 x86_clflush_size; |
102 | #ifdef CONFIG_SMP | ||
103 | /* number of cores as seen by the OS: */ | 102 | /* number of cores as seen by the OS: */ |
104 | u16 booted_cores; | 103 | u16 booted_cores; |
105 | /* Physical processor id: */ | 104 | /* Physical processor id: */ |
@@ -110,7 +109,6 @@ struct cpuinfo_x86 { | |||
110 | u8 compute_unit_id; | 109 | u8 compute_unit_id; |
111 | /* Index into per_cpu list: */ | 110 | /* Index into per_cpu list: */ |
112 | u16 cpu_index; | 111 | u16 cpu_index; |
113 | #endif | ||
114 | u32 microcode; | 112 | u32 microcode; |
115 | } __attribute__((__aligned__(SMP_CACHE_BYTES))); | 113 | } __attribute__((__aligned__(SMP_CACHE_BYTES))); |
116 | 114 | ||
diff --git a/arch/x86/include/asm/serpent.h b/arch/x86/include/asm/serpent.h new file mode 100644 index 000000000000..d3ef63fe0c81 --- /dev/null +++ b/arch/x86/include/asm/serpent.h | |||
@@ -0,0 +1,63 @@ | |||
1 | #ifndef ASM_X86_SERPENT_H | ||
2 | #define ASM_X86_SERPENT_H | ||
3 | |||
4 | #include <linux/crypto.h> | ||
5 | #include <crypto/serpent.h> | ||
6 | |||
7 | #ifdef CONFIG_X86_32 | ||
8 | |||
9 | #define SERPENT_PARALLEL_BLOCKS 4 | ||
10 | |||
11 | asmlinkage void __serpent_enc_blk_4way(struct serpent_ctx *ctx, u8 *dst, | ||
12 | const u8 *src, bool xor); | ||
13 | asmlinkage void serpent_dec_blk_4way(struct serpent_ctx *ctx, u8 *dst, | ||
14 | const u8 *src); | ||
15 | |||
16 | static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, | ||
17 | const u8 *src) | ||
18 | { | ||
19 | __serpent_enc_blk_4way(ctx, dst, src, false); | ||
20 | } | ||
21 | |||
22 | static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst, | ||
23 | const u8 *src) | ||
24 | { | ||
25 | __serpent_enc_blk_4way(ctx, dst, src, true); | ||
26 | } | ||
27 | |||
28 | static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, | ||
29 | const u8 *src) | ||
30 | { | ||
31 | serpent_dec_blk_4way(ctx, dst, src); | ||
32 | } | ||
33 | |||
34 | #else | ||
35 | |||
36 | #define SERPENT_PARALLEL_BLOCKS 8 | ||
37 | |||
38 | asmlinkage void __serpent_enc_blk_8way(struct serpent_ctx *ctx, u8 *dst, | ||
39 | const u8 *src, bool xor); | ||
40 | asmlinkage void serpent_dec_blk_8way(struct serpent_ctx *ctx, u8 *dst, | ||
41 | const u8 *src); | ||
42 | |||
43 | static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, | ||
44 | const u8 *src) | ||
45 | { | ||
46 | __serpent_enc_blk_8way(ctx, dst, src, false); | ||
47 | } | ||
48 | |||
49 | static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst, | ||
50 | const u8 *src) | ||
51 | { | ||
52 | __serpent_enc_blk_8way(ctx, dst, src, true); | ||
53 | } | ||
54 | |||
55 | static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, | ||
56 | const u8 *src) | ||
57 | { | ||
58 | serpent_dec_blk_8way(ctx, dst, src); | ||
59 | } | ||
60 | |||
61 | #endif | ||
62 | |||
63 | #endif | ||
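
The wrappers above let common glue code stay width-agnostic; an illustrative
caller, where one invocation processes SERPENT_PARALLEL_BLOCKS blocks (4 on
32-bit, 8 on 64-bit):

	static void sketch_encrypt_chunk(struct serpent_ctx *ctx, u8 *dst,
					 const u8 *src)
	{
		serpent_enc_blk_xway(ctx, dst, src);
	}
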
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 9756551ec760..d0f19f9fb846 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h | |||
@@ -47,7 +47,7 @@ extern void reserve_standard_io_resources(void); | |||
47 | extern void i386_reserve_resources(void); | 47 | extern void i386_reserve_resources(void); |
48 | extern void setup_default_timer_irq(void); | 48 | extern void setup_default_timer_irq(void); |
49 | 49 | ||
50 | #ifdef CONFIG_X86_MRST | 50 | #ifdef CONFIG_X86_INTEL_MID |
51 | extern void x86_mrst_early_setup(void); | 51 | extern void x86_mrst_early_setup(void); |
52 | #else | 52 | #else |
53 | static inline void x86_mrst_early_setup(void) { } | 53 | static inline void x86_mrst_early_setup(void) { } |
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 73b11bc0ae6f..0434c400287c 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h | |||
@@ -225,5 +225,11 @@ extern int hard_smp_processor_id(void); | |||
225 | 225 | ||
226 | #endif /* CONFIG_X86_LOCAL_APIC */ | 226 | #endif /* CONFIG_X86_LOCAL_APIC */ |
227 | 227 | ||
228 | #ifdef CONFIG_DEBUG_NMI_SELFTEST | ||
229 | extern void nmi_selftest(void); | ||
230 | #else | ||
231 | #define nmi_selftest() do { } while (0) | ||
232 | #endif | ||
233 | |||
228 | #endif /* __ASSEMBLY__ */ | 234 | #endif /* __ASSEMBLY__ */ |
229 | #endif /* _ASM_X86_SMP_H */ | 235 | #endif /* _ASM_X86_SMP_H */ |
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 972c260919a3..a82c2bf504b6 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h | |||
@@ -79,23 +79,10 @@ static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) | |||
79 | return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail; | 79 | return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail; |
80 | } | 80 | } |
81 | 81 | ||
82 | #if (NR_CPUS < 256) | ||
83 | static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) | 82 | static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) |
84 | { | 83 | { |
85 | asm volatile(UNLOCK_LOCK_PREFIX "incb %0" | 84 | __add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX); |
86 | : "+m" (lock->head_tail) | ||
87 | : | ||
88 | : "memory", "cc"); | ||
89 | } | 85 | } |
90 | #else | ||
91 | static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) | ||
92 | { | ||
93 | asm volatile(UNLOCK_LOCK_PREFIX "incw %0" | ||
94 | : "+m" (lock->head_tail) | ||
95 | : | ||
96 | : "memory", "cc"); | ||
97 | } | ||
98 | #endif | ||
99 | 86 | ||
100 | static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) | 87 | static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) |
101 | { | 88 | { |
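
For reference, a plain-C sketch of the ticket layout the unified unlock
relies on; __add() emits a single locked add of the correct width, so the
separate incb/incw variants are no longer needed:

	/* conceptual view of arch_spinlock_t's ticket pair */
	struct sketch_tickets {
		__ticket_t head;	/* ticket currently being served */
		__ticket_t tail;	/* next ticket to hand out */
	};

	/* unlock is simply head++; the waiter spinning until
	 * tickets.head equals its own ticket then proceeds */
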
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index c4a348f7bd43..d962e5652a73 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h | |||
@@ -15,6 +15,7 @@ | |||
15 | 15 | ||
16 | #include <linux/sched.h> | 16 | #include <linux/sched.h> |
17 | #include <linux/err.h> | 17 | #include <linux/err.h> |
18 | #include <asm/asm-offsets.h> /* For NR_syscalls */ | ||
18 | 19 | ||
19 | extern const unsigned long sys_call_table[]; | 20 | extern const unsigned long sys_call_table[]; |
20 | 21 | ||
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index a1fe5c127b52..bc817cd8b443 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h | |||
@@ -40,7 +40,8 @@ struct thread_info { | |||
40 | */ | 40 | */ |
41 | __u8 supervisor_stack[0]; | 41 | __u8 supervisor_stack[0]; |
42 | #endif | 42 | #endif |
43 | int uaccess_err; | 43 | unsigned int sig_on_uaccess_error:1; |
44 | unsigned int uaccess_err:1; /* uaccess failed */ | ||
44 | }; | 45 | }; |
45 | 46 | ||
46 | #define INIT_THREAD_INFO(tsk) \ | 47 | #define INIT_THREAD_INFO(tsk) \ |
@@ -90,7 +91,6 @@ struct thread_info { | |||
90 | #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ | 91 | #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ |
91 | #define TIF_DEBUG 21 /* uses debug registers */ | 92 | #define TIF_DEBUG 21 /* uses debug registers */ |
92 | #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ | 93 | #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ |
93 | #define TIF_FREEZE 23 /* is freezing for suspend */ | ||
94 | #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ | 94 | #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ |
95 | #define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */ | 95 | #define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */ |
96 | #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ | 96 | #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ |
@@ -112,7 +112,6 @@ struct thread_info { | |||
112 | #define _TIF_FORK (1 << TIF_FORK) | 112 | #define _TIF_FORK (1 << TIF_FORK) |
113 | #define _TIF_DEBUG (1 << TIF_DEBUG) | 113 | #define _TIF_DEBUG (1 << TIF_DEBUG) |
114 | #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) | 114 | #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) |
115 | #define _TIF_FREEZE (1 << TIF_FREEZE) | ||
116 | #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) | 115 | #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) |
117 | #define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP) | 116 | #define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP) |
118 | #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) | 117 | #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) |
@@ -231,6 +230,12 @@ static inline struct thread_info *current_thread_info(void) | |||
231 | movq PER_CPU_VAR(kernel_stack),reg ; \ | 230 | movq PER_CPU_VAR(kernel_stack),reg ; \ |
232 | subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg | 231 | subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg |
233 | 232 | ||
233 | /* | ||
234 | * Same, for when PER_CPU_VAR(kernel_stack), possibly plus an offset, is | ||
235 | * already held in a given register (for use in assembler memory operands). | ||
236 | */ | ||
237 | #define THREAD_INFO(reg, off) KERNEL_STACK_OFFSET+(off)-THREAD_SIZE(reg) | ||
238 | |||
234 | #endif | 239 | #endif |
235 | 240 | ||
236 | #endif /* !X86_32 */ | 241 | #endif /* !X86_32 */ |
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index c00692476e9f..b9676ae37ada 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h | |||
@@ -130,10 +130,8 @@ extern void setup_node_to_cpumask_map(void); | |||
130 | .balance_interval = 1, \ | 130 | .balance_interval = 1, \ |
131 | } | 131 | } |
132 | 132 | ||
133 | #ifdef CONFIG_X86_64 | ||
134 | extern int __node_distance(int, int); | 133 | extern int __node_distance(int, int); |
135 | #define node_distance(a, b) __node_distance(a, b) | 134 | #define node_distance(a, b) __node_distance(a, b) |
136 | #endif | ||
137 | 135 | ||
138 | #else /* !CONFIG_NUMA */ | 136 | #else /* !CONFIG_NUMA */ |
139 | 137 | ||
@@ -174,7 +172,7 @@ static inline void arch_fix_phys_package_id(int num, u32 slot) | |||
174 | } | 172 | } |
175 | 173 | ||
176 | struct pci_bus; | 174 | struct pci_bus; |
177 | void x86_pci_root_bus_res_quirks(struct pci_bus *b); | 175 | void x86_pci_root_bus_resources(int bus, struct list_head *resources); |
178 | 176 | ||
179 | #ifdef CONFIG_SMP | 177 | #ifdef CONFIG_SMP |
180 | #define mc_capable() ((boot_cpu_data.x86_max_cores > 1) && \ | 178 | #define mc_capable() ((boot_cpu_data.x86_max_cores > 1) && \ |
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 83e2efd181e2..15d99153a96d 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h | |||
@@ -51,6 +51,8 @@ extern int unsynchronized_tsc(void); | |||
51 | extern int check_tsc_unstable(void); | 51 | extern int check_tsc_unstable(void); |
52 | extern unsigned long native_calibrate_tsc(void); | 52 | extern unsigned long native_calibrate_tsc(void); |
53 | 53 | ||
54 | extern int tsc_clocksource_reliable; | ||
55 | |||
54 | /* | 56 | /* |
55 | * Boot-time check whether the TSCs are synchronized across | 57 | * Boot-time check whether the TSCs are synchronized across |
56 | * all CPUs/cores: | 58 | * all CPUs/cores: |
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 36361bf6fdd1..8be5f54d9360 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h | |||
@@ -462,7 +462,7 @@ struct __large_struct { unsigned long buf[100]; }; | |||
462 | barrier(); | 462 | barrier(); |
463 | 463 | ||
464 | #define uaccess_catch(err) \ | 464 | #define uaccess_catch(err) \ |
465 | (err) |= current_thread_info()->uaccess_err; \ | 465 | (err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0); \ |
466 | current_thread_info()->uaccess_err = prev_err; \ | 466 | current_thread_info()->uaccess_err = prev_err; \ |
467 | } while (0) | 467 | } while (0) |
468 | 468 | ||
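
A sketch of the try/catch pattern the fixed macro serves; with uaccess_err
now a one-bit flag, a fault anywhere in the block surfaces uniformly as
-EFAULT (the function and its parameters are hypothetical):

	static int sketch_read_word(u32 __user *uptr, u32 *out)
	{
		int err = 0;	/* uaccess_catch() ORs into this */

		uaccess_try {
			get_user_ex(*out, uptr);	/* a fault sets the flag */
		} uaccess_catch(err);

		return err;	/* 0 on success, -EFAULT on any fault */
	}
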
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index 2a58ed3e51d8..b4a3db7ce140 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h | |||
@@ -1,13 +1,59 @@ | |||
1 | #ifndef _ASM_X86_UNISTD_H | ||
2 | #define _ASM_X86_UNISTD_H 1 | ||
3 | |||
1 | #ifdef __KERNEL__ | 4 | #ifdef __KERNEL__ |
2 | # ifdef CONFIG_X86_32 | 5 | # ifdef CONFIG_X86_32 |
3 | # include "unistd_32.h" | 6 | |
7 | # include <asm/unistd_32.h> | ||
8 | # define __ARCH_WANT_IPC_PARSE_VERSION | ||
9 | # define __ARCH_WANT_STAT64 | ||
10 | # define __ARCH_WANT_SYS_OLD_MMAP | ||
11 | # define __ARCH_WANT_SYS_OLD_SELECT | ||
12 | |||
4 | # else | 13 | # else |
5 | # include "unistd_64.h" | 14 | |
15 | # include <asm/unistd_64.h> | ||
16 | # define __ARCH_WANT_COMPAT_SYS_TIME | ||
17 | |||
6 | # endif | 18 | # endif |
19 | |||
20 | # define __ARCH_WANT_OLD_READDIR | ||
21 | # define __ARCH_WANT_OLD_STAT | ||
22 | # define __ARCH_WANT_SYS_ALARM | ||
23 | # define __ARCH_WANT_SYS_FADVISE64 | ||
24 | # define __ARCH_WANT_SYS_GETHOSTNAME | ||
25 | # define __ARCH_WANT_SYS_GETPGRP | ||
26 | # define __ARCH_WANT_SYS_LLSEEK | ||
27 | # define __ARCH_WANT_SYS_NICE | ||
28 | # define __ARCH_WANT_SYS_OLDUMOUNT | ||
29 | # define __ARCH_WANT_SYS_OLD_GETRLIMIT | ||
30 | # define __ARCH_WANT_SYS_OLD_UNAME | ||
31 | # define __ARCH_WANT_SYS_PAUSE | ||
32 | # define __ARCH_WANT_SYS_RT_SIGACTION | ||
33 | # define __ARCH_WANT_SYS_RT_SIGSUSPEND | ||
34 | # define __ARCH_WANT_SYS_SGETMASK | ||
35 | # define __ARCH_WANT_SYS_SIGNAL | ||
36 | # define __ARCH_WANT_SYS_SIGPENDING | ||
37 | # define __ARCH_WANT_SYS_SIGPROCMASK | ||
38 | # define __ARCH_WANT_SYS_SOCKETCALL | ||
39 | # define __ARCH_WANT_SYS_TIME | ||
40 | # define __ARCH_WANT_SYS_UTIME | ||
41 | # define __ARCH_WANT_SYS_WAITPID | ||
42 | |||
43 | /* | ||
44 | * "Conditional" syscalls | ||
45 | * | ||
46 | * What we want is __attribute__((weak,alias("sys_ni_syscall"))), | ||
47 | * but it doesn't work on all toolchains, so we just do it by hand | ||
48 | */ | ||
49 | # define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") | ||
50 | |||
7 | #else | 51 | #else |
8 | # ifdef __i386__ | 52 | # ifdef __i386__ |
9 | # include "unistd_32.h" | 53 | # include <asm/unistd_32.h> |
10 | # else | 54 | # else |
11 | # include "unistd_64.h" | 55 | # include <asm/unistd_64.h> |
12 | # endif | 56 | # endif |
13 | #endif | 57 | #endif |
58 | |||
59 | #endif /* _ASM_X86_UNISTD_H */ | ||
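
The wrapper grows a proper include guard and becomes the single home for the __ARCH_WANT_* feature selectors and cond_syscall(), which the two per-width headers deleted below previously duplicated; the quoted includes also become <asm/...> includes so generated headers resolve from the object tree. cond_syscall() gives optional syscalls a weak alias to sys_ni_syscall, e.g. as kernel/sys_ni.c uses it:

/* If fanotify is not built in, sys_fanotify_init resolves to
 * sys_ni_syscall through the weak alias the macro emits:
 *   asm(".weak\tsys_fanotify_init\n\t.set\tsys_fanotify_init,sys_ni_syscall")
 */
cond_syscall(sys_fanotify_init);
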
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h deleted file mode 100644 index 599c77d38f33..000000000000 --- a/arch/x86/include/asm/unistd_32.h +++ /dev/null | |||
@@ -1,401 +0,0 @@ | |||
1 | #ifndef _ASM_X86_UNISTD_32_H | ||
2 | #define _ASM_X86_UNISTD_32_H | ||
3 | |||
4 | /* | ||
5 | * This file contains the system call numbers. | ||
6 | */ | ||
7 | |||
8 | #define __NR_restart_syscall 0 | ||
9 | #define __NR_exit 1 | ||
10 | #define __NR_fork 2 | ||
11 | #define __NR_read 3 | ||
12 | #define __NR_write 4 | ||
13 | #define __NR_open 5 | ||
14 | #define __NR_close 6 | ||
15 | #define __NR_waitpid 7 | ||
16 | #define __NR_creat 8 | ||
17 | #define __NR_link 9 | ||
18 | #define __NR_unlink 10 | ||
19 | #define __NR_execve 11 | ||
20 | #define __NR_chdir 12 | ||
21 | #define __NR_time 13 | ||
22 | #define __NR_mknod 14 | ||
23 | #define __NR_chmod 15 | ||
24 | #define __NR_lchown 16 | ||
25 | #define __NR_break 17 | ||
26 | #define __NR_oldstat 18 | ||
27 | #define __NR_lseek 19 | ||
28 | #define __NR_getpid 20 | ||
29 | #define __NR_mount 21 | ||
30 | #define __NR_umount 22 | ||
31 | #define __NR_setuid 23 | ||
32 | #define __NR_getuid 24 | ||
33 | #define __NR_stime 25 | ||
34 | #define __NR_ptrace 26 | ||
35 | #define __NR_alarm 27 | ||
36 | #define __NR_oldfstat 28 | ||
37 | #define __NR_pause 29 | ||
38 | #define __NR_utime 30 | ||
39 | #define __NR_stty 31 | ||
40 | #define __NR_gtty 32 | ||
41 | #define __NR_access 33 | ||
42 | #define __NR_nice 34 | ||
43 | #define __NR_ftime 35 | ||
44 | #define __NR_sync 36 | ||
45 | #define __NR_kill 37 | ||
46 | #define __NR_rename 38 | ||
47 | #define __NR_mkdir 39 | ||
48 | #define __NR_rmdir 40 | ||
49 | #define __NR_dup 41 | ||
50 | #define __NR_pipe 42 | ||
51 | #define __NR_times 43 | ||
52 | #define __NR_prof 44 | ||
53 | #define __NR_brk 45 | ||
54 | #define __NR_setgid 46 | ||
55 | #define __NR_getgid 47 | ||
56 | #define __NR_signal 48 | ||
57 | #define __NR_geteuid 49 | ||
58 | #define __NR_getegid 50 | ||
59 | #define __NR_acct 51 | ||
60 | #define __NR_umount2 52 | ||
61 | #define __NR_lock 53 | ||
62 | #define __NR_ioctl 54 | ||
63 | #define __NR_fcntl 55 | ||
64 | #define __NR_mpx 56 | ||
65 | #define __NR_setpgid 57 | ||
66 | #define __NR_ulimit 58 | ||
67 | #define __NR_oldolduname 59 | ||
68 | #define __NR_umask 60 | ||
69 | #define __NR_chroot 61 | ||
70 | #define __NR_ustat 62 | ||
71 | #define __NR_dup2 63 | ||
72 | #define __NR_getppid 64 | ||
73 | #define __NR_getpgrp 65 | ||
74 | #define __NR_setsid 66 | ||
75 | #define __NR_sigaction 67 | ||
76 | #define __NR_sgetmask 68 | ||
77 | #define __NR_ssetmask 69 | ||
78 | #define __NR_setreuid 70 | ||
79 | #define __NR_setregid 71 | ||
80 | #define __NR_sigsuspend 72 | ||
81 | #define __NR_sigpending 73 | ||
82 | #define __NR_sethostname 74 | ||
83 | #define __NR_setrlimit 75 | ||
84 | #define __NR_getrlimit 76 /* Back compatible 2Gig limited rlimit */ | ||
85 | #define __NR_getrusage 77 | ||
86 | #define __NR_gettimeofday 78 | ||
87 | #define __NR_settimeofday 79 | ||
88 | #define __NR_getgroups 80 | ||
89 | #define __NR_setgroups 81 | ||
90 | #define __NR_select 82 | ||
91 | #define __NR_symlink 83 | ||
92 | #define __NR_oldlstat 84 | ||
93 | #define __NR_readlink 85 | ||
94 | #define __NR_uselib 86 | ||
95 | #define __NR_swapon 87 | ||
96 | #define __NR_reboot 88 | ||
97 | #define __NR_readdir 89 | ||
98 | #define __NR_mmap 90 | ||
99 | #define __NR_munmap 91 | ||
100 | #define __NR_truncate 92 | ||
101 | #define __NR_ftruncate 93 | ||
102 | #define __NR_fchmod 94 | ||
103 | #define __NR_fchown 95 | ||
104 | #define __NR_getpriority 96 | ||
105 | #define __NR_setpriority 97 | ||
106 | #define __NR_profil 98 | ||
107 | #define __NR_statfs 99 | ||
108 | #define __NR_fstatfs 100 | ||
109 | #define __NR_ioperm 101 | ||
110 | #define __NR_socketcall 102 | ||
111 | #define __NR_syslog 103 | ||
112 | #define __NR_setitimer 104 | ||
113 | #define __NR_getitimer 105 | ||
114 | #define __NR_stat 106 | ||
115 | #define __NR_lstat 107 | ||
116 | #define __NR_fstat 108 | ||
117 | #define __NR_olduname 109 | ||
118 | #define __NR_iopl 110 | ||
119 | #define __NR_vhangup 111 | ||
120 | #define __NR_idle 112 | ||
121 | #define __NR_vm86old 113 | ||
122 | #define __NR_wait4 114 | ||
123 | #define __NR_swapoff 115 | ||
124 | #define __NR_sysinfo 116 | ||
125 | #define __NR_ipc 117 | ||
126 | #define __NR_fsync 118 | ||
127 | #define __NR_sigreturn 119 | ||
128 | #define __NR_clone 120 | ||
129 | #define __NR_setdomainname 121 | ||
130 | #define __NR_uname 122 | ||
131 | #define __NR_modify_ldt 123 | ||
132 | #define __NR_adjtimex 124 | ||
133 | #define __NR_mprotect 125 | ||
134 | #define __NR_sigprocmask 126 | ||
135 | #define __NR_create_module 127 | ||
136 | #define __NR_init_module 128 | ||
137 | #define __NR_delete_module 129 | ||
138 | #define __NR_get_kernel_syms 130 | ||
139 | #define __NR_quotactl 131 | ||
140 | #define __NR_getpgid 132 | ||
141 | #define __NR_fchdir 133 | ||
142 | #define __NR_bdflush 134 | ||
143 | #define __NR_sysfs 135 | ||
144 | #define __NR_personality 136 | ||
145 | #define __NR_afs_syscall 137 /* Syscall for Andrew File System */ | ||
146 | #define __NR_setfsuid 138 | ||
147 | #define __NR_setfsgid 139 | ||
148 | #define __NR__llseek 140 | ||
149 | #define __NR_getdents 141 | ||
150 | #define __NR__newselect 142 | ||
151 | #define __NR_flock 143 | ||
152 | #define __NR_msync 144 | ||
153 | #define __NR_readv 145 | ||
154 | #define __NR_writev 146 | ||
155 | #define __NR_getsid 147 | ||
156 | #define __NR_fdatasync 148 | ||
157 | #define __NR__sysctl 149 | ||
158 | #define __NR_mlock 150 | ||
159 | #define __NR_munlock 151 | ||
160 | #define __NR_mlockall 152 | ||
161 | #define __NR_munlockall 153 | ||
162 | #define __NR_sched_setparam 154 | ||
163 | #define __NR_sched_getparam 155 | ||
164 | #define __NR_sched_setscheduler 156 | ||
165 | #define __NR_sched_getscheduler 157 | ||
166 | #define __NR_sched_yield 158 | ||
167 | #define __NR_sched_get_priority_max 159 | ||
168 | #define __NR_sched_get_priority_min 160 | ||
169 | #define __NR_sched_rr_get_interval 161 | ||
170 | #define __NR_nanosleep 162 | ||
171 | #define __NR_mremap 163 | ||
172 | #define __NR_setresuid 164 | ||
173 | #define __NR_getresuid 165 | ||
174 | #define __NR_vm86 166 | ||
175 | #define __NR_query_module 167 | ||
176 | #define __NR_poll 168 | ||
177 | #define __NR_nfsservctl 169 | ||
178 | #define __NR_setresgid 170 | ||
179 | #define __NR_getresgid 171 | ||
180 | #define __NR_prctl 172 | ||
181 | #define __NR_rt_sigreturn 173 | ||
182 | #define __NR_rt_sigaction 174 | ||
183 | #define __NR_rt_sigprocmask 175 | ||
184 | #define __NR_rt_sigpending 176 | ||
185 | #define __NR_rt_sigtimedwait 177 | ||
186 | #define __NR_rt_sigqueueinfo 178 | ||
187 | #define __NR_rt_sigsuspend 179 | ||
188 | #define __NR_pread64 180 | ||
189 | #define __NR_pwrite64 181 | ||
190 | #define __NR_chown 182 | ||
191 | #define __NR_getcwd 183 | ||
192 | #define __NR_capget 184 | ||
193 | #define __NR_capset 185 | ||
194 | #define __NR_sigaltstack 186 | ||
195 | #define __NR_sendfile 187 | ||
196 | #define __NR_getpmsg 188 /* some people actually want streams */ | ||
197 | #define __NR_putpmsg 189 /* some people actually want streams */ | ||
198 | #define __NR_vfork 190 | ||
199 | #define __NR_ugetrlimit 191 /* SuS compliant getrlimit */ | ||
200 | #define __NR_mmap2 192 | ||
201 | #define __NR_truncate64 193 | ||
202 | #define __NR_ftruncate64 194 | ||
203 | #define __NR_stat64 195 | ||
204 | #define __NR_lstat64 196 | ||
205 | #define __NR_fstat64 197 | ||
206 | #define __NR_lchown32 198 | ||
207 | #define __NR_getuid32 199 | ||
208 | #define __NR_getgid32 200 | ||
209 | #define __NR_geteuid32 201 | ||
210 | #define __NR_getegid32 202 | ||
211 | #define __NR_setreuid32 203 | ||
212 | #define __NR_setregid32 204 | ||
213 | #define __NR_getgroups32 205 | ||
214 | #define __NR_setgroups32 206 | ||
215 | #define __NR_fchown32 207 | ||
216 | #define __NR_setresuid32 208 | ||
217 | #define __NR_getresuid32 209 | ||
218 | #define __NR_setresgid32 210 | ||
219 | #define __NR_getresgid32 211 | ||
220 | #define __NR_chown32 212 | ||
221 | #define __NR_setuid32 213 | ||
222 | #define __NR_setgid32 214 | ||
223 | #define __NR_setfsuid32 215 | ||
224 | #define __NR_setfsgid32 216 | ||
225 | #define __NR_pivot_root 217 | ||
226 | #define __NR_mincore 218 | ||
227 | #define __NR_madvise 219 | ||
228 | #define __NR_madvise1 219 /* delete when C lib stub is removed */ | ||
229 | #define __NR_getdents64 220 | ||
230 | #define __NR_fcntl64 221 | ||
231 | /* 223 is unused */ | ||
232 | #define __NR_gettid 224 | ||
233 | #define __NR_readahead 225 | ||
234 | #define __NR_setxattr 226 | ||
235 | #define __NR_lsetxattr 227 | ||
236 | #define __NR_fsetxattr 228 | ||
237 | #define __NR_getxattr 229 | ||
238 | #define __NR_lgetxattr 230 | ||
239 | #define __NR_fgetxattr 231 | ||
240 | #define __NR_listxattr 232 | ||
241 | #define __NR_llistxattr 233 | ||
242 | #define __NR_flistxattr 234 | ||
243 | #define __NR_removexattr 235 | ||
244 | #define __NR_lremovexattr 236 | ||
245 | #define __NR_fremovexattr 237 | ||
246 | #define __NR_tkill 238 | ||
247 | #define __NR_sendfile64 239 | ||
248 | #define __NR_futex 240 | ||
249 | #define __NR_sched_setaffinity 241 | ||
250 | #define __NR_sched_getaffinity 242 | ||
251 | #define __NR_set_thread_area 243 | ||
252 | #define __NR_get_thread_area 244 | ||
253 | #define __NR_io_setup 245 | ||
254 | #define __NR_io_destroy 246 | ||
255 | #define __NR_io_getevents 247 | ||
256 | #define __NR_io_submit 248 | ||
257 | #define __NR_io_cancel 249 | ||
258 | #define __NR_fadvise64 250 | ||
259 | /* 251 is available for reuse (was briefly sys_set_zone_reclaim) */ | ||
260 | #define __NR_exit_group 252 | ||
261 | #define __NR_lookup_dcookie 253 | ||
262 | #define __NR_epoll_create 254 | ||
263 | #define __NR_epoll_ctl 255 | ||
264 | #define __NR_epoll_wait 256 | ||
265 | #define __NR_remap_file_pages 257 | ||
266 | #define __NR_set_tid_address 258 | ||
267 | #define __NR_timer_create 259 | ||
268 | #define __NR_timer_settime (__NR_timer_create+1) | ||
269 | #define __NR_timer_gettime (__NR_timer_create+2) | ||
270 | #define __NR_timer_getoverrun (__NR_timer_create+3) | ||
271 | #define __NR_timer_delete (__NR_timer_create+4) | ||
272 | #define __NR_clock_settime (__NR_timer_create+5) | ||
273 | #define __NR_clock_gettime (__NR_timer_create+6) | ||
274 | #define __NR_clock_getres (__NR_timer_create+7) | ||
275 | #define __NR_clock_nanosleep (__NR_timer_create+8) | ||
276 | #define __NR_statfs64 268 | ||
277 | #define __NR_fstatfs64 269 | ||
278 | #define __NR_tgkill 270 | ||
279 | #define __NR_utimes 271 | ||
280 | #define __NR_fadvise64_64 272 | ||
281 | #define __NR_vserver 273 | ||
282 | #define __NR_mbind 274 | ||
283 | #define __NR_get_mempolicy 275 | ||
284 | #define __NR_set_mempolicy 276 | ||
285 | #define __NR_mq_open 277 | ||
286 | #define __NR_mq_unlink (__NR_mq_open+1) | ||
287 | #define __NR_mq_timedsend (__NR_mq_open+2) | ||
288 | #define __NR_mq_timedreceive (__NR_mq_open+3) | ||
289 | #define __NR_mq_notify (__NR_mq_open+4) | ||
290 | #define __NR_mq_getsetattr (__NR_mq_open+5) | ||
291 | #define __NR_kexec_load 283 | ||
292 | #define __NR_waitid 284 | ||
293 | /* #define __NR_sys_setaltroot 285 */ | ||
294 | #define __NR_add_key 286 | ||
295 | #define __NR_request_key 287 | ||
296 | #define __NR_keyctl 288 | ||
297 | #define __NR_ioprio_set 289 | ||
298 | #define __NR_ioprio_get 290 | ||
299 | #define __NR_inotify_init 291 | ||
300 | #define __NR_inotify_add_watch 292 | ||
301 | #define __NR_inotify_rm_watch 293 | ||
302 | #define __NR_migrate_pages 294 | ||
303 | #define __NR_openat 295 | ||
304 | #define __NR_mkdirat 296 | ||
305 | #define __NR_mknodat 297 | ||
306 | #define __NR_fchownat 298 | ||
307 | #define __NR_futimesat 299 | ||
308 | #define __NR_fstatat64 300 | ||
309 | #define __NR_unlinkat 301 | ||
310 | #define __NR_renameat 302 | ||
311 | #define __NR_linkat 303 | ||
312 | #define __NR_symlinkat 304 | ||
313 | #define __NR_readlinkat 305 | ||
314 | #define __NR_fchmodat 306 | ||
315 | #define __NR_faccessat 307 | ||
316 | #define __NR_pselect6 308 | ||
317 | #define __NR_ppoll 309 | ||
318 | #define __NR_unshare 310 | ||
319 | #define __NR_set_robust_list 311 | ||
320 | #define __NR_get_robust_list 312 | ||
321 | #define __NR_splice 313 | ||
322 | #define __NR_sync_file_range 314 | ||
323 | #define __NR_tee 315 | ||
324 | #define __NR_vmsplice 316 | ||
325 | #define __NR_move_pages 317 | ||
326 | #define __NR_getcpu 318 | ||
327 | #define __NR_epoll_pwait 319 | ||
328 | #define __NR_utimensat 320 | ||
329 | #define __NR_signalfd 321 | ||
330 | #define __NR_timerfd_create 322 | ||
331 | #define __NR_eventfd 323 | ||
332 | #define __NR_fallocate 324 | ||
333 | #define __NR_timerfd_settime 325 | ||
334 | #define __NR_timerfd_gettime 326 | ||
335 | #define __NR_signalfd4 327 | ||
336 | #define __NR_eventfd2 328 | ||
337 | #define __NR_epoll_create1 329 | ||
338 | #define __NR_dup3 330 | ||
339 | #define __NR_pipe2 331 | ||
340 | #define __NR_inotify_init1 332 | ||
341 | #define __NR_preadv 333 | ||
342 | #define __NR_pwritev 334 | ||
343 | #define __NR_rt_tgsigqueueinfo 335 | ||
344 | #define __NR_perf_event_open 336 | ||
345 | #define __NR_recvmmsg 337 | ||
346 | #define __NR_fanotify_init 338 | ||
347 | #define __NR_fanotify_mark 339 | ||
348 | #define __NR_prlimit64 340 | ||
349 | #define __NR_name_to_handle_at 341 | ||
350 | #define __NR_open_by_handle_at 342 | ||
351 | #define __NR_clock_adjtime 343 | ||
352 | #define __NR_syncfs 344 | ||
353 | #define __NR_sendmmsg 345 | ||
354 | #define __NR_setns 346 | ||
355 | #define __NR_process_vm_readv 347 | ||
356 | #define __NR_process_vm_writev 348 | ||
357 | |||
358 | #ifdef __KERNEL__ | ||
359 | |||
360 | #define NR_syscalls 349 | ||
361 | |||
362 | #define __ARCH_WANT_IPC_PARSE_VERSION | ||
363 | #define __ARCH_WANT_OLD_READDIR | ||
364 | #define __ARCH_WANT_OLD_STAT | ||
365 | #define __ARCH_WANT_STAT64 | ||
366 | #define __ARCH_WANT_SYS_ALARM | ||
367 | #define __ARCH_WANT_SYS_GETHOSTNAME | ||
368 | #define __ARCH_WANT_SYS_IPC | ||
369 | #define __ARCH_WANT_SYS_PAUSE | ||
370 | #define __ARCH_WANT_SYS_SGETMASK | ||
371 | #define __ARCH_WANT_SYS_SIGNAL | ||
372 | #define __ARCH_WANT_SYS_TIME | ||
373 | #define __ARCH_WANT_SYS_UTIME | ||
374 | #define __ARCH_WANT_SYS_WAITPID | ||
375 | #define __ARCH_WANT_SYS_SOCKETCALL | ||
376 | #define __ARCH_WANT_SYS_FADVISE64 | ||
377 | #define __ARCH_WANT_SYS_GETPGRP | ||
378 | #define __ARCH_WANT_SYS_LLSEEK | ||
379 | #define __ARCH_WANT_SYS_NICE | ||
380 | #define __ARCH_WANT_SYS_OLD_GETRLIMIT | ||
381 | #define __ARCH_WANT_SYS_OLD_UNAME | ||
382 | #define __ARCH_WANT_SYS_OLD_MMAP | ||
383 | #define __ARCH_WANT_SYS_OLD_SELECT | ||
384 | #define __ARCH_WANT_SYS_OLDUMOUNT | ||
385 | #define __ARCH_WANT_SYS_SIGPENDING | ||
386 | #define __ARCH_WANT_SYS_SIGPROCMASK | ||
387 | #define __ARCH_WANT_SYS_RT_SIGACTION | ||
388 | #define __ARCH_WANT_SYS_RT_SIGSUSPEND | ||
389 | |||
390 | /* | ||
391 | * "Conditional" syscalls | ||
392 | * | ||
393 | * What we want is __attribute__((weak,alias("sys_ni_syscall"))), | ||
394 | * but it doesn't work on all toolchains, so we just do it by hand | ||
395 | */ | ||
396 | #ifndef cond_syscall | ||
397 | #define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") | ||
398 | #endif | ||
399 | |||
400 | #endif /* __KERNEL__ */ | ||
401 | #endif /* _ASM_X86_UNISTD_32_H */ | ||
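
The entire hand-maintained 32-bit number list (0-348, NR_syscalls 349) goes away; in this series the numbers move to a header generated at build time from a syscall table file elsewhere in the merge, which the <asm/unistd_32.h> include in the rewritten wrapper above picks up. The ABI is untouched — userspace sees the same numbers, so a check like this hypothetical program keeps working unmodified:

/* Hypothetical userspace check: __NR_gettid is still 224 on x86-32. */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
        printf("__NR_gettid = %d, tid = %ld\n",
               (int)SYS_gettid, syscall(SYS_gettid));
        return 0;
}
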
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h deleted file mode 100644 index 0431f193c3f2..000000000000 --- a/arch/x86/include/asm/unistd_64.h +++ /dev/null | |||
@@ -1,732 +0,0 @@ | |||
1 | #ifndef _ASM_X86_UNISTD_64_H | ||
2 | #define _ASM_X86_UNISTD_64_H | ||
3 | |||
4 | #ifndef __SYSCALL | ||
5 | #define __SYSCALL(a, b) | ||
6 | #endif | ||
7 | |||
8 | /* | ||
9 | * This file contains the system call numbers. | ||
10 | * | ||
11 | * Note: holes are not allowed. | ||
12 | */ | ||
13 | |||
14 | /* at least 8 syscall per cacheline */ | ||
15 | #define __NR_read 0 | ||
16 | __SYSCALL(__NR_read, sys_read) | ||
17 | #define __NR_write 1 | ||
18 | __SYSCALL(__NR_write, sys_write) | ||
19 | #define __NR_open 2 | ||
20 | __SYSCALL(__NR_open, sys_open) | ||
21 | #define __NR_close 3 | ||
22 | __SYSCALL(__NR_close, sys_close) | ||
23 | #define __NR_stat 4 | ||
24 | __SYSCALL(__NR_stat, sys_newstat) | ||
25 | #define __NR_fstat 5 | ||
26 | __SYSCALL(__NR_fstat, sys_newfstat) | ||
27 | #define __NR_lstat 6 | ||
28 | __SYSCALL(__NR_lstat, sys_newlstat) | ||
29 | #define __NR_poll 7 | ||
30 | __SYSCALL(__NR_poll, sys_poll) | ||
31 | |||
32 | #define __NR_lseek 8 | ||
33 | __SYSCALL(__NR_lseek, sys_lseek) | ||
34 | #define __NR_mmap 9 | ||
35 | __SYSCALL(__NR_mmap, sys_mmap) | ||
36 | #define __NR_mprotect 10 | ||
37 | __SYSCALL(__NR_mprotect, sys_mprotect) | ||
38 | #define __NR_munmap 11 | ||
39 | __SYSCALL(__NR_munmap, sys_munmap) | ||
40 | #define __NR_brk 12 | ||
41 | __SYSCALL(__NR_brk, sys_brk) | ||
42 | #define __NR_rt_sigaction 13 | ||
43 | __SYSCALL(__NR_rt_sigaction, sys_rt_sigaction) | ||
44 | #define __NR_rt_sigprocmask 14 | ||
45 | __SYSCALL(__NR_rt_sigprocmask, sys_rt_sigprocmask) | ||
46 | #define __NR_rt_sigreturn 15 | ||
47 | __SYSCALL(__NR_rt_sigreturn, stub_rt_sigreturn) | ||
48 | |||
49 | #define __NR_ioctl 16 | ||
50 | __SYSCALL(__NR_ioctl, sys_ioctl) | ||
51 | #define __NR_pread64 17 | ||
52 | __SYSCALL(__NR_pread64, sys_pread64) | ||
53 | #define __NR_pwrite64 18 | ||
54 | __SYSCALL(__NR_pwrite64, sys_pwrite64) | ||
55 | #define __NR_readv 19 | ||
56 | __SYSCALL(__NR_readv, sys_readv) | ||
57 | #define __NR_writev 20 | ||
58 | __SYSCALL(__NR_writev, sys_writev) | ||
59 | #define __NR_access 21 | ||
60 | __SYSCALL(__NR_access, sys_access) | ||
61 | #define __NR_pipe 22 | ||
62 | __SYSCALL(__NR_pipe, sys_pipe) | ||
63 | #define __NR_select 23 | ||
64 | __SYSCALL(__NR_select, sys_select) | ||
65 | |||
66 | #define __NR_sched_yield 24 | ||
67 | __SYSCALL(__NR_sched_yield, sys_sched_yield) | ||
68 | #define __NR_mremap 25 | ||
69 | __SYSCALL(__NR_mremap, sys_mremap) | ||
70 | #define __NR_msync 26 | ||
71 | __SYSCALL(__NR_msync, sys_msync) | ||
72 | #define __NR_mincore 27 | ||
73 | __SYSCALL(__NR_mincore, sys_mincore) | ||
74 | #define __NR_madvise 28 | ||
75 | __SYSCALL(__NR_madvise, sys_madvise) | ||
76 | #define __NR_shmget 29 | ||
77 | __SYSCALL(__NR_shmget, sys_shmget) | ||
78 | #define __NR_shmat 30 | ||
79 | __SYSCALL(__NR_shmat, sys_shmat) | ||
80 | #define __NR_shmctl 31 | ||
81 | __SYSCALL(__NR_shmctl, sys_shmctl) | ||
82 | |||
83 | #define __NR_dup 32 | ||
84 | __SYSCALL(__NR_dup, sys_dup) | ||
85 | #define __NR_dup2 33 | ||
86 | __SYSCALL(__NR_dup2, sys_dup2) | ||
87 | #define __NR_pause 34 | ||
88 | __SYSCALL(__NR_pause, sys_pause) | ||
89 | #define __NR_nanosleep 35 | ||
90 | __SYSCALL(__NR_nanosleep, sys_nanosleep) | ||
91 | #define __NR_getitimer 36 | ||
92 | __SYSCALL(__NR_getitimer, sys_getitimer) | ||
93 | #define __NR_alarm 37 | ||
94 | __SYSCALL(__NR_alarm, sys_alarm) | ||
95 | #define __NR_setitimer 38 | ||
96 | __SYSCALL(__NR_setitimer, sys_setitimer) | ||
97 | #define __NR_getpid 39 | ||
98 | __SYSCALL(__NR_getpid, sys_getpid) | ||
99 | |||
100 | #define __NR_sendfile 40 | ||
101 | __SYSCALL(__NR_sendfile, sys_sendfile64) | ||
102 | #define __NR_socket 41 | ||
103 | __SYSCALL(__NR_socket, sys_socket) | ||
104 | #define __NR_connect 42 | ||
105 | __SYSCALL(__NR_connect, sys_connect) | ||
106 | #define __NR_accept 43 | ||
107 | __SYSCALL(__NR_accept, sys_accept) | ||
108 | #define __NR_sendto 44 | ||
109 | __SYSCALL(__NR_sendto, sys_sendto) | ||
110 | #define __NR_recvfrom 45 | ||
111 | __SYSCALL(__NR_recvfrom, sys_recvfrom) | ||
112 | #define __NR_sendmsg 46 | ||
113 | __SYSCALL(__NR_sendmsg, sys_sendmsg) | ||
114 | #define __NR_recvmsg 47 | ||
115 | __SYSCALL(__NR_recvmsg, sys_recvmsg) | ||
116 | |||
117 | #define __NR_shutdown 48 | ||
118 | __SYSCALL(__NR_shutdown, sys_shutdown) | ||
119 | #define __NR_bind 49 | ||
120 | __SYSCALL(__NR_bind, sys_bind) | ||
121 | #define __NR_listen 50 | ||
122 | __SYSCALL(__NR_listen, sys_listen) | ||
123 | #define __NR_getsockname 51 | ||
124 | __SYSCALL(__NR_getsockname, sys_getsockname) | ||
125 | #define __NR_getpeername 52 | ||
126 | __SYSCALL(__NR_getpeername, sys_getpeername) | ||
127 | #define __NR_socketpair 53 | ||
128 | __SYSCALL(__NR_socketpair, sys_socketpair) | ||
129 | #define __NR_setsockopt 54 | ||
130 | __SYSCALL(__NR_setsockopt, sys_setsockopt) | ||
131 | #define __NR_getsockopt 55 | ||
132 | __SYSCALL(__NR_getsockopt, sys_getsockopt) | ||
133 | |||
134 | #define __NR_clone 56 | ||
135 | __SYSCALL(__NR_clone, stub_clone) | ||
136 | #define __NR_fork 57 | ||
137 | __SYSCALL(__NR_fork, stub_fork) | ||
138 | #define __NR_vfork 58 | ||
139 | __SYSCALL(__NR_vfork, stub_vfork) | ||
140 | #define __NR_execve 59 | ||
141 | __SYSCALL(__NR_execve, stub_execve) | ||
142 | #define __NR_exit 60 | ||
143 | __SYSCALL(__NR_exit, sys_exit) | ||
144 | #define __NR_wait4 61 | ||
145 | __SYSCALL(__NR_wait4, sys_wait4) | ||
146 | #define __NR_kill 62 | ||
147 | __SYSCALL(__NR_kill, sys_kill) | ||
148 | #define __NR_uname 63 | ||
149 | __SYSCALL(__NR_uname, sys_newuname) | ||
150 | |||
151 | #define __NR_semget 64 | ||
152 | __SYSCALL(__NR_semget, sys_semget) | ||
153 | #define __NR_semop 65 | ||
154 | __SYSCALL(__NR_semop, sys_semop) | ||
155 | #define __NR_semctl 66 | ||
156 | __SYSCALL(__NR_semctl, sys_semctl) | ||
157 | #define __NR_shmdt 67 | ||
158 | __SYSCALL(__NR_shmdt, sys_shmdt) | ||
159 | #define __NR_msgget 68 | ||
160 | __SYSCALL(__NR_msgget, sys_msgget) | ||
161 | #define __NR_msgsnd 69 | ||
162 | __SYSCALL(__NR_msgsnd, sys_msgsnd) | ||
163 | #define __NR_msgrcv 70 | ||
164 | __SYSCALL(__NR_msgrcv, sys_msgrcv) | ||
165 | #define __NR_msgctl 71 | ||
166 | __SYSCALL(__NR_msgctl, sys_msgctl) | ||
167 | |||
168 | #define __NR_fcntl 72 | ||
169 | __SYSCALL(__NR_fcntl, sys_fcntl) | ||
170 | #define __NR_flock 73 | ||
171 | __SYSCALL(__NR_flock, sys_flock) | ||
172 | #define __NR_fsync 74 | ||
173 | __SYSCALL(__NR_fsync, sys_fsync) | ||
174 | #define __NR_fdatasync 75 | ||
175 | __SYSCALL(__NR_fdatasync, sys_fdatasync) | ||
176 | #define __NR_truncate 76 | ||
177 | __SYSCALL(__NR_truncate, sys_truncate) | ||
178 | #define __NR_ftruncate 77 | ||
179 | __SYSCALL(__NR_ftruncate, sys_ftruncate) | ||
180 | #define __NR_getdents 78 | ||
181 | __SYSCALL(__NR_getdents, sys_getdents) | ||
182 | #define __NR_getcwd 79 | ||
183 | __SYSCALL(__NR_getcwd, sys_getcwd) | ||
184 | |||
185 | #define __NR_chdir 80 | ||
186 | __SYSCALL(__NR_chdir, sys_chdir) | ||
187 | #define __NR_fchdir 81 | ||
188 | __SYSCALL(__NR_fchdir, sys_fchdir) | ||
189 | #define __NR_rename 82 | ||
190 | __SYSCALL(__NR_rename, sys_rename) | ||
191 | #define __NR_mkdir 83 | ||
192 | __SYSCALL(__NR_mkdir, sys_mkdir) | ||
193 | #define __NR_rmdir 84 | ||
194 | __SYSCALL(__NR_rmdir, sys_rmdir) | ||
195 | #define __NR_creat 85 | ||
196 | __SYSCALL(__NR_creat, sys_creat) | ||
197 | #define __NR_link 86 | ||
198 | __SYSCALL(__NR_link, sys_link) | ||
199 | #define __NR_unlink 87 | ||
200 | __SYSCALL(__NR_unlink, sys_unlink) | ||
201 | |||
202 | #define __NR_symlink 88 | ||
203 | __SYSCALL(__NR_symlink, sys_symlink) | ||
204 | #define __NR_readlink 89 | ||
205 | __SYSCALL(__NR_readlink, sys_readlink) | ||
206 | #define __NR_chmod 90 | ||
207 | __SYSCALL(__NR_chmod, sys_chmod) | ||
208 | #define __NR_fchmod 91 | ||
209 | __SYSCALL(__NR_fchmod, sys_fchmod) | ||
210 | #define __NR_chown 92 | ||
211 | __SYSCALL(__NR_chown, sys_chown) | ||
212 | #define __NR_fchown 93 | ||
213 | __SYSCALL(__NR_fchown, sys_fchown) | ||
214 | #define __NR_lchown 94 | ||
215 | __SYSCALL(__NR_lchown, sys_lchown) | ||
216 | #define __NR_umask 95 | ||
217 | __SYSCALL(__NR_umask, sys_umask) | ||
218 | |||
219 | #define __NR_gettimeofday 96 | ||
220 | __SYSCALL(__NR_gettimeofday, sys_gettimeofday) | ||
221 | #define __NR_getrlimit 97 | ||
222 | __SYSCALL(__NR_getrlimit, sys_getrlimit) | ||
223 | #define __NR_getrusage 98 | ||
224 | __SYSCALL(__NR_getrusage, sys_getrusage) | ||
225 | #define __NR_sysinfo 99 | ||
226 | __SYSCALL(__NR_sysinfo, sys_sysinfo) | ||
227 | #define __NR_times 100 | ||
228 | __SYSCALL(__NR_times, sys_times) | ||
229 | #define __NR_ptrace 101 | ||
230 | __SYSCALL(__NR_ptrace, sys_ptrace) | ||
231 | #define __NR_getuid 102 | ||
232 | __SYSCALL(__NR_getuid, sys_getuid) | ||
233 | #define __NR_syslog 103 | ||
234 | __SYSCALL(__NR_syslog, sys_syslog) | ||
235 | |||
236 | /* at the very end the stuff that never runs during the benchmarks */ | ||
237 | #define __NR_getgid 104 | ||
238 | __SYSCALL(__NR_getgid, sys_getgid) | ||
239 | #define __NR_setuid 105 | ||
240 | __SYSCALL(__NR_setuid, sys_setuid) | ||
241 | #define __NR_setgid 106 | ||
242 | __SYSCALL(__NR_setgid, sys_setgid) | ||
243 | #define __NR_geteuid 107 | ||
244 | __SYSCALL(__NR_geteuid, sys_geteuid) | ||
245 | #define __NR_getegid 108 | ||
246 | __SYSCALL(__NR_getegid, sys_getegid) | ||
247 | #define __NR_setpgid 109 | ||
248 | __SYSCALL(__NR_setpgid, sys_setpgid) | ||
249 | #define __NR_getppid 110 | ||
250 | __SYSCALL(__NR_getppid, sys_getppid) | ||
251 | #define __NR_getpgrp 111 | ||
252 | __SYSCALL(__NR_getpgrp, sys_getpgrp) | ||
253 | |||
254 | #define __NR_setsid 112 | ||
255 | __SYSCALL(__NR_setsid, sys_setsid) | ||
256 | #define __NR_setreuid 113 | ||
257 | __SYSCALL(__NR_setreuid, sys_setreuid) | ||
258 | #define __NR_setregid 114 | ||
259 | __SYSCALL(__NR_setregid, sys_setregid) | ||
260 | #define __NR_getgroups 115 | ||
261 | __SYSCALL(__NR_getgroups, sys_getgroups) | ||
262 | #define __NR_setgroups 116 | ||
263 | __SYSCALL(__NR_setgroups, sys_setgroups) | ||
264 | #define __NR_setresuid 117 | ||
265 | __SYSCALL(__NR_setresuid, sys_setresuid) | ||
266 | #define __NR_getresuid 118 | ||
267 | __SYSCALL(__NR_getresuid, sys_getresuid) | ||
268 | #define __NR_setresgid 119 | ||
269 | __SYSCALL(__NR_setresgid, sys_setresgid) | ||
270 | |||
271 | #define __NR_getresgid 120 | ||
272 | __SYSCALL(__NR_getresgid, sys_getresgid) | ||
273 | #define __NR_getpgid 121 | ||
274 | __SYSCALL(__NR_getpgid, sys_getpgid) | ||
275 | #define __NR_setfsuid 122 | ||
276 | __SYSCALL(__NR_setfsuid, sys_setfsuid) | ||
277 | #define __NR_setfsgid 123 | ||
278 | __SYSCALL(__NR_setfsgid, sys_setfsgid) | ||
279 | #define __NR_getsid 124 | ||
280 | __SYSCALL(__NR_getsid, sys_getsid) | ||
281 | #define __NR_capget 125 | ||
282 | __SYSCALL(__NR_capget, sys_capget) | ||
283 | #define __NR_capset 126 | ||
284 | __SYSCALL(__NR_capset, sys_capset) | ||
285 | |||
286 | #define __NR_rt_sigpending 127 | ||
287 | __SYSCALL(__NR_rt_sigpending, sys_rt_sigpending) | ||
288 | #define __NR_rt_sigtimedwait 128 | ||
289 | __SYSCALL(__NR_rt_sigtimedwait, sys_rt_sigtimedwait) | ||
290 | #define __NR_rt_sigqueueinfo 129 | ||
291 | __SYSCALL(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo) | ||
292 | #define __NR_rt_sigsuspend 130 | ||
293 | __SYSCALL(__NR_rt_sigsuspend, sys_rt_sigsuspend) | ||
294 | #define __NR_sigaltstack 131 | ||
295 | __SYSCALL(__NR_sigaltstack, stub_sigaltstack) | ||
296 | #define __NR_utime 132 | ||
297 | __SYSCALL(__NR_utime, sys_utime) | ||
298 | #define __NR_mknod 133 | ||
299 | __SYSCALL(__NR_mknod, sys_mknod) | ||
300 | |||
301 | /* Only needed for a.out */ | ||
302 | #define __NR_uselib 134 | ||
303 | __SYSCALL(__NR_uselib, sys_ni_syscall) | ||
304 | #define __NR_personality 135 | ||
305 | __SYSCALL(__NR_personality, sys_personality) | ||
306 | |||
307 | #define __NR_ustat 136 | ||
308 | __SYSCALL(__NR_ustat, sys_ustat) | ||
309 | #define __NR_statfs 137 | ||
310 | __SYSCALL(__NR_statfs, sys_statfs) | ||
311 | #define __NR_fstatfs 138 | ||
312 | __SYSCALL(__NR_fstatfs, sys_fstatfs) | ||
313 | #define __NR_sysfs 139 | ||
314 | __SYSCALL(__NR_sysfs, sys_sysfs) | ||
315 | |||
316 | #define __NR_getpriority 140 | ||
317 | __SYSCALL(__NR_getpriority, sys_getpriority) | ||
318 | #define __NR_setpriority 141 | ||
319 | __SYSCALL(__NR_setpriority, sys_setpriority) | ||
320 | #define __NR_sched_setparam 142 | ||
321 | __SYSCALL(__NR_sched_setparam, sys_sched_setparam) | ||
322 | #define __NR_sched_getparam 143 | ||
323 | __SYSCALL(__NR_sched_getparam, sys_sched_getparam) | ||
324 | #define __NR_sched_setscheduler 144 | ||
325 | __SYSCALL(__NR_sched_setscheduler, sys_sched_setscheduler) | ||
326 | #define __NR_sched_getscheduler 145 | ||
327 | __SYSCALL(__NR_sched_getscheduler, sys_sched_getscheduler) | ||
328 | #define __NR_sched_get_priority_max 146 | ||
329 | __SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max) | ||
330 | #define __NR_sched_get_priority_min 147 | ||
331 | __SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min) | ||
332 | #define __NR_sched_rr_get_interval 148 | ||
333 | __SYSCALL(__NR_sched_rr_get_interval, sys_sched_rr_get_interval) | ||
334 | |||
335 | #define __NR_mlock 149 | ||
336 | __SYSCALL(__NR_mlock, sys_mlock) | ||
337 | #define __NR_munlock 150 | ||
338 | __SYSCALL(__NR_munlock, sys_munlock) | ||
339 | #define __NR_mlockall 151 | ||
340 | __SYSCALL(__NR_mlockall, sys_mlockall) | ||
341 | #define __NR_munlockall 152 | ||
342 | __SYSCALL(__NR_munlockall, sys_munlockall) | ||
343 | |||
344 | #define __NR_vhangup 153 | ||
345 | __SYSCALL(__NR_vhangup, sys_vhangup) | ||
346 | |||
347 | #define __NR_modify_ldt 154 | ||
348 | __SYSCALL(__NR_modify_ldt, sys_modify_ldt) | ||
349 | |||
350 | #define __NR_pivot_root 155 | ||
351 | __SYSCALL(__NR_pivot_root, sys_pivot_root) | ||
352 | |||
353 | #define __NR__sysctl 156 | ||
354 | __SYSCALL(__NR__sysctl, sys_sysctl) | ||
355 | |||
356 | #define __NR_prctl 157 | ||
357 | __SYSCALL(__NR_prctl, sys_prctl) | ||
358 | #define __NR_arch_prctl 158 | ||
359 | __SYSCALL(__NR_arch_prctl, sys_arch_prctl) | ||
360 | |||
361 | #define __NR_adjtimex 159 | ||
362 | __SYSCALL(__NR_adjtimex, sys_adjtimex) | ||
363 | |||
364 | #define __NR_setrlimit 160 | ||
365 | __SYSCALL(__NR_setrlimit, sys_setrlimit) | ||
366 | |||
367 | #define __NR_chroot 161 | ||
368 | __SYSCALL(__NR_chroot, sys_chroot) | ||
369 | |||
370 | #define __NR_sync 162 | ||
371 | __SYSCALL(__NR_sync, sys_sync) | ||
372 | |||
373 | #define __NR_acct 163 | ||
374 | __SYSCALL(__NR_acct, sys_acct) | ||
375 | |||
376 | #define __NR_settimeofday 164 | ||
377 | __SYSCALL(__NR_settimeofday, sys_settimeofday) | ||
378 | |||
379 | #define __NR_mount 165 | ||
380 | __SYSCALL(__NR_mount, sys_mount) | ||
381 | #define __NR_umount2 166 | ||
382 | __SYSCALL(__NR_umount2, sys_umount) | ||
383 | |||
384 | #define __NR_swapon 167 | ||
385 | __SYSCALL(__NR_swapon, sys_swapon) | ||
386 | #define __NR_swapoff 168 | ||
387 | __SYSCALL(__NR_swapoff, sys_swapoff) | ||
388 | |||
389 | #define __NR_reboot 169 | ||
390 | __SYSCALL(__NR_reboot, sys_reboot) | ||
391 | |||
392 | #define __NR_sethostname 170 | ||
393 | __SYSCALL(__NR_sethostname, sys_sethostname) | ||
394 | #define __NR_setdomainname 171 | ||
395 | __SYSCALL(__NR_setdomainname, sys_setdomainname) | ||
396 | |||
397 | #define __NR_iopl 172 | ||
398 | __SYSCALL(__NR_iopl, stub_iopl) | ||
399 | #define __NR_ioperm 173 | ||
400 | __SYSCALL(__NR_ioperm, sys_ioperm) | ||
401 | |||
402 | #define __NR_create_module 174 | ||
403 | __SYSCALL(__NR_create_module, sys_ni_syscall) | ||
404 | #define __NR_init_module 175 | ||
405 | __SYSCALL(__NR_init_module, sys_init_module) | ||
406 | #define __NR_delete_module 176 | ||
407 | __SYSCALL(__NR_delete_module, sys_delete_module) | ||
408 | #define __NR_get_kernel_syms 177 | ||
409 | __SYSCALL(__NR_get_kernel_syms, sys_ni_syscall) | ||
410 | #define __NR_query_module 178 | ||
411 | __SYSCALL(__NR_query_module, sys_ni_syscall) | ||
412 | |||
413 | #define __NR_quotactl 179 | ||
414 | __SYSCALL(__NR_quotactl, sys_quotactl) | ||
415 | |||
416 | #define __NR_nfsservctl 180 | ||
417 | __SYSCALL(__NR_nfsservctl, sys_ni_syscall) | ||
418 | |||
419 | /* reserved for LiS/STREAMS */ | ||
420 | #define __NR_getpmsg 181 | ||
421 | __SYSCALL(__NR_getpmsg, sys_ni_syscall) | ||
422 | #define __NR_putpmsg 182 | ||
423 | __SYSCALL(__NR_putpmsg, sys_ni_syscall) | ||
424 | |||
425 | /* reserved for AFS */ | ||
426 | #define __NR_afs_syscall 183 | ||
427 | __SYSCALL(__NR_afs_syscall, sys_ni_syscall) | ||
428 | |||
429 | /* reserved for tux */ | ||
430 | #define __NR_tuxcall 184 | ||
431 | __SYSCALL(__NR_tuxcall, sys_ni_syscall) | ||
432 | |||
433 | #define __NR_security 185 | ||
434 | __SYSCALL(__NR_security, sys_ni_syscall) | ||
435 | |||
436 | #define __NR_gettid 186 | ||
437 | __SYSCALL(__NR_gettid, sys_gettid) | ||
438 | |||
439 | #define __NR_readahead 187 | ||
440 | __SYSCALL(__NR_readahead, sys_readahead) | ||
441 | #define __NR_setxattr 188 | ||
442 | __SYSCALL(__NR_setxattr, sys_setxattr) | ||
443 | #define __NR_lsetxattr 189 | ||
444 | __SYSCALL(__NR_lsetxattr, sys_lsetxattr) | ||
445 | #define __NR_fsetxattr 190 | ||
446 | __SYSCALL(__NR_fsetxattr, sys_fsetxattr) | ||
447 | #define __NR_getxattr 191 | ||
448 | __SYSCALL(__NR_getxattr, sys_getxattr) | ||
449 | #define __NR_lgetxattr 192 | ||
450 | __SYSCALL(__NR_lgetxattr, sys_lgetxattr) | ||
451 | #define __NR_fgetxattr 193 | ||
452 | __SYSCALL(__NR_fgetxattr, sys_fgetxattr) | ||
453 | #define __NR_listxattr 194 | ||
454 | __SYSCALL(__NR_listxattr, sys_listxattr) | ||
455 | #define __NR_llistxattr 195 | ||
456 | __SYSCALL(__NR_llistxattr, sys_llistxattr) | ||
457 | #define __NR_flistxattr 196 | ||
458 | __SYSCALL(__NR_flistxattr, sys_flistxattr) | ||
459 | #define __NR_removexattr 197 | ||
460 | __SYSCALL(__NR_removexattr, sys_removexattr) | ||
461 | #define __NR_lremovexattr 198 | ||
462 | __SYSCALL(__NR_lremovexattr, sys_lremovexattr) | ||
463 | #define __NR_fremovexattr 199 | ||
464 | __SYSCALL(__NR_fremovexattr, sys_fremovexattr) | ||
465 | #define __NR_tkill 200 | ||
466 | __SYSCALL(__NR_tkill, sys_tkill) | ||
467 | #define __NR_time 201 | ||
468 | __SYSCALL(__NR_time, sys_time) | ||
469 | #define __NR_futex 202 | ||
470 | __SYSCALL(__NR_futex, sys_futex) | ||
471 | #define __NR_sched_setaffinity 203 | ||
472 | __SYSCALL(__NR_sched_setaffinity, sys_sched_setaffinity) | ||
473 | #define __NR_sched_getaffinity 204 | ||
474 | __SYSCALL(__NR_sched_getaffinity, sys_sched_getaffinity) | ||
475 | #define __NR_set_thread_area 205 | ||
476 | __SYSCALL(__NR_set_thread_area, sys_ni_syscall) /* use arch_prctl */ | ||
477 | #define __NR_io_setup 206 | ||
478 | __SYSCALL(__NR_io_setup, sys_io_setup) | ||
479 | #define __NR_io_destroy 207 | ||
480 | __SYSCALL(__NR_io_destroy, sys_io_destroy) | ||
481 | #define __NR_io_getevents 208 | ||
482 | __SYSCALL(__NR_io_getevents, sys_io_getevents) | ||
483 | #define __NR_io_submit 209 | ||
484 | __SYSCALL(__NR_io_submit, sys_io_submit) | ||
485 | #define __NR_io_cancel 210 | ||
486 | __SYSCALL(__NR_io_cancel, sys_io_cancel) | ||
487 | #define __NR_get_thread_area 211 | ||
488 | __SYSCALL(__NR_get_thread_area, sys_ni_syscall) /* use arch_prctl */ | ||
489 | #define __NR_lookup_dcookie 212 | ||
490 | __SYSCALL(__NR_lookup_dcookie, sys_lookup_dcookie) | ||
491 | #define __NR_epoll_create 213 | ||
492 | __SYSCALL(__NR_epoll_create, sys_epoll_create) | ||
493 | #define __NR_epoll_ctl_old 214 | ||
494 | __SYSCALL(__NR_epoll_ctl_old, sys_ni_syscall) | ||
495 | #define __NR_epoll_wait_old 215 | ||
496 | __SYSCALL(__NR_epoll_wait_old, sys_ni_syscall) | ||
497 | #define __NR_remap_file_pages 216 | ||
498 | __SYSCALL(__NR_remap_file_pages, sys_remap_file_pages) | ||
499 | #define __NR_getdents64 217 | ||
500 | __SYSCALL(__NR_getdents64, sys_getdents64) | ||
501 | #define __NR_set_tid_address 218 | ||
502 | __SYSCALL(__NR_set_tid_address, sys_set_tid_address) | ||
503 | #define __NR_restart_syscall 219 | ||
504 | __SYSCALL(__NR_restart_syscall, sys_restart_syscall) | ||
505 | #define __NR_semtimedop 220 | ||
506 | __SYSCALL(__NR_semtimedop, sys_semtimedop) | ||
507 | #define __NR_fadvise64 221 | ||
508 | __SYSCALL(__NR_fadvise64, sys_fadvise64) | ||
509 | #define __NR_timer_create 222 | ||
510 | __SYSCALL(__NR_timer_create, sys_timer_create) | ||
511 | #define __NR_timer_settime 223 | ||
512 | __SYSCALL(__NR_timer_settime, sys_timer_settime) | ||
513 | #define __NR_timer_gettime 224 | ||
514 | __SYSCALL(__NR_timer_gettime, sys_timer_gettime) | ||
515 | #define __NR_timer_getoverrun 225 | ||
516 | __SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun) | ||
517 | #define __NR_timer_delete 226 | ||
518 | __SYSCALL(__NR_timer_delete, sys_timer_delete) | ||
519 | #define __NR_clock_settime 227 | ||
520 | __SYSCALL(__NR_clock_settime, sys_clock_settime) | ||
521 | #define __NR_clock_gettime 228 | ||
522 | __SYSCALL(__NR_clock_gettime, sys_clock_gettime) | ||
523 | #define __NR_clock_getres 229 | ||
524 | __SYSCALL(__NR_clock_getres, sys_clock_getres) | ||
525 | #define __NR_clock_nanosleep 230 | ||
526 | __SYSCALL(__NR_clock_nanosleep, sys_clock_nanosleep) | ||
527 | #define __NR_exit_group 231 | ||
528 | __SYSCALL(__NR_exit_group, sys_exit_group) | ||
529 | #define __NR_epoll_wait 232 | ||
530 | __SYSCALL(__NR_epoll_wait, sys_epoll_wait) | ||
531 | #define __NR_epoll_ctl 233 | ||
532 | __SYSCALL(__NR_epoll_ctl, sys_epoll_ctl) | ||
533 | #define __NR_tgkill 234 | ||
534 | __SYSCALL(__NR_tgkill, sys_tgkill) | ||
535 | #define __NR_utimes 235 | ||
536 | __SYSCALL(__NR_utimes, sys_utimes) | ||
537 | #define __NR_vserver 236 | ||
538 | __SYSCALL(__NR_vserver, sys_ni_syscall) | ||
539 | #define __NR_mbind 237 | ||
540 | __SYSCALL(__NR_mbind, sys_mbind) | ||
541 | #define __NR_set_mempolicy 238 | ||
542 | __SYSCALL(__NR_set_mempolicy, sys_set_mempolicy) | ||
543 | #define __NR_get_mempolicy 239 | ||
544 | __SYSCALL(__NR_get_mempolicy, sys_get_mempolicy) | ||
545 | #define __NR_mq_open 240 | ||
546 | __SYSCALL(__NR_mq_open, sys_mq_open) | ||
547 | #define __NR_mq_unlink 241 | ||
548 | __SYSCALL(__NR_mq_unlink, sys_mq_unlink) | ||
549 | #define __NR_mq_timedsend 242 | ||
550 | __SYSCALL(__NR_mq_timedsend, sys_mq_timedsend) | ||
551 | #define __NR_mq_timedreceive 243 | ||
552 | __SYSCALL(__NR_mq_timedreceive, sys_mq_timedreceive) | ||
553 | #define __NR_mq_notify 244 | ||
554 | __SYSCALL(__NR_mq_notify, sys_mq_notify) | ||
555 | #define __NR_mq_getsetattr 245 | ||
556 | __SYSCALL(__NR_mq_getsetattr, sys_mq_getsetattr) | ||
557 | #define __NR_kexec_load 246 | ||
558 | __SYSCALL(__NR_kexec_load, sys_kexec_load) | ||
559 | #define __NR_waitid 247 | ||
560 | __SYSCALL(__NR_waitid, sys_waitid) | ||
561 | #define __NR_add_key 248 | ||
562 | __SYSCALL(__NR_add_key, sys_add_key) | ||
563 | #define __NR_request_key 249 | ||
564 | __SYSCALL(__NR_request_key, sys_request_key) | ||
565 | #define __NR_keyctl 250 | ||
566 | __SYSCALL(__NR_keyctl, sys_keyctl) | ||
567 | #define __NR_ioprio_set 251 | ||
568 | __SYSCALL(__NR_ioprio_set, sys_ioprio_set) | ||
569 | #define __NR_ioprio_get 252 | ||
570 | __SYSCALL(__NR_ioprio_get, sys_ioprio_get) | ||
571 | #define __NR_inotify_init 253 | ||
572 | __SYSCALL(__NR_inotify_init, sys_inotify_init) | ||
573 | #define __NR_inotify_add_watch 254 | ||
574 | __SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch) | ||
575 | #define __NR_inotify_rm_watch 255 | ||
576 | __SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch) | ||
577 | #define __NR_migrate_pages 256 | ||
578 | __SYSCALL(__NR_migrate_pages, sys_migrate_pages) | ||
579 | #define __NR_openat 257 | ||
580 | __SYSCALL(__NR_openat, sys_openat) | ||
581 | #define __NR_mkdirat 258 | ||
582 | __SYSCALL(__NR_mkdirat, sys_mkdirat) | ||
583 | #define __NR_mknodat 259 | ||
584 | __SYSCALL(__NR_mknodat, sys_mknodat) | ||
585 | #define __NR_fchownat 260 | ||
586 | __SYSCALL(__NR_fchownat, sys_fchownat) | ||
587 | #define __NR_futimesat 261 | ||
588 | __SYSCALL(__NR_futimesat, sys_futimesat) | ||
589 | #define __NR_newfstatat 262 | ||
590 | __SYSCALL(__NR_newfstatat, sys_newfstatat) | ||
591 | #define __NR_unlinkat 263 | ||
592 | __SYSCALL(__NR_unlinkat, sys_unlinkat) | ||
593 | #define __NR_renameat 264 | ||
594 | __SYSCALL(__NR_renameat, sys_renameat) | ||
595 | #define __NR_linkat 265 | ||
596 | __SYSCALL(__NR_linkat, sys_linkat) | ||
597 | #define __NR_symlinkat 266 | ||
598 | __SYSCALL(__NR_symlinkat, sys_symlinkat) | ||
599 | #define __NR_readlinkat 267 | ||
600 | __SYSCALL(__NR_readlinkat, sys_readlinkat) | ||
601 | #define __NR_fchmodat 268 | ||
602 | __SYSCALL(__NR_fchmodat, sys_fchmodat) | ||
603 | #define __NR_faccessat 269 | ||
604 | __SYSCALL(__NR_faccessat, sys_faccessat) | ||
605 | #define __NR_pselect6 270 | ||
606 | __SYSCALL(__NR_pselect6, sys_pselect6) | ||
607 | #define __NR_ppoll 271 | ||
608 | __SYSCALL(__NR_ppoll, sys_ppoll) | ||
609 | #define __NR_unshare 272 | ||
610 | __SYSCALL(__NR_unshare, sys_unshare) | ||
611 | #define __NR_set_robust_list 273 | ||
612 | __SYSCALL(__NR_set_robust_list, sys_set_robust_list) | ||
613 | #define __NR_get_robust_list 274 | ||
614 | __SYSCALL(__NR_get_robust_list, sys_get_robust_list) | ||
615 | #define __NR_splice 275 | ||
616 | __SYSCALL(__NR_splice, sys_splice) | ||
617 | #define __NR_tee 276 | ||
618 | __SYSCALL(__NR_tee, sys_tee) | ||
619 | #define __NR_sync_file_range 277 | ||
620 | __SYSCALL(__NR_sync_file_range, sys_sync_file_range) | ||
621 | #define __NR_vmsplice 278 | ||
622 | __SYSCALL(__NR_vmsplice, sys_vmsplice) | ||
623 | #define __NR_move_pages 279 | ||
624 | __SYSCALL(__NR_move_pages, sys_move_pages) | ||
625 | #define __NR_utimensat 280 | ||
626 | __SYSCALL(__NR_utimensat, sys_utimensat) | ||
627 | #define __NR_epoll_pwait 281 | ||
628 | __SYSCALL(__NR_epoll_pwait, sys_epoll_pwait) | ||
629 | #define __NR_signalfd 282 | ||
630 | __SYSCALL(__NR_signalfd, sys_signalfd) | ||
631 | #define __NR_timerfd_create 283 | ||
632 | __SYSCALL(__NR_timerfd_create, sys_timerfd_create) | ||
633 | #define __NR_eventfd 284 | ||
634 | __SYSCALL(__NR_eventfd, sys_eventfd) | ||
635 | #define __NR_fallocate 285 | ||
636 | __SYSCALL(__NR_fallocate, sys_fallocate) | ||
637 | #define __NR_timerfd_settime 286 | ||
638 | __SYSCALL(__NR_timerfd_settime, sys_timerfd_settime) | ||
639 | #define __NR_timerfd_gettime 287 | ||
640 | __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime) | ||
641 | #define __NR_accept4 288 | ||
642 | __SYSCALL(__NR_accept4, sys_accept4) | ||
643 | #define __NR_signalfd4 289 | ||
644 | __SYSCALL(__NR_signalfd4, sys_signalfd4) | ||
645 | #define __NR_eventfd2 290 | ||
646 | __SYSCALL(__NR_eventfd2, sys_eventfd2) | ||
647 | #define __NR_epoll_create1 291 | ||
648 | __SYSCALL(__NR_epoll_create1, sys_epoll_create1) | ||
649 | #define __NR_dup3 292 | ||
650 | __SYSCALL(__NR_dup3, sys_dup3) | ||
651 | #define __NR_pipe2 293 | ||
652 | __SYSCALL(__NR_pipe2, sys_pipe2) | ||
653 | #define __NR_inotify_init1 294 | ||
654 | __SYSCALL(__NR_inotify_init1, sys_inotify_init1) | ||
655 | #define __NR_preadv 295 | ||
656 | __SYSCALL(__NR_preadv, sys_preadv) | ||
657 | #define __NR_pwritev 296 | ||
658 | __SYSCALL(__NR_pwritev, sys_pwritev) | ||
659 | #define __NR_rt_tgsigqueueinfo 297 | ||
660 | __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) | ||
661 | #define __NR_perf_event_open 298 | ||
662 | __SYSCALL(__NR_perf_event_open, sys_perf_event_open) | ||
663 | #define __NR_recvmmsg 299 | ||
664 | __SYSCALL(__NR_recvmmsg, sys_recvmmsg) | ||
665 | #define __NR_fanotify_init 300 | ||
666 | __SYSCALL(__NR_fanotify_init, sys_fanotify_init) | ||
667 | #define __NR_fanotify_mark 301 | ||
668 | __SYSCALL(__NR_fanotify_mark, sys_fanotify_mark) | ||
669 | #define __NR_prlimit64 302 | ||
670 | __SYSCALL(__NR_prlimit64, sys_prlimit64) | ||
671 | #define __NR_name_to_handle_at 303 | ||
672 | __SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at) | ||
673 | #define __NR_open_by_handle_at 304 | ||
674 | __SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at) | ||
675 | #define __NR_clock_adjtime 305 | ||
676 | __SYSCALL(__NR_clock_adjtime, sys_clock_adjtime) | ||
677 | #define __NR_syncfs 306 | ||
678 | __SYSCALL(__NR_syncfs, sys_syncfs) | ||
679 | #define __NR_sendmmsg 307 | ||
680 | __SYSCALL(__NR_sendmmsg, sys_sendmmsg) | ||
681 | #define __NR_setns 308 | ||
682 | __SYSCALL(__NR_setns, sys_setns) | ||
683 | #define __NR_getcpu 309 | ||
684 | __SYSCALL(__NR_getcpu, sys_getcpu) | ||
685 | #define __NR_process_vm_readv 310 | ||
686 | __SYSCALL(__NR_process_vm_readv, sys_process_vm_readv) | ||
687 | #define __NR_process_vm_writev 311 | ||
688 | __SYSCALL(__NR_process_vm_writev, sys_process_vm_writev) | ||
689 | |||
690 | #ifndef __NO_STUBS | ||
691 | #define __ARCH_WANT_OLD_READDIR | ||
692 | #define __ARCH_WANT_OLD_STAT | ||
693 | #define __ARCH_WANT_SYS_ALARM | ||
694 | #define __ARCH_WANT_SYS_GETHOSTNAME | ||
695 | #define __ARCH_WANT_SYS_PAUSE | ||
696 | #define __ARCH_WANT_SYS_SGETMASK | ||
697 | #define __ARCH_WANT_SYS_SIGNAL | ||
698 | #define __ARCH_WANT_SYS_UTIME | ||
699 | #define __ARCH_WANT_SYS_WAITPID | ||
700 | #define __ARCH_WANT_SYS_SOCKETCALL | ||
701 | #define __ARCH_WANT_SYS_FADVISE64 | ||
702 | #define __ARCH_WANT_SYS_GETPGRP | ||
703 | #define __ARCH_WANT_SYS_LLSEEK | ||
704 | #define __ARCH_WANT_SYS_NICE | ||
705 | #define __ARCH_WANT_SYS_OLD_GETRLIMIT | ||
706 | #define __ARCH_WANT_SYS_OLD_UNAME | ||
707 | #define __ARCH_WANT_SYS_OLDUMOUNT | ||
708 | #define __ARCH_WANT_SYS_SIGPENDING | ||
709 | #define __ARCH_WANT_SYS_SIGPROCMASK | ||
710 | #define __ARCH_WANT_SYS_RT_SIGACTION | ||
711 | #define __ARCH_WANT_SYS_RT_SIGSUSPEND | ||
712 | #define __ARCH_WANT_SYS_TIME | ||
713 | #define __ARCH_WANT_COMPAT_SYS_TIME | ||
714 | #endif /* __NO_STUBS */ | ||
715 | |||
716 | #ifdef __KERNEL__ | ||
717 | |||
718 | #ifndef COMPILE_OFFSETS | ||
719 | #include <asm/asm-offsets.h> | ||
720 | #define NR_syscalls (__NR_syscall_max + 1) | ||
721 | #endif | ||
722 | |||
723 | /* | ||
724 | * "Conditional" syscalls | ||
725 | * | ||
726 | * What we want is __attribute__((weak,alias("sys_ni_syscall"))), | ||
727 | * but it doesn't work on all toolchains, so we just do it by hand | ||
728 | */ | ||
729 | #define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") | ||
730 | #endif /* __KERNEL__ */ | ||
731 | |||
732 | #endif /* _ASM_X86_UNISTD_64_H */ | ||
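
The 64-bit header carried the extra __SYSCALL(nr, sym) hook: defaulted to a no-op so the file read as a plain number list, but redefinable so the same file could be expanded a second time into actual table entries (__NR_syscall_max comes from asm-offsets, as the deleted lines above show). A sketch of the old two-pass consumption this deletion retires — the shape of the former arch/x86/kernel/syscall_64.c, reconstructed from the pattern, not quoted:

typedef void (*sys_call_ptr_t)(void);
extern void sys_ni_syscall(void);

#define __SYSCALL(nr, sym) extern void sym(void);
#include <asm/unistd_64.h>              /* pass 1: declare every handler */
#undef __SYSCALL

#define __SYSCALL(nr, sym) [nr] = sym,
const sys_call_ptr_t sys_call_table[__NR_syscall_max + 1] = {
        [0 ... __NR_syscall_max] = &sys_ni_syscall, /* holes -> ENOSYS */
#include <asm/unistd_64.h>              /* pass 2: emit "[nr] = sym," */
};
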
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 1971e652d24b..517d4767ffdd 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h | |||
@@ -7,6 +7,7 @@ | |||
7 | struct mpc_bus; | 7 | struct mpc_bus; |
8 | struct mpc_cpu; | 8 | struct mpc_cpu; |
9 | struct mpc_table; | 9 | struct mpc_table; |
10 | struct cpuinfo_x86; | ||
10 | 11 | ||
11 | /** | 12 | /** |
12 | * struct x86_init_mpparse - platform specific mpparse ops | 13 | * struct x86_init_mpparse - platform specific mpparse ops |
@@ -147,6 +148,7 @@ struct x86_init_ops { | |||
147 | */ | 148 | */ |
148 | struct x86_cpuinit_ops { | 149 | struct x86_cpuinit_ops { |
149 | void (*setup_percpu_clockev)(void); | 150 | void (*setup_percpu_clockev)(void); |
151 | void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node); | ||
150 | }; | 152 | }; |
151 | 153 | ||
152 | /** | 154 | /** |
@@ -177,6 +179,7 @@ struct x86_msi_ops { | |||
177 | int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type); | 179 | int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type); |
178 | void (*teardown_msi_irq)(unsigned int irq); | 180 | void (*teardown_msi_irq)(unsigned int irq); |
179 | void (*teardown_msi_irqs)(struct pci_dev *dev); | 181 | void (*teardown_msi_irqs)(struct pci_dev *dev); |
182 | void (*restore_msi_irqs)(struct pci_dev *dev, int irq); | ||
180 | }; | 183 | }; |
181 | 184 | ||
182 | extern struct x86_init_ops x86_init; | 185 | extern struct x86_init_ops x86_init; |
@@ -186,5 +189,6 @@ extern struct x86_msi_ops x86_msi; | |||
186 | 189 | ||
187 | extern void x86_init_noop(void); | 190 | extern void x86_init_noop(void); |
188 | extern void x86_init_uint_noop(unsigned int unused); | 191 | extern void x86_init_uint_noop(unsigned int unused); |
192 | extern void x86_default_fixup_cpu_id(struct cpuinfo_x86 *c, int node); | ||
189 | 193 | ||
190 | #endif | 194 | #endif |
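
Two new platform hooks appear here: x86_cpuinit.fixup_cpu_id() lets a platform correct per-CPU topology data after generic CPU identification, with x86_default_fixup_cpu_id() as the stock implementation (the NumaChip support added in this merge is the intended user), and x86_msi.restore_msi_irqs() gives a platform a way to restore a device's MSI state (e.g. after resume). A sketch of hooking fixup_cpu_id, with an illustrative adjustment only:

static void myplat_fixup_cpu_id(struct cpuinfo_x86 *c, int node)
{
        /* Example only: align the package id with the platform's
         * node numbering when they disagree. */
        if (node >= 0 && c->phys_proc_id != node)
                c->phys_proc_id = node;
}

static void __init myplat_setup(void)
{
        x86_cpuinit.fixup_cpu_id = myplat_fixup_cpu_id;
}
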
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 8baca3c4871c..5369059c07a9 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -25,7 +25,8 @@ obj-$(CONFIG_IRQ_WORK) += irq_work.o | |||
25 | obj-y += probe_roms.o | 25 | obj-y += probe_roms.o |
26 | obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o | 26 | obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o |
27 | obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o | 27 | obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o |
28 | obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o | 28 | obj-y += syscall_$(BITS).o |
29 | obj-$(CONFIG_X86_64) += vsyscall_64.o | ||
29 | obj-$(CONFIG_X86_64) += vsyscall_emu_64.o | 30 | obj-$(CONFIG_X86_64) += vsyscall_emu_64.o |
30 | obj-y += bootflag.o e820.o | 31 | obj-y += bootflag.o e820.o |
31 | obj-y += pci-dma.o quirks.o topology.o kdebugfs.o | 32 | obj-y += pci-dma.o quirks.o topology.o kdebugfs.o |
@@ -80,6 +81,7 @@ obj-$(CONFIG_APB_TIMER) += apb_timer.o | |||
80 | obj-$(CONFIG_AMD_NB) += amd_nb.o | 81 | obj-$(CONFIG_AMD_NB) += amd_nb.o |
81 | obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o | 82 | obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o |
82 | obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o | 83 | obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o |
84 | obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o | ||
83 | 85 | ||
84 | obj-$(CONFIG_KVM_GUEST) += kvm.o | 86 | obj-$(CONFIG_KVM_GUEST) += kvm.o |
85 | obj-$(CONFIG_KVM_CLOCK) += kvmclock.o | 87 | obj-$(CONFIG_KVM_CLOCK) += kvmclock.o |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 4558f0d0822d..ce664f33ea8e 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -219,6 +219,8 @@ static int __init | |||
219 | acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end) | 219 | acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end) |
220 | { | 220 | { |
221 | struct acpi_madt_local_x2apic *processor = NULL; | 221 | struct acpi_madt_local_x2apic *processor = NULL; |
222 | int apic_id; | ||
223 | u8 enabled; | ||
222 | 224 | ||
223 | processor = (struct acpi_madt_local_x2apic *)header; | 225 | processor = (struct acpi_madt_local_x2apic *)header; |
224 | 226 | ||
@@ -227,6 +229,8 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end) | |||
227 | 229 | ||
228 | acpi_table_print_madt_entry(header); | 230 | acpi_table_print_madt_entry(header); |
229 | 231 | ||
232 | apic_id = processor->local_apic_id; | ||
233 | enabled = processor->lapic_flags & ACPI_MADT_ENABLED; | ||
230 | #ifdef CONFIG_X86_X2APIC | 234 | #ifdef CONFIG_X86_X2APIC |
231 | /* | 235 | /* |
232 | * We need to register disabled CPU as well to permit | 236 | * We need to register disabled CPU as well to permit |
@@ -235,8 +239,10 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end) | |||
235 | * to not preallocating memory for all NR_CPUS | 239 | * to not preallocating memory for all NR_CPUS |
236 | * when we use CPU hotplug. | 240 | * when we use CPU hotplug. |
237 | */ | 241 | */ |
238 | acpi_register_lapic(processor->local_apic_id, /* APIC ID */ | 242 | if (!cpu_has_x2apic && (apic_id >= 0xff) && enabled) |
239 | processor->lapic_flags & ACPI_MADT_ENABLED); | 243 | printk(KERN_WARNING PREFIX "x2apic entry ignored\n"); |
244 | else | ||
245 | acpi_register_lapic(apic_id, enabled); | ||
240 | #else | 246 | #else |
241 | printk(KERN_WARNING PREFIX "x2apic entry ignored\n"); | 247 | printk(KERN_WARNING PREFIX "x2apic entry ignored\n"); |
242 | #endif | 248 | #endif |
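
The x2apic MADT parser grows a sanity check: xAPIC IDs are 8 bits wide, with 0xff reserved as the broadcast value, so on a CPU without x2apic support an enabled processor entry with apic_id >= 0xff is unaddressable and is now warned about and skipped instead of registered. The guard, restated as a predicate (illustrative only, not from the patch):

#include <linux/types.h>

static bool x2apic_madt_entry_usable(int apic_id, bool has_x2apic, bool enabled)
{
        /* Without x2APIC mode, only 8-bit xAPIC IDs below the 0xff
         * broadcast value can actually be addressed. */
        return has_x2apic || apic_id < 0xff || !enabled;
}
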
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 4c39baa8facc..be16854591cc 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c | |||
@@ -119,20 +119,49 @@ bool __init early_is_amd_nb(u32 device) | |||
119 | return false; | 119 | return false; |
120 | } | 120 | } |
121 | 121 | ||
122 | struct resource *amd_get_mmconfig_range(struct resource *res) | ||
123 | { | ||
124 | u32 address; | ||
125 | u64 base, msr; | ||
126 | unsigned segn_busn_bits; | ||
127 | |||
128 | if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) | ||
129 | return NULL; | ||
130 | |||
131 | /* assume all cpus from fam10h have mmconfig */ | ||
132 | if (boot_cpu_data.x86 < 0x10) | ||
133 | return NULL; | ||
134 | |||
135 | address = MSR_FAM10H_MMIO_CONF_BASE; | ||
136 | rdmsrl(address, msr); | ||
137 | |||
138 | /* mmconfig is not enabled */ | ||
139 | if (!(msr & FAM10H_MMIO_CONF_ENABLE)) | ||
140 | return NULL; | ||
141 | |||
142 | base = msr & (FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT); | ||
143 | |||
144 | segn_busn_bits = (msr >> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) & | ||
145 | FAM10H_MMIO_CONF_BUSRANGE_MASK; | ||
146 | |||
147 | res->flags = IORESOURCE_MEM; | ||
148 | res->start = base; | ||
149 | res->end = base + (1ULL<<(segn_busn_bits + 20)) - 1; | ||
150 | return res; | ||
151 | } | ||
152 | |||
122 | int amd_get_subcaches(int cpu) | 153 | int amd_get_subcaches(int cpu) |
123 | { | 154 | { |
124 | struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link; | 155 | struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link; |
125 | unsigned int mask; | 156 | unsigned int mask; |
126 | int cuid = 0; | 157 | int cuid; |
127 | 158 | ||
128 | if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) | 159 | if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) |
129 | return 0; | 160 | return 0; |
130 | 161 | ||
131 | pci_read_config_dword(link, 0x1d4, &mask); | 162 | pci_read_config_dword(link, 0x1d4, &mask); |
132 | 163 | ||
133 | #ifdef CONFIG_SMP | ||
134 | cuid = cpu_data(cpu).compute_unit_id; | 164 | cuid = cpu_data(cpu).compute_unit_id; |
135 | #endif | ||
136 | return (mask >> (4 * cuid)) & 0xf; | 165 | return (mask >> (4 * cuid)) & 0xf; |
137 | } | 166 | } |
138 | 167 | ||
@@ -141,7 +170,7 @@ int amd_set_subcaches(int cpu, int mask) | |||
141 | static unsigned int reset, ban; | 170 | static unsigned int reset, ban; |
142 | struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu)); | 171 | struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu)); |
143 | unsigned int reg; | 172 | unsigned int reg; |
144 | int cuid = 0; | 173 | int cuid; |
145 | 174 | ||
146 | if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING) || mask > 0xf) | 175 | if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING) || mask > 0xf) |
147 | return -EINVAL; | 176 | return -EINVAL; |
@@ -159,9 +188,7 @@ int amd_set_subcaches(int cpu, int mask) | |||
159 | pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000); | 188 | pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000); |
160 | } | 189 | } |
161 | 190 | ||
162 | #ifdef CONFIG_SMP | ||
163 | cuid = cpu_data(cpu).compute_unit_id; | 191 | cuid = cpu_data(cpu).compute_unit_id; |
164 | #endif | ||
165 | mask <<= 4 * cuid; | 192 | mask <<= 4 * cuid; |
166 | mask |= (0xf ^ (1 << cuid)) << 26; | 193 | mask |= (0xf ^ (1 << cuid)) << 26; |
167 | 194 | ||
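
The new amd_get_mmconfig_range() helper derives the whole MMCONFIG window from one MSR: bit 0 gates the feature, the masked base field gives the start address, and the bus-range field says how many 1 MiB bus segments the window decodes, hence the 1ULL << (segn_busn_bits + 20) size. A minimal user-space sketch of the same arithmetic; the field constants mirror the kernel's msr-index.h values and the sample MSR value is made up:

    #include <stdio.h>
    #include <stdint.h>

    /* field layout of MSR_FAM10H_MMIO_CONF_BASE (per msr-index.h) */
    #define CONF_ENABLE          (1ULL << 0)
    #define CONF_BUSRANGE_SHIFT  2
    #define CONF_BUSRANGE_MASK   0xfULL
    #define CONF_BASE_SHIFT      20
    #define CONF_BASE_MASK       0xfffffffULL

    int main(void)
    {
        /* sample: base 0xe0000000, 2^8 buses, enable bit set */
        uint64_t msr = 0xe0000000ULL | (8ULL << CONF_BUSRANGE_SHIFT) | CONF_ENABLE;
        uint64_t base = msr & (CONF_BASE_MASK << CONF_BASE_SHIFT);
        unsigned bits = (msr >> CONF_BUSRANGE_SHIFT) & CONF_BUSRANGE_MASK;

        /* each bus decodes 1 MiB, so the window is 2^(bits + 20) bytes */
        printf("MMCONFIG: %#llx - %#llx\n", (unsigned long long)base,
               (unsigned long long)(base + (1ULL << (bits + 20)) - 1));
        return 0;
    }

Eight bus-range bits give a 256 MiB window, so this prints 0xe0000000 - 0xefffffff.
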
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 3d2661ca6542..6e76c191a835 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c | |||
@@ -88,13 +88,13 @@ static u32 __init allocate_aperture(void) | |||
88 | */ | 88 | */ |
89 | addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR, | 89 | addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR, |
90 | aper_size, aper_size); | 90 | aper_size, aper_size); |
91 | if (addr == MEMBLOCK_ERROR || addr + aper_size > GART_MAX_ADDR) { | 91 | if (!addr || addr + aper_size > GART_MAX_ADDR) { |
92 | printk(KERN_ERR | 92 | printk(KERN_ERR |
93 | "Cannot allocate aperture memory hole (%lx,%uK)\n", | 93 | "Cannot allocate aperture memory hole (%lx,%uK)\n", |
94 | addr, aper_size>>10); | 94 | addr, aper_size>>10); |
95 | return 0; | 95 | return 0; |
96 | } | 96 | } |
97 | memblock_x86_reserve_range(addr, addr + aper_size, "aperture64"); | 97 | memblock_reserve(addr, aper_size); |
98 | /* | 98 | /* |
99 | * Kmemleak should not scan this block as it may not be mapped via the | 99 | * Kmemleak should not scan this block as it may not be mapped via the |
100 | * kernel direct mapping. | 100 | * kernel direct mapping. |
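
This hunk is part of the generic-memblock conversion: the x86-only wrappers go away, failure is now a zero return rather than MEMBLOCK_ERROR, and the reserve call takes (base, size) instead of (start, end). A minimal sketch of the new calling convention, assuming kernel context:

    u64 addr;

    /* find an aper_size-aligned hole of aper_size bytes in the window */
    addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR,
                                  aper_size, aper_size);
    if (!addr)                          /* 0 now means "no range found" */
        return 0;
    memblock_reserve(addr, aper_size);  /* (base, size), not (start, end) */
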
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index 767fd04f2843..0ae0323b1f9c 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile | |||
@@ -10,6 +10,7 @@ obj-$(CONFIG_SMP) += ipi.o | |||
10 | 10 | ||
11 | ifeq ($(CONFIG_X86_64),y) | 11 | ifeq ($(CONFIG_X86_64),y) |
12 | # APIC probe will depend on the listing order here | 12 | # APIC probe will depend on the listing order here |
13 | obj-$(CONFIG_X86_NUMACHIP) += apic_numachip.o | ||
13 | obj-$(CONFIG_X86_UV) += x2apic_uv_x.o | 14 | obj-$(CONFIG_X86_UV) += x2apic_uv_x.o |
14 | obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o | 15 | obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o |
15 | obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o | 16 | obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o |
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index f98d84caf94c..2eec05b6d1b8 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -146,16 +146,26 @@ __setup("apicpmtimer", setup_apicpmtimer); | |||
146 | int x2apic_mode; | 146 | int x2apic_mode; |
147 | #ifdef CONFIG_X86_X2APIC | 147 | #ifdef CONFIG_X86_X2APIC |
148 | /* x2apic enabled before OS handover */ | 148 | /* x2apic enabled before OS handover */ |
149 | static int x2apic_preenabled; | 149 | int x2apic_preenabled; |
150 | static int x2apic_disabled; | ||
151 | static int nox2apic; | ||
150 | static __init int setup_nox2apic(char *str) | 152 | static __init int setup_nox2apic(char *str) |
151 | { | 153 | { |
152 | if (x2apic_enabled()) { | 154 | if (x2apic_enabled()) { |
153 | pr_warning("Bios already enabled x2apic, " | 155 | int apicid = native_apic_msr_read(APIC_ID); |
154 | "can't enforce nox2apic"); | 156 | |
155 | return 0; | 157 | if (apicid >= 255) { |
156 | } | 158 | pr_warning("Apicid: %08x, cannot enforce nox2apic\n", |
159 | apicid); | ||
160 | return 0; | ||
161 | } | ||
162 | |||
163 | pr_warning("x2apic already enabled. will disable it\n"); | ||
164 | } else | ||
165 | setup_clear_cpu_cap(X86_FEATURE_X2APIC); | ||
166 | |||
167 | nox2apic = 1; | ||
157 | 168 | ||
158 | setup_clear_cpu_cap(X86_FEATURE_X2APIC); | ||
159 | return 0; | 169 | return 0; |
160 | } | 170 | } |
161 | early_param("nox2apic", setup_nox2apic); | 171 | early_param("nox2apic", setup_nox2apic); |
@@ -250,6 +260,7 @@ u32 native_safe_apic_wait_icr_idle(void) | |||
250 | send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; | 260 | send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; |
251 | if (!send_status) | 261 | if (!send_status) |
252 | break; | 262 | break; |
263 | inc_irq_stat(icr_read_retry_count); | ||
253 | udelay(100); | 264 | udelay(100); |
254 | } while (timeout++ < 1000); | 265 | } while (timeout++ < 1000); |
255 | 266 | ||
@@ -876,8 +887,8 @@ void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs) | |||
876 | * Besides, if we don't, timer interrupts ignore the global | 887 | * Besides, if we don't, timer interrupts ignore the global |
877 | * interrupt lock, which is the WrongThing (tm) to do. | 888 | * interrupt lock, which is the WrongThing (tm) to do. |
878 | */ | 889 | */ |
879 | exit_idle(); | ||
880 | irq_enter(); | 890 | irq_enter(); |
891 | exit_idle(); | ||
881 | local_apic_timer_interrupt(); | 892 | local_apic_timer_interrupt(); |
882 | irq_exit(); | 893 | irq_exit(); |
883 | 894 | ||
@@ -1431,6 +1442,45 @@ void __init bsp_end_local_APIC_setup(void) | |||
1431 | } | 1442 | } |
1432 | 1443 | ||
1433 | #ifdef CONFIG_X86_X2APIC | 1444 | #ifdef CONFIG_X86_X2APIC |
1445 | /* | ||
1446 | * xapic and x2apic must be disabled together before plain xapic mode is re-enabled | ||
1447 | */ | ||
1448 | static inline void __disable_x2apic(u64 msr) | ||
1449 | { | ||
1450 | wrmsrl(MSR_IA32_APICBASE, | ||
1451 | msr & ~(X2APIC_ENABLE | XAPIC_ENABLE)); | ||
1452 | wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE); | ||
1453 | } | ||
1454 | |||
1455 | static __init void disable_x2apic(void) | ||
1456 | { | ||
1457 | u64 msr; | ||
1458 | |||
1459 | if (!cpu_has_x2apic) | ||
1460 | return; | ||
1461 | |||
1462 | rdmsrl(MSR_IA32_APICBASE, msr); | ||
1463 | if (msr & X2APIC_ENABLE) { | ||
1464 | u32 x2apic_id = read_apic_id(); | ||
1465 | |||
1466 | if (x2apic_id >= 255) | ||
1467 | panic("Cannot disable x2apic, id: %08x\n", x2apic_id); | ||
1468 | |||
1469 | pr_info("Disabling x2apic\n"); | ||
1470 | __disable_x2apic(msr); | ||
1471 | |||
1472 | if (nox2apic) { | ||
1473 | clear_cpu_cap(&cpu_data(0), X86_FEATURE_X2APIC); | ||
1474 | setup_clear_cpu_cap(X86_FEATURE_X2APIC); | ||
1475 | } | ||
1476 | |||
1477 | x2apic_disabled = 1; | ||
1478 | x2apic_mode = 0; | ||
1479 | |||
1480 | register_lapic_address(mp_lapic_addr); | ||
1481 | } | ||
1482 | } | ||
1483 | |||
1434 | void check_x2apic(void) | 1484 | void check_x2apic(void) |
1435 | { | 1485 | { |
1436 | if (x2apic_enabled()) { | 1486 | if (x2apic_enabled()) { |
@@ -1441,15 +1491,20 @@ void check_x2apic(void) | |||
1441 | 1491 | ||
1442 | void enable_x2apic(void) | 1492 | void enable_x2apic(void) |
1443 | { | 1493 | { |
1444 | int msr, msr2; | 1494 | u64 msr; |
1495 | |||
1496 | rdmsrl(MSR_IA32_APICBASE, msr); | ||
1497 | if (x2apic_disabled) { | ||
1498 | __disable_x2apic(msr); | ||
1499 | return; | ||
1500 | } | ||
1445 | 1501 | ||
1446 | if (!x2apic_mode) | 1502 | if (!x2apic_mode) |
1447 | return; | 1503 | return; |
1448 | 1504 | ||
1449 | rdmsr(MSR_IA32_APICBASE, msr, msr2); | ||
1450 | if (!(msr & X2APIC_ENABLE)) { | 1505 | if (!(msr & X2APIC_ENABLE)) { |
1451 | printk_once(KERN_INFO "Enabling x2apic\n"); | 1506 | printk_once(KERN_INFO "Enabling x2apic\n"); |
1452 | wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, msr2); | 1507 | wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE); |
1453 | } | 1508 | } |
1454 | } | 1509 | } |
1455 | #endif /* CONFIG_X86_X2APIC */ | 1510 | #endif /* CONFIG_X86_X2APIC */ |
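
Note the accessor change in enable_x2apic(): the old code read MSR_IA32_APICBASE as two 32-bit halves, while the disable path added above wants the whole value in one u64, so everything moves to rdmsrl()/wrmsrl(). The two idioms side by side, kernel context assumed:

    u32 lo, hi;
    u64 msr;

    rdmsr(MSR_IA32_APICBASE, lo, hi);   /* old: split halves */
    msr = ((u64)hi << 32) | lo;         /* manual reassembly */

    rdmsrl(MSR_IA32_APICBASE, msr);     /* new: one 64-bit read */
    wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE);
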
@@ -1486,25 +1541,34 @@ void __init enable_IR_x2apic(void) | |||
1486 | ret = save_ioapic_entries(); | 1541 | ret = save_ioapic_entries(); |
1487 | if (ret) { | 1542 | if (ret) { |
1488 | pr_info("Saving IO-APIC state failed: %d\n", ret); | 1543 | pr_info("Saving IO-APIC state failed: %d\n", ret); |
1489 | goto out; | 1544 | return; |
1490 | } | 1545 | } |
1491 | 1546 | ||
1492 | local_irq_save(flags); | 1547 | local_irq_save(flags); |
1493 | legacy_pic->mask_all(); | 1548 | legacy_pic->mask_all(); |
1494 | mask_ioapic_entries(); | 1549 | mask_ioapic_entries(); |
1495 | 1550 | ||
1551 | if (x2apic_preenabled && nox2apic) | ||
1552 | disable_x2apic(); | ||
1553 | |||
1496 | if (dmar_table_init_ret) | 1554 | if (dmar_table_init_ret) |
1497 | ret = -1; | 1555 | ret = -1; |
1498 | else | 1556 | else |
1499 | ret = enable_IR(); | 1557 | ret = enable_IR(); |
1500 | 1558 | ||
1559 | if (!x2apic_supported()) | ||
1560 | goto skip_x2apic; | ||
1561 | |||
1501 | if (ret < 0) { | 1562 | if (ret < 0) { |
1502 | /* IR is required if there is APIC ID > 255 even when running | 1563 | /* IR is required if there is APIC ID > 255 even when running |
1503 | * under KVM | 1564 | * under KVM |
1504 | */ | 1565 | */ |
1505 | if (max_physical_apicid > 255 || | 1566 | if (max_physical_apicid > 255 || |
1506 | !hypervisor_x2apic_available()) | 1567 | !hypervisor_x2apic_available()) { |
1507 | goto nox2apic; | 1568 | if (x2apic_preenabled) |
1569 | disable_x2apic(); | ||
1570 | goto skip_x2apic; | ||
1571 | } | ||
1508 | /* | 1572 | /* |
1509 | * without IR all CPUs can be addressed by IOAPIC/MSI | 1573 | * without IR all CPUs can be addressed by IOAPIC/MSI |
1510 | * only in physical mode | 1574 | * only in physical mode |
@@ -1512,8 +1576,10 @@ void __init enable_IR_x2apic(void) | |||
1512 | x2apic_force_phys(); | 1576 | x2apic_force_phys(); |
1513 | } | 1577 | } |
1514 | 1578 | ||
1515 | if (ret == IRQ_REMAP_XAPIC_MODE) | 1579 | if (ret == IRQ_REMAP_XAPIC_MODE) { |
1516 | goto nox2apic; | 1580 | pr_info("x2apic not enabled, IRQ remapping is in xapic mode\n"); |
1581 | goto skip_x2apic; | ||
1582 | } | ||
1517 | 1583 | ||
1518 | x2apic_enabled = 1; | 1584 | x2apic_enabled = 1; |
1519 | 1585 | ||
@@ -1523,22 +1589,11 @@ void __init enable_IR_x2apic(void) | |||
1523 | pr_info("Enabled x2apic\n"); | 1589 | pr_info("Enabled x2apic\n"); |
1524 | } | 1590 | } |
1525 | 1591 | ||
1526 | nox2apic: | 1592 | skip_x2apic: |
1527 | if (ret < 0) /* IR enabling failed */ | 1593 | if (ret < 0) /* IR enabling failed */ |
1528 | restore_ioapic_entries(); | 1594 | restore_ioapic_entries(); |
1529 | legacy_pic->restore_mask(); | 1595 | legacy_pic->restore_mask(); |
1530 | local_irq_restore(flags); | 1596 | local_irq_restore(flags); |
1531 | |||
1532 | out: | ||
1533 | if (x2apic_enabled || !x2apic_supported()) | ||
1534 | return; | ||
1535 | |||
1536 | if (x2apic_preenabled) | ||
1537 | panic("x2apic: enabled by BIOS but kernel init failed."); | ||
1538 | else if (ret == IRQ_REMAP_XAPIC_MODE) | ||
1539 | pr_info("x2apic not enabled, IRQ remapping is in xapic mode\n"); | ||
1540 | else if (ret < 0) | ||
1541 | pr_info("x2apic not enabled, IRQ remapping init failed\n"); | ||
1542 | } | 1597 | } |
1543 | 1598 | ||
1544 | #ifdef CONFIG_X86_64 | 1599 | #ifdef CONFIG_X86_64 |
@@ -1809,8 +1864,8 @@ void smp_spurious_interrupt(struct pt_regs *regs) | |||
1809 | { | 1864 | { |
1810 | u32 v; | 1865 | u32 v; |
1811 | 1866 | ||
1812 | exit_idle(); | ||
1813 | irq_enter(); | 1867 | irq_enter(); |
1868 | exit_idle(); | ||
1814 | /* | 1869 | /* |
1815 | * Check if this really is a spurious interrupt and ACK it | 1870 | * Check if this really is a spurious interrupt and ACK it |
1816 | * if it is a vectored one. Just in case... | 1871 | * if it is a vectored one. Just in case... |
@@ -1846,8 +1901,8 @@ void smp_error_interrupt(struct pt_regs *regs) | |||
1846 | "Illegal register address", /* APIC Error Bit 7 */ | 1901 | "Illegal register address", /* APIC Error Bit 7 */ |
1847 | }; | 1902 | }; |
1848 | 1903 | ||
1849 | exit_idle(); | ||
1850 | irq_enter(); | 1904 | irq_enter(); |
1905 | exit_idle(); | ||
1851 | /* First tickle the hardware, only then report what went on. -- REW */ | 1906 | /* First tickle the hardware, only then report what went on. -- REW */ |
1852 | v0 = apic_read(APIC_ESR); | 1907 | v0 = apic_read(APIC_ESR); |
1853 | apic_write(APIC_ESR, 0); | 1908 | apic_write(APIC_ESR, 0); |
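
The exit_idle()/irq_enter() swap recurs in every handler touched here (and in io_apic.c below): exit_idle() can fire idle notifiers that use RCU, so it must run only after irq_enter() has taken the CPU out of its RCU-idle state. The resulting entry pattern, sketched with a hypothetical handler name:

    void smp_example_interrupt(struct pt_regs *regs)
    {
        irq_enter();    /* leave the RCU/nohz idle state first */
        exit_idle();    /* idle notifiers may now use RCU safely */
        /* ... service the vector ... */
        irq_exit();
    }
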
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index f7a41e4cae47..8c3cdded6f2b 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c | |||
@@ -62,7 +62,7 @@ static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask) | |||
62 | * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel | 62 | * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel |
63 | * document number 292116). So here it goes... | 63 | * document number 292116). So here it goes... |
64 | */ | 64 | */ |
65 | static void flat_init_apic_ldr(void) | 65 | void flat_init_apic_ldr(void) |
66 | { | 66 | { |
67 | unsigned long val; | 67 | unsigned long val; |
68 | unsigned long num, id; | 68 | unsigned long num, id; |
@@ -171,9 +171,14 @@ static int flat_phys_pkg_id(int initial_apic_id, int index_msb) | |||
171 | return initial_apic_id >> index_msb; | 171 | return initial_apic_id >> index_msb; |
172 | } | 172 | } |
173 | 173 | ||
174 | static int flat_probe(void) | ||
175 | { | ||
176 | return 1; | ||
177 | } | ||
178 | |||
174 | static struct apic apic_flat = { | 179 | static struct apic apic_flat = { |
175 | .name = "flat", | 180 | .name = "flat", |
176 | .probe = NULL, | 181 | .probe = flat_probe, |
177 | .acpi_madt_oem_check = flat_acpi_madt_oem_check, | 182 | .acpi_madt_oem_check = flat_acpi_madt_oem_check, |
178 | .apic_id_registered = flat_apic_id_registered, | 183 | .apic_id_registered = flat_apic_id_registered, |
179 | 184 | ||
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c new file mode 100644 index 000000000000..09d3d8c1cd99 --- /dev/null +++ b/arch/x86/kernel/apic/apic_numachip.c | |||
@@ -0,0 +1,294 @@ | |||
1 | /* | ||
2 | * This file is subject to the terms and conditions of the GNU General Public | ||
3 | * License. See the file "COPYING" in the main directory of this archive | ||
4 | * for more details. | ||
5 | * | ||
6 | * Numascale NumaConnect-Specific APIC Code | ||
7 | * | ||
8 | * Copyright (C) 2011 Numascale AS. All rights reserved. | ||
9 | * | ||
10 | * Send feedback to <support@numascale.com> | ||
11 | * | ||
12 | */ | ||
13 | |||
14 | #include <linux/errno.h> | ||
15 | #include <linux/threads.h> | ||
16 | #include <linux/cpumask.h> | ||
17 | #include <linux/string.h> | ||
18 | #include <linux/kernel.h> | ||
19 | #include <linux/module.h> | ||
20 | #include <linux/ctype.h> | ||
21 | #include <linux/init.h> | ||
22 | #include <linux/hardirq.h> | ||
23 | #include <linux/delay.h> | ||
24 | |||
25 | #include <asm/numachip/numachip_csr.h> | ||
26 | #include <asm/smp.h> | ||
27 | #include <asm/apic.h> | ||
28 | #include <asm/ipi.h> | ||
29 | #include <asm/apic_flat_64.h> | ||
30 | |||
31 | static int numachip_system __read_mostly; | ||
32 | |||
33 | static struct apic apic_numachip __read_mostly; | ||
34 | |||
35 | static unsigned int get_apic_id(unsigned long x) | ||
36 | { | ||
37 | unsigned long value; | ||
38 | unsigned int id; | ||
39 | |||
40 | rdmsrl(MSR_FAM10H_NODE_ID, value); | ||
41 | id = ((x >> 24) & 0xffU) | ((value << 2) & 0x3f00U); | ||
42 | |||
43 | return id; | ||
44 | } | ||
45 | |||
46 | static unsigned long set_apic_id(unsigned int id) | ||
47 | { | ||
48 | unsigned long x; | ||
49 | |||
50 | x = ((id & 0xffU) << 24); | ||
51 | return x; | ||
52 | } | ||
53 | |||
54 | static unsigned int read_xapic_id(void) | ||
55 | { | ||
56 | return get_apic_id(apic_read(APIC_ID)); | ||
57 | } | ||
58 | |||
59 | static int numachip_apic_id_registered(void) | ||
60 | { | ||
61 | return physid_isset(read_xapic_id(), phys_cpu_present_map); | ||
62 | } | ||
63 | |||
64 | static int numachip_phys_pkg_id(int initial_apic_id, int index_msb) | ||
65 | { | ||
66 | return initial_apic_id >> index_msb; | ||
67 | } | ||
68 | |||
69 | static const struct cpumask *numachip_target_cpus(void) | ||
70 | { | ||
71 | return cpu_online_mask; | ||
72 | } | ||
73 | |||
74 | static void numachip_vector_allocation_domain(int cpu, struct cpumask *retmask) | ||
75 | { | ||
76 | cpumask_clear(retmask); | ||
77 | cpumask_set_cpu(cpu, retmask); | ||
78 | } | ||
79 | |||
80 | static int __cpuinit numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip) | ||
81 | { | ||
82 | union numachip_csr_g3_ext_irq_gen int_gen; | ||
83 | |||
84 | int_gen.s._destination_apic_id = phys_apicid; | ||
85 | int_gen.s._vector = 0; | ||
86 | int_gen.s._msgtype = APIC_DM_INIT >> 8; | ||
87 | int_gen.s._index = 0; | ||
88 | |||
89 | write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v); | ||
90 | |||
91 | int_gen.s._msgtype = APIC_DM_STARTUP >> 8; | ||
92 | int_gen.s._vector = start_rip >> 12; | ||
93 | |||
94 | write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v); | ||
95 | |||
96 | atomic_set(&init_deasserted, 1); | ||
97 | return 0; | ||
98 | } | ||
99 | |||
100 | static void numachip_send_IPI_one(int cpu, int vector) | ||
101 | { | ||
102 | union numachip_csr_g3_ext_irq_gen int_gen; | ||
103 | int apicid = per_cpu(x86_cpu_to_apicid, cpu); | ||
104 | |||
105 | int_gen.s._destination_apic_id = apicid; | ||
106 | int_gen.s._vector = vector; | ||
107 | int_gen.s._msgtype = (vector == NMI_VECTOR ? APIC_DM_NMI : APIC_DM_FIXED) >> 8; | ||
108 | int_gen.s._index = 0; | ||
109 | |||
110 | write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v); | ||
111 | } | ||
112 | |||
113 | static void numachip_send_IPI_mask(const struct cpumask *mask, int vector) | ||
114 | { | ||
115 | unsigned int cpu; | ||
116 | |||
117 | for_each_cpu(cpu, mask) | ||
118 | numachip_send_IPI_one(cpu, vector); | ||
119 | } | ||
120 | |||
121 | static void numachip_send_IPI_mask_allbutself(const struct cpumask *mask, | ||
122 | int vector) | ||
123 | { | ||
124 | unsigned int this_cpu = smp_processor_id(); | ||
125 | unsigned int cpu; | ||
126 | |||
127 | for_each_cpu(cpu, mask) { | ||
128 | if (cpu != this_cpu) | ||
129 | numachip_send_IPI_one(cpu, vector); | ||
130 | } | ||
131 | } | ||
132 | |||
133 | static void numachip_send_IPI_allbutself(int vector) | ||
134 | { | ||
135 | unsigned int this_cpu = smp_processor_id(); | ||
136 | unsigned int cpu; | ||
137 | |||
138 | for_each_online_cpu(cpu) { | ||
139 | if (cpu != this_cpu) | ||
140 | numachip_send_IPI_one(cpu, vector); | ||
141 | } | ||
142 | } | ||
143 | |||
144 | static void numachip_send_IPI_all(int vector) | ||
145 | { | ||
146 | numachip_send_IPI_mask(cpu_online_mask, vector); | ||
147 | } | ||
148 | |||
149 | static void numachip_send_IPI_self(int vector) | ||
150 | { | ||
151 | __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); | ||
152 | } | ||
153 | |||
154 | static unsigned int numachip_cpu_mask_to_apicid(const struct cpumask *cpumask) | ||
155 | { | ||
156 | int cpu; | ||
157 | |||
158 | /* | ||
159 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
160 | * May as well be the first. | ||
161 | */ | ||
162 | cpu = cpumask_first(cpumask); | ||
163 | if (likely((unsigned)cpu < nr_cpu_ids)) | ||
164 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
165 | |||
166 | return BAD_APICID; | ||
167 | } | ||
168 | |||
169 | static unsigned int | ||
170 | numachip_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
171 | const struct cpumask *andmask) | ||
172 | { | ||
173 | int cpu; | ||
174 | |||
175 | /* | ||
176 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
177 | * May as well be the first. | ||
178 | */ | ||
179 | for_each_cpu_and(cpu, cpumask, andmask) { | ||
180 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | ||
181 | break; | ||
182 | } | ||
183 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
184 | } | ||
185 | |||
186 | static int __init numachip_probe(void) | ||
187 | { | ||
188 | return apic == &apic_numachip; | ||
189 | } | ||
190 | |||
191 | static void __init map_csrs(void) | ||
192 | { | ||
193 | printk(KERN_INFO "NumaChip: Mapping local CSR space (%016llx - %016llx)\n", | ||
194 | NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_BASE + NUMACHIP_LCSR_SIZE - 1); | ||
195 | init_extra_mapping_uc(NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_SIZE); | ||
196 | |||
197 | printk(KERN_INFO "NumaChip: Mapping global CSR space (%016llx - %016llx)\n", | ||
198 | NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_BASE + NUMACHIP_GCSR_SIZE - 1); | ||
199 | init_extra_mapping_uc(NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_SIZE); | ||
200 | } | ||
201 | |||
202 | static void fixup_cpu_id(struct cpuinfo_x86 *c, int node) | ||
203 | { | ||
204 | c->phys_proc_id = node; | ||
205 | per_cpu(cpu_llc_id, smp_processor_id()) = node; | ||
206 | } | ||
207 | |||
208 | static int __init numachip_system_init(void) | ||
209 | { | ||
210 | unsigned int val; | ||
211 | |||
212 | if (!numachip_system) | ||
213 | return 0; | ||
214 | |||
215 | x86_cpuinit.fixup_cpu_id = fixup_cpu_id; | ||
216 | |||
217 | map_csrs(); | ||
218 | |||
219 | val = read_lcsr(CSR_G0_NODE_IDS); | ||
220 | printk(KERN_INFO "NumaChip: Local NodeID = %08x\n", val); | ||
221 | |||
222 | return 0; | ||
223 | } | ||
224 | early_initcall(numachip_system_init); | ||
225 | |||
226 | static int numachip_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | ||
227 | { | ||
228 | if (!strncmp(oem_id, "NUMASC", 6)) { | ||
229 | numachip_system = 1; | ||
230 | return 1; | ||
231 | } | ||
232 | |||
233 | return 0; | ||
234 | } | ||
235 | |||
236 | static struct apic apic_numachip __refconst = { | ||
237 | |||
238 | .name = "NumaConnect system", | ||
239 | .probe = numachip_probe, | ||
240 | .acpi_madt_oem_check = numachip_acpi_madt_oem_check, | ||
241 | .apic_id_registered = numachip_apic_id_registered, | ||
242 | |||
243 | .irq_delivery_mode = dest_Fixed, | ||
244 | .irq_dest_mode = 0, /* physical */ | ||
245 | |||
246 | .target_cpus = numachip_target_cpus, | ||
247 | .disable_esr = 0, | ||
248 | .dest_logical = 0, | ||
249 | .check_apicid_used = NULL, | ||
250 | .check_apicid_present = NULL, | ||
251 | |||
252 | .vector_allocation_domain = numachip_vector_allocation_domain, | ||
253 | .init_apic_ldr = flat_init_apic_ldr, | ||
254 | |||
255 | .ioapic_phys_id_map = NULL, | ||
256 | .setup_apic_routing = NULL, | ||
257 | .multi_timer_check = NULL, | ||
258 | .cpu_present_to_apicid = default_cpu_present_to_apicid, | ||
259 | .apicid_to_cpu_present = NULL, | ||
260 | .setup_portio_remap = NULL, | ||
261 | .check_phys_apicid_present = default_check_phys_apicid_present, | ||
262 | .enable_apic_mode = NULL, | ||
263 | .phys_pkg_id = numachip_phys_pkg_id, | ||
264 | .mps_oem_check = NULL, | ||
265 | |||
266 | .get_apic_id = get_apic_id, | ||
267 | .set_apic_id = set_apic_id, | ||
268 | .apic_id_mask = 0xffU << 24, | ||
269 | |||
270 | .cpu_mask_to_apicid = numachip_cpu_mask_to_apicid, | ||
271 | .cpu_mask_to_apicid_and = numachip_cpu_mask_to_apicid_and, | ||
272 | |||
273 | .send_IPI_mask = numachip_send_IPI_mask, | ||
274 | .send_IPI_mask_allbutself = numachip_send_IPI_mask_allbutself, | ||
275 | .send_IPI_allbutself = numachip_send_IPI_allbutself, | ||
276 | .send_IPI_all = numachip_send_IPI_all, | ||
277 | .send_IPI_self = numachip_send_IPI_self, | ||
278 | |||
279 | .wakeup_secondary_cpu = numachip_wakeup_secondary, | ||
280 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, | ||
281 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, | ||
282 | .wait_for_init_deassert = NULL, | ||
283 | .smp_callin_clear_local_apic = NULL, | ||
284 | .inquire_remote_apic = NULL, /* REMRD not supported */ | ||
285 | |||
286 | .read = native_apic_mem_read, | ||
287 | .write = native_apic_mem_write, | ||
288 | .icr_read = native_apic_icr_read, | ||
289 | .icr_write = native_apic_icr_write, | ||
290 | .wait_icr_idle = native_apic_wait_icr_idle, | ||
291 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, | ||
292 | }; | ||
293 | apic_driver(apic_numachip); | ||
294 | |||
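
On a NumaChip fabric the 8-bit xAPIC ID alone is not unique, so get_apic_id() widens it with node bits pulled from the NODE_ID MSR. A standalone sketch of the bit composition; the MSR layout is NumaChip-specific and the values below are purely illustrative:

    #include <stdio.h>
    #include <stdint.h>

    /* mirrors get_apic_id(): low 8 bits from APIC_ID bits 31-24,
     * node bits from the NODE_ID MSR shifted into bits 8-13 */
    static unsigned int numachip_apic_id(uint32_t apic_reg, uint64_t node_msr)
    {
        return ((apic_reg >> 24) & 0xffU) | ((node_msr << 2) & 0x3f00U);
    }

    int main(void)
    {
        printf("apicid = %#x\n", numachip_apic_id(3U << 24, 0x140));
        return 0;   /* prints apicid = 0x503 */
    }
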
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 6d939d7847e2..fb072754bc1d 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -2421,8 +2421,8 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) | |||
2421 | unsigned vector, me; | 2421 | unsigned vector, me; |
2422 | 2422 | ||
2423 | ack_APIC_irq(); | 2423 | ack_APIC_irq(); |
2424 | exit_idle(); | ||
2425 | irq_enter(); | 2424 | irq_enter(); |
2425 | exit_idle(); | ||
2426 | 2426 | ||
2427 | me = smp_processor_id(); | 2427 | me = smp_processor_id(); |
2428 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { | 2428 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { |
@@ -2948,6 +2948,10 @@ static inline void __init check_timer(void) | |||
2948 | } | 2948 | } |
2949 | local_irq_disable(); | 2949 | local_irq_disable(); |
2950 | apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); | 2950 | apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); |
2951 | if (x2apic_preenabled) | ||
2952 | apic_printk(APIC_QUIET, KERN_INFO | ||
2953 | "Perhaps problem with the pre-enabled x2apic mode\n" | ||
2954 | "Try booting with x2apic and interrupt-remapping disabled in the bios.\n"); | ||
2951 | panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " | 2955 | panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " |
2952 | "report. Then try booting with the 'noapic' option.\n"); | 2956 | "report. Then try booting with the 'noapic' option.\n"); |
2953 | out: | 2957 | out: |
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 9d59bbacd4e3..79b05b88aa19 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
@@ -769,7 +769,12 @@ void __init uv_system_init(void) | |||
769 | for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) | 769 | for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) |
770 | uv_possible_blades += | 770 | uv_possible_blades += |
771 | hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8)); | 771 | hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8)); |
772 | printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades()); | 772 | |
773 | /* uv_num_possible_blades() is really the hub count */ | ||
774 | printk(KERN_INFO "UV: Found %d blades, %d hubs\n", | ||
775 | is_uv1_hub() ? uv_num_possible_blades() : | ||
776 | (uv_num_possible_blades() + 1) / 2, | ||
777 | uv_num_possible_blades()); | ||
773 | 778 | ||
774 | bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); | 779 | bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); |
775 | uv_blade_info = kzalloc(bytes, GFP_KERNEL); | 780 | uv_blade_info = kzalloc(bytes, GFP_KERNEL); |
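
The reworked banner accounts for UV2 packaging, where a blade carries two hubs, so the blade figure is the hub count halved with rounding up (UV1 stays one hub per blade). The same arithmetic in miniature:

    #include <stdio.h>

    static int blades(int hubs, int is_uv1)
    {
        return is_uv1 ? hubs : (hubs + 1) / 2;  /* round up on UV2 */
    }

    int main(void)
    {
        printf("UV2: 5 hubs -> %d blades\n", blades(5, 0));  /* 3 */
        printf("UV1: 5 hubs -> %d blades\n", blades(5, 1));  /* 5 */
        return 0;
    }
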
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index a46bd383953c..f76623cbe263 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
@@ -383,21 +383,21 @@ static int ignore_sys_suspend; | |||
383 | static int ignore_normal_resume; | 383 | static int ignore_normal_resume; |
384 | static int bounce_interval __read_mostly = DEFAULT_BOUNCE_INTERVAL; | 384 | static int bounce_interval __read_mostly = DEFAULT_BOUNCE_INTERVAL; |
385 | 385 | ||
386 | static int debug __read_mostly; | 386 | static bool debug __read_mostly; |
387 | static int smp __read_mostly; | 387 | static bool smp __read_mostly; |
388 | static int apm_disabled = -1; | 388 | static int apm_disabled = -1; |
389 | #ifdef CONFIG_SMP | 389 | #ifdef CONFIG_SMP |
390 | static int power_off; | 390 | static bool power_off; |
391 | #else | 391 | #else |
392 | static int power_off = 1; | 392 | static bool power_off = 1; |
393 | #endif | 393 | #endif |
394 | static int realmode_power_off; | 394 | static bool realmode_power_off; |
395 | #ifdef CONFIG_APM_ALLOW_INTS | 395 | #ifdef CONFIG_APM_ALLOW_INTS |
396 | static int allow_ints = 1; | 396 | static bool allow_ints = 1; |
397 | #else | 397 | #else |
398 | static int allow_ints; | 398 | static bool allow_ints; |
399 | #endif | 399 | #endif |
400 | static int broken_psr; | 400 | static bool broken_psr; |
401 | 401 | ||
402 | static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue); | 402 | static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue); |
403 | static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); | 403 | static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); |
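
These flags back module_param(..., bool, ...) entries, and the parameter code is being tightened to accept only genuine bool variables, hence the int-to-bool switch. The idiomatic form would use true/false initialisers; a sketch with illustrative permission bits:

    static bool debug __read_mostly;    /* defaults to false */
    static bool power_off = true;       /* reads better than = 1 */
    module_param(debug, bool, 0644);
    module_param(power_off, bool, 0444);
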
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 4f13fafc5264..68de2dc962ec 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c | |||
@@ -67,4 +67,6 @@ void common(void) { | |||
67 | OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); | 67 | OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); |
68 | OFFSET(BP_version, boot_params, hdr.version); | 68 | OFFSET(BP_version, boot_params, hdr.version); |
69 | OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment); | 69 | OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment); |
70 | OFFSET(BP_pref_address, boot_params, hdr.pref_address); | ||
71 | OFFSET(BP_code32_start, boot_params, hdr.code32_start); | ||
70 | } | 72 | } |
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 395a10e68067..85d98ab15cdc 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c | |||
@@ -3,6 +3,11 @@ | |||
3 | #include <linux/lguest.h> | 3 | #include <linux/lguest.h> |
4 | #include "../../../drivers/lguest/lg.h" | 4 | #include "../../../drivers/lguest/lg.h" |
5 | 5 | ||
6 | #define __SYSCALL_I386(nr, sym, compat) [nr] = 1, | ||
7 | static char syscalls[] = { | ||
8 | #include <asm/syscalls_32.h> | ||
9 | }; | ||
10 | |||
6 | /* workaround for a warning with -Wmissing-prototypes */ | 11 | /* workaround for a warning with -Wmissing-prototypes */ |
7 | void foo(void); | 12 | void foo(void); |
8 | 13 | ||
@@ -76,4 +81,7 @@ void foo(void) | |||
76 | OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode); | 81 | OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode); |
77 | OFFSET(LGUEST_PAGES_regs, lguest_pages, regs); | 82 | OFFSET(LGUEST_PAGES_regs, lguest_pages, regs); |
78 | #endif | 83 | #endif |
84 | BLANK(); | ||
85 | DEFINE(__NR_syscall_max, sizeof(syscalls) - 1); | ||
86 | DEFINE(NR_syscalls, sizeof(syscalls)); | ||
79 | } | 87 | } |
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index e72a1194af22..834e897b1e25 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c | |||
@@ -1,11 +1,12 @@ | |||
1 | #include <asm/ia32.h> | 1 | #include <asm/ia32.h> |
2 | 2 | ||
3 | #define __NO_STUBS 1 | 3 | #define __SYSCALL_64(nr, sym, compat) [nr] = 1, |
4 | #undef __SYSCALL | 4 | static char syscalls_64[] = { |
5 | #undef _ASM_X86_UNISTD_64_H | 5 | #include <asm/syscalls_64.h> |
6 | #define __SYSCALL(nr, sym) [nr] = 1, | 6 | }; |
7 | static char syscalls[] = { | 7 | #define __SYSCALL_I386(nr, sym, compat) [nr] = 1, |
8 | #include <asm/unistd.h> | 8 | static char syscalls_ia32[] = { |
9 | #include <asm/syscalls_32.h> | ||
9 | }; | 10 | }; |
10 | 11 | ||
11 | int main(void) | 12 | int main(void) |
@@ -72,7 +73,11 @@ int main(void) | |||
72 | OFFSET(TSS_ist, tss_struct, x86_tss.ist); | 73 | OFFSET(TSS_ist, tss_struct, x86_tss.ist); |
73 | BLANK(); | 74 | BLANK(); |
74 | 75 | ||
75 | DEFINE(__NR_syscall_max, sizeof(syscalls) - 1); | 76 | DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1); |
77 | DEFINE(NR_syscalls, sizeof(syscalls_64)); | ||
78 | |||
79 | DEFINE(__NR_ia32_syscall_max, sizeof(syscalls_ia32) - 1); | ||
80 | DEFINE(IA32_NR_syscalls, sizeof(syscalls_ia32)); | ||
76 | 81 | ||
77 | return 0; | 82 | return 0; |
78 | } | 83 | } |
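
The asm-offsets trick used for both tables: the syscall headers expand one __SYSCALL_64(nr, sym, compat) invocation per entry, and defining the macro as a designated initializer builds a char array whose sizeof is the highest syscall number plus one, so NR_syscalls falls out at compile time. A self-contained miniature:

    #include <stdio.h>

    /* stand-in for #include <asm/syscalls_64.h> */
    #define __SYSCALL_64(nr, sym, compat) [nr] = 1,
    static char syscalls_64[] = {
        __SYSCALL_64(0, sys_read, sys_read)
        __SYSCALL_64(1, sys_write, sys_write)
        __SYSCALL_64(59, sys_execve, stub_execve)
    };

    int main(void)
    {
        /* designated initializers size the array to highest nr + 1 */
        printf("__NR_syscall_max = %zu\n", sizeof(syscalls_64) - 1);  /* 59 */
        printf("NR_syscalls      = %zu\n", sizeof(syscalls_64));      /* 60 */
        return 0;
    }
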
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c index 452932d34730..5da1269e8ddc 100644 --- a/arch/x86/kernel/check.c +++ b/arch/x86/kernel/check.c | |||
@@ -62,7 +62,8 @@ early_param("memory_corruption_check_size", set_corruption_check_size); | |||
62 | 62 | ||
63 | void __init setup_bios_corruption_check(void) | 63 | void __init setup_bios_corruption_check(void) |
64 | { | 64 | { |
65 | u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */ | 65 | phys_addr_t start, end; |
66 | u64 i; | ||
66 | 67 | ||
67 | if (memory_corruption_check == -1) { | 68 | if (memory_corruption_check == -1) { |
68 | memory_corruption_check = | 69 | memory_corruption_check = |
@@ -82,28 +83,23 @@ void __init setup_bios_corruption_check(void) | |||
82 | 83 | ||
83 | corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); | 84 | corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); |
84 | 85 | ||
85 | while (addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) { | 86 | for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) { |
86 | u64 size; | 87 | start = clamp_t(phys_addr_t, round_up(start, PAGE_SIZE), |
87 | addr = memblock_x86_find_in_range_size(addr, &size, PAGE_SIZE); | 88 | PAGE_SIZE, corruption_check_size); |
89 | end = clamp_t(phys_addr_t, round_down(end, PAGE_SIZE), | ||
90 | PAGE_SIZE, corruption_check_size); | ||
91 | if (start >= end) | ||
92 | continue; | ||
88 | 93 | ||
89 | if (addr == MEMBLOCK_ERROR) | 94 | memblock_reserve(start, end - start); |
90 | break; | 95 | scan_areas[num_scan_areas].addr = start; |
91 | 96 | scan_areas[num_scan_areas].size = end - start; | |
92 | if (addr >= corruption_check_size) | ||
93 | break; | ||
94 | |||
95 | if ((addr + size) > corruption_check_size) | ||
96 | size = corruption_check_size - addr; | ||
97 | |||
98 | memblock_x86_reserve_range(addr, addr + size, "SCAN RAM"); | ||
99 | scan_areas[num_scan_areas].addr = addr; | ||
100 | scan_areas[num_scan_areas].size = size; | ||
101 | num_scan_areas++; | ||
102 | 97 | ||
103 | /* Assume we've already mapped this early memory */ | 98 | /* Assume we've already mapped this early memory */ |
104 | memset(__va(addr), 0, size); | 99 | memset(__va(start), 0, end - start); |
105 | 100 | ||
106 | addr += size; | 101 | if (++num_scan_areas >= MAX_SCAN_AREAS) |
102 | break; | ||
107 | } | 103 | } |
108 | 104 | ||
109 | if (num_scan_areas) | 105 | if (num_scan_areas) |
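
The rewritten scanner walks free memblock ranges and clips each one to page boundaries inside [PAGE_SIZE, corruption_check_size), skipping ranges that clamp to nothing. The clipping arithmetic in a standalone sketch with a made-up free range:

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SIZE 4096ULL

    static uint64_t clamp(uint64_t v, uint64_t lo, uint64_t hi)
    {
        return v < lo ? lo : (v > hi ? hi : v);
    }

    int main(void)
    {
        uint64_t limit = 64 * 1024;           /* corruption_check_size */
        uint64_t start = 0x0, end = 0x9fc00;  /* one free range */

        /* round inward to page boundaries, then clamp into the window */
        start = clamp((start + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1),
                      PAGE_SIZE, limit);
        end = clamp(end & ~(PAGE_SIZE - 1), PAGE_SIZE, limit);
        if (start < end)                      /* else: nothing left to scan */
            printf("scan %#llx-%#llx\n", (unsigned long long)start,
                   (unsigned long long)end);  /* scan 0x1000-0x10000 */
        return 0;
    }
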
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 0bab2b18bb20..f4773f4aae35 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -148,7 +148,6 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) | |||
148 | 148 | ||
149 | static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c) | 149 | static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c) |
150 | { | 150 | { |
151 | #ifdef CONFIG_SMP | ||
152 | /* calling is from identify_secondary_cpu() ? */ | 151 | /* calling is from identify_secondary_cpu() ? */ |
153 | if (!c->cpu_index) | 152 | if (!c->cpu_index) |
154 | return; | 153 | return; |
@@ -192,7 +191,6 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c) | |||
192 | 191 | ||
193 | valid_k7: | 192 | valid_k7: |
194 | ; | 193 | ; |
195 | #endif | ||
196 | } | 194 | } |
197 | 195 | ||
198 | static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) | 196 | static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) |
@@ -353,6 +351,13 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) | |||
353 | if (node == NUMA_NO_NODE) | 351 | if (node == NUMA_NO_NODE) |
354 | node = per_cpu(cpu_llc_id, cpu); | 352 | node = per_cpu(cpu_llc_id, cpu); |
355 | 353 | ||
354 | /* | ||
355 | * If core numbers are inconsistent, it's likely a multi-fabric platform, | ||
356 | * so invoke platform-specific handler | ||
357 | */ | ||
358 | if (c->phys_proc_id != node) | ||
359 | x86_cpuinit.fixup_cpu_id(c, node); | ||
360 | |||
356 | if (!node_online(node)) { | 361 | if (!node_online(node)) { |
357 | /* | 362 | /* |
358 | * Two possibilities here: | 363 | * Two possibilities here: |
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index e58d978e0758..159103c0b1f4 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c | |||
@@ -278,7 +278,7 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c) | |||
278 | } | 278 | } |
279 | #ifdef CONFIG_X86_32 | 279 | #ifdef CONFIG_X86_32 |
280 | /* Cyrix III family needs CX8 & PGE explicitly enabled. */ | 280 | /* Cyrix III family needs CX8 & PGE explicitly enabled. */ |
281 | if (c->x86_model >= 6 && c->x86_model <= 9) { | 281 | if (c->x86_model >= 6 && c->x86_model <= 13) { |
282 | rdmsr(MSR_VIA_FCR, lo, hi); | 282 | rdmsr(MSR_VIA_FCR, lo, hi); |
283 | lo |= (1<<1 | 1<<7); | 283 | lo |= (1<<1 | 1<<7); |
284 | wrmsr(MSR_VIA_FCR, lo, hi); | 284 | wrmsr(MSR_VIA_FCR, lo, hi); |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index aa003b13a831..d43cad74f166 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -676,9 +676,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) | |||
676 | if (this_cpu->c_early_init) | 676 | if (this_cpu->c_early_init) |
677 | this_cpu->c_early_init(c); | 677 | this_cpu->c_early_init(c); |
678 | 678 | ||
679 | #ifdef CONFIG_SMP | ||
680 | c->cpu_index = 0; | 679 | c->cpu_index = 0; |
681 | #endif | ||
682 | filter_cpuid_features(c, false); | 680 | filter_cpuid_features(c, false); |
683 | 681 | ||
684 | setup_smep(c); | 682 | setup_smep(c); |
@@ -764,10 +762,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) | |||
764 | c->apicid = c->initial_apicid; | 762 | c->apicid = c->initial_apicid; |
765 | # endif | 763 | # endif |
766 | #endif | 764 | #endif |
767 | |||
768 | #ifdef CONFIG_X86_HT | ||
769 | c->phys_proc_id = c->initial_apicid; | 765 | c->phys_proc_id = c->initial_apicid; |
770 | #endif | ||
771 | } | 766 | } |
772 | 767 | ||
773 | setup_smep(c); | 768 | setup_smep(c); |
@@ -1026,6 +1021,8 @@ __setup("clearcpuid=", setup_disablecpuid); | |||
1026 | 1021 | ||
1027 | #ifdef CONFIG_X86_64 | 1022 | #ifdef CONFIG_X86_64 |
1028 | struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; | 1023 | struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; |
1024 | struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1, | ||
1025 | (unsigned long) nmi_idt_table }; | ||
1029 | 1026 | ||
1030 | DEFINE_PER_CPU_FIRST(union irq_stack_union, | 1027 | DEFINE_PER_CPU_FIRST(union irq_stack_union, |
1031 | irq_stack_union) __aligned(PAGE_SIZE); | 1028 | irq_stack_union) __aligned(PAGE_SIZE); |
@@ -1090,6 +1087,26 @@ unsigned long kernel_eflags; | |||
1090 | */ | 1087 | */ |
1091 | DEFINE_PER_CPU(struct orig_ist, orig_ist); | 1088 | DEFINE_PER_CPU(struct orig_ist, orig_ist); |
1092 | 1089 | ||
1090 | static DEFINE_PER_CPU(unsigned long, debug_stack_addr); | ||
1091 | DEFINE_PER_CPU(int, debug_stack_usage); | ||
1092 | |||
1093 | int is_debug_stack(unsigned long addr) | ||
1094 | { | ||
1095 | return __get_cpu_var(debug_stack_usage) || | ||
1096 | (addr <= __get_cpu_var(debug_stack_addr) && | ||
1097 | addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); | ||
1098 | } | ||
1099 | |||
1100 | void debug_stack_set_zero(void) | ||
1101 | { | ||
1102 | load_idt((const struct desc_ptr *)&nmi_idt_descr); | ||
1103 | } | ||
1104 | |||
1105 | void debug_stack_reset(void) | ||
1106 | { | ||
1107 | load_idt((const struct desc_ptr *)&idt_descr); | ||
1108 | } | ||
1109 | |||
1093 | #else /* CONFIG_X86_64 */ | 1110 | #else /* CONFIG_X86_64 */ |
1094 | 1111 | ||
1095 | DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; | 1112 | DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; |
@@ -1141,6 +1158,15 @@ static void dbg_restore_debug_regs(void) | |||
1141 | #endif /* ! CONFIG_KGDB */ | 1158 | #endif /* ! CONFIG_KGDB */ |
1142 | 1159 | ||
1143 | /* | 1160 | /* |
1161 | * Prints an error when the NUMA node and the configured core number | ||
1162 | * disagree and the platform didn't override this to fix it up | ||
1163 | */ | ||
1164 | void __cpuinit x86_default_fixup_cpu_id(struct cpuinfo_x86 *c, int node) | ||
1165 | { | ||
1166 | pr_err("NUMA core number %d differs from configured core number %d\n", node, c->phys_proc_id); | ||
1167 | } | ||
1168 | |||
1169 | /* | ||
1144 | * cpu_init() initializes state that is per-CPU. Some data is already | 1170 | * cpu_init() initializes state that is per-CPU. Some data is already |
1145 | * initialized (naturally) in the bootstrap process, such as the GDT | 1171 | * initialized (naturally) in the bootstrap process, such as the GDT |
1146 | * and IDT. We reload them nevertheless, this function acts as a | 1172 | * and IDT. We reload them nevertheless, this function acts as a |
@@ -1208,6 +1234,8 @@ void __cpuinit cpu_init(void) | |||
1208 | estacks += exception_stack_sizes[v]; | 1234 | estacks += exception_stack_sizes[v]; |
1209 | oist->ist[v] = t->x86_tss.ist[v] = | 1235 | oist->ist[v] = t->x86_tss.ist[v] = |
1210 | (unsigned long)estacks; | 1236 | (unsigned long)estacks; |
1237 | if (v == DEBUG_STACK-1) | ||
1238 | per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks; | ||
1211 | } | 1239 | } |
1212 | } | 1240 | } |
1213 | 1241 | ||
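
cpu_init() now records the top of each CPU's DEBUG_STACK so that is_debug_stack() can tell the NMI code whether an address sits on it; the check also succeeds whenever debug_stack_usage is raised. The range test in a standalone sketch (the stack grows down from the recorded top; the DEBUG_STKSZ value is illustrative):

    #include <stdio.h>
    #include <stdint.h>

    #define DEBUG_STKSZ 8192ULL

    static int on_debug_stack(uint64_t addr, uint64_t top, int in_use)
    {
        /* valid addresses lie in (top - DEBUG_STKSZ, top] */
        return in_use || (addr <= top && addr > top - DEBUG_STKSZ);
    }

    int main(void)
    {
        uint64_t top = 0xffff880000010000ULL;   /* sample per-cpu value */

        printf("%d\n", on_debug_stack(top - 100, top, 0));  /* 1 */
        printf("%d\n", on_debug_stack(top + 100, top, 0));  /* 0 */
        return 0;
    }
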
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 1b22dcc51af4..8bacc7826fb3 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h | |||
@@ -1,5 +1,4 @@ | |||
1 | #ifndef ARCH_X86_CPU_H | 1 | #ifndef ARCH_X86_CPU_H |
2 | |||
3 | #define ARCH_X86_CPU_H | 2 | #define ARCH_X86_CPU_H |
4 | 3 | ||
5 | struct cpu_model_info { | 4 | struct cpu_model_info { |
@@ -35,6 +34,4 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[], | |||
35 | 34 | ||
36 | extern void get_cpu_cap(struct cpuinfo_x86 *c); | 35 | extern void get_cpu_cap(struct cpuinfo_x86 *c); |
37 | extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); | 36 | extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); |
38 | extern void get_cpu_cap(struct cpuinfo_x86 *c); | 37 | #endif /* ARCH_X86_CPU_H */ |
39 | |||
40 | #endif | ||
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 523131213f08..3e6ff6cbf42a 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -181,7 +181,6 @@ static void __cpuinit trap_init_f00f_bug(void) | |||
181 | 181 | ||
182 | static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c) | 182 | static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c) |
183 | { | 183 | { |
184 | #ifdef CONFIG_SMP | ||
185 | /* calling is from identify_secondary_cpu() ? */ | 184 | /* calling is from identify_secondary_cpu() ? */ |
186 | if (!c->cpu_index) | 185 | if (!c->cpu_index) |
187 | return; | 186 | return; |
@@ -198,7 +197,6 @@ static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c) | |||
198 | WARN_ONCE(1, "WARNING: SMP operation may be unreliable" | 197 | WARN_ONCE(1, "WARNING: SMP operation may be unreliable" |
199 | "with B stepping processors.\n"); | 198 | "with B stepping processors.\n"); |
200 | } | 199 | } |
201 | #endif | ||
202 | } | 200 | } |
203 | 201 | ||
204 | static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) | 202 | static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index a3b0811693c9..6b45e5e7a901 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -844,8 +844,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) | |||
844 | 844 | ||
845 | #include <linux/kobject.h> | 845 | #include <linux/kobject.h> |
846 | #include <linux/sysfs.h> | 846 | #include <linux/sysfs.h> |
847 | 847 | #include <linux/cpu.h> | |
848 | extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */ | ||
849 | 848 | ||
850 | /* pointer to kobject for cpuX/cache */ | 849 | /* pointer to kobject for cpuX/cache */ |
851 | static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject); | 850 | static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject); |
@@ -1073,9 +1072,9 @@ err_out: | |||
1073 | static DECLARE_BITMAP(cache_dev_map, NR_CPUS); | 1072 | static DECLARE_BITMAP(cache_dev_map, NR_CPUS); |
1074 | 1073 | ||
1075 | /* Add/Remove cache interface for CPU device */ | 1074 | /* Add/Remove cache interface for CPU device */ |
1076 | static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | 1075 | static int __cpuinit cache_add_dev(struct device *dev) |
1077 | { | 1076 | { |
1078 | unsigned int cpu = sys_dev->id; | 1077 | unsigned int cpu = dev->id; |
1079 | unsigned long i, j; | 1078 | unsigned long i, j; |
1080 | struct _index_kobject *this_object; | 1079 | struct _index_kobject *this_object; |
1081 | struct _cpuid4_info *this_leaf; | 1080 | struct _cpuid4_info *this_leaf; |
@@ -1087,7 +1086,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | |||
1087 | 1086 | ||
1088 | retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu), | 1087 | retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu), |
1089 | &ktype_percpu_entry, | 1088 | &ktype_percpu_entry, |
1090 | &sys_dev->kobj, "%s", "cache"); | 1089 | &dev->kobj, "%s", "cache"); |
1091 | if (retval < 0) { | 1090 | if (retval < 0) { |
1092 | cpuid4_cache_sysfs_exit(cpu); | 1091 | cpuid4_cache_sysfs_exit(cpu); |
1093 | return retval; | 1092 | return retval; |
@@ -1124,9 +1123,9 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | |||
1124 | return 0; | 1123 | return 0; |
1125 | } | 1124 | } |
1126 | 1125 | ||
1127 | static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) | 1126 | static void __cpuinit cache_remove_dev(struct device *dev) |
1128 | { | 1127 | { |
1129 | unsigned int cpu = sys_dev->id; | 1128 | unsigned int cpu = dev->id; |
1130 | unsigned long i; | 1129 | unsigned long i; |
1131 | 1130 | ||
1132 | if (per_cpu(ici_cpuid4_info, cpu) == NULL) | 1131 | if (per_cpu(ici_cpuid4_info, cpu) == NULL) |
@@ -1145,17 +1144,17 @@ static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, | |||
1145 | unsigned long action, void *hcpu) | 1144 | unsigned long action, void *hcpu) |
1146 | { | 1145 | { |
1147 | unsigned int cpu = (unsigned long)hcpu; | 1146 | unsigned int cpu = (unsigned long)hcpu; |
1148 | struct sys_device *sys_dev; | 1147 | struct device *dev; |
1149 | 1148 | ||
1150 | sys_dev = get_cpu_sysdev(cpu); | 1149 | dev = get_cpu_device(cpu); |
1151 | switch (action) { | 1150 | switch (action) { |
1152 | case CPU_ONLINE: | 1151 | case CPU_ONLINE: |
1153 | case CPU_ONLINE_FROZEN: | 1152 | case CPU_ONLINE_FROZEN: |
1154 | cache_add_dev(sys_dev); | 1153 | cache_add_dev(dev); |
1155 | break; | 1154 | break; |
1156 | case CPU_DEAD: | 1155 | case CPU_DEAD: |
1157 | case CPU_DEAD_FROZEN: | 1156 | case CPU_DEAD_FROZEN: |
1158 | cache_remove_dev(sys_dev); | 1157 | cache_remove_dev(dev); |
1159 | break; | 1158 | break; |
1160 | } | 1159 | } |
1161 | return NOTIFY_OK; | 1160 | return NOTIFY_OK; |
@@ -1174,9 +1173,9 @@ static int __cpuinit cache_sysfs_init(void) | |||
1174 | 1173 | ||
1175 | for_each_online_cpu(i) { | 1174 | for_each_online_cpu(i) { |
1176 | int err; | 1175 | int err; |
1177 | struct sys_device *sys_dev = get_cpu_sysdev(i); | 1176 | struct device *dev = get_cpu_device(i); |
1178 | 1177 | ||
1179 | err = cache_add_dev(sys_dev); | 1178 | err = cache_add_dev(dev); |
1180 | if (err) | 1179 | if (err) |
1181 | return err; | 1180 | return err; |
1182 | } | 1181 | } |
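
The cacheinfo sysfs code moves from the deprecated sysdev layer to plain struct device: attributes become device_attributes and the hotplug callback looks the CPU up with get_cpu_device(). A kernel-context sketch of the converted idiom; the attribute name and callbacks here are hypothetical stand-ins:

    static DEVICE_ATTR(bank0ctl, 0644, show_bank, set_bank);  /* was SYSDEV_ATTR */

    static int __cpuinit add_cache_attrs(unsigned int cpu)
    {
        struct device *dev = get_cpu_device(cpu);   /* was get_cpu_sysdev() */

        if (!dev)
            return -ENODEV;
        return device_create_file(dev, &dev_attr_bank0ctl);
    }
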
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 319882ef848d..fc4beb393577 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/kernel.h> | 17 | #include <linux/kernel.h> |
18 | #include <linux/string.h> | 18 | #include <linux/string.h> |
19 | #include <linux/fs.h> | 19 | #include <linux/fs.h> |
20 | #include <linux/preempt.h> | ||
20 | #include <linux/smp.h> | 21 | #include <linux/smp.h> |
21 | #include <linux/notifier.h> | 22 | #include <linux/notifier.h> |
22 | #include <linux/kdebug.h> | 23 | #include <linux/kdebug.h> |
@@ -92,6 +93,18 @@ static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs) | |||
92 | return NMI_HANDLED; | 93 | return NMI_HANDLED; |
93 | } | 94 | } |
94 | 95 | ||
96 | static void mce_irq_ipi(void *info) | ||
97 | { | ||
98 | int cpu = smp_processor_id(); | ||
99 | struct mce *m = &__get_cpu_var(injectm); | ||
100 | |||
101 | if (cpumask_test_cpu(cpu, mce_inject_cpumask) && | ||
102 | m->inject_flags & MCJ_EXCEPTION) { | ||
103 | cpumask_clear_cpu(cpu, mce_inject_cpumask); | ||
104 | raise_exception(m, NULL); | ||
105 | } | ||
106 | } | ||
107 | |||
95 | /* Inject mce on current CPU */ | 108 | /* Inject mce on current CPU */ |
96 | static int raise_local(void) | 109 | static int raise_local(void) |
97 | { | 110 | { |
@@ -139,9 +152,10 @@ static void raise_mce(struct mce *m) | |||
139 | return; | 152 | return; |
140 | 153 | ||
141 | #ifdef CONFIG_X86_LOCAL_APIC | 154 | #ifdef CONFIG_X86_LOCAL_APIC |
142 | if (m->inject_flags & MCJ_NMI_BROADCAST) { | 155 | if (m->inject_flags & (MCJ_IRQ_BRAODCAST | MCJ_NMI_BROADCAST)) { |
143 | unsigned long start; | 156 | unsigned long start; |
144 | int cpu; | 157 | int cpu; |
158 | |||
145 | get_online_cpus(); | 159 | get_online_cpus(); |
146 | cpumask_copy(mce_inject_cpumask, cpu_online_mask); | 160 | cpumask_copy(mce_inject_cpumask, cpu_online_mask); |
147 | cpumask_clear_cpu(get_cpu(), mce_inject_cpumask); | 161 | cpumask_clear_cpu(get_cpu(), mce_inject_cpumask); |
@@ -151,13 +165,25 @@ static void raise_mce(struct mce *m) | |||
151 | MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM) | 165 | MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM) |
152 | cpumask_clear_cpu(cpu, mce_inject_cpumask); | 166 | cpumask_clear_cpu(cpu, mce_inject_cpumask); |
153 | } | 167 | } |
154 | if (!cpumask_empty(mce_inject_cpumask)) | 168 | if (!cpumask_empty(mce_inject_cpumask)) { |
155 | apic->send_IPI_mask(mce_inject_cpumask, NMI_VECTOR); | 169 | if (m->inject_flags & MCJ_IRQ_BRAODCAST) { |
170 | /* | ||
171 | * don't wait: mce_irq_ipi must run concurrently | ||
172 | * with the raise_local() call that follows | ||
173 | */ | ||
174 | preempt_disable(); | ||
175 | smp_call_function_many(mce_inject_cpumask, | ||
176 | mce_irq_ipi, NULL, 0); | ||
177 | preempt_enable(); | ||
178 | } else if (m->inject_flags & MCJ_NMI_BROADCAST) | ||
179 | apic->send_IPI_mask(mce_inject_cpumask, | ||
180 | NMI_VECTOR); | ||
181 | } | ||
156 | start = jiffies; | 182 | start = jiffies; |
157 | while (!cpumask_empty(mce_inject_cpumask)) { | 183 | while (!cpumask_empty(mce_inject_cpumask)) { |
158 | if (!time_before(jiffies, start + 2*HZ)) { | 184 | if (!time_before(jiffies, start + 2*HZ)) { |
159 | printk(KERN_ERR | 185 | printk(KERN_ERR |
160 | "Timeout waiting for mce inject NMI %lx\n", | 186 | "Timeout waiting for mce inject %lx\n", |
161 | *cpumask_bits(mce_inject_cpumask)); | 187 | *cpumask_bits(mce_inject_cpumask)); |
162 | break; | 188 | break; |
163 | } | 189 | } |
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index fefcc69ee8b5..ed44c8a65858 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h | |||
@@ -1,4 +1,4 @@ | |||
1 | #include <linux/sysdev.h> | 1 | #include <linux/device.h> |
2 | #include <asm/mce.h> | 2 | #include <asm/mce.h> |
3 | 3 | ||
4 | enum severity_level { | 4 | enum severity_level { |
@@ -17,7 +17,7 @@ enum severity_level { | |||
17 | struct mce_bank { | 17 | struct mce_bank { |
18 | u64 ctl; /* subevents to enable */ | 18 | u64 ctl; /* subevents to enable */ |
19 | unsigned char init; /* initialise bank? */ | 19 | unsigned char init; /* initialise bank? */ |
20 | struct sysdev_attribute attr; /* sysdev attribute */ | 20 | struct device_attribute attr; /* device attribute */ |
21 | char attrname[ATTR_LEN]; /* attribute name */ | 21 | char attrname[ATTR_LEN]; /* attribute name */ |
22 | }; | 22 | }; |
23 | 23 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 2af127d4c3d1..5a11ae2e9e91 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -19,7 +19,7 @@ | |||
19 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
20 | #include <linux/percpu.h> | 20 | #include <linux/percpu.h> |
21 | #include <linux/string.h> | 21 | #include <linux/string.h> |
22 | #include <linux/sysdev.h> | 22 | #include <linux/device.h> |
23 | #include <linux/syscore_ops.h> | 23 | #include <linux/syscore_ops.h> |
24 | #include <linux/delay.h> | 24 | #include <linux/delay.h> |
25 | #include <linux/ctype.h> | 25 | #include <linux/ctype.h> |
@@ -95,13 +95,6 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait); | |||
95 | static DEFINE_PER_CPU(struct mce, mces_seen); | 95 | static DEFINE_PER_CPU(struct mce, mces_seen); |
96 | static int cpu_missing; | 96 | static int cpu_missing; |
97 | 97 | ||
98 | /* | ||
99 | * CPU/chipset specific EDAC code can register a notifier call here to print | ||
100 | * MCE errors in a human-readable form. | ||
101 | */ | ||
102 | ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); | ||
103 | EXPORT_SYMBOL_GPL(x86_mce_decoder_chain); | ||
104 | |||
105 | /* MCA banks polled by the period polling timer for corrected events */ | 98 | /* MCA banks polled by the period polling timer for corrected events */ |
106 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { | 99 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { |
107 | [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL | 100 | [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL |
@@ -109,6 +102,12 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { | |||
109 | 102 | ||
110 | static DEFINE_PER_CPU(struct work_struct, mce_work); | 103 | static DEFINE_PER_CPU(struct work_struct, mce_work); |
111 | 104 | ||
105 | /* | ||
106 | * CPU/chipset specific EDAC code can register a notifier call here to print | ||
107 | * MCE errors in a human-readable form. | ||
108 | */ | ||
109 | ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); | ||
110 | |||
112 | /* Do initial initialization of a struct mce */ | 111 | /* Do initial initialization of a struct mce */ |
113 | void mce_setup(struct mce *m) | 112 | void mce_setup(struct mce *m) |
114 | { | 113 | { |
@@ -119,9 +118,7 @@ void mce_setup(struct mce *m) | |||
119 | m->time = get_seconds(); | 118 | m->time = get_seconds(); |
120 | m->cpuvendor = boot_cpu_data.x86_vendor; | 119 | m->cpuvendor = boot_cpu_data.x86_vendor; |
121 | m->cpuid = cpuid_eax(1); | 120 | m->cpuid = cpuid_eax(1); |
122 | #ifdef CONFIG_SMP | ||
123 | m->socketid = cpu_data(m->extcpu).phys_proc_id; | 121 | m->socketid = cpu_data(m->extcpu).phys_proc_id; |
124 | #endif | ||
125 | m->apicid = cpu_data(m->extcpu).initial_apicid; | 122 | m->apicid = cpu_data(m->extcpu).initial_apicid; |
126 | rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap); | 123 | rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap); |
127 | } | 124 | } |
@@ -190,6 +187,57 @@ void mce_log(struct mce *mce) | |||
190 | set_bit(0, &mce_need_notify); | 187 | set_bit(0, &mce_need_notify); |
191 | } | 188 | } |
192 | 189 | ||
190 | static void drain_mcelog_buffer(void) | ||
191 | { | ||
192 | unsigned int next, i, prev = 0; | ||
193 | |||
194 | next = rcu_dereference_check_mce(mcelog.next); | ||
195 | |||
196 | do { | ||
197 | struct mce *m; | ||
198 | |||
199 | /* drain what was logged during boot */ | ||
200 | for (i = prev; i < next; i++) { | ||
201 | unsigned long start = jiffies; | ||
202 | unsigned retries = 1; | ||
203 | |||
204 | m = &mcelog.entry[i]; | ||
205 | |||
206 | while (!m->finished) { | ||
207 | if (time_after_eq(jiffies, start + 2*retries)) | ||
208 | retries++; | ||
209 | |||
210 | cpu_relax(); | ||
211 | |||
212 | if (!m->finished && retries >= 4) { | ||
213 | pr_err("MCE: skipping error being logged currently!\n"); | ||
214 | break; | ||
215 | } | ||
216 | } | ||
217 | smp_rmb(); | ||
218 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); | ||
219 | } | ||
220 | |||
221 | memset(mcelog.entry + prev, 0, (next - prev) * sizeof(*m)); | ||
222 | prev = next; | ||
223 | next = cmpxchg(&mcelog.next, prev, 0); | ||
224 | } while (next != prev); | ||
225 | } | ||
226 | |||
227 | |||
228 | void mce_register_decode_chain(struct notifier_block *nb) | ||
229 | { | ||
230 | atomic_notifier_chain_register(&x86_mce_decoder_chain, nb); | ||
231 | drain_mcelog_buffer(); | ||
232 | } | ||
233 | EXPORT_SYMBOL_GPL(mce_register_decode_chain); | ||
234 | |||
235 | void mce_unregister_decode_chain(struct notifier_block *nb) | ||
236 | { | ||
237 | atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb); | ||
238 | } | ||
239 | EXPORT_SYMBOL_GPL(mce_unregister_decode_chain); | ||
240 | |||
193 | static void print_mce(struct mce *m) | 241 | static void print_mce(struct mce *m) |
194 | { | 242 | { |
195 | int ret = 0; | 243 | int ret = 0; |
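
Decoders such as EDAC now attach through mce_register_decode_chain(), which wraps the notifier registration and, via drain_mcelog_buffer(), immediately replays MCEs that were logged before the decoder loaded. A kernel-context sketch of a hypothetical decoder module using it:

    static int example_decode(struct notifier_block *nb, unsigned long val,
                              void *data)
    {
        struct mce *m = data;

        pr_info("decoded mce: cpu %u status 0x%llx\n",
                m->extcpu, (unsigned long long)m->status);
        return NOTIFY_STOP;
    }

    static struct notifier_block example_mce_dec = {
        .notifier_call = example_decode,
    };

    static int __init example_init(void)
    {
        /* also drains anything already queued in mcelog */
        mce_register_decode_chain(&example_mce_dec);
        return 0;
    }

    static void __exit example_exit(void)
    {
        mce_unregister_decode_chain(&example_mce_dec);
    }
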
@@ -1770,7 +1818,7 @@ static struct syscore_ops mce_syscore_ops = { | |||
1770 | }; | 1818 | }; |
1771 | 1819 | ||
1772 | /* | 1820 | /* |
1773 | * mce_sysdev: Sysfs support | 1821 | * mce_device: Sysfs support |
1774 | */ | 1822 | */ |
1775 | 1823 | ||
1776 | static void mce_cpu_restart(void *data) | 1824 | static void mce_cpu_restart(void *data) |
@@ -1806,27 +1854,28 @@ static void mce_enable_ce(void *all) | |||
1806 | __mcheck_cpu_init_timer(); | 1854 | __mcheck_cpu_init_timer(); |
1807 | } | 1855 | } |
1808 | 1856 | ||
1809 | static struct sysdev_class mce_sysdev_class = { | 1857 | static struct bus_type mce_subsys = { |
1810 | .name = "machinecheck", | 1858 | .name = "machinecheck", |
1859 | .dev_name = "machinecheck", | ||
1811 | }; | 1860 | }; |
1812 | 1861 | ||
1813 | DEFINE_PER_CPU(struct sys_device, mce_sysdev); | 1862 | struct device *mce_device[CONFIG_NR_CPUS]; |
1814 | 1863 | ||
1815 | __cpuinitdata | 1864 | __cpuinitdata |
1816 | void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); | 1865 | void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); |
1817 | 1866 | ||
1818 | static inline struct mce_bank *attr_to_bank(struct sysdev_attribute *attr) | 1867 | static inline struct mce_bank *attr_to_bank(struct device_attribute *attr) |
1819 | { | 1868 | { |
1820 | return container_of(attr, struct mce_bank, attr); | 1869 | return container_of(attr, struct mce_bank, attr); |
1821 | } | 1870 | } |
1822 | 1871 | ||
1823 | static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, | 1872 | static ssize_t show_bank(struct device *s, struct device_attribute *attr, |
1824 | char *buf) | 1873 | char *buf) |
1825 | { | 1874 | { |
1826 | return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl); | 1875 | return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl); |
1827 | } | 1876 | } |
1828 | 1877 | ||
1829 | static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, | 1878 | static ssize_t set_bank(struct device *s, struct device_attribute *attr, |
1830 | const char *buf, size_t size) | 1879 | const char *buf, size_t size) |
1831 | { | 1880 | { |
1832 | u64 new; | 1881 | u64 new; |
@@ -1841,14 +1890,14 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, | |||
1841 | } | 1890 | } |
1842 | 1891 | ||
1843 | static ssize_t | 1892 | static ssize_t |
1844 | show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf) | 1893 | show_trigger(struct device *s, struct device_attribute *attr, char *buf) |
1845 | { | 1894 | { |
1846 | strcpy(buf, mce_helper); | 1895 | strcpy(buf, mce_helper); |
1847 | strcat(buf, "\n"); | 1896 | strcat(buf, "\n"); |
1848 | return strlen(mce_helper) + 1; | 1897 | return strlen(mce_helper) + 1; |
1849 | } | 1898 | } |
1850 | 1899 | ||
1851 | static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, | 1900 | static ssize_t set_trigger(struct device *s, struct device_attribute *attr, |
1852 | const char *buf, size_t siz) | 1901 | const char *buf, size_t siz) |
1853 | { | 1902 | { |
1854 | char *p; | 1903 | char *p; |
@@ -1863,8 +1912,8 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, | |||
1863 | return strlen(mce_helper) + !!p; | 1912 | return strlen(mce_helper) + !!p; |
1864 | } | 1913 | } |
1865 | 1914 | ||
1866 | static ssize_t set_ignore_ce(struct sys_device *s, | 1915 | static ssize_t set_ignore_ce(struct device *s, |
1867 | struct sysdev_attribute *attr, | 1916 | struct device_attribute *attr, |
1868 | const char *buf, size_t size) | 1917 | const char *buf, size_t size) |
1869 | { | 1918 | { |
1870 | u64 new; | 1919 | u64 new; |
@@ -1887,8 +1936,8 @@ static ssize_t set_ignore_ce(struct sys_device *s, | |||
1887 | return size; | 1936 | return size; |
1888 | } | 1937 | } |
1889 | 1938 | ||
1890 | static ssize_t set_cmci_disabled(struct sys_device *s, | 1939 | static ssize_t set_cmci_disabled(struct device *s, |
1891 | struct sysdev_attribute *attr, | 1940 | struct device_attribute *attr, |
1892 | const char *buf, size_t size) | 1941 | const char *buf, size_t size) |
1893 | { | 1942 | { |
1894 | u64 new; | 1943 | u64 new; |
@@ -1910,108 +1959,117 @@ static ssize_t set_cmci_disabled(struct sys_device *s, | |||
1910 | return size; | 1959 | return size; |
1911 | } | 1960 | } |
1912 | 1961 | ||
1913 | static ssize_t store_int_with_restart(struct sys_device *s, | 1962 | static ssize_t store_int_with_restart(struct device *s, |
1914 | struct sysdev_attribute *attr, | 1963 | struct device_attribute *attr, |
1915 | const char *buf, size_t size) | 1964 | const char *buf, size_t size) |
1916 | { | 1965 | { |
1917 | ssize_t ret = sysdev_store_int(s, attr, buf, size); | 1966 | ssize_t ret = device_store_int(s, attr, buf, size); |
1918 | mce_restart(); | 1967 | mce_restart(); |
1919 | return ret; | 1968 | return ret; |
1920 | } | 1969 | } |
1921 | 1970 | ||
1922 | static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); | 1971 | static DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger); |
1923 | static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); | 1972 | static DEVICE_INT_ATTR(tolerant, 0644, tolerant); |
1924 | static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout); | 1973 | static DEVICE_INT_ATTR(monarch_timeout, 0644, monarch_timeout); |
1925 | static SYSDEV_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce); | 1974 | static DEVICE_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce); |
1926 | 1975 | ||
1927 | static struct sysdev_ext_attribute attr_check_interval = { | 1976 | static struct dev_ext_attribute dev_attr_check_interval = { |
1928 | _SYSDEV_ATTR(check_interval, 0644, sysdev_show_int, | 1977 | __ATTR(check_interval, 0644, device_show_int, store_int_with_restart), |
1929 | store_int_with_restart), | ||
1930 | &check_interval | 1978 | &check_interval |
1931 | }; | 1979 | }; |
1932 | 1980 | ||
1933 | static struct sysdev_ext_attribute attr_ignore_ce = { | 1981 | static struct dev_ext_attribute dev_attr_ignore_ce = { |
1934 | _SYSDEV_ATTR(ignore_ce, 0644, sysdev_show_int, set_ignore_ce), | 1982 | __ATTR(ignore_ce, 0644, device_show_int, set_ignore_ce), |
1935 | &mce_ignore_ce | 1983 | &mce_ignore_ce |
1936 | }; | 1984 | }; |
1937 | 1985 | ||
1938 | static struct sysdev_ext_attribute attr_cmci_disabled = { | 1986 | static struct dev_ext_attribute dev_attr_cmci_disabled = { |
1939 | _SYSDEV_ATTR(cmci_disabled, 0644, sysdev_show_int, set_cmci_disabled), | 1987 | __ATTR(cmci_disabled, 0644, device_show_int, set_cmci_disabled), |
1940 | &mce_cmci_disabled | 1988 | &mce_cmci_disabled |
1941 | }; | 1989 | }; |
1942 | 1990 | ||
1943 | static struct sysdev_attribute *mce_sysdev_attrs[] = { | 1991 | static struct device_attribute *mce_device_attrs[] = { |
1944 | &attr_tolerant.attr, | 1992 | &dev_attr_tolerant.attr, |
1945 | &attr_check_interval.attr, | 1993 | &dev_attr_check_interval.attr, |
1946 | &attr_trigger, | 1994 | &dev_attr_trigger, |
1947 | &attr_monarch_timeout.attr, | 1995 | &dev_attr_monarch_timeout.attr, |
1948 | &attr_dont_log_ce.attr, | 1996 | &dev_attr_dont_log_ce.attr, |
1949 | &attr_ignore_ce.attr, | 1997 | &dev_attr_ignore_ce.attr, |
1950 | &attr_cmci_disabled.attr, | 1998 | &dev_attr_cmci_disabled.attr, |
1951 | NULL | 1999 | NULL |
1952 | }; | 2000 | }; |
1953 | 2001 | ||
1954 | static cpumask_var_t mce_sysdev_initialized; | 2002 | static cpumask_var_t mce_device_initialized; |
2003 | |||
2004 | static void mce_device_release(struct device *dev) | ||
2005 | { | ||
2006 | kfree(dev); | ||
2007 | } | ||
1955 | 2008 | ||
1956 | /* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */ | 2009 | /* Per cpu device init. All of the cpus still share the same ctrl bank: */ |
1957 | static __cpuinit int mce_sysdev_create(unsigned int cpu) | 2010 | static __cpuinit int mce_device_create(unsigned int cpu) |
1958 | { | 2011 | { |
1959 | struct sys_device *sysdev = &per_cpu(mce_sysdev, cpu); | 2012 | struct device *dev; |
1960 | int err; | 2013 | int err; |
1961 | int i, j; | 2014 | int i, j; |
1962 | 2015 | ||
1963 | if (!mce_available(&boot_cpu_data)) | 2016 | if (!mce_available(&boot_cpu_data)) |
1964 | return -EIO; | 2017 | return -EIO; |
1965 | 2018 | ||
1966 | memset(&sysdev->kobj, 0, sizeof(struct kobject)); | 2019 | dev = kzalloc(sizeof *dev, GFP_KERNEL); |
1967 | sysdev->id = cpu; | 2020 | if (!dev) |
1968 | sysdev->cls = &mce_sysdev_class; | 2021 | return -ENOMEM; |
2022 | dev->id = cpu; | ||
2023 | dev->bus = &mce_subsys; | ||
2024 | dev->release = &mce_device_release; | ||
1969 | 2025 | ||
1970 | err = sysdev_register(sysdev); | 2026 | err = device_register(dev); |
1971 | if (err) | 2027 | if (err) |
1972 | return err; | 2028 | return err; |
1973 | 2029 | ||
1974 | for (i = 0; mce_sysdev_attrs[i]; i++) { | 2030 | for (i = 0; mce_device_attrs[i]; i++) { |
1975 | err = sysdev_create_file(sysdev, mce_sysdev_attrs[i]); | 2031 | err = device_create_file(dev, mce_device_attrs[i]); |
1976 | if (err) | 2032 | if (err) |
1977 | goto error; | 2033 | goto error; |
1978 | } | 2034 | } |
1979 | for (j = 0; j < banks; j++) { | 2035 | for (j = 0; j < banks; j++) { |
1980 | err = sysdev_create_file(sysdev, &mce_banks[j].attr); | 2036 | err = device_create_file(dev, &mce_banks[j].attr); |
1981 | if (err) | 2037 | if (err) |
1982 | goto error2; | 2038 | goto error2; |
1983 | } | 2039 | } |
1984 | cpumask_set_cpu(cpu, mce_sysdev_initialized); | 2040 | cpumask_set_cpu(cpu, mce_device_initialized); |
2041 | mce_device[cpu] = dev; | ||
1985 | 2042 | ||
1986 | return 0; | 2043 | return 0; |
1987 | error2: | 2044 | error2: |
1988 | while (--j >= 0) | 2045 | while (--j >= 0) |
1989 | sysdev_remove_file(sysdev, &mce_banks[j].attr); | 2046 | device_remove_file(dev, &mce_banks[j].attr); |
1990 | error: | 2047 | error: |
1991 | while (--i >= 0) | 2048 | while (--i >= 0) |
1992 | sysdev_remove_file(sysdev, mce_sysdev_attrs[i]); | 2049 | device_remove_file(dev, mce_device_attrs[i]); |
1993 | 2050 | ||
1994 | sysdev_unregister(sysdev); | 2051 | device_unregister(dev); |
1995 | 2052 | ||
1996 | return err; | 2053 | return err; |
1997 | } | 2054 | } |
1998 | 2055 | ||
1999 | static __cpuinit void mce_sysdev_remove(unsigned int cpu) | 2056 | static __cpuinit void mce_device_remove(unsigned int cpu) |
2000 | { | 2057 | { |
2001 | struct sys_device *sysdev = &per_cpu(mce_sysdev, cpu); | 2058 | struct device *dev = mce_device[cpu]; |
2002 | int i; | 2059 | int i; |
2003 | 2060 | ||
2004 | if (!cpumask_test_cpu(cpu, mce_sysdev_initialized)) | 2061 | if (!cpumask_test_cpu(cpu, mce_device_initialized)) |
2005 | return; | 2062 | return; |
2006 | 2063 | ||
2007 | for (i = 0; mce_sysdev_attrs[i]; i++) | 2064 | for (i = 0; mce_device_attrs[i]; i++) |
2008 | sysdev_remove_file(sysdev, mce_sysdev_attrs[i]); | 2065 | device_remove_file(dev, mce_device_attrs[i]); |
2009 | 2066 | ||
2010 | for (i = 0; i < banks; i++) | 2067 | for (i = 0; i < banks; i++) |
2011 | sysdev_remove_file(sysdev, &mce_banks[i].attr); | 2068 | device_remove_file(dev, &mce_banks[i].attr); |
2012 | 2069 | ||
2013 | sysdev_unregister(sysdev); | 2070 | device_unregister(dev); |
2014 | cpumask_clear_cpu(cpu, mce_sysdev_initialized); | 2071 | cpumask_clear_cpu(cpu, mce_device_initialized); |
2072 | mce_device[cpu] = NULL; | ||
2015 | } | 2073 | } |
2016 | 2074 | ||
2017 | /* Make sure there are no machine checks on offlined CPUs. */ | 2075 | /* Make sure there are no machine checks on offlined CPUs. */ |
@@ -2061,7 +2119,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
2061 | switch (action) { | 2119 | switch (action) { |
2062 | case CPU_ONLINE: | 2120 | case CPU_ONLINE: |
2063 | case CPU_ONLINE_FROZEN: | 2121 | case CPU_ONLINE_FROZEN: |
2064 | mce_sysdev_create(cpu); | 2122 | mce_device_create(cpu); |
2065 | if (threshold_cpu_callback) | 2123 | if (threshold_cpu_callback) |
2066 | threshold_cpu_callback(action, cpu); | 2124 | threshold_cpu_callback(action, cpu); |
2067 | break; | 2125 | break; |
@@ -2069,7 +2127,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
2069 | case CPU_DEAD_FROZEN: | 2127 | case CPU_DEAD_FROZEN: |
2070 | if (threshold_cpu_callback) | 2128 | if (threshold_cpu_callback) |
2071 | threshold_cpu_callback(action, cpu); | 2129 | threshold_cpu_callback(action, cpu); |
2072 | mce_sysdev_remove(cpu); | 2130 | mce_device_remove(cpu); |
2073 | break; | 2131 | break; |
2074 | case CPU_DOWN_PREPARE: | 2132 | case CPU_DOWN_PREPARE: |
2075 | case CPU_DOWN_PREPARE_FROZEN: | 2133 | case CPU_DOWN_PREPARE_FROZEN: |
@@ -2103,7 +2161,7 @@ static __init void mce_init_banks(void) | |||
2103 | 2161 | ||
2104 | for (i = 0; i < banks; i++) { | 2162 | for (i = 0; i < banks; i++) { |
2105 | struct mce_bank *b = &mce_banks[i]; | 2163 | struct mce_bank *b = &mce_banks[i]; |
2106 | struct sysdev_attribute *a = &b->attr; | 2164 | struct device_attribute *a = &b->attr; |
2107 | 2165 | ||
2108 | sysfs_attr_init(&a->attr); | 2166 | sysfs_attr_init(&a->attr); |
2109 | a->attr.name = b->attrname; | 2167 | a->attr.name = b->attrname; |
@@ -2123,16 +2181,16 @@ static __init int mcheck_init_device(void) | |||
2123 | if (!mce_available(&boot_cpu_data)) | 2181 | if (!mce_available(&boot_cpu_data)) |
2124 | return -EIO; | 2182 | return -EIO; |
2125 | 2183 | ||
2126 | zalloc_cpumask_var(&mce_sysdev_initialized, GFP_KERNEL); | 2184 | zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL); |
2127 | 2185 | ||
2128 | mce_init_banks(); | 2186 | mce_init_banks(); |
2129 | 2187 | ||
2130 | err = sysdev_class_register(&mce_sysdev_class); | 2188 | err = subsys_system_register(&mce_subsys, NULL); |
2131 | if (err) | 2189 | if (err) |
2132 | return err; | 2190 | return err; |
2133 | 2191 | ||
2134 | for_each_online_cpu(i) { | 2192 | for_each_online_cpu(i) { |
2135 | err = mce_sysdev_create(i); | 2193 | err = mce_device_create(i); |
2136 | if (err) | 2194 | if (err) |
2137 | return err; | 2195 | return err; |
2138 | } | 2196 | } |
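The mce.c changes above are one instance of the tree-wide sysdev-to-driver-core conversion: a struct bus_type registered via subsys_system_register() replaces the sysdev class, heap-allocated struct device instances (with a release callback so they can be freed) replace the per-CPU struct sys_device, and device_create_file()/device_remove_file() replace their sysdev counterparts. A condensed sketch of the pattern, with illustrative names not taken from this patch:

#include <linux/device.h>
#include <linux/slab.h>

static struct bus_type demo_subsys = {
	.name		= "demo",
	.dev_name	= "demo",	/* instances show up as demoN in sysfs */
};

static ssize_t demo_show_value(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%d\n", dev->id);
}
static DEVICE_ATTR(value, 0444, demo_show_value, NULL);

static void demo_release(struct device *dev)
{
	kfree(dev);
}

static int __init demo_init(void)
{
	struct device *dev;
	int err;

	err = subsys_system_register(&demo_subsys, NULL);
	if (err)
		return err;

	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
	if (!dev)
		return -ENOMEM;

	dev->id      = 0;
	dev->bus     = &demo_subsys;
	dev->release = demo_release;

	err = device_register(dev);
	if (err)
		return err;

	return device_create_file(dev, &dev_attr_value);
}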
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index f5474218cffe..786e76a86322 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
@@ -17,7 +17,6 @@ | |||
17 | #include <linux/notifier.h> | 17 | #include <linux/notifier.h> |
18 | #include <linux/kobject.h> | 18 | #include <linux/kobject.h> |
19 | #include <linux/percpu.h> | 19 | #include <linux/percpu.h> |
20 | #include <linux/sysdev.h> | ||
21 | #include <linux/errno.h> | 20 | #include <linux/errno.h> |
22 | #include <linux/sched.h> | 21 | #include <linux/sched.h> |
23 | #include <linux/sysfs.h> | 22 | #include <linux/sysfs.h> |
@@ -64,11 +63,9 @@ struct threshold_bank { | |||
64 | }; | 63 | }; |
65 | static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks); | 64 | static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks); |
66 | 65 | ||
67 | #ifdef CONFIG_SMP | ||
68 | static unsigned char shared_bank[NR_BANKS] = { | 66 | static unsigned char shared_bank[NR_BANKS] = { |
69 | 0, 0, 0, 0, 1 | 67 | 0, 0, 0, 0, 1 |
70 | }; | 68 | }; |
71 | #endif | ||
72 | 69 | ||
73 | static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ | 70 | static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ |
74 | 71 | ||
@@ -202,10 +199,9 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
202 | 199 | ||
203 | if (!block) | 200 | if (!block) |
204 | per_cpu(bank_map, cpu) |= (1 << bank); | 201 | per_cpu(bank_map, cpu) |= (1 << bank); |
205 | #ifdef CONFIG_SMP | ||
206 | if (shared_bank[bank] && c->cpu_core_id) | 202 | if (shared_bank[bank] && c->cpu_core_id) |
207 | break; | 203 | break; |
208 | #endif | 204 | |
209 | offset = setup_APIC_mce(offset, | 205 | offset = setup_APIC_mce(offset, |
210 | (high & MASK_LVTOFF_HI) >> 20); | 206 | (high & MASK_LVTOFF_HI) >> 20); |
211 | 207 | ||
@@ -527,11 +523,11 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
527 | { | 523 | { |
528 | int i, err = 0; | 524 | int i, err = 0; |
529 | struct threshold_bank *b = NULL; | 525 | struct threshold_bank *b = NULL; |
526 | struct device *dev = mce_device[cpu]; | ||
530 | char name[32]; | 527 | char name[32]; |
531 | 528 | ||
532 | sprintf(name, "threshold_bank%i", bank); | 529 | sprintf(name, "threshold_bank%i", bank); |
533 | 530 | ||
534 | #ifdef CONFIG_SMP | ||
535 | if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ | 531 | if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ |
536 | i = cpumask_first(cpu_llc_shared_mask(cpu)); | 532 | i = cpumask_first(cpu_llc_shared_mask(cpu)); |
537 | 533 | ||
@@ -548,8 +544,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
548 | if (!b) | 544 | if (!b) |
549 | goto out; | 545 | goto out; |
550 | 546 | ||
551 | err = sysfs_create_link(&per_cpu(mce_sysdev, cpu).kobj, | 547 | err = sysfs_create_link(&dev->kobj, b->kobj, name); |
552 | b->kobj, name); | ||
553 | if (err) | 548 | if (err) |
554 | goto out; | 549 | goto out; |
555 | 550 | ||
@@ -558,7 +553,6 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
558 | 553 | ||
559 | goto out; | 554 | goto out; |
560 | } | 555 | } |
561 | #endif | ||
562 | 556 | ||
563 | b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL); | 557 | b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL); |
564 | if (!b) { | 558 | if (!b) { |
@@ -571,7 +565,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
571 | goto out; | 565 | goto out; |
572 | } | 566 | } |
573 | 567 | ||
574 | b->kobj = kobject_create_and_add(name, &per_cpu(mce_sysdev, cpu).kobj); | 568 | b->kobj = kobject_create_and_add(name, &dev->kobj); |
575 | if (!b->kobj) | 569 | if (!b->kobj) |
576 | goto out_free; | 570 | goto out_free; |
577 | 571 | ||
@@ -591,8 +585,9 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
591 | if (i == cpu) | 585 | if (i == cpu) |
592 | continue; | 586 | continue; |
593 | 587 | ||
594 | err = sysfs_create_link(&per_cpu(mce_sysdev, i).kobj, | 588 | dev = mce_device[i]; |
595 | b->kobj, name); | 589 | if (dev) |
590 | err = sysfs_create_link(&dev->kobj, b->kobj, name); ||
596 | if (err) | 591 | if (err) |
597 | goto out; | 592 | goto out; |
598 | 593 | ||
@@ -655,6 +650,7 @@ static void deallocate_threshold_block(unsigned int cpu, | |||
655 | static void threshold_remove_bank(unsigned int cpu, int bank) | 650 | static void threshold_remove_bank(unsigned int cpu, int bank) |
656 | { | 651 | { |
657 | struct threshold_bank *b; | 652 | struct threshold_bank *b; |
653 | struct device *dev; | ||
658 | char name[32]; | 654 | char name[32]; |
659 | int i = 0; | 655 | int i = 0; |
660 | 656 | ||
@@ -669,7 +665,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) | |||
669 | #ifdef CONFIG_SMP | 665 | #ifdef CONFIG_SMP |
670 | /* sibling symlink */ | 666 | /* sibling symlink */ |
671 | if (shared_bank[bank] && b->blocks->cpu != cpu) { | 667 | if (shared_bank[bank] && b->blocks->cpu != cpu) { |
672 | sysfs_remove_link(&per_cpu(mce_sysdev, cpu).kobj, name); | 668 | sysfs_remove_link(&mce_device[cpu]->kobj, name); |
673 | per_cpu(threshold_banks, cpu)[bank] = NULL; | 669 | per_cpu(threshold_banks, cpu)[bank] = NULL; |
674 | 670 | ||
675 | return; | 671 | return; |
@@ -681,7 +677,9 @@ static void threshold_remove_bank(unsigned int cpu, int bank) | |||
681 | if (i == cpu) | 677 | if (i == cpu) |
682 | continue; | 678 | continue; |
683 | 679 | ||
684 | sysfs_remove_link(&per_cpu(mce_sysdev, i).kobj, name); | 680 | dev = mce_device[i]; |
681 | if (dev) | ||
682 | sysfs_remove_link(&dev->kobj, name); | ||
685 | per_cpu(threshold_banks, i)[bank] = NULL; | 683 | per_cpu(threshold_banks, i)[bank] = NULL; |
686 | } | 684 | } |
687 | 685 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 787e06c84ea6..67bb17a37a0a 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -19,7 +19,6 @@ | |||
19 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
20 | #include <linux/percpu.h> | 20 | #include <linux/percpu.h> |
21 | #include <linux/export.h> | 21 | #include <linux/export.h> |
22 | #include <linux/sysdev.h> | ||
23 | #include <linux/types.h> | 22 | #include <linux/types.h> |
24 | #include <linux/init.h> | 23 | #include <linux/init.h> |
25 | #include <linux/smp.h> | 24 | #include <linux/smp.h> |
@@ -69,16 +68,16 @@ static atomic_t therm_throt_en = ATOMIC_INIT(0); | |||
69 | static u32 lvtthmr_init __read_mostly; | 68 | static u32 lvtthmr_init __read_mostly; |
70 | 69 | ||
71 | #ifdef CONFIG_SYSFS | 70 | #ifdef CONFIG_SYSFS |
72 | #define define_therm_throt_sysdev_one_ro(_name) \ | 71 | #define define_therm_throt_device_one_ro(_name) \ |
73 | static SYSDEV_ATTR(_name, 0444, \ | 72 | static DEVICE_ATTR(_name, 0444, \ |
74 | therm_throt_sysdev_show_##_name, \ | 73 | therm_throt_device_show_##_name, \ |
75 | NULL) \ | 74 | NULL) \ |
76 | 75 | ||
77 | #define define_therm_throt_sysdev_show_func(event, name) \ | 76 | #define define_therm_throt_device_show_func(event, name) \ |
78 | \ | 77 | \ |
79 | static ssize_t therm_throt_sysdev_show_##event##_##name( \ | 78 | static ssize_t therm_throt_device_show_##event##_##name( \ |
80 | struct sys_device *dev, \ | 79 | struct device *dev, \ |
81 | struct sysdev_attribute *attr, \ | 80 | struct device_attribute *attr, \ |
82 | char *buf) \ | 81 | char *buf) \ |
83 | { \ | 82 | { \ |
84 | unsigned int cpu = dev->id; \ | 83 | unsigned int cpu = dev->id; \ |
@@ -95,20 +94,20 @@ static ssize_t therm_throt_sysdev_show_##event##_##name( \ | |||
95 | return ret; \ | 94 | return ret; \ |
96 | } | 95 | } |
97 | 96 | ||
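For reference, the first invocation pair below expands, per the reworked macros, to roughly the following (function body elided) — each event/name pair yields one show routine plus the dev_attr_* handle used in the attribute tables further down:

static ssize_t therm_throt_device_show_core_throttle_count(
				struct device *dev,
				struct device_attribute *attr,
				char *buf);

static DEVICE_ATTR(core_throttle_count, 0444,
		   therm_throt_device_show_core_throttle_count, NULL);
	/* DEVICE_ATTR() defines dev_attr_core_throttle_count */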
98 | define_therm_throt_sysdev_show_func(core_throttle, count); | 97 | define_therm_throt_device_show_func(core_throttle, count); |
99 | define_therm_throt_sysdev_one_ro(core_throttle_count); | 98 | define_therm_throt_device_one_ro(core_throttle_count); |
100 | 99 | ||
101 | define_therm_throt_sysdev_show_func(core_power_limit, count); | 100 | define_therm_throt_device_show_func(core_power_limit, count); |
102 | define_therm_throt_sysdev_one_ro(core_power_limit_count); | 101 | define_therm_throt_device_one_ro(core_power_limit_count); |
103 | 102 | ||
104 | define_therm_throt_sysdev_show_func(package_throttle, count); | 103 | define_therm_throt_device_show_func(package_throttle, count); |
105 | define_therm_throt_sysdev_one_ro(package_throttle_count); | 104 | define_therm_throt_device_one_ro(package_throttle_count); |
106 | 105 | ||
107 | define_therm_throt_sysdev_show_func(package_power_limit, count); | 106 | define_therm_throt_device_show_func(package_power_limit, count); |
108 | define_therm_throt_sysdev_one_ro(package_power_limit_count); | 107 | define_therm_throt_device_one_ro(package_power_limit_count); |
109 | 108 | ||
110 | static struct attribute *thermal_throttle_attrs[] = { | 109 | static struct attribute *thermal_throttle_attrs[] = { |
111 | &attr_core_throttle_count.attr, | 110 | &dev_attr_core_throttle_count.attr, |
112 | NULL | 111 | NULL |
113 | }; | 112 | }; |
114 | 113 | ||
@@ -223,36 +222,36 @@ static int thresh_event_valid(int event) | |||
223 | 222 | ||
224 | #ifdef CONFIG_SYSFS | 223 | #ifdef CONFIG_SYSFS |
225 | /* Add/Remove thermal_throttle interface for CPU device: */ | 224 | /* Add/Remove thermal_throttle interface for CPU device: */ |
226 | static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev, | 225 | static __cpuinit int thermal_throttle_add_dev(struct device *dev, |
227 | unsigned int cpu) | 226 | unsigned int cpu) |
228 | { | 227 | { |
229 | int err; | 228 | int err; |
230 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 229 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
231 | 230 | ||
232 | err = sysfs_create_group(&sys_dev->kobj, &thermal_attr_group); | 231 | err = sysfs_create_group(&dev->kobj, &thermal_attr_group); |
233 | if (err) | 232 | if (err) |
234 | return err; | 233 | return err; |
235 | 234 | ||
236 | if (cpu_has(c, X86_FEATURE_PLN)) | 235 | if (cpu_has(c, X86_FEATURE_PLN)) |
237 | err = sysfs_add_file_to_group(&sys_dev->kobj, | 236 | err = sysfs_add_file_to_group(&dev->kobj, |
238 | &attr_core_power_limit_count.attr, | 237 | &dev_attr_core_power_limit_count.attr, |
239 | thermal_attr_group.name); | 238 | thermal_attr_group.name); |
240 | if (cpu_has(c, X86_FEATURE_PTS)) { | 239 | if (cpu_has(c, X86_FEATURE_PTS)) { |
241 | err = sysfs_add_file_to_group(&sys_dev->kobj, | 240 | err = sysfs_add_file_to_group(&dev->kobj, |
242 | &attr_package_throttle_count.attr, | 241 | &dev_attr_package_throttle_count.attr, |
243 | thermal_attr_group.name); | 242 | thermal_attr_group.name); |
244 | if (cpu_has(c, X86_FEATURE_PLN)) | 243 | if (cpu_has(c, X86_FEATURE_PLN)) |
245 | err = sysfs_add_file_to_group(&sys_dev->kobj, | 244 | err = sysfs_add_file_to_group(&dev->kobj, |
246 | &attr_package_power_limit_count.attr, | 245 | &dev_attr_package_power_limit_count.attr, |
247 | thermal_attr_group.name); | 246 | thermal_attr_group.name); |
248 | } | 247 | } |
249 | 248 | ||
250 | return err; | 249 | return err; |
251 | } | 250 | } |
252 | 251 | ||
253 | static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) | 252 | static __cpuinit void thermal_throttle_remove_dev(struct device *dev) |
254 | { | 253 | { |
255 | sysfs_remove_group(&sys_dev->kobj, &thermal_attr_group); | 254 | sysfs_remove_group(&dev->kobj, &thermal_attr_group); |
256 | } | 255 | } |
257 | 256 | ||
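thermal_throttle_add_dev() above uses the usual group-plus-optional-files idiom: create the base attribute group, then bolt on feature-dependent files with sysfs_add_file_to_group(). A minimal sketch of the same idiom, with hypothetical names throughout:

#include <linux/device.h>
#include <linux/sysfs.h>

static ssize_t demo_show(struct device *dev,
			 struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "0\n");
}

static DEVICE_ATTR(always_there, 0444, demo_show, NULL);
static DEVICE_ATTR(optional, 0444, demo_show, NULL);

static struct attribute *demo_attrs[] = {
	&dev_attr_always_there.attr,
	NULL
};

static struct attribute_group demo_attr_group = {
	.attrs	= demo_attrs,
	.name	= "demo",
};

static int demo_add_dev(struct device *dev, bool has_feature)
{
	int err;

	err = sysfs_create_group(&dev->kobj, &demo_attr_group);
	if (err)
		return err;

	if (has_feature)	/* cf. the X86_FEATURE_PLN/PTS checks above */
		err = sysfs_add_file_to_group(&dev->kobj,
					      &dev_attr_optional.attr,
					      demo_attr_group.name);
	return err;
}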
258 | /* Mutex protecting device creation against CPU hotplug: */ | 257 | /* Mutex protecting device creation against CPU hotplug: */ |
@@ -265,16 +264,16 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb, | |||
265 | void *hcpu) | 264 | void *hcpu) |
266 | { | 265 | { |
267 | unsigned int cpu = (unsigned long)hcpu; | 266 | unsigned int cpu = (unsigned long)hcpu; |
268 | struct sys_device *sys_dev; | 267 | struct device *dev; |
269 | int err = 0; | 268 | int err = 0; |
270 | 269 | ||
271 | sys_dev = get_cpu_sysdev(cpu); | 270 | dev = get_cpu_device(cpu); |
272 | 271 | ||
273 | switch (action) { | 272 | switch (action) { |
274 | case CPU_UP_PREPARE: | 273 | case CPU_UP_PREPARE: |
275 | case CPU_UP_PREPARE_FROZEN: | 274 | case CPU_UP_PREPARE_FROZEN: |
276 | mutex_lock(&therm_cpu_lock); | 275 | mutex_lock(&therm_cpu_lock); |
277 | err = thermal_throttle_add_dev(sys_dev, cpu); | 276 | err = thermal_throttle_add_dev(dev, cpu); |
278 | mutex_unlock(&therm_cpu_lock); | 277 | mutex_unlock(&therm_cpu_lock); |
279 | WARN_ON(err); | 278 | WARN_ON(err); |
280 | break; | 279 | break; |
@@ -283,7 +282,7 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb, | |||
283 | case CPU_DEAD: | 282 | case CPU_DEAD: |
284 | case CPU_DEAD_FROZEN: | 283 | case CPU_DEAD_FROZEN: |
285 | mutex_lock(&therm_cpu_lock); | 284 | mutex_lock(&therm_cpu_lock); |
286 | thermal_throttle_remove_dev(sys_dev); | 285 | thermal_throttle_remove_dev(dev); |
287 | mutex_unlock(&therm_cpu_lock); | 286 | mutex_unlock(&therm_cpu_lock); |
288 | break; | 287 | break; |
289 | } | 288 | } |
@@ -310,7 +309,7 @@ static __init int thermal_throttle_init_device(void) | |||
310 | #endif | 309 | #endif |
311 | /* connect live CPUs to sysfs */ | 310 | /* connect live CPUs to sysfs */ |
312 | for_each_online_cpu(cpu) { | 311 | for_each_online_cpu(cpu) { |
313 | err = thermal_throttle_add_dev(get_cpu_sysdev(cpu), cpu); | 312 | err = thermal_throttle_add_dev(get_cpu_device(cpu), cpu); |
314 | WARN_ON(err); | 313 | WARN_ON(err); |
315 | } | 314 | } |
316 | #ifdef CONFIG_HOTPLUG_CPU | 315 | #ifdef CONFIG_HOTPLUG_CPU |
@@ -323,17 +322,6 @@ device_initcall(thermal_throttle_init_device); | |||
323 | 322 | ||
324 | #endif /* CONFIG_SYSFS */ | 323 | #endif /* CONFIG_SYSFS */ |
325 | 324 | ||
326 | /* | ||
327 | * Set the two most significant bits to tell the mce log the type of ||
328 | * this thermal event. ||
329 | * This is a temporary solution; it may change in the future along with ||
330 | * the mce log infrastructure. ||
331 | */ | ||
332 | #define CORE_THROTTLED (0) | ||
333 | #define CORE_POWER_LIMIT ((__u64)1 << 62) | ||
334 | #define PACKAGE_THROTTLED ((__u64)2 << 62) | ||
335 | #define PACKAGE_POWER_LIMIT ((__u64)3 << 62) | ||
336 | |||
337 | static void notify_thresholds(__u64 msr_val) | 325 | static void notify_thresholds(__u64 msr_val) |
338 | { | 326 | { |
339 | /* check whether the interrupt handler is defined; | 327 | /* check whether the interrupt handler is defined; |
@@ -363,27 +351,23 @@ static void intel_thermal_interrupt(void) | |||
363 | if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, | 351 | if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, |
364 | THERMAL_THROTTLING_EVENT, | 352 | THERMAL_THROTTLING_EVENT, |
365 | CORE_LEVEL) != 0) | 353 | CORE_LEVEL) != 0) |
366 | mce_log_therm_throt_event(CORE_THROTTLED | msr_val); | 354 | mce_log_therm_throt_event(msr_val); |
367 | 355 | ||
368 | if (this_cpu_has(X86_FEATURE_PLN)) | 356 | if (this_cpu_has(X86_FEATURE_PLN)) |
369 | if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, | 357 | therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, |
370 | POWER_LIMIT_EVENT, | 358 | POWER_LIMIT_EVENT, |
371 | CORE_LEVEL) != 0) | 359 | CORE_LEVEL); |
372 | mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val); | ||
373 | 360 | ||
374 | if (this_cpu_has(X86_FEATURE_PTS)) { | 361 | if (this_cpu_has(X86_FEATURE_PTS)) { |
375 | rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); | 362 | rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); |
376 | if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, | 363 | therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, |
377 | THERMAL_THROTTLING_EVENT, | 364 | THERMAL_THROTTLING_EVENT, |
378 | PACKAGE_LEVEL) != 0) | 365 | PACKAGE_LEVEL); |
379 | mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val); | ||
380 | if (this_cpu_has(X86_FEATURE_PLN)) | 366 | if (this_cpu_has(X86_FEATURE_PLN)) |
381 | if (therm_throt_process(msr_val & | 367 | therm_throt_process(msr_val & |
382 | PACKAGE_THERM_STATUS_POWER_LIMIT, | 368 | PACKAGE_THERM_STATUS_POWER_LIMIT, |
383 | POWER_LIMIT_EVENT, | 369 | POWER_LIMIT_EVENT, |
384 | PACKAGE_LEVEL) != 0) | 370 | PACKAGE_LEVEL); |
385 | mce_log_therm_throt_event(PACKAGE_POWER_LIMIT | ||
386 | | msr_val); | ||
387 | } | 371 | } |
388 | } | 372 | } |
389 | 373 | ||
@@ -397,8 +381,8 @@ static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; | |||
397 | 381 | ||
398 | asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) | 382 | asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) |
399 | { | 383 | { |
400 | exit_idle(); | ||
401 | irq_enter(); | 384 | irq_enter(); |
385 | exit_idle(); | ||
402 | inc_irq_stat(irq_thermal_count); | 386 | inc_irq_stat(irq_thermal_count); |
403 | smp_thermal_vector(); | 387 | smp_thermal_vector(); |
404 | irq_exit(); | 388 | irq_exit(); |
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c index d746df2909c9..aa578cadb940 100644 --- a/arch/x86/kernel/cpu/mcheck/threshold.c +++ b/arch/x86/kernel/cpu/mcheck/threshold.c | |||
@@ -19,8 +19,8 @@ void (*mce_threshold_vector)(void) = default_threshold_interrupt; | |||
19 | 19 | ||
20 | asmlinkage void smp_threshold_interrupt(void) | 20 | asmlinkage void smp_threshold_interrupt(void) |
21 | { | 21 | { |
22 | exit_idle(); | ||
23 | irq_enter(); | 22 | irq_enter(); |
23 | exit_idle(); | ||
24 | inc_irq_stat(irq_threshold_count); | 24 | inc_irq_stat(irq_threshold_count); |
25 | mce_threshold_vector(); | 25 | mce_threshold_vector(); |
26 | irq_exit(); | 26 | irq_exit(); |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 2bda212a0010..5adce1040b11 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -484,18 +484,195 @@ static inline int is_x86_event(struct perf_event *event) | |||
484 | return event->pmu == &pmu; | 484 | return event->pmu == &pmu; |
485 | } | 485 | } |
486 | 486 | ||
487 | /* | ||
488 | * Event scheduler state: | ||
489 | * | ||
490 | * Assign events iterating over all events and counters, beginning ||
491 | * with the events of least weight. Keep the current iterator ||
492 | * state in struct sched_state. | ||
493 | */ | ||
494 | struct sched_state { | ||
495 | int weight; | ||
496 | int event; /* event index */ | ||
497 | int counter; /* counter index */ | ||
498 | int unassigned; /* number of events to be assigned left */ | ||
499 | unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
500 | }; | ||
501 | |||
502 | /* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */ | ||
503 | #define SCHED_STATES_MAX 2 | ||
504 | |||
505 | struct perf_sched { | ||
506 | int max_weight; | ||
507 | int max_events; | ||
508 | struct event_constraint **constraints; | ||
509 | struct sched_state state; | ||
510 | int saved_states; | ||
511 | struct sched_state saved[SCHED_STATES_MAX]; | ||
512 | }; | ||
513 | |||
514 | /* | ||
515 | * Initialize the iterator that runs through all events and counters. ||
516 | */ | ||
517 | static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c, | ||
518 | int num, int wmin, int wmax) | ||
519 | { | ||
520 | int idx; | ||
521 | |||
522 | memset(sched, 0, sizeof(*sched)); | ||
523 | sched->max_events = num; | ||
524 | sched->max_weight = wmax; | ||
525 | sched->constraints = c; | ||
526 | |||
527 | for (idx = 0; idx < num; idx++) { | ||
528 | if (c[idx]->weight == wmin) | ||
529 | break; | ||
530 | } | ||
531 | |||
532 | sched->state.event = idx; /* start with min weight */ | ||
533 | sched->state.weight = wmin; | ||
534 | sched->state.unassigned = num; | ||
535 | } | ||
536 | |||
537 | static void perf_sched_save_state(struct perf_sched *sched) | ||
538 | { | ||
539 | if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX)) | ||
540 | return; | ||
541 | |||
542 | sched->saved[sched->saved_states] = sched->state; | ||
543 | sched->saved_states++; | ||
544 | } | ||
545 | |||
546 | static bool perf_sched_restore_state(struct perf_sched *sched) | ||
547 | { | ||
548 | if (!sched->saved_states) | ||
549 | return false; | ||
550 | |||
551 | sched->saved_states--; | ||
552 | sched->state = sched->saved[sched->saved_states]; | ||
553 | |||
554 | /* continue with next counter: */ | ||
555 | clear_bit(sched->state.counter++, sched->state.used); | ||
556 | |||
557 | return true; | ||
558 | } | ||
559 | |||
560 | /* | ||
561 | * Select a counter for the current event to schedule. Return true on | ||
562 | * success. | ||
563 | */ | ||
564 | static bool __perf_sched_find_counter(struct perf_sched *sched) | ||
565 | { | ||
566 | struct event_constraint *c; | ||
567 | int idx; | ||
568 | |||
569 | if (!sched->state.unassigned) | ||
570 | return false; | ||
571 | |||
572 | if (sched->state.event >= sched->max_events) | ||
573 | return false; | ||
574 | |||
575 | c = sched->constraints[sched->state.event]; | ||
576 | |||
577 | /* Prefer fixed purpose counters */ | ||
578 | if (x86_pmu.num_counters_fixed) { | ||
579 | idx = X86_PMC_IDX_FIXED; | ||
580 | for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) { | ||
581 | if (!__test_and_set_bit(idx, sched->state.used)) | ||
582 | goto done; | ||
583 | } | ||
584 | } | ||
585 | /* Grab the first unused counter starting with idx */ | ||
586 | idx = sched->state.counter; | ||
587 | for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_FIXED) { | ||
588 | if (!__test_and_set_bit(idx, sched->state.used)) | ||
589 | goto done; | ||
590 | } | ||
591 | |||
592 | return false; | ||
593 | |||
594 | done: | ||
595 | sched->state.counter = idx; | ||
596 | |||
597 | if (c->overlap) | ||
598 | perf_sched_save_state(sched); | ||
599 | |||
600 | return true; | ||
601 | } | ||
602 | |||
603 | static bool perf_sched_find_counter(struct perf_sched *sched) | ||
604 | { | ||
605 | while (!__perf_sched_find_counter(sched)) { | ||
606 | if (!perf_sched_restore_state(sched)) | ||
607 | return false; | ||
608 | } | ||
609 | |||
610 | return true; | ||
611 | } | ||
612 | |||
613 | /* | ||
614 | * Go through all unassigned events and find the next one to schedule. | ||
615 | * Take events with the least weight first. Return true on success. | ||
616 | */ | ||
617 | static bool perf_sched_next_event(struct perf_sched *sched) | ||
618 | { | ||
619 | struct event_constraint *c; | ||
620 | |||
621 | if (!sched->state.unassigned || !--sched->state.unassigned) | ||
622 | return false; | ||
623 | |||
624 | do { | ||
625 | /* next event */ | ||
626 | sched->state.event++; | ||
627 | if (sched->state.event >= sched->max_events) { | ||
628 | /* next weight */ | ||
629 | sched->state.event = 0; | ||
630 | sched->state.weight++; | ||
631 | if (sched->state.weight > sched->max_weight) | ||
632 | return false; | ||
633 | } | ||
634 | c = sched->constraints[sched->state.event]; | ||
635 | } while (c->weight != sched->state.weight); | ||
636 | |||
637 | sched->state.counter = 0; /* start with first counter */ | ||
638 | |||
639 | return true; | ||
640 | } | ||
641 | |||
642 | /* | ||
643 | * Assign a counter for each event. | ||
644 | */ | ||
645 | static int perf_assign_events(struct event_constraint **constraints, int n, | ||
646 | int wmin, int wmax, int *assign) | ||
647 | { | ||
648 | struct perf_sched sched; | ||
649 | |||
650 | perf_sched_init(&sched, constraints, n, wmin, wmax); | ||
651 | |||
652 | do { | ||
653 | if (!perf_sched_find_counter(&sched)) | ||
654 | break; /* failed */ | ||
655 | if (assign) | ||
656 | assign[sched.state.event] = sched.state.counter; | ||
657 | } while (perf_sched_next_event(&sched)); | ||
658 | |||
659 | return sched.state.unassigned; | ||
660 | } | ||
661 | |||
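perf_assign_events() runs the greedy pass in ascending constraint weight (wmin..wmax), so the most constrained events grab counters first; the save/restore pair above only comes into play when an overlap-flagged constraint makes a greedy choice that must later be revisited. To see why the weight ordering matters on its own, here is a stripped-down userspace model of just the greedy pass (toy masks, none of the kernel structures):

#include <stdio.h>

/* toy constraint masks, sorted by weight: bit i set => counter i allowed */
static const unsigned masks[] = { 0x1, 0x3, 0x7 };	/* weights 1, 2, 3 */

int main(void)
{
	unsigned used = 0;
	int assign[3], e, c;

	for (e = 0; e < 3; e++) {
		for (c = 0; c < 3; c++)
			if (((masks[e] >> c) & 1) && !((used >> c) & 1))
				break;
		if (c == 3)
			return 1;	/* stuck: the kernel would backtrack here */
		used |= 1u << c;
		assign[e] = c;
	}
	for (e = 0; e < 3; e++)
		printf("event %d -> counter %d\n", e, assign[e]);
	return 0;
}

With masks 0x3 and 0x1 processed in the opposite order, the 0x3 event would grab counter 0 and strand the 0x1 event; iterating from wmin upward prevents exactly that.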
487 | int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | 662 | int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) |
488 | { | 663 | { |
489 | struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; | 664 | struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; |
490 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 665 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
491 | int i, j, w, wmax, num = 0; | 666 | int i, wmin, wmax, num = 0; |
492 | struct hw_perf_event *hwc; | 667 | struct hw_perf_event *hwc; |
493 | 668 | ||
494 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); | 669 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); |
495 | 670 | ||
496 | for (i = 0; i < n; i++) { | 671 | for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) { |
497 | c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); | 672 | c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); |
498 | constraints[i] = c; | 673 | constraints[i] = c; |
674 | wmin = min(wmin, c->weight); | ||
675 | wmax = max(wmax, c->weight); | ||
499 | } | 676 | } |
500 | 677 | ||
501 | /* | 678 | /* |
@@ -521,60 +698,12 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | |||
521 | if (assign) | 698 | if (assign) |
522 | assign[i] = hwc->idx; | 699 | assign[i] = hwc->idx; |
523 | } | 700 | } |
524 | if (i == n) | ||
525 | goto done; | ||
526 | |||
527 | /* | ||
528 | * begin slow path | ||
529 | */ | ||
530 | |||
531 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); | ||
532 | 701 | ||
533 | /* | 702 | /* slow path */ |
534 | * weight = number of possible counters | 703 | if (i != n) |
535 | * | 704 | num = perf_assign_events(constraints, n, wmin, wmax, assign); |
536 | * 1 = most constrained, only works on one counter | ||
537 | * wmax = least constrained, works on any counter | ||
538 | * | ||
539 | * assign events to counters starting with most | ||
540 | * constrained events. | ||
541 | */ | ||
542 | wmax = x86_pmu.num_counters; | ||
543 | 705 | ||
544 | /* | 706 | /* |
545 | * when fixed event counters are present, | ||
546 | * wmax is incremented by 1 to account | ||
547 | * for one more choice | ||
548 | */ | ||
549 | if (x86_pmu.num_counters_fixed) | ||
550 | wmax++; | ||
551 | |||
552 | for (w = 1, num = n; num && w <= wmax; w++) { | ||
553 | /* for each event */ | ||
554 | for (i = 0; num && i < n; i++) { | ||
555 | c = constraints[i]; | ||
556 | hwc = &cpuc->event_list[i]->hw; | ||
557 | |||
558 | if (c->weight != w) | ||
559 | continue; | ||
560 | |||
561 | for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) { | ||
562 | if (!test_bit(j, used_mask)) | ||
563 | break; | ||
564 | } | ||
565 | |||
566 | if (j == X86_PMC_IDX_MAX) | ||
567 | break; | ||
568 | |||
569 | __set_bit(j, used_mask); | ||
570 | |||
571 | if (assign) | ||
572 | assign[i] = j; | ||
573 | num--; | ||
574 | } | ||
575 | } | ||
576 | done: | ||
577 | /* | ||
578 | * scheduling failed or is just a simulation, | 707 | * scheduling failed or is just a simulation, |
579 | * free resources if necessary | 708 | * free resources if necessary |
580 | */ | 709 | */ |
@@ -1119,6 +1248,7 @@ static void __init pmu_check_apic(void) | |||
1119 | 1248 | ||
1120 | static int __init init_hw_perf_events(void) | 1249 | static int __init init_hw_perf_events(void) |
1121 | { | 1250 | { |
1251 | struct x86_pmu_quirk *quirk; | ||
1122 | struct event_constraint *c; | 1252 | struct event_constraint *c; |
1123 | int err; | 1253 | int err; |
1124 | 1254 | ||
@@ -1147,8 +1277,8 @@ static int __init init_hw_perf_events(void) | |||
1147 | 1277 | ||
1148 | pr_cont("%s PMU driver.\n", x86_pmu.name); | 1278 | pr_cont("%s PMU driver.\n", x86_pmu.name); |
1149 | 1279 | ||
1150 | if (x86_pmu.quirks) | 1280 | for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next) |
1151 | x86_pmu.quirks(); | 1281 | quirk->func(); |
1152 | 1282 | ||
1153 | if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { | 1283 | if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { |
1154 | WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", | 1284 | WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", |
@@ -1171,12 +1301,18 @@ static int __init init_hw_perf_events(void) | |||
1171 | 1301 | ||
1172 | unconstrained = (struct event_constraint) | 1302 | unconstrained = (struct event_constraint) |
1173 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, | 1303 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, |
1174 | 0, x86_pmu.num_counters); | 1304 | 0, x86_pmu.num_counters, 0); |
1175 | 1305 | ||
1176 | if (x86_pmu.event_constraints) { | 1306 | if (x86_pmu.event_constraints) { |
1307 | /* | ||
1308 | * The event on fixed counter 2 (REF_CYCLES) only works on that ||
1309 | * counter, so do not extend its mask to the generic counters. ||
1310 | */ | ||
1177 | for_each_event_constraint(c, x86_pmu.event_constraints) { | 1311 | for_each_event_constraint(c, x86_pmu.event_constraints) { |
1178 | if (c->cmask != X86_RAW_EVENT_MASK) | 1312 | if (c->cmask != X86_RAW_EVENT_MASK |
1313 | || c->idxmsk64 == X86_PMC_MSK_FIXED_REF_CYCLES) { | ||
1179 | continue; | 1314 | continue; |
1315 | } | ||
1180 | 1316 | ||
1181 | c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; | 1317 | c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; |
1182 | c->weight += x86_pmu.num_counters; | 1318 | c->weight += x86_pmu.num_counters; |
@@ -1566,3 +1702,15 @@ unsigned long perf_misc_flags(struct pt_regs *regs) | |||
1566 | 1702 | ||
1567 | return misc; | 1703 | return misc; |
1568 | } | 1704 | } |
1705 | |||
1706 | void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) | ||
1707 | { | ||
1708 | cap->version = x86_pmu.version; | ||
1709 | cap->num_counters_gp = x86_pmu.num_counters; | ||
1710 | cap->num_counters_fixed = x86_pmu.num_counters_fixed; | ||
1711 | cap->bit_width_gp = x86_pmu.cntval_bits; | ||
1712 | cap->bit_width_fixed = x86_pmu.cntval_bits; | ||
1713 | cap->events_mask = (unsigned int)x86_pmu.events_maskl; | ||
1714 | cap->events_mask_len = x86_pmu.events_mask_len; | ||
1715 | } | ||
1716 | EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability); | ||
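The new export gives the rest of the kernel a stable way to query what the host PMU offers — version, counter counts and widths, and the architectural-events mask assembled above (the intended consumer is KVM's PMU virtualization). A hypothetical caller:

	struct x86_pmu_capability cap;

	perf_get_x86_pmu_capability(&cap);
	pr_info("PMU v%d: %d GP counters (%d bits), %d fixed\n",
		cap.version, cap.num_counters_gp, cap.bit_width_gp,
		cap.num_counters_fixed);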
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index b9698d40ac4b..8944062f46e2 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h | |||
@@ -45,6 +45,7 @@ struct event_constraint { | |||
45 | u64 code; | 45 | u64 code; |
46 | u64 cmask; | 46 | u64 cmask; |
47 | int weight; | 47 | int weight; |
48 | int overlap; | ||
48 | }; | 49 | }; |
49 | 50 | ||
50 | struct amd_nb { | 51 | struct amd_nb { |
@@ -151,15 +152,40 @@ struct cpu_hw_events { | |||
151 | void *kfree_on_online; | 152 | void *kfree_on_online; |
152 | }; | 153 | }; |
153 | 154 | ||
154 | #define __EVENT_CONSTRAINT(c, n, m, w) {\ | 155 | #define __EVENT_CONSTRAINT(c, n, m, w, o) {\ |
155 | { .idxmsk64 = (n) }, \ | 156 | { .idxmsk64 = (n) }, \ |
156 | .code = (c), \ | 157 | .code = (c), \ |
157 | .cmask = (m), \ | 158 | .cmask = (m), \ |
158 | .weight = (w), \ | 159 | .weight = (w), \ |
160 | .overlap = (o), \ | ||
159 | } | 161 | } |
160 | 162 | ||
161 | #define EVENT_CONSTRAINT(c, n, m) \ | 163 | #define EVENT_CONSTRAINT(c, n, m) \ |
162 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) | 164 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0) |
165 | |||
166 | /* | ||
167 | * The overlap flag marks event constraints with overlapping counter | ||
168 | * masks. This is the case if the counter mask of such an event is not | ||
169 | * a subset of any other counter mask of a constraint with an equal or | ||
170 | * higher weight, e.g.: | ||
171 | * | ||
172 | * c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0); | ||
173 | * c_another1 = EVENT_CONSTRAINT(0, 0x07, 0); | ||
174 | * c_another2 = EVENT_CONSTRAINT(0, 0x38, 0); | ||
175 | * | ||
176 | * The event scheduler may not select the correct counter in the first ||
177 | * pass because it does not yet know which subsequent events will be ||
178 | * scheduled, and may then fail to schedule them. So we set the ||
179 | * overlap flag for such constraints to give the scheduler a hint which | ||
180 | * events to select for counter rescheduling. | ||
181 | * | ||
182 | * Care must be taken as the rescheduling algorithm is O(n!), which ||
183 | * will dramatically increase scheduling cycles for an over-committed ||
184 | * system. The number of such EVENT_CONSTRAINT_OVERLAP() macros ||
185 | * and its counter masks must be kept at a minimum. | ||
186 | */ | ||
187 | #define EVENT_CONSTRAINT_OVERLAP(c, n, m) \ | ||
188 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1) | ||
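A concrete failure case the flag guards against (hypothetical event mix): take one event constrained to 0x09 (counters 0 and 3, weight 2) and three events constrained to 0x07 (counters 0-2, weight 3). The weight-ordered greedy pass schedules the 0x09 event first and puts it on counter 0, leaving only counters 1 and 2 for the three 0x07 events — scheduling fails. Because 0x09 is overlap-marked, perf_sched_save_state() recorded the state right after that assignment, so perf_sched_restore_state() can move the event to counter 3, after which all four events fit.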
163 | 189 | ||
164 | /* | 190 | /* |
165 | * Constraint on the Event code. | 191 | * Constraint on the Event code. |
@@ -235,6 +261,11 @@ union perf_capabilities { | |||
235 | u64 capabilities; | 261 | u64 capabilities; |
236 | }; | 262 | }; |
237 | 263 | ||
264 | struct x86_pmu_quirk { | ||
265 | struct x86_pmu_quirk *next; | ||
266 | void (*func)(void); | ||
267 | }; | ||
268 | |||
238 | /* | 269 | /* |
239 | * struct x86_pmu - generic x86 pmu | 270 | * struct x86_pmu - generic x86 pmu |
240 | */ | 271 | */ |
@@ -259,6 +290,11 @@ struct x86_pmu { | |||
259 | int num_counters_fixed; | 290 | int num_counters_fixed; |
260 | int cntval_bits; | 291 | int cntval_bits; |
261 | u64 cntval_mask; | 292 | u64 cntval_mask; |
293 | union { | ||
294 | unsigned long events_maskl; | ||
295 | unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)]; | ||
296 | }; | ||
297 | int events_mask_len; | ||
262 | int apic; | 298 | int apic; |
263 | u64 max_period; | 299 | u64 max_period; |
264 | struct event_constraint * | 300 | struct event_constraint * |
@@ -268,7 +304,7 @@ struct x86_pmu { | |||
268 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, | 304 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, |
269 | struct perf_event *event); | 305 | struct perf_event *event); |
270 | struct event_constraint *event_constraints; | 306 | struct event_constraint *event_constraints; |
271 | void (*quirks)(void); | 307 | struct x86_pmu_quirk *quirks; |
272 | int perfctr_second_write; | 308 | int perfctr_second_write; |
273 | 309 | ||
274 | int (*cpu_prepare)(int cpu); | 310 | int (*cpu_prepare)(int cpu); |
@@ -309,6 +345,15 @@ struct x86_pmu { | |||
309 | struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); | 345 | struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); |
310 | }; | 346 | }; |
311 | 347 | ||
348 | #define x86_add_quirk(func_) \ | ||
349 | do { \ | ||
350 | static struct x86_pmu_quirk __quirk __initdata = { \ | ||
351 | .func = func_, \ | ||
352 | }; \ | ||
353 | __quirk.next = x86_pmu.quirks; \ | ||
354 | x86_pmu.quirks = &__quirk; \ | ||
355 | } while (0) | ||
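Each expansion of x86_add_quirk() defines its own static __initdata node and prepends it to x86_pmu.quirks, so quirks chain without any allocation and the node memory is discarded after init; init_hw_perf_events() then walks the list and invokes each func once. Since nodes are prepended, the quirk installed first runs last — hence the "Install first, so it runs last" comment at the intel_arch_events_quirk call site later in this patch. A hypothetical use:

static __init void my_vendor_quirk(void)
{
	pr_warn("applying my_vendor PMU workaround\n");
}

	/* somewhere in a PMU init path: */
	x86_add_quirk(my_vendor_quirk);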
356 | |||
312 | #define ERF_NO_HT_SHARING 1 | 357 | #define ERF_NO_HT_SHARING 1 |
313 | #define ERF_HAS_RSP_1 2 | 358 | #define ERF_HAS_RSP_1 2 |
314 | 359 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index aeefd45697a2..0397b23be8e9 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
@@ -492,7 +492,7 @@ static __initconst const struct x86_pmu amd_pmu = { | |||
492 | static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0); | 492 | static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0); |
493 | static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0); | 493 | static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0); |
494 | static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0); | 494 | static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0); |
495 | static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0); | 495 | static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0); |
496 | static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); | 496 | static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); |
497 | static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); | 497 | static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); |
498 | 498 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 121f1be4da19..3bd37bdf1b8e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -28,6 +28,7 @@ static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly = | |||
28 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | 28 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, |
29 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | 29 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, |
30 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, | 30 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, |
31 | [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */ | ||
31 | }; | 32 | }; |
32 | 33 | ||
33 | static struct event_constraint intel_core_event_constraints[] __read_mostly = | 34 | static struct event_constraint intel_core_event_constraints[] __read_mostly = |
@@ -45,12 +46,7 @@ static struct event_constraint intel_core2_event_constraints[] __read_mostly = | |||
45 | { | 46 | { |
46 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 47 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
47 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | 48 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
48 | /* | 49 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
49 | * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event | ||
50 | * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed | ||
51 | * ratio between these counters. | ||
52 | */ | ||
53 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ | ||
54 | INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ | 50 | INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ |
55 | INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ | 51 | INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ |
56 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | 52 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ |
@@ -68,7 +64,7 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly = | |||
68 | { | 64 | { |
69 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 65 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
70 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | 66 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
71 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ | 67 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
72 | INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ | 68 | INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ |
73 | INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ | 69 | INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ |
74 | INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ | 70 | INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ |
@@ -90,7 +86,7 @@ static struct event_constraint intel_westmere_event_constraints[] __read_mostly | |||
90 | { | 86 | { |
91 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 87 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
92 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | 88 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
93 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ | 89 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
94 | INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ | 90 | INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ |
95 | INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ | 91 | INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ |
96 | INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ | 92 | INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ |
@@ -102,7 +98,7 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly = | |||
102 | { | 98 | { |
103 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 99 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
104 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | 100 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
105 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ | 101 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
106 | INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ | 102 | INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ |
107 | INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ | 103 | INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ |
108 | INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ | 104 | INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ |
@@ -125,7 +121,7 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly = | |||
125 | { | 121 | { |
126 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 122 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
127 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | 123 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
128 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ | 124 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
129 | EVENT_CONSTRAINT_END | 125 | EVENT_CONSTRAINT_END |
130 | }; | 126 | }; |
131 | 127 | ||
@@ -1519,7 +1515,7 @@ static __initconst const struct x86_pmu intel_pmu = { | |||
1519 | .guest_get_msrs = intel_guest_get_msrs, | 1515 | .guest_get_msrs = intel_guest_get_msrs, |
1520 | }; | 1516 | }; |
1521 | 1517 | ||
1522 | static void intel_clovertown_quirks(void) | 1518 | static __init void intel_clovertown_quirk(void) |
1523 | { | 1519 | { |
1524 | /* | 1520 | /* |
1525 | * PEBS is unreliable due to: | 1521 | * PEBS is unreliable due to: |
@@ -1545,19 +1541,60 @@ static void intel_clovertown_quirks(void) | |||
1545 | x86_pmu.pebs_constraints = NULL; | 1541 | x86_pmu.pebs_constraints = NULL; |
1546 | } | 1542 | } |
1547 | 1543 | ||
1548 | static void intel_sandybridge_quirks(void) | 1544 | static __init void intel_sandybridge_quirk(void) |
1549 | { | 1545 | { |
1550 | printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); | 1546 | printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); |
1551 | x86_pmu.pebs = 0; | 1547 | x86_pmu.pebs = 0; |
1552 | x86_pmu.pebs_constraints = NULL; | 1548 | x86_pmu.pebs_constraints = NULL; |
1553 | } | 1549 | } |
1554 | 1550 | ||
1551 | static const struct { int id; char *name; } intel_arch_events_map[] __initconst = { | ||
1552 | { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" }, | ||
1553 | { PERF_COUNT_HW_INSTRUCTIONS, "instructions" }, | ||
1554 | { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" }, | ||
1555 | { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" }, | ||
1556 | { PERF_COUNT_HW_CACHE_MISSES, "cache misses" }, | ||
1557 | { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" }, | ||
1558 | { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" }, | ||
1559 | }; | ||
1560 | |||
1561 | static __init void intel_arch_events_quirk(void) | ||
1562 | { | ||
1563 | int bit; | ||
1564 | |||
1565 | /* disable events that cpuid reports as not present */ ||
1566 | for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) { | ||
1567 | intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0; | ||
1568 | printk(KERN_WARNING "CPUID marked event: \'%s\' unavailable\n", | ||
1569 | intel_arch_events_map[bit].name); | ||
1570 | } | ||
1571 | } | ||
1572 | |||
1573 | static __init void intel_nehalem_quirk(void) | ||
1574 | { | ||
1575 | union cpuid10_ebx ebx; | ||
1576 | |||
1577 | ebx.full = x86_pmu.events_maskl; | ||
1578 | if (ebx.split.no_branch_misses_retired) { | ||
1579 | /* | ||
1580 | * Erratum AAJ80 detected, we work it around by using | ||
1581 | * the BR_MISP_EXEC.ANY event. This will over-count | ||
1582 | * branch-misses, but it's still much better than the | ||
1583 | * architectural event which is often completely bogus: | ||
1584 | */ | ||
1585 | intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89; | ||
1586 | ebx.split.no_branch_misses_retired = 0; | ||
1587 | x86_pmu.events_maskl = ebx.full; | ||
1588 | printk(KERN_INFO "CPU erratum AAJ80 worked around\n"); | ||
1589 | } | ||
1590 | } | ||
1591 | |||
1555 | __init int intel_pmu_init(void) | 1592 | __init int intel_pmu_init(void) |
1556 | { | 1593 | { |
1557 | union cpuid10_edx edx; | 1594 | union cpuid10_edx edx; |
1558 | union cpuid10_eax eax; | 1595 | union cpuid10_eax eax; |
1596 | union cpuid10_ebx ebx; | ||
1559 | unsigned int unused; | 1597 | unsigned int unused; |
1560 | unsigned int ebx; | ||
1561 | int version; | 1598 | int version; |
1562 | 1599 | ||
1563 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | 1600 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { |
@@ -1574,8 +1611,8 @@ __init int intel_pmu_init(void) | |||
1574 | * Check whether the Architectural PerfMon supports | 1611 | * Check whether the Architectural PerfMon supports |
1575 | * Branch Misses Retired hw_event or not. | 1612 | * Branch Misses Retired hw_event or not. |
1576 | */ | 1613 | */ |
1577 | cpuid(10, &eax.full, &ebx, &unused, &edx.full); | 1614 | cpuid(10, &eax.full, &ebx.full, &unused, &edx.full); |
1578 | if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) | 1615 | if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT) |
1579 | return -ENODEV; | 1616 | return -ENODEV; |
1580 | 1617 | ||
1581 | version = eax.split.version_id; | 1618 | version = eax.split.version_id; |
@@ -1589,6 +1626,9 @@ __init int intel_pmu_init(void) | |||
1589 | x86_pmu.cntval_bits = eax.split.bit_width; | 1626 | x86_pmu.cntval_bits = eax.split.bit_width; |
1590 | x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; | 1627 | x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; |
1591 | 1628 | ||
1629 | x86_pmu.events_maskl = ebx.full; | ||
1630 | x86_pmu.events_mask_len = eax.split.mask_length; | ||
1631 | |||
1592 | /* | 1632 | /* |
1593 | * Quirk: v2 perfmon does not report fixed-purpose events, so | 1633 | * Quirk: v2 perfmon does not report fixed-purpose events, so |
1594 | * assume at least 3 events: | 1634 | * assume at least 3 events: |
@@ -1608,6 +1648,8 @@ __init int intel_pmu_init(void) | |||
1608 | 1648 | ||
1609 | intel_ds_init(); | 1649 | intel_ds_init(); |
1610 | 1650 | ||
1651 | x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */ | ||
1652 | |||
1611 | /* | 1653 | /* |
1612 | * Install the hw-cache-events table: | 1654 | * Install the hw-cache-events table: |
1613 | */ | 1655 | */ |
@@ -1617,7 +1659,7 @@ __init int intel_pmu_init(void) | |||
1617 | break; | 1659 | break; |
1618 | 1660 | ||
1619 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ | 1661 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ |
1620 | x86_pmu.quirks = intel_clovertown_quirks; | 1662 | x86_add_quirk(intel_clovertown_quirk); |
1621 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ | 1663 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ |
1622 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ | 1664 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ |
1623 | case 29: /* six-core 45 nm xeon "Dunnington" */ | 1665 | case 29: /* six-core 45 nm xeon "Dunnington" */ |
@@ -1651,17 +1693,8 @@ __init int intel_pmu_init(void) | |||
1651 | /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ | 1693 | /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ |
1652 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; | 1694 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; |
1653 | 1695 | ||
1654 | if (ebx & 0x40) { | 1696 | x86_add_quirk(intel_nehalem_quirk); |
1655 | /* | ||
1656 | * Erratum AAJ80 detected, we work it around by using | ||
1657 | * the BR_MISP_EXEC.ANY event. This will over-count | ||
1658 | * branch-misses, but it's still much better than the | ||
1659 | * architectural event which is often completely bogus: | ||
1660 | */ | ||
1661 | intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89; | ||
1662 | 1697 | ||
1663 | pr_cont("erratum AAJ80 worked around, "); | ||
1664 | } | ||
1665 | pr_cont("Nehalem events, "); | 1698 | pr_cont("Nehalem events, "); |
1666 | break; | 1699 | break; |
1667 | 1700 | ||
@@ -1701,7 +1734,7 @@ __init int intel_pmu_init(void) | |||
1701 | break; | 1734 | break; |
1702 | 1735 | ||
1703 | case 42: /* SandyBridge */ | 1736 | case 42: /* SandyBridge */ |
1704 | x86_pmu.quirks = intel_sandybridge_quirks; | 1737 | x86_add_quirk(intel_sandybridge_quirk); |
1705 | case 45: /* SandyBridge, "Romley-EP" */ | 1738 | case 45: /* SandyBridge, "Romley-EP" */ |
1706 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, | 1739 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, |
1707 | sizeof(hw_cache_event_ids)); | 1740 | sizeof(hw_cache_event_ids)); |
@@ -1738,5 +1771,6 @@ __init int intel_pmu_init(void) | |||
1738 | break; | 1771 | break; |
1739 | } | 1772 | } |
1740 | } | 1773 | } |
1774 | |||
1741 | return 0; | 1775 | return 0; |
1742 | } | 1776 | } |
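The perf_event_intel.c changes replace the single x86_pmu.quirks function pointer with x86_add_quirk(), so several quirks can stack and all run at init time. A minimal userspace sketch of that list-based pattern; the names here (struct quirk, add_quirk, run_quirks) are illustrative, not the kernel's API:

#include <stdio.h>
#include <stdlib.h>

struct quirk {
	void (*func)(void);
	struct quirk *next;
};

static struct quirk *quirk_list;

static void add_quirk(void (*func)(void))
{
	struct quirk *q = malloc(sizeof(*q));

	q->func = func;
	q->next = quirk_list;	/* prepend: newer entries run first */
	quirk_list = q;
}

static void run_quirks(void)
{
	for (struct quirk *q = quirk_list; q; q = q->next)
		q->func();
}

static void arch_events_quirk(void) { puts("arch events quirk"); }
static void model_quirk(void)       { puts("model-specific quirk"); }

int main(void)
{
	add_quirk(arch_events_quirk);	/* installed first, runs last */
	add_quirk(model_quirk);
	run_quirks();
	return 0;
}

Because registration prepends to the list, the comment "Install first, so it runs last" holds: intel_arch_events_quirk(), registered before the model switch, runs after any model-specific quirk added later.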
diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c index 5abbea297e0c..7b3fe56b1c21 100644 --- a/arch/x86/kernel/cpu/powerflags.c +++ b/arch/x86/kernel/cpu/powerflags.c | |||
@@ -16,5 +16,6 @@ const char *const x86_power_flags[32] = { | |||
16 | "100mhzsteps", | 16 | "100mhzsteps", |
17 | "hwpstate", | 17 | "hwpstate", |
18 | "", /* tsc invariant mapped to constant_tsc */ | 18 | "", /* tsc invariant mapped to constant_tsc */ |
19 | /* nothing */ | 19 | "cpb", /* core performance boost */ |
20 | "eff_freq_ro", /* Readonly aperf/mperf */ | ||
20 | }; | 21 | }; |
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 14b23140e81f..8022c6681485 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c | |||
@@ -64,12 +64,10 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) | |||
64 | static int show_cpuinfo(struct seq_file *m, void *v) | 64 | static int show_cpuinfo(struct seq_file *m, void *v) |
65 | { | 65 | { |
66 | struct cpuinfo_x86 *c = v; | 66 | struct cpuinfo_x86 *c = v; |
67 | unsigned int cpu = 0; | 67 | unsigned int cpu; |
68 | int i; | 68 | int i; |
69 | 69 | ||
70 | #ifdef CONFIG_SMP | ||
71 | cpu = c->cpu_index; | 70 | cpu = c->cpu_index; |
72 | #endif | ||
73 | seq_printf(m, "processor\t: %u\n" | 71 | seq_printf(m, "processor\t: %u\n" |
74 | "vendor_id\t: %s\n" | 72 | "vendor_id\t: %s\n" |
75 | "cpu family\t: %d\n" | 73 | "cpu family\t: %d\n" |
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 212a6a42527c..a524353d93f2 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c | |||
@@ -177,7 +177,7 @@ static struct notifier_block __refdata cpuid_class_cpu_notifier = | |||
177 | .notifier_call = cpuid_class_cpu_callback, | 177 | .notifier_call = cpuid_class_cpu_callback, |
178 | }; | 178 | }; |
179 | 179 | ||
180 | static char *cpuid_devnode(struct device *dev, mode_t *mode) | 180 | static char *cpuid_devnode(struct device *dev, umode_t *mode) |
181 | { | 181 | { |
182 | return kasprintf(GFP_KERNEL, "cpu/%u/cpuid", MINOR(dev->devt)); | 182 | return kasprintf(GFP_KERNEL, "cpu/%u/cpuid", MINOR(dev->devt)); |
183 | } | 183 | } |
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 51c3b186e5b9..62d61e9976eb 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/acpi.h> | 19 | #include <linux/acpi.h> |
20 | #include <linux/firmware-map.h> | 20 | #include <linux/firmware-map.h> |
21 | #include <linux/memblock.h> | 21 | #include <linux/memblock.h> |
22 | #include <linux/sort.h> | ||
22 | 23 | ||
23 | #include <asm/e820.h> | 24 | #include <asm/e820.h> |
24 | #include <asm/proto.h> | 25 | #include <asm/proto.h> |
@@ -227,22 +228,38 @@ void __init e820_print_map(char *who) | |||
227 | * ____________________33__ | 228 | * ____________________33__ |
228 | * ______________________4_ | 229 | * ______________________4_ |
229 | */ | 230 | */ |
231 | struct change_member { | ||
232 | struct e820entry *pbios; /* pointer to original bios entry */ | ||
233 | unsigned long long addr; /* address for this change point */ | ||
234 | }; | ||
235 | |||
236 | static int __init cpcompare(const void *a, const void *b) | ||
237 | { | ||
238 | struct change_member * const *app = a, * const *bpp = b; | ||
239 | const struct change_member *ap = *app, *bp = *bpp; | ||
240 | |||
241 | /* | ||
242 | * Inputs are pointers to two elements of change_point[]. If their | ||
243 | * addresses are unequal, their difference dominates. If the addresses | ||
244 | * are equal, then consider one that represents the end of its region | ||
245 | * to be greater than one that does not. | ||
246 | */ | ||
247 | if (ap->addr != bp->addr) | ||
248 | return ap->addr > bp->addr ? 1 : -1; | ||
249 | |||
250 | return (ap->addr != ap->pbios->addr) - (bp->addr != bp->pbios->addr); | ||
251 | } | ||
230 | 252 | ||
231 | int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, | 253 | int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, |
232 | u32 *pnr_map) | 254 | u32 *pnr_map) |
233 | { | 255 | { |
234 | struct change_member { | ||
235 | struct e820entry *pbios; /* pointer to original bios entry */ | ||
236 | unsigned long long addr; /* address for this change point */ | ||
237 | }; | ||
238 | static struct change_member change_point_list[2*E820_X_MAX] __initdata; | 256 | static struct change_member change_point_list[2*E820_X_MAX] __initdata; |
239 | static struct change_member *change_point[2*E820_X_MAX] __initdata; | 257 | static struct change_member *change_point[2*E820_X_MAX] __initdata; |
240 | static struct e820entry *overlap_list[E820_X_MAX] __initdata; | 258 | static struct e820entry *overlap_list[E820_X_MAX] __initdata; |
241 | static struct e820entry new_bios[E820_X_MAX] __initdata; | 259 | static struct e820entry new_bios[E820_X_MAX] __initdata; |
242 | struct change_member *change_tmp; | ||
243 | unsigned long current_type, last_type; | 260 | unsigned long current_type, last_type; |
244 | unsigned long long last_addr; | 261 | unsigned long long last_addr; |
245 | int chgidx, still_changing; | 262 | int chgidx; |
246 | int overlap_entries; | 263 | int overlap_entries; |
247 | int new_bios_entry; | 264 | int new_bios_entry; |
248 | int old_nr, new_nr, chg_nr; | 265 | int old_nr, new_nr, chg_nr; |
@@ -279,35 +296,7 @@ int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, | |||
279 | chg_nr = chgidx; | 296 | chg_nr = chgidx; |
280 | 297 | ||
281 | /* sort change-point list by memory addresses (low -> high) */ | 298 | /* sort change-point list by memory addresses (low -> high) */ |
282 | still_changing = 1; | 299 | sort(change_point, chg_nr, sizeof *change_point, cpcompare, NULL); |
283 | while (still_changing) { | ||
284 | still_changing = 0; | ||
285 | for (i = 1; i < chg_nr; i++) { | ||
286 | unsigned long long curaddr, lastaddr; | ||
287 | unsigned long long curpbaddr, lastpbaddr; | ||
288 | |||
289 | curaddr = change_point[i]->addr; | ||
290 | lastaddr = change_point[i - 1]->addr; | ||
291 | curpbaddr = change_point[i]->pbios->addr; | ||
292 | lastpbaddr = change_point[i - 1]->pbios->addr; | ||
293 | |||
294 | /* | ||
295 | * swap entries, when: | ||
296 | * | ||
297 | * curaddr > lastaddr or | ||
298 | * curaddr == lastaddr and curaddr == curpbaddr and | ||
299 | * lastaddr != lastpbaddr | ||
300 | */ | ||
301 | if (curaddr < lastaddr || | ||
302 | (curaddr == lastaddr && curaddr == curpbaddr && | ||
303 | lastaddr != lastpbaddr)) { | ||
304 | change_tmp = change_point[i]; | ||
305 | change_point[i] = change_point[i-1]; | ||
306 | change_point[i-1] = change_tmp; | ||
307 | still_changing = 1; | ||
308 | } | ||
309 | } | ||
310 | } | ||
311 | 300 | ||
312 | /* create a new bios memory map, removing overlaps */ | 301 | /* create a new bios memory map, removing overlaps */ |
313 | overlap_entries = 0; /* number of entries in the overlap table */ | 302 | overlap_entries = 0; /* number of entries in the overlap table */ |
@@ -738,35 +727,17 @@ core_initcall(e820_mark_nvs_memory); | |||
738 | /* | 727 | /* |
739 | * Pre-allocate 4k and reserve it in memblock and e820_saved | 728 | * Pre-allocate 4k and reserve it in memblock and e820_saved |
740 | */ | 729 | */ |
741 | u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) | 730 | u64 __init early_reserve_e820(u64 size, u64 align) |
742 | { | 731 | { |
743 | u64 size = 0; | ||
744 | u64 addr; | 732 | u64 addr; |
745 | u64 start; | ||
746 | 733 | ||
747 | for (start = startt; ; start += size) { | 734 | addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); |
748 | start = memblock_x86_find_in_range_size(start, &size, align); | 735 | if (addr) { |
749 | if (start == MEMBLOCK_ERROR) | 736 | e820_update_range_saved(addr, size, E820_RAM, E820_RESERVED); |
750 | return 0; | 737 | printk(KERN_INFO "update e820_saved for early_reserve_e820\n"); |
751 | if (size >= sizet) | 738 | update_e820_saved(); |
752 | break; | ||
753 | } | 739 | } |
754 | 740 | ||
755 | #ifdef CONFIG_X86_32 | ||
756 | if (start >= MAXMEM) | ||
757 | return 0; | ||
758 | if (start + size > MAXMEM) | ||
759 | size = MAXMEM - start; | ||
760 | #endif | ||
761 | |||
762 | addr = round_down(start + size - sizet, align); | ||
763 | if (addr < start) | ||
764 | return 0; | ||
765 | memblock_x86_reserve_range(addr, addr + sizet, "new next"); | ||
766 | e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED); | ||
767 | printk(KERN_INFO "update e820_saved for early_reserve_e820\n"); | ||
768 | update_e820_saved(); | ||
769 | |||
770 | return addr; | 741 | return addr; |
771 | } | 742 | } |
772 | 743 | ||
@@ -1090,7 +1061,7 @@ void __init memblock_x86_fill(void) | |||
1090 | * We are safe to enable resizing, because memblock_x86_fill() | 1061 | * We are safe to enable resizing, because memblock_x86_fill() |
1091 | * runs rather late on x86 | 1062 | * runs rather late on x86 |
1092 | */ | 1063 | */ |
1093 | memblock_can_resize = 1; | 1064 | memblock_allow_resize(); |
1094 | 1065 | ||
1095 | for (i = 0; i < e820.nr_map; i++) { | 1066 | for (i = 0; i < e820.nr_map; i++) { |
1096 | struct e820entry *ei = &e820.map[i]; | 1067 | struct e820entry *ei = &e820.map[i]; |
@@ -1105,22 +1076,36 @@ void __init memblock_x86_fill(void) | |||
1105 | memblock_add(ei->addr, ei->size); | 1076 | memblock_add(ei->addr, ei->size); |
1106 | } | 1077 | } |
1107 | 1078 | ||
1108 | memblock_analyze(); | ||
1109 | memblock_dump_all(); | 1079 | memblock_dump_all(); |
1110 | } | 1080 | } |
1111 | 1081 | ||
1112 | void __init memblock_find_dma_reserve(void) | 1082 | void __init memblock_find_dma_reserve(void) |
1113 | { | 1083 | { |
1114 | #ifdef CONFIG_X86_64 | 1084 | #ifdef CONFIG_X86_64 |
1115 | u64 free_size_pfn; | 1085 | u64 nr_pages = 0, nr_free_pages = 0; |
1116 | u64 mem_size_pfn; | 1086 | unsigned long start_pfn, end_pfn; |
1087 | phys_addr_t start, end; | ||
1088 | int i; | ||
1089 | u64 u; | ||
1090 | |||
1117 | /* | 1091 | /* |
1118 | * We need to find the used area below MAX_DMA_PFN: first use | 1092 | * We need to find the used area below MAX_DMA_PFN: first use |
1119 | * memblock to get the free size in [0, MAX_DMA_PFN], and assume | 1093 | * memblock to get the free size in [0, MAX_DMA_PFN], and assume |
1120 | * boot_mem will not take memory below MAX_DMA_PFN | 1094 | * boot_mem will not take memory below MAX_DMA_PFN |
1121 | */ | 1095 | */ |
1122 | mem_size_pfn = memblock_x86_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT; | 1096 | for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) { |
1123 | free_size_pfn = memblock_x86_free_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT; | 1097 | start_pfn = min_t(unsigned long, start_pfn, MAX_DMA_PFN); |
1124 | set_dma_reserve(mem_size_pfn - free_size_pfn); | 1098 | end_pfn = min_t(unsigned long, end_pfn, MAX_DMA_PFN); |
1099 | nr_pages += end_pfn - start_pfn; | ||
1100 | } | ||
1101 | |||
1102 | for_each_free_mem_range(u, MAX_NUMNODES, &start, &end, NULL) { | ||
1103 | start_pfn = min_t(unsigned long, PFN_UP(start), MAX_DMA_PFN); | ||
1104 | end_pfn = min_t(unsigned long, PFN_DOWN(end), MAX_DMA_PFN); | ||
1105 | if (start_pfn < end_pfn) | ||
1106 | nr_free_pages += end_pfn - start_pfn; | ||
1107 | } | ||
1108 | |||
1109 | set_dma_reserve(nr_pages - nr_free_pages); | ||
1125 | #endif | 1110 | #endif |
1126 | } | 1111 | } |
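Replacing the open-coded bubble sort in sanitize_e820_map() with sort() hinges entirely on the cpcompare() ordering: ascending by address, with a region-end point sorting after a region-start point at the same address. A standalone sketch of the same comparator using the C library's qsort(); the struct layout and addresses are made up, and the kernel version sorts an array of pointers rather than structs:

#include <stdio.h>
#include <stdlib.h>

struct region {
	unsigned long long start;	/* region start address */
};

struct change_point {
	struct region *r;		/* originating region */
	unsigned long long addr;	/* this point: region start or end */
};

static int cpcompare(const void *a, const void *b)
{
	const struct change_point *ap = a, *bp = b;

	if (ap->addr != bp->addr)
		return ap->addr > bp->addr ? 1 : -1;

	/* equal addresses: a region end sorts after a region start */
	return (ap->addr != ap->r->start) - (bp->addr != bp->r->start);
}

int main(void)
{
	struct region r1 = { 0x1000 }, r2 = { 0x2000 };
	struct change_point cp[] = {
		{ &r2, 0x4000 }, { &r1, 0x2000 },	/* ends of r2, r1 */
		{ &r2, 0x2000 }, { &r1, 0x1000 },	/* starts of r2, r1 */
	};
	int i;

	qsort(cp, 4, sizeof(cp[0]), cpcompare);
	for (i = 0; i < 4; i++)
		printf("%llx %s\n", cp[i].addr,
		       cp[i].addr == cp[i].r->start ? "start" : "end");
	return 0;
}

The tie-break matters at 0x2000, where r1 ends and r2 begins: the start point is ordered first, so the overlap-resolution pass sees r2 open before r1 closes and never observes a spurious gap between adjacent regions.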
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index cd28a350f7f9..9b9f18b49918 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c | |||
@@ -240,14 +240,14 @@ static int __init setup_early_printk(char *buf) | |||
240 | if (!strncmp(buf, "xen", 3)) | 240 | if (!strncmp(buf, "xen", 3)) |
241 | early_console_register(&xenboot_console, keep); | 241 | early_console_register(&xenboot_console, keep); |
242 | #endif | 242 | #endif |
243 | #ifdef CONFIG_EARLY_PRINTK_MRST | 243 | #ifdef CONFIG_EARLY_PRINTK_INTEL_MID |
244 | if (!strncmp(buf, "mrst", 4)) { | 244 | if (!strncmp(buf, "mrst", 4)) { |
245 | mrst_early_console_init(); | 245 | mrst_early_console_init(); |
246 | early_console_register(&early_mrst_console, keep); | 246 | early_console_register(&early_mrst_console, keep); |
247 | } | 247 | } |
248 | 248 | ||
249 | if (!strncmp(buf, "hsu", 3)) { | 249 | if (!strncmp(buf, "hsu", 3)) { |
250 | hsu_early_console_init(); | 250 | hsu_early_console_init(buf + 3); |
251 | early_console_register(&early_hsu_console, keep); | 251 | early_console_register(&early_hsu_console, keep); |
252 | } | 252 | } |
253 | #endif | 253 | #endif |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index f3f6f5344001..79d97e68f042 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -42,6 +42,7 @@ | |||
42 | */ | 42 | */ |
43 | 43 | ||
44 | #include <linux/linkage.h> | 44 | #include <linux/linkage.h> |
45 | #include <linux/err.h> | ||
45 | #include <asm/thread_info.h> | 46 | #include <asm/thread_info.h> |
46 | #include <asm/irqflags.h> | 47 | #include <asm/irqflags.h> |
47 | #include <asm/errno.h> | 48 | #include <asm/errno.h> |
@@ -81,8 +82,6 @@ | |||
81 | * enough to patch inline, increasing performance. | 82 | * enough to patch inline, increasing performance. |
82 | */ | 83 | */ |
83 | 84 | ||
84 | #define nr_syscalls ((syscall_table_size)/4) | ||
85 | |||
86 | #ifdef CONFIG_PREEMPT | 85 | #ifdef CONFIG_PREEMPT |
87 | #define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF | 86 | #define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF |
88 | #else | 87 | #else |
@@ -423,7 +422,7 @@ sysenter_past_esp: | |||
423 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) | 422 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) |
424 | jnz sysenter_audit | 423 | jnz sysenter_audit |
425 | sysenter_do_call: | 424 | sysenter_do_call: |
426 | cmpl $(nr_syscalls), %eax | 425 | cmpl $(NR_syscalls), %eax |
427 | jae syscall_badsys | 426 | jae syscall_badsys |
428 | call *sys_call_table(,%eax,4) | 427 | call *sys_call_table(,%eax,4) |
429 | movl %eax,PT_EAX(%esp) | 428 | movl %eax,PT_EAX(%esp) |
@@ -455,7 +454,7 @@ sysenter_audit: | |||
455 | movl %ebx,%ecx /* 3rd arg: 1st syscall arg */ | 454 | movl %ebx,%ecx /* 3rd arg: 1st syscall arg */ |
456 | movl %eax,%edx /* 2nd arg: syscall number */ | 455 | movl %eax,%edx /* 2nd arg: syscall number */ |
457 | movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */ | 456 | movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */ |
458 | call audit_syscall_entry | 457 | call __audit_syscall_entry |
459 | pushl_cfi %ebx | 458 | pushl_cfi %ebx |
460 | movl PT_EAX(%esp),%eax /* reload syscall number */ | 459 | movl PT_EAX(%esp),%eax /* reload syscall number */ |
461 | jmp sysenter_do_call | 460 | jmp sysenter_do_call |
@@ -466,11 +465,10 @@ sysexit_audit: | |||
466 | TRACE_IRQS_ON | 465 | TRACE_IRQS_ON |
467 | ENABLE_INTERRUPTS(CLBR_ANY) | 466 | ENABLE_INTERRUPTS(CLBR_ANY) |
468 | movl %eax,%edx /* second arg, syscall return value */ | 467 | movl %eax,%edx /* second arg, syscall return value */ |
469 | cmpl $0,%eax /* is it < 0? */ | 468 | cmpl $-MAX_ERRNO,%eax /* is it an error ? */ |
470 | setl %al /* 1 if so, 0 if not */ | 469 | setbe %al /* 1 if so, 0 if not */ |
471 | movzbl %al,%eax /* zero-extend that */ | 470 | movzbl %al,%eax /* zero-extend that */ |
472 | inc %eax /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ | 471 | call __audit_syscall_exit |
473 | call audit_syscall_exit | ||
474 | DISABLE_INTERRUPTS(CLBR_ANY) | 472 | DISABLE_INTERRUPTS(CLBR_ANY) |
475 | TRACE_IRQS_OFF | 473 | TRACE_IRQS_OFF |
476 | movl TI_flags(%ebp), %ecx | 474 | movl TI_flags(%ebp), %ecx |
@@ -504,7 +502,7 @@ ENTRY(system_call) | |||
504 | # system call tracing in operation / emulation | 502 | # system call tracing in operation / emulation |
505 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) | 503 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) |
506 | jnz syscall_trace_entry | 504 | jnz syscall_trace_entry |
507 | cmpl $(nr_syscalls), %eax | 505 | cmpl $(NR_syscalls), %eax |
508 | jae syscall_badsys | 506 | jae syscall_badsys |
509 | syscall_call: | 507 | syscall_call: |
510 | call *sys_call_table(,%eax,4) | 508 | call *sys_call_table(,%eax,4) |
@@ -625,6 +623,8 @@ work_notifysig: # deal with pending signals and | |||
625 | movl %esp, %eax | 623 | movl %esp, %eax |
626 | jne work_notifysig_v86 # returning to kernel-space or | 624 | jne work_notifysig_v86 # returning to kernel-space or |
627 | # vm86-space | 625 | # vm86-space |
626 | TRACE_IRQS_ON | ||
627 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
628 | xorl %edx, %edx | 628 | xorl %edx, %edx |
629 | call do_notify_resume | 629 | call do_notify_resume |
630 | jmp resume_userspace_sig | 630 | jmp resume_userspace_sig |
@@ -638,6 +638,8 @@ work_notifysig_v86: | |||
638 | #else | 638 | #else |
639 | movl %esp, %eax | 639 | movl %esp, %eax |
640 | #endif | 640 | #endif |
641 | TRACE_IRQS_ON | ||
642 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
641 | xorl %edx, %edx | 643 | xorl %edx, %edx |
642 | call do_notify_resume | 644 | call do_notify_resume |
643 | jmp resume_userspace_sig | 645 | jmp resume_userspace_sig |
@@ -650,7 +652,7 @@ syscall_trace_entry: | |||
650 | movl %esp, %eax | 652 | movl %esp, %eax |
651 | call syscall_trace_enter | 653 | call syscall_trace_enter |
652 | /* What it returned is what we'll actually use. */ | 654 | /* What it returned is what we'll actually use. */ |
653 | cmpl $(nr_syscalls), %eax | 655 | cmpl $(NR_syscalls), %eax |
654 | jnae syscall_call | 656 | jnae syscall_call |
655 | jmp syscall_exit | 657 | jmp syscall_exit |
656 | END(syscall_trace_entry) | 658 | END(syscall_trace_entry) |
@@ -690,29 +692,28 @@ END(syscall_badsys) | |||
690 | * System calls that need a pt_regs pointer. | 692 | * System calls that need a pt_regs pointer. |
691 | */ | 693 | */ |
692 | #define PTREGSCALL0(name) \ | 694 | #define PTREGSCALL0(name) \ |
693 | ALIGN; \ | 695 | ENTRY(ptregs_##name) ; \ |
694 | ptregs_##name: \ | ||
695 | leal 4(%esp),%eax; \ | 696 | leal 4(%esp),%eax; \ |
696 | jmp sys_##name; | 697 | jmp sys_##name; \ |
698 | ENDPROC(ptregs_##name) | ||
697 | 699 | ||
698 | #define PTREGSCALL1(name) \ | 700 | #define PTREGSCALL1(name) \ |
699 | ALIGN; \ | 701 | ENTRY(ptregs_##name) ; \ |
700 | ptregs_##name: \ | ||
701 | leal 4(%esp),%edx; \ | 702 | leal 4(%esp),%edx; \ |
702 | movl (PT_EBX+4)(%esp),%eax; \ | 703 | movl (PT_EBX+4)(%esp),%eax; \ |
703 | jmp sys_##name; | 704 | jmp sys_##name; \ |
705 | ENDPROC(ptregs_##name) | ||
704 | 706 | ||
705 | #define PTREGSCALL2(name) \ | 707 | #define PTREGSCALL2(name) \ |
706 | ALIGN; \ | 708 | ENTRY(ptregs_##name) ; \ |
707 | ptregs_##name: \ | ||
708 | leal 4(%esp),%ecx; \ | 709 | leal 4(%esp),%ecx; \ |
709 | movl (PT_ECX+4)(%esp),%edx; \ | 710 | movl (PT_ECX+4)(%esp),%edx; \ |
710 | movl (PT_EBX+4)(%esp),%eax; \ | 711 | movl (PT_EBX+4)(%esp),%eax; \ |
711 | jmp sys_##name; | 712 | jmp sys_##name; \ |
713 | ENDPROC(ptregs_##name) | ||
712 | 714 | ||
713 | #define PTREGSCALL3(name) \ | 715 | #define PTREGSCALL3(name) \ |
714 | ALIGN; \ | 716 | ENTRY(ptregs_##name) ; \ |
715 | ptregs_##name: \ | ||
716 | CFI_STARTPROC; \ | 717 | CFI_STARTPROC; \ |
717 | leal 4(%esp),%eax; \ | 718 | leal 4(%esp),%eax; \ |
718 | pushl_cfi %eax; \ | 719 | pushl_cfi %eax; \ |
@@ -737,8 +738,7 @@ PTREGSCALL2(vm86) | |||
737 | PTREGSCALL1(vm86old) | 738 | PTREGSCALL1(vm86old) |
738 | 739 | ||
739 | /* Clone is an oddball. The 4th arg is in %edi */ | 740 | /* Clone is an oddball. The 4th arg is in %edi */ |
740 | ALIGN; | 741 | ENTRY(ptregs_clone) |
741 | ptregs_clone: | ||
742 | CFI_STARTPROC | 742 | CFI_STARTPROC |
743 | leal 4(%esp),%eax | 743 | leal 4(%esp),%eax |
744 | pushl_cfi %eax | 744 | pushl_cfi %eax |
@@ -1209,11 +1209,6 @@ return_to_handler: | |||
1209 | jmp *%ecx | 1209 | jmp *%ecx |
1210 | #endif | 1210 | #endif |
1211 | 1211 | ||
1212 | .section .rodata,"a" | ||
1213 | #include "syscall_table_32.S" | ||
1214 | |||
1215 | syscall_table_size=(.-sys_call_table) | ||
1216 | |||
1217 | /* | 1212 | /* |
1218 | * Some functions should be protected against kprobes | 1213 | * Some functions should be protected against kprobes |
1219 | */ | 1214 | */ |
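The nr_syscalls to NR_syscalls switch does not change the dispatch pattern: cmpl/jae bounds-checks the syscall number before the indirect call through sys_call_table. The same logic, sketched in C with an invented two-entry table:

#include <stdio.h>

typedef long (*syscall_fn)(void);

static long sys_hello(void) { return 0; }
static long sys_world(void) { return 1; }

static syscall_fn sys_call_table[] = { sys_hello, sys_world };
#define NR_syscalls (sizeof(sys_call_table) / sizeof(sys_call_table[0]))

static long dispatch(unsigned long nr)
{
	if (nr >= NR_syscalls)		/* the "jae syscall_badsys" path */
		return -38;		/* -ENOSYS */
	return sys_call_table[nr]();	/* call *sys_call_table(,%eax,4) */
}

int main(void)
{
	printf("%ld %ld\n", dispatch(1), dispatch(99));
	return 0;
}

Moving the table out of entry_32.S is what lets the shared NR_syscalls constant replace the assembler-computed syscall_table_size.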
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index faf8d5e74b0b..3fe8239fd8fb 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -55,6 +55,7 @@ | |||
55 | #include <asm/paravirt.h> | 55 | #include <asm/paravirt.h> |
56 | #include <asm/ftrace.h> | 56 | #include <asm/ftrace.h> |
57 | #include <asm/percpu.h> | 57 | #include <asm/percpu.h> |
58 | #include <linux/err.h> | ||
58 | 59 | ||
59 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | 60 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ |
60 | #include <linux/elf-em.h> | 61 | #include <linux/elf-em.h> |
@@ -221,7 +222,7 @@ ENDPROC(native_usergs_sysret64) | |||
221 | /*CFI_REL_OFFSET ss,0*/ | 222 | /*CFI_REL_OFFSET ss,0*/ |
222 | pushq_cfi %rax /* rsp */ | 223 | pushq_cfi %rax /* rsp */ |
223 | CFI_REL_OFFSET rsp,0 | 224 | CFI_REL_OFFSET rsp,0 |
224 | pushq_cfi $X86_EFLAGS_IF /* eflags - interrupts on */ | 225 | pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_BIT1) /* eflags - interrupts on */ |
225 | /*CFI_REL_OFFSET rflags,0*/ | 226 | /*CFI_REL_OFFSET rflags,0*/ |
226 | pushq_cfi $__KERNEL_CS /* cs */ | 227 | pushq_cfi $__KERNEL_CS /* cs */ |
227 | /*CFI_REL_OFFSET cs,0*/ | 228 | /*CFI_REL_OFFSET cs,0*/ |
@@ -411,7 +412,7 @@ ENTRY(ret_from_fork) | |||
411 | RESTORE_REST | 412 | RESTORE_REST |
412 | 413 | ||
413 | testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? | 414 | testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? |
414 | je int_ret_from_sys_call | 415 | jz retint_restore_args |
415 | 416 | ||
416 | testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET | 417 | testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET |
417 | jnz int_ret_from_sys_call | 418 | jnz int_ret_from_sys_call |
@@ -465,7 +466,7 @@ ENTRY(system_call) | |||
465 | * after the swapgs, so that it can do the swapgs | 466 | * after the swapgs, so that it can do the swapgs |
466 | * for the guest and jump here on syscall. | 467 | * for the guest and jump here on syscall. |
467 | */ | 468 | */ |
468 | ENTRY(system_call_after_swapgs) | 469 | GLOBAL(system_call_after_swapgs) |
469 | 470 | ||
470 | movq %rsp,PER_CPU_VAR(old_rsp) | 471 | movq %rsp,PER_CPU_VAR(old_rsp) |
471 | movq PER_CPU_VAR(kernel_stack),%rsp | 472 | movq PER_CPU_VAR(kernel_stack),%rsp |
@@ -478,8 +479,7 @@ ENTRY(system_call_after_swapgs) | |||
478 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) | 479 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) |
479 | movq %rcx,RIP-ARGOFFSET(%rsp) | 480 | movq %rcx,RIP-ARGOFFSET(%rsp) |
480 | CFI_REL_OFFSET rip,RIP-ARGOFFSET | 481 | CFI_REL_OFFSET rip,RIP-ARGOFFSET |
481 | GET_THREAD_INFO(%rcx) | 482 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
482 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx) | ||
483 | jnz tracesys | 483 | jnz tracesys |
484 | system_call_fastpath: | 484 | system_call_fastpath: |
485 | cmpq $__NR_syscall_max,%rax | 485 | cmpq $__NR_syscall_max,%rax |
@@ -496,10 +496,9 @@ ret_from_sys_call: | |||
496 | /* edi: flagmask */ | 496 | /* edi: flagmask */ |
497 | sysret_check: | 497 | sysret_check: |
498 | LOCKDEP_SYS_EXIT | 498 | LOCKDEP_SYS_EXIT |
499 | GET_THREAD_INFO(%rcx) | ||
500 | DISABLE_INTERRUPTS(CLBR_NONE) | 499 | DISABLE_INTERRUPTS(CLBR_NONE) |
501 | TRACE_IRQS_OFF | 500 | TRACE_IRQS_OFF |
502 | movl TI_flags(%rcx),%edx | 501 | movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx |
503 | andl %edi,%edx | 502 | andl %edi,%edx |
504 | jnz sysret_careful | 503 | jnz sysret_careful |
505 | CFI_REMEMBER_STATE | 504 | CFI_REMEMBER_STATE |
@@ -550,7 +549,7 @@ badsys: | |||
550 | #ifdef CONFIG_AUDITSYSCALL | 549 | #ifdef CONFIG_AUDITSYSCALL |
551 | /* | 550 | /* |
552 | * Fast path for syscall audit without full syscall trace. | 551 | * Fast path for syscall audit without full syscall trace. |
553 | * We just call audit_syscall_entry() directly, and then | 552 | * We just call __audit_syscall_entry() directly, and then |
554 | * jump back to the normal fast path. | 553 | * jump back to the normal fast path. |
555 | */ | 554 | */ |
556 | auditsys: | 555 | auditsys: |
@@ -560,22 +559,21 @@ auditsys: | |||
560 | movq %rdi,%rdx /* 3rd arg: 1st syscall arg */ | 559 | movq %rdi,%rdx /* 3rd arg: 1st syscall arg */ |
561 | movq %rax,%rsi /* 2nd arg: syscall number */ | 560 | movq %rax,%rsi /* 2nd arg: syscall number */ |
562 | movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */ | 561 | movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */ |
563 | call audit_syscall_entry | 562 | call __audit_syscall_entry |
564 | LOAD_ARGS 0 /* reload call-clobbered registers */ | 563 | LOAD_ARGS 0 /* reload call-clobbered registers */ |
565 | jmp system_call_fastpath | 564 | jmp system_call_fastpath |
566 | 565 | ||
567 | /* | 566 | /* |
568 | * Return fast path for syscall audit. Call audit_syscall_exit() | 567 | * Return fast path for syscall audit. Call __audit_syscall_exit() |
569 | * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT | 568 | * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT |
570 | * masked off. | 569 | * masked off. |
571 | */ | 570 | */ |
572 | sysret_audit: | 571 | sysret_audit: |
573 | movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */ | 572 | movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */ |
574 | cmpq $0,%rsi /* is it < 0? */ | 573 | cmpq $-MAX_ERRNO,%rsi /* is it < -MAX_ERRNO? */ |
575 | setl %al /* 1 if so, 0 if not */ | 574 | setbe %al /* 1 if so, 0 if not */ |
576 | movzbl %al,%edi /* zero-extend that into %edi */ | 575 | movzbl %al,%edi /* zero-extend that into %edi */ |
577 | inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ | 576 | call __audit_syscall_exit |
578 | call audit_syscall_exit | ||
579 | movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi | 577 | movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi |
580 | jmp sysret_check | 578 | jmp sysret_check |
581 | #endif /* CONFIG_AUDITSYSCALL */ | 579 | #endif /* CONFIG_AUDITSYSCALL */ |
@@ -583,7 +581,7 @@ sysret_audit: | |||
583 | /* Do syscall tracing */ | 581 | /* Do syscall tracing */ |
584 | tracesys: | 582 | tracesys: |
585 | #ifdef CONFIG_AUDITSYSCALL | 583 | #ifdef CONFIG_AUDITSYSCALL |
586 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx) | 584 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
587 | jz auditsys | 585 | jz auditsys |
588 | #endif | 586 | #endif |
589 | SAVE_REST | 587 | SAVE_REST |
@@ -612,8 +610,6 @@ tracesys: | |||
612 | GLOBAL(int_ret_from_sys_call) | 610 | GLOBAL(int_ret_from_sys_call) |
613 | DISABLE_INTERRUPTS(CLBR_NONE) | 611 | DISABLE_INTERRUPTS(CLBR_NONE) |
614 | TRACE_IRQS_OFF | 612 | TRACE_IRQS_OFF |
615 | testl $3,CS-ARGOFFSET(%rsp) | ||
616 | je retint_restore_args | ||
617 | movl $_TIF_ALLWORK_MASK,%edi | 613 | movl $_TIF_ALLWORK_MASK,%edi |
618 | /* edi: mask to check */ | 614 | /* edi: mask to check */ |
619 | GLOBAL(int_with_check) | 615 | GLOBAL(int_with_check) |
@@ -953,6 +949,7 @@ END(common_interrupt) | |||
953 | ENTRY(\sym) | 949 | ENTRY(\sym) |
954 | INTR_FRAME | 950 | INTR_FRAME |
955 | pushq_cfi $~(\num) | 951 | pushq_cfi $~(\num) |
952 | .Lcommon_\sym: | ||
956 | interrupt \do_sym | 953 | interrupt \do_sym |
957 | jmp ret_from_intr | 954 | jmp ret_from_intr |
958 | CFI_ENDPROC | 955 | CFI_ENDPROC |
@@ -976,13 +973,21 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \ | |||
976 | x86_platform_ipi smp_x86_platform_ipi | 973 | x86_platform_ipi smp_x86_platform_ipi |
977 | 974 | ||
978 | #ifdef CONFIG_SMP | 975 | #ifdef CONFIG_SMP |
979 | .irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \ | 976 | ALIGN |
977 | INTR_FRAME | ||
978 | .irp idx,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \ | ||
980 | 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 | 979 | 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 |
981 | .if NUM_INVALIDATE_TLB_VECTORS > \idx | 980 | .if NUM_INVALIDATE_TLB_VECTORS > \idx |
982 | apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \ | 981 | ENTRY(invalidate_interrupt\idx) |
983 | invalidate_interrupt\idx smp_invalidate_interrupt | 982 | pushq_cfi $~(INVALIDATE_TLB_VECTOR_START+\idx) |
983 | jmp .Lcommon_invalidate_interrupt0 | ||
984 | CFI_ADJUST_CFA_OFFSET -8 | ||
985 | END(invalidate_interrupt\idx) | ||
984 | .endif | 986 | .endif |
985 | .endr | 987 | .endr |
988 | CFI_ENDPROC | ||
989 | apicinterrupt INVALIDATE_TLB_VECTOR_START, \ | ||
990 | invalidate_interrupt0, smp_invalidate_interrupt | ||
986 | #endif | 991 | #endif |
987 | 992 | ||
988 | apicinterrupt THRESHOLD_APIC_VECTOR \ | 993 | apicinterrupt THRESHOLD_APIC_VECTOR \ |
@@ -1475,62 +1480,214 @@ ENTRY(error_exit) | |||
1475 | CFI_ENDPROC | 1480 | CFI_ENDPROC |
1476 | END(error_exit) | 1481 | END(error_exit) |
1477 | 1482 | ||
1483 | /* | ||
1484 | * Test if a given stack is an NMI stack or not. | ||
1485 | */ | ||
1486 | .macro test_in_nmi reg stack nmi_ret normal_ret | ||
1487 | cmpq %\reg, \stack | ||
1488 | ja \normal_ret | ||
1489 | subq $EXCEPTION_STKSZ, %\reg | ||
1490 | cmpq %\reg, \stack | ||
1491 | jb \normal_ret | ||
1492 | jmp \nmi_ret | ||
1493 | .endm | ||
1478 | 1494 | ||
1479 | /* runs on exception stack */ | 1495 | /* runs on exception stack */ |
1480 | ENTRY(nmi) | 1496 | ENTRY(nmi) |
1481 | INTR_FRAME | 1497 | INTR_FRAME |
1482 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1498 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1483 | pushq_cfi $-1 | 1499 | /* |
1500 | * We allow breakpoints in NMIs. If a breakpoint occurs, then | ||
1501 | * the iretq it performs will take us out of NMI context. | ||
1502 | * This means that we can have nested NMIs where the next | ||
1503 | * NMI is using the top of the stack of the previous NMI. We | ||
1504 | * can't let it execute because the nested NMI will corrupt the | ||
1505 | * stack of the previous NMI. NMI handlers are not re-entrant | ||
1506 | * anyway. | ||
1507 | * | ||
1508 | * To handle this case we do the following: | ||
1509 | * Check a special location on the stack that contains ||
1510 | * a variable that is set when NMIs are executing. | ||
1511 | * The interrupted task's stack is also checked to see if it | ||
1512 | * is an NMI stack. | ||
1513 | * If the variable is not set and the stack is not the NMI | ||
1514 | * stack then: | ||
1515 | * o Set the special variable on the stack | ||
1516 | * o Copy the interrupt frame into a "saved" location on the stack | ||
1517 | * o Copy the interrupt frame into a "copy" location on the stack | ||
1518 | * o Continue processing the NMI | ||
1519 | * If the variable is set or the previous stack is the NMI stack: | ||
1520 | * o Modify the "copy" location to jump to the repeat_nmi ||
1521 | * o return back to the first NMI | ||
1522 | * | ||
1523 | * Now on exit of the first NMI, we first clear the stack variable. ||
1524 | * The NMI stack will tell any nested NMIs at that point that it is | ||
1525 | * nested. Then we pop the stack normally with iret, and if there was | ||
1526 | * a nested NMI that updated the copy interrupt stack frame, a | ||
1527 | * jump will be made to the repeat_nmi code that will handle the second | ||
1528 | * NMI. | ||
1529 | */ | ||
1530 | |||
1531 | /* Use %rdx as our temp variable throughout */ ||
1532 | pushq_cfi %rdx | ||
1533 | |||
1534 | /* | ||
1535 | * Check the special variable on the stack to see if NMIs are | ||
1536 | * executing. | ||
1537 | */ | ||
1538 | cmp $1, -8(%rsp) | ||
1539 | je nested_nmi | ||
1540 | |||
1541 | /* | ||
1542 | * Now test if the previous stack was an NMI stack. | ||
1543 | * We need the double check. We check the NMI stack to handle the ||
1544 | * race where the first NMI clears the variable just before returning. ||
1545 | * We check the variable because the first NMI could be in a | ||
1546 | * breakpoint routine using a breakpoint stack. | ||
1547 | */ | ||
1548 | lea 6*8(%rsp), %rdx | ||
1549 | test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi | ||
1550 | |||
1551 | nested_nmi: | ||
1552 | /* | ||
1553 | * Do nothing if we interrupted the fixup in repeat_nmi. | ||
1554 | * It's about to repeat the NMI handler, so we are fine | ||
1555 | * with ignoring this one. | ||
1556 | */ | ||
1557 | movq $repeat_nmi, %rdx | ||
1558 | cmpq 8(%rsp), %rdx | ||
1559 | ja 1f | ||
1560 | movq $end_repeat_nmi, %rdx | ||
1561 | cmpq 8(%rsp), %rdx | ||
1562 | ja nested_nmi_out | ||
1563 | |||
1564 | 1: | ||
1565 | /* Set up the interrupted NMI's stack to jump to repeat_nmi */ ||
1566 | leaq -6*8(%rsp), %rdx | ||
1567 | movq %rdx, %rsp | ||
1568 | CFI_ADJUST_CFA_OFFSET 6*8 | ||
1569 | pushq_cfi $__KERNEL_DS | ||
1570 | pushq_cfi %rdx | ||
1571 | pushfq_cfi | ||
1572 | pushq_cfi $__KERNEL_CS | ||
1573 | pushq_cfi $repeat_nmi | ||
1574 | |||
1575 | /* Put stack back */ | ||
1576 | addq $(11*8), %rsp | ||
1577 | CFI_ADJUST_CFA_OFFSET -11*8 | ||
1578 | |||
1579 | nested_nmi_out: | ||
1580 | popq_cfi %rdx | ||
1581 | |||
1582 | /* No need to check faults here */ | ||
1583 | INTERRUPT_RETURN | ||
1584 | |||
1585 | first_nmi: | ||
1586 | /* | ||
1587 | * Because nested NMIs will use the pushed location that we | ||
1588 | * stored in rdx, we must keep that space available. | ||
1589 | * Here's what our stack frame will look like: | ||
1590 | * +-------------------------+ | ||
1591 | * | original SS | | ||
1592 | * | original Return RSP | | ||
1593 | * | original RFLAGS | | ||
1594 | * | original CS | | ||
1595 | * | original RIP | | ||
1596 | * +-------------------------+ | ||
1597 | * | temp storage for rdx | | ||
1598 | * +-------------------------+ | ||
1599 | * | NMI executing variable | | ||
1600 | * +-------------------------+ | ||
1601 | * | Saved SS | | ||
1602 | * | Saved Return RSP | | ||
1603 | * | Saved RFLAGS | | ||
1604 | * | Saved CS | | ||
1605 | * | Saved RIP | | ||
1606 | * +-------------------------+ | ||
1607 | * | copied SS | | ||
1608 | * | copied Return RSP | | ||
1609 | * | copied RFLAGS | | ||
1610 | * | copied CS | | ||
1611 | * | copied RIP | | ||
1612 | * +-------------------------+ | ||
1613 | * | pt_regs | | ||
1614 | * +-------------------------+ | ||
1615 | * | ||
1616 | * The saved RIP is used to fix up the copied RIP that a nested | ||
1617 | * NMI may zero out. The original stack frame and the temp storage | ||
1618 | * are also used by nested NMIs and cannot be trusted on exit. ||
1619 | */ | ||
1620 | /* Set the NMI executing variable on the stack. */ | ||
1621 | pushq_cfi $1 | ||
1622 | |||
1623 | /* Copy the stack frame to the Saved frame */ | ||
1624 | .rept 5 | ||
1625 | pushq_cfi 6*8(%rsp) | ||
1626 | .endr | ||
1627 | |||
1628 | /* Make another copy, this one may be modified by nested NMIs */ | ||
1629 | .rept 5 | ||
1630 | pushq_cfi 4*8(%rsp) | ||
1631 | .endr | ||
1632 | |||
1633 | /* Do not pop rdx, nested NMIs will corrupt it */ | ||
1634 | movq 11*8(%rsp), %rdx | ||
1635 | |||
1636 | /* | ||
1637 | * Everything below this point can be preempted by a nested | ||
1638 | * NMI if the first NMI took an exception. Repeated NMIs | ||
1639 | * caused by an exception and nested NMI will start here, and | ||
1640 | * can still be preempted by another NMI. | ||
1641 | */ | ||
1642 | restart_nmi: | ||
1643 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ | ||
1484 | subq $ORIG_RAX-R15, %rsp | 1644 | subq $ORIG_RAX-R15, %rsp |
1485 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 | 1645 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
1646 | /* | ||
1647 | * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit | ||
1648 | * as we should not be calling schedule in NMI context, ||
1649 | * even with normal interrupts enabled. An NMI should not be ||
1650 | * setting NEED_RESCHED or anything that normal interrupts and | ||
1651 | * exceptions might do. | ||
1652 | */ | ||
1486 | call save_paranoid | 1653 | call save_paranoid |
1487 | DEFAULT_FRAME 0 | 1654 | DEFAULT_FRAME 0 |
1488 | /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ | 1655 | /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ |
1489 | movq %rsp,%rdi | 1656 | movq %rsp,%rdi |
1490 | movq $-1,%rsi | 1657 | movq $-1,%rsi |
1491 | call do_nmi | 1658 | call do_nmi |
1492 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
1493 | /* paranoidexit; without TRACE_IRQS_OFF */ | ||
1494 | /* ebx: no swapgs flag */ | ||
1495 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
1496 | testl %ebx,%ebx /* swapgs needed? */ | 1659 | testl %ebx,%ebx /* swapgs needed? */ |
1497 | jnz nmi_restore | 1660 | jnz nmi_restore |
1498 | testl $3,CS(%rsp) | ||
1499 | jnz nmi_userspace | ||
1500 | nmi_swapgs: | 1661 | nmi_swapgs: |
1501 | SWAPGS_UNSAFE_STACK | 1662 | SWAPGS_UNSAFE_STACK |
1502 | nmi_restore: | 1663 | nmi_restore: |
1503 | RESTORE_ALL 8 | 1664 | RESTORE_ALL 8 |
1665 | /* Clear the NMI executing stack variable */ | ||
1666 | movq $0, 10*8(%rsp) | ||
1504 | jmp irq_return | 1667 | jmp irq_return |
1505 | nmi_userspace: | ||
1506 | GET_THREAD_INFO(%rcx) | ||
1507 | movl TI_flags(%rcx),%ebx | ||
1508 | andl $_TIF_WORK_MASK,%ebx | ||
1509 | jz nmi_swapgs | ||
1510 | movq %rsp,%rdi /* &pt_regs */ | ||
1511 | call sync_regs | ||
1512 | movq %rax,%rsp /* switch stack for scheduling */ | ||
1513 | testl $_TIF_NEED_RESCHED,%ebx | ||
1514 | jnz nmi_schedule | ||
1515 | movl %ebx,%edx /* arg3: thread flags */ | ||
1516 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
1517 | xorl %esi,%esi /* arg2: oldset */ | ||
1518 | movq %rsp,%rdi /* arg1: &pt_regs */ | ||
1519 | call do_notify_resume | ||
1520 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
1521 | jmp nmi_userspace | ||
1522 | nmi_schedule: | ||
1523 | ENABLE_INTERRUPTS(CLBR_ANY) | ||
1524 | call schedule | ||
1525 | DISABLE_INTERRUPTS(CLBR_ANY) | ||
1526 | jmp nmi_userspace | ||
1527 | CFI_ENDPROC | 1668 | CFI_ENDPROC |
1528 | #else | ||
1529 | jmp paranoid_exit | ||
1530 | CFI_ENDPROC | ||
1531 | #endif | ||
1532 | END(nmi) | 1669 | END(nmi) |
1533 | 1670 | ||
1671 | /* | ||
1672 | * If an NMI hit an iret because of an exception or breakpoint, | ||
1673 | * it can lose its NMI context, and a nested NMI may come in. | ||
1674 | * In that case, the nested NMI will change the preempted NMI's | ||
1675 | * stack to jump to here when it does the final iret. | ||
1676 | */ | ||
1677 | repeat_nmi: | ||
1678 | INTR_FRAME | ||
1679 | /* Update the stack variable to say we are still in NMI */ | ||
1680 | movq $1, 5*8(%rsp) | ||
1681 | |||
1683 | /* copy the saved stack frame back to the copy stack */ ||
1683 | .rept 5 | ||
1684 | pushq_cfi 4*8(%rsp) | ||
1685 | .endr | ||
1686 | |||
1687 | jmp restart_nmi | ||
1688 | CFI_ENDPROC | ||
1689 | end_repeat_nmi: | ||
1690 | |||
1534 | ENTRY(ignore_sysret) | 1691 | ENTRY(ignore_sysret) |
1535 | CFI_STARTPROC | 1692 | CFI_STARTPROC |
1536 | mov $-ENOSYS,%eax | 1693 | mov $-ENOSYS,%eax |
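The nested-NMI rework above reduces to a flag plus a repeat request: a nested NMI that finds the flag set arranges for the first handler to run again rather than corrupting its stack. A rough userspace model of just that control flow; in the real entry code both the flag and the repeat live in copies of the interrupt stack frame, and the repeat is expressed by rewriting the copied return frame to point at repeat_nmi:

#include <stdbool.h>
#include <stdio.h>

static bool nmi_executing;
static bool repeat_requested;
static int simulate_nested = 1;	/* inject one nested NMI for the demo */

static void nmi(void)
{
	if (nmi_executing) {
		/* nested: latch a repeat for the outer handler and leave */
		repeat_requested = true;
		return;
	}

	nmi_executing = true;
	do {
		repeat_requested = false;
		puts("NMI handler runs");
		if (simulate_nested-- > 0)
			nmi();	/* a nested NMI arrives mid-handler */
	} while (repeat_requested);
	nmi_executing = false;
}

int main(void)
{
	nmi();	/* prints "NMI handler runs" twice */
	return 0;
}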
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c index af0699ba48cf..48d9d4ea1020 100644 --- a/arch/x86/kernel/head.c +++ b/arch/x86/kernel/head.c | |||
@@ -52,5 +52,5 @@ void __init reserve_ebda_region(void) | |||
52 | lowmem = 0x9f000; | 52 | lowmem = 0x9f000; |
53 | 53 | ||
54 | /* reserve all memory between lowmem and the 1MB mark */ | 54 | /* reserve all memory between lowmem and the 1MB mark */ |
55 | memblock_x86_reserve_range(lowmem, 0x100000, "* BIOS reserved"); | 55 | memblock_reserve(lowmem, 0x100000 - lowmem); |
56 | } | 56 | } |
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 3bb08509a7a1..51ff18616d50 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c | |||
@@ -31,9 +31,8 @@ static void __init i386_default_early_setup(void) | |||
31 | 31 | ||
32 | void __init i386_start_kernel(void) | 32 | void __init i386_start_kernel(void) |
33 | { | 33 | { |
34 | memblock_init(); | 34 | memblock_reserve(__pa_symbol(&_text), |
35 | 35 | __pa_symbol(&__bss_stop) - __pa_symbol(&_text)); | |
36 | memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); | ||
37 | 36 | ||
38 | #ifdef CONFIG_BLK_DEV_INITRD | 37 | #ifdef CONFIG_BLK_DEV_INITRD |
39 | /* Reserve INITRD */ | 38 | /* Reserve INITRD */ |
@@ -42,7 +41,7 @@ void __init i386_start_kernel(void) | |||
42 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; | 41 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; |
43 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; | 42 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; |
44 | u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); | 43 | u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); |
45 | memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK"); | 44 | memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image); |
46 | } | 45 | } |
47 | #endif | 46 | #endif |
48 | 47 | ||
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 5655c2272adb..3a3b779f41d3 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -98,9 +98,8 @@ void __init x86_64_start_reservations(char *real_mode_data) | |||
98 | { | 98 | { |
99 | copy_bootdata(__va(real_mode_data)); | 99 | copy_bootdata(__va(real_mode_data)); |
100 | 100 | ||
101 | memblock_init(); | 101 | memblock_reserve(__pa_symbol(&_text), |
102 | 102 | __pa_symbol(&__bss_stop) - __pa_symbol(&_text)); | |
103 | memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); | ||
104 | 103 | ||
105 | #ifdef CONFIG_BLK_DEV_INITRD | 104 | #ifdef CONFIG_BLK_DEV_INITRD |
106 | /* Reserve INITRD */ | 105 | /* Reserve INITRD */ |
@@ -109,7 +108,7 @@ void __init x86_64_start_reservations(char *real_mode_data) | |||
109 | unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; | 108 | unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; |
110 | unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; | 109 | unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; |
111 | unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); | 110 | unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); |
112 | memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK"); | 111 | memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image); |
113 | } | 112 | } |
114 | #endif | 113 | #endif |
115 | 114 | ||
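The memblock conversion running through head.c, head32.c and head64.c is mechanical: memblock_x86_reserve_range(start, end, name) becomes memblock_reserve(base, size), so every caller now passes end - start. A trivial sketch with a stand-in for the kernel function and fabricated addresses:

#include <stdio.h>

/* illustrative stand-in for the kernel's memblock_reserve() */
static void memblock_reserve(unsigned long long base, unsigned long long size)
{
	printf("reserve [%#llx - %#llx)\n", base, base + size);
}

int main(void)
{
	unsigned long long ramdisk_image = 0x1000000;
	unsigned long long ramdisk_end   = 0x1400000;

	/* was: memblock_x86_reserve_range(image, end, "RAMDISK") */
	memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
	return 0;
}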
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index e11e39478a49..40f4eb3766d1 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -417,6 +417,10 @@ ENTRY(phys_base) | |||
417 | ENTRY(idt_table) | 417 | ENTRY(idt_table) |
418 | .skip IDT_ENTRIES * 16 | 418 | .skip IDT_ENTRIES * 16 |
419 | 419 | ||
420 | .align L1_CACHE_BYTES | ||
421 | ENTRY(nmi_idt_table) | ||
422 | .skip IDT_ENTRIES * 16 | ||
423 | |||
420 | __PAGE_ALIGNED_BSS | 424 | __PAGE_ALIGNED_BSS |
421 | .align PAGE_SIZE | 425 | .align PAGE_SIZE |
422 | ENTRY(empty_zero_page) | 426 | ENTRY(empty_zero_page) |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 1bb0bf4d92cd..ad0de0c2714e 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -2,7 +2,6 @@ | |||
2 | #include <linux/clockchips.h> | 2 | #include <linux/clockchips.h> |
3 | #include <linux/interrupt.h> | 3 | #include <linux/interrupt.h> |
4 | #include <linux/export.h> | 4 | #include <linux/export.h> |
5 | #include <linux/sysdev.h> | ||
6 | #include <linux/delay.h> | 5 | #include <linux/delay.h> |
7 | #include <linux/errno.h> | 6 | #include <linux/errno.h> |
8 | #include <linux/i8253.h> | 7 | #include <linux/i8253.h> |
@@ -32,8 +31,6 @@ | |||
32 | #define HPET_MIN_CYCLES 128 | 31 | #define HPET_MIN_CYCLES 128 |
33 | #define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1)) | 32 | #define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1)) |
34 | 33 | ||
35 | #define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt) | ||
36 | |||
37 | /* | 34 | /* |
38 | * HPET address is set in acpi/boot.c, when an ACPI entry exists | 35 | * HPET address is set in acpi/boot.c, when an ACPI entry exists |
39 | */ | 36 | */ |
@@ -55,6 +52,11 @@ struct hpet_dev { | |||
55 | char name[10]; | 52 | char name[10]; |
56 | }; | 53 | }; |
57 | 54 | ||
55 | inline struct hpet_dev *EVT_TO_HPET_DEV(struct clock_event_device *evtdev) | ||
56 | { | ||
57 | return container_of(evtdev, struct hpet_dev, evt); | ||
58 | } | ||
59 | |||
58 | inline unsigned int hpet_readl(unsigned int a) | 60 | inline unsigned int hpet_readl(unsigned int a) |
59 | { | 61 | { |
60 | return readl(hpet_virt_address + a); | 62 | return readl(hpet_virt_address + a); |
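EVT_TO_HPET_DEV() becomes a type-checked inline function built on container_of(), which recovers the enclosing structure from a pointer to an embedded member. A self-contained illustration of the pattern; the struct names are invented, and the kernel's container_of() (from <linux/kernel.h>) additionally type-checks the member pointer:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct event {
	int id;
};

struct timer_dev {
	char name[10];
	struct event evt;	/* embedded member */
};

static struct timer_dev *to_timer_dev(struct event *e)
{
	return container_of(e, struct timer_dev, evt);
}

int main(void)
{
	struct timer_dev d = { "hpet0", { 42 } };
	struct event *e = &d.evt;	/* all a callback usually gets */

	printf("%s\n", to_timer_dev(e)->name);	/* prints hpet0 */
	return 0;
}

Compared with the old macro, the inline function rejects a pointer of the wrong type at compile time instead of silently computing garbage.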
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 429e0c92924e..7943e0c21bde 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -74,6 +74,10 @@ int arch_show_interrupts(struct seq_file *p, int prec) | |||
74 | for_each_online_cpu(j) | 74 | for_each_online_cpu(j) |
75 | seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs); | 75 | seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs); |
76 | seq_printf(p, " IRQ work interrupts\n"); | 76 | seq_printf(p, " IRQ work interrupts\n"); |
77 | seq_printf(p, "%*s: ", prec, "RTR"); | ||
78 | for_each_online_cpu(j) | ||
79 | seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count); | ||
80 | seq_printf(p, " APIC ICR read retries\n"); | ||
77 | #endif | 81 | #endif |
78 | if (x86_platform_ipi_callback) { | 82 | if (x86_platform_ipi_callback) { |
79 | seq_printf(p, "%*s: ", prec, "PLT"); | 83 | seq_printf(p, "%*s: ", prec, "PLT"); |
@@ -136,6 +140,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu) | |||
136 | sum += irq_stats(cpu)->irq_spurious_count; | 140 | sum += irq_stats(cpu)->irq_spurious_count; |
137 | sum += irq_stats(cpu)->apic_perf_irqs; | 141 | sum += irq_stats(cpu)->apic_perf_irqs; |
138 | sum += irq_stats(cpu)->apic_irq_work_irqs; | 142 | sum += irq_stats(cpu)->apic_irq_work_irqs; |
143 | sum += irq_stats(cpu)->icr_read_retry_count; | ||
139 | #endif | 144 | #endif |
140 | if (x86_platform_ipi_callback) | 145 | if (x86_platform_ipi_callback) |
141 | sum += irq_stats(cpu)->x86_platform_ipis; | 146 | sum += irq_stats(cpu)->x86_platform_ipis; |
@@ -181,8 +186,8 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs) | |||
181 | unsigned vector = ~regs->orig_ax; | 186 | unsigned vector = ~regs->orig_ax; |
182 | unsigned irq; | 187 | unsigned irq; |
183 | 188 | ||
184 | exit_idle(); | ||
185 | irq_enter(); | 189 | irq_enter(); |
190 | exit_idle(); | ||
186 | 191 | ||
187 | irq = __this_cpu_read(vector_irq[vector]); | 192 | irq = __this_cpu_read(vector_irq[vector]); |
188 | 193 | ||
@@ -209,10 +214,10 @@ void smp_x86_platform_ipi(struct pt_regs *regs) | |||
209 | 214 | ||
210 | ack_APIC_irq(); | 215 | ack_APIC_irq(); |
211 | 216 | ||
212 | exit_idle(); | ||
213 | |||
214 | irq_enter(); | 217 | irq_enter(); |
215 | 218 | ||
219 | exit_idle(); | ||
220 | |||
216 | inc_irq_stat(x86_platform_ipis); | 221 | inc_irq_stat(x86_platform_ipis); |
217 | 222 | ||
218 | if (x86_platform_ipi_callback) | 223 | if (x86_platform_ipi_callback) |
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 72090705a656..40fc86161d92 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c | |||
@@ -28,6 +28,9 @@ DEFINE_PER_CPU(struct pt_regs *, irq_regs); | |||
28 | EXPORT_PER_CPU_SYMBOL(irq_regs); | 28 | EXPORT_PER_CPU_SYMBOL(irq_regs); |
29 | 29 | ||
30 | #ifdef CONFIG_DEBUG_STACKOVERFLOW | 30 | #ifdef CONFIG_DEBUG_STACKOVERFLOW |
31 | |||
32 | int sysctl_panic_on_stackoverflow __read_mostly; | ||
33 | |||
31 | /* Debugging check for stack overflow: is there less than 1KB free? */ | 34 | /* Debugging check for stack overflow: is there less than 1KB free? */ |
32 | static int check_stack_overflow(void) | 35 | static int check_stack_overflow(void) |
33 | { | 36 | { |
@@ -43,6 +46,8 @@ static void print_stack_overflow(void) | |||
43 | { | 46 | { |
44 | printk(KERN_WARNING "low stack detected by irq handler\n"); | 47 | printk(KERN_WARNING "low stack detected by irq handler\n"); |
45 | dump_stack(); | 48 | dump_stack(); |
49 | if (sysctl_panic_on_stackoverflow) | ||
50 | panic("low stack detected by irq handler - check messages\n"); | ||
46 | } | 51 | } |
47 | 52 | ||
48 | #else | 53 | #else |
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 69bca468c47a..d04d3ecded62 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c | |||
@@ -26,6 +26,8 @@ EXPORT_PER_CPU_SYMBOL(irq_stat); | |||
26 | DEFINE_PER_CPU(struct pt_regs *, irq_regs); | 26 | DEFINE_PER_CPU(struct pt_regs *, irq_regs); |
27 | EXPORT_PER_CPU_SYMBOL(irq_regs); | 27 | EXPORT_PER_CPU_SYMBOL(irq_regs); |
28 | 28 | ||
29 | int sysctl_panic_on_stackoverflow; | ||
30 | |||
29 | /* | 31 | /* |
30 | * Probabilistic stack overflow check: | 32 | * Probabilistic stack overflow check: |
31 | * | 33 | * |
@@ -36,18 +38,39 @@ EXPORT_PER_CPU_SYMBOL(irq_regs); | |||
36 | static inline void stack_overflow_check(struct pt_regs *regs) | 38 | static inline void stack_overflow_check(struct pt_regs *regs) |
37 | { | 39 | { |
38 | #ifdef CONFIG_DEBUG_STACKOVERFLOW | 40 | #ifdef CONFIG_DEBUG_STACKOVERFLOW |
41 | #define STACK_TOP_MARGIN 128 | ||
42 | struct orig_ist *oist; | ||
43 | u64 irq_stack_top, irq_stack_bottom; | ||
44 | u64 estack_top, estack_bottom; | ||
39 | u64 curbase = (u64)task_stack_page(current); | 45 | u64 curbase = (u64)task_stack_page(current); |
40 | 46 | ||
41 | if (user_mode_vm(regs)) | 47 | if (user_mode_vm(regs)) |
42 | return; | 48 | return; |
43 | 49 | ||
44 | WARN_ONCE(regs->sp >= curbase && | 50 | if (regs->sp >= curbase + sizeof(struct thread_info) + |
45 | regs->sp <= curbase + THREAD_SIZE && | 51 | sizeof(struct pt_regs) + STACK_TOP_MARGIN && |
46 | regs->sp < curbase + sizeof(struct thread_info) + | 52 | regs->sp <= curbase + THREAD_SIZE) |
47 | sizeof(struct pt_regs) + 128, | 53 | return; |
54 | |||
55 | irq_stack_top = (u64)__get_cpu_var(irq_stack_union.irq_stack) + | ||
56 | STACK_TOP_MARGIN; | ||
57 | irq_stack_bottom = (u64)__get_cpu_var(irq_stack_ptr); | ||
58 | if (regs->sp >= irq_stack_top && regs->sp <= irq_stack_bottom) | ||
59 | return; | ||
60 | |||
61 | oist = &__get_cpu_var(orig_ist); | ||
62 | estack_top = (u64)oist->ist[0] - EXCEPTION_STKSZ + STACK_TOP_MARGIN; | ||
63 | estack_bottom = (u64)oist->ist[N_EXCEPTION_STACKS - 1]; | ||
64 | if (regs->sp >= estack_top && regs->sp <= estack_bottom) | ||
65 | return; | ||
66 | |||
67 | WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n", | ||
68 | current->comm, curbase, regs->sp, | ||
69 | irq_stack_top, irq_stack_bottom, | ||
70 | estack_top, estack_bottom); | ||
48 | 71 | ||
49 | "do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n", | 72 | if (sysctl_panic_on_stackoverflow) |
50 | current->comm, curbase, regs->sp); | 73 | panic("low stack detected by irq handler - check messages\n"); |
51 | #endif | 74 | #endif |
52 | } | 75 | } |
53 | 76 | ||
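The rewritten stack_overflow_check() asks one question three times: does the interrupted %rsp fall inside a known stack (task, IRQ, exception), keeping at least STACK_TOP_MARGIN bytes free at the top? The core range test as a standalone sketch; the addresses are fabricated and stacks are assumed to grow down:

#include <stdbool.h>
#include <stdio.h>

#define STACK_TOP_MARGIN 128

/* "top" is the lowest valid address of a downward-growing stack */
static bool in_stack(unsigned long sp, unsigned long top, unsigned long bottom)
{
	return sp >= top + STACK_TOP_MARGIN && sp <= bottom;
}

int main(void)
{
	unsigned long irq_top = 0x10000, irq_bottom = 0x14000;

	printf("%d\n", in_stack(0x13000, irq_top, irq_bottom)); /* 1: fine */
	printf("%d\n", in_stack(0x10040, irq_top, irq_bottom)); /* 0: inside the margin */
	return 0;
}

Only when all three range tests fail does the function WARN and, with sysctl_panic_on_stackoverflow set, panic.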
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index b3300e6bacef..313fb5cddbce 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -9,7 +9,7 @@ | |||
9 | #include <linux/kprobes.h> | 9 | #include <linux/kprobes.h> |
10 | #include <linux/init.h> | 10 | #include <linux/init.h> |
11 | #include <linux/kernel_stat.h> | 11 | #include <linux/kernel_stat.h> |
12 | #include <linux/sysdev.h> | 12 | #include <linux/device.h> |
13 | #include <linux/bitops.h> | 13 | #include <linux/bitops.h> |
14 | #include <linux/acpi.h> | 14 | #include <linux/acpi.h> |
15 | #include <linux/io.h> | 15 | #include <linux/io.h> |
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index ea9d5f2f13ef..2889b3d43882 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c | |||
@@ -50,7 +50,7 @@ void arch_jump_label_transform(struct jump_entry *entry, | |||
50 | put_online_cpus(); | 50 | put_online_cpus(); |
51 | } | 51 | } |
52 | 52 | ||
53 | void arch_jump_label_transform_static(struct jump_entry *entry, | 53 | __init_or_module void arch_jump_label_transform_static(struct jump_entry *entry, |
54 | enum jump_label_type type) | 54 | enum jump_label_type type) |
55 | { | 55 | { |
56 | __jump_label_transform(entry, type, text_poke_early); | 56 | __jump_label_transform(entry, type, text_poke_early); |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index a9c2116001d6..f0c6fd6f176b 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -39,8 +39,6 @@ | |||
39 | #include <asm/desc.h> | 39 | #include <asm/desc.h> |
40 | #include <asm/tlbflush.h> | 40 | #include <asm/tlbflush.h> |
41 | 41 | ||
42 | #define MMU_QUEUE_SIZE 1024 | ||
43 | |||
44 | static int kvmapf = 1; | 42 | static int kvmapf = 1; |
45 | 43 | ||
46 | static int parse_no_kvmapf(char *arg) | 44 | static int parse_no_kvmapf(char *arg) |
@@ -60,21 +58,10 @@ static int parse_no_stealacc(char *arg) | |||
60 | 58 | ||
61 | early_param("no-steal-acc", parse_no_stealacc); | 59 | early_param("no-steal-acc", parse_no_stealacc); |
62 | 60 | ||
63 | struct kvm_para_state { | ||
64 | u8 mmu_queue[MMU_QUEUE_SIZE]; | ||
65 | int mmu_queue_len; | ||
66 | }; | ||
67 | |||
68 | static DEFINE_PER_CPU(struct kvm_para_state, para_state); | ||
69 | static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); | 61 | static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); |
70 | static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64); | 62 | static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64); |
71 | static int has_steal_clock = 0; | 63 | static int has_steal_clock = 0; |
72 | 64 | ||
73 | static struct kvm_para_state *kvm_para_state(void) | ||
74 | { | ||
75 | return &per_cpu(para_state, raw_smp_processor_id()); | ||
76 | } | ||
77 | |||
78 | /* | 65 | /* |
79 | * No need for any "IO delay" on KVM | 66 | * No need for any "IO delay" on KVM |
80 | */ | 67 | */ |
@@ -271,151 +258,6 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
271 | } | 258 | } |
272 | } | 259 | } |
273 | 260 | ||
274 | static void kvm_mmu_op(void *buffer, unsigned len) | ||
275 | { | ||
276 | int r; | ||
277 | unsigned long a1, a2; | ||
278 | |||
279 | do { | ||
280 | a1 = __pa(buffer); | ||
281 | a2 = 0; /* on i386 __pa() always returns <4G */ | ||
282 | r = kvm_hypercall3(KVM_HC_MMU_OP, len, a1, a2); | ||
283 | buffer += r; | ||
284 | len -= r; | ||
285 | } while (len); | ||
286 | } | ||
287 | |||
288 | static void mmu_queue_flush(struct kvm_para_state *state) | ||
289 | { | ||
290 | if (state->mmu_queue_len) { | ||
291 | kvm_mmu_op(state->mmu_queue, state->mmu_queue_len); | ||
292 | state->mmu_queue_len = 0; | ||
293 | } | ||
294 | } | ||
295 | |||
296 | static void kvm_deferred_mmu_op(void *buffer, int len) | ||
297 | { | ||
298 | struct kvm_para_state *state = kvm_para_state(); | ||
299 | |||
300 | if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU) { | ||
301 | kvm_mmu_op(buffer, len); | ||
302 | return; | ||
303 | } | ||
304 | if (state->mmu_queue_len + len > sizeof state->mmu_queue) | ||
305 | mmu_queue_flush(state); | ||
306 | memcpy(state->mmu_queue + state->mmu_queue_len, buffer, len); | ||
307 | state->mmu_queue_len += len; | ||
308 | } | ||
309 | |||
310 | static void kvm_mmu_write(void *dest, u64 val) | ||
311 | { | ||
312 | __u64 pte_phys; | ||
313 | struct kvm_mmu_op_write_pte wpte; | ||
314 | |||
315 | #ifdef CONFIG_HIGHPTE | ||
316 | struct page *page; | ||
317 | unsigned long dst = (unsigned long) dest; | ||
318 | |||
319 | page = kmap_atomic_to_page(dest); | ||
320 | pte_phys = page_to_pfn(page); | ||
321 | pte_phys <<= PAGE_SHIFT; | ||
322 | pte_phys += (dst & ~(PAGE_MASK)); | ||
323 | #else | ||
324 | pte_phys = (unsigned long)__pa(dest); | ||
325 | #endif | ||
326 | wpte.header.op = KVM_MMU_OP_WRITE_PTE; | ||
327 | wpte.pte_val = val; | ||
328 | wpte.pte_phys = pte_phys; | ||
329 | |||
330 | kvm_deferred_mmu_op(&wpte, sizeof wpte); | ||
331 | } | ||
332 | |||
333 | /* | ||
334 | * We only need to hook operations that are MMU writes. We hook these so that | ||
335 | * we can use lazy MMU mode to batch these operations. We could probably | ||
336 | * improve the performance of the host code if we used some of the information | ||
337 | * here to simplify processing of batched writes. | ||
338 | */ | ||
339 | static void kvm_set_pte(pte_t *ptep, pte_t pte) | ||
340 | { | ||
341 | kvm_mmu_write(ptep, pte_val(pte)); | ||
342 | } | ||
343 | |||
344 | static void kvm_set_pte_at(struct mm_struct *mm, unsigned long addr, | ||
345 | pte_t *ptep, pte_t pte) | ||
346 | { | ||
347 | kvm_mmu_write(ptep, pte_val(pte)); | ||
348 | } | ||
349 | |||
350 | static void kvm_set_pmd(pmd_t *pmdp, pmd_t pmd) | ||
351 | { | ||
352 | kvm_mmu_write(pmdp, pmd_val(pmd)); | ||
353 | } | ||
354 | |||
355 | #if PAGETABLE_LEVELS >= 3 | ||
356 | #ifdef CONFIG_X86_PAE | ||
357 | static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte) | ||
358 | { | ||
359 | kvm_mmu_write(ptep, pte_val(pte)); | ||
360 | } | ||
361 | |||
362 | static void kvm_pte_clear(struct mm_struct *mm, | ||
363 | unsigned long addr, pte_t *ptep) | ||
364 | { | ||
365 | kvm_mmu_write(ptep, 0); | ||
366 | } | ||
367 | |||
368 | static void kvm_pmd_clear(pmd_t *pmdp) | ||
369 | { | ||
370 | kvm_mmu_write(pmdp, 0); | ||
371 | } | ||
372 | #endif | ||
373 | |||
374 | static void kvm_set_pud(pud_t *pudp, pud_t pud) | ||
375 | { | ||
376 | kvm_mmu_write(pudp, pud_val(pud)); | ||
377 | } | ||
378 | |||
379 | #if PAGETABLE_LEVELS == 4 | ||
380 | static void kvm_set_pgd(pgd_t *pgdp, pgd_t pgd) | ||
381 | { | ||
382 | kvm_mmu_write(pgdp, pgd_val(pgd)); | ||
383 | } | ||
384 | #endif | ||
385 | #endif /* PAGETABLE_LEVELS >= 3 */ | ||
386 | |||
387 | static void kvm_flush_tlb(void) | ||
388 | { | ||
389 | struct kvm_mmu_op_flush_tlb ftlb = { | ||
390 | .header.op = KVM_MMU_OP_FLUSH_TLB, | ||
391 | }; | ||
392 | |||
393 | kvm_deferred_mmu_op(&ftlb, sizeof ftlb); | ||
394 | } | ||
395 | |||
396 | static void kvm_release_pt(unsigned long pfn) | ||
397 | { | ||
398 | struct kvm_mmu_op_release_pt rpt = { | ||
399 | .header.op = KVM_MMU_OP_RELEASE_PT, | ||
400 | .pt_phys = (u64)pfn << PAGE_SHIFT, | ||
401 | }; | ||
402 | |||
403 | kvm_mmu_op(&rpt, sizeof rpt); | ||
404 | } | ||
405 | |||
406 | static void kvm_enter_lazy_mmu(void) | ||
407 | { | ||
408 | paravirt_enter_lazy_mmu(); | ||
409 | } | ||
410 | |||
411 | static void kvm_leave_lazy_mmu(void) | ||
412 | { | ||
413 | struct kvm_para_state *state = kvm_para_state(); | ||
414 | |||
415 | mmu_queue_flush(state); | ||
416 | paravirt_leave_lazy_mmu(); | ||
417 | } | ||
418 | |||
419 | static void __init paravirt_ops_setup(void) | 261 | static void __init paravirt_ops_setup(void) |
420 | { | 262 | { |
421 | pv_info.name = "KVM"; | 263 | pv_info.name = "KVM"; |
@@ -424,29 +266,6 @@ static void __init paravirt_ops_setup(void) | |||
424 | if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) | 266 | if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) |
425 | pv_cpu_ops.io_delay = kvm_io_delay; | 267 | pv_cpu_ops.io_delay = kvm_io_delay; |
426 | 268 | ||
427 | if (kvm_para_has_feature(KVM_FEATURE_MMU_OP)) { | ||
428 | pv_mmu_ops.set_pte = kvm_set_pte; | ||
429 | pv_mmu_ops.set_pte_at = kvm_set_pte_at; | ||
430 | pv_mmu_ops.set_pmd = kvm_set_pmd; | ||
431 | #if PAGETABLE_LEVELS >= 3 | ||
432 | #ifdef CONFIG_X86_PAE | ||
433 | pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic; | ||
434 | pv_mmu_ops.pte_clear = kvm_pte_clear; | ||
435 | pv_mmu_ops.pmd_clear = kvm_pmd_clear; | ||
436 | #endif | ||
437 | pv_mmu_ops.set_pud = kvm_set_pud; | ||
438 | #if PAGETABLE_LEVELS == 4 | ||
439 | pv_mmu_ops.set_pgd = kvm_set_pgd; | ||
440 | #endif | ||
441 | #endif | ||
442 | pv_mmu_ops.flush_tlb_user = kvm_flush_tlb; | ||
443 | pv_mmu_ops.release_pte = kvm_release_pt; | ||
444 | pv_mmu_ops.release_pmd = kvm_release_pt; | ||
445 | pv_mmu_ops.release_pud = kvm_release_pt; | ||
446 | |||
447 | pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu; | ||
448 | pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu; | ||
449 | } | ||
450 | #ifdef CONFIG_X86_IO_APIC | 269 | #ifdef CONFIG_X86_IO_APIC |
451 | no_timer_check = 1; | 270 | no_timer_check = 1; |
452 | #endif | 271 | #endif |
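
This removes the guest side of the KVM paravirt MMU interface: the per-CPU hypercall queue, the batched PTE/PMD/PUD/PGD write hooks, the TLB-flush and page-table-release hooks, and their installation under KVM_FEATURE_MMU_OP. paravirt_ops_setup() is left probing only the NOP IO-delay feature. For reference, kvm_para_has_feature() amounts to a CPUID probe; a sketch of the assumed shape (the real helper lives in asm/kvm_para.h):

    /* Sketch: KVM advertises paravirt features via a CPUID leaf. */
    static inline bool has_kvm_feature(unsigned int feature)
    {
            return cpuid_eax(KVM_CPUID_FEATURES) & (1U << feature);
    }
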
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index d494799aafcd..fe86493f3ed1 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c | |||
@@ -1,14 +1,18 @@ | |||
1 | /* | 1 | /* |
2 | * AMD CPU Microcode Update Driver for Linux | 2 | * AMD CPU Microcode Update Driver for Linux |
3 | * Copyright (C) 2008 Advanced Micro Devices Inc. | 3 | * Copyright (C) 2008-2011 Advanced Micro Devices Inc. |
4 | * | 4 | * |
5 | * Author: Peter Oruba <peter.oruba@amd.com> | 5 | * Author: Peter Oruba <peter.oruba@amd.com> |
6 | * | 6 | * |
7 | * Based on work by: | 7 | * Based on work by: |
8 | * Tigran Aivazian <tigran@aivazian.fsnet.co.uk> | 8 | * Tigran Aivazian <tigran@aivazian.fsnet.co.uk> |
9 | * | 9 | * |
10 | * This driver allows to upgrade microcode on AMD | 10 | * Maintainers: |
11 | * family 0x10 and 0x11 processors. | 11 | * Andreas Herrmann <andreas.herrmann3@amd.com> |
12 | * Borislav Petkov <borislav.petkov@amd.com> | ||
13 | * | ||
14 | * This driver allows updating microcode on AMD F10h | ||
15 | * and later CPUs. | ||
12 | * | 16 | * |
13 | * Licensed under the terms of the GNU General Public | 17 | * Licensed under the terms of the GNU General Public |
14 | * License version 2. See file COPYING for details. | 18 | * License version 2. See file COPYING for details. |
@@ -71,6 +75,9 @@ struct microcode_amd { | |||
71 | 75 | ||
72 | static struct equiv_cpu_entry *equiv_cpu_table; | 76 | static struct equiv_cpu_entry *equiv_cpu_table; |
73 | 77 | ||
78 | /* page-sized ucode patch buffer */ | ||
79 | void *patch; | ||
80 | |||
74 | static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) | 81 | static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) |
75 | { | 82 | { |
76 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 83 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
@@ -86,27 +93,76 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) | |||
86 | return 0; | 93 | return 0; |
87 | } | 94 | } |
88 | 95 | ||
89 | static int get_matching_microcode(int cpu, struct microcode_header_amd *mc_hdr, | 96 | static unsigned int verify_ucode_size(int cpu, u32 patch_size, |
90 | int rev) | 97 | unsigned int size) |
91 | { | 98 | { |
92 | unsigned int current_cpu_id; | 99 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
93 | u16 equiv_cpu_id = 0; | 100 | u32 max_size; |
94 | unsigned int i = 0; | 101 | |
102 | #define F1XH_MPB_MAX_SIZE 2048 | ||
103 | #define F14H_MPB_MAX_SIZE 1824 | ||
104 | #define F15H_MPB_MAX_SIZE 4096 | ||
105 | |||
106 | switch (c->x86) { | ||
107 | case 0x14: | ||
108 | max_size = F14H_MPB_MAX_SIZE; | ||
109 | break; | ||
110 | case 0x15: | ||
111 | max_size = F15H_MPB_MAX_SIZE; | ||
112 | break; | ||
113 | default: | ||
114 | max_size = F1XH_MPB_MAX_SIZE; | ||
115 | break; | ||
116 | } | ||
117 | |||
118 | if (patch_size > min_t(u32, size, max_size)) { | ||
119 | pr_err("patch size mismatch\n"); | ||
120 | return 0; | ||
121 | } | ||
122 | |||
123 | return patch_size; | ||
124 | } | ||
125 | |||
126 | static u16 find_equiv_id(void) | ||
127 | { | ||
128 | unsigned int current_cpu_id, i = 0; | ||
95 | 129 | ||
96 | BUG_ON(equiv_cpu_table == NULL); | 130 | BUG_ON(equiv_cpu_table == NULL); |
131 | |||
97 | current_cpu_id = cpuid_eax(0x00000001); | 132 | current_cpu_id = cpuid_eax(0x00000001); |
98 | 133 | ||
99 | while (equiv_cpu_table[i].installed_cpu != 0) { | 134 | while (equiv_cpu_table[i].installed_cpu != 0) { |
100 | if (current_cpu_id == equiv_cpu_table[i].installed_cpu) { | 135 | if (current_cpu_id == equiv_cpu_table[i].installed_cpu) |
101 | equiv_cpu_id = equiv_cpu_table[i].equiv_cpu; | 136 | return equiv_cpu_table[i].equiv_cpu; |
102 | break; | 137 | |
103 | } | ||
104 | i++; | 138 | i++; |
105 | } | 139 | } |
140 | return 0; | ||
141 | } | ||
106 | 142 | ||
143 | /* | ||
144 | * We signal that a good patch was found by returning its size (> 0). | ||
145 | */ | ||
146 | static int get_matching_microcode(int cpu, const u8 *ucode_ptr, | ||
147 | unsigned int leftover_size, int rev, | ||
148 | unsigned int *current_size) | ||
149 | { | ||
150 | struct microcode_header_amd *mc_hdr; | ||
151 | unsigned int actual_size; | ||
152 | u16 equiv_cpu_id; | ||
153 | |||
154 | /* size of the current patch we're staring at */ | ||
155 | *current_size = *(u32 *)(ucode_ptr + 4) + SECTION_HDR_SIZE; | ||
156 | |||
157 | equiv_cpu_id = find_equiv_id(); | ||
107 | if (!equiv_cpu_id) | 158 | if (!equiv_cpu_id) |
108 | return 0; | 159 | return 0; |
109 | 160 | ||
161 | /* | ||
162 | * let's look at the patch header itself now | ||
163 | */ | ||
164 | mc_hdr = (struct microcode_header_amd *)(ucode_ptr + SECTION_HDR_SIZE); | ||
165 | |||
110 | if (mc_hdr->processor_rev_id != equiv_cpu_id) | 166 | if (mc_hdr->processor_rev_id != equiv_cpu_id) |
111 | return 0; | 167 | return 0; |
112 | 168 | ||
@@ -120,7 +176,20 @@ static int get_matching_microcode(int cpu, struct microcode_header_amd *mc_hdr, | |||
120 | if (mc_hdr->patch_id <= rev) | 176 | if (mc_hdr->patch_id <= rev) |
121 | return 0; | 177 | return 0; |
122 | 178 | ||
123 | return 1; | 179 | /* |
180 | * now that the header looks sane, verify its size | ||
181 | */ | ||
182 | actual_size = verify_ucode_size(cpu, *current_size, leftover_size); | ||
183 | if (!actual_size) | ||
184 | return 0; | ||
185 | |||
186 | /* clear the patch buffer */ | ||
187 | memset(patch, 0, PAGE_SIZE); | ||
188 | |||
189 | /* all looks ok, get the binary patch */ | ||
190 | get_ucode_data(patch, ucode_ptr + SECTION_HDR_SIZE, actual_size); | ||
191 | |||
192 | return actual_size; | ||
124 | } | 193 | } |
125 | 194 | ||
126 | static int apply_microcode_amd(int cpu) | 195 | static int apply_microcode_amd(int cpu) |
@@ -155,63 +224,6 @@ static int apply_microcode_amd(int cpu) | |||
155 | return 0; | 224 | return 0; |
156 | } | 225 | } |
157 | 226 | ||
158 | static unsigned int verify_ucode_size(int cpu, const u8 *buf, unsigned int size) | ||
159 | { | ||
160 | struct cpuinfo_x86 *c = &cpu_data(cpu); | ||
161 | u32 max_size, actual_size; | ||
162 | |||
163 | #define F1XH_MPB_MAX_SIZE 2048 | ||
164 | #define F14H_MPB_MAX_SIZE 1824 | ||
165 | #define F15H_MPB_MAX_SIZE 4096 | ||
166 | |||
167 | switch (c->x86) { | ||
168 | case 0x14: | ||
169 | max_size = F14H_MPB_MAX_SIZE; | ||
170 | break; | ||
171 | case 0x15: | ||
172 | max_size = F15H_MPB_MAX_SIZE; | ||
173 | break; | ||
174 | default: | ||
175 | max_size = F1XH_MPB_MAX_SIZE; | ||
176 | break; | ||
177 | } | ||
178 | |||
179 | actual_size = *(u32 *)(buf + 4); | ||
180 | |||
181 | if (actual_size + SECTION_HDR_SIZE > size || actual_size > max_size) { | ||
182 | pr_err("section size mismatch\n"); | ||
183 | return 0; | ||
184 | } | ||
185 | |||
186 | return actual_size; | ||
187 | } | ||
188 | |||
189 | static struct microcode_header_amd * | ||
190 | get_next_ucode(int cpu, const u8 *buf, unsigned int size, unsigned int *mc_size) | ||
191 | { | ||
192 | struct microcode_header_amd *mc = NULL; | ||
193 | unsigned int actual_size = 0; | ||
194 | |||
195 | if (*(u32 *)buf != UCODE_UCODE_TYPE) { | ||
196 | pr_err("invalid type field in container file section header\n"); | ||
197 | goto out; | ||
198 | } | ||
199 | |||
200 | actual_size = verify_ucode_size(cpu, buf, size); | ||
201 | if (!actual_size) | ||
202 | goto out; | ||
203 | |||
204 | mc = vzalloc(actual_size); | ||
205 | if (!mc) | ||
206 | goto out; | ||
207 | |||
208 | get_ucode_data(mc, buf + SECTION_HDR_SIZE, actual_size); | ||
209 | *mc_size = actual_size + SECTION_HDR_SIZE; | ||
210 | |||
211 | out: | ||
212 | return mc; | ||
213 | } | ||
214 | |||
215 | static int install_equiv_cpu_table(const u8 *buf) | 227 | static int install_equiv_cpu_table(const u8 *buf) |
216 | { | 228 | { |
217 | unsigned int *ibuf = (unsigned int *)buf; | 229 | unsigned int *ibuf = (unsigned int *)buf; |
@@ -247,36 +259,38 @@ generic_load_microcode(int cpu, const u8 *data, size_t size) | |||
247 | { | 259 | { |
248 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 260 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
249 | struct microcode_header_amd *mc_hdr = NULL; | 261 | struct microcode_header_amd *mc_hdr = NULL; |
250 | unsigned int mc_size, leftover; | 262 | unsigned int mc_size, leftover, current_size = 0; |
251 | int offset; | 263 | int offset; |
252 | const u8 *ucode_ptr = data; | 264 | const u8 *ucode_ptr = data; |
253 | void *new_mc = NULL; | 265 | void *new_mc = NULL; |
254 | unsigned int new_rev = uci->cpu_sig.rev; | 266 | unsigned int new_rev = uci->cpu_sig.rev; |
255 | enum ucode_state state = UCODE_OK; | 267 | enum ucode_state state = UCODE_ERROR; |
256 | 268 | ||
257 | offset = install_equiv_cpu_table(ucode_ptr); | 269 | offset = install_equiv_cpu_table(ucode_ptr); |
258 | if (offset < 0) { | 270 | if (offset < 0) { |
259 | pr_err("failed to create equivalent cpu table\n"); | 271 | pr_err("failed to create equivalent cpu table\n"); |
260 | return UCODE_ERROR; | 272 | goto out; |
261 | } | 273 | } |
262 | |||
263 | ucode_ptr += offset; | 274 | ucode_ptr += offset; |
264 | leftover = size - offset; | 275 | leftover = size - offset; |
265 | 276 | ||
266 | while (leftover) { | 277 | if (*(u32 *)ucode_ptr != UCODE_UCODE_TYPE) { |
267 | mc_hdr = get_next_ucode(cpu, ucode_ptr, leftover, &mc_size); | 278 | pr_err("invalid type field in container file section header\n"); |
268 | if (!mc_hdr) | 279 | goto free_table; |
269 | break; | 280 | } |
270 | 281 | ||
271 | if (get_matching_microcode(cpu, mc_hdr, new_rev)) { | 282 | while (leftover) { |
272 | vfree(new_mc); | 283 | mc_size = get_matching_microcode(cpu, ucode_ptr, leftover, |
284 | new_rev, ¤t_size); | ||
285 | if (mc_size) { | ||
286 | mc_hdr = patch; | ||
287 | new_mc = patch; | ||
273 | new_rev = mc_hdr->patch_id; | 288 | new_rev = mc_hdr->patch_id; |
274 | new_mc = mc_hdr; | 289 | goto out_ok; |
275 | } else | 290 | } |
276 | vfree(mc_hdr); | ||
277 | 291 | ||
278 | ucode_ptr += mc_size; | 292 | ucode_ptr += current_size; |
279 | leftover -= mc_size; | 293 | leftover -= current_size; |
280 | } | 294 | } |
281 | 295 | ||
282 | if (!new_mc) { | 296 | if (!new_mc) { |
@@ -284,19 +298,16 @@ generic_load_microcode(int cpu, const u8 *data, size_t size) | |||
284 | goto free_table; | 298 | goto free_table; |
285 | } | 299 | } |
286 | 300 | ||
287 | if (!leftover) { | 301 | out_ok: |
288 | vfree(uci->mc); | 302 | uci->mc = new_mc; |
289 | uci->mc = new_mc; | 303 | state = UCODE_OK; |
290 | pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n", | 304 | pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n", |
291 | cpu, uci->cpu_sig.rev, new_rev); | 305 | cpu, uci->cpu_sig.rev, new_rev); |
292 | } else { | ||
293 | vfree(new_mc); | ||
294 | state = UCODE_ERROR; | ||
295 | } | ||
296 | 306 | ||
297 | free_table: | 307 | free_table: |
298 | free_equiv_cpu_table(); | 308 | free_equiv_cpu_table(); |
299 | 309 | ||
310 | out: | ||
300 | return state; | 311 | return state; |
301 | } | 312 | } |
302 | 313 | ||
@@ -337,7 +348,6 @@ static void microcode_fini_cpu_amd(int cpu) | |||
337 | { | 348 | { |
338 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 349 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
339 | 350 | ||
340 | vfree(uci->mc); | ||
341 | uci->mc = NULL; | 351 | uci->mc = NULL; |
342 | } | 352 | } |
343 | 353 | ||
@@ -351,5 +361,14 @@ static struct microcode_ops microcode_amd_ops = { | |||
351 | 361 | ||
352 | struct microcode_ops * __init init_amd_microcode(void) | 362 | struct microcode_ops * __init init_amd_microcode(void) |
353 | { | 363 | { |
364 | patch = (void *)get_zeroed_page(GFP_KERNEL); | ||
365 | if (!patch) | ||
366 | return NULL; | ||
367 | |||
354 | return µcode_amd_ops; | 368 | return µcode_amd_ops; |
355 | } | 369 | } |
370 | |||
371 | void __exit exit_amd_microcode(void) | ||
372 | { | ||
373 | free_page((unsigned long)patch); | ||
374 | } | ||
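
The AMD loader rework replaces per-section vzalloc()/vfree() churn with a single page-sized buffer, allocated once in init_amd_microcode() and reused for every patch; get_matching_microcode() validates the per-family size limit (1824 bytes on F14h, 4096 on F15h, 2048 otherwise) before copying a candidate in, and generic_load_microcode() stops at the first acceptable patch instead of scanning the whole container. The buffer lifecycle, gathered from the hunks above into one sketch:

    /* Lifecycle of the shared patch buffer (sketch of the flow above). */
    patch = (void *)get_zeroed_page(GFP_KERNEL);  /* init_amd_microcode() */

    memset(patch, 0, PAGE_SIZE);                  /* per matching patch   */
    get_ucode_data(patch, ucode_ptr + SECTION_HDR_SIZE, actual_size);

    free_page((unsigned long)patch);              /* exit_amd_microcode() */
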
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 9d46f5e43b51..fda91c307104 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c | |||
@@ -292,8 +292,8 @@ static int reload_for_cpu(int cpu) | |||
292 | return err; | 292 | return err; |
293 | } | 293 | } |
294 | 294 | ||
295 | static ssize_t reload_store(struct sys_device *dev, | 295 | static ssize_t reload_store(struct device *dev, |
296 | struct sysdev_attribute *attr, | 296 | struct device_attribute *attr, |
297 | const char *buf, size_t size) | 297 | const char *buf, size_t size) |
298 | { | 298 | { |
299 | unsigned long val; | 299 | unsigned long val; |
@@ -318,30 +318,30 @@ static ssize_t reload_store(struct sys_device *dev, | |||
318 | return ret; | 318 | return ret; |
319 | } | 319 | } |
320 | 320 | ||
321 | static ssize_t version_show(struct sys_device *dev, | 321 | static ssize_t version_show(struct device *dev, |
322 | struct sysdev_attribute *attr, char *buf) | 322 | struct device_attribute *attr, char *buf) |
323 | { | 323 | { |
324 | struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; | 324 | struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; |
325 | 325 | ||
326 | return sprintf(buf, "0x%x\n", uci->cpu_sig.rev); | 326 | return sprintf(buf, "0x%x\n", uci->cpu_sig.rev); |
327 | } | 327 | } |
328 | 328 | ||
329 | static ssize_t pf_show(struct sys_device *dev, | 329 | static ssize_t pf_show(struct device *dev, |
330 | struct sysdev_attribute *attr, char *buf) | 330 | struct device_attribute *attr, char *buf) |
331 | { | 331 | { |
332 | struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; | 332 | struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; |
333 | 333 | ||
334 | return sprintf(buf, "0x%x\n", uci->cpu_sig.pf); | 334 | return sprintf(buf, "0x%x\n", uci->cpu_sig.pf); |
335 | } | 335 | } |
336 | 336 | ||
337 | static SYSDEV_ATTR(reload, 0200, NULL, reload_store); | 337 | static DEVICE_ATTR(reload, 0200, NULL, reload_store); |
338 | static SYSDEV_ATTR(version, 0400, version_show, NULL); | 338 | static DEVICE_ATTR(version, 0400, version_show, NULL); |
339 | static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL); | 339 | static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL); |
340 | 340 | ||
341 | static struct attribute *mc_default_attrs[] = { | 341 | static struct attribute *mc_default_attrs[] = { |
342 | &attr_reload.attr, | 342 | &dev_attr_reload.attr, |
343 | &attr_version.attr, | 343 | &dev_attr_version.attr, |
344 | &attr_processor_flags.attr, | 344 | &dev_attr_processor_flags.attr, |
345 | NULL | 345 | NULL |
346 | }; | 346 | }; |
347 | 347 | ||
@@ -405,43 +405,45 @@ static enum ucode_state microcode_update_cpu(int cpu) | |||
405 | return ustate; | 405 | return ustate; |
406 | } | 406 | } |
407 | 407 | ||
408 | static int mc_sysdev_add(struct sys_device *sys_dev) | 408 | static int mc_device_add(struct device *dev, struct subsys_interface *sif) |
409 | { | 409 | { |
410 | int err, cpu = sys_dev->id; | 410 | int err, cpu = dev->id; |
411 | 411 | ||
412 | if (!cpu_online(cpu)) | 412 | if (!cpu_online(cpu)) |
413 | return 0; | 413 | return 0; |
414 | 414 | ||
415 | pr_debug("CPU%d added\n", cpu); | 415 | pr_debug("CPU%d added\n", cpu); |
416 | 416 | ||
417 | err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); | 417 | err = sysfs_create_group(&dev->kobj, &mc_attr_group); |
418 | if (err) | 418 | if (err) |
419 | return err; | 419 | return err; |
420 | 420 | ||
421 | if (microcode_init_cpu(cpu) == UCODE_ERROR) { | 421 | if (microcode_init_cpu(cpu) == UCODE_ERROR) { |
422 | sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); | 422 | sysfs_remove_group(&dev->kobj, &mc_attr_group); |
423 | return -EINVAL; | 423 | return -EINVAL; |
424 | } | 424 | } |
425 | 425 | ||
426 | return err; | 426 | return err; |
427 | } | 427 | } |
428 | 428 | ||
429 | static int mc_sysdev_remove(struct sys_device *sys_dev) | 429 | static int mc_device_remove(struct device *dev, struct subsys_interface *sif) |
430 | { | 430 | { |
431 | int cpu = sys_dev->id; | 431 | int cpu = dev->id; |
432 | 432 | ||
433 | if (!cpu_online(cpu)) | 433 | if (!cpu_online(cpu)) |
434 | return 0; | 434 | return 0; |
435 | 435 | ||
436 | pr_debug("CPU%d removed\n", cpu); | 436 | pr_debug("CPU%d removed\n", cpu); |
437 | microcode_fini_cpu(cpu); | 437 | microcode_fini_cpu(cpu); |
438 | sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); | 438 | sysfs_remove_group(&dev->kobj, &mc_attr_group); |
439 | return 0; | 439 | return 0; |
440 | } | 440 | } |
441 | 441 | ||
442 | static struct sysdev_driver mc_sysdev_driver = { | 442 | static struct subsys_interface mc_cpu_interface = { |
443 | .add = mc_sysdev_add, | 443 | .name = "microcode", |
444 | .remove = mc_sysdev_remove, | 444 | .subsys = &cpu_subsys, |
445 | .add_dev = mc_device_add, | ||
446 | .remove_dev = mc_device_remove, | ||
445 | }; | 447 | }; |
446 | 448 | ||
447 | /** | 449 | /** |
@@ -464,9 +466,9 @@ static __cpuinit int | |||
464 | mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) | 466 | mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) |
465 | { | 467 | { |
466 | unsigned int cpu = (unsigned long)hcpu; | 468 | unsigned int cpu = (unsigned long)hcpu; |
467 | struct sys_device *sys_dev; | 469 | struct device *dev; |
468 | 470 | ||
469 | sys_dev = get_cpu_sysdev(cpu); | 471 | dev = get_cpu_device(cpu); |
470 | switch (action) { | 472 | switch (action) { |
471 | case CPU_ONLINE: | 473 | case CPU_ONLINE: |
472 | case CPU_ONLINE_FROZEN: | 474 | case CPU_ONLINE_FROZEN: |
@@ -474,13 +476,13 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) | |||
474 | case CPU_DOWN_FAILED: | 476 | case CPU_DOWN_FAILED: |
475 | case CPU_DOWN_FAILED_FROZEN: | 477 | case CPU_DOWN_FAILED_FROZEN: |
476 | pr_debug("CPU%d added\n", cpu); | 478 | pr_debug("CPU%d added\n", cpu); |
477 | if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) | 479 | if (sysfs_create_group(&dev->kobj, &mc_attr_group)) |
478 | pr_err("Failed to create group for CPU%d\n", cpu); | 480 | pr_err("Failed to create group for CPU%d\n", cpu); |
479 | break; | 481 | break; |
480 | case CPU_DOWN_PREPARE: | 482 | case CPU_DOWN_PREPARE: |
481 | case CPU_DOWN_PREPARE_FROZEN: | 483 | case CPU_DOWN_PREPARE_FROZEN: |
482 | /* Suspend is in progress, only remove the interface */ | 484 | /* Suspend is in progress, only remove the interface */ |
483 | sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); | 485 | sysfs_remove_group(&dev->kobj, &mc_attr_group); |
484 | pr_debug("CPU%d removed\n", cpu); | 486 | pr_debug("CPU%d removed\n", cpu); |
485 | break; | 487 | break; |
486 | 488 | ||
@@ -525,7 +527,7 @@ static int __init microcode_init(void) | |||
525 | get_online_cpus(); | 527 | get_online_cpus(); |
526 | mutex_lock(µcode_mutex); | 528 | mutex_lock(µcode_mutex); |
527 | 529 | ||
528 | error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); | 530 | error = subsys_interface_register(&mc_cpu_interface); |
529 | 531 | ||
530 | mutex_unlock(µcode_mutex); | 532 | mutex_unlock(µcode_mutex); |
531 | put_online_cpus(); | 533 | put_online_cpus(); |
@@ -535,7 +537,7 @@ static int __init microcode_init(void) | |||
535 | 537 | ||
536 | error = microcode_dev_init(); | 538 | error = microcode_dev_init(); |
537 | if (error) | 539 | if (error) |
538 | goto out_sysdev_driver; | 540 | goto out_driver; |
539 | 541 | ||
540 | register_syscore_ops(&mc_syscore_ops); | 542 | register_syscore_ops(&mc_syscore_ops); |
541 | register_hotcpu_notifier(&mc_cpu_notifier); | 543 | register_hotcpu_notifier(&mc_cpu_notifier); |
@@ -545,11 +547,11 @@ static int __init microcode_init(void) | |||
545 | 547 | ||
546 | return 0; | 548 | return 0; |
547 | 549 | ||
548 | out_sysdev_driver: | 550 | out_driver: |
549 | get_online_cpus(); | 551 | get_online_cpus(); |
550 | mutex_lock(µcode_mutex); | 552 | mutex_lock(µcode_mutex); |
551 | 553 | ||
552 | sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); | 554 | subsys_interface_unregister(&mc_cpu_interface); |
553 | 555 | ||
554 | mutex_unlock(µcode_mutex); | 556 | mutex_unlock(µcode_mutex); |
555 | put_online_cpus(); | 557 | put_online_cpus(); |
@@ -563,6 +565,8 @@ module_init(microcode_init); | |||
563 | 565 | ||
564 | static void __exit microcode_exit(void) | 566 | static void __exit microcode_exit(void) |
565 | { | 567 | { |
568 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
569 | |||
566 | microcode_dev_exit(); | 570 | microcode_dev_exit(); |
567 | 571 | ||
568 | unregister_hotcpu_notifier(&mc_cpu_notifier); | 572 | unregister_hotcpu_notifier(&mc_cpu_notifier); |
@@ -571,7 +575,7 @@ static void __exit microcode_exit(void) | |||
571 | get_online_cpus(); | 575 | get_online_cpus(); |
572 | mutex_lock(µcode_mutex); | 576 | mutex_lock(µcode_mutex); |
573 | 577 | ||
574 | sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); | 578 | subsys_interface_unregister(&mc_cpu_interface); |
575 | 579 | ||
576 | mutex_unlock(µcode_mutex); | 580 | mutex_unlock(µcode_mutex); |
577 | put_online_cpus(); | 581 | put_online_cpus(); |
@@ -580,6 +584,9 @@ static void __exit microcode_exit(void) | |||
580 | 584 | ||
581 | microcode_ops = NULL; | 585 | microcode_ops = NULL; |
582 | 586 | ||
587 | if (c->x86_vendor == X86_VENDOR_AMD) | ||
588 | exit_amd_microcode(); | ||
589 | |||
583 | pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); | 590 | pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); |
584 | } | 591 | } |
585 | module_exit(microcode_exit); | 592 | module_exit(microcode_exit); |
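
microcode_core.c is converted from the removed sysdev layer to the regular driver model: SYSDEV_ATTR becomes DEVICE_ATTR, and the per-CPU hotplug callbacks register through a subsys_interface hooked onto cpu_subsys. A self-contained sketch of the pattern (the names here are illustrative, not from this file):

    #include <linux/cpu.h>
    #include <linux/device.h>

    static int sketch_add(struct device *dev, struct subsys_interface *sif)
    {
            return 0;       /* per-CPU setup keyed off dev->id */
    }

    static int sketch_remove(struct device *dev, struct subsys_interface *sif)
    {
            return 0;
    }

    static struct subsys_interface sketch_iface = {
            .name       = "sketch",
            .subsys     = &cpu_subsys,
            .add_dev    = sketch_add,
            .remove_dev = sketch_remove,
    };

    /* paired with subsys_interface_register()/subsys_interface_unregister() */
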
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 0741b062a304..ca470e4c92dc 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
@@ -564,9 +564,7 @@ void __init default_get_smp_config(unsigned int early) | |||
564 | 564 | ||
565 | static void __init smp_reserve_memory(struct mpf_intel *mpf) | 565 | static void __init smp_reserve_memory(struct mpf_intel *mpf) |
566 | { | 566 | { |
567 | unsigned long size = get_mpc_size(mpf->physptr); | 567 | memblock_reserve(mpf->physptr, get_mpc_size(mpf->physptr)); |
568 | |||
569 | memblock_x86_reserve_range(mpf->physptr, mpf->physptr+size, "* MP-table mpc"); | ||
570 | } | 568 | } |
571 | 569 | ||
572 | static int __init smp_scan_config(unsigned long base, unsigned long length) | 570 | static int __init smp_scan_config(unsigned long base, unsigned long length) |
@@ -595,7 +593,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length) | |||
595 | mpf, (u64)virt_to_phys(mpf)); | 593 | mpf, (u64)virt_to_phys(mpf)); |
596 | 594 | ||
597 | mem = virt_to_phys(mpf); | 595 | mem = virt_to_phys(mpf); |
598 | memblock_x86_reserve_range(mem, mem + sizeof(*mpf), "* MP-table mpf"); | 596 | memblock_reserve(mem, sizeof(*mpf)); |
599 | if (mpf->physptr) | 597 | if (mpf->physptr) |
600 | smp_reserve_memory(mpf); | 598 | smp_reserve_memory(mpf); |
601 | 599 | ||
@@ -836,10 +834,8 @@ early_param("alloc_mptable", parse_alloc_mptable_opt); | |||
836 | 834 | ||
837 | void __init early_reserve_e820_mpc_new(void) | 835 | void __init early_reserve_e820_mpc_new(void) |
838 | { | 836 | { |
839 | if (enable_update_mptable && alloc_mptable) { | 837 | if (enable_update_mptable && alloc_mptable) |
840 | u64 startt = 0; | 838 | mpc_new_phys = early_reserve_e820(mpc_new_length, 4); |
841 | mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4); | ||
842 | } | ||
843 | } | 839 | } |
844 | 840 | ||
845 | static int __init update_mp_table(void) | 841 | static int __init update_mp_table(void) |
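
Two memblock interface changes show up here and recur through the rest of the series: the x86-private reserve call, which took an end address and a debug label, is replaced by the generic base/size form, and early_reserve_e820() loses its unused start argument. The mechanical rewrite, as a sketch:

    /* Before:  memblock_x86_reserve_range(start, end, "label");
     * After:   memblock_reserve(start, end - start);
     * The label goes away; memblock has its own debug tracing.
     */
    memblock_reserve(mpf->physptr, get_mpc_size(mpf->physptr));
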
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 12fcbe2c143e..96356762a51d 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c | |||
@@ -236,7 +236,7 @@ static struct notifier_block __refdata msr_class_cpu_notifier = { | |||
236 | .notifier_call = msr_class_cpu_callback, | 236 | .notifier_call = msr_class_cpu_callback, |
237 | }; | 237 | }; |
238 | 238 | ||
239 | static char *msr_devnode(struct device *dev, mode_t *mode) | 239 | static char *msr_devnode(struct device *dev, umode_t *mode) |
240 | { | 240 | { |
241 | return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt)); | 241 | return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt)); |
242 | } | 242 | } |
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index e88f37b58ddd..47acaf319165 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c | |||
@@ -405,9 +405,108 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) | |||
405 | unknown_nmi_error(reason, regs); | 405 | unknown_nmi_error(reason, regs); |
406 | } | 406 | } |
407 | 407 | ||
408 | /* | ||
409 | * NMIs can hit breakpoints, which causes the CPU to lose its | ||
410 | * NMI context when the breakpoint handler does an iret. | ||
411 | */ | ||
412 | #ifdef CONFIG_X86_32 | ||
413 | /* | ||
414 | * For i386, NMIs use the same stack as the kernel, and we can | ||
415 | * add a workaround to the iret problem in C. Simply have 3 states | ||
416 | * the NMI can be in. | ||
417 | * | ||
418 | * 1) not running | ||
419 | * 2) executing | ||
420 | * 3) latched | ||
421 | * | ||
422 | * When no NMI is in progress, it is in the "not running" state. | ||
423 | * When an NMI comes in, it goes into the "executing" state. | ||
424 | * Normally, if another NMI is triggered, it does not interrupt | ||
425 | * the running NMI and the HW will simply latch it so that when | ||
426 | * the first NMI finishes, it will restart the second NMI. | ||
427 | * (Note, the latch is binary, thus multiple NMIs triggering, | ||
428 | * when one is running, are ignored. Only one NMI is restarted.) | ||
429 | * | ||
430 | * If an NMI hits a breakpoint that executes an iret, another | ||
431 | * NMI can preempt it. We do not want to allow this new NMI | ||
432 | * to run, but we want to execute it when the first one finishes. | ||
433 | * We set the state to "latched", and the first NMI will perform | ||
434 | * a cmpxchg on the state, and if it doesn't successfully | ||
435 | * reset the state to "not running" it will restart the next | ||
436 | * NMI. | ||
437 | */ | ||
438 | enum nmi_states { | ||
439 | NMI_NOT_RUNNING, | ||
440 | NMI_EXECUTING, | ||
441 | NMI_LATCHED, | ||
442 | }; | ||
443 | static DEFINE_PER_CPU(enum nmi_states, nmi_state); | ||
444 | |||
445 | #define nmi_nesting_preprocess(regs) \ | ||
446 | do { \ | ||
447 | if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) { \ | ||
448 | __get_cpu_var(nmi_state) = NMI_LATCHED; \ | ||
449 | return; \ | ||
450 | } \ | ||
451 | nmi_restart: \ | ||
452 | __get_cpu_var(nmi_state) = NMI_EXECUTING; \ | ||
453 | } while (0) | ||
454 | |||
455 | #define nmi_nesting_postprocess() \ | ||
456 | do { \ | ||
457 | if (cmpxchg(&__get_cpu_var(nmi_state), \ | ||
458 | NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING) \ | ||
459 | goto nmi_restart; \ | ||
460 | } while (0) | ||
461 | #else /* x86_64 */ | ||
462 | /* | ||
463 | * On x86_64 things are a bit more difficult. It has the same problem | ||
464 | * where an NMI hitting a breakpoint that calls iret will remove the | ||
465 | * NMI context, allowing a nested NMI to enter. What makes this more | ||
466 | * difficult is that both NMIs and breakpoints have their own stack. | ||
467 | * When a new NMI or breakpoint is executed, the stack is set to a fixed | ||
468 | * point. If an NMI is nested, it will have its stack set at that same | ||
469 | * fixed address that the first NMI had, and will start corrupting the | ||
470 | * stack. This is handled in entry_64.S, but the same problem exists with | ||
471 | * the breakpoint stack. | ||
472 | * | ||
473 | * If a breakpoint is being processed on the debug stack and an NMI | ||
474 | * comes in that also hits a breakpoint, the stack pointer | ||
475 | * will be set to the same fixed address as the breakpoint that was | ||
476 | * interrupted, causing that stack to be corrupted. To handle this case, | ||
477 | * check if the stack that was interrupted is the debug stack, and if | ||
478 | * so, change the IDT so that new breakpoints will use the current stack | ||
479 | * and not switch to the fixed address. On return of the NMI, switch back | ||
480 | * to the original IDT. | ||
481 | */ | ||
482 | static DEFINE_PER_CPU(int, update_debug_stack); | ||
483 | |||
484 | static inline void nmi_nesting_preprocess(struct pt_regs *regs) | ||
485 | { | ||
486 | /* | ||
487 | * If we interrupted a breakpoint, it is possible that | ||
488 | * the nmi handler will have breakpoints too. We need to | ||
489 | * change the IDT such that breakpoints that happen here | ||
490 | * continue to use the NMI stack. | ||
491 | */ | ||
492 | if (unlikely(is_debug_stack(regs->sp))) { | ||
493 | debug_stack_set_zero(); | ||
494 | __get_cpu_var(update_debug_stack) = 1; | ||
495 | } | ||
496 | } | ||
497 | |||
498 | static inline void nmi_nesting_postprocess(void) | ||
499 | { | ||
500 | if (unlikely(__get_cpu_var(update_debug_stack))) | ||
501 | debug_stack_reset(); | ||
502 | } | ||
503 | #endif | ||
504 | |||
408 | dotraplinkage notrace __kprobes void | 505 | dotraplinkage notrace __kprobes void |
409 | do_nmi(struct pt_regs *regs, long error_code) | 506 | do_nmi(struct pt_regs *regs, long error_code) |
410 | { | 507 | { |
508 | nmi_nesting_preprocess(regs); | ||
509 | |||
411 | nmi_enter(); | 510 | nmi_enter(); |
412 | 511 | ||
413 | inc_irq_stat(__nmi_count); | 512 | inc_irq_stat(__nmi_count); |
@@ -416,6 +515,9 @@ do_nmi(struct pt_regs *regs, long error_code) | |||
416 | default_do_nmi(regs); | 515 | default_do_nmi(regs); |
417 | 516 | ||
418 | nmi_exit(); | 517 | nmi_exit(); |
518 | |||
519 | /* On i386, may loop back to preprocess */ | ||
520 | nmi_nesting_postprocess(); | ||
419 | } | 521 | } |
420 | 522 | ||
421 | void stop_nmi(void) | 523 | void stop_nmi(void) |
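
The two nesting strategies above differ because of the stacks involved. On i386 the NMI shares the kernel stack, so nesting can be handled entirely in C with a three-state latch; on x86_64 the NMI and breakpoint handlers run on fixed IST stacks, so the preprocess step instead detects that the debug stack was interrupted and temporarily repoints the IDT (debug_stack_set_zero()) so nested breakpoints keep the current stack. The 32-bit latch, restated as a self-contained model (sketch only, ignoring the per-CPU storage):

    enum { NOT_RUNNING, EXECUTING, LATCHED };
    static int state;

    void model_nmi(void)
    {
            if (state != NOT_RUNNING) {     /* nested NMI: latch and leave */
                    state = LATCHED;
                    return;
            }
    restart:
            state = EXECUTING;
            /* ... handle the NMI ... */
            if (cmpxchg(&state, EXECUTING, NOT_RUNNING) != EXECUTING)
                    goto restart;           /* a latched NMI arrived meanwhile */
    }
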
diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c new file mode 100644 index 000000000000..0d01a8ea4e11 --- /dev/null +++ b/arch/x86/kernel/nmi_selftest.c | |||
@@ -0,0 +1,180 @@ | |||
1 | /* | ||
2 | * arch/x86/kernel/nmi_selftest.c | ||
3 | * | ||
4 | * Testsuite for NMI: IPIs | ||
5 | * | ||
6 | * Started by Don Zickus: | ||
7 | * (using lib/locking-selftest.c as a guide) | ||
8 | * | ||
9 | * Copyright (C) 2011 Red Hat, Inc., Don Zickus <dzickus@redhat.com> | ||
10 | */ | ||
11 | |||
12 | #include <linux/smp.h> | ||
13 | #include <linux/cpumask.h> | ||
14 | #include <linux/delay.h> | ||
15 | |||
16 | #include <asm/apic.h> | ||
17 | #include <asm/nmi.h> | ||
18 | |||
19 | #define SUCCESS 0 | ||
20 | #define FAILURE 1 | ||
21 | #define TIMEOUT 2 | ||
22 | |||
23 | static int nmi_fail; | ||
24 | |||
25 | /* check to see if NMI IPIs work on this machine */ | ||
26 | static DECLARE_BITMAP(nmi_ipi_mask, NR_CPUS) __read_mostly; | ||
27 | |||
28 | static int testcase_total; | ||
29 | static int testcase_successes; | ||
30 | static int expected_testcase_failures; | ||
31 | static int unexpected_testcase_failures; | ||
32 | static int unexpected_testcase_unknowns; | ||
33 | |||
34 | static int nmi_unk_cb(unsigned int val, struct pt_regs *regs) | ||
35 | { | ||
36 | unexpected_testcase_unknowns++; | ||
37 | return NMI_HANDLED; | ||
38 | } | ||
39 | |||
40 | static void init_nmi_testsuite(void) | ||
41 | { | ||
42 | /* trap all the unknown NMIs we may generate */ | ||
43 | register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk"); | ||
44 | } | ||
45 | |||
46 | static void cleanup_nmi_testsuite(void) | ||
47 | { | ||
48 | unregister_nmi_handler(NMI_UNKNOWN, "nmi_selftest_unk"); | ||
49 | } | ||
50 | |||
51 | static int test_nmi_ipi_callback(unsigned int val, struct pt_regs *regs) | ||
52 | { | ||
53 | int cpu = raw_smp_processor_id(); | ||
54 | |||
55 | if (cpumask_test_and_clear_cpu(cpu, to_cpumask(nmi_ipi_mask))) | ||
56 | return NMI_HANDLED; | ||
57 | |||
58 | return NMI_DONE; | ||
59 | } | ||
60 | |||
61 | static void test_nmi_ipi(struct cpumask *mask) | ||
62 | { | ||
63 | unsigned long timeout; | ||
64 | |||
65 | if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback, | ||
66 | NMI_FLAG_FIRST, "nmi_selftest")) { | ||
67 | nmi_fail = FAILURE; | ||
68 | return; | ||
69 | } | ||
70 | |||
71 | /* sync above data before sending NMI */ | ||
72 | wmb(); | ||
73 | |||
74 | apic->send_IPI_mask(mask, NMI_VECTOR); | ||
75 | |||
76 | /* Don't wait longer than a second */ | ||
77 | timeout = USEC_PER_SEC; | ||
78 | while (!cpumask_empty(mask) && timeout--) | ||
79 | udelay(1); | ||
80 | |||
81 | /* If we time out, do we still unregister? */ | ||
82 | unregister_nmi_handler(NMI_LOCAL, "nmi_selftest"); | ||
83 | |||
84 | if (!timeout) | ||
85 | nmi_fail = TIMEOUT; | ||
86 | return; | ||
87 | } | ||
88 | |||
89 | static void remote_ipi(void) | ||
90 | { | ||
91 | cpumask_copy(to_cpumask(nmi_ipi_mask), cpu_online_mask); | ||
92 | cpumask_clear_cpu(smp_processor_id(), to_cpumask(nmi_ipi_mask)); | ||
93 | if (!cpumask_empty(to_cpumask(nmi_ipi_mask))) | ||
94 | test_nmi_ipi(to_cpumask(nmi_ipi_mask)); | ||
95 | } | ||
96 | |||
97 | static void local_ipi(void) | ||
98 | { | ||
99 | cpumask_clear(to_cpumask(nmi_ipi_mask)); | ||
100 | cpumask_set_cpu(smp_processor_id(), to_cpumask(nmi_ipi_mask)); | ||
101 | test_nmi_ipi(to_cpumask(nmi_ipi_mask)); | ||
102 | } | ||
103 | |||
104 | static void reset_nmi(void) | ||
105 | { | ||
106 | nmi_fail = 0; | ||
107 | } | ||
108 | |||
109 | static void dotest(void (*testcase_fn)(void), int expected) | ||
110 | { | ||
111 | testcase_fn(); | ||
112 | /* | ||
113 | * Filter out expected failures: | ||
114 | */ | ||
115 | if (nmi_fail != expected) { | ||
116 | unexpected_testcase_failures++; | ||
117 | |||
118 | if (nmi_fail == FAILURE) | ||
119 | printk("FAILED |"); | ||
120 | else if (nmi_fail == TIMEOUT) | ||
121 | printk("TIMEOUT|"); | ||
122 | else | ||
123 | printk("ERROR |"); | ||
124 | dump_stack(); | ||
125 | } else { | ||
126 | testcase_successes++; | ||
127 | printk(" ok |"); | ||
128 | } | ||
129 | testcase_total++; | ||
130 | |||
131 | reset_nmi(); | ||
132 | } | ||
133 | |||
134 | static inline void print_testname(const char *testname) | ||
135 | { | ||
136 | printk("%12s:", testname); | ||
137 | } | ||
138 | |||
139 | void nmi_selftest(void) | ||
140 | { | ||
141 | init_nmi_testsuite(); | ||
142 | |||
143 | /* | ||
144 | * Run the testsuite: | ||
145 | */ | ||
146 | printk("----------------\n"); | ||
147 | printk("| NMI testsuite:\n"); | ||
148 | printk("--------------------\n"); | ||
149 | |||
150 | print_testname("remote IPI"); | ||
151 | dotest(remote_ipi, SUCCESS); | ||
152 | printk("\n"); | ||
153 | print_testname("local IPI"); | ||
154 | dotest(local_ipi, SUCCESS); | ||
155 | printk("\n"); | ||
156 | |||
157 | cleanup_nmi_testsuite(); | ||
158 | |||
159 | if (unexpected_testcase_failures) { | ||
160 | printk("--------------------\n"); | ||
161 | printk("BUG: %3d unexpected failures (out of %3d) - debugging disabled! |\n", | ||
162 | unexpected_testcase_failures, testcase_total); | ||
163 | printk("-----------------------------------------------------------------\n"); | ||
164 | } else if (expected_testcase_failures && testcase_successes) { | ||
165 | printk("--------------------\n"); | ||
166 | printk("%3d out of %3d testcases failed, as expected. |\n", | ||
167 | expected_testcase_failures, testcase_total); | ||
168 | printk("----------------------------------------------------\n"); | ||
169 | } else if (expected_testcase_failures && !testcase_successes) { | ||
170 | printk("--------------------\n"); | ||
171 | printk("All %3d testcases failed, as expected. |\n", | ||
172 | expected_testcase_failures); | ||
173 | printk("----------------------------------------\n"); | ||
174 | } else { | ||
175 | printk("--------------------\n"); | ||
176 | printk("Good, all %3d testcases passed! |\n", | ||
177 | testcase_successes); | ||
178 | printk("---------------------------------\n"); | ||
179 | } | ||
180 | } | ||
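
The selftest exercises NMI IPI delivery: each target CPU clears its bit in nmi_ipi_mask from NMI context, and the initiator busy-waits up to roughly a second for the mask to empty. The registration it relies on is the generic NMI notifier API; a minimal sketch of using that API from a driver (handler and name are illustrative):

    static int sketch_nmi(unsigned int type, struct pt_regs *regs)
    {
            /* claim the NMI if it is ours ... */
            return NMI_HANDLED;     /* or NMI_DONE to let others see it */
    }

    register_nmi_handler(NMI_LOCAL, sketch_nmi, NMI_FLAG_FIRST, "sketch");
    /* ... */
    unregister_nmi_handler(NMI_LOCAL, "sketch");
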
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 80dc793b3f63..1c4d769e21ea 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -45,6 +45,15 @@ int iommu_detected __read_mostly = 0; | |||
45 | */ | 45 | */ |
46 | int iommu_pass_through __read_mostly; | 46 | int iommu_pass_through __read_mostly; |
47 | 47 | ||
48 | /* | ||
49 | * Group multi-function PCI devices into a single device-group for the | ||
50 | * iommu_device_group interface. This tells the iommu driver to pretend | ||
51 | * it cannot distinguish between functions of a device, exposing only one | ||
52 | * group for the device. Useful for disallowing use of individual PCI | ||
53 | * functions from userspace drivers. | ||
54 | */ | ||
55 | int iommu_group_mf __read_mostly; | ||
56 | |||
48 | extern struct iommu_table_entry __iommu_table[], __iommu_table_end[]; | 57 | extern struct iommu_table_entry __iommu_table[], __iommu_table_end[]; |
49 | 58 | ||
50 | /* Dummy device used for NULL arguments (normally ISA). */ | 59 | /* Dummy device used for NULL arguments (normally ISA). */ |
@@ -169,6 +178,8 @@ static __init int iommu_setup(char *p) | |||
169 | #endif | 178 | #endif |
170 | if (!strncmp(p, "pt", 2)) | 179 | if (!strncmp(p, "pt", 2)) |
171 | iommu_pass_through = 1; | 180 | iommu_pass_through = 1; |
181 | if (!strncmp(p, "group_mf", 8)) | ||
182 | iommu_group_mf = 1; | ||
172 | 183 | ||
173 | gart_parse_options(p); | 184 | gart_parse_options(p); |
174 | 185 | ||
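
iommu_group_mf is a new opt-in parsed from the existing iommu= boot option; IOMMU drivers that honor it report a single device group for all functions of a multi-function PCI device, so userspace drivers cannot claim individual functions. Enabled from the kernel command line, e.g.:

    iommu=group_mf
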
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ee5d4fbd53b4..15763af7bfe3 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -293,7 +293,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) | |||
293 | regs.orig_ax = -1; | 293 | regs.orig_ax = -1; |
294 | regs.ip = (unsigned long) kernel_thread_helper; | 294 | regs.ip = (unsigned long) kernel_thread_helper; |
295 | regs.cs = __KERNEL_CS | get_kernel_rpl(); | 295 | regs.cs = __KERNEL_CS | get_kernel_rpl(); |
296 | regs.flags = X86_EFLAGS_IF | 0x2; | 296 | regs.flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1; |
297 | 297 | ||
298 | /* Ok, create the new process.. */ | 298 | /* Ok, create the new process.. */ |
299 | return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); | 299 | return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 795b79f984c2..485204f58cda 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -99,7 +99,8 @@ void cpu_idle(void) | |||
99 | 99 | ||
100 | /* endless idle loop with no priority at all */ | 100 | /* endless idle loop with no priority at all */ |
101 | while (1) { | 101 | while (1) { |
102 | tick_nohz_stop_sched_tick(1); | 102 | tick_nohz_idle_enter(); |
103 | rcu_idle_enter(); | ||
103 | while (!need_resched()) { | 104 | while (!need_resched()) { |
104 | 105 | ||
105 | check_pgt_cache(); | 106 | check_pgt_cache(); |
@@ -116,7 +117,8 @@ void cpu_idle(void) | |||
116 | pm_idle(); | 117 | pm_idle(); |
117 | start_critical_timings(); | 118 | start_critical_timings(); |
118 | } | 119 | } |
119 | tick_nohz_restart_sched_tick(); | 120 | rcu_idle_exit(); |
121 | tick_nohz_idle_exit(); | ||
120 | preempt_enable_no_resched(); | 122 | preempt_enable_no_resched(); |
121 | schedule(); | 123 | schedule(); |
122 | preempt_disable(); | 124 | preempt_disable(); |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 3bd7e6eebf31..9b9fe4a85c87 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -122,7 +122,7 @@ void cpu_idle(void) | |||
122 | 122 | ||
123 | /* endless idle loop with no priority at all */ | 123 | /* endless idle loop with no priority at all */ |
124 | while (1) { | 124 | while (1) { |
125 | tick_nohz_stop_sched_tick(1); | 125 | tick_nohz_idle_enter(); |
126 | while (!need_resched()) { | 126 | while (!need_resched()) { |
127 | 127 | ||
128 | rmb(); | 128 | rmb(); |
@@ -139,8 +139,14 @@ void cpu_idle(void) | |||
139 | enter_idle(); | 139 | enter_idle(); |
140 | /* Don't trace irqs off for idle */ | 140 | /* Don't trace irqs off for idle */ |
141 | stop_critical_timings(); | 141 | stop_critical_timings(); |
142 | |||
143 | /* enter_idle() needs rcu for notifiers */ | ||
144 | rcu_idle_enter(); | ||
145 | |||
142 | if (cpuidle_idle_call()) | 146 | if (cpuidle_idle_call()) |
143 | pm_idle(); | 147 | pm_idle(); |
148 | |||
149 | rcu_idle_exit(); | ||
144 | start_critical_timings(); | 150 | start_critical_timings(); |
145 | 151 | ||
146 | /* In many cases the interrupt that ended idle | 152 | /* In many cases the interrupt that ended idle |
@@ -149,7 +155,7 @@ void cpu_idle(void) | |||
149 | __exit_idle(); | 155 | __exit_idle(); |
150 | } | 156 | } |
151 | 157 | ||
152 | tick_nohz_restart_sched_tick(); | 158 | tick_nohz_idle_exit(); |
153 | preempt_enable_no_resched(); | 159 | preempt_enable_no_resched(); |
154 | schedule(); | 160 | schedule(); |
155 | preempt_disable(); | 161 | preempt_disable(); |
@@ -293,13 +299,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
293 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); | 299 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); |
294 | 300 | ||
295 | if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { | 301 | if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { |
296 | p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); | 302 | p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr, |
303 | IO_BITMAP_BYTES, GFP_KERNEL); | ||
297 | if (!p->thread.io_bitmap_ptr) { | 304 | if (!p->thread.io_bitmap_ptr) { |
298 | p->thread.io_bitmap_max = 0; | 305 | p->thread.io_bitmap_max = 0; |
299 | return -ENOMEM; | 306 | return -ENOMEM; |
300 | } | 307 | } |
301 | memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, | ||
302 | IO_BITMAP_BYTES); | ||
303 | set_tsk_thread_flag(p, TIF_IO_BITMAP); | 308 | set_tsk_thread_flag(p, TIF_IO_BITMAP); |
304 | } | 309 | } |
305 | 310 | ||
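
Three independent cleanups land in the process files: kernel_thread() spells the always-set EFLAGS bit 1 as X86_EFLAGS_BIT1 instead of a bare 0x2; the idle loops switch from the tick_nohz_stop_sched_tick(1)/restart pair to tick_nohz_idle_enter()/tick_nohz_idle_exit() with explicit rcu_idle_enter()/rcu_idle_exit() calls, placed tightly around the idle call on 64-bit because the enter_idle() notifiers still need RCU; and copy_thread() replaces a kmalloc()+memcpy() pair with kmemdup(). The last one is pure equivalence; a sketch of what the helper does:

    /* Sketch of kmemdup(): allocate-then-copy in one call. */
    void *kmemdup_sketch(const void *src, size_t len, gfp_t gfp)
    {
            void *p = kmalloc(len, gfp);

            if (p)
                    memcpy(p, src, len);
            return p;
    }
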
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 82528799c5de..50267386b766 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -749,7 +749,8 @@ put: | |||
749 | /* | 749 | /* |
750 | * Handle PTRACE_POKEUSR calls for the debug register area. | 750 | * Handle PTRACE_POKEUSR calls for the debug register area. |
751 | */ | 751 | */ |
752 | int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) | 752 | static int ptrace_set_debugreg(struct task_struct *tsk, int n, |
753 | unsigned long val) | ||
753 | { | 754 | { |
754 | struct thread_struct *thread = &(tsk->thread); | 755 | struct thread_struct *thread = &(tsk->thread); |
755 | int rc = 0; | 756 | int rc = 0; |
@@ -1391,20 +1392,18 @@ long syscall_trace_enter(struct pt_regs *regs) | |||
1391 | if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) | 1392 | if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) |
1392 | trace_sys_enter(regs, regs->orig_ax); | 1393 | trace_sys_enter(regs, regs->orig_ax); |
1393 | 1394 | ||
1394 | if (unlikely(current->audit_context)) { | 1395 | if (IS_IA32) |
1395 | if (IS_IA32) | 1396 | audit_syscall_entry(AUDIT_ARCH_I386, |
1396 | audit_syscall_entry(AUDIT_ARCH_I386, | 1397 | regs->orig_ax, |
1397 | regs->orig_ax, | 1398 | regs->bx, regs->cx, |
1398 | regs->bx, regs->cx, | 1399 | regs->dx, regs->si); |
1399 | regs->dx, regs->si); | ||
1400 | #ifdef CONFIG_X86_64 | 1400 | #ifdef CONFIG_X86_64 |
1401 | else | 1401 | else |
1402 | audit_syscall_entry(AUDIT_ARCH_X86_64, | 1402 | audit_syscall_entry(AUDIT_ARCH_X86_64, |
1403 | regs->orig_ax, | 1403 | regs->orig_ax, |
1404 | regs->di, regs->si, | 1404 | regs->di, regs->si, |
1405 | regs->dx, regs->r10); | 1405 | regs->dx, regs->r10); |
1406 | #endif | 1406 | #endif |
1407 | } | ||
1408 | 1407 | ||
1409 | return ret ?: regs->orig_ax; | 1408 | return ret ?: regs->orig_ax; |
1410 | } | 1409 | } |
@@ -1413,8 +1412,7 @@ void syscall_trace_leave(struct pt_regs *regs) | |||
1413 | { | 1412 | { |
1414 | bool step; | 1413 | bool step; |
1415 | 1414 | ||
1416 | if (unlikely(current->audit_context)) | 1415 | audit_syscall_exit(regs); |
1417 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); | ||
1418 | 1416 | ||
1419 | if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) | 1417 | if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) |
1420 | trace_sys_exit(regs, regs->ax); | 1418 | trace_sys_exit(regs, regs->ax); |
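
The audit_context guards disappear from the syscall tracing paths because the audit entry/exit helpers now check the context themselves, and audit_syscall_exit() takes the register set directly instead of a precomputed success/return pair. A sketch of the assumed wrapper shape on the audit side (illustrative, not quoted from this series):

    /* Hypothetical inline wrapper; the real one lives in linux/audit.h. */
    static inline void audit_syscall_exit_sketch(struct pt_regs *regs)
    {
            if (unlikely(current->audit_context))
                    __audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
    }
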
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index cf0ef986cb6d..d7d5099fe874 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -306,7 +306,8 @@ static void __init cleanup_highmap(void) | |||
306 | static void __init reserve_brk(void) | 306 | static void __init reserve_brk(void) |
307 | { | 307 | { |
308 | if (_brk_end > _brk_start) | 308 | if (_brk_end > _brk_start) |
309 | memblock_x86_reserve_range(__pa(_brk_start), __pa(_brk_end), "BRK"); | 309 | memblock_reserve(__pa(_brk_start), |
310 | __pa(_brk_end) - __pa(_brk_start)); | ||
310 | 311 | ||
311 | /* Mark brk area as locked down and no longer taking any | 312 | /* Mark brk area as locked down and no longer taking any |
312 | new allocations */ | 313 | new allocations */ |
@@ -331,13 +332,13 @@ static void __init relocate_initrd(void) | |||
331 | ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size, | 332 | ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size, |
332 | PAGE_SIZE); | 333 | PAGE_SIZE); |
333 | 334 | ||
334 | if (ramdisk_here == MEMBLOCK_ERROR) | 335 | if (!ramdisk_here) |
335 | panic("Cannot find place for new RAMDISK of size %lld\n", | 336 | panic("Cannot find place for new RAMDISK of size %lld\n", |
336 | ramdisk_size); | 337 | ramdisk_size); |
337 | 338 | ||
338 | /* Note: this includes all the lowmem currently occupied by | 339 | /* Note: this includes all the lowmem currently occupied by |
339 | the initrd, we rely on that fact to keep the data intact. */ | 340 | the initrd, we rely on that fact to keep the data intact. */ |
340 | memblock_x86_reserve_range(ramdisk_here, ramdisk_here + area_size, "NEW RAMDISK"); | 341 | memblock_reserve(ramdisk_here, area_size); |
341 | initrd_start = ramdisk_here + PAGE_OFFSET; | 342 | initrd_start = ramdisk_here + PAGE_OFFSET; |
342 | initrd_end = initrd_start + ramdisk_size; | 343 | initrd_end = initrd_start + ramdisk_size; |
343 | printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n", | 344 | printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n", |
@@ -393,7 +394,7 @@ static void __init reserve_initrd(void) | |||
393 | initrd_start = 0; | 394 | initrd_start = 0; |
394 | 395 | ||
395 | if (ramdisk_size >= (end_of_lowmem>>1)) { | 396 | if (ramdisk_size >= (end_of_lowmem>>1)) { |
396 | memblock_x86_free_range(ramdisk_image, ramdisk_end); | 397 | memblock_free(ramdisk_image, ramdisk_end - ramdisk_image); |
397 | printk(KERN_ERR "initrd too large to handle, " | 398 | printk(KERN_ERR "initrd too large to handle, " |
398 | "disabling initrd\n"); | 399 | "disabling initrd\n"); |
399 | return; | 400 | return; |
@@ -416,7 +417,7 @@ static void __init reserve_initrd(void) | |||
416 | 417 | ||
417 | relocate_initrd(); | 418 | relocate_initrd(); |
418 | 419 | ||
419 | memblock_x86_free_range(ramdisk_image, ramdisk_end); | 420 | memblock_free(ramdisk_image, ramdisk_end - ramdisk_image); |
420 | } | 421 | } |
421 | #else | 422 | #else |
422 | static void __init reserve_initrd(void) | 423 | static void __init reserve_initrd(void) |
@@ -490,15 +491,13 @@ static void __init memblock_x86_reserve_range_setup_data(void) | |||
490 | { | 491 | { |
491 | struct setup_data *data; | 492 | struct setup_data *data; |
492 | u64 pa_data; | 493 | u64 pa_data; |
493 | char buf[32]; | ||
494 | 494 | ||
495 | if (boot_params.hdr.version < 0x0209) | 495 | if (boot_params.hdr.version < 0x0209) |
496 | return; | 496 | return; |
497 | pa_data = boot_params.hdr.setup_data; | 497 | pa_data = boot_params.hdr.setup_data; |
498 | while (pa_data) { | 498 | while (pa_data) { |
499 | data = early_memremap(pa_data, sizeof(*data)); | 499 | data = early_memremap(pa_data, sizeof(*data)); |
500 | sprintf(buf, "setup data %x", data->type); | 500 | memblock_reserve(pa_data, sizeof(*data) + data->len); |
501 | memblock_x86_reserve_range(pa_data, pa_data+sizeof(*data)+data->len, buf); | ||
502 | pa_data = data->next; | 501 | pa_data = data->next; |
503 | early_iounmap(data, sizeof(*data)); | 502 | early_iounmap(data, sizeof(*data)); |
504 | } | 503 | } |
@@ -554,7 +553,7 @@ static void __init reserve_crashkernel(void) | |||
554 | crash_base = memblock_find_in_range(alignment, | 553 | crash_base = memblock_find_in_range(alignment, |
555 | CRASH_KERNEL_ADDR_MAX, crash_size, alignment); | 554 | CRASH_KERNEL_ADDR_MAX, crash_size, alignment); |
556 | 555 | ||
557 | if (crash_base == MEMBLOCK_ERROR) { | 556 | if (!crash_base) { |
558 | pr_info("crashkernel reservation failed - No suitable area found.\n"); | 557 | pr_info("crashkernel reservation failed - No suitable area found.\n"); |
559 | return; | 558 | return; |
560 | } | 559 | } |
@@ -568,7 +567,7 @@ static void __init reserve_crashkernel(void) | |||
568 | return; | 567 | return; |
569 | } | 568 | } |
570 | } | 569 | } |
571 | memblock_x86_reserve_range(crash_base, crash_base + crash_size, "CRASH KERNEL"); | 570 | memblock_reserve(crash_base, crash_size); |
572 | 571 | ||
573 | printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " | 572 | printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " |
574 | "for crashkernel (System RAM: %ldMB)\n", | 573 | "for crashkernel (System RAM: %ldMB)\n", |
@@ -626,7 +625,7 @@ static __init void reserve_ibft_region(void) | |||
626 | addr = find_ibft_region(&size); | 625 | addr = find_ibft_region(&size); |
627 | 626 | ||
628 | if (size) | 627 | if (size) |
629 | memblock_x86_reserve_range(addr, addr + size, "* ibft"); | 628 | memblock_reserve(addr, size); |
630 | } | 629 | } |
631 | 630 | ||
632 | static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10; | 631 | static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10; |
@@ -750,12 +749,7 @@ void __init setup_arch(char **cmdline_p) | |||
750 | #endif | 749 | #endif |
751 | #ifdef CONFIG_EFI | 750 | #ifdef CONFIG_EFI |
752 | if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, | 751 | if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, |
753 | #ifdef CONFIG_X86_32 | 752 | EFI_LOADER_SIGNATURE, 4)) { |
754 | "EL32", | ||
755 | #else | ||
756 | "EL64", | ||
757 | #endif | ||
758 | 4)) { | ||
759 | efi_enabled = 1; | 753 | efi_enabled = 1; |
760 | efi_memblock_x86_reserve_range(); | 754 | efi_memblock_x86_reserve_range(); |
761 | } | 755 | } |
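The setup.c hunks above all follow one conversion: the x86-private memblock_x86_reserve_range(start, end, name) took an exclusive end address plus a debug label, while the generic memblock_reserve(base, size) takes a base and a length, and allocation failure is now signalled by a 0 return from memblock_find_in_range() instead of the MEMBLOCK_ERROR sentinel. A minimal in-kernel sketch of the pattern; reserve_example() and find_example() are hypothetical wrappers, not functions from this commit:

    #include <linux/init.h>
    #include <linux/memblock.h>

    static void __init reserve_example(phys_addr_t start, phys_addr_t end)
    {
            /* old: memblock_x86_reserve_range(start, end, "EXAMPLE"); */
            memblock_reserve(start, end - start);       /* new: base + size */
    }

    static phys_addr_t __init find_example(phys_addr_t size)
    {
            phys_addr_t addr = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);

            /* old sentinel: if (addr == MEMBLOCK_ERROR) panic(...); */
            if (!addr)                  /* new: 0 means "no suitable range" */
                    panic("Cannot find %llu bytes\n", (unsigned long long)size);
            return addr;
    }

The same substitution appears again in trampoline.c further down.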
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 54ddaeb221c1..46a01bdc27e2 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -682,7 +682,6 @@ static int | |||
682 | handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | 682 | handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, |
683 | struct pt_regs *regs) | 683 | struct pt_regs *regs) |
684 | { | 684 | { |
685 | sigset_t blocked; | ||
686 | int ret; | 685 | int ret; |
687 | 686 | ||
688 | /* Are we from a system call? */ | 687 | /* Are we from a system call? */ |
@@ -733,10 +732,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
733 | */ | 732 | */ |
734 | regs->flags &= ~X86_EFLAGS_TF; | 733 | regs->flags &= ~X86_EFLAGS_TF; |
735 | 734 | ||
736 | sigorsets(&blocked, ¤t->blocked, &ka->sa.sa_mask); | 735 | block_sigmask(ka, sig); |
737 | if (!(ka->sa.sa_flags & SA_NODEFER)) | ||
738 | sigaddset(&blocked, sig); | ||
739 | set_current_blocked(&blocked); | ||
740 | 736 | ||
741 | tracehook_signal_handler(sig, info, ka, regs, | 737 | tracehook_signal_handler(sig, info, ka, regs, |
742 | test_thread_flag(TIF_SINGLESTEP)); | 738 | test_thread_flag(TIF_SINGLESTEP)); |
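The four lines removed from handle_signal() are centralized in a generic block_sigmask() helper. Reconstructed from exactly what is deleted here, the helper presumably looks like this sketch (the actual kernel/signal.c version may differ in detail):

    /* Sketch of the factored-out helper, rebuilt from the removed lines. */
    void block_sigmask(struct k_sigaction *ka, int signr)
    {
            sigset_t blocked;

            sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask);
            if (!(ka->sa.sa_flags & SA_NODEFER))
                    sigaddset(&blocked, signr);
            set_current_blocked(&blocked);
    }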
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 16204dc15484..66c74f481cab 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <asm/mmu_context.h> | 29 | #include <asm/mmu_context.h> |
30 | #include <asm/proto.h> | 30 | #include <asm/proto.h> |
31 | #include <asm/apic.h> | 31 | #include <asm/apic.h> |
32 | #include <asm/nmi.h> | ||
32 | /* | 33 | /* |
33 | * Some notes on x86 processor bugs affecting SMP operation: | 34 | * Some notes on x86 processor bugs affecting SMP operation: |
34 | * | 35 | * |
@@ -148,6 +149,60 @@ void native_send_call_func_ipi(const struct cpumask *mask) | |||
148 | free_cpumask_var(allbutself); | 149 | free_cpumask_var(allbutself); |
149 | } | 150 | } |
150 | 151 | ||
152 | static atomic_t stopping_cpu = ATOMIC_INIT(-1); | ||
153 | |||
154 | static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) | ||
155 | { | ||
156 | /* We are registered on stopping cpu too, avoid spurious NMI */ | ||
157 | if (raw_smp_processor_id() == atomic_read(&stopping_cpu)) | ||
158 | return NMI_HANDLED; | ||
159 | |||
160 | stop_this_cpu(NULL); | ||
161 | |||
162 | return NMI_HANDLED; | ||
163 | } | ||
164 | |||
165 | static void native_nmi_stop_other_cpus(int wait) | ||
166 | { | ||
167 | unsigned long flags; | ||
168 | unsigned long timeout; | ||
169 | |||
170 | if (reboot_force) | ||
171 | return; | ||
172 | |||
173 | /* | ||
174 | * Use an own vector here because smp_call_function | ||
175 | * does lots of things not suitable in a panic situation. | ||
176 | */ | ||
177 | if (num_online_cpus() > 1) { | ||
178 | /* did someone beat us here? */ | ||
179 | if (atomic_cmpxchg(&stopping_cpu, -1, safe_smp_processor_id()) != -1) | ||
180 | return; | ||
181 | |||
182 | if (register_nmi_handler(NMI_LOCAL, smp_stop_nmi_callback, | ||
183 | NMI_FLAG_FIRST, "smp_stop")) | ||
184 | /* Note: we ignore failures here */ | ||
185 | return; | ||
186 | |||
187 | /* sync above data before sending NMI */ | ||
188 | wmb(); | ||
189 | |||
190 | apic->send_IPI_allbutself(NMI_VECTOR); | ||
191 | |||
192 | /* | ||
193 | * Don't wait longer than a second if the caller | ||
194 | * didn't ask us to wait. | ||
195 | */ | ||
196 | timeout = USEC_PER_SEC; | ||
197 | while (num_online_cpus() > 1 && (wait || timeout--)) | ||
198 | udelay(1); | ||
199 | } | ||
200 | |||
201 | local_irq_save(flags); | ||
202 | disable_local_APIC(); | ||
203 | local_irq_restore(flags); | ||
204 | } | ||
205 | |||
151 | /* | 206 | /* |
152 | * this function calls the 'stop' function on all other CPUs in the system. | 207 | * this function calls the 'stop' function on all other CPUs in the system. |
153 | */ | 208 | */ |
@@ -160,7 +215,7 @@ asmlinkage void smp_reboot_interrupt(void) | |||
160 | irq_exit(); | 215 | irq_exit(); |
161 | } | 216 | } |
162 | 217 | ||
163 | static void native_stop_other_cpus(int wait) | 218 | static void native_irq_stop_other_cpus(int wait) |
164 | { | 219 | { |
165 | unsigned long flags; | 220 | unsigned long flags; |
166 | unsigned long timeout; | 221 | unsigned long timeout; |
@@ -194,6 +249,11 @@ static void native_stop_other_cpus(int wait) | |||
194 | local_irq_restore(flags); | 249 | local_irq_restore(flags); |
195 | } | 250 | } |
196 | 251 | ||
252 | static void native_smp_disable_nmi_ipi(void) | ||
253 | { | ||
254 | smp_ops.stop_other_cpus = native_irq_stop_other_cpus; | ||
255 | } | ||
256 | |||
197 | /* | 257 | /* |
198 | * Reschedule call back. | 258 | * Reschedule call back. |
199 | */ | 259 | */ |
@@ -225,12 +285,20 @@ void smp_call_function_single_interrupt(struct pt_regs *regs) | |||
225 | irq_exit(); | 285 | irq_exit(); |
226 | } | 286 | } |
227 | 287 | ||
288 | static int __init nonmi_ipi_setup(char *str) | ||
289 | { | ||
290 | native_smp_disable_nmi_ipi(); | ||
291 | return 1; | ||
292 | } | ||
293 | |||
294 | __setup("nonmi_ipi", nonmi_ipi_setup); | ||
295 | |||
228 | struct smp_ops smp_ops = { | 296 | struct smp_ops smp_ops = { |
229 | .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, | 297 | .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, |
230 | .smp_prepare_cpus = native_smp_prepare_cpus, | 298 | .smp_prepare_cpus = native_smp_prepare_cpus, |
231 | .smp_cpus_done = native_smp_cpus_done, | 299 | .smp_cpus_done = native_smp_cpus_done, |
232 | 300 | ||
233 | .stop_other_cpus = native_stop_other_cpus, | 301 | .stop_other_cpus = native_nmi_stop_other_cpus, |
234 | .smp_send_reschedule = native_smp_send_reschedule, | 302 | .smp_send_reschedule = native_smp_send_reschedule, |
235 | 303 | ||
236 | .cpu_up = native_cpu_up, | 304 | .cpu_up = native_cpu_up, |
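The atomic_cmpxchg() on stopping_cpu above is a one-shot election: whichever CPU swings the value from -1 to its own id wins the right to send the NMI, and every later caller bails out; the stopping CPU also ignores its own NMI via the raw_smp_processor_id() check. A self-contained user-space sketch of the same election pattern using GCC atomics (all names here are illustrative, compile with gcc -pthread):

    #include <pthread.h>
    #include <stdio.h>

    static int stopping_cpu = -1;       /* -1 means "nobody is stopping yet" */

    static void *try_stop(void *arg)
    {
            int me = (int)(long)arg;
            int expected = -1;

            /* Only the thread that swaps -1 -> me proceeds; the rest return. */
            if (__atomic_compare_exchange_n(&stopping_cpu, &expected, me, 0,
                                            __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
                    printf("cpu %d wins and sends the NMI\n", me);
            return NULL;
    }

    int main(void)
    {
            pthread_t t[4];
            for (long i = 0; i < 4; i++)
                    pthread_create(&t[i], NULL, try_stop, (void *)i);
            for (int i = 0; i < 4; i++)
                    pthread_join(t[i], NULL);
            return 0;
    }

Per the __setup() hook above, booting with nonmi_ipi on the kernel command line calls native_smp_disable_nmi_ipi(), which points smp_ops.stop_other_cpus back at the old REBOOT_VECTOR IPI path.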
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 9f548cb4a958..66d250c00d11 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -207,23 +207,29 @@ static void __cpuinit smp_callin(void) | |||
207 | * Need to setup vector mappings before we enable interrupts. | 207 | * Need to setup vector mappings before we enable interrupts. |
208 | */ | 208 | */ |
209 | setup_vector_irq(smp_processor_id()); | 209 | setup_vector_irq(smp_processor_id()); |
210 | |||
211 | /* | ||
212 | * Save our processor parameters. Note: this information | ||
213 | * is needed for clock calibration. | ||
214 | */ | ||
215 | smp_store_cpu_info(cpuid); | ||
216 | |||
210 | /* | 217 | /* |
211 | * Get our bogomips. | 218 | * Get our bogomips. |
219 | * Update loops_per_jiffy in cpu_data. Previous call to | ||
220 | * smp_store_cpu_info() stored a value that is close but not as | ||
221 | * accurate as the value just calculated. | ||
212 | * | 222 | * |
213 | * Need to enable IRQs because it can take longer and then | 223 | * Need to enable IRQs because it can take longer and then |
214 | * the NMI watchdog might kill us. | 224 | * the NMI watchdog might kill us. |
215 | */ | 225 | */ |
216 | local_irq_enable(); | 226 | local_irq_enable(); |
217 | calibrate_delay(); | 227 | calibrate_delay(); |
228 | cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy; | ||
218 | local_irq_disable(); | 229 | local_irq_disable(); |
219 | pr_debug("Stack at about %p\n", &cpuid); | 230 | pr_debug("Stack at about %p\n", &cpuid); |
220 | 231 | ||
221 | /* | 232 | /* |
222 | * Save our processor parameters | ||
223 | */ | ||
224 | smp_store_cpu_info(cpuid); | ||
225 | |||
226 | /* | ||
227 | * This must be done before setting cpu_online_mask | 233 | * This must be done before setting cpu_online_mask |
228 | * or calling notify_cpu_starting. | 234 | * or calling notify_cpu_starting. |
229 | */ | 235 | */ |
@@ -840,7 +846,8 @@ int __cpuinit native_cpu_up(unsigned int cpu) | |||
840 | pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu); | 846 | pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu); |
841 | 847 | ||
842 | if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || | 848 | if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || |
843 | !physid_isset(apicid, phys_cpu_present_map)) { | 849 | !physid_isset(apicid, phys_cpu_present_map) || |
850 | (!x2apic_mode && apicid >= 255)) { | ||
844 | printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu); | 851 | printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu); |
845 | return -EINVAL; | 852 | return -EINVAL; |
846 | } | 853 | } |
@@ -1142,6 +1149,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus) | |||
1142 | { | 1149 | { |
1143 | pr_debug("Boot done.\n"); | 1150 | pr_debug("Boot done.\n"); |
1144 | 1151 | ||
1152 | nmi_selftest(); | ||
1145 | impress_friends(); | 1153 | impress_friends(); |
1146 | #ifdef CONFIG_X86_IO_APIC | 1154 | #ifdef CONFIG_X86_IO_APIC |
1147 | setup_ioapic_dest(); | 1155 | setup_ioapic_dest(); |
diff --git a/arch/x86/kernel/syscall_32.c b/arch/x86/kernel/syscall_32.c new file mode 100644 index 000000000000..147fcd4941c4 --- /dev/null +++ b/arch/x86/kernel/syscall_32.c | |||
@@ -0,0 +1,25 @@ | |||
1 | /* System call table for i386. */ | ||
2 | |||
3 | #include <linux/linkage.h> | ||
4 | #include <linux/sys.h> | ||
5 | #include <linux/cache.h> | ||
6 | #include <asm/asm-offsets.h> | ||
7 | |||
8 | #define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ; | ||
9 | #include <asm/syscalls_32.h> | ||
10 | #undef __SYSCALL_I386 | ||
11 | |||
12 | #define __SYSCALL_I386(nr, sym, compat) [nr] = sym, | ||
13 | |||
14 | typedef asmlinkage void (*sys_call_ptr_t)(void); | ||
15 | |||
16 | extern asmlinkage void sys_ni_syscall(void); | ||
17 | |||
18 | const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { | ||
19 | /* | ||
20 | * Smells like a compiler bug -- it doesn't work | ||
21 | * when the & below is removed. | ||
22 | */ | ||
23 | [0 ... __NR_syscall_max] = &sys_ni_syscall, | ||
24 | #include <asm/syscalls_32.h> | ||
25 | }; | ||
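The new table is built by including the generated header twice with two different definitions of __SYSCALL_I386: once to declare every handler, once to emit a designated initializer per entry, with the GNU range initializer filling gaps with sys_ni_syscall. A self-contained user-space sketch of that double-expansion trick; MY_TABLE stands in for asm/syscalls_32.h and every name in it is purely illustrative:

    #include <stdio.h>

    /* Stands in for the generated header: one X(nr, sym) per system call. */
    #define MY_TABLE(X) \
            X(0, h_restart) \
            X(1, h_exit) \
            X(3, h_read)

    #define NR_MAX 3

    /* Pass 1: declare (here, define) every handler. */
    #define __SYSCALL_I386(nr, sym) static void sym(void) { puts(#sym); }
    MY_TABLE(__SYSCALL_I386)
    #undef __SYSCALL_I386

    static void sys_ni(void) { puts("sys_ni_syscall"); }

    /* Pass 2: one designated initializer per entry; unassigned slots keep
     * the GNU "[0 ... N]" default, just like the kernel table above. */
    #define __SYSCALL_I386(nr, sym) [nr] = sym,
    typedef void (*call_t)(void);
    static const call_t table[NR_MAX + 1] = {
            [0 ... NR_MAX] = sys_ni,
            MY_TABLE(__SYSCALL_I386)
    };
    #undef __SYSCALL_I386

    int main(void)
    {
            for (int i = 0; i <= NR_MAX; i++)
                    table[i]();             /* nr 2 prints sys_ni_syscall */
            return 0;
    }

The syscall_64.c hunk below applies the identical rewrite with a three-argument __SYSCALL_64(nr, sym, compat) macro, replacing the old __SYSCALL/unistd_64.h double include.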
diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c index de87d6008295..7ac7943be02c 100644 --- a/arch/x86/kernel/syscall_64.c +++ b/arch/x86/kernel/syscall_64.c | |||
@@ -5,15 +5,11 @@ | |||
5 | #include <linux/cache.h> | 5 | #include <linux/cache.h> |
6 | #include <asm/asm-offsets.h> | 6 | #include <asm/asm-offsets.h> |
7 | 7 | ||
8 | #define __NO_STUBS | 8 | #define __SYSCALL_64(nr, sym, compat) extern asmlinkage void sym(void) ; |
9 | #include <asm/syscalls_64.h> | ||
10 | #undef __SYSCALL_64 | ||
9 | 11 | ||
10 | #define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; | 12 | #define __SYSCALL_64(nr, sym, compat) [nr] = sym, |
11 | #undef _ASM_X86_UNISTD_64_H | ||
12 | #include <asm/unistd_64.h> | ||
13 | |||
14 | #undef __SYSCALL | ||
15 | #define __SYSCALL(nr, sym) [nr] = sym, | ||
16 | #undef _ASM_X86_UNISTD_64_H | ||
17 | 13 | ||
18 | typedef void (*sys_call_ptr_t)(void); | 14 | typedef void (*sys_call_ptr_t)(void); |
19 | 15 | ||
@@ -21,9 +17,9 @@ extern void sys_ni_syscall(void); | |||
21 | 17 | ||
22 | const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { | 18 | const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { |
23 | /* | 19 | /* |
24 | *Smells like a like a compiler bug -- it doesn't work | 20 | * Smells like a compiler bug -- it doesn't work |
25 | *when the & below is removed. | 21 | * when the & below is removed. |
26 | */ | 22 | */ |
27 | [0 ... __NR_syscall_max] = &sys_ni_syscall, | 23 | [0 ... __NR_syscall_max] = &sys_ni_syscall, |
28 | #include <asm/unistd_64.h> | 24 | #include <asm/syscalls_64.h> |
29 | }; | 25 | }; |
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S deleted file mode 100644 index 9a0e31293920..000000000000 --- a/arch/x86/kernel/syscall_table_32.S +++ /dev/null | |||
@@ -1,350 +0,0 @@ | |||
1 | ENTRY(sys_call_table) | ||
2 | .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ | ||
3 | .long sys_exit | ||
4 | .long ptregs_fork | ||
5 | .long sys_read | ||
6 | .long sys_write | ||
7 | .long sys_open /* 5 */ | ||
8 | .long sys_close | ||
9 | .long sys_waitpid | ||
10 | .long sys_creat | ||
11 | .long sys_link | ||
12 | .long sys_unlink /* 10 */ | ||
13 | .long ptregs_execve | ||
14 | .long sys_chdir | ||
15 | .long sys_time | ||
16 | .long sys_mknod | ||
17 | .long sys_chmod /* 15 */ | ||
18 | .long sys_lchown16 | ||
19 | .long sys_ni_syscall /* old break syscall holder */ | ||
20 | .long sys_stat | ||
21 | .long sys_lseek | ||
22 | .long sys_getpid /* 20 */ | ||
23 | .long sys_mount | ||
24 | .long sys_oldumount | ||
25 | .long sys_setuid16 | ||
26 | .long sys_getuid16 | ||
27 | .long sys_stime /* 25 */ | ||
28 | .long sys_ptrace | ||
29 | .long sys_alarm | ||
30 | .long sys_fstat | ||
31 | .long sys_pause | ||
32 | .long sys_utime /* 30 */ | ||
33 | .long sys_ni_syscall /* old stty syscall holder */ | ||
34 | .long sys_ni_syscall /* old gtty syscall holder */ | ||
35 | .long sys_access | ||
36 | .long sys_nice | ||
37 | .long sys_ni_syscall /* 35 - old ftime syscall holder */ | ||
38 | .long sys_sync | ||
39 | .long sys_kill | ||
40 | .long sys_rename | ||
41 | .long sys_mkdir | ||
42 | .long sys_rmdir /* 40 */ | ||
43 | .long sys_dup | ||
44 | .long sys_pipe | ||
45 | .long sys_times | ||
46 | .long sys_ni_syscall /* old prof syscall holder */ | ||
47 | .long sys_brk /* 45 */ | ||
48 | .long sys_setgid16 | ||
49 | .long sys_getgid16 | ||
50 | .long sys_signal | ||
51 | .long sys_geteuid16 | ||
52 | .long sys_getegid16 /* 50 */ | ||
53 | .long sys_acct | ||
54 | .long sys_umount /* recycled never used phys() */ | ||
55 | .long sys_ni_syscall /* old lock syscall holder */ | ||
56 | .long sys_ioctl | ||
57 | .long sys_fcntl /* 55 */ | ||
58 | .long sys_ni_syscall /* old mpx syscall holder */ | ||
59 | .long sys_setpgid | ||
60 | .long sys_ni_syscall /* old ulimit syscall holder */ | ||
61 | .long sys_olduname | ||
62 | .long sys_umask /* 60 */ | ||
63 | .long sys_chroot | ||
64 | .long sys_ustat | ||
65 | .long sys_dup2 | ||
66 | .long sys_getppid | ||
67 | .long sys_getpgrp /* 65 */ | ||
68 | .long sys_setsid | ||
69 | .long sys_sigaction | ||
70 | .long sys_sgetmask | ||
71 | .long sys_ssetmask | ||
72 | .long sys_setreuid16 /* 70 */ | ||
73 | .long sys_setregid16 | ||
74 | .long sys_sigsuspend | ||
75 | .long sys_sigpending | ||
76 | .long sys_sethostname | ||
77 | .long sys_setrlimit /* 75 */ | ||
78 | .long sys_old_getrlimit | ||
79 | .long sys_getrusage | ||
80 | .long sys_gettimeofday | ||
81 | .long sys_settimeofday | ||
82 | .long sys_getgroups16 /* 80 */ | ||
83 | .long sys_setgroups16 | ||
84 | .long sys_old_select | ||
85 | .long sys_symlink | ||
86 | .long sys_lstat | ||
87 | .long sys_readlink /* 85 */ | ||
88 | .long sys_uselib | ||
89 | .long sys_swapon | ||
90 | .long sys_reboot | ||
91 | .long sys_old_readdir | ||
92 | .long sys_old_mmap /* 90 */ | ||
93 | .long sys_munmap | ||
94 | .long sys_truncate | ||
95 | .long sys_ftruncate | ||
96 | .long sys_fchmod | ||
97 | .long sys_fchown16 /* 95 */ | ||
98 | .long sys_getpriority | ||
99 | .long sys_setpriority | ||
100 | .long sys_ni_syscall /* old profil syscall holder */ | ||
101 | .long sys_statfs | ||
102 | .long sys_fstatfs /* 100 */ | ||
103 | .long sys_ioperm | ||
104 | .long sys_socketcall | ||
105 | .long sys_syslog | ||
106 | .long sys_setitimer | ||
107 | .long sys_getitimer /* 105 */ | ||
108 | .long sys_newstat | ||
109 | .long sys_newlstat | ||
110 | .long sys_newfstat | ||
111 | .long sys_uname | ||
112 | .long ptregs_iopl /* 110 */ | ||
113 | .long sys_vhangup | ||
114 | .long sys_ni_syscall /* old "idle" system call */ | ||
115 | .long ptregs_vm86old | ||
116 | .long sys_wait4 | ||
117 | .long sys_swapoff /* 115 */ | ||
118 | .long sys_sysinfo | ||
119 | .long sys_ipc | ||
120 | .long sys_fsync | ||
121 | .long ptregs_sigreturn | ||
122 | .long ptregs_clone /* 120 */ | ||
123 | .long sys_setdomainname | ||
124 | .long sys_newuname | ||
125 | .long sys_modify_ldt | ||
126 | .long sys_adjtimex | ||
127 | .long sys_mprotect /* 125 */ | ||
128 | .long sys_sigprocmask | ||
129 | .long sys_ni_syscall /* old "create_module" */ | ||
130 | .long sys_init_module | ||
131 | .long sys_delete_module | ||
132 | .long sys_ni_syscall /* 130: old "get_kernel_syms" */ | ||
133 | .long sys_quotactl | ||
134 | .long sys_getpgid | ||
135 | .long sys_fchdir | ||
136 | .long sys_bdflush | ||
137 | .long sys_sysfs /* 135 */ | ||
138 | .long sys_personality | ||
139 | .long sys_ni_syscall /* reserved for afs_syscall */ | ||
140 | .long sys_setfsuid16 | ||
141 | .long sys_setfsgid16 | ||
142 | .long sys_llseek /* 140 */ | ||
143 | .long sys_getdents | ||
144 | .long sys_select | ||
145 | .long sys_flock | ||
146 | .long sys_msync | ||
147 | .long sys_readv /* 145 */ | ||
148 | .long sys_writev | ||
149 | .long sys_getsid | ||
150 | .long sys_fdatasync | ||
151 | .long sys_sysctl | ||
152 | .long sys_mlock /* 150 */ | ||
153 | .long sys_munlock | ||
154 | .long sys_mlockall | ||
155 | .long sys_munlockall | ||
156 | .long sys_sched_setparam | ||
157 | .long sys_sched_getparam /* 155 */ | ||
158 | .long sys_sched_setscheduler | ||
159 | .long sys_sched_getscheduler | ||
160 | .long sys_sched_yield | ||
161 | .long sys_sched_get_priority_max | ||
162 | .long sys_sched_get_priority_min /* 160 */ | ||
163 | .long sys_sched_rr_get_interval | ||
164 | .long sys_nanosleep | ||
165 | .long sys_mremap | ||
166 | .long sys_setresuid16 | ||
167 | .long sys_getresuid16 /* 165 */ | ||
168 | .long ptregs_vm86 | ||
169 | .long sys_ni_syscall /* Old sys_query_module */ | ||
170 | .long sys_poll | ||
171 | .long sys_ni_syscall /* Old nfsservctl */ | ||
172 | .long sys_setresgid16 /* 170 */ | ||
173 | .long sys_getresgid16 | ||
174 | .long sys_prctl | ||
175 | .long ptregs_rt_sigreturn | ||
176 | .long sys_rt_sigaction | ||
177 | .long sys_rt_sigprocmask /* 175 */ | ||
178 | .long sys_rt_sigpending | ||
179 | .long sys_rt_sigtimedwait | ||
180 | .long sys_rt_sigqueueinfo | ||
181 | .long sys_rt_sigsuspend | ||
182 | .long sys_pread64 /* 180 */ | ||
183 | .long sys_pwrite64 | ||
184 | .long sys_chown16 | ||
185 | .long sys_getcwd | ||
186 | .long sys_capget | ||
187 | .long sys_capset /* 185 */ | ||
188 | .long ptregs_sigaltstack | ||
189 | .long sys_sendfile | ||
190 | .long sys_ni_syscall /* reserved for streams1 */ | ||
191 | .long sys_ni_syscall /* reserved for streams2 */ | ||
192 | .long ptregs_vfork /* 190 */ | ||
193 | .long sys_getrlimit | ||
194 | .long sys_mmap_pgoff | ||
195 | .long sys_truncate64 | ||
196 | .long sys_ftruncate64 | ||
197 | .long sys_stat64 /* 195 */ | ||
198 | .long sys_lstat64 | ||
199 | .long sys_fstat64 | ||
200 | .long sys_lchown | ||
201 | .long sys_getuid | ||
202 | .long sys_getgid /* 200 */ | ||
203 | .long sys_geteuid | ||
204 | .long sys_getegid | ||
205 | .long sys_setreuid | ||
206 | .long sys_setregid | ||
207 | .long sys_getgroups /* 205 */ | ||
208 | .long sys_setgroups | ||
209 | .long sys_fchown | ||
210 | .long sys_setresuid | ||
211 | .long sys_getresuid | ||
212 | .long sys_setresgid /* 210 */ | ||
213 | .long sys_getresgid | ||
214 | .long sys_chown | ||
215 | .long sys_setuid | ||
216 | .long sys_setgid | ||
217 | .long sys_setfsuid /* 215 */ | ||
218 | .long sys_setfsgid | ||
219 | .long sys_pivot_root | ||
220 | .long sys_mincore | ||
221 | .long sys_madvise | ||
222 | .long sys_getdents64 /* 220 */ | ||
223 | .long sys_fcntl64 | ||
224 | .long sys_ni_syscall /* reserved for TUX */ | ||
225 | .long sys_ni_syscall | ||
226 | .long sys_gettid | ||
227 | .long sys_readahead /* 225 */ | ||
228 | .long sys_setxattr | ||
229 | .long sys_lsetxattr | ||
230 | .long sys_fsetxattr | ||
231 | .long sys_getxattr | ||
232 | .long sys_lgetxattr /* 230 */ | ||
233 | .long sys_fgetxattr | ||
234 | .long sys_listxattr | ||
235 | .long sys_llistxattr | ||
236 | .long sys_flistxattr | ||
237 | .long sys_removexattr /* 235 */ | ||
238 | .long sys_lremovexattr | ||
239 | .long sys_fremovexattr | ||
240 | .long sys_tkill | ||
241 | .long sys_sendfile64 | ||
242 | .long sys_futex /* 240 */ | ||
243 | .long sys_sched_setaffinity | ||
244 | .long sys_sched_getaffinity | ||
245 | .long sys_set_thread_area | ||
246 | .long sys_get_thread_area | ||
247 | .long sys_io_setup /* 245 */ | ||
248 | .long sys_io_destroy | ||
249 | .long sys_io_getevents | ||
250 | .long sys_io_submit | ||
251 | .long sys_io_cancel | ||
252 | .long sys_fadvise64 /* 250 */ | ||
253 | .long sys_ni_syscall | ||
254 | .long sys_exit_group | ||
255 | .long sys_lookup_dcookie | ||
256 | .long sys_epoll_create | ||
257 | .long sys_epoll_ctl /* 255 */ | ||
258 | .long sys_epoll_wait | ||
259 | .long sys_remap_file_pages | ||
260 | .long sys_set_tid_address | ||
261 | .long sys_timer_create | ||
262 | .long sys_timer_settime /* 260 */ | ||
263 | .long sys_timer_gettime | ||
264 | .long sys_timer_getoverrun | ||
265 | .long sys_timer_delete | ||
266 | .long sys_clock_settime | ||
267 | .long sys_clock_gettime /* 265 */ | ||
268 | .long sys_clock_getres | ||
269 | .long sys_clock_nanosleep | ||
270 | .long sys_statfs64 | ||
271 | .long sys_fstatfs64 | ||
272 | .long sys_tgkill /* 270 */ | ||
273 | .long sys_utimes | ||
274 | .long sys_fadvise64_64 | ||
275 | .long sys_ni_syscall /* sys_vserver */ | ||
276 | .long sys_mbind | ||
277 | .long sys_get_mempolicy | ||
278 | .long sys_set_mempolicy | ||
279 | .long sys_mq_open | ||
280 | .long sys_mq_unlink | ||
281 | .long sys_mq_timedsend | ||
282 | .long sys_mq_timedreceive /* 280 */ | ||
283 | .long sys_mq_notify | ||
284 | .long sys_mq_getsetattr | ||
285 | .long sys_kexec_load | ||
286 | .long sys_waitid | ||
287 | .long sys_ni_syscall /* 285 */ /* available */ | ||
288 | .long sys_add_key | ||
289 | .long sys_request_key | ||
290 | .long sys_keyctl | ||
291 | .long sys_ioprio_set | ||
292 | .long sys_ioprio_get /* 290 */ | ||
293 | .long sys_inotify_init | ||
294 | .long sys_inotify_add_watch | ||
295 | .long sys_inotify_rm_watch | ||
296 | .long sys_migrate_pages | ||
297 | .long sys_openat /* 295 */ | ||
298 | .long sys_mkdirat | ||
299 | .long sys_mknodat | ||
300 | .long sys_fchownat | ||
301 | .long sys_futimesat | ||
302 | .long sys_fstatat64 /* 300 */ | ||
303 | .long sys_unlinkat | ||
304 | .long sys_renameat | ||
305 | .long sys_linkat | ||
306 | .long sys_symlinkat | ||
307 | .long sys_readlinkat /* 305 */ | ||
308 | .long sys_fchmodat | ||
309 | .long sys_faccessat | ||
310 | .long sys_pselect6 | ||
311 | .long sys_ppoll | ||
312 | .long sys_unshare /* 310 */ | ||
313 | .long sys_set_robust_list | ||
314 | .long sys_get_robust_list | ||
315 | .long sys_splice | ||
316 | .long sys_sync_file_range | ||
317 | .long sys_tee /* 315 */ | ||
318 | .long sys_vmsplice | ||
319 | .long sys_move_pages | ||
320 | .long sys_getcpu | ||
321 | .long sys_epoll_pwait | ||
322 | .long sys_utimensat /* 320 */ | ||
323 | .long sys_signalfd | ||
324 | .long sys_timerfd_create | ||
325 | .long sys_eventfd | ||
326 | .long sys_fallocate | ||
327 | .long sys_timerfd_settime /* 325 */ | ||
328 | .long sys_timerfd_gettime | ||
329 | .long sys_signalfd4 | ||
330 | .long sys_eventfd2 | ||
331 | .long sys_epoll_create1 | ||
332 | .long sys_dup3 /* 330 */ | ||
333 | .long sys_pipe2 | ||
334 | .long sys_inotify_init1 | ||
335 | .long sys_preadv | ||
336 | .long sys_pwritev | ||
337 | .long sys_rt_tgsigqueueinfo /* 335 */ | ||
338 | .long sys_perf_event_open | ||
339 | .long sys_recvmmsg | ||
340 | .long sys_fanotify_init | ||
341 | .long sys_fanotify_mark | ||
342 | .long sys_prlimit64 /* 340 */ | ||
343 | .long sys_name_to_handle_at | ||
344 | .long sys_open_by_handle_at | ||
345 | .long sys_clock_adjtime | ||
346 | .long sys_syncfs | ||
347 | .long sys_sendmmsg /* 345 */ | ||
348 | .long sys_setns | ||
349 | .long sys_process_vm_readv | ||
350 | .long sys_process_vm_writev | ||
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index a91ae7709b49..a73b61055ad6 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c | |||
@@ -14,11 +14,11 @@ void __init setup_trampolines(void) | |||
14 | 14 | ||
15 | /* Has to be in very low memory so we can execute real-mode AP code. */ | 15 | /* Has to be in very low memory so we can execute real-mode AP code. */ |
16 | mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); | 16 | mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); |
17 | if (mem == MEMBLOCK_ERROR) | 17 | if (!mem) |
18 | panic("Cannot allocate trampoline\n"); | 18 | panic("Cannot allocate trampoline\n"); |
19 | 19 | ||
20 | x86_trampoline_base = __va(mem); | 20 | x86_trampoline_base = __va(mem); |
21 | memblock_x86_reserve_range(mem, mem + size, "TRAMPOLINE"); | 21 | memblock_reserve(mem, size); |
22 | 22 | ||
23 | printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n", | 23 | printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n", |
24 | x86_trampoline_base, (unsigned long long)mem, size); | 24 | x86_trampoline_base, (unsigned long long)mem, size); |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index a8e3eb83466c..482ec3af2067 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -306,19 +306,20 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) | |||
306 | == NOTIFY_STOP) | 306 | == NOTIFY_STOP) |
307 | return; | 307 | return; |
308 | #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ | 308 | #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ |
309 | #ifdef CONFIG_KPROBES | 309 | |
310 | if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) | 310 | if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) |
311 | == NOTIFY_STOP) | 311 | == NOTIFY_STOP) |
312 | return; | 312 | return; |
313 | #else | ||
314 | if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP) | ||
315 | == NOTIFY_STOP) | ||
316 | return; | ||
317 | #endif | ||
318 | 313 | ||
314 | /* | ||
315 | * Let others (NMI) know that the debug stack is in use | ||
316 | * as we may switch to the interrupt stack. | ||
317 | */ | ||
318 | debug_stack_usage_inc(); | ||
319 | preempt_conditional_sti(regs); | 319 | preempt_conditional_sti(regs); |
320 | do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); | 320 | do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); |
321 | preempt_conditional_cli(regs); | 321 | preempt_conditional_cli(regs); |
322 | debug_stack_usage_dec(); | ||
322 | } | 323 | } |
323 | 324 | ||
324 | #ifdef CONFIG_X86_64 | 325 | #ifdef CONFIG_X86_64 |
@@ -411,6 +412,12 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
411 | SIGTRAP) == NOTIFY_STOP) | 412 | SIGTRAP) == NOTIFY_STOP) |
412 | return; | 413 | return; |
413 | 414 | ||
415 | /* | ||
416 | * Let others (NMI) know that the debug stack is in use | ||
417 | * as we may switch to the interrupt stack. | ||
418 | */ | ||
419 | debug_stack_usage_inc(); | ||
420 | |||
414 | /* It's safe to allow irq's after DR6 has been saved */ | 421 | /* It's safe to allow irq's after DR6 has been saved */ |
415 | preempt_conditional_sti(regs); | 422 | preempt_conditional_sti(regs); |
416 | 423 | ||
@@ -418,6 +425,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
418 | handle_vm86_trap((struct kernel_vm86_regs *) regs, | 425 | handle_vm86_trap((struct kernel_vm86_regs *) regs, |
419 | error_code, 1); | 426 | error_code, 1); |
420 | preempt_conditional_cli(regs); | 427 | preempt_conditional_cli(regs); |
428 | debug_stack_usage_dec(); | ||
421 | return; | 429 | return; |
422 | } | 430 | } |
423 | 431 | ||
@@ -437,6 +445,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
437 | if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) | 445 | if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) |
438 | send_sigtrap(tsk, regs, error_code, si_code); | 446 | send_sigtrap(tsk, regs, error_code, si_code); |
439 | preempt_conditional_cli(regs); | 447 | preempt_conditional_cli(regs); |
448 | debug_stack_usage_dec(); | ||
440 | 449 | ||
441 | return; | 450 | return; |
442 | } | 451 | } |
@@ -723,4 +732,10 @@ void __init trap_init(void) | |||
723 | cpu_init(); | 732 | cpu_init(); |
724 | 733 | ||
725 | x86_init.irqs.trap_init(); | 734 | x86_init.irqs.trap_init(); |
735 | |||
736 | #ifdef CONFIG_X86_64 | ||
737 | memcpy(&nmi_idt_table, &idt_table, IDT_ENTRIES * 16); | ||
738 | set_nmi_gate(1, &debug); | ||
739 | set_nmi_gate(3, &int3); | ||
740 | #endif | ||
726 | } | 741 | } |
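Two things happen in the traps.c hunks: the trap_init() tail clones the IDT into nmi_idt_table (IDT_ENTRIES * 16 because each 64-bit gate descriptor is 16 bytes) and installs NMI-safe gates for #DB and #BP, while debug_stack_usage_inc()/dec() bracket the int3 and debug handlers so the NMI path can tell that a CPU is currently on the IST debug stack. A plausible minimal sketch of such a per-cpu usage counter pair; the real helpers live elsewhere in arch/x86, and debug_stack_in_use() is a hypothetical name:

    #include <linux/percpu.h>
    #include <linux/types.h>

    static DEFINE_PER_CPU(u32, debug_stack_usage);

    void debug_stack_usage_inc(void)
    {
            __get_cpu_var(debug_stack_usage)++;
    }

    void debug_stack_usage_dec(void)
    {
            __get_cpu_var(debug_stack_usage)--;
    }

    /* NMI code can then check the counter before reusing the debug stack. */
    bool debug_stack_in_use(void)
    {
            return __get_cpu_var(debug_stack_usage) != 0;
    }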
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index db483369f10b..c0dd5b603749 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -35,7 +35,7 @@ static int __read_mostly tsc_unstable; | |||
35 | erroneous rdtsc usage on !cpu_has_tsc processors */ | 35 | erroneous rdtsc usage on !cpu_has_tsc processors */ |
36 | static int __read_mostly tsc_disabled = -1; | 36 | static int __read_mostly tsc_disabled = -1; |
37 | 37 | ||
38 | static int tsc_clocksource_reliable; | 38 | int tsc_clocksource_reliable; |
39 | /* | 39 | /* |
40 | * Scheduler clock - returns current time in nanosec units. | 40 | * Scheduler clock - returns current time in nanosec units. |
41 | */ | 41 | */ |
@@ -178,11 +178,11 @@ static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2) | |||
178 | } | 178 | } |
179 | 179 | ||
180 | #define CAL_MS 10 | 180 | #define CAL_MS 10 |
181 | #define CAL_LATCH (CLOCK_TICK_RATE / (1000 / CAL_MS)) | 181 | #define CAL_LATCH (PIT_TICK_RATE / (1000 / CAL_MS)) |
182 | #define CAL_PIT_LOOPS 1000 | 182 | #define CAL_PIT_LOOPS 1000 |
183 | 183 | ||
184 | #define CAL2_MS 50 | 184 | #define CAL2_MS 50 |
185 | #define CAL2_LATCH (CLOCK_TICK_RATE / (1000 / CAL2_MS)) | 185 | #define CAL2_LATCH (PIT_TICK_RATE / (1000 / CAL2_MS)) |
186 | #define CAL2_PIT_LOOPS 5000 | 186 | #define CAL2_PIT_LOOPS 5000 |
187 | 187 | ||
188 | 188 | ||
@@ -995,3 +995,23 @@ void __init tsc_init(void) | |||
995 | check_system_tsc_reliable(); | 995 | check_system_tsc_reliable(); |
996 | } | 996 | } |
997 | 997 | ||
998 | #ifdef CONFIG_SMP | ||
999 | /* | ||
1000 | * If we have a constant TSC and are using the TSC for the delay loop, | ||
1001 | * we can skip clock calibration if another cpu in the same socket has already | ||
1002 | * been calibrated. This assumes that CONSTANT_TSC applies to all | ||
1003 | * cpus in the socket - this should be a safe assumption. | ||
1004 | */ | ||
1005 | unsigned long __cpuinit calibrate_delay_is_known(void) | ||
1006 | { | ||
1007 | int i, cpu = smp_processor_id(); | ||
1008 | |||
1009 | if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC)) | ||
1010 | return 0; | ||
1011 | |||
1012 | for_each_online_cpu(i) | ||
1013 | if (cpu_data(i).phys_proc_id == cpu_data(cpu).phys_proc_id) | ||
1014 | return cpu_data(i).loops_per_jiffy; | ||
1015 | return 0; | ||
1016 | } | ||
1017 | #endif | ||
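calibrate_delay_is_known() overrides a hook in the generic calibration path: when it returns a non-zero loops_per_jiffy (taken from an already-calibrated CPU in the same physical package), calibrate_delay() can skip the slow PIT/TSC loop entirely. This also explains the smpboot.c reordering above, which stores cpu_data (including phys_proc_id) before calibration and writes the accurate loops_per_jiffy back afterwards. The generic side presumably provides a weak default along these lines, sketched here rather than quoted from init/calibrate.c, with slow_full_calibration() as a hypothetical stand-in:

    /* Hedged sketch of the generic counterpart in init/calibrate.c. */
    unsigned long __attribute__((weak)) __cpuinit calibrate_delay_is_known(void)
    {
            return 0;       /* 0 == "not known, do the full calibration" */
    }

    void __cpuinit calibrate_delay(void)
    {
            unsigned long lpj = calibrate_delay_is_known();

            if (!lpj)
                    lpj = slow_full_calibration();  /* hypothetical name */
            loops_per_jiffy = lpj;
    }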
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 0aa5fed8b9e6..9eba29b46cb7 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c | |||
@@ -113,7 +113,7 @@ void __cpuinit check_tsc_sync_source(int cpu) | |||
113 | if (unsynchronized_tsc()) | 113 | if (unsynchronized_tsc()) |
114 | return; | 114 | return; |
115 | 115 | ||
116 | if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { | 116 | if (tsc_clocksource_reliable) { |
117 | if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING) | 117 | if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING) |
118 | pr_info( | 118 | pr_info( |
119 | "Skipped synchronization checks as TSC is reliable.\n"); | 119 | "Skipped synchronization checks as TSC is reliable.\n"); |
@@ -172,7 +172,7 @@ void __cpuinit check_tsc_sync_target(void) | |||
172 | { | 172 | { |
173 | int cpus = 2; | 173 | int cpus = 2; |
174 | 174 | ||
175 | if (unsynchronized_tsc() || boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) | 175 | if (unsynchronized_tsc() || tsc_clocksource_reliable) |
176 | return; | 176 | return; |
177 | 177 | ||
178 | /* | 178 | /* |
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 863f8753ab0a..b466cab5ba15 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c | |||
@@ -335,9 +335,11 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk | |||
335 | if (info->flags & VM86_SCREEN_BITMAP) | 335 | if (info->flags & VM86_SCREEN_BITMAP) |
336 | mark_screen_rdonly(tsk->mm); | 336 | mark_screen_rdonly(tsk->mm); |
337 | 337 | ||
338 | /*call audit_syscall_exit since we do not exit via the normal paths */ | 338 | /* call __audit_syscall_exit since we do not exit via the normal paths */ |
339 | #ifdef CONFIG_AUDITSYSCALL | ||
339 | if (unlikely(current->audit_context)) | 340 | if (unlikely(current->audit_context)) |
340 | audit_syscall_exit(AUDITSC_RESULT(0), 0); | 341 | __audit_syscall_exit(1, 0); |
342 | #endif | ||
341 | 343 | ||
342 | __asm__ __volatile__( | 344 | __asm__ __volatile__( |
343 | "movl %0,%%esp\n\t" | 345 | "movl %0,%%esp\n\t" |
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index e4d4a22e8b94..b07ba9393564 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -57,7 +57,7 @@ DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) = | |||
57 | .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), | 57 | .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), |
58 | }; | 58 | }; |
59 | 59 | ||
60 | static enum { EMULATE, NATIVE, NONE } vsyscall_mode = NATIVE; | 60 | static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE; |
61 | 61 | ||
62 | static int __init vsyscall_setup(char *str) | 62 | static int __init vsyscall_setup(char *str) |
63 | { | 63 | { |
@@ -140,11 +140,40 @@ static int addr_to_vsyscall_nr(unsigned long addr) | |||
140 | return nr; | 140 | return nr; |
141 | } | 141 | } |
142 | 142 | ||
143 | static bool write_ok_or_segv(unsigned long ptr, size_t size) | ||
144 | { | ||
145 | /* | ||
146 | * XXX: if access_ok, get_user, and put_user handled | ||
147 | * sig_on_uaccess_error, this could go away. | ||
148 | */ | ||
149 | |||
150 | if (!access_ok(VERIFY_WRITE, (void __user *)ptr, size)) { | ||
151 | siginfo_t info; | ||
152 | struct thread_struct *thread = ¤t->thread; | ||
153 | |||
154 | thread->error_code = 6; /* user fault, no page, write */ | ||
155 | thread->cr2 = ptr; | ||
156 | thread->trap_no = 14; | ||
157 | |||
158 | memset(&info, 0, sizeof(info)); | ||
159 | info.si_signo = SIGSEGV; | ||
160 | info.si_errno = 0; | ||
161 | info.si_code = SEGV_MAPERR; | ||
162 | info.si_addr = (void __user *)ptr; | ||
163 | |||
164 | force_sig_info(SIGSEGV, &info, current); | ||
165 | return false; | ||
166 | } else { | ||
167 | return true; | ||
168 | } | ||
169 | } | ||
170 | |||
143 | bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) | 171 | bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) |
144 | { | 172 | { |
145 | struct task_struct *tsk; | 173 | struct task_struct *tsk; |
146 | unsigned long caller; | 174 | unsigned long caller; |
147 | int vsyscall_nr; | 175 | int vsyscall_nr; |
176 | int prev_sig_on_uaccess_error; | ||
148 | long ret; | 177 | long ret; |
149 | 178 | ||
150 | /* | 179 | /* |
@@ -180,35 +209,65 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) | |||
180 | if (seccomp_mode(&tsk->seccomp)) | 209 | if (seccomp_mode(&tsk->seccomp)) |
181 | do_exit(SIGKILL); | 210 | do_exit(SIGKILL); |
182 | 211 | ||
212 | /* | ||
213 | * With a real vsyscall, page faults cause SIGSEGV. We want to | ||
214 | * preserve that behavior to make writing exploits harder. | ||
215 | */ | ||
216 | prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error; | ||
217 | current_thread_info()->sig_on_uaccess_error = 1; | ||
218 | |||
219 | /* | ||
220 | * 0 is a valid user pointer (in the access_ok sense) on 32-bit and | ||
221 | * 64-bit, so we don't need to special-case it here. For all the | ||
222 | * vsyscalls, 0 means "don't write anything" not "write it at | ||
223 | * address 0". | ||
224 | */ | ||
225 | ret = -EFAULT; | ||
183 | switch (vsyscall_nr) { | 226 | switch (vsyscall_nr) { |
184 | case 0: | 227 | case 0: |
228 | if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) || | ||
229 | !write_ok_or_segv(regs->si, sizeof(struct timezone))) | ||
230 | break; | ||
231 | |||
185 | ret = sys_gettimeofday( | 232 | ret = sys_gettimeofday( |
186 | (struct timeval __user *)regs->di, | 233 | (struct timeval __user *)regs->di, |
187 | (struct timezone __user *)regs->si); | 234 | (struct timezone __user *)regs->si); |
188 | break; | 235 | break; |
189 | 236 | ||
190 | case 1: | 237 | case 1: |
238 | if (!write_ok_or_segv(regs->di, sizeof(time_t))) | ||
239 | break; | ||
240 | |||
191 | ret = sys_time((time_t __user *)regs->di); | 241 | ret = sys_time((time_t __user *)regs->di); |
192 | break; | 242 | break; |
193 | 243 | ||
194 | case 2: | 244 | case 2: |
245 | if (!write_ok_or_segv(regs->di, sizeof(unsigned)) || | ||
246 | !write_ok_or_segv(regs->si, sizeof(unsigned))) | ||
247 | break; | ||
248 | |||
195 | ret = sys_getcpu((unsigned __user *)regs->di, | 249 | ret = sys_getcpu((unsigned __user *)regs->di, |
196 | (unsigned __user *)regs->si, | 250 | (unsigned __user *)regs->si, |
197 | 0); | 251 | 0); |
198 | break; | 252 | break; |
199 | } | 253 | } |
200 | 254 | ||
255 | current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error; | ||
256 | |||
201 | if (ret == -EFAULT) { | 257 | if (ret == -EFAULT) { |
202 | /* | 258 | /* Bad news -- userspace fed a bad pointer to a vsyscall. */ |
203 | * Bad news -- userspace fed a bad pointer to a vsyscall. | ||
204 | * | ||
205 | * With a real vsyscall, that would have caused SIGSEGV. | ||
206 | * To make writing reliable exploits using the emulated | ||
207 | * vsyscalls harder, generate SIGSEGV here as well. | ||
208 | */ | ||
209 | warn_bad_vsyscall(KERN_INFO, regs, | 259 | warn_bad_vsyscall(KERN_INFO, regs, |
210 | "vsyscall fault (exploit attempt?)"); | 260 | "vsyscall fault (exploit attempt?)"); |
211 | goto sigsegv; | 261 | |
262 | /* | ||
263 | * If we failed to generate a signal for any reason, | ||
264 | * generate one here. (This should be impossible.) | ||
265 | */ | ||
266 | if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) && | ||
267 | !sigismember(&tsk->pending.signal, SIGSEGV))) | ||
268 | goto sigsegv; | ||
269 | |||
270 | return true; /* Don't emulate the ret. */ | ||
212 | } | 271 | } |
213 | 272 | ||
214 | regs->ax = ret; | 273 | regs->ax = ret; |
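The net effect of write_ok_or_segv() plus the sig_on_uaccess_error flag is that a bad pointer handed to an emulated vsyscall raises SIGSEGV, just as a fault through the old native vsyscall page did, instead of quietly returning -EFAULT. A hedged user-space demonstration, assuming the classic fixed vsyscall entry address and the (now default) vsyscall=emulate mode:

    #include <stdio.h>
    #include <sys/time.h>

    /* The legacy fixed-address vsyscall entry for gettimeofday. */
    typedef int (*vgtod_t)(struct timeval *, struct timezone *);

    int main(void)
    {
            vgtod_t vgtod = (vgtod_t)0xffffffffff600000UL;
            struct timeval tv;

            if (vgtod(&tv, NULL) == 0)              /* valid pointers: fine */
                    printf("tv_sec=%ld\n", (long)tv.tv_sec);

            vgtod((struct timeval *)4096, NULL);    /* bad pointer: SIGSEGV */
            return 0;
    }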
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index c1d6cd549397..947a06ccc673 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c | |||
@@ -92,6 +92,7 @@ struct x86_init_ops x86_init __initdata = { | |||
92 | 92 | ||
93 | struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { | 93 | struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { |
94 | .setup_percpu_clockev = setup_secondary_APIC_clock, | 94 | .setup_percpu_clockev = setup_secondary_APIC_clock, |
95 | .fixup_cpu_id = x86_default_fixup_cpu_id, | ||
95 | }; | 96 | }; |
96 | 97 | ||
97 | static void default_nmi_init(void) { }; | 98 | static void default_nmi_init(void) { }; |
@@ -114,4 +115,5 @@ struct x86_msi_ops x86_msi = { | |||
114 | .setup_msi_irqs = native_setup_msi_irqs, | 115 | .setup_msi_irqs = native_setup_msi_irqs, |
115 | .teardown_msi_irq = native_teardown_msi_irq, | 116 | .teardown_msi_irq = native_teardown_msi_irq, |
116 | .teardown_msi_irqs = default_teardown_msi_irqs, | 117 | .teardown_msi_irqs = default_teardown_msi_irqs, |
118 | .restore_msi_irqs = default_restore_msi_irqs, | ||
117 | }; | 119 | }; |
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index ff5790d8e990..1a7fe868f375 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
@@ -35,6 +35,7 @@ config KVM | |||
35 | select KVM_MMIO | 35 | select KVM_MMIO |
36 | select TASKSTATS | 36 | select TASKSTATS |
37 | select TASK_DELAY_ACCT | 37 | select TASK_DELAY_ACCT |
38 | select PERF_EVENTS | ||
38 | ---help--- | 39 | ---help--- |
39 | Support hosting fully virtualized guest machines using hardware | 40 | Support hosting fully virtualized guest machines using hardware |
40 | virtualization extensions. You will need a fairly recent | 41 | virtualization extensions. You will need a fairly recent |
@@ -52,6 +53,8 @@ config KVM | |||
52 | config KVM_INTEL | 53 | config KVM_INTEL |
53 | tristate "KVM for Intel processors support" | 54 | tristate "KVM for Intel processors support" |
54 | depends on KVM | 55 | depends on KVM |
56 | # for perf_guest_get_msrs(): | ||
57 | depends on CPU_SUP_INTEL | ||
55 | ---help--- | 58 | ---help--- |
56 | Provides support for KVM on Intel processors equipped with the VT | 59 | Provides support for KVM on Intel processors equipped with the VT |
57 | extensions. | 60 | extensions. |
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index f15501f431c8..4f579e8dcacf 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile | |||
@@ -12,7 +12,7 @@ kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o) | |||
12 | kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o) | 12 | kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o) |
13 | 13 | ||
14 | kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ | 14 | kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ |
15 | i8254.o timer.o | 15 | i8254.o timer.o cpuid.o pmu.o |
16 | kvm-intel-y += vmx.o | 16 | kvm-intel-y += vmx.o |
17 | kvm-amd-y += svm.o | 17 | kvm-amd-y += svm.o |
18 | 18 | ||
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c new file mode 100644 index 000000000000..89b02bfaaca5 --- /dev/null +++ b/arch/x86/kvm/cpuid.c | |||
@@ -0,0 +1,670 @@ | |||
1 | /* | ||
2 | * Kernel-based Virtual Machine driver for Linux | ||
3 | * cpuid support routines | ||
4 | * | ||
5 | * derived from arch/x86/kvm/x86.c | ||
6 | * | ||
7 | * Copyright 2011 Red Hat, Inc. and/or its affiliates. | ||
8 | * Copyright IBM Corporation, 2008 | ||
9 | * | ||
10 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
11 | * the COPYING file in the top-level directory. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include <linux/kvm_host.h> | ||
16 | #include <linux/module.h> | ||
17 | #include <linux/vmalloc.h> | ||
18 | #include <linux/uaccess.h> | ||
19 | #include <asm/user.h> | ||
20 | #include <asm/xsave.h> | ||
21 | #include "cpuid.h" | ||
22 | #include "lapic.h" | ||
23 | #include "mmu.h" | ||
24 | #include "trace.h" | ||
25 | |||
26 | void kvm_update_cpuid(struct kvm_vcpu *vcpu) | ||
27 | { | ||
28 | struct kvm_cpuid_entry2 *best; | ||
29 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
30 | |||
31 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | ||
32 | if (!best) | ||
33 | return; | ||
34 | |||
35 | /* Update OSXSAVE bit */ | ||
36 | if (cpu_has_xsave && best->function == 0x1) { | ||
37 | best->ecx &= ~(bit(X86_FEATURE_OSXSAVE)); | ||
38 | if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) | ||
39 | best->ecx |= bit(X86_FEATURE_OSXSAVE); | ||
40 | } | ||
41 | |||
42 | if (apic) { | ||
43 | if (best->ecx & bit(X86_FEATURE_TSC_DEADLINE_TIMER)) | ||
44 | apic->lapic_timer.timer_mode_mask = 3 << 17; | ||
45 | else | ||
46 | apic->lapic_timer.timer_mode_mask = 1 << 17; | ||
47 | } | ||
48 | |||
49 | kvm_pmu_cpuid_update(vcpu); | ||
50 | } | ||
51 | |||
52 | static int is_efer_nx(void) | ||
53 | { | ||
54 | unsigned long long efer = 0; | ||
55 | |||
56 | rdmsrl_safe(MSR_EFER, &efer); | ||
57 | return efer & EFER_NX; | ||
58 | } | ||
59 | |||
60 | static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) | ||
61 | { | ||
62 | int i; | ||
63 | struct kvm_cpuid_entry2 *e, *entry; | ||
64 | |||
65 | entry = NULL; | ||
66 | for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { | ||
67 | e = &vcpu->arch.cpuid_entries[i]; | ||
68 | if (e->function == 0x80000001) { | ||
69 | entry = e; | ||
70 | break; | ||
71 | } | ||
72 | } | ||
73 | if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) { | ||
74 | entry->edx &= ~(1 << 20); | ||
75 | printk(KERN_INFO "kvm: guest NX capability removed\n"); | ||
76 | } | ||
77 | } | ||
78 | |||
79 | /* when an old userspace process fills a new kernel module */ | ||
80 | int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | ||
81 | struct kvm_cpuid *cpuid, | ||
82 | struct kvm_cpuid_entry __user *entries) | ||
83 | { | ||
84 | int r, i; | ||
85 | struct kvm_cpuid_entry *cpuid_entries; | ||
86 | |||
87 | r = -E2BIG; | ||
88 | if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) | ||
89 | goto out; | ||
90 | r = -ENOMEM; | ||
91 | cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) * cpuid->nent); | ||
92 | if (!cpuid_entries) | ||
93 | goto out; | ||
94 | r = -EFAULT; | ||
95 | if (copy_from_user(cpuid_entries, entries, | ||
96 | cpuid->nent * sizeof(struct kvm_cpuid_entry))) | ||
97 | goto out_free; | ||
98 | for (i = 0; i < cpuid->nent; i++) { | ||
99 | vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function; | ||
100 | vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax; | ||
101 | vcpu->arch.cpuid_entries[i].ebx = cpuid_entries[i].ebx; | ||
102 | vcpu->arch.cpuid_entries[i].ecx = cpuid_entries[i].ecx; | ||
103 | vcpu->arch.cpuid_entries[i].edx = cpuid_entries[i].edx; | ||
104 | vcpu->arch.cpuid_entries[i].index = 0; | ||
105 | vcpu->arch.cpuid_entries[i].flags = 0; | ||
106 | vcpu->arch.cpuid_entries[i].padding[0] = 0; | ||
107 | vcpu->arch.cpuid_entries[i].padding[1] = 0; | ||
108 | vcpu->arch.cpuid_entries[i].padding[2] = 0; | ||
109 | } | ||
110 | vcpu->arch.cpuid_nent = cpuid->nent; | ||
111 | cpuid_fix_nx_cap(vcpu); | ||
112 | r = 0; | ||
113 | kvm_apic_set_version(vcpu); | ||
114 | kvm_x86_ops->cpuid_update(vcpu); | ||
115 | kvm_update_cpuid(vcpu); | ||
116 | |||
117 | out_free: | ||
118 | vfree(cpuid_entries); | ||
119 | out: | ||
120 | return r; | ||
121 | } | ||
122 | |||
123 | int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, | ||
124 | struct kvm_cpuid2 *cpuid, | ||
125 | struct kvm_cpuid_entry2 __user *entries) | ||
126 | { | ||
127 | int r; | ||
128 | |||
129 | r = -E2BIG; | ||
130 | if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) | ||
131 | goto out; | ||
132 | r = -EFAULT; | ||
133 | if (copy_from_user(&vcpu->arch.cpuid_entries, entries, | ||
134 | cpuid->nent * sizeof(struct kvm_cpuid_entry2))) | ||
135 | goto out; | ||
136 | vcpu->arch.cpuid_nent = cpuid->nent; | ||
137 | kvm_apic_set_version(vcpu); | ||
138 | kvm_x86_ops->cpuid_update(vcpu); | ||
139 | kvm_update_cpuid(vcpu); | ||
140 | return 0; | ||
141 | |||
142 | out: | ||
143 | return r; | ||
144 | } | ||
145 | |||
146 | int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, | ||
147 | struct kvm_cpuid2 *cpuid, | ||
148 | struct kvm_cpuid_entry2 __user *entries) | ||
149 | { | ||
150 | int r; | ||
151 | |||
152 | r = -E2BIG; | ||
153 | if (cpuid->nent < vcpu->arch.cpuid_nent) | ||
154 | goto out; | ||
155 | r = -EFAULT; | ||
156 | if (copy_to_user(entries, &vcpu->arch.cpuid_entries, | ||
157 | vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2))) | ||
158 | goto out; | ||
159 | return 0; | ||
160 | |||
161 | out: | ||
162 | cpuid->nent = vcpu->arch.cpuid_nent; | ||
163 | return r; | ||
164 | } | ||
165 | |||
166 | static void cpuid_mask(u32 *word, int wordnum) | ||
167 | { | ||
168 | *word &= boot_cpu_data.x86_capability[wordnum]; | ||
169 | } | ||
170 | |||
171 | static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, | ||
172 | u32 index) | ||
173 | { | ||
174 | entry->function = function; | ||
175 | entry->index = index; | ||
176 | cpuid_count(entry->function, entry->index, | ||
177 | &entry->eax, &entry->ebx, &entry->ecx, &entry->edx); | ||
178 | entry->flags = 0; | ||
179 | } | ||
180 | |||
181 | static bool supported_xcr0_bit(unsigned bit) | ||
182 | { | ||
183 | u64 mask = ((u64)1 << bit); | ||
184 | |||
185 | return mask & (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) & host_xcr0; | ||
186 | } | ||
187 | |||
188 | #define F(x) bit(X86_FEATURE_##x) | ||
189 | |||
190 | static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | ||
191 | u32 index, int *nent, int maxnent) | ||
192 | { | ||
193 | int r; | ||
194 | unsigned f_nx = is_efer_nx() ? F(NX) : 0; | ||
195 | #ifdef CONFIG_X86_64 | ||
196 | unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL) | ||
197 | ? F(GBPAGES) : 0; | ||
198 | unsigned f_lm = F(LM); | ||
199 | #else | ||
200 | unsigned f_gbpages = 0; | ||
201 | unsigned f_lm = 0; | ||
202 | #endif | ||
203 | unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; | ||
204 | |||
205 | /* cpuid 1.edx */ | ||
206 | const u32 kvm_supported_word0_x86_features = | ||
207 | F(FPU) | F(VME) | F(DE) | F(PSE) | | ||
208 | F(TSC) | F(MSR) | F(PAE) | F(MCE) | | ||
209 | F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) | | ||
210 | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | | ||
211 | F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLSH) | | ||
212 | 0 /* Reserved, DS, ACPI */ | F(MMX) | | ||
213 | F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) | | ||
214 | 0 /* HTT, TM, Reserved, PBE */; | ||
215 | /* cpuid 0x80000001.edx */ | ||
216 | const u32 kvm_supported_word1_x86_features = | ||
217 | F(FPU) | F(VME) | F(DE) | F(PSE) | | ||
218 | F(TSC) | F(MSR) | F(PAE) | F(MCE) | | ||
219 | F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) | | ||
220 | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | | ||
221 | F(PAT) | F(PSE36) | 0 /* Reserved */ | | ||
222 | f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | | ||
223 | F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp | | ||
224 | 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); | ||
225 | /* cpuid 1.ecx */ | ||
226 | const u32 kvm_supported_word4_x86_features = | ||
227 | F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ | | ||
228 | 0 /* DS-CPL, VMX, SMX, EST */ | | ||
229 | 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | | ||
230 | F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ | | ||
231 | 0 /* Reserved, DCA */ | F(XMM4_1) | | ||
232 | F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | | ||
233 | 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) | | ||
234 | F(F16C) | F(RDRAND); | ||
235 | /* cpuid 0x80000001.ecx */ | ||
236 | const u32 kvm_supported_word6_x86_features = | ||
237 | F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ | | ||
238 | F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | | ||
239 | F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) | | ||
240 | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); | ||
241 | |||
242 | /* cpuid 0xC0000001.edx */ | ||
243 | const u32 kvm_supported_word5_x86_features = | ||
244 | F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | | ||
245 | F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) | | ||
246 | F(PMM) | F(PMM_EN); | ||
247 | |||
248 | /* cpuid 7.0.ebx */ | ||
249 | const u32 kvm_supported_word9_x86_features = | ||
250 | F(FSGSBASE) | F(BMI1) | F(AVX2) | F(SMEP) | F(BMI2) | F(ERMS); | ||
251 | |||
252 | /* all calls to cpuid_count() should be made on the same cpu */ | ||
253 | get_cpu(); | ||
254 | |||
255 | r = -E2BIG; | ||
256 | |||
257 | if (*nent >= maxnent) | ||
258 | goto out; | ||
259 | |||
260 | do_cpuid_1_ent(entry, function, index); | ||
261 | ++*nent; | ||
262 | |||
263 | switch (function) { | ||
264 | case 0: | ||
265 | entry->eax = min(entry->eax, (u32)0xd); | ||
266 | break; | ||
267 | case 1: | ||
268 | entry->edx &= kvm_supported_word0_x86_features; | ||
269 | cpuid_mask(&entry->edx, 0); | ||
270 | entry->ecx &= kvm_supported_word4_x86_features; | ||
271 | cpuid_mask(&entry->ecx, 4); | ||
272 | /* we support x2apic emulation even if host does not support | ||
273 | * it since we emulate x2apic in software */ | ||
274 | entry->ecx |= F(X2APIC); | ||
275 | break; | ||
276 | /* function 2 entries are STATEFUL. That is, repeated cpuid commands | ||
277 | * may return different values. This forces us to get_cpu() before | ||
278 | * issuing the first command, and also to emulate this annoying behavior | ||
279 | * in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */ | ||
280 | case 2: { | ||
281 | int t, times = entry->eax & 0xff; | ||
282 | |||
283 | entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; | ||
284 | entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; | ||
285 | for (t = 1; t < times; ++t) { | ||
286 | if (*nent >= maxnent) | ||
287 | goto out; | ||
288 | |||
289 | do_cpuid_1_ent(&entry[t], function, 0); | ||
290 | entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; | ||
291 | ++*nent; | ||
292 | } | ||
293 | break; | ||
294 | } | ||
295 | /* function 4 has additional index. */ | ||
296 | case 4: { | ||
297 | int i, cache_type; | ||
298 | |||
299 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
300 | /* read more entries until cache_type is zero */ | ||
301 | for (i = 1; ; ++i) { | ||
302 | if (*nent >= maxnent) | ||
303 | goto out; | ||
304 | |||
305 | cache_type = entry[i - 1].eax & 0x1f; | ||
306 | if (!cache_type) | ||
307 | break; | ||
308 | do_cpuid_1_ent(&entry[i], function, i); | ||
309 | entry[i].flags |= | ||
310 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
311 | ++*nent; | ||
312 | } | ||
313 | break; | ||
314 | } | ||
315 | case 7: { | ||
316 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
317 | /* Mask ebx against host capability word 9 */ | ||
318 | if (index == 0) { | ||
319 | entry->ebx &= kvm_supported_word9_x86_features; | ||
320 | cpuid_mask(&entry->ebx, 9); | ||
321 | } else | ||
322 | entry->ebx = 0; | ||
323 | entry->eax = 0; | ||
324 | entry->ecx = 0; | ||
325 | entry->edx = 0; | ||
326 | break; | ||
327 | } | ||
328 | case 9: | ||
329 | break; | ||
330 | case 0xa: { /* Architectural Performance Monitoring */ | ||
331 | struct x86_pmu_capability cap; | ||
332 | union cpuid10_eax eax; | ||
333 | union cpuid10_edx edx; | ||
334 | |||
335 | perf_get_x86_pmu_capability(&cap); | ||
336 | |||
337 | /* | ||
338 | * Only support guest architectural pmu on a host | ||
339 | * with architectural pmu. | ||
340 | */ | ||
341 | if (!cap.version) | ||
342 | memset(&cap, 0, sizeof(cap)); | ||
343 | |||
344 | eax.split.version_id = min(cap.version, 2); | ||
345 | eax.split.num_counters = cap.num_counters_gp; | ||
346 | eax.split.bit_width = cap.bit_width_gp; | ||
347 | eax.split.mask_length = cap.events_mask_len; | ||
348 | |||
349 | edx.split.num_counters_fixed = cap.num_counters_fixed; | ||
350 | edx.split.bit_width_fixed = cap.bit_width_fixed; | ||
351 | edx.split.reserved = 0; | ||
352 | |||
353 | entry->eax = eax.full; | ||
354 | entry->ebx = cap.events_mask; | ||
355 | entry->ecx = 0; | ||
356 | entry->edx = edx.full; | ||
357 | break; | ||
358 | } | ||
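(The packing above follows the architectural PMU leaf, CPUID.0xA: version in EAX[7:0], general-purpose counter count in EAX[15:8], counter width in EAX[23:16], event-mask length in EAX[31:24], fixed counter count in EDX[4:0], fixed counter width in EDX[12:5]. A host-side decode sketch; field positions per the public layout, illustrative only:)

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(0xa, &eax, &ebx, &ecx, &edx))
            return 1;
        printf("PMU v%u: %u GP counters x %u bits, %u fixed x %u bits\n",
               eax & 0xff, (eax >> 8) & 0xff, (eax >> 16) & 0xff,
               edx & 0x1f, (edx >> 5) & 0xff);
        return 0;
    }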
359 | /* function 0xb has additional index. */ | ||
360 | case 0xb: { | ||
361 | int i, level_type; | ||
362 | |||
363 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
364 | /* read more entries until level_type is zero */ | ||
365 | for (i = 1; ; ++i) { | ||
366 | if (*nent >= maxnent) | ||
367 | goto out; | ||
368 | |||
369 | level_type = entry[i - 1].ecx & 0xff00; | ||
370 | if (!level_type) | ||
371 | break; | ||
372 | do_cpuid_1_ent(&entry[i], function, i); | ||
373 | entry[i].flags |= | ||
374 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
375 | ++*nent; | ||
376 | } | ||
377 | break; | ||
378 | } | ||
379 | case 0xd: { | ||
380 | int idx, i; | ||
381 | |||
382 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
383 | for (idx = 1, i = 1; idx < 64; ++idx) { | ||
384 | if (*nent >= maxnent) | ||
385 | goto out; | ||
386 | |||
387 | do_cpuid_1_ent(&entry[i], function, idx); | ||
388 | if (entry[i].eax == 0 || !supported_xcr0_bit(idx)) | ||
389 | continue; | ||
390 | entry[i].flags |= | ||
391 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
392 | ++*nent; | ||
393 | ++i; | ||
394 | } | ||
395 | break; | ||
396 | } | ||
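(Leaf 0xD is also ECX-indexed: subleaf i describes XSAVE state component i, and the loop above skips components the host cannot expose, i.e. size zero or absent from the supported XCR0 mask. A user-space walk in the same spirit, printing raw registers since subleaves 0 and 1 carry special layouts; illustrative:)

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx, idx;

        for (idx = 0; idx < 64; idx++) {
            if (!__get_cpuid_count(0xd, idx, &eax, &ebx, &ecx, &edx))
                break;
            if (idx >= 2 && !eax)
                continue;        /* state component not supported */
            printf("0xd/%u: %08x %08x %08x %08x\n",
                   idx, eax, ebx, ecx, edx);
        }
        return 0;
    }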
397 | case KVM_CPUID_SIGNATURE: { | ||
398 | char signature[12] = "KVMKVMKVM\0\0"; | ||
399 | u32 *sigptr = (u32 *)signature; | ||
400 | entry->eax = 0; | ||
401 | entry->ebx = sigptr[0]; | ||
402 | entry->ecx = sigptr[1]; | ||
403 | entry->edx = sigptr[2]; | ||
404 | break; | ||
405 | } | ||
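(A guest detects KVM by issuing CPUID at 0x40000000, the value KVM_CPUID_SIGNATURE resolves to in the KVM ABI, and comparing EBX/ECX/EDX with the padded "KVMKVMKVM" string built here. A guest-side sketch; the raw __cpuid macro is used because __get_cpuid range-checks against the basic maximum and would reject hypervisor leaves:)

    #include <cpuid.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        unsigned int eax, regs[3];

        __cpuid(0x40000000, eax, regs[0], regs[1], regs[2]);
        if (!memcmp(regs, "KVMKVMKVM\0\0\0", 12))
            printf("running on KVM\n");
        return 0;
    }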
406 | case KVM_CPUID_FEATURES: | ||
407 | entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) | | ||
408 | (1 << KVM_FEATURE_NOP_IO_DELAY) | | ||
409 | (1 << KVM_FEATURE_CLOCKSOURCE2) | | ||
410 | (1 << KVM_FEATURE_ASYNC_PF) | | ||
411 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); | ||
412 | |||
413 | if (sched_info_on()) | ||
414 | entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); | ||
415 | |||
416 | entry->ebx = 0; | ||
417 | entry->ecx = 0; | ||
418 | entry->edx = 0; | ||
419 | break; | ||
420 | case 0x80000000: | ||
421 | entry->eax = min(entry->eax, 0x8000001a); | ||
422 | break; | ||
423 | case 0x80000001: | ||
424 | entry->edx &= kvm_supported_word1_x86_features; | ||
425 | cpuid_mask(&entry->edx, 1); | ||
426 | entry->ecx &= kvm_supported_word6_x86_features; | ||
427 | cpuid_mask(&entry->ecx, 6); | ||
428 | break; | ||
429 | case 0x80000008: { | ||
430 | unsigned g_phys_as = (entry->eax >> 16) & 0xff; | ||
431 | unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U); | ||
432 | unsigned phys_as = entry->eax & 0xff; | ||
433 | |||
434 | if (!g_phys_as) | ||
435 | g_phys_as = phys_as; | ||
436 | entry->eax = g_phys_as | (virt_as << 8); | ||
437 | entry->ebx = entry->edx = 0; | ||
438 | break; | ||
439 | } | ||
440 | case 0x80000019: | ||
441 | entry->ecx = entry->edx = 0; | ||
442 | break; | ||
443 | case 0x8000001a: | ||
444 | break; | ||
445 | case 0x8000001d: | ||
446 | break; | ||
447 | /* Add support for Centaur's CPUID instruction */ | ||
448 | case 0xC0000000: | ||
449 | /* Just support up to 0xC0000004 now */ | ||
450 | entry->eax = min(entry->eax, 0xC0000004); | ||
451 | break; | ||
452 | case 0xC0000001: | ||
453 | entry->edx &= kvm_supported_word5_x86_features; | ||
454 | cpuid_mask(&entry->edx, 5); | ||
455 | break; | ||
456 | case 3: /* Processor serial number */ | ||
457 | case 5: /* MONITOR/MWAIT */ | ||
458 | case 6: /* Thermal management */ | ||
459 | case 0x80000007: /* Advanced power management */ | ||
460 | case 0xC0000002: | ||
461 | case 0xC0000003: | ||
462 | case 0xC0000004: | ||
463 | default: | ||
464 | entry->eax = entry->ebx = entry->ecx = entry->edx = 0; | ||
465 | break; | ||
466 | } | ||
467 | |||
468 | kvm_x86_ops->set_supported_cpuid(function, entry); | ||
469 | |||
470 | r = 0; | ||
471 | |||
472 | out: | ||
473 | put_cpu(); | ||
474 | |||
475 | return r; | ||
476 | } | ||
477 | |||
478 | #undef F | ||
479 | |||
480 | struct kvm_cpuid_param { | ||
481 | u32 func; | ||
482 | u32 idx; | ||
483 | bool has_leaf_count; | ||
484 | bool (*qualifier)(struct kvm_cpuid_param *param); | ||
485 | }; | ||
486 | |||
487 | static bool is_centaur_cpu(struct kvm_cpuid_param *param) | ||
488 | { | ||
489 | return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR; | ||
490 | } | ||
491 | |||
492 | int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, | ||
493 | struct kvm_cpuid_entry2 __user *entries) | ||
494 | { | ||
495 | struct kvm_cpuid_entry2 *cpuid_entries; | ||
496 | int limit, nent = 0, r = -E2BIG, i; | ||
497 | u32 func; | ||
498 | static struct kvm_cpuid_param param[] = { | ||
499 | { .func = 0, .has_leaf_count = true }, | ||
500 | { .func = 0x80000000, .has_leaf_count = true }, | ||
501 | { .func = 0xC0000000, .qualifier = is_centaur_cpu, .has_leaf_count = true }, | ||
502 | { .func = KVM_CPUID_SIGNATURE }, | ||
503 | { .func = KVM_CPUID_FEATURES }, | ||
504 | }; | ||
505 | |||
506 | if (cpuid->nent < 1) | ||
507 | goto out; | ||
508 | if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) | ||
509 | cpuid->nent = KVM_MAX_CPUID_ENTRIES; | ||
510 | r = -ENOMEM; | ||
511 | cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); | ||
512 | if (!cpuid_entries) | ||
513 | goto out; | ||
514 | |||
515 | r = 0; | ||
516 | for (i = 0; i < ARRAY_SIZE(param); i++) { | ||
517 | struct kvm_cpuid_param *ent = ¶m[i]; | ||
518 | |||
519 | if (ent->qualifier && !ent->qualifier(ent)) | ||
520 | continue; | ||
521 | |||
522 | r = do_cpuid_ent(&cpuid_entries[nent], ent->func, ent->idx, | ||
523 | &nent, cpuid->nent); | ||
524 | |||
525 | if (r) | ||
526 | goto out_free; | ||
527 | |||
528 | if (!ent->has_leaf_count) | ||
529 | continue; | ||
530 | |||
531 | limit = cpuid_entries[nent - 1].eax; | ||
532 | for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func) | ||
533 | r = do_cpuid_ent(&cpuid_entries[nent], func, ent->idx, | ||
534 | &nent, cpuid->nent); | ||
535 | |||
536 | if (r) | ||
537 | goto out_free; | ||
538 | } | ||
539 | |||
540 | r = -EFAULT; | ||
541 | if (copy_to_user(entries, cpuid_entries, | ||
542 | nent * sizeof(struct kvm_cpuid_entry2))) | ||
543 | goto out_free; | ||
544 | cpuid->nent = nent; | ||
545 | r = 0; | ||
546 | |||
547 | out_free: | ||
548 | vfree(cpuid_entries); | ||
549 | out: | ||
550 | return r; | ||
551 | } | ||
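(User space reaches this through the KVM_GET_SUPPORTED_CPUID ioctl on the /dev/kvm file descriptor; -E2BIG signals that the supplied buffer was too small for the full list. A typical caller therefore grows and retries, roughly like this sketch, with error handling trimmed:)

    #include <errno.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static struct kvm_cpuid2 *get_supported_cpuid(int kvm_fd)
    {
        int nent = 8;
        struct kvm_cpuid2 *cpuid;

        for (;;) {
            cpuid = calloc(1, sizeof(*cpuid) +
                              nent * sizeof(struct kvm_cpuid_entry2));
            if (!cpuid)
                return NULL;
            cpuid->nent = nent;
            if (!ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid))
                return cpuid;       /* cpuid->nent now holds the count */
            free(cpuid);
            if (errno != E2BIG)
                return NULL;
            nent *= 2;              /* buffer too small: grow and retry */
        }
    }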
552 | |||
553 | static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) | ||
554 | { | ||
555 | struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i]; | ||
556 | int j, nent = vcpu->arch.cpuid_nent; | ||
557 | |||
558 | e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT; | ||
559 | /* when no next entry is found, the current entry[i] is reselected */ | ||
560 | for (j = i + 1; ; j = (j + 1) % nent) { | ||
561 | struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j]; | ||
562 | if (ej->function == e->function) { | ||
563 | ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; | ||
564 | return j; | ||
565 | } | ||
566 | } | ||
567 | return 0; /* silence gcc, even though control never reaches here */ | ||
568 | } | ||
569 | |||
570 | /* find an entry with matching function, matching index (if needed), and that | ||
571 | * should be read next (if it's stateful) */ | ||
572 | static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e, | ||
573 | u32 function, u32 index) | ||
574 | { | ||
575 | if (e->function != function) | ||
576 | return 0; | ||
577 | if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index) | ||
578 | return 0; | ||
579 | if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) && | ||
580 | !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT)) | ||
581 | return 0; | ||
582 | return 1; | ||
583 | } | ||
584 | |||
585 | struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, | ||
586 | u32 function, u32 index) | ||
587 | { | ||
588 | int i; | ||
589 | struct kvm_cpuid_entry2 *best = NULL; | ||
590 | |||
591 | for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { | ||
592 | struct kvm_cpuid_entry2 *e; | ||
593 | |||
594 | e = &vcpu->arch.cpuid_entries[i]; | ||
595 | if (is_matching_cpuid_entry(e, function, index)) { | ||
596 | if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) | ||
597 | move_to_next_stateful_cpuid_entry(vcpu, i); | ||
598 | best = e; | ||
599 | break; | ||
600 | } | ||
601 | } | ||
602 | return best; | ||
603 | } | ||
604 | EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry); | ||
605 | |||
606 | int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) | ||
607 | { | ||
608 | struct kvm_cpuid_entry2 *best; | ||
609 | |||
610 | best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0); | ||
611 | if (!best || best->eax < 0x80000008) | ||
612 | goto not_found; | ||
613 | best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); | ||
614 | if (best) | ||
615 | return best->eax & 0xff; | ||
616 | not_found: | ||
617 | return 36; | ||
618 | } | ||
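(The 36-bit fallback matches the implicit physical address width of CPUs that predate leaf 0x80000008; MMU code uses the result to place the reserved-bit boundary in guest page-table entries. A toy illustration of that mask:)

    #include <stdio.h>

    int main(void)
    {
        int maxphyaddr = 36;        /* the fallback used above */
        unsigned long long rsvd = ~0ULL << maxphyaddr;

        /* bits at and above MAXPHYADDR are reserved in PTEs */
        printf("reserved physaddr bits: %016llx\n", rsvd);
        return 0;
    }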
619 | |||
620 | /* | ||
621 | * If no match is found, check whether we exceed the vCPU's limit | ||
622 | * and return the content of the highest valid _standard_ leaf instead. | ||
623 | * This is to satisfy the CPUID specification. | ||
624 | */ | ||
625 | static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu, | ||
626 | u32 function, u32 index) | ||
627 | { | ||
628 | struct kvm_cpuid_entry2 *maxlevel; | ||
629 | |||
630 | maxlevel = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0); | ||
631 | if (!maxlevel || maxlevel->eax >= function) | ||
632 | return NULL; | ||
633 | if (function & 0x80000000) { | ||
634 | maxlevel = kvm_find_cpuid_entry(vcpu, 0, 0); | ||
635 | if (!maxlevel) | ||
636 | return NULL; | ||
637 | } | ||
638 | return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index); | ||
639 | } | ||
640 | |||
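(The spec behavior being satisfied: when EAX exceeds the reported maximum, real CPUs return the data of the highest basic leaf rather than zeros. A host demo of that quirk, hedged since vendors differ in corner cases and ECX-indexed maximum leaves can muddy the comparison:)

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned int max, a, b, c, d, a2, b2, c2, d2;

        max = __get_cpuid_max(0, NULL);     /* highest basic leaf */
        __cpuid(max, a, b, c, d);
        __cpuid(max + 1, a2, b2, c2, d2);   /* out of range */
        printf("leaf 0x%x eax=%08x, out-of-range eax=%08x\n", max, a, a2);
        return 0;
    }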
641 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) | ||
642 | { | ||
643 | u32 function, index; | ||
644 | struct kvm_cpuid_entry2 *best; | ||
645 | |||
646 | function = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
647 | index = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
648 | kvm_register_write(vcpu, VCPU_REGS_RAX, 0); | ||
649 | kvm_register_write(vcpu, VCPU_REGS_RBX, 0); | ||
650 | kvm_register_write(vcpu, VCPU_REGS_RCX, 0); | ||
651 | kvm_register_write(vcpu, VCPU_REGS_RDX, 0); | ||
652 | best = kvm_find_cpuid_entry(vcpu, function, index); | ||
653 | |||
654 | if (!best) | ||
655 | best = check_cpuid_limit(vcpu, function, index); | ||
656 | |||
657 | if (best) { | ||
658 | kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax); | ||
659 | kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx); | ||
660 | kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx); | ||
661 | kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx); | ||
662 | } | ||
663 | kvm_x86_ops->skip_emulated_instruction(vcpu); | ||
664 | trace_kvm_cpuid(function, | ||
665 | kvm_register_read(vcpu, VCPU_REGS_RAX), | ||
666 | kvm_register_read(vcpu, VCPU_REGS_RBX), | ||
667 | kvm_register_read(vcpu, VCPU_REGS_RCX), | ||
668 | kvm_register_read(vcpu, VCPU_REGS_RDX)); | ||
669 | } | ||
670 | EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); | ||
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h new file mode 100644 index 000000000000..5b97e1797a6d --- /dev/null +++ b/arch/x86/kvm/cpuid.h | |||
@@ -0,0 +1,46 @@ | |||
1 | #ifndef ARCH_X86_KVM_CPUID_H | ||
2 | #define ARCH_X86_KVM_CPUID_H | ||
3 | |||
4 | #include "x86.h" | ||
5 | |||
6 | void kvm_update_cpuid(struct kvm_vcpu *vcpu); | ||
7 | struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, | ||
8 | u32 function, u32 index); | ||
9 | int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, | ||
10 | struct kvm_cpuid_entry2 __user *entries); | ||
11 | int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | ||
12 | struct kvm_cpuid *cpuid, | ||
13 | struct kvm_cpuid_entry __user *entries); | ||
14 | int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, | ||
15 | struct kvm_cpuid2 *cpuid, | ||
16 | struct kvm_cpuid_entry2 __user *entries); | ||
17 | int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, | ||
18 | struct kvm_cpuid2 *cpuid, | ||
19 | struct kvm_cpuid_entry2 __user *entries); | ||
20 | |||
21 | |||
22 | static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu) | ||
23 | { | ||
24 | struct kvm_cpuid_entry2 *best; | ||
25 | |||
26 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | ||
27 | return best && (best->ecx & bit(X86_FEATURE_XSAVE)); | ||
28 | } | ||
29 | |||
30 | static inline bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu) | ||
31 | { | ||
32 | struct kvm_cpuid_entry2 *best; | ||
33 | |||
34 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
35 | return best && (best->ebx & bit(X86_FEATURE_SMEP)); | ||
36 | } | ||
37 | |||
38 | static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu) | ||
39 | { | ||
40 | struct kvm_cpuid_entry2 *best; | ||
41 | |||
42 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
43 | return best && (best->ebx & bit(X86_FEATURE_FSGSBASE)); | ||
44 | } | ||
45 | |||
46 | #endif | ||
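(These helpers establish the pattern for guest feature tests: look up the defining leaf once, then test the bit. A hypothetical addition in the same shape, shown only to illustrate the idiom; guest_cpuid_has_avx is not part of this patch, and X86_FEATURE_AVX is the standard leaf-1 ECX flag:)

    /* hypothetical helper following the pattern above */
    static inline bool guest_cpuid_has_avx(struct kvm_vcpu *vcpu)
    {
        struct kvm_cpuid_entry2 *best;

        best = kvm_find_cpuid_entry(vcpu, 1, 0);
        return best && (best->ecx & bit(X86_FEATURE_AVX));
    }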
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index f1e3be18a08f..05a562b85025 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -125,8 +125,9 @@ | |||
125 | #define Lock (1<<26) /* lock prefix is allowed for the instruction */ | 125 | #define Lock (1<<26) /* lock prefix is allowed for the instruction */ |
126 | #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */ | 126 | #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */ |
127 | #define No64 (1<<28) | 127 | #define No64 (1<<28) |
128 | #define PageTable (1 << 29) /* instruction used to write page table */ | ||
128 | /* Source 2 operand type */ | 129 | /* Source 2 operand type */ |
129 | #define Src2Shift (29) | 130 | #define Src2Shift (30) |
130 | #define Src2None (OpNone << Src2Shift) | 131 | #define Src2None (OpNone << Src2Shift) |
131 | #define Src2CL (OpCL << Src2Shift) | 132 | #define Src2CL (OpCL << Src2Shift) |
132 | #define Src2ImmByte (OpImmByte << Src2Shift) | 133 | #define Src2ImmByte (OpImmByte << Src2Shift) |
@@ -1674,11 +1675,6 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt) | |||
1674 | return X86EMUL_CONTINUE; | 1675 | return X86EMUL_CONTINUE; |
1675 | } | 1676 | } |
1676 | 1677 | ||
1677 | static int em_grp1a(struct x86_emulate_ctxt *ctxt) | ||
1678 | { | ||
1679 | return emulate_pop(ctxt, &ctxt->dst.val, ctxt->dst.bytes); | ||
1680 | } | ||
1681 | |||
1682 | static int em_grp2(struct x86_emulate_ctxt *ctxt) | 1678 | static int em_grp2(struct x86_emulate_ctxt *ctxt) |
1683 | { | 1679 | { |
1684 | switch (ctxt->modrm_reg) { | 1680 | switch (ctxt->modrm_reg) { |
@@ -1788,7 +1784,7 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt) | |||
1788 | return rc; | 1784 | return rc; |
1789 | } | 1785 | } |
1790 | 1786 | ||
1791 | static int em_grp9(struct x86_emulate_ctxt *ctxt) | 1787 | static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt) |
1792 | { | 1788 | { |
1793 | u64 old = ctxt->dst.orig_val64; | 1789 | u64 old = ctxt->dst.orig_val64; |
1794 | 1790 | ||
@@ -1831,6 +1827,24 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) | |||
1831 | return rc; | 1827 | return rc; |
1832 | } | 1828 | } |
1833 | 1829 | ||
1830 | static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) | ||
1831 | { | ||
1832 | /* Save real source value, then compare EAX against destination. */ | ||
1833 | ctxt->src.orig_val = ctxt->src.val; | ||
1834 | ctxt->src.val = ctxt->regs[VCPU_REGS_RAX]; | ||
1835 | emulate_2op_SrcV(ctxt, "cmp"); | ||
1836 | |||
1837 | if (ctxt->eflags & EFLG_ZF) { | ||
1838 | /* Success: write back to memory. */ | ||
1839 | ctxt->dst.val = ctxt->src.orig_val; | ||
1840 | } else { | ||
1841 | /* Failure: write the value we saw to EAX. */ | ||
1842 | ctxt->dst.type = OP_REG; | ||
1843 | ctxt->dst.addr.reg = (unsigned long *)&ctxt->regs[VCPU_REGS_RAX]; | ||
1844 | } | ||
1845 | return X86EMUL_CONTINUE; | ||
1846 | } | ||
1847 | |||
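(The two branches mirror the architectural CMPXCHG contract: if the accumulator matches the destination, ZF is set and memory takes the source; otherwise the accumulator is reloaded with the value found. A plain-C model of the non-atomic semantics, illustrative and ignoring every flag except ZF:)

    /* returns 1 on success (ZF=1), 0 on failure (ZF=0) */
    static int cmpxchg_model(unsigned long *mem, unsigned long *rax,
                             unsigned long src)
    {
        if (*mem == *rax) {
            *mem = src;     /* success: memory takes the new value */
            return 1;
        }
        *rax = *mem;        /* failure: EAX learns the current value */
        return 0;
    }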
1834 | static int em_lseg(struct x86_emulate_ctxt *ctxt) | 1848 | static int em_lseg(struct x86_emulate_ctxt *ctxt) |
1835 | { | 1849 | { |
1836 | int seg = ctxt->src2.val; | 1850 | int seg = ctxt->src2.val; |
@@ -2481,6 +2495,15 @@ static int em_das(struct x86_emulate_ctxt *ctxt) | |||
2481 | return X86EMUL_CONTINUE; | 2495 | return X86EMUL_CONTINUE; |
2482 | } | 2496 | } |
2483 | 2497 | ||
2498 | static int em_call(struct x86_emulate_ctxt *ctxt) | ||
2499 | { | ||
2500 | long rel = ctxt->src.val; | ||
2501 | |||
2502 | ctxt->src.val = (unsigned long)ctxt->_eip; | ||
2503 | jmp_rel(ctxt, rel); | ||
2504 | return em_push(ctxt); | ||
2505 | } | ||
2506 | |||
2484 | static int em_call_far(struct x86_emulate_ctxt *ctxt) | 2507 | static int em_call_far(struct x86_emulate_ctxt *ctxt) |
2485 | { | 2508 | { |
2486 | u16 sel, old_cs; | 2509 | u16 sel, old_cs; |
@@ -2622,12 +2645,75 @@ static int em_rdtsc(struct x86_emulate_ctxt *ctxt) | |||
2622 | return X86EMUL_CONTINUE; | 2645 | return X86EMUL_CONTINUE; |
2623 | } | 2646 | } |
2624 | 2647 | ||
2648 | static int em_rdpmc(struct x86_emulate_ctxt *ctxt) | ||
2649 | { | ||
2650 | u64 pmc; | ||
2651 | |||
2652 | if (ctxt->ops->read_pmc(ctxt, ctxt->regs[VCPU_REGS_RCX], &pmc)) | ||
2653 | return emulate_gp(ctxt, 0); | ||
2654 | ctxt->regs[VCPU_REGS_RAX] = (u32)pmc; | ||
2655 | ctxt->regs[VCPU_REGS_RDX] = pmc >> 32; | ||
2656 | return X86EMUL_CONTINUE; | ||
2657 | } | ||
2658 | |||
2625 | static int em_mov(struct x86_emulate_ctxt *ctxt) | 2659 | static int em_mov(struct x86_emulate_ctxt *ctxt) |
2626 | { | 2660 | { |
2627 | ctxt->dst.val = ctxt->src.val; | 2661 | ctxt->dst.val = ctxt->src.val; |
2628 | return X86EMUL_CONTINUE; | 2662 | return X86EMUL_CONTINUE; |
2629 | } | 2663 | } |
2630 | 2664 | ||
2665 | static int em_cr_write(struct x86_emulate_ctxt *ctxt) | ||
2666 | { | ||
2667 | if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val)) | ||
2668 | return emulate_gp(ctxt, 0); | ||
2669 | |||
2670 | /* Disable writeback. */ | ||
2671 | ctxt->dst.type = OP_NONE; | ||
2672 | return X86EMUL_CONTINUE; | ||
2673 | } | ||
2674 | |||
2675 | static int em_dr_write(struct x86_emulate_ctxt *ctxt) | ||
2676 | { | ||
2677 | unsigned long val; | ||
2678 | |||
2679 | if (ctxt->mode == X86EMUL_MODE_PROT64) | ||
2680 | val = ctxt->src.val & ~0ULL; | ||
2681 | else | ||
2682 | val = ctxt->src.val & ~0U; | ||
2683 | |||
2684 | /* #UD condition is already handled. */ | ||
2685 | if (ctxt->ops->set_dr(ctxt, ctxt->modrm_reg, val) < 0) | ||
2686 | return emulate_gp(ctxt, 0); | ||
2687 | |||
2688 | /* Disable writeback. */ | ||
2689 | ctxt->dst.type = OP_NONE; | ||
2690 | return X86EMUL_CONTINUE; | ||
2691 | } | ||
2692 | |||
2693 | static int em_wrmsr(struct x86_emulate_ctxt *ctxt) | ||
2694 | { | ||
2695 | u64 msr_data; | ||
2696 | |||
2697 | msr_data = (u32)ctxt->regs[VCPU_REGS_RAX] | ||
2698 | | ((u64)ctxt->regs[VCPU_REGS_RDX] << 32); | ||
2699 | if (ctxt->ops->set_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], msr_data)) | ||
2700 | return emulate_gp(ctxt, 0); | ||
2701 | |||
2702 | return X86EMUL_CONTINUE; | ||
2703 | } | ||
2704 | |||
2705 | static int em_rdmsr(struct x86_emulate_ctxt *ctxt) | ||
2706 | { | ||
2707 | u64 msr_data; | ||
2708 | |||
2709 | if (ctxt->ops->get_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], &msr_data)) | ||
2710 | return emulate_gp(ctxt, 0); | ||
2711 | |||
2712 | ctxt->regs[VCPU_REGS_RAX] = (u32)msr_data; | ||
2713 | ctxt->regs[VCPU_REGS_RDX] = msr_data >> 32; | ||
2714 | return X86EMUL_CONTINUE; | ||
2715 | } | ||
2716 | |||
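(Both helpers are just the EDX:EAX <-> u64 marshalling the ISA defines for WRMSR and RDMSR. The same packing as two pure functions, illustrative:)

    #include <stdint.h>

    static inline uint64_t msr_pack(uint32_t eax, uint32_t edx)
    {
        return (uint64_t)edx << 32 | eax;   /* WRMSR direction */
    }

    static inline void msr_unpack(uint64_t v, uint32_t *eax, uint32_t *edx)
    {
        *eax = (uint32_t)v;                 /* RDMSR direction */
        *edx = v >> 32;
    }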
2631 | static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt) | 2717 | static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt) |
2632 | { | 2718 | { |
2633 | if (ctxt->modrm_reg > VCPU_SREG_GS) | 2719 | if (ctxt->modrm_reg > VCPU_SREG_GS) |
@@ -2775,6 +2861,24 @@ static int em_jcxz(struct x86_emulate_ctxt *ctxt) | |||
2775 | return X86EMUL_CONTINUE; | 2861 | return X86EMUL_CONTINUE; |
2776 | } | 2862 | } |
2777 | 2863 | ||
2864 | static int em_in(struct x86_emulate_ctxt *ctxt) | ||
2865 | { | ||
2866 | if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val, | ||
2867 | &ctxt->dst.val)) | ||
2868 | return X86EMUL_IO_NEEDED; | ||
2869 | |||
2870 | return X86EMUL_CONTINUE; | ||
2871 | } | ||
2872 | |||
2873 | static int em_out(struct x86_emulate_ctxt *ctxt) | ||
2874 | { | ||
2875 | ctxt->ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val, | ||
2876 | &ctxt->src.val, 1); | ||
2877 | /* Disable writeback. */ | ||
2878 | ctxt->dst.type = OP_NONE; | ||
2879 | return X86EMUL_CONTINUE; | ||
2880 | } | ||
2881 | |||
2778 | static int em_cli(struct x86_emulate_ctxt *ctxt) | 2882 | static int em_cli(struct x86_emulate_ctxt *ctxt) |
2779 | { | 2883 | { |
2780 | if (emulator_bad_iopl(ctxt)) | 2884 | if (emulator_bad_iopl(ctxt)) |
@@ -2794,6 +2898,69 @@ static int em_sti(struct x86_emulate_ctxt *ctxt) | |||
2794 | return X86EMUL_CONTINUE; | 2898 | return X86EMUL_CONTINUE; |
2795 | } | 2899 | } |
2796 | 2900 | ||
2901 | static int em_bt(struct x86_emulate_ctxt *ctxt) | ||
2902 | { | ||
2903 | /* Disable writeback. */ | ||
2904 | ctxt->dst.type = OP_NONE; | ||
2905 | /* only subword offset */ | ||
2906 | ctxt->src.val &= (ctxt->dst.bytes << 3) - 1; | ||
2907 | |||
2908 | emulate_2op_SrcV_nobyte(ctxt, "bt"); | ||
2909 | return X86EMUL_CONTINUE; | ||
2910 | } | ||
2911 | |||
2912 | static int em_bts(struct x86_emulate_ctxt *ctxt) | ||
2913 | { | ||
2914 | emulate_2op_SrcV_nobyte(ctxt, "bts"); | ||
2915 | return X86EMUL_CONTINUE; | ||
2916 | } | ||
2917 | |||
2918 | static int em_btr(struct x86_emulate_ctxt *ctxt) | ||
2919 | { | ||
2920 | emulate_2op_SrcV_nobyte(ctxt, "btr"); | ||
2921 | return X86EMUL_CONTINUE; | ||
2922 | } | ||
2923 | |||
2924 | static int em_btc(struct x86_emulate_ctxt *ctxt) | ||
2925 | { | ||
2926 | emulate_2op_SrcV_nobyte(ctxt, "btc"); | ||
2927 | return X86EMUL_CONTINUE; | ||
2928 | } | ||
2929 | |||
2930 | static int em_bsf(struct x86_emulate_ctxt *ctxt) | ||
2931 | { | ||
2932 | u8 zf; | ||
2933 | |||
2934 | __asm__ ("bsf %2, %0; setz %1" | ||
2935 | : "=r"(ctxt->dst.val), "=q"(zf) | ||
2936 | : "r"(ctxt->src.val)); | ||
2937 | |||
2938 | ctxt->eflags &= ~X86_EFLAGS_ZF; | ||
2939 | if (zf) { | ||
2940 | ctxt->eflags |= X86_EFLAGS_ZF; | ||
2941 | /* Disable writeback. */ | ||
2942 | ctxt->dst.type = OP_NONE; | ||
2943 | } | ||
2944 | return X86EMUL_CONTINUE; | ||
2945 | } | ||
2946 | |||
2947 | static int em_bsr(struct x86_emulate_ctxt *ctxt) | ||
2948 | { | ||
2949 | u8 zf; | ||
2950 | |||
2951 | __asm__ ("bsr %2, %0; setz %1" | ||
2952 | : "=r"(ctxt->dst.val), "=q"(zf) | ||
2953 | : "r"(ctxt->src.val)); | ||
2954 | |||
2955 | ctxt->eflags &= ~X86_EFLAGS_ZF; | ||
2956 | if (zf) { | ||
2957 | ctxt->eflags |= X86_EFLAGS_ZF; | ||
2958 | /* Disable writeback. */ | ||
2959 | ctxt->dst.type = OP_NONE; | ||
2960 | } | ||
2961 | return X86EMUL_CONTINUE; | ||
2962 | } | ||
2963 | |||
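(Both bit-scan helpers use SETZ to learn whether the source was zero, in which case BSF/BSR leave the destination undefined and the emulator suppresses writeback. A standalone demo of the primitive, assuming an x86 host and GCC-style inline asm:)

    #include <stdio.h>

    int main(void)
    {
        unsigned long v = 0x48, lo, hi;     /* bits 3 and 6 set */
        unsigned char zf;

        __asm__ ("bsf %2, %0; setz %1" : "=r"(lo), "=q"(zf) : "r"(v));
        __asm__ ("bsr %2, %0" : "=r"(hi) : "r"(v));
        printf("bsf=%lu bsr=%lu zf=%u\n", lo, hi, zf);  /* 3 6 0 */
        return 0;
    }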
2797 | static bool valid_cr(int nr) | 2964 | static bool valid_cr(int nr) |
2798 | { | 2965 | { |
2799 | switch (nr) { | 2966 | switch (nr) { |
@@ -2867,9 +3034,6 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) | |||
2867 | break; | 3034 | break; |
2868 | } | 3035 | } |
2869 | case 4: { | 3036 | case 4: { |
2870 | u64 cr4; | ||
2871 | |||
2872 | cr4 = ctxt->ops->get_cr(ctxt, 4); | ||
2873 | ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); | 3037 | ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); |
2874 | 3038 | ||
2875 | if ((efer & EFER_LMA) && !(new_val & X86_CR4_PAE)) | 3039 | if ((efer & EFER_LMA) && !(new_val & X86_CR4_PAE)) |
@@ -3003,6 +3167,8 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) | |||
3003 | #define D2bv(_f) D((_f) | ByteOp), D(_f) | 3167 | #define D2bv(_f) D((_f) | ByteOp), D(_f) |
3004 | #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p) | 3168 | #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p) |
3005 | #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e) | 3169 | #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e) |
3170 | #define I2bvIP(_f, _e, _i, _p) \ | ||
3171 | IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p) | ||
3006 | 3172 | ||
3007 | #define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e), \ | 3173 | #define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e), \ |
3008 | I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ | 3174 | I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ |
@@ -3033,17 +3199,17 @@ static struct opcode group7_rm7[] = { | |||
3033 | 3199 | ||
3034 | static struct opcode group1[] = { | 3200 | static struct opcode group1[] = { |
3035 | I(Lock, em_add), | 3201 | I(Lock, em_add), |
3036 | I(Lock, em_or), | 3202 | I(Lock | PageTable, em_or), |
3037 | I(Lock, em_adc), | 3203 | I(Lock, em_adc), |
3038 | I(Lock, em_sbb), | 3204 | I(Lock, em_sbb), |
3039 | I(Lock, em_and), | 3205 | I(Lock | PageTable, em_and), |
3040 | I(Lock, em_sub), | 3206 | I(Lock, em_sub), |
3041 | I(Lock, em_xor), | 3207 | I(Lock, em_xor), |
3042 | I(0, em_cmp), | 3208 | I(0, em_cmp), |
3043 | }; | 3209 | }; |
3044 | 3210 | ||
3045 | static struct opcode group1A[] = { | 3211 | static struct opcode group1A[] = { |
3046 | D(DstMem | SrcNone | ModRM | Mov | Stack), N, N, N, N, N, N, N, | 3212 | I(DstMem | SrcNone | ModRM | Mov | Stack, em_pop), N, N, N, N, N, N, N, |
3047 | }; | 3213 | }; |
3048 | 3214 | ||
3049 | static struct opcode group3[] = { | 3215 | static struct opcode group3[] = { |
@@ -3058,16 +3224,19 @@ static struct opcode group3[] = { | |||
3058 | }; | 3224 | }; |
3059 | 3225 | ||
3060 | static struct opcode group4[] = { | 3226 | static struct opcode group4[] = { |
3061 | D(ByteOp | DstMem | SrcNone | ModRM | Lock), D(ByteOp | DstMem | SrcNone | ModRM | Lock), | 3227 | I(ByteOp | DstMem | SrcNone | ModRM | Lock, em_grp45), |
3228 | I(ByteOp | DstMem | SrcNone | ModRM | Lock, em_grp45), | ||
3062 | N, N, N, N, N, N, | 3229 | N, N, N, N, N, N, |
3063 | }; | 3230 | }; |
3064 | 3231 | ||
3065 | static struct opcode group5[] = { | 3232 | static struct opcode group5[] = { |
3066 | D(DstMem | SrcNone | ModRM | Lock), D(DstMem | SrcNone | ModRM | Lock), | 3233 | I(DstMem | SrcNone | ModRM | Lock, em_grp45), |
3067 | D(SrcMem | ModRM | Stack), | 3234 | I(DstMem | SrcNone | ModRM | Lock, em_grp45), |
3235 | I(SrcMem | ModRM | Stack, em_grp45), | ||
3068 | I(SrcMemFAddr | ModRM | ImplicitOps | Stack, em_call_far), | 3236 | I(SrcMemFAddr | ModRM | ImplicitOps | Stack, em_call_far), |
3069 | D(SrcMem | ModRM | Stack), D(SrcMemFAddr | ModRM | ImplicitOps), | 3237 | I(SrcMem | ModRM | Stack, em_grp45), |
3070 | D(SrcMem | ModRM | Stack), N, | 3238 | I(SrcMemFAddr | ModRM | ImplicitOps, em_grp45), |
3239 | I(SrcMem | ModRM | Stack, em_grp45), N, | ||
3071 | }; | 3240 | }; |
3072 | 3241 | ||
3073 | static struct opcode group6[] = { | 3242 | static struct opcode group6[] = { |
@@ -3096,18 +3265,21 @@ static struct group_dual group7 = { { | |||
3096 | 3265 | ||
3097 | static struct opcode group8[] = { | 3266 | static struct opcode group8[] = { |
3098 | N, N, N, N, | 3267 | N, N, N, N, |
3099 | D(DstMem | SrcImmByte | ModRM), D(DstMem | SrcImmByte | ModRM | Lock), | 3268 | I(DstMem | SrcImmByte | ModRM, em_bt), |
3100 | D(DstMem | SrcImmByte | ModRM | Lock), D(DstMem | SrcImmByte | ModRM | Lock), | 3269 | I(DstMem | SrcImmByte | ModRM | Lock | PageTable, em_bts), |
3270 | I(DstMem | SrcImmByte | ModRM | Lock, em_btr), | ||
3271 | I(DstMem | SrcImmByte | ModRM | Lock | PageTable, em_btc), | ||
3101 | }; | 3272 | }; |
3102 | 3273 | ||
3103 | static struct group_dual group9 = { { | 3274 | static struct group_dual group9 = { { |
3104 | N, D(DstMem64 | ModRM | Lock), N, N, N, N, N, N, | 3275 | N, I(DstMem64 | ModRM | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N, |
3105 | }, { | 3276 | }, { |
3106 | N, N, N, N, N, N, N, N, | 3277 | N, N, N, N, N, N, N, N, |
3107 | } }; | 3278 | } }; |
3108 | 3279 | ||
3109 | static struct opcode group11[] = { | 3280 | static struct opcode group11[] = { |
3110 | I(DstMem | SrcImm | ModRM | Mov, em_mov), X7(D(Undefined)), | 3281 | I(DstMem | SrcImm | ModRM | Mov | PageTable, em_mov), |
3282 | X7(D(Undefined)), | ||
3111 | }; | 3283 | }; |
3112 | 3284 | ||
3113 | static struct gprefix pfx_0f_6f_0f_7f = { | 3285 | static struct gprefix pfx_0f_6f_0f_7f = { |
@@ -3120,7 +3292,7 @@ static struct opcode opcode_table[256] = { | |||
3120 | I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg), | 3292 | I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg), |
3121 | I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg), | 3293 | I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg), |
3122 | /* 0x08 - 0x0F */ | 3294 | /* 0x08 - 0x0F */ |
3123 | I6ALU(Lock, em_or), | 3295 | I6ALU(Lock | PageTable, em_or), |
3124 | I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg), | 3296 | I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg), |
3125 | N, | 3297 | N, |
3126 | /* 0x10 - 0x17 */ | 3298 | /* 0x10 - 0x17 */ |
@@ -3132,7 +3304,7 @@ static struct opcode opcode_table[256] = { | |||
3132 | I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg), | 3304 | I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg), |
3133 | I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg), | 3305 | I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg), |
3134 | /* 0x20 - 0x27 */ | 3306 | /* 0x20 - 0x27 */ |
3135 | I6ALU(Lock, em_and), N, N, | 3307 | I6ALU(Lock | PageTable, em_and), N, N, |
3136 | /* 0x28 - 0x2F */ | 3308 | /* 0x28 - 0x2F */ |
3137 | I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), | 3309 | I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), |
3138 | /* 0x30 - 0x37 */ | 3310 | /* 0x30 - 0x37 */ |
@@ -3155,8 +3327,8 @@ static struct opcode opcode_table[256] = { | |||
3155 | I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op), | 3327 | I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op), |
3156 | I(SrcImmByte | Mov | Stack, em_push), | 3328 | I(SrcImmByte | Mov | Stack, em_push), |
3157 | I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op), | 3329 | I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op), |
3158 | D2bvIP(DstDI | SrcDX | Mov | String, ins, check_perm_in), /* insb, insw/insd */ | 3330 | I2bvIP(DstDI | SrcDX | Mov | String, em_in, ins, check_perm_in), /* insb, insw/insd */ |
3159 | D2bvIP(SrcSI | DstDX | String, outs, check_perm_out), /* outsb, outsw/outsd */ | 3331 | I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */ |
3160 | /* 0x70 - 0x7F */ | 3332 | /* 0x70 - 0x7F */ |
3161 | X16(D(SrcImmByte)), | 3333 | X16(D(SrcImmByte)), |
3162 | /* 0x80 - 0x87 */ | 3334 | /* 0x80 - 0x87 */ |
@@ -3165,11 +3337,11 @@ static struct opcode opcode_table[256] = { | |||
3165 | G(ByteOp | DstMem | SrcImm | ModRM | No64 | Group, group1), | 3337 | G(ByteOp | DstMem | SrcImm | ModRM | No64 | Group, group1), |
3166 | G(DstMem | SrcImmByte | ModRM | Group, group1), | 3338 | G(DstMem | SrcImmByte | ModRM | Group, group1), |
3167 | I2bv(DstMem | SrcReg | ModRM, em_test), | 3339 | I2bv(DstMem | SrcReg | ModRM, em_test), |
3168 | I2bv(DstMem | SrcReg | ModRM | Lock, em_xchg), | 3340 | I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg), |
3169 | /* 0x88 - 0x8F */ | 3341 | /* 0x88 - 0x8F */ |
3170 | I2bv(DstMem | SrcReg | ModRM | Mov, em_mov), | 3342 | I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov), |
3171 | I2bv(DstReg | SrcMem | ModRM | Mov, em_mov), | 3343 | I2bv(DstReg | SrcMem | ModRM | Mov, em_mov), |
3172 | I(DstMem | SrcNone | ModRM | Mov, em_mov_rm_sreg), | 3344 | I(DstMem | SrcNone | ModRM | Mov | PageTable, em_mov_rm_sreg), |
3173 | D(ModRM | SrcMem | NoAccess | DstReg), | 3345 | D(ModRM | SrcMem | NoAccess | DstReg), |
3174 | I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm), | 3346 | I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm), |
3175 | G(0, group1A), | 3347 | G(0, group1A), |
@@ -3182,7 +3354,7 @@ static struct opcode opcode_table[256] = { | |||
3182 | II(ImplicitOps | Stack, em_popf, popf), N, N, | 3354 | II(ImplicitOps | Stack, em_popf, popf), N, N, |
3183 | /* 0xA0 - 0xA7 */ | 3355 | /* 0xA0 - 0xA7 */ |
3184 | I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), | 3356 | I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), |
3185 | I2bv(DstMem | SrcAcc | Mov | MemAbs, em_mov), | 3357 | I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), |
3186 | I2bv(SrcSI | DstDI | Mov | String, em_mov), | 3358 | I2bv(SrcSI | DstDI | Mov | String, em_mov), |
3187 | I2bv(SrcSI | DstDI | String, em_cmp), | 3359 | I2bv(SrcSI | DstDI | String, em_cmp), |
3188 | /* 0xA8 - 0xAF */ | 3360 | /* 0xA8 - 0xAF */ |
@@ -3213,13 +3385,13 @@ static struct opcode opcode_table[256] = { | |||
3213 | /* 0xE0 - 0xE7 */ | 3385 | /* 0xE0 - 0xE7 */ |
3214 | X3(I(SrcImmByte, em_loop)), | 3386 | X3(I(SrcImmByte, em_loop)), |
3215 | I(SrcImmByte, em_jcxz), | 3387 | I(SrcImmByte, em_jcxz), |
3216 | D2bvIP(SrcImmUByte | DstAcc, in, check_perm_in), | 3388 | I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in), |
3217 | D2bvIP(SrcAcc | DstImmUByte, out, check_perm_out), | 3389 | I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out), |
3218 | /* 0xE8 - 0xEF */ | 3390 | /* 0xE8 - 0xEF */ |
3219 | D(SrcImm | Stack), D(SrcImm | ImplicitOps), | 3391 | I(SrcImm | Stack, em_call), D(SrcImm | ImplicitOps), |
3220 | I(SrcImmFAddr | No64, em_jmp_far), D(SrcImmByte | ImplicitOps), | 3392 | I(SrcImmFAddr | No64, em_jmp_far), D(SrcImmByte | ImplicitOps), |
3221 | D2bvIP(SrcDX | DstAcc, in, check_perm_in), | 3393 | I2bvIP(SrcDX | DstAcc, em_in, in, check_perm_in), |
3222 | D2bvIP(SrcAcc | DstDX, out, check_perm_out), | 3394 | I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out), |
3223 | /* 0xF0 - 0xF7 */ | 3395 | /* 0xF0 - 0xF7 */ |
3224 | N, DI(ImplicitOps, icebp), N, N, | 3396 | N, DI(ImplicitOps, icebp), N, N, |
3225 | DI(ImplicitOps | Priv, hlt), D(ImplicitOps), | 3397 | DI(ImplicitOps | Priv, hlt), D(ImplicitOps), |
@@ -3242,15 +3414,15 @@ static struct opcode twobyte_table[256] = { | |||
3242 | /* 0x20 - 0x2F */ | 3414 | /* 0x20 - 0x2F */ |
3243 | DIP(ModRM | DstMem | Priv | Op3264, cr_read, check_cr_read), | 3415 | DIP(ModRM | DstMem | Priv | Op3264, cr_read, check_cr_read), |
3244 | DIP(ModRM | DstMem | Priv | Op3264, dr_read, check_dr_read), | 3416 | DIP(ModRM | DstMem | Priv | Op3264, dr_read, check_dr_read), |
3245 | DIP(ModRM | SrcMem | Priv | Op3264, cr_write, check_cr_write), | 3417 | IIP(ModRM | SrcMem | Priv | Op3264, em_cr_write, cr_write, check_cr_write), |
3246 | DIP(ModRM | SrcMem | Priv | Op3264, dr_write, check_dr_write), | 3418 | IIP(ModRM | SrcMem | Priv | Op3264, em_dr_write, dr_write, check_dr_write), |
3247 | N, N, N, N, | 3419 | N, N, N, N, |
3248 | N, N, N, N, N, N, N, N, | 3420 | N, N, N, N, N, N, N, N, |
3249 | /* 0x30 - 0x3F */ | 3421 | /* 0x30 - 0x3F */ |
3250 | DI(ImplicitOps | Priv, wrmsr), | 3422 | II(ImplicitOps | Priv, em_wrmsr, wrmsr), |
3251 | IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc), | 3423 | IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc), |
3252 | DI(ImplicitOps | Priv, rdmsr), | 3424 | II(ImplicitOps | Priv, em_rdmsr, rdmsr), |
3253 | DIP(ImplicitOps | Priv, rdpmc, check_rdpmc), | 3425 | IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc), |
3254 | I(ImplicitOps | VendorSpecific, em_sysenter), | 3426 | I(ImplicitOps | VendorSpecific, em_sysenter), |
3255 | I(ImplicitOps | Priv | VendorSpecific, em_sysexit), | 3427 | I(ImplicitOps | Priv | VendorSpecific, em_sysexit), |
3256 | N, N, | 3428 | N, N, |
@@ -3275,26 +3447,28 @@ static struct opcode twobyte_table[256] = { | |||
3275 | X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), | 3447 | X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), |
3276 | /* 0xA0 - 0xA7 */ | 3448 | /* 0xA0 - 0xA7 */ |
3277 | I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg), | 3449 | I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg), |
3278 | DI(ImplicitOps, cpuid), D(DstMem | SrcReg | ModRM | BitOp), | 3450 | DI(ImplicitOps, cpuid), I(DstMem | SrcReg | ModRM | BitOp, em_bt), |
3279 | D(DstMem | SrcReg | Src2ImmByte | ModRM), | 3451 | D(DstMem | SrcReg | Src2ImmByte | ModRM), |
3280 | D(DstMem | SrcReg | Src2CL | ModRM), N, N, | 3452 | D(DstMem | SrcReg | Src2CL | ModRM), N, N, |
3281 | /* 0xA8 - 0xAF */ | 3453 | /* 0xA8 - 0xAF */ |
3282 | I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg), | 3454 | I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg), |
3283 | DI(ImplicitOps, rsm), D(DstMem | SrcReg | ModRM | BitOp | Lock), | 3455 | DI(ImplicitOps, rsm), |
3456 | I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), | ||
3284 | D(DstMem | SrcReg | Src2ImmByte | ModRM), | 3457 | D(DstMem | SrcReg | Src2ImmByte | ModRM), |
3285 | D(DstMem | SrcReg | Src2CL | ModRM), | 3458 | D(DstMem | SrcReg | Src2CL | ModRM), |
3286 | D(ModRM), I(DstReg | SrcMem | ModRM, em_imul), | 3459 | D(ModRM), I(DstReg | SrcMem | ModRM, em_imul), |
3287 | /* 0xB0 - 0xB7 */ | 3460 | /* 0xB0 - 0xB7 */ |
3288 | D2bv(DstMem | SrcReg | ModRM | Lock), | 3461 | I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg), |
3289 | I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg), | 3462 | I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg), |
3290 | D(DstMem | SrcReg | ModRM | BitOp | Lock), | 3463 | I(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr), |
3291 | I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg), | 3464 | I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg), |
3292 | I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg), | 3465 | I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg), |
3293 | D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), | 3466 | D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), |
3294 | /* 0xB8 - 0xBF */ | 3467 | /* 0xB8 - 0xBF */ |
3295 | N, N, | 3468 | N, N, |
3296 | G(BitOp, group8), D(DstMem | SrcReg | ModRM | BitOp | Lock), | 3469 | G(BitOp, group8), |
3297 | D(DstReg | SrcMem | ModRM), D(DstReg | SrcMem | ModRM), | 3470 | I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), |
3471 | I(DstReg | SrcMem | ModRM, em_bsf), I(DstReg | SrcMem | ModRM, em_bsr), | ||
3298 | D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), | 3472 | D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), |
3299 | /* 0xC0 - 0xCF */ | 3473 | /* 0xC0 - 0xCF */ |
3300 | D2bv(DstMem | SrcReg | ModRM | Lock), | 3474 | D2bv(DstMem | SrcReg | ModRM | Lock), |
@@ -3320,6 +3494,7 @@ static struct opcode twobyte_table[256] = { | |||
3320 | #undef D2bv | 3494 | #undef D2bv |
3321 | #undef D2bvIP | 3495 | #undef D2bvIP |
3322 | #undef I2bv | 3496 | #undef I2bv |
3497 | #undef I2bvIP | ||
3323 | #undef I6ALU | 3498 | #undef I6ALU |
3324 | 3499 | ||
3325 | static unsigned imm_size(struct x86_emulate_ctxt *ctxt) | 3500 | static unsigned imm_size(struct x86_emulate_ctxt *ctxt) |
@@ -3697,6 +3872,11 @@ done: | |||
3697 | return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK; | 3872 | return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK; |
3698 | } | 3873 | } |
3699 | 3874 | ||
3875 | bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt) | ||
3876 | { | ||
3877 | return ctxt->d & PageTable; | ||
3878 | } | ||
3879 | |||
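(This predicate exposes the new PageTable decode flag so the MMU side can ask whether a faulting instruction is one of the opcodes tagged above as plausible page-table writers. A toy model of the bit layout, assuming the encoding from this patch: PageTable at bit 29, which is why Src2Shift moved from 29 to 30:)

    #include <stdio.h>

    #define PageTable  (1u << 29)   /* flag bit assumed from the hunk above */
    #define Src2Shift  30

    int main(void)
    {
        unsigned long long d = PageTable | (5ull << Src2Shift);

        printf("page-table insn: %d, src2 operand type: %llu\n",
               !!(d & PageTable), d >> Src2Shift);
        return 0;
    }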
3700 | static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) | 3880 | static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) |
3701 | { | 3881 | { |
3702 | /* The second termination condition only applies for REPE | 3882 | /* The second termination condition only applies for REPE |
@@ -3720,7 +3900,6 @@ static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) | |||
3720 | int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | 3900 | int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) |
3721 | { | 3901 | { |
3722 | struct x86_emulate_ops *ops = ctxt->ops; | 3902 | struct x86_emulate_ops *ops = ctxt->ops; |
3723 | u64 msr_data; | ||
3724 | int rc = X86EMUL_CONTINUE; | 3903 | int rc = X86EMUL_CONTINUE; |
3725 | int saved_dst_type = ctxt->dst.type; | 3904 | int saved_dst_type = ctxt->dst.type; |
3726 | 3905 | ||
@@ -3854,15 +4033,6 @@ special_insn: | |||
3854 | goto cannot_emulate; | 4033 | goto cannot_emulate; |
3855 | ctxt->dst.val = (s32) ctxt->src.val; | 4034 | ctxt->dst.val = (s32) ctxt->src.val; |
3856 | break; | 4035 | break; |
3857 | case 0x6c: /* insb */ | ||
3858 | case 0x6d: /* insw/insd */ | ||
3859 | ctxt->src.val = ctxt->regs[VCPU_REGS_RDX]; | ||
3860 | goto do_io_in; | ||
3861 | case 0x6e: /* outsb */ | ||
3862 | case 0x6f: /* outsw/outsd */ | ||
3863 | ctxt->dst.val = ctxt->regs[VCPU_REGS_RDX]; | ||
3864 | goto do_io_out; | ||
3865 | break; | ||
3866 | case 0x70 ... 0x7f: /* jcc (short) */ | 4036 | case 0x70 ... 0x7f: /* jcc (short) */ |
3867 | if (test_cc(ctxt->b, ctxt->eflags)) | 4037 | if (test_cc(ctxt->b, ctxt->eflags)) |
3868 | jmp_rel(ctxt, ctxt->src.val); | 4038 | jmp_rel(ctxt, ctxt->src.val); |
@@ -3870,9 +4040,6 @@ special_insn: | |||
3870 | case 0x8d: /* lea r16/r32, m */ | 4040 | case 0x8d: /* lea r16/r32, m */ |
3871 | ctxt->dst.val = ctxt->src.addr.mem.ea; | 4041 | ctxt->dst.val = ctxt->src.addr.mem.ea; |
3872 | break; | 4042 | break; |
3873 | case 0x8f: /* pop (sole member of Grp1a) */ | ||
3874 | rc = em_grp1a(ctxt); | ||
3875 | break; | ||
3876 | case 0x90 ... 0x97: /* nop / xchg reg, rax */ | 4043 | case 0x90 ... 0x97: /* nop / xchg reg, rax */ |
3877 | if (ctxt->dst.addr.reg == &ctxt->regs[VCPU_REGS_RAX]) | 4044 | if (ctxt->dst.addr.reg == &ctxt->regs[VCPU_REGS_RAX]) |
3878 | break; | 4045 | break; |
@@ -3905,38 +4072,11 @@ special_insn: | |||
3905 | ctxt->src.val = ctxt->regs[VCPU_REGS_RCX]; | 4072 | ctxt->src.val = ctxt->regs[VCPU_REGS_RCX]; |
3906 | rc = em_grp2(ctxt); | 4073 | rc = em_grp2(ctxt); |
3907 | break; | 4074 | break; |
3908 | case 0xe4: /* inb */ | ||
3909 | case 0xe5: /* in */ | ||
3910 | goto do_io_in; | ||
3911 | case 0xe6: /* outb */ | ||
3912 | case 0xe7: /* out */ | ||
3913 | goto do_io_out; | ||
3914 | case 0xe8: /* call (near) */ { | ||
3915 | long int rel = ctxt->src.val; | ||
3916 | ctxt->src.val = (unsigned long) ctxt->_eip; | ||
3917 | jmp_rel(ctxt, rel); | ||
3918 | rc = em_push(ctxt); | ||
3919 | break; | ||
3920 | } | ||
3921 | case 0xe9: /* jmp rel */ | 4075 | case 0xe9: /* jmp rel */ |
3922 | case 0xeb: /* jmp rel short */ | 4076 | case 0xeb: /* jmp rel short */ |
3923 | jmp_rel(ctxt, ctxt->src.val); | 4077 | jmp_rel(ctxt, ctxt->src.val); |
3924 | ctxt->dst.type = OP_NONE; /* Disable writeback. */ | 4078 | ctxt->dst.type = OP_NONE; /* Disable writeback. */ |
3925 | break; | 4079 | break; |
3926 | case 0xec: /* in al,dx */ | ||
3927 | case 0xed: /* in (e/r)ax,dx */ | ||
3928 | do_io_in: | ||
3929 | if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val, | ||
3930 | &ctxt->dst.val)) | ||
3931 | goto done; /* IO is needed */ | ||
3932 | break; | ||
3933 | case 0xee: /* out dx,al */ | ||
3934 | case 0xef: /* out dx,(e/r)ax */ | ||
3935 | do_io_out: | ||
3936 | ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val, | ||
3937 | &ctxt->src.val, 1); | ||
3938 | ctxt->dst.type = OP_NONE; /* Disable writeback. */ | ||
3939 | break; | ||
3940 | case 0xf4: /* hlt */ | 4080 | case 0xf4: /* hlt */ |
3941 | ctxt->ops->halt(ctxt); | 4081 | ctxt->ops->halt(ctxt); |
3942 | break; | 4082 | break; |
@@ -3956,12 +4096,6 @@ special_insn: | |||
3956 | case 0xfd: /* std */ | 4096 | case 0xfd: /* std */ |
3957 | ctxt->eflags |= EFLG_DF; | 4097 | ctxt->eflags |= EFLG_DF; |
3958 | break; | 4098 | break; |
3959 | case 0xfe: /* Grp4 */ | ||
3960 | rc = em_grp45(ctxt); | ||
3961 | break; | ||
3962 | case 0xff: /* Grp5 */ | ||
3963 | rc = em_grp45(ctxt); | ||
3964 | break; | ||
3965 | default: | 4099 | default: |
3966 | goto cannot_emulate; | 4100 | goto cannot_emulate; |
3967 | } | 4101 | } |
@@ -4036,49 +4170,6 @@ twobyte_insn: | |||
4036 | case 0x21: /* mov from dr to reg */ | 4170 | case 0x21: /* mov from dr to reg */ |
4037 | ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val); | 4171 | ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val); |
4038 | break; | 4172 | break; |
4039 | case 0x22: /* mov reg, cr */ | ||
4040 | if (ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val)) { | ||
4041 | emulate_gp(ctxt, 0); | ||
4042 | rc = X86EMUL_PROPAGATE_FAULT; | ||
4043 | goto done; | ||
4044 | } | ||
4045 | ctxt->dst.type = OP_NONE; | ||
4046 | break; | ||
4047 | case 0x23: /* mov from reg to dr */ | ||
4048 | if (ops->set_dr(ctxt, ctxt->modrm_reg, ctxt->src.val & | ||
4049 | ((ctxt->mode == X86EMUL_MODE_PROT64) ? | ||
4050 | ~0ULL : ~0U)) < 0) { | ||
4051 | /* #UD condition is already handled by the code above */ | ||
4052 | emulate_gp(ctxt, 0); | ||
4053 | rc = X86EMUL_PROPAGATE_FAULT; | ||
4054 | goto done; | ||
4055 | } | ||
4056 | |||
4057 | ctxt->dst.type = OP_NONE; /* no writeback */ | ||
4058 | break; | ||
4059 | case 0x30: | ||
4060 | /* wrmsr */ | ||
4061 | msr_data = (u32)ctxt->regs[VCPU_REGS_RAX] | ||
4062 | | ((u64)ctxt->regs[VCPU_REGS_RDX] << 32); | ||
4063 | if (ops->set_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], msr_data)) { | ||
4064 | emulate_gp(ctxt, 0); | ||
4065 | rc = X86EMUL_PROPAGATE_FAULT; | ||
4066 | goto done; | ||
4067 | } | ||
4068 | rc = X86EMUL_CONTINUE; | ||
4069 | break; | ||
4070 | case 0x32: | ||
4071 | /* rdmsr */ | ||
4072 | if (ops->get_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], &msr_data)) { | ||
4073 | emulate_gp(ctxt, 0); | ||
4074 | rc = X86EMUL_PROPAGATE_FAULT; | ||
4075 | goto done; | ||
4076 | } else { | ||
4077 | ctxt->regs[VCPU_REGS_RAX] = (u32)msr_data; | ||
4078 | ctxt->regs[VCPU_REGS_RDX] = msr_data >> 32; | ||
4079 | } | ||
4080 | rc = X86EMUL_CONTINUE; | ||
4081 | break; | ||
4082 | case 0x40 ... 0x4f: /* cmov */ | 4173 | case 0x40 ... 0x4f: /* cmov */ |
4083 | ctxt->dst.val = ctxt->dst.orig_val = ctxt->src.val; | 4174 | ctxt->dst.val = ctxt->dst.orig_val = ctxt->src.val; |
4084 | if (!test_cc(ctxt->b, ctxt->eflags)) | 4175 | if (!test_cc(ctxt->b, ctxt->eflags)) |
@@ -4091,93 +4182,21 @@ twobyte_insn: | |||
4091 | case 0x90 ... 0x9f: /* setcc r/m8 */ | 4182 | case 0x90 ... 0x9f: /* setcc r/m8 */ |
4092 | ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags); | 4183 | ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags); |
4093 | break; | 4184 | break; |
4094 | case 0xa3: | ||
4095 | bt: /* bt */ | ||
4096 | ctxt->dst.type = OP_NONE; | ||
4097 | /* only subword offset */ | ||
4098 | ctxt->src.val &= (ctxt->dst.bytes << 3) - 1; | ||
4099 | emulate_2op_SrcV_nobyte(ctxt, "bt"); | ||
4100 | break; | ||
4101 | case 0xa4: /* shld imm8, r, r/m */ | 4185 | case 0xa4: /* shld imm8, r, r/m */ |
4102 | case 0xa5: /* shld cl, r, r/m */ | 4186 | case 0xa5: /* shld cl, r, r/m */ |
4103 | emulate_2op_cl(ctxt, "shld"); | 4187 | emulate_2op_cl(ctxt, "shld"); |
4104 | break; | 4188 | break; |
4105 | case 0xab: | ||
4106 | bts: /* bts */ | ||
4107 | emulate_2op_SrcV_nobyte(ctxt, "bts"); | ||
4108 | break; | ||
4109 | case 0xac: /* shrd imm8, r, r/m */ | 4189 | case 0xac: /* shrd imm8, r, r/m */ |
4110 | case 0xad: /* shrd cl, r, r/m */ | 4190 | case 0xad: /* shrd cl, r, r/m */ |
4111 | emulate_2op_cl(ctxt, "shrd"); | 4191 | emulate_2op_cl(ctxt, "shrd"); |
4112 | break; | 4192 | break; |
4113 | case 0xae: /* clflush */ | 4193 | case 0xae: /* clflush */ |
4114 | break; | 4194 | break; |
4115 | case 0xb0 ... 0xb1: /* cmpxchg */ | ||
4116 | /* | ||
4117 | * Save real source value, then compare EAX against | ||
4118 | * destination. | ||
4119 | */ | ||
4120 | ctxt->src.orig_val = ctxt->src.val; | ||
4121 | ctxt->src.val = ctxt->regs[VCPU_REGS_RAX]; | ||
4122 | emulate_2op_SrcV(ctxt, "cmp"); | ||
4123 | if (ctxt->eflags & EFLG_ZF) { | ||
4124 | /* Success: write back to memory. */ | ||
4125 | ctxt->dst.val = ctxt->src.orig_val; | ||
4126 | } else { | ||
4127 | /* Failure: write the value we saw to EAX. */ | ||
4128 | ctxt->dst.type = OP_REG; | ||
4129 | ctxt->dst.addr.reg = (unsigned long *)&ctxt->regs[VCPU_REGS_RAX]; | ||
4130 | } | ||
4131 | break; | ||
4132 | case 0xb3: | ||
4133 | btr: /* btr */ | ||
4134 | emulate_2op_SrcV_nobyte(ctxt, "btr"); | ||
4135 | break; | ||
4136 | case 0xb6 ... 0xb7: /* movzx */ | 4195 | case 0xb6 ... 0xb7: /* movzx */ |
4137 | ctxt->dst.bytes = ctxt->op_bytes; | 4196 | ctxt->dst.bytes = ctxt->op_bytes; |
4138 | ctxt->dst.val = (ctxt->d & ByteOp) ? (u8) ctxt->src.val | 4197 | ctxt->dst.val = (ctxt->d & ByteOp) ? (u8) ctxt->src.val |
4139 | : (u16) ctxt->src.val; | 4198 | : (u16) ctxt->src.val; |
4140 | break; | 4199 | break; |
4141 | case 0xba: /* Grp8 */ | ||
4142 | switch (ctxt->modrm_reg & 3) { | ||
4143 | case 0: | ||
4144 | goto bt; | ||
4145 | case 1: | ||
4146 | goto bts; | ||
4147 | case 2: | ||
4148 | goto btr; | ||
4149 | case 3: | ||
4150 | goto btc; | ||
4151 | } | ||
4152 | break; | ||
4153 | case 0xbb: | ||
4154 | btc: /* btc */ | ||
4155 | emulate_2op_SrcV_nobyte(ctxt, "btc"); | ||
4156 | break; | ||
4157 | case 0xbc: { /* bsf */ | ||
4158 | u8 zf; | ||
4159 | __asm__ ("bsf %2, %0; setz %1" | ||
4160 | : "=r"(ctxt->dst.val), "=q"(zf) | ||
4161 | : "r"(ctxt->src.val)); | ||
4162 | ctxt->eflags &= ~X86_EFLAGS_ZF; | ||
4163 | if (zf) { | ||
4164 | ctxt->eflags |= X86_EFLAGS_ZF; | ||
4165 | ctxt->dst.type = OP_NONE; /* Disable writeback. */ | ||
4166 | } | ||
4167 | break; | ||
4168 | } | ||
4169 | case 0xbd: { /* bsr */ | ||
4170 | u8 zf; | ||
4171 | __asm__ ("bsr %2, %0; setz %1" | ||
4172 | : "=r"(ctxt->dst.val), "=q"(zf) | ||
4173 | : "r"(ctxt->src.val)); | ||
4174 | ctxt->eflags &= ~X86_EFLAGS_ZF; | ||
4175 | if (zf) { | ||
4176 | ctxt->eflags |= X86_EFLAGS_ZF; | ||
4177 | ctxt->dst.type = OP_NONE; /* Disable writeback. */ | ||
4178 | } | ||
4179 | break; | ||
4180 | } | ||
4181 | case 0xbe ... 0xbf: /* movsx */ | 4200 | case 0xbe ... 0xbf: /* movsx */ |
4182 | ctxt->dst.bytes = ctxt->op_bytes; | 4201 | ctxt->dst.bytes = ctxt->op_bytes; |
4183 | ctxt->dst.val = (ctxt->d & ByteOp) ? (s8) ctxt->src.val : | 4202 | ctxt->dst.val = (ctxt->d & ByteOp) ? (s8) ctxt->src.val : |
@@ -4194,9 +4213,6 @@ twobyte_insn: | |||
4194 | ctxt->dst.val = (ctxt->op_bytes == 4) ? (u32) ctxt->src.val : | 4213 | ctxt->dst.val = (ctxt->op_bytes == 4) ? (u32) ctxt->src.val : |
4195 | (u64) ctxt->src.val; | 4214 | (u64) ctxt->src.val; |
4196 | break; | 4215 | break; |
4197 | case 0xc7: /* Grp9 (cmpxchg8b) */ | ||
4198 | rc = em_grp9(ctxt); | ||
4199 | break; | ||
4200 | default: | 4216 | default: |
4201 | goto cannot_emulate; | 4217 | goto cannot_emulate; |
4202 | } | 4218 | } |
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 405f2620392f..d68f99df690c 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
@@ -344,7 +344,7 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) | |||
344 | struct kvm_timer *pt = &ps->pit_timer; | 344 | struct kvm_timer *pt = &ps->pit_timer; |
345 | s64 interval; | 345 | s64 interval; |
346 | 346 | ||
347 | if (!irqchip_in_kernel(kvm)) | 347 | if (!irqchip_in_kernel(kvm) || ps->flags & KVM_PIT_FLAGS_HPET_LEGACY) |
348 | return; | 348 | return; |
349 | 349 | ||
350 | interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); | 350 | interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); |
@@ -397,15 +397,11 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val) | |||
397 | case 1: | 397 | case 1: |
398 | /* FIXME: enhance mode 4 precision */ | 398 | /* FIXME: enhance mode 4 precision */ |
399 | case 4: | 399 | case 4: |
400 | if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)) { | 400 | create_pit_timer(kvm, val, 0); |
401 | create_pit_timer(kvm, val, 0); | ||
402 | } | ||
403 | break; | 401 | break; |
404 | case 2: | 402 | case 2: |
405 | case 3: | 403 | case 3: |
406 | if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)){ | 404 | create_pit_timer(kvm, val, 1); |
407 | create_pit_timer(kvm, val, 1); | ||
408 | } | ||
409 | break; | 405 | break; |
410 | default: | 406 | default: |
411 | destroy_pit_timer(kvm->arch.vpit); | 407 | destroy_pit_timer(kvm->arch.vpit); |
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index cac4746d7ffb..b6a73537e1ef 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
@@ -262,9 +262,10 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
262 | 262 | ||
263 | void kvm_pic_reset(struct kvm_kpic_state *s) | 263 | void kvm_pic_reset(struct kvm_kpic_state *s) |
264 | { | 264 | { |
265 | int irq; | 265 | int irq, i; |
266 | struct kvm_vcpu *vcpu0 = s->pics_state->kvm->bsp_vcpu; | 266 | struct kvm_vcpu *vcpu; |
267 | u8 irr = s->irr, isr = s->imr; | 267 | u8 irr = s->irr, isr = s->imr; |
268 | bool found = false; | ||
268 | 269 | ||
269 | s->last_irr = 0; | 270 | s->last_irr = 0; |
270 | s->irr = 0; | 271 | s->irr = 0; |
@@ -281,12 +282,19 @@ void kvm_pic_reset(struct kvm_kpic_state *s) | |||
281 | s->special_fully_nested_mode = 0; | 282 | s->special_fully_nested_mode = 0; |
282 | s->init4 = 0; | 283 | s->init4 = 0; |
283 | 284 | ||
284 | for (irq = 0; irq < PIC_NUM_PINS/2; irq++) { | 285 | kvm_for_each_vcpu(i, vcpu, s->pics_state->kvm) |
285 | if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0)) | 286 | if (kvm_apic_accept_pic_intr(vcpu)) { |
286 | if (irr & (1 << irq) || isr & (1 << irq)) { | 287 | found = true; |
287 | pic_clear_isr(s, irq); | 288 | break; |
288 | } | 289 | } |
289 | } | 290 | |
291 | |||
292 | if (!found) | ||
293 | return; | ||
294 | |||
295 | for (irq = 0; irq < PIC_NUM_PINS/2; irq++) | ||
296 | if (irr & (1 << irq) || isr & (1 << irq)) | ||
297 | pic_clear_isr(s, irq); | ||
290 | } | 298 | } |
291 | 299 | ||
292 | static void pic_ioport_write(void *opaque, u32 addr, u32 val) | 300 | static void pic_ioport_write(void *opaque, u32 addr, u32 val) |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 54abb40199d6..cfdc6e0ef002 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include "irq.h" | 38 | #include "irq.h" |
39 | #include "trace.h" | 39 | #include "trace.h" |
40 | #include "x86.h" | 40 | #include "x86.h" |
41 | #include "cpuid.h" | ||
41 | 42 | ||
42 | #ifndef CONFIG_X86_64 | 43 | #ifndef CONFIG_X86_64 |
43 | #define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) | 44 | #define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) |
@@ -1120,7 +1121,7 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu) | |||
1120 | return 0; | 1121 | return 0; |
1121 | } | 1122 | } |
1122 | 1123 | ||
1123 | static int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) | 1124 | int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) |
1124 | { | 1125 | { |
1125 | u32 reg = apic_get_reg(apic, lvt_type); | 1126 | u32 reg = apic_get_reg(apic, lvt_type); |
1126 | int vector, mode, trig_mode; | 1127 | int vector, mode, trig_mode; |
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 138e8cc6fea6..6f4ce2575d09 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
@@ -34,6 +34,7 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu); | |||
34 | int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); | 34 | int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); |
35 | int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); | 35 | int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); |
36 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); | 36 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); |
37 | int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); | ||
37 | 38 | ||
38 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); | 39 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); |
39 | void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); | 40 | void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index f1b36cf3e3d0..224b02c3cda9 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -59,15 +59,6 @@ enum { | |||
59 | AUDIT_POST_SYNC | 59 | AUDIT_POST_SYNC |
60 | }; | 60 | }; |
61 | 61 | ||
62 | char *audit_point_name[] = { | ||
63 | "pre page fault", | ||
64 | "post page fault", | ||
65 | "pre pte write", | ||
66 | "post pte write", | ||
67 | "pre sync", | ||
68 | "post sync" | ||
69 | }; | ||
70 | |||
71 | #undef MMU_DEBUG | 62 | #undef MMU_DEBUG |
72 | 63 | ||
73 | #ifdef MMU_DEBUG | 64 | #ifdef MMU_DEBUG |
@@ -83,13 +74,10 @@ char *audit_point_name[] = { | |||
83 | #endif | 74 | #endif |
84 | 75 | ||
85 | #ifdef MMU_DEBUG | 76 | #ifdef MMU_DEBUG |
86 | static int dbg = 0; | 77 | static bool dbg = 0; |
87 | module_param(dbg, bool, 0644); | 78 | module_param(dbg, bool, 0644); |
88 | #endif | 79 | #endif |
89 | 80 | ||
90 | static int oos_shadow = 1; | ||
91 | module_param(oos_shadow, bool, 0644); | ||
92 | |||
93 | #ifndef MMU_DEBUG | 81 | #ifndef MMU_DEBUG |
94 | #define ASSERT(x) do { } while (0) | 82 | #define ASSERT(x) do { } while (0) |
95 | #else | 83 | #else |
@@ -593,6 +581,11 @@ static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, | |||
593 | return 0; | 581 | return 0; |
594 | } | 582 | } |
595 | 583 | ||
584 | static int mmu_memory_cache_free_objects(struct kvm_mmu_memory_cache *cache) | ||
585 | { | ||
586 | return cache->nobjs; | ||
587 | } | ||
588 | |||
596 | static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc, | 589 | static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc, |
597 | struct kmem_cache *cache) | 590 | struct kmem_cache *cache) |
598 | { | 591 | { |
@@ -953,21 +946,35 @@ static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn) | |||
953 | } | 946 | } |
954 | } | 947 | } |
955 | 948 | ||
949 | static unsigned long *__gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level, | ||
950 | struct kvm_memory_slot *slot) | ||
951 | { | ||
952 | struct kvm_lpage_info *linfo; | ||
953 | |||
954 | if (likely(level == PT_PAGE_TABLE_LEVEL)) | ||
955 | return &slot->rmap[gfn - slot->base_gfn]; | ||
956 | |||
957 | linfo = lpage_info_slot(gfn, slot, level); | ||
958 | return &linfo->rmap_pde; | ||
959 | } | ||
960 | |||
956 | /* | 961 | /* |
957 | * Take gfn and return the reverse mapping to it. | 962 | * Take gfn and return the reverse mapping to it. |
958 | */ | 963 | */ |
959 | static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level) | 964 | static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level) |
960 | { | 965 | { |
961 | struct kvm_memory_slot *slot; | 966 | struct kvm_memory_slot *slot; |
962 | struct kvm_lpage_info *linfo; | ||
963 | 967 | ||
964 | slot = gfn_to_memslot(kvm, gfn); | 968 | slot = gfn_to_memslot(kvm, gfn); |
965 | if (likely(level == PT_PAGE_TABLE_LEVEL)) | 969 | return __gfn_to_rmap(kvm, gfn, level, slot); |
966 | return &slot->rmap[gfn - slot->base_gfn]; | 970 | } |
967 | 971 | ||
968 | linfo = lpage_info_slot(gfn, slot, level); | 972 | static bool rmap_can_add(struct kvm_vcpu *vcpu) |
973 | { | ||
974 | struct kvm_mmu_memory_cache *cache; | ||
969 | 975 | ||
970 | return &linfo->rmap_pde; | 976 | cache = &vcpu->arch.mmu_pte_list_desc_cache; |
977 | return mmu_memory_cache_free_objects(cache); | ||
971 | } | 978 | } |
972 | 979 | ||
973 | static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) | 980 | static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) |
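mmu_memory_cache_free_objects() and rmap_can_add() formalize an opportunistic pattern that later hunks rely on: top up the caches before taking mmu_lock, then, under the lock, treat an empty cache as "skip the optional rmap work" rather than as a failure. A standalone model of the idea (the cache layout is simplified; the real struct lives in kvm_host.h):

    struct mmu_memory_cache_model {
            int nobjs;                      /* objects available right now */
    };

    static int cache_free_objects(struct mmu_memory_cache_model *c)
    {
            return c->nobjs;
    }

    /* Under mmu_lock: adding an rmap entry is an optimization, so an
     * empty cache means "skip the prefetch", never "fail the operation". */
    static int can_add(struct mmu_memory_cache_model *c)
    {
            return cache_free_objects(c) > 0;
    }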
@@ -1004,17 +1011,16 @@ static void drop_spte(struct kvm *kvm, u64 *sptep) | |||
1004 | rmap_remove(kvm, sptep); | 1011 | rmap_remove(kvm, sptep); |
1005 | } | 1012 | } |
1006 | 1013 | ||
1007 | static int rmap_write_protect(struct kvm *kvm, u64 gfn) | 1014 | int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn, |
1015 | struct kvm_memory_slot *slot) | ||
1008 | { | 1016 | { |
1009 | unsigned long *rmapp; | 1017 | unsigned long *rmapp; |
1010 | u64 *spte; | 1018 | u64 *spte; |
1011 | int i, write_protected = 0; | 1019 | int i, write_protected = 0; |
1012 | 1020 | ||
1013 | rmapp = gfn_to_rmap(kvm, gfn, PT_PAGE_TABLE_LEVEL); | 1021 | rmapp = __gfn_to_rmap(kvm, gfn, PT_PAGE_TABLE_LEVEL, slot); |
1014 | |||
1015 | spte = rmap_next(kvm, rmapp, NULL); | 1022 | spte = rmap_next(kvm, rmapp, NULL); |
1016 | while (spte) { | 1023 | while (spte) { |
1017 | BUG_ON(!spte); | ||
1018 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | 1024 | BUG_ON(!(*spte & PT_PRESENT_MASK)); |
1019 | rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); | 1025 | rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); |
1020 | if (is_writable_pte(*spte)) { | 1026 | if (is_writable_pte(*spte)) { |
@@ -1027,12 +1033,11 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
1027 | /* check for huge page mappings */ | 1033 | /* check for huge page mappings */ |
1028 | for (i = PT_DIRECTORY_LEVEL; | 1034 | for (i = PT_DIRECTORY_LEVEL; |
1029 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | 1035 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { |
1030 | rmapp = gfn_to_rmap(kvm, gfn, i); | 1036 | rmapp = __gfn_to_rmap(kvm, gfn, i, slot); |
1031 | spte = rmap_next(kvm, rmapp, NULL); | 1037 | spte = rmap_next(kvm, rmapp, NULL); |
1032 | while (spte) { | 1038 | while (spte) { |
1033 | BUG_ON(!spte); | ||
1034 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | 1039 | BUG_ON(!(*spte & PT_PRESENT_MASK)); |
1035 | BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)); | 1040 | BUG_ON(!is_large_pte(*spte)); |
1036 | pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn); | 1041 | pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn); |
1037 | if (is_writable_pte(*spte)) { | 1042 | if (is_writable_pte(*spte)) { |
1038 | drop_spte(kvm, spte); | 1043 | drop_spte(kvm, spte); |
@@ -1047,6 +1052,14 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
1047 | return write_protected; | 1052 | return write_protected; |
1048 | } | 1053 | } |
1049 | 1054 | ||
1055 | static int rmap_write_protect(struct kvm *kvm, u64 gfn) | ||
1056 | { | ||
1057 | struct kvm_memory_slot *slot; | ||
1058 | |||
1059 | slot = gfn_to_memslot(kvm, gfn); | ||
1060 | return kvm_mmu_rmap_write_protect(kvm, gfn, slot); | ||
1061 | } | ||
1062 | |||
1050 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, | 1063 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, |
1051 | unsigned long data) | 1064 | unsigned long data) |
1052 | { | 1065 | { |
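The write-protect path is split so that callers which already hold a struct kvm_memory_slot (dirty logging, for instance) can reach the rmap without a second gfn_to_memslot() lookup; the old rmap_write_protect() survives as a thin wrapper around the slot-taking variant. A hypothetical slot-aware caller, purely illustrative:

    /* hypothetical: write-protect a gfn range of a slot we already hold */
    static void write_protect_slot_range(struct kvm *kvm,
                                         struct kvm_memory_slot *slot,
                                         gfn_t start, gfn_t end)
    {
            gfn_t gfn;

            for (gfn = start; gfn < end; ++gfn)
                    kvm_mmu_rmap_write_protect(kvm, gfn, slot);
    }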
@@ -1103,15 +1116,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | |||
1103 | int (*handler)(struct kvm *kvm, unsigned long *rmapp, | 1116 | int (*handler)(struct kvm *kvm, unsigned long *rmapp, |
1104 | unsigned long data)) | 1117 | unsigned long data)) |
1105 | { | 1118 | { |
1106 | int i, j; | 1119 | int j; |
1107 | int ret; | 1120 | int ret; |
1108 | int retval = 0; | 1121 | int retval = 0; |
1109 | struct kvm_memslots *slots; | 1122 | struct kvm_memslots *slots; |
1123 | struct kvm_memory_slot *memslot; | ||
1110 | 1124 | ||
1111 | slots = kvm_memslots(kvm); | 1125 | slots = kvm_memslots(kvm); |
1112 | 1126 | ||
1113 | for (i = 0; i < slots->nmemslots; i++) { | 1127 | kvm_for_each_memslot(memslot, slots) { |
1114 | struct kvm_memory_slot *memslot = &slots->memslots[i]; | ||
1115 | unsigned long start = memslot->userspace_addr; | 1128 | unsigned long start = memslot->userspace_addr; |
1116 | unsigned long end; | 1129 | unsigned long end; |
1117 | 1130 | ||
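kvm_handle_hva() stops indexing slots->memslots directly and walks the slots through kvm_for_each_memslot(), insulating walkers from the slot array's layout. One plausible shape for the macro (the real definition lives in include/linux/kvm_host.h and may differ in detail):

    #define kvm_for_each_memslot(memslot, slots)                      \
            for ((memslot) = &(slots)->memslots[0];                   \
                 (memslot) < (slots)->memslots + KVM_MEM_SLOTS_NUM && \
                 (memslot)->npages;                                   \
                 (memslot)++)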
@@ -1324,7 +1337,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, | |||
1324 | PAGE_SIZE); | 1337 | PAGE_SIZE); |
1325 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); | 1338 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); |
1326 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); | 1339 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); |
1327 | bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); | 1340 | bitmap_zero(sp->slot_bitmap, KVM_MEM_SLOTS_NUM); |
1328 | sp->parent_ptes = 0; | 1341 | sp->parent_ptes = 0; |
1329 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); | 1342 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); |
1330 | kvm_mod_used_mmu_pages(vcpu->kvm, +1); | 1343 | kvm_mod_used_mmu_pages(vcpu->kvm, +1); |
@@ -1511,6 +1524,13 @@ static int kvm_sync_page_transient(struct kvm_vcpu *vcpu, | |||
1511 | return ret; | 1524 | return ret; |
1512 | } | 1525 | } |
1513 | 1526 | ||
1527 | #ifdef CONFIG_KVM_MMU_AUDIT | ||
1528 | #include "mmu_audit.c" | ||
1529 | #else | ||
1530 | static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { } | ||
1531 | static void mmu_audit_disable(void) { } | ||
1532 | #endif | ||
1533 | |||
1514 | static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 1534 | static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
1515 | struct list_head *invalid_list) | 1535 | struct list_head *invalid_list) |
1516 | { | 1536 | { |
@@ -1640,6 +1660,18 @@ static void init_shadow_page_table(struct kvm_mmu_page *sp) | |||
1640 | sp->spt[i] = 0ull; | 1660 | sp->spt[i] = 0ull; |
1641 | } | 1661 | } |
1642 | 1662 | ||
1663 | static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp) | ||
1664 | { | ||
1665 | sp->write_flooding_count = 0; | ||
1666 | } | ||
1667 | |||
1668 | static void clear_sp_write_flooding_count(u64 *spte) | ||
1669 | { | ||
1670 | struct kvm_mmu_page *sp = page_header(__pa(spte)); | ||
1671 | |||
1672 | __clear_sp_write_flooding_count(sp); | ||
1673 | } | ||
1674 | |||
1643 | static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | 1675 | static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, |
1644 | gfn_t gfn, | 1676 | gfn_t gfn, |
1645 | gva_t gaddr, | 1677 | gva_t gaddr, |
@@ -1683,6 +1715,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
1683 | } else if (sp->unsync) | 1715 | } else if (sp->unsync) |
1684 | kvm_mmu_mark_parents_unsync(sp); | 1716 | kvm_mmu_mark_parents_unsync(sp); |
1685 | 1717 | ||
1718 | __clear_sp_write_flooding_count(sp); | ||
1686 | trace_kvm_mmu_get_page(sp, false); | 1719 | trace_kvm_mmu_get_page(sp, false); |
1687 | return sp; | 1720 | return sp; |
1688 | } | 1721 | } |
@@ -1796,7 +1829,7 @@ static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1796 | } | 1829 | } |
1797 | } | 1830 | } |
1798 | 1831 | ||
1799 | static void mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp, | 1832 | static bool mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp, |
1800 | u64 *spte) | 1833 | u64 *spte) |
1801 | { | 1834 | { |
1802 | u64 pte; | 1835 | u64 pte; |
@@ -1804,17 +1837,21 @@ static void mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp, | |||
1804 | 1837 | ||
1805 | pte = *spte; | 1838 | pte = *spte; |
1806 | if (is_shadow_present_pte(pte)) { | 1839 | if (is_shadow_present_pte(pte)) { |
1807 | if (is_last_spte(pte, sp->role.level)) | 1840 | if (is_last_spte(pte, sp->role.level)) { |
1808 | drop_spte(kvm, spte); | 1841 | drop_spte(kvm, spte); |
1809 | else { | 1842 | if (is_large_pte(pte)) |
1843 | --kvm->stat.lpages; | ||
1844 | } else { | ||
1810 | child = page_header(pte & PT64_BASE_ADDR_MASK); | 1845 | child = page_header(pte & PT64_BASE_ADDR_MASK); |
1811 | drop_parent_pte(child, spte); | 1846 | drop_parent_pte(child, spte); |
1812 | } | 1847 | } |
1813 | } else if (is_mmio_spte(pte)) | 1848 | return true; |
1849 | } | ||
1850 | |||
1851 | if (is_mmio_spte(pte)) | ||
1814 | mmu_spte_clear_no_track(spte); | 1852 | mmu_spte_clear_no_track(spte); |
1815 | 1853 | ||
1816 | if (is_large_pte(pte)) | 1854 | return false; |
1817 | --kvm->stat.lpages; | ||
1818 | } | 1855 | } |
1819 | 1856 | ||
1820 | static void kvm_mmu_page_unlink_children(struct kvm *kvm, | 1857 | static void kvm_mmu_page_unlink_children(struct kvm *kvm, |
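mmu_page_zap_pte() now tells its caller whether it actually dropped a present spte, and the lpages accounting moves inside the last-level branch where it belongs. The payoff shows up in FNAME(invlpg) later in this diff, which can flush remote TLBs only when something was really dropped:

    if (mmu_page_zap_pte(vcpu->kvm, sp, sptep))
            kvm_flush_remote_tlbs(vcpu->kvm);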
@@ -1831,15 +1868,6 @@ static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte) | |||
1831 | mmu_page_remove_parent_pte(sp, parent_pte); | 1868 | mmu_page_remove_parent_pte(sp, parent_pte); |
1832 | } | 1869 | } |
1833 | 1870 | ||
1834 | static void kvm_mmu_reset_last_pte_updated(struct kvm *kvm) | ||
1835 | { | ||
1836 | int i; | ||
1837 | struct kvm_vcpu *vcpu; | ||
1838 | |||
1839 | kvm_for_each_vcpu(i, vcpu, kvm) | ||
1840 | vcpu->arch.last_pte_updated = NULL; | ||
1841 | } | ||
1842 | |||
1843 | static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) | 1871 | static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) |
1844 | { | 1872 | { |
1845 | u64 *parent_pte; | 1873 | u64 *parent_pte; |
@@ -1899,7 +1927,6 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, | |||
1899 | } | 1927 | } |
1900 | 1928 | ||
1901 | sp->role.invalid = 1; | 1929 | sp->role.invalid = 1; |
1902 | kvm_mmu_reset_last_pte_updated(kvm); | ||
1903 | return ret; | 1930 | return ret; |
1904 | } | 1931 | } |
1905 | 1932 | ||
@@ -1985,7 +2012,7 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages) | |||
1985 | kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages; | 2012 | kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages; |
1986 | } | 2013 | } |
1987 | 2014 | ||
1988 | static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) | 2015 | int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) |
1989 | { | 2016 | { |
1990 | struct kvm_mmu_page *sp; | 2017 | struct kvm_mmu_page *sp; |
1991 | struct hlist_node *node; | 2018 | struct hlist_node *node; |
@@ -1994,7 +2021,7 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) | |||
1994 | 2021 | ||
1995 | pgprintk("%s: looking for gfn %llx\n", __func__, gfn); | 2022 | pgprintk("%s: looking for gfn %llx\n", __func__, gfn); |
1996 | r = 0; | 2023 | r = 0; |
1997 | 2024 | spin_lock(&kvm->mmu_lock); | |
1998 | for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { | 2025 | for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { |
1999 | pgprintk("%s: gfn %llx role %x\n", __func__, gfn, | 2026 | pgprintk("%s: gfn %llx role %x\n", __func__, gfn, |
2000 | sp->role.word); | 2027 | sp->role.word); |
@@ -2002,22 +2029,11 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) | |||
2002 | kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); | 2029 | kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); |
2003 | } | 2030 | } |
2004 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | 2031 | kvm_mmu_commit_zap_page(kvm, &invalid_list); |
2005 | return r; | 2032 | spin_unlock(&kvm->mmu_lock); |
2006 | } | ||
2007 | |||
2008 | static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) | ||
2009 | { | ||
2010 | struct kvm_mmu_page *sp; | ||
2011 | struct hlist_node *node; | ||
2012 | LIST_HEAD(invalid_list); | ||
2013 | 2033 | ||
2014 | for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { | 2034 | return r; |
2015 | pgprintk("%s: zap %llx %x\n", | ||
2016 | __func__, gfn, sp->role.word); | ||
2017 | kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); | ||
2018 | } | ||
2019 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | ||
2020 | } | 2035 | } |
2036 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page); | ||
2021 | 2037 | ||
2022 | static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) | 2038 | static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) |
2023 | { | 2039 | { |
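kvm_mmu_unprotect_page() absorbs the mmu_lock acquisition and is exported, which both simplifies kvm_mmu_unprotect_page_virt() below and lets the emulator's new retry path call it directly; mmu_unshadow(), whose only user was the pv-mmu code removed later in this diff, goes away. The simplified caller, as it appears in a later hunk:

    gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
    /* locking is now handled inside: */
    r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);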
@@ -2169,8 +2185,6 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, | |||
2169 | return 1; | 2185 | return 1; |
2170 | 2186 | ||
2171 | if (!need_unsync && !s->unsync) { | 2187 | if (!need_unsync && !s->unsync) { |
2172 | if (!oos_shadow) | ||
2173 | return 1; | ||
2174 | need_unsync = true; | 2188 | need_unsync = true; |
2175 | } | 2189 | } |
2176 | } | 2190 | } |
@@ -2191,11 +2205,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2191 | if (set_mmio_spte(sptep, gfn, pfn, pte_access)) | 2205 | if (set_mmio_spte(sptep, gfn, pfn, pte_access)) |
2192 | return 0; | 2206 | return 0; |
2193 | 2207 | ||
2194 | /* | ||
2195 | * We don't set the accessed bit, since we sometimes want to see | ||
2196 | * whether the guest actually used the pte (in order to detect | ||
2197 | * demand paging). | ||
2198 | */ | ||
2199 | spte = PT_PRESENT_MASK; | 2208 | spte = PT_PRESENT_MASK; |
2200 | if (!speculative) | 2209 | if (!speculative) |
2201 | spte |= shadow_accessed_mask; | 2210 | spte |= shadow_accessed_mask; |
@@ -2346,10 +2355,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2346 | } | 2355 | } |
2347 | } | 2356 | } |
2348 | kvm_release_pfn_clean(pfn); | 2357 | kvm_release_pfn_clean(pfn); |
2349 | if (speculative) { | ||
2350 | vcpu->arch.last_pte_updated = sptep; | ||
2351 | vcpu->arch.last_pte_gfn = gfn; | ||
2352 | } | ||
2353 | } | 2358 | } |
2354 | 2359 | ||
2355 | static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) | 2360 | static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) |
@@ -2840,12 +2845,12 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) | |||
2840 | return; | 2845 | return; |
2841 | 2846 | ||
2842 | vcpu_clear_mmio_info(vcpu, ~0ul); | 2847 | vcpu_clear_mmio_info(vcpu, ~0ul); |
2843 | trace_kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); | 2848 | kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); |
2844 | if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { | 2849 | if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { |
2845 | hpa_t root = vcpu->arch.mmu.root_hpa; | 2850 | hpa_t root = vcpu->arch.mmu.root_hpa; |
2846 | sp = page_header(root); | 2851 | sp = page_header(root); |
2847 | mmu_sync_children(vcpu, sp); | 2852 | mmu_sync_children(vcpu, sp); |
2848 | trace_kvm_mmu_audit(vcpu, AUDIT_POST_SYNC); | 2853 | kvm_mmu_audit(vcpu, AUDIT_POST_SYNC); |
2849 | return; | 2854 | return; |
2850 | } | 2855 | } |
2851 | for (i = 0; i < 4; ++i) { | 2856 | for (i = 0; i < 4; ++i) { |
@@ -2857,7 +2862,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) | |||
2857 | mmu_sync_children(vcpu, sp); | 2862 | mmu_sync_children(vcpu, sp); |
2858 | } | 2863 | } |
2859 | } | 2864 | } |
2860 | trace_kvm_mmu_audit(vcpu, AUDIT_POST_SYNC); | 2865 | kvm_mmu_audit(vcpu, AUDIT_POST_SYNC); |
2861 | } | 2866 | } |
2862 | 2867 | ||
2863 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) | 2868 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) |
@@ -3510,28 +3515,119 @@ static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page, | |||
3510 | kvm_mmu_flush_tlb(vcpu); | 3515 | kvm_mmu_flush_tlb(vcpu); |
3511 | } | 3516 | } |
3512 | 3517 | ||
3513 | static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu) | 3518 | static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, |
3519 | const u8 *new, int *bytes) | ||
3514 | { | 3520 | { |
3515 | u64 *spte = vcpu->arch.last_pte_updated; | 3521 | u64 gentry; |
3522 | int r; | ||
3523 | |||
3524 | /* | ||
3525 | * Assume that the pte write is to a page table of the same type | ||
3526 | * as the current vcpu paging mode, since we update the sptes only | ||

3527 | * when they have the same mode. | ||
3528 | */ | ||
3529 | if (is_pae(vcpu) && *bytes == 4) { | ||
3530 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ | ||
3531 | *gpa &= ~(gpa_t)7; | ||
3532 | *bytes = 8; | ||
3533 | r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, min(*bytes, 8)); | ||
3534 | if (r) | ||
3535 | gentry = 0; | ||
3536 | new = (const u8 *)&gentry; | ||
3537 | } | ||
3516 | 3538 | ||
3517 | return !!(spte && (*spte & shadow_accessed_mask)); | 3539 | switch (*bytes) { |
3540 | case 4: | ||
3541 | gentry = *(const u32 *)new; | ||
3542 | break; | ||
3543 | case 8: | ||
3544 | gentry = *(const u64 *)new; | ||
3545 | break; | ||
3546 | default: | ||
3547 | gentry = 0; | ||
3548 | break; | ||
3549 | } | ||
3550 | |||
3551 | return gentry; | ||
3518 | } | 3552 | } |
3519 | 3553 | ||
3520 | static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn) | 3554 | /* |
3555 | * If we're seeing too many writes to a page, it may no longer be a page table, | ||
3556 | * or we may be forking, in which case it is better to unmap the page. | ||
3557 | */ | ||
3558 | static bool detect_write_flooding(struct kvm_mmu_page *sp, u64 *spte) | ||
3521 | { | 3559 | { |
3522 | u64 *spte = vcpu->arch.last_pte_updated; | 3560 | /* |
3561 | * Skip write-flooding detection for the sp whose level is 1, because | ||
3562 | * it can become unsync, in which case the guest page is not write-protected. | ||
3563 | */ | ||
3564 | if (sp->role.level == 1) | ||
3565 | return false; | ||
3523 | 3566 | ||
3524 | if (spte | 3567 | return ++sp->write_flooding_count >= 3; |
3525 | && vcpu->arch.last_pte_gfn == gfn | 3568 | } |
3526 | && shadow_accessed_mask | 3569 | |
3527 | && !(*spte & shadow_accessed_mask) | 3570 | /* |
3528 | && is_shadow_present_pte(*spte)) | 3571 | * Misaligned accesses are too much trouble to fix up; also, they usually |
3529 | set_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte); | 3572 | * indicate a page is not used as a page table. |
3573 | */ | ||
3574 | static bool detect_write_misaligned(struct kvm_mmu_page *sp, gpa_t gpa, | ||
3575 | int bytes) | ||
3576 | { | ||
3577 | unsigned offset, pte_size, misaligned; | ||
3578 | |||
3579 | pgprintk("misaligned: gpa %llx bytes %d role %x\n", | ||
3580 | gpa, bytes, sp->role.word); | ||
3581 | |||
3582 | offset = offset_in_page(gpa); | ||
3583 | pte_size = sp->role.cr4_pae ? 8 : 4; | ||
3584 | |||
3585 | /* | ||
3586 | * Sometimes the OS only writes the last byte to update status | ||
3587 | * bits; for example, Linux's clear_bit() uses an andb instruction. | ||
3588 | */ | ||
3589 | if (!(offset & (pte_size - 1)) && bytes == 1) | ||
3590 | return false; | ||
3591 | |||
3592 | misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); | ||
3593 | misaligned |= bytes < 4; | ||
3594 | |||
3595 | return misaligned; | ||
3596 | } | ||
3597 | |||
3598 | static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte) | ||
3599 | { | ||
3600 | unsigned page_offset, quadrant; | ||
3601 | u64 *spte; | ||
3602 | int level; | ||
3603 | |||
3604 | page_offset = offset_in_page(gpa); | ||
3605 | level = sp->role.level; | ||
3606 | *nspte = 1; | ||
3607 | if (!sp->role.cr4_pae) { | ||
3608 | page_offset <<= 1; /* 32->64 */ | ||
3609 | /* | ||
3610 | * A 32-bit pde maps 4MB while the shadow pdes map | ||
3611 | * only 2MB. So we need to double the offset again | ||
3612 | * and zap two pdes instead of one. | ||
3613 | */ | ||
3614 | if (level == PT32_ROOT_LEVEL) { | ||
3615 | page_offset &= ~7; /* kill rounding error */ | ||
3616 | page_offset <<= 1; | ||
3617 | *nspte = 2; | ||
3618 | } | ||
3619 | quadrant = page_offset >> PAGE_SHIFT; | ||
3620 | page_offset &= ~PAGE_MASK; | ||
3621 | if (quadrant != sp->role.quadrant) | ||
3622 | return NULL; | ||
3623 | } | ||
3624 | |||
3625 | spte = &sp->spt[page_offset / sizeof(*spte)]; | ||
3626 | return spte; | ||
3530 | } | 3627 | } |
3531 | 3628 | ||
3532 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 3629 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
3533 | const u8 *new, int bytes, | 3630 | const u8 *new, int bytes) |
3534 | bool guest_initiated) | ||
3535 | { | 3631 | { |
3536 | gfn_t gfn = gpa >> PAGE_SHIFT; | 3632 | gfn_t gfn = gpa >> PAGE_SHIFT; |
3537 | union kvm_mmu_page_role mask = { .word = 0 }; | 3633 | union kvm_mmu_page_role mask = { .word = 0 }; |
@@ -3539,8 +3635,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3539 | struct hlist_node *node; | 3635 | struct hlist_node *node; |
3540 | LIST_HEAD(invalid_list); | 3636 | LIST_HEAD(invalid_list); |
3541 | u64 entry, gentry, *spte; | 3637 | u64 entry, gentry, *spte; |
3542 | unsigned pte_size, page_offset, misaligned, quadrant, offset; | 3638 | int npte; |
3543 | int level, npte, invlpg_counter, r, flooded = 0; | ||
3544 | bool remote_flush, local_flush, zap_page; | 3639 | bool remote_flush, local_flush, zap_page; |
3545 | 3640 | ||
3546 | /* | 3641 | /* |
@@ -3551,112 +3646,45 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3551 | return; | 3646 | return; |
3552 | 3647 | ||
3553 | zap_page = remote_flush = local_flush = false; | 3648 | zap_page = remote_flush = local_flush = false; |
3554 | offset = offset_in_page(gpa); | ||
3555 | 3649 | ||
3556 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); | 3650 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); |
3557 | 3651 | ||
3558 | invlpg_counter = atomic_read(&vcpu->kvm->arch.invlpg_counter); | 3652 | gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, new, &bytes); |
3559 | 3653 | ||
3560 | /* | 3654 | /* |
3561 | * Assume that the pte write on a page table of the same type | 3655 | * No need to care whether the memory allocation is successful |
3562 | * as the current vcpu paging mode since we update the sptes only | 3656 | * or not, since pte prefetch is skipped if there are not |
3563 | * when they have the same mode. | 3657 | * enough objects in the cache. |
3564 | */ | 3658 | */ |
3565 | if ((is_pae(vcpu) && bytes == 4) || !new) { | 3659 | mmu_topup_memory_caches(vcpu); |
3566 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ | ||
3567 | if (is_pae(vcpu)) { | ||
3568 | gpa &= ~(gpa_t)7; | ||
3569 | bytes = 8; | ||
3570 | } | ||
3571 | r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8)); | ||
3572 | if (r) | ||
3573 | gentry = 0; | ||
3574 | new = (const u8 *)&gentry; | ||
3575 | } | ||
3576 | |||
3577 | switch (bytes) { | ||
3578 | case 4: | ||
3579 | gentry = *(const u32 *)new; | ||
3580 | break; | ||
3581 | case 8: | ||
3582 | gentry = *(const u64 *)new; | ||
3583 | break; | ||
3584 | default: | ||
3585 | gentry = 0; | ||
3586 | break; | ||
3587 | } | ||
3588 | 3660 | ||
3589 | spin_lock(&vcpu->kvm->mmu_lock); | 3661 | spin_lock(&vcpu->kvm->mmu_lock); |
3590 | if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) | ||
3591 | gentry = 0; | ||
3592 | kvm_mmu_free_some_pages(vcpu); | ||
3593 | ++vcpu->kvm->stat.mmu_pte_write; | 3662 | ++vcpu->kvm->stat.mmu_pte_write; |
3594 | trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); | 3663 | kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); |
3595 | if (guest_initiated) { | ||
3596 | kvm_mmu_access_page(vcpu, gfn); | ||
3597 | if (gfn == vcpu->arch.last_pt_write_gfn | ||
3598 | && !last_updated_pte_accessed(vcpu)) { | ||
3599 | ++vcpu->arch.last_pt_write_count; | ||
3600 | if (vcpu->arch.last_pt_write_count >= 3) | ||
3601 | flooded = 1; | ||
3602 | } else { | ||
3603 | vcpu->arch.last_pt_write_gfn = gfn; | ||
3604 | vcpu->arch.last_pt_write_count = 1; | ||
3605 | vcpu->arch.last_pte_updated = NULL; | ||
3606 | } | ||
3607 | } | ||
3608 | 3664 | ||
3609 | mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; | 3665 | mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; |
3610 | for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) { | 3666 | for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) { |
3611 | pte_size = sp->role.cr4_pae ? 8 : 4; | 3667 | spte = get_written_sptes(sp, gpa, &npte); |
3612 | misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); | 3668 | |
3613 | misaligned |= bytes < 4; | 3669 | if (detect_write_misaligned(sp, gpa, bytes) || |
3614 | if (misaligned || flooded) { | 3670 | detect_write_flooding(sp, spte)) { |
3615 | /* | ||
3616 | * Misaligned accesses are too much trouble to fix | ||
3617 | * up; also, they usually indicate a page is not used | ||
3618 | * as a page table. | ||
3619 | * | ||
3620 | * If we're seeing too many writes to a page, | ||
3621 | * it may no longer be a page table, or we may be | ||
3622 | * forking, in which case it is better to unmap the | ||
3623 | * page. | ||
3624 | */ | ||
3625 | pgprintk("misaligned: gpa %llx bytes %d role %x\n", | ||
3626 | gpa, bytes, sp->role.word); | ||
3627 | zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, | 3671 | zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, |
3628 | &invalid_list); | 3672 | &invalid_list); |
3629 | ++vcpu->kvm->stat.mmu_flooded; | 3673 | ++vcpu->kvm->stat.mmu_flooded; |
3630 | continue; | 3674 | continue; |
3631 | } | 3675 | } |
3632 | page_offset = offset; | 3676 | |
3633 | level = sp->role.level; | 3677 | spte = get_written_sptes(sp, gpa, &npte); |
3634 | npte = 1; | 3678 | if (!spte) |
3635 | if (!sp->role.cr4_pae) { | 3679 | continue; |
3636 | page_offset <<= 1; /* 32->64 */ | 3680 | |
3637 | /* | ||
3638 | * A 32-bit pde maps 4MB while the shadow pdes map | ||
3639 | * only 2MB. So we need to double the offset again | ||
3640 | * and zap two pdes instead of one. | ||
3641 | */ | ||
3642 | if (level == PT32_ROOT_LEVEL) { | ||
3643 | page_offset &= ~7; /* kill rounding error */ | ||
3644 | page_offset <<= 1; | ||
3645 | npte = 2; | ||
3646 | } | ||
3647 | quadrant = page_offset >> PAGE_SHIFT; | ||
3648 | page_offset &= ~PAGE_MASK; | ||
3649 | if (quadrant != sp->role.quadrant) | ||
3650 | continue; | ||
3651 | } | ||
3652 | local_flush = true; | 3681 | local_flush = true; |
3653 | spte = &sp->spt[page_offset / sizeof(*spte)]; | ||
3654 | while (npte--) { | 3682 | while (npte--) { |
3655 | entry = *spte; | 3683 | entry = *spte; |
3656 | mmu_page_zap_pte(vcpu->kvm, sp, spte); | 3684 | mmu_page_zap_pte(vcpu->kvm, sp, spte); |
3657 | if (gentry && | 3685 | if (gentry && |
3658 | !((sp->role.word ^ vcpu->arch.mmu.base_role.word) | 3686 | !((sp->role.word ^ vcpu->arch.mmu.base_role.word) |
3659 | & mask.word)) | 3687 | & mask.word) && rmap_can_add(vcpu)) |
3660 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); | 3688 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); |
3661 | if (!remote_flush && need_remote_flush(entry, *spte)) | 3689 | if (!remote_flush && need_remote_flush(entry, *spte)) |
3662 | remote_flush = true; | 3690 | remote_flush = true; |
@@ -3665,7 +3693,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3665 | } | 3693 | } |
3666 | mmu_pte_write_flush_tlb(vcpu, zap_page, remote_flush, local_flush); | 3694 | mmu_pte_write_flush_tlb(vcpu, zap_page, remote_flush, local_flush); |
3667 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | 3695 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); |
3668 | trace_kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE); | 3696 | kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE); |
3669 | spin_unlock(&vcpu->kvm->mmu_lock); | 3697 | spin_unlock(&vcpu->kvm->mmu_lock); |
3670 | } | 3698 | } |
3671 | 3699 | ||
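The rewritten kvm_mmu_pte_write() replaces the old per-vcpu last_pt_write_count bookkeeping with a per-shadow-page counter: detect_write_flooding() trips after three consecutive writes, and the counter is cleared whenever the page is used for translation again (see the clear_sp_write_flooding_count() calls added to paging_tmpl.h below), so one vcpu's writes can no longer be masked by another vcpu's faults. A standalone model of the detector, with only the threshold and the level-1 exemption taken from the diff:

    #include <stdbool.h>

    struct sp_model {
            int level;
            int write_flooding_count;
    };

    static bool detect_write_flooding(struct sp_model *sp)
    {
            /* last-level pages can simply go unsync instead */
            if (sp->level == 1)
                    return false;
            return ++sp->write_flooding_count >= 3;
    }

    static void sp_used_for_translation(struct sp_model *sp)
    {
            sp->write_flooding_count = 0;   /* still behaving like a page table */
    }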
@@ -3679,9 +3707,8 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) | |||
3679 | 3707 | ||
3680 | gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); | 3708 | gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); |
3681 | 3709 | ||
3682 | spin_lock(&vcpu->kvm->mmu_lock); | ||
3683 | r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); | 3710 | r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); |
3684 | spin_unlock(&vcpu->kvm->mmu_lock); | 3711 | |
3685 | return r; | 3712 | return r; |
3686 | } | 3713 | } |
3687 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); | 3714 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); |
@@ -3702,10 +3729,18 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) | |||
3702 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | 3729 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); |
3703 | } | 3730 | } |
3704 | 3731 | ||
3732 | static bool is_mmio_page_fault(struct kvm_vcpu *vcpu, gva_t addr) | ||
3733 | { | ||
3734 | if (vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu)) | ||
3735 | return vcpu_match_mmio_gpa(vcpu, addr); | ||
3736 | |||
3737 | return vcpu_match_mmio_gva(vcpu, addr); | ||
3738 | } | ||
3739 | |||
3705 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code, | 3740 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code, |
3706 | void *insn, int insn_len) | 3741 | void *insn, int insn_len) |
3707 | { | 3742 | { |
3708 | int r; | 3743 | int r, emulation_type = EMULTYPE_RETRY; |
3709 | enum emulation_result er; | 3744 | enum emulation_result er; |
3710 | 3745 | ||
3711 | r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false); | 3746 | r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false); |
@@ -3717,11 +3752,10 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code, | |||
3717 | goto out; | 3752 | goto out; |
3718 | } | 3753 | } |
3719 | 3754 | ||
3720 | r = mmu_topup_memory_caches(vcpu); | 3755 | if (is_mmio_page_fault(vcpu, cr2)) |
3721 | if (r) | 3756 | emulation_type = 0; |
3722 | goto out; | ||
3723 | 3757 | ||
3724 | er = x86_emulate_instruction(vcpu, cr2, 0, insn, insn_len); | 3758 | er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len); |
3725 | 3759 | ||
3726 | switch (er) { | 3760 | switch (er) { |
3727 | case EMULATE_DONE: | 3761 | case EMULATE_DONE: |
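kvm_mmu_page_fault() now asks for EMULTYPE_RETRY by default, letting the emulator unprotect the page and re-execute the faulting instruction, and downgrades to plain emulation when the fault matches cached MMIO state, where a retry could never succeed. The resulting decision, condensed from the hunk above:

    int emulation_type = EMULTYPE_RETRY;

    if (is_mmio_page_fault(vcpu, cr2))      /* cached MMIO: must emulate */
            emulation_type = 0;

    er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len);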
@@ -3792,7 +3826,11 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu) | |||
3792 | int kvm_mmu_create(struct kvm_vcpu *vcpu) | 3826 | int kvm_mmu_create(struct kvm_vcpu *vcpu) |
3793 | { | 3827 | { |
3794 | ASSERT(vcpu); | 3828 | ASSERT(vcpu); |
3795 | ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); | 3829 | |
3830 | vcpu->arch.walk_mmu = &vcpu->arch.mmu; | ||
3831 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; | ||
3832 | vcpu->arch.mmu.translate_gpa = translate_gpa; | ||
3833 | vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa; | ||
3796 | 3834 | ||
3797 | return alloc_mmu_pages(vcpu); | 3835 | return alloc_mmu_pages(vcpu); |
3798 | } | 3836 | } |
@@ -3852,14 +3890,14 @@ restart: | |||
3852 | spin_unlock(&kvm->mmu_lock); | 3890 | spin_unlock(&kvm->mmu_lock); |
3853 | } | 3891 | } |
3854 | 3892 | ||
3855 | static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm, | 3893 | static void kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm, |
3856 | struct list_head *invalid_list) | 3894 | struct list_head *invalid_list) |
3857 | { | 3895 | { |
3858 | struct kvm_mmu_page *page; | 3896 | struct kvm_mmu_page *page; |
3859 | 3897 | ||
3860 | page = container_of(kvm->arch.active_mmu_pages.prev, | 3898 | page = container_of(kvm->arch.active_mmu_pages.prev, |
3861 | struct kvm_mmu_page, link); | 3899 | struct kvm_mmu_page, link); |
3862 | return kvm_mmu_prepare_zap_page(kvm, page, invalid_list); | 3900 | kvm_mmu_prepare_zap_page(kvm, page, invalid_list); |
3863 | } | 3901 | } |
3864 | 3902 | ||
3865 | static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) | 3903 | static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) |
@@ -3874,15 +3912,15 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) | |||
3874 | raw_spin_lock(&kvm_lock); | 3912 | raw_spin_lock(&kvm_lock); |
3875 | 3913 | ||
3876 | list_for_each_entry(kvm, &vm_list, vm_list) { | 3914 | list_for_each_entry(kvm, &vm_list, vm_list) { |
3877 | int idx, freed_pages; | 3915 | int idx; |
3878 | LIST_HEAD(invalid_list); | 3916 | LIST_HEAD(invalid_list); |
3879 | 3917 | ||
3880 | idx = srcu_read_lock(&kvm->srcu); | 3918 | idx = srcu_read_lock(&kvm->srcu); |
3881 | spin_lock(&kvm->mmu_lock); | 3919 | spin_lock(&kvm->mmu_lock); |
3882 | if (!kvm_freed && nr_to_scan > 0 && | 3920 | if (!kvm_freed && nr_to_scan > 0 && |
3883 | kvm->arch.n_used_mmu_pages > 0) { | 3921 | kvm->arch.n_used_mmu_pages > 0) { |
3884 | freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm, | 3922 | kvm_mmu_remove_some_alloc_mmu_pages(kvm, |
3885 | &invalid_list); | 3923 | &invalid_list); |
3886 | kvm_freed = kvm; | 3924 | kvm_freed = kvm; |
3887 | } | 3925 | } |
3888 | nr_to_scan--; | 3926 | nr_to_scan--; |
@@ -3944,15 +3982,15 @@ nomem: | |||
3944 | */ | 3982 | */ |
3945 | unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) | 3983 | unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) |
3946 | { | 3984 | { |
3947 | int i; | ||
3948 | unsigned int nr_mmu_pages; | 3985 | unsigned int nr_mmu_pages; |
3949 | unsigned int nr_pages = 0; | 3986 | unsigned int nr_pages = 0; |
3950 | struct kvm_memslots *slots; | 3987 | struct kvm_memslots *slots; |
3988 | struct kvm_memory_slot *memslot; | ||
3951 | 3989 | ||
3952 | slots = kvm_memslots(kvm); | 3990 | slots = kvm_memslots(kvm); |
3953 | 3991 | ||
3954 | for (i = 0; i < slots->nmemslots; i++) | 3992 | kvm_for_each_memslot(memslot, slots) |
3955 | nr_pages += slots->memslots[i].npages; | 3993 | nr_pages += memslot->npages; |
3956 | 3994 | ||
3957 | nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000; | 3995 | nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000; |
3958 | nr_mmu_pages = max(nr_mmu_pages, | 3996 | nr_mmu_pages = max(nr_mmu_pages, |
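The per-mille calculation itself is unchanged; only the slot walk is converted to kvm_for_each_memslot(). As a worked example, assuming KVM_PERMILLE_MMU_PAGES is 20 (its long-standing value), a 4 GiB guest comes out at roughly twenty thousand shadow pages before the minimum is applied:

    unsigned int nr_pages     = 1048576;               /* 4 GiB / 4 KiB */
    unsigned int nr_mmu_pages = nr_pages * 20 / 1000;  /* = 20971 pages */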
@@ -3961,127 +3999,6 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) | |||
3961 | return nr_mmu_pages; | 3999 | return nr_mmu_pages; |
3962 | } | 4000 | } |
3963 | 4001 | ||
3964 | static void *pv_mmu_peek_buffer(struct kvm_pv_mmu_op_buffer *buffer, | ||
3965 | unsigned len) | ||
3966 | { | ||
3967 | if (len > buffer->len) | ||
3968 | return NULL; | ||
3969 | return buffer->ptr; | ||
3970 | } | ||
3971 | |||
3972 | static void *pv_mmu_read_buffer(struct kvm_pv_mmu_op_buffer *buffer, | ||
3973 | unsigned len) | ||
3974 | { | ||
3975 | void *ret; | ||
3976 | |||
3977 | ret = pv_mmu_peek_buffer(buffer, len); | ||
3978 | if (!ret) | ||
3979 | return ret; | ||
3980 | buffer->ptr += len; | ||
3981 | buffer->len -= len; | ||
3982 | buffer->processed += len; | ||
3983 | return ret; | ||
3984 | } | ||
3985 | |||
3986 | static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu, | ||
3987 | gpa_t addr, gpa_t value) | ||
3988 | { | ||
3989 | int bytes = 8; | ||
3990 | int r; | ||
3991 | |||
3992 | if (!is_long_mode(vcpu) && !is_pae(vcpu)) | ||
3993 | bytes = 4; | ||
3994 | |||
3995 | r = mmu_topup_memory_caches(vcpu); | ||
3996 | if (r) | ||
3997 | return r; | ||
3998 | |||
3999 | if (!emulator_write_phys(vcpu, addr, &value, bytes)) | ||
4000 | return -EFAULT; | ||
4001 | |||
4002 | return 1; | ||
4003 | } | ||
4004 | |||
4005 | static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu) | ||
4006 | { | ||
4007 | (void)kvm_set_cr3(vcpu, kvm_read_cr3(vcpu)); | ||
4008 | return 1; | ||
4009 | } | ||
4010 | |||
4011 | static int kvm_pv_mmu_release_pt(struct kvm_vcpu *vcpu, gpa_t addr) | ||
4012 | { | ||
4013 | spin_lock(&vcpu->kvm->mmu_lock); | ||
4014 | mmu_unshadow(vcpu->kvm, addr >> PAGE_SHIFT); | ||
4015 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
4016 | return 1; | ||
4017 | } | ||
4018 | |||
4019 | static int kvm_pv_mmu_op_one(struct kvm_vcpu *vcpu, | ||
4020 | struct kvm_pv_mmu_op_buffer *buffer) | ||
4021 | { | ||
4022 | struct kvm_mmu_op_header *header; | ||
4023 | |||
4024 | header = pv_mmu_peek_buffer(buffer, sizeof *header); | ||
4025 | if (!header) | ||
4026 | return 0; | ||
4027 | switch (header->op) { | ||
4028 | case KVM_MMU_OP_WRITE_PTE: { | ||
4029 | struct kvm_mmu_op_write_pte *wpte; | ||
4030 | |||
4031 | wpte = pv_mmu_read_buffer(buffer, sizeof *wpte); | ||
4032 | if (!wpte) | ||
4033 | return 0; | ||
4034 | return kvm_pv_mmu_write(vcpu, wpte->pte_phys, | ||
4035 | wpte->pte_val); | ||
4036 | } | ||
4037 | case KVM_MMU_OP_FLUSH_TLB: { | ||
4038 | struct kvm_mmu_op_flush_tlb *ftlb; | ||
4039 | |||
4040 | ftlb = pv_mmu_read_buffer(buffer, sizeof *ftlb); | ||
4041 | if (!ftlb) | ||
4042 | return 0; | ||
4043 | return kvm_pv_mmu_flush_tlb(vcpu); | ||
4044 | } | ||
4045 | case KVM_MMU_OP_RELEASE_PT: { | ||
4046 | struct kvm_mmu_op_release_pt *rpt; | ||
4047 | |||
4048 | rpt = pv_mmu_read_buffer(buffer, sizeof *rpt); | ||
4049 | if (!rpt) | ||
4050 | return 0; | ||
4051 | return kvm_pv_mmu_release_pt(vcpu, rpt->pt_phys); | ||
4052 | } | ||
4053 | default: return 0; | ||
4054 | } | ||
4055 | } | ||
4056 | |||
4057 | int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes, | ||
4058 | gpa_t addr, unsigned long *ret) | ||
4059 | { | ||
4060 | int r; | ||
4061 | struct kvm_pv_mmu_op_buffer *buffer = &vcpu->arch.mmu_op_buffer; | ||
4062 | |||
4063 | buffer->ptr = buffer->buf; | ||
4064 | buffer->len = min_t(unsigned long, bytes, sizeof buffer->buf); | ||
4065 | buffer->processed = 0; | ||
4066 | |||
4067 | r = kvm_read_guest(vcpu->kvm, addr, buffer->buf, buffer->len); | ||
4068 | if (r) | ||
4069 | goto out; | ||
4070 | |||
4071 | while (buffer->len) { | ||
4072 | r = kvm_pv_mmu_op_one(vcpu, buffer); | ||
4073 | if (r < 0) | ||
4074 | goto out; | ||
4075 | if (r == 0) | ||
4076 | break; | ||
4077 | } | ||
4078 | |||
4079 | r = 1; | ||
4080 | out: | ||
4081 | *ret = buffer->processed; | ||
4082 | return r; | ||
4083 | } | ||
4084 | |||
4085 | int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]) | 4002 | int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]) |
4086 | { | 4003 | { |
4087 | struct kvm_shadow_walk_iterator iterator; | 4004 | struct kvm_shadow_walk_iterator iterator; |
@@ -4110,12 +4027,6 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu) | |||
4110 | mmu_free_memory_caches(vcpu); | 4027 | mmu_free_memory_caches(vcpu); |
4111 | } | 4028 | } |
4112 | 4029 | ||
4113 | #ifdef CONFIG_KVM_MMU_AUDIT | ||
4114 | #include "mmu_audit.c" | ||
4115 | #else | ||
4116 | static void mmu_audit_disable(void) { } | ||
4117 | #endif | ||
4118 | |||
4119 | void kvm_mmu_module_exit(void) | 4030 | void kvm_mmu_module_exit(void) |
4120 | { | 4031 | { |
4121 | mmu_destroy_caches(); | 4032 | mmu_destroy_caches(); |
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index 746ec259d024..fe15dcc07a6b 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c | |||
@@ -19,6 +19,15 @@ | |||
19 | 19 | ||
20 | #include <linux/ratelimit.h> | 20 | #include <linux/ratelimit.h> |
21 | 21 | ||
22 | char const *audit_point_name[] = { | ||
23 | "pre page fault", | ||
24 | "post page fault", | ||
25 | "pre pte write", | ||
26 | "post pte write", | ||
27 | "pre sync", | ||
28 | "post sync" | ||
29 | }; | ||
30 | |||
22 | #define audit_printk(kvm, fmt, args...) \ | 31 | #define audit_printk(kvm, fmt, args...) \ |
23 | printk(KERN_ERR "audit: (%s) error: " \ | 32 | printk(KERN_ERR "audit: (%s) error: " \ |
24 | fmt, audit_point_name[kvm->arch.audit_point], ##args) | 33 | fmt, audit_point_name[kvm->arch.audit_point], ##args) |
@@ -224,7 +233,10 @@ static void audit_vcpu_spte(struct kvm_vcpu *vcpu) | |||
224 | mmu_spte_walk(vcpu, audit_spte); | 233 | mmu_spte_walk(vcpu, audit_spte); |
225 | } | 234 | } |
226 | 235 | ||
227 | static void kvm_mmu_audit(void *ignore, struct kvm_vcpu *vcpu, int point) | 236 | static bool mmu_audit; |
237 | static struct jump_label_key mmu_audit_key; | ||
238 | |||
239 | static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) | ||
228 | { | 240 | { |
229 | static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10); | 241 | static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10); |
230 | 242 | ||
@@ -236,18 +248,18 @@ static void kvm_mmu_audit(void *ignore, struct kvm_vcpu *vcpu, int point) | |||
236 | audit_vcpu_spte(vcpu); | 248 | audit_vcpu_spte(vcpu); |
237 | } | 249 | } |
238 | 250 | ||
239 | static bool mmu_audit; | 251 | static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) |
252 | { | ||
253 | if (static_branch((&mmu_audit_key))) | ||
254 | __kvm_mmu_audit(vcpu, point); | ||
255 | } | ||
240 | 256 | ||
241 | static void mmu_audit_enable(void) | 257 | static void mmu_audit_enable(void) |
242 | { | 258 | { |
243 | int ret; | ||
244 | |||
245 | if (mmu_audit) | 259 | if (mmu_audit) |
246 | return; | 260 | return; |
247 | 261 | ||
248 | ret = register_trace_kvm_mmu_audit(kvm_mmu_audit, NULL); | 262 | jump_label_inc(&mmu_audit_key); |
249 | WARN_ON(ret); | ||
250 | |||
251 | mmu_audit = true; | 263 | mmu_audit = true; |
252 | } | 264 | } |
253 | 265 | ||
@@ -256,8 +268,7 @@ static void mmu_audit_disable(void) | |||
256 | if (!mmu_audit) | 268 | if (!mmu_audit) |
257 | return; | 269 | return; |
258 | 270 | ||
259 | unregister_trace_kvm_mmu_audit(kvm_mmu_audit, NULL); | 271 | jump_label_dec(&mmu_audit_key); |
260 | tracepoint_synchronize_unregister(); | ||
261 | mmu_audit = false; | 272 | mmu_audit = false; |
262 | } | 273 | } |
263 | 274 | ||
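mmu_audit moves off the tracepoint machinery onto a jump label: when auditing is disabled, the call site is a patched-out branch rather than a registered-callback check. The pattern in miniature (the jump_label_inc/static_branch spellings match this diff's vintage; later kernels renamed them static_key_slow_inc() and static_branch_unlikely()):

    static struct jump_label_key mmu_audit_key;

    static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
    {
            if (static_branch(&mmu_audit_key))      /* normally patched out */
                    __kvm_mmu_audit(vcpu, point);
    }

    /* toggled by the enable/disable paths: */
    jump_label_inc(&mmu_audit_key);                 /* mmu_audit_enable()  */
    jump_label_dec(&mmu_audit_key);                 /* mmu_audit_disable() */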
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index eed67f34146d..89fb0e81322a 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h | |||
@@ -243,25 +243,6 @@ TRACE_EVENT( | |||
243 | TP_printk("addr:%llx gfn %llx access %x", __entry->addr, __entry->gfn, | 243 | TP_printk("addr:%llx gfn %llx access %x", __entry->addr, __entry->gfn, |
244 | __entry->access) | 244 | __entry->access) |
245 | ); | 245 | ); |
246 | |||
247 | TRACE_EVENT( | ||
248 | kvm_mmu_audit, | ||
249 | TP_PROTO(struct kvm_vcpu *vcpu, int audit_point), | ||
250 | TP_ARGS(vcpu, audit_point), | ||
251 | |||
252 | TP_STRUCT__entry( | ||
253 | __field(struct kvm_vcpu *, vcpu) | ||
254 | __field(int, audit_point) | ||
255 | ), | ||
256 | |||
257 | TP_fast_assign( | ||
258 | __entry->vcpu = vcpu; | ||
259 | __entry->audit_point = audit_point; | ||
260 | ), | ||
261 | |||
262 | TP_printk("vcpu:%d %s", __entry->vcpu->cpu, | ||
263 | audit_point_name[__entry->audit_point]) | ||
264 | ); | ||
265 | #endif /* _TRACE_KVMMMU_H */ | 246 | #endif /* _TRACE_KVMMMU_H */ |
266 | 247 | ||
267 | #undef TRACE_INCLUDE_PATH | 248 | #undef TRACE_INCLUDE_PATH |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 92994100638b..15610285ebb6 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -497,6 +497,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
497 | shadow_walk_next(&it)) { | 497 | shadow_walk_next(&it)) { |
498 | gfn_t table_gfn; | 498 | gfn_t table_gfn; |
499 | 499 | ||
500 | clear_sp_write_flooding_count(it.sptep); | ||
500 | drop_large_spte(vcpu, it.sptep); | 501 | drop_large_spte(vcpu, it.sptep); |
501 | 502 | ||
502 | sp = NULL; | 503 | sp = NULL; |
@@ -522,6 +523,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
522 | shadow_walk_next(&it)) { | 523 | shadow_walk_next(&it)) { |
523 | gfn_t direct_gfn; | 524 | gfn_t direct_gfn; |
524 | 525 | ||
526 | clear_sp_write_flooding_count(it.sptep); | ||
525 | validate_direct_spte(vcpu, it.sptep, direct_access); | 527 | validate_direct_spte(vcpu, it.sptep, direct_access); |
526 | 528 | ||
527 | drop_large_spte(vcpu, it.sptep); | 529 | drop_large_spte(vcpu, it.sptep); |
@@ -536,6 +538,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
536 | link_shadow_page(it.sptep, sp); | 538 | link_shadow_page(it.sptep, sp); |
537 | } | 539 | } |
538 | 540 | ||
541 | clear_sp_write_flooding_count(it.sptep); | ||
539 | mmu_set_spte(vcpu, it.sptep, access, gw->pte_access, | 542 | mmu_set_spte(vcpu, it.sptep, access, gw->pte_access, |
540 | user_fault, write_fault, emulate, it.level, | 543 | user_fault, write_fault, emulate, it.level, |
541 | gw->gfn, pfn, prefault, map_writable); | 544 | gw->gfn, pfn, prefault, map_writable); |
@@ -599,11 +602,9 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
599 | */ | 602 | */ |
600 | if (!r) { | 603 | if (!r) { |
601 | pgprintk("%s: guest page fault\n", __func__); | 604 | pgprintk("%s: guest page fault\n", __func__); |
602 | if (!prefault) { | 605 | if (!prefault) |
603 | inject_page_fault(vcpu, &walker.fault); | 606 | inject_page_fault(vcpu, &walker.fault); |
604 | /* reset fork detector */ | 607 | |
605 | vcpu->arch.last_pt_write_count = 0; | ||
606 | } | ||
607 | return 0; | 608 | return 0; |
608 | } | 609 | } |
609 | 610 | ||
@@ -631,7 +632,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
631 | if (mmu_notifier_retry(vcpu, mmu_seq)) | 632 | if (mmu_notifier_retry(vcpu, mmu_seq)) |
632 | goto out_unlock; | 633 | goto out_unlock; |
633 | 634 | ||
634 | trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); | 635 | kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); |
635 | kvm_mmu_free_some_pages(vcpu); | 636 | kvm_mmu_free_some_pages(vcpu); |
636 | if (!force_pt_level) | 637 | if (!force_pt_level) |
637 | transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); | 638 | transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); |
@@ -641,11 +642,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
641 | pgprintk("%s: shadow pte %p %llx emulate %d\n", __func__, | 642 | pgprintk("%s: shadow pte %p %llx emulate %d\n", __func__, |
642 | sptep, *sptep, emulate); | 643 | sptep, *sptep, emulate); |
643 | 644 | ||
644 | if (!emulate) | ||
645 | vcpu->arch.last_pt_write_count = 0; /* reset fork detector */ | ||
646 | |||
647 | ++vcpu->stat.pf_fixed; | 645 | ++vcpu->stat.pf_fixed; |
648 | trace_kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); | 646 | kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); |
649 | spin_unlock(&vcpu->kvm->mmu_lock); | 647 | spin_unlock(&vcpu->kvm->mmu_lock); |
650 | 648 | ||
651 | return emulate; | 649 | return emulate; |
@@ -656,65 +654,66 @@ out_unlock: | |||
656 | return 0; | 654 | return 0; |
657 | } | 655 | } |
658 | 656 | ||
657 | static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp) | ||
658 | { | ||
659 | int offset = 0; | ||
660 | |||
661 | WARN_ON(sp->role.level != 1); | ||
662 | |||
663 | if (PTTYPE == 32) | ||
664 | offset = sp->role.quadrant << PT64_LEVEL_BITS; | ||
665 | |||
666 | return gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t); | ||
667 | } | ||
668 | |||
659 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | 669 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) |
660 | { | 670 | { |
661 | struct kvm_shadow_walk_iterator iterator; | 671 | struct kvm_shadow_walk_iterator iterator; |
662 | struct kvm_mmu_page *sp; | 672 | struct kvm_mmu_page *sp; |
663 | gpa_t pte_gpa = -1; | ||
664 | int level; | 673 | int level; |
665 | u64 *sptep; | 674 | u64 *sptep; |
666 | int need_flush = 0; | ||
667 | 675 | ||
668 | vcpu_clear_mmio_info(vcpu, gva); | 676 | vcpu_clear_mmio_info(vcpu, gva); |
669 | 677 | ||
670 | spin_lock(&vcpu->kvm->mmu_lock); | 678 | /* |
679 | * No need to check the return value here; rmap_can_add() | ||
680 | * lets us skip the pte prefetch later. | ||
681 | */ | ||
682 | mmu_topup_memory_caches(vcpu); | ||
671 | 683 | ||
684 | spin_lock(&vcpu->kvm->mmu_lock); | ||
672 | for_each_shadow_entry(vcpu, gva, iterator) { | 685 | for_each_shadow_entry(vcpu, gva, iterator) { |
673 | level = iterator.level; | 686 | level = iterator.level; |
674 | sptep = iterator.sptep; | 687 | sptep = iterator.sptep; |
675 | 688 | ||
676 | sp = page_header(__pa(sptep)); | 689 | sp = page_header(__pa(sptep)); |
677 | if (is_last_spte(*sptep, level)) { | 690 | if (is_last_spte(*sptep, level)) { |
678 | int offset, shift; | 691 | pt_element_t gpte; |
692 | gpa_t pte_gpa; | ||
679 | 693 | ||
680 | if (!sp->unsync) | 694 | if (!sp->unsync) |
681 | break; | 695 | break; |
682 | 696 | ||
683 | shift = PAGE_SHIFT - | 697 | pte_gpa = FNAME(get_level1_sp_gpa)(sp); |
684 | (PT_LEVEL_BITS - PT64_LEVEL_BITS) * level; | ||
685 | offset = sp->role.quadrant << shift; | ||
686 | |||
687 | pte_gpa = (sp->gfn << PAGE_SHIFT) + offset; | ||
688 | pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); | 698 | pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); |
689 | 699 | ||
690 | if (is_shadow_present_pte(*sptep)) { | 700 | if (mmu_page_zap_pte(vcpu->kvm, sp, sptep)) |
691 | if (is_large_pte(*sptep)) | 701 | kvm_flush_remote_tlbs(vcpu->kvm); |
692 | --vcpu->kvm->stat.lpages; | ||
693 | drop_spte(vcpu->kvm, sptep); | ||
694 | need_flush = 1; | ||
695 | } else if (is_mmio_spte(*sptep)) | ||
696 | mmu_spte_clear_no_track(sptep); | ||
697 | 702 | ||
698 | break; | 703 | if (!rmap_can_add(vcpu)) |
704 | break; | ||
705 | |||
706 | if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte, | ||
707 | sizeof(pt_element_t))) | ||
708 | break; | ||
709 | |||
710 | FNAME(update_pte)(vcpu, sp, sptep, &gpte); | ||
699 | } | 711 | } |
700 | 712 | ||
701 | if (!is_shadow_present_pte(*sptep) || !sp->unsync_children) | 713 | if (!is_shadow_present_pte(*sptep) || !sp->unsync_children) |
702 | break; | 714 | break; |
703 | } | 715 | } |
704 | |||
705 | if (need_flush) | ||
706 | kvm_flush_remote_tlbs(vcpu->kvm); | ||
707 | |||
708 | atomic_inc(&vcpu->kvm->arch.invlpg_counter); | ||
709 | |||
710 | spin_unlock(&vcpu->kvm->mmu_lock); | 716 | spin_unlock(&vcpu->kvm->mmu_lock); |
711 | |||
712 | if (pte_gpa == -1) | ||
713 | return; | ||
714 | |||
715 | if (mmu_topup_memory_caches(vcpu)) | ||
716 | return; | ||
717 | kvm_mmu_pte_write(vcpu, pte_gpa, NULL, sizeof(pt_element_t), 0); | ||
718 | } | 717 | } |
719 | 718 | ||
720 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, | 719 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, |
@@ -769,19 +768,14 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, | |||
769 | */ | 768 | */ |
770 | static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 769 | static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) |
771 | { | 770 | { |
772 | int i, offset, nr_present; | 771 | int i, nr_present = 0; |
773 | bool host_writable; | 772 | bool host_writable; |
774 | gpa_t first_pte_gpa; | 773 | gpa_t first_pte_gpa; |
775 | 774 | ||
776 | offset = nr_present = 0; | ||
777 | |||
778 | /* direct kvm_mmu_page can not be unsync. */ | 775 | /* direct kvm_mmu_page can not be unsync. */ |
779 | BUG_ON(sp->role.direct); | 776 | BUG_ON(sp->role.direct); |
780 | 777 | ||
781 | if (PTTYPE == 32) | 778 | first_pte_gpa = FNAME(get_level1_sp_gpa)(sp); |
782 | offset = sp->role.quadrant << PT64_LEVEL_BITS; | ||
783 | |||
784 | first_pte_gpa = gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t); | ||
785 | 779 | ||
786 | for (i = 0; i < PT64_ENT_PER_PAGE; i++) { | 780 | for (i = 0; i < PT64_ENT_PER_PAGE; i++) { |
787 | unsigned pte_access; | 781 | unsigned pte_access; |
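FNAME(sync_page) and FNAME(invlpg) now share FNAME(get_level1_sp_gpa) for the quadrant arithmetic. Worked example for a 32-bit guest (PTTYPE == 32, with PT64_LEVEL_BITS == 9 and 4-byte guest ptes assumed): a 4 KiB guest page table holds 1024 entries but a shadow page holds only 512, so quadrant 1 starts half-way into the guest page:

    int offset = 1 << 9;                            /* quadrant 1 << 9 = 512   */
    gpa_t pte_gpa = gfn_to_gpa(sp->gfn)
                  + offset * sizeof(pt_element_t);  /* base + 512 * 4 = +2048  */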
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c new file mode 100644 index 000000000000..7aad5446f393 --- /dev/null +++ b/arch/x86/kvm/pmu.c | |||
@@ -0,0 +1,533 @@ | |||
1 | /* | ||
2 | * Kernel-based Virtual Machine -- Performance Monitoring Unit support | ||
3 | * | ||
4 | * Copyright 2011 Red Hat, Inc. and/or its affiliates. | ||
5 | * | ||
6 | * Authors: | ||
7 | * Avi Kivity <avi@redhat.com> | ||
8 | * Gleb Natapov <gleb@redhat.com> | ||
9 | * | ||
10 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
11 | * the COPYING file in the top-level directory. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include <linux/types.h> | ||
16 | #include <linux/kvm_host.h> | ||
17 | #include <linux/perf_event.h> | ||
18 | #include "x86.h" | ||
19 | #include "cpuid.h" | ||
20 | #include "lapic.h" | ||
21 | |||
22 | static struct kvm_arch_event_perf_mapping { | ||
23 | u8 eventsel; | ||
24 | u8 unit_mask; | ||
25 | unsigned event_type; | ||
26 | bool inexact; | ||
27 | } arch_events[] = { | ||
28 | /* Index must match CPUID 0x0A.EBX bit vector */ | ||
29 | [0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES }, | ||
30 | [1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS }, | ||
31 | [2] = { 0x3c, 0x01, PERF_COUNT_HW_BUS_CYCLES }, | ||
32 | [3] = { 0x2e, 0x4f, PERF_COUNT_HW_CACHE_REFERENCES }, | ||
33 | [4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES }, | ||
34 | [5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, | ||
35 | [6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES }, | ||
36 | }; | ||
37 | |||
38 | /* mapping between fixed pmc index and arch_events array */ | ||
39 | int fixed_pmc_events[] = {1, 0, 2}; | ||
40 | |||
41 | static bool pmc_is_gp(struct kvm_pmc *pmc) | ||
42 | { | ||
43 | return pmc->type == KVM_PMC_GP; | ||
44 | } | ||
45 | |||
46 | static inline u64 pmc_bitmask(struct kvm_pmc *pmc) | ||
47 | { | ||
48 | struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu; | ||
49 | |||
50 | return pmu->counter_bitmask[pmc->type]; | ||
51 | } | ||
52 | |||
53 | static inline bool pmc_enabled(struct kvm_pmc *pmc) | ||
54 | { | ||
55 | struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu; | ||
56 | return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl); | ||
57 | } | ||
58 | |||
59 | static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr, | ||
60 | u32 base) | ||
61 | { | ||
62 | if (msr >= base && msr < base + pmu->nr_arch_gp_counters) | ||
63 | return &pmu->gp_counters[msr - base]; | ||
64 | return NULL; | ||
65 | } | ||
66 | |||
67 | static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr) | ||
68 | { | ||
69 | int base = MSR_CORE_PERF_FIXED_CTR0; | ||
70 | if (msr >= base && msr < base + pmu->nr_arch_fixed_counters) | ||
71 | return &pmu->fixed_counters[msr - base]; | ||
72 | return NULL; | ||
73 | } | ||
74 | |||
75 | static inline struct kvm_pmc *get_fixed_pmc_idx(struct kvm_pmu *pmu, int idx) | ||
76 | { | ||
77 | return get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + idx); | ||
78 | } | ||
79 | |||
80 | static struct kvm_pmc *global_idx_to_pmc(struct kvm_pmu *pmu, int idx) | ||
81 | { | ||
82 | if (idx < X86_PMC_IDX_FIXED) | ||
83 | return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + idx, MSR_P6_EVNTSEL0); | ||
84 | else | ||
85 | return get_fixed_pmc_idx(pmu, idx - X86_PMC_IDX_FIXED); | ||
86 | } | ||
87 | |||
88 | void kvm_deliver_pmi(struct kvm_vcpu *vcpu) | ||
89 | { | ||
90 | if (vcpu->arch.apic) | ||
91 | kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC); | ||
92 | } | ||
93 | |||
94 | static void trigger_pmi(struct irq_work *irq_work) | ||
95 | { | ||
96 | struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu, | ||
97 | irq_work); | ||
98 | struct kvm_vcpu *vcpu = container_of(pmu, struct kvm_vcpu, | ||
99 | arch.pmu); | ||
100 | |||
101 | kvm_deliver_pmi(vcpu); | ||
102 | } | ||
103 | |||
104 | static void kvm_perf_overflow(struct perf_event *perf_event, | ||
105 | struct perf_sample_data *data, | ||
106 | struct pt_regs *regs) | ||
107 | { | ||
108 | struct kvm_pmc *pmc = perf_event->overflow_handler_context; | ||
109 | struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu; | ||
110 | __set_bit(pmc->idx, (unsigned long *)&pmu->global_status); | ||
111 | } | ||
112 | |||
113 | static void kvm_perf_overflow_intr(struct perf_event *perf_event, | ||
114 | struct perf_sample_data *data, struct pt_regs *regs) | ||
115 | { | ||
116 | struct kvm_pmc *pmc = perf_event->overflow_handler_context; | ||
117 | struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu; | ||
118 | if (!test_and_set_bit(pmc->idx, (unsigned long *)&pmu->reprogram_pmi)) { | ||
119 | kvm_perf_overflow(perf_event, data, regs); | ||
120 | kvm_make_request(KVM_REQ_PMU, pmc->vcpu); | ||
121 | /* | ||
122 | * Inject PMI. If the vcpu was in guest mode during the NMI, the | ||
123 | * PMI can be injected on guest mode re-entry. Otherwise we can't | ||
124 | * be sure the vcpu wasn't executing a hlt instruction at the | ||
125 | * time of vmexit and is not going to re-enter guest mode until | ||
126 | * woken up. So we should wake it, but this is impossible from | ||
127 | * NMI context. Do it from irq work instead. | ||
128 | */ | ||
129 | if (!kvm_is_in_guest()) | ||
130 | irq_work_queue(&pmc->vcpu->arch.pmu.irq_work); | ||
131 | else | ||
132 | kvm_make_request(KVM_REQ_PMI, pmc->vcpu); | ||
133 | } | ||
134 | } | ||
135 | |||
136 | static u64 read_pmc(struct kvm_pmc *pmc) | ||
137 | { | ||
138 | u64 counter, enabled, running; | ||
139 | |||
140 | counter = pmc->counter; | ||
141 | |||
142 | if (pmc->perf_event) | ||
143 | counter += perf_event_read_value(pmc->perf_event, | ||
144 | &enabled, &running); | ||
145 | |||
146 | /* FIXME: Scaling needed? */ | ||
147 | |||
148 | return counter & pmc_bitmask(pmc); | ||
149 | } | ||
150 | |||
151 | static void stop_counter(struct kvm_pmc *pmc) | ||
152 | { | ||
153 | if (pmc->perf_event) { | ||
154 | pmc->counter = read_pmc(pmc); | ||
155 | perf_event_release_kernel(pmc->perf_event); | ||
156 | pmc->perf_event = NULL; | ||
157 | } | ||
158 | } | ||
159 | |||
160 | static void reprogram_counter(struct kvm_pmc *pmc, u32 type, | ||
161 | unsigned config, bool exclude_user, bool exclude_kernel, | ||
162 | bool intr) | ||
163 | { | ||
164 | struct perf_event *event; | ||
165 | struct perf_event_attr attr = { | ||
166 | .type = type, | ||
167 | .size = sizeof(attr), | ||
168 | .pinned = true, | ||
169 | .exclude_idle = true, | ||
170 | .exclude_host = true, | ||
171 | .exclude_user = exclude_user, | ||
172 | .exclude_kernel = exclude_kernel, | ||
173 | .config = config, | ||
174 | }; | ||
175 | |||
176 | attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc); | ||
177 | |||
178 | event = perf_event_create_kernel_counter(&attr, -1, current, | ||
179 | intr ? kvm_perf_overflow_intr : | ||
180 | kvm_perf_overflow, pmc); | ||
181 | if (IS_ERR(event)) { | ||
182 | printk_once("kvm: pmu event creation failed %ld\n", | ||
183 | PTR_ERR(event)); | ||
184 | return; | ||
185 | } | ||
186 | |||
187 | pmc->perf_event = event; | ||
188 | clear_bit(pmc->idx, (unsigned long*)&pmc->vcpu->arch.pmu.reprogram_pmi); | ||
189 | } | ||
190 | |||
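The sample_period line above arms perf so the host counter overflows exactly when the guest's virtual counter would wrap at its architectural width: (-counter) & mask equals 2^width - counter, i.e. the number of events still to count. A worked example with illustrative values:

    /* 48-bit counter preloaded by the guest with 0xffffffffff00. */
    u64 mask = ((u64)1 << 48) - 1;
    u64 counter = 0xffffffffff00ULL;
    u64 period = (-counter) & mask;  /* 0x100: overflow after 256 events */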
191 | static unsigned find_arch_event(struct kvm_pmu *pmu, u8 event_select, | ||
192 | u8 unit_mask) | ||
193 | { | ||
194 | int i; | ||
195 | |||
196 | for (i = 0; i < ARRAY_SIZE(arch_events); i++) | ||
197 | if (arch_events[i].eventsel == event_select | ||
198 | && arch_events[i].unit_mask == unit_mask | ||
199 | && (pmu->available_event_types & (1 << i))) | ||
200 | break; | ||
201 | |||
202 | if (i == ARRAY_SIZE(arch_events)) | ||
203 | return PERF_COUNT_HW_MAX; | ||
204 | |||
205 | return arch_events[i].event_type; | ||
206 | } | ||
207 | |||
208 | static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) | ||
209 | { | ||
210 | unsigned config, type = PERF_TYPE_RAW; | ||
211 | u8 event_select, unit_mask; | ||
212 | |||
213 | pmc->eventsel = eventsel; | ||
214 | |||
215 | stop_counter(pmc); | ||
216 | |||
217 | if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_enabled(pmc)) | ||
218 | return; | ||
219 | |||
220 | event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT; | ||
221 | unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; | ||
222 | |||
223 | if (!(event_select & (ARCH_PERFMON_EVENTSEL_EDGE | | ||
224 | ARCH_PERFMON_EVENTSEL_INV | | ||
225 | ARCH_PERFMON_EVENTSEL_CMASK))) { | ||
226 | config = find_arch_event(&pmc->vcpu->arch.pmu, event_select, | ||
227 | unit_mask); | ||
228 | if (config != PERF_COUNT_HW_MAX) | ||
229 | type = PERF_TYPE_HARDWARE; | ||
230 | } | ||
231 | |||
232 | if (type == PERF_TYPE_RAW) | ||
233 | config = eventsel & X86_RAW_EVENT_MASK; | ||
234 | |||
235 | reprogram_counter(pmc, type, config, | ||
236 | !(eventsel & ARCH_PERFMON_EVENTSEL_USR), | ||
237 | !(eventsel & ARCH_PERFMON_EVENTSEL_OS), | ||
238 | eventsel & ARCH_PERFMON_EVENTSEL_INT); | ||
239 | } | ||
240 | |||
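For reference, reprogram_gp_counter() decodes the eventsel MSR per the SDM layout: event select in bits 7:0, unit mask in bits 15:8, USR/OS in bits 16/17, INT in bit 20, EN in bit 22. A hypothetical guest write of 0x5300c0 (instructions retired, both rings, PMI on overflow) therefore takes the PERF_TYPE_HARDWARE path, since EDGE/INV/CMASK are clear and event 0xc0 with umask 0x00 matches arch_events[1]:

    u64 eventsel = 0x5300c0;  /* EN | INT | OS | USR | event 0xc0 */
    u8 event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;      /* 0xc0 */
    u8 unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;  /* 0x00 */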
241 | static void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 en_pmi, int idx) | ||
242 | { | ||
243 | unsigned en = en_pmi & 0x3; | ||
244 | bool pmi = en_pmi & 0x8; | ||
245 | |||
246 | stop_counter(pmc); | ||
247 | |||
248 | if (!en || !pmc_enabled(pmc)) | ||
249 | return; | ||
250 | |||
251 | reprogram_counter(pmc, PERF_TYPE_HARDWARE, | ||
252 | arch_events[fixed_pmc_events[idx]].event_type, | ||
253 | !(en & 0x2), /* exclude user */ | ||
254 | !(en & 0x1), /* exclude kernel */ | ||
255 | pmi); | ||
256 | } | ||
257 | |||
258 | static inline u8 fixed_en_pmi(u64 ctrl, int idx) | ||
259 | { | ||
260 | return (ctrl >> (idx * 4)) & 0xf; | ||
261 | } | ||
262 | |||
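Each fixed counter owns one 4-bit field of MSR_CORE_PERF_FIXED_CTR_CTRL, which fixed_en_pmi() extracts: bit 0 enables ring-0 counting, bit 1 ring-3 counting, and bit 3 requests a PMI on overflow (bit 2, any-thread, is treated as reserved by the wrmsr path below). For example, with counter 1's nibble set to 0xb:

    /* ctrl = 0xb0: counter 1 counts user+kernel with PMI enabled. */
    u8 en_pmi = fixed_en_pmi(0xb0ULL, 1);  /* 0xb */
    unsigned en = en_pmi & 0x3;            /* 3: both rings */
    bool pmi = en_pmi & 0x8;               /* true */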
263 | static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data) | ||
264 | { | ||
265 | int i; | ||
266 | |||
267 | for (i = 0; i < pmu->nr_arch_fixed_counters; i++) { | ||
268 | u8 en_pmi = fixed_en_pmi(data, i); | ||
269 | struct kvm_pmc *pmc = get_fixed_pmc_idx(pmu, i); | ||
270 | |||
271 | if (fixed_en_pmi(pmu->fixed_ctr_ctrl, i) == en_pmi) | ||
272 | continue; | ||
273 | |||
274 | reprogram_fixed_counter(pmc, en_pmi, i); | ||
275 | } | ||
276 | |||
277 | pmu->fixed_ctr_ctrl = data; | ||
278 | } | ||
279 | |||
280 | static void reprogram_idx(struct kvm_pmu *pmu, int idx) | ||
281 | { | ||
282 | struct kvm_pmc *pmc = global_idx_to_pmc(pmu, idx); | ||
283 | |||
284 | if (!pmc) | ||
285 | return; | ||
286 | |||
287 | if (pmc_is_gp(pmc)) | ||
288 | reprogram_gp_counter(pmc, pmc->eventsel); | ||
289 | else { | ||
290 | int fidx = idx - X86_PMC_IDX_FIXED; | ||
291 | reprogram_fixed_counter(pmc, | ||
292 | fixed_en_pmi(pmu->fixed_ctr_ctrl, fidx), fidx); | ||
293 | } | ||
294 | } | ||
295 | |||
296 | static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data) | ||
297 | { | ||
298 | int bit; | ||
299 | u64 diff = pmu->global_ctrl ^ data; | ||
300 | |||
301 | pmu->global_ctrl = data; | ||
302 | |||
303 | for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX) | ||
304 | reprogram_idx(pmu, bit); | ||
305 | } | ||
306 | |||
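global_ctrl_changed() XORs the old and new MSR_CORE_PERF_GLOBAL_CTRL values so that only counters whose enable bit actually flipped are reprogrammed; fixed counters live at bit X86_PMC_IDX_FIXED (32) and up. An illustrative diff:

    /* GP0 stays enabled, GP1 turns on, fixed counter 0 turns off. */
    u64 old = 0x1ULL | (1ULL << 32);
    u64 data = 0x3ULL;
    u64 diff = old ^ data;  /* bits 1 and 32 set -> reprogram only those */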
307 | bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr) | ||
308 | { | ||
309 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
310 | int ret; | ||
311 | |||
312 | switch (msr) { | ||
313 | case MSR_CORE_PERF_FIXED_CTR_CTRL: | ||
314 | case MSR_CORE_PERF_GLOBAL_STATUS: | ||
315 | case MSR_CORE_PERF_GLOBAL_CTRL: | ||
316 | case MSR_CORE_PERF_GLOBAL_OVF_CTRL: | ||
317 | ret = pmu->version > 1; | ||
318 | break; | ||
319 | default: | ||
320 | ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) | ||
321 | || get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) | ||
322 | || get_fixed_pmc(pmu, msr); | ||
323 | break; | ||
324 | } | ||
325 | return ret; | ||
326 | } | ||
327 | |||
328 | int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data) | ||
329 | { | ||
330 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
331 | struct kvm_pmc *pmc; | ||
332 | |||
333 | switch (index) { | ||
334 | case MSR_CORE_PERF_FIXED_CTR_CTRL: | ||
335 | *data = pmu->fixed_ctr_ctrl; | ||
336 | return 0; | ||
337 | case MSR_CORE_PERF_GLOBAL_STATUS: | ||
338 | *data = pmu->global_status; | ||
339 | return 0; | ||
340 | case MSR_CORE_PERF_GLOBAL_CTRL: | ||
341 | *data = pmu->global_ctrl; | ||
342 | return 0; | ||
343 | case MSR_CORE_PERF_GLOBAL_OVF_CTRL: | ||
344 | *data = pmu->global_ovf_ctrl; | ||
345 | return 0; | ||
346 | default: | ||
347 | if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) || | ||
348 | (pmc = get_fixed_pmc(pmu, index))) { | ||
349 | *data = read_pmc(pmc); | ||
350 | return 0; | ||
351 | } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) { | ||
352 | *data = pmc->eventsel; | ||
353 | return 0; | ||
354 | } | ||
355 | } | ||
356 | return 1; | ||
357 | } | ||
358 | |||
359 | int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) | ||
360 | { | ||
361 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
362 | struct kvm_pmc *pmc; | ||
363 | |||
364 | switch (index) { | ||
365 | case MSR_CORE_PERF_FIXED_CTR_CTRL: | ||
366 | if (pmu->fixed_ctr_ctrl == data) | ||
367 | return 0; | ||
368 | if (!(data & 0xfffffffffffff444)) { | ||
369 | reprogram_fixed_counters(pmu, data); | ||
370 | return 0; | ||
371 | } | ||
372 | break; | ||
373 | case MSR_CORE_PERF_GLOBAL_STATUS: | ||
374 | break; /* RO MSR */ | ||
375 | case MSR_CORE_PERF_GLOBAL_CTRL: | ||
376 | if (pmu->global_ctrl == data) | ||
377 | return 0; | ||
378 | if (!(data & pmu->global_ctrl_mask)) { | ||
379 | global_ctrl_changed(pmu, data); | ||
380 | return 0; | ||
381 | } | ||
382 | break; | ||
383 | case MSR_CORE_PERF_GLOBAL_OVF_CTRL: | ||
384 | if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) { | ||
385 | pmu->global_status &= ~data; | ||
386 | pmu->global_ovf_ctrl = data; | ||
387 | return 0; | ||
388 | } | ||
389 | break; | ||
390 | default: | ||
391 | if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) || | ||
392 | (pmc = get_fixed_pmc(pmu, index))) { | ||
393 | data = (s64)(s32)data; | ||
394 | pmc->counter += data - read_pmc(pmc); | ||
395 | return 0; | ||
396 | } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) { | ||
397 | if (data == pmc->eventsel) | ||
398 | return 0; | ||
399 | if (!(data & 0xffffffff00200000ull)) { | ||
400 | reprogram_gp_counter(pmc, data); | ||
401 | return 0; | ||
402 | } | ||
403 | } | ||
404 | } | ||
405 | return 1; | ||
406 | } | ||
407 | |||
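Two magic masks in kvm_pmu_set_msr() are worth spelling out. For MSR_CORE_PERF_FIXED_CTR_CTRL, only bits 0, 1 and 3 of each of the three 4-bit fields are writable, so the reserved mask 0xfffffffffffff444 is just the complement of three 0xb nibbles; for the eventsel MSRs, 0xffffffff00200000 rejects the upper 32 bits plus reserved bit 21 (AnyThread). The first mask can be derived as:

    /* Per-nibble writable bits are 0xb (OS, USR, PMI); 3 fixed counters. */
    u64 writable = 0xbULL | (0xbULL << 4) | (0xbULL << 8);  /* 0xbbb */
    u64 reserved = ~writable;  /* 0xfffffffffffff444 */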
408 | int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data) | ||
409 | { | ||
410 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
411 | bool fast_mode = pmc & (1u << 31); | ||
412 | bool fixed = pmc & (1u << 30); | ||
413 | struct kvm_pmc *counters; | ||
414 | u64 ctr; | ||
415 | |||
416 | pmc &= ~(3u << 30); /* strip the fixed and fast-mode flag bits */ | ||
417 | if (!fixed && pmc >= pmu->nr_arch_gp_counters) | ||
418 | return 1; | ||
419 | if (fixed && pmc >= pmu->nr_arch_fixed_counters) | ||
420 | return 1; | ||
421 | counters = fixed ? pmu->fixed_counters : pmu->gp_counters; | ||
422 | ctr = read_pmc(&counters[pmc]); | ||
423 | if (fast_mode) | ||
424 | ctr = (u32)ctr; | ||
425 | *data = ctr; | ||
426 | |||
427 | return 0; | ||
428 | } | ||
429 | |||
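kvm_pmu_read_pmc() implements the RDPMC index convention: ECX bit 30 selects the fixed-counter bank, bit 31 requests "fast" mode (only the low 32 bits are returned), and the remaining bits index into the chosen bank. For instance:

    /* ECX = 0x40000001: full-width read of fixed counter 1. */
    unsigned pmc = 0x40000001;
    bool fixed = pmc & (1u << 30);      /* true */
    bool fast_mode = pmc & (1u << 31);  /* false */
    pmc &= ~(3u << 30);                 /* index 1 */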
430 | void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu) | ||
431 | { | ||
432 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
433 | struct kvm_cpuid_entry2 *entry; | ||
434 | unsigned bitmap_len; | ||
435 | |||
436 | pmu->nr_arch_gp_counters = 0; | ||
437 | pmu->nr_arch_fixed_counters = 0; | ||
438 | pmu->counter_bitmask[KVM_PMC_GP] = 0; | ||
439 | pmu->counter_bitmask[KVM_PMC_FIXED] = 0; | ||
440 | pmu->version = 0; | ||
441 | |||
442 | entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); | ||
443 | if (!entry) | ||
444 | return; | ||
445 | |||
446 | pmu->version = entry->eax & 0xff; | ||
447 | if (!pmu->version) | ||
448 | return; | ||
449 | |||
450 | pmu->nr_arch_gp_counters = min((int)(entry->eax >> 8) & 0xff, | ||
451 | X86_PMC_MAX_GENERIC); | ||
452 | pmu->counter_bitmask[KVM_PMC_GP] = | ||
453 | ((u64)1 << ((entry->eax >> 16) & 0xff)) - 1; | ||
454 | bitmap_len = (entry->eax >> 24) & 0xff; | ||
455 | pmu->available_event_types = ~entry->ebx & ((1ull << bitmap_len) - 1); | ||
456 | |||
457 | if (pmu->version == 1) { | ||
458 | pmu->global_ctrl = (1 << pmu->nr_arch_gp_counters) - 1; | ||
459 | return; | ||
460 | } | ||
461 | |||
462 | pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f), | ||
463 | X86_PMC_MAX_FIXED); | ||
464 | pmu->counter_bitmask[KVM_PMC_FIXED] = | ||
465 | ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1; | ||
466 | pmu->global_ctrl_mask = ~(((1 << pmu->nr_arch_gp_counters) - 1) | ||
467 | | (((1ull << pmu->nr_arch_fixed_counters) - 1) | ||
468 | << X86_PMC_IDX_FIXED)); | ||
469 | } | ||
470 | |||
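kvm_pmu_cpuid_update() parses the guest's CPUID leaf 0xA as the SDM defines it: EAX[7:0] is the PMU version, EAX[15:8] the number of GP counters, EAX[23:16] their width, EAX[31:24] the length of the event-availability bitmap (EBX flags *unavailable* events, hence the ~entry->ebx); EDX[4:0] and EDX[12:5] give the fixed counters' number and width. Decoding a hypothetical version-2 leaf:

    u32 eax = 0x07300402, edx = 0x00000603;
    unsigned version = eax & 0xff;           /* 2 */
    unsigned nr_gp = (eax >> 8) & 0xff;      /* 4 GP counters */
    unsigned gp_bits = (eax >> 16) & 0xff;   /* 48 bits wide */
    unsigned nr_fixed = edx & 0x1f;          /* 3 fixed counters */
    unsigned fixed_bits = (edx >> 5) & 0xff; /* 48 bits wide */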
471 | void kvm_pmu_init(struct kvm_vcpu *vcpu) | ||
472 | { | ||
473 | int i; | ||
474 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
475 | |||
476 | memset(pmu, 0, sizeof(*pmu)); | ||
477 | for (i = 0; i < X86_PMC_MAX_GENERIC; i++) { | ||
478 | pmu->gp_counters[i].type = KVM_PMC_GP; | ||
479 | pmu->gp_counters[i].vcpu = vcpu; | ||
480 | pmu->gp_counters[i].idx = i; | ||
481 | } | ||
482 | for (i = 0; i < X86_PMC_MAX_FIXED; i++) { | ||
483 | pmu->fixed_counters[i].type = KVM_PMC_FIXED; | ||
484 | pmu->fixed_counters[i].vcpu = vcpu; | ||
485 | pmu->fixed_counters[i].idx = i + X86_PMC_IDX_FIXED; | ||
486 | } | ||
487 | init_irq_work(&pmu->irq_work, trigger_pmi); | ||
488 | kvm_pmu_cpuid_update(vcpu); | ||
489 | } | ||
490 | |||
491 | void kvm_pmu_reset(struct kvm_vcpu *vcpu) | ||
492 | { | ||
493 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
494 | int i; | ||
495 | |||
496 | irq_work_sync(&pmu->irq_work); | ||
497 | for (i = 0; i < X86_PMC_MAX_GENERIC; i++) { | ||
498 | struct kvm_pmc *pmc = &pmu->gp_counters[i]; | ||
499 | stop_counter(pmc); | ||
500 | pmc->counter = pmc->eventsel = 0; | ||
501 | } | ||
502 | |||
503 | for (i = 0; i < X86_PMC_MAX_FIXED; i++) | ||
504 | stop_counter(&pmu->fixed_counters[i]); | ||
505 | |||
506 | pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status = | ||
507 | pmu->global_ovf_ctrl = 0; | ||
508 | } | ||
509 | |||
510 | void kvm_pmu_destroy(struct kvm_vcpu *vcpu) | ||
511 | { | ||
512 | kvm_pmu_reset(vcpu); | ||
513 | } | ||
514 | |||
515 | void kvm_handle_pmu_event(struct kvm_vcpu *vcpu) | ||
516 | { | ||
517 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
518 | u64 bitmask; | ||
519 | int bit; | ||
520 | |||
521 | bitmask = pmu->reprogram_pmi; | ||
522 | |||
523 | for_each_set_bit(bit, (unsigned long *)&bitmask, X86_PMC_IDX_MAX) { | ||
524 | struct kvm_pmc *pmc = global_idx_to_pmc(pmu, bit); | ||
525 | |||
526 | if (unlikely(!pmc || !pmc->perf_event)) { | ||
527 | clear_bit(bit, (unsigned long *)&pmu->reprogram_pmi); | ||
528 | continue; | ||
529 | } | ||
530 | |||
531 | reprogram_idx(pmu, bit); | ||
532 | } | ||
533 | } | ||
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e32243eac2f4..5fa553babe56 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -1014,6 +1014,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
1014 | set_intercept(svm, INTERCEPT_NMI); | 1014 | set_intercept(svm, INTERCEPT_NMI); |
1015 | set_intercept(svm, INTERCEPT_SMI); | 1015 | set_intercept(svm, INTERCEPT_SMI); |
1016 | set_intercept(svm, INTERCEPT_SELECTIVE_CR0); | 1016 | set_intercept(svm, INTERCEPT_SELECTIVE_CR0); |
1017 | set_intercept(svm, INTERCEPT_RDPMC); | ||
1017 | set_intercept(svm, INTERCEPT_CPUID); | 1018 | set_intercept(svm, INTERCEPT_CPUID); |
1018 | set_intercept(svm, INTERCEPT_INVD); | 1019 | set_intercept(svm, INTERCEPT_INVD); |
1019 | set_intercept(svm, INTERCEPT_HLT); | 1020 | set_intercept(svm, INTERCEPT_HLT); |
@@ -2770,6 +2771,19 @@ static int emulate_on_interception(struct vcpu_svm *svm) | |||
2770 | return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; | 2771 | return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; |
2771 | } | 2772 | } |
2772 | 2773 | ||
2774 | static int rdpmc_interception(struct vcpu_svm *svm) | ||
2775 | { | ||
2776 | int err; | ||
2777 | |||
2778 | if (!static_cpu_has(X86_FEATURE_NRIPS)) | ||
2779 | return emulate_on_interception(svm); | ||
2780 | |||
2781 | err = kvm_rdpmc(&svm->vcpu); | ||
2782 | kvm_complete_insn_gp(&svm->vcpu, err); | ||
2783 | |||
2784 | return 1; | ||
2785 | } | ||
2786 | |||
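The SVM handler above mirrors the VMX one added later in this patch, with one twist: skipping the intercepted instruction relies on the next-RIP-save (NRIPS) feature, so pre-NRIPS hardware falls back to full instruction emulation. From the guest's side, the trapped instruction is simply (illustrative inline asm, not part of the patch):

    /* Guest-side RDPMC of general-purpose counter 0. */
    u32 lo, hi;
    asm volatile("rdpmc" : "=a"(lo), "=d"(hi) : "c"(0));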
2773 | bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val) | 2787 | bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val) |
2774 | { | 2788 | { |
2775 | unsigned long cr0 = svm->vcpu.arch.cr0; | 2789 | unsigned long cr0 = svm->vcpu.arch.cr0; |
@@ -3190,6 +3204,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
3190 | [SVM_EXIT_SMI] = nop_on_interception, | 3204 | [SVM_EXIT_SMI] = nop_on_interception, |
3191 | [SVM_EXIT_INIT] = nop_on_interception, | 3205 | [SVM_EXIT_INIT] = nop_on_interception, |
3192 | [SVM_EXIT_VINTR] = interrupt_window_interception, | 3206 | [SVM_EXIT_VINTR] = interrupt_window_interception, |
3207 | [SVM_EXIT_RDPMC] = rdpmc_interception, | ||
3193 | [SVM_EXIT_CPUID] = cpuid_interception, | 3208 | [SVM_EXIT_CPUID] = cpuid_interception, |
3194 | [SVM_EXIT_IRET] = iret_interception, | 3209 | [SVM_EXIT_IRET] = iret_interception, |
3195 | [SVM_EXIT_INVD] = emulate_on_interception, | 3210 | [SVM_EXIT_INVD] = emulate_on_interception, |
diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c index ae432ea1cd83..6b85cc647f34 100644 --- a/arch/x86/kvm/timer.c +++ b/arch/x86/kvm/timer.c | |||
@@ -18,9 +18,10 @@ | |||
18 | #include <linux/atomic.h> | 18 | #include <linux/atomic.h> |
19 | #include "kvm_timer.h" | 19 | #include "kvm_timer.h" |
20 | 20 | ||
21 | static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer) | 21 | enum hrtimer_restart kvm_timer_fn(struct hrtimer *data) |
22 | { | 22 | { |
23 | int restart_timer = 0; | 23 | struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); |
24 | struct kvm_vcpu *vcpu = ktimer->vcpu; | ||
24 | wait_queue_head_t *q = &vcpu->wq; | 25 | wait_queue_head_t *q = &vcpu->wq; |
25 | 26 | ||
26 | /* | 27 | /* |
@@ -40,26 +41,7 @@ static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer) | |||
40 | 41 | ||
41 | if (ktimer->t_ops->is_periodic(ktimer)) { | 42 | if (ktimer->t_ops->is_periodic(ktimer)) { |
42 | hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); | 43 | hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); |
43 | restart_timer = 1; | ||
44 | } | ||
45 | |||
46 | return restart_timer; | ||
47 | } | ||
48 | |||
49 | enum hrtimer_restart kvm_timer_fn(struct hrtimer *data) | ||
50 | { | ||
51 | int restart_timer; | ||
52 | struct kvm_vcpu *vcpu; | ||
53 | struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); | ||
54 | |||
55 | vcpu = ktimer->vcpu; | ||
56 | if (!vcpu) | ||
57 | return HRTIMER_NORESTART; | ||
58 | |||
59 | restart_timer = __kvm_timer_fn(vcpu, ktimer); | ||
60 | if (restart_timer) | ||
61 | return HRTIMER_RESTART; | 44 | return HRTIMER_RESTART; |
62 | else | 45 | } else |
63 | return HRTIMER_NORESTART; | 46 | return HRTIMER_NORESTART; |
64 | } | 47 | } |
65 | |||
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 579a0b51696a..d29216c462b3 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -18,6 +18,7 @@ | |||
18 | 18 | ||
19 | #include "irq.h" | 19 | #include "irq.h" |
20 | #include "mmu.h" | 20 | #include "mmu.h" |
21 | #include "cpuid.h" | ||
21 | 22 | ||
22 | #include <linux/kvm_host.h> | 23 | #include <linux/kvm_host.h> |
23 | #include <linux/module.h> | 24 | #include <linux/module.h> |
@@ -50,29 +51,29 @@ | |||
50 | MODULE_AUTHOR("Qumranet"); | 51 | MODULE_AUTHOR("Qumranet"); |
51 | MODULE_LICENSE("GPL"); | 52 | MODULE_LICENSE("GPL"); |
52 | 53 | ||
53 | static int __read_mostly enable_vpid = 1; | 54 | static bool __read_mostly enable_vpid = 1; |
54 | module_param_named(vpid, enable_vpid, bool, 0444); | 55 | module_param_named(vpid, enable_vpid, bool, 0444); |
55 | 56 | ||
56 | static int __read_mostly flexpriority_enabled = 1; | 57 | static bool __read_mostly flexpriority_enabled = 1; |
57 | module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO); | 58 | module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO); |
58 | 59 | ||
59 | static int __read_mostly enable_ept = 1; | 60 | static bool __read_mostly enable_ept = 1; |
60 | module_param_named(ept, enable_ept, bool, S_IRUGO); | 61 | module_param_named(ept, enable_ept, bool, S_IRUGO); |
61 | 62 | ||
62 | static int __read_mostly enable_unrestricted_guest = 1; | 63 | static bool __read_mostly enable_unrestricted_guest = 1; |
63 | module_param_named(unrestricted_guest, | 64 | module_param_named(unrestricted_guest, |
64 | enable_unrestricted_guest, bool, S_IRUGO); | 65 | enable_unrestricted_guest, bool, S_IRUGO); |
65 | 66 | ||
66 | static int __read_mostly emulate_invalid_guest_state = 0; | 67 | static bool __read_mostly emulate_invalid_guest_state = 0; |
67 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); | 68 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); |
68 | 69 | ||
69 | static int __read_mostly vmm_exclusive = 1; | 70 | static bool __read_mostly vmm_exclusive = 1; |
70 | module_param(vmm_exclusive, bool, S_IRUGO); | 71 | module_param(vmm_exclusive, bool, S_IRUGO); |
71 | 72 | ||
72 | static int __read_mostly yield_on_hlt = 1; | 73 | static bool __read_mostly yield_on_hlt = 1; |
73 | module_param(yield_on_hlt, bool, S_IRUGO); | 74 | module_param(yield_on_hlt, bool, S_IRUGO); |
74 | 75 | ||
75 | static int __read_mostly fasteoi = 1; | 76 | static bool __read_mostly fasteoi = 1; |
76 | module_param(fasteoi, bool, S_IRUGO); | 77 | module_param(fasteoi, bool, S_IRUGO); |
77 | 78 | ||
78 | /* | 79 | /* |
@@ -80,7 +81,7 @@ module_param(fasteoi, bool, S_IRUGO); | |||
80 | * VMX and be a hypervisor for its own guests. If nested=0, guests may not | 81 | * VMX and be a hypervisor for its own guests. If nested=0, guests may not |
81 | * use VMX instructions. | 82 | * use VMX instructions. |
82 | */ | 83 | */ |
83 | static int __read_mostly nested = 0; | 84 | static bool __read_mostly nested = 0; |
84 | module_param(nested, bool, S_IRUGO); | 85 | module_param(nested, bool, S_IRUGO); |
85 | 86 | ||
86 | #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ | 87 | #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ |
@@ -1747,7 +1748,6 @@ static void setup_msrs(struct vcpu_vmx *vmx) | |||
1747 | int save_nmsrs, index; | 1748 | int save_nmsrs, index; |
1748 | unsigned long *msr_bitmap; | 1749 | unsigned long *msr_bitmap; |
1749 | 1750 | ||
1750 | vmx_load_host_state(vmx); | ||
1751 | save_nmsrs = 0; | 1751 | save_nmsrs = 0; |
1752 | #ifdef CONFIG_X86_64 | 1752 | #ifdef CONFIG_X86_64 |
1753 | if (is_long_mode(&vmx->vcpu)) { | 1753 | if (is_long_mode(&vmx->vcpu)) { |
@@ -1956,6 +1956,7 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
1956 | #endif | 1956 | #endif |
1957 | CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | | 1957 | CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | |
1958 | CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING | | 1958 | CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING | |
1959 | CPU_BASED_RDPMC_EXITING | | ||
1959 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; | 1960 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; |
1960 | /* | 1961 | /* |
1961 | * We can allow some features even when not supported by the | 1962 | * We can allow some features even when not supported by the |
@@ -2142,12 +2143,10 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
2142 | return 1; | 2143 | return 1; |
2143 | /* Otherwise falls through */ | 2144 | /* Otherwise falls through */ |
2144 | default: | 2145 | default: |
2145 | vmx_load_host_state(to_vmx(vcpu)); | ||
2146 | if (vmx_get_vmx_msr(vcpu, msr_index, pdata)) | 2146 | if (vmx_get_vmx_msr(vcpu, msr_index, pdata)) |
2147 | return 0; | 2147 | return 0; |
2148 | msr = find_msr_entry(to_vmx(vcpu), msr_index); | 2148 | msr = find_msr_entry(to_vmx(vcpu), msr_index); |
2149 | if (msr) { | 2149 | if (msr) { |
2150 | vmx_load_host_state(to_vmx(vcpu)); | ||
2151 | data = msr->data; | 2150 | data = msr->data; |
2152 | break; | 2151 | break; |
2153 | } | 2152 | } |
@@ -2171,7 +2170,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
2171 | 2170 | ||
2172 | switch (msr_index) { | 2171 | switch (msr_index) { |
2173 | case MSR_EFER: | 2172 | case MSR_EFER: |
2174 | vmx_load_host_state(vmx); | ||
2175 | ret = kvm_set_msr_common(vcpu, msr_index, data); | 2173 | ret = kvm_set_msr_common(vcpu, msr_index, data); |
2176 | break; | 2174 | break; |
2177 | #ifdef CONFIG_X86_64 | 2175 | #ifdef CONFIG_X86_64 |
@@ -2220,7 +2218,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
2220 | break; | 2218 | break; |
2221 | msr = find_msr_entry(vmx, msr_index); | 2219 | msr = find_msr_entry(vmx, msr_index); |
2222 | if (msr) { | 2220 | if (msr) { |
2223 | vmx_load_host_state(vmx); | ||
2224 | msr->data = data; | 2221 | msr->data = data; |
2225 | break; | 2222 | break; |
2226 | } | 2223 | } |
@@ -2414,7 +2411,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
2414 | CPU_BASED_USE_TSC_OFFSETING | | 2411 | CPU_BASED_USE_TSC_OFFSETING | |
2415 | CPU_BASED_MWAIT_EXITING | | 2412 | CPU_BASED_MWAIT_EXITING | |
2416 | CPU_BASED_MONITOR_EXITING | | 2413 | CPU_BASED_MONITOR_EXITING | |
2417 | CPU_BASED_INVLPG_EXITING; | 2414 | CPU_BASED_INVLPG_EXITING | |
2415 | CPU_BASED_RDPMC_EXITING; | ||
2418 | 2416 | ||
2419 | if (yield_on_hlt) | 2417 | if (yield_on_hlt) |
2420 | min |= CPU_BASED_HLT_EXITING; | 2418 | min |= CPU_BASED_HLT_EXITING; |
@@ -2716,11 +2714,13 @@ static gva_t rmode_tss_base(struct kvm *kvm) | |||
2716 | { | 2714 | { |
2717 | if (!kvm->arch.tss_addr) { | 2715 | if (!kvm->arch.tss_addr) { |
2718 | struct kvm_memslots *slots; | 2716 | struct kvm_memslots *slots; |
2717 | struct kvm_memory_slot *slot; | ||
2719 | gfn_t base_gfn; | 2718 | gfn_t base_gfn; |
2720 | 2719 | ||
2721 | slots = kvm_memslots(kvm); | 2720 | slots = kvm_memslots(kvm); |
2722 | base_gfn = slots->memslots[0].base_gfn + | 2721 | slot = id_to_memslot(slots, 0); |
2723 | kvm->memslots->memslots[0].npages - 3; | 2722 | base_gfn = slot->base_gfn + slot->npages - 3; |
2723 | |||
2724 | return base_gfn << PAGE_SHIFT; | 2724 | return base_gfn << PAGE_SHIFT; |
2725 | } | 2725 | } |
2726 | return kvm->arch.tss_addr; | 2726 | return kvm->arch.tss_addr; |
@@ -3945,12 +3945,15 @@ static bool nested_exit_on_intr(struct kvm_vcpu *vcpu) | |||
3945 | static void enable_irq_window(struct kvm_vcpu *vcpu) | 3945 | static void enable_irq_window(struct kvm_vcpu *vcpu) |
3946 | { | 3946 | { |
3947 | u32 cpu_based_vm_exec_control; | 3947 | u32 cpu_based_vm_exec_control; |
3948 | if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) | 3948 | if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { |
3949 | /* We can get here when nested_run_pending caused | 3949 | /* |
3950 | * vmx_interrupt_allowed() to return false. In this case, do | 3950 | * We get here if vmx_interrupt_allowed() said we can't |
3951 | * nothing - the interrupt will be injected later. | 3951 | * inject to L1 now because L2 must run. Ask L2 to exit |
3952 | * right after entry, so we can inject to L1 more promptly. | ||
3952 | */ | 3953 | */ |
3954 | kvm_make_request(KVM_REQ_IMMEDIATE_EXIT, vcpu); | ||
3953 | return; | 3955 | return; |
3956 | } | ||
3954 | 3957 | ||
3955 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | 3958 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); |
3956 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; | 3959 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; |
@@ -4077,11 +4080,12 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) | |||
4077 | static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) | 4080 | static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) |
4078 | { | 4081 | { |
4079 | if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { | 4082 | if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { |
4080 | struct vmcs12 *vmcs12; | 4083 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
4081 | if (to_vmx(vcpu)->nested.nested_run_pending) | 4084 | if (to_vmx(vcpu)->nested.nested_run_pending || |
4085 | (vmcs12->idt_vectoring_info_field & | ||
4086 | VECTORING_INFO_VALID_MASK)) | ||
4082 | return 0; | 4087 | return 0; |
4083 | nested_vmx_vmexit(vcpu); | 4088 | nested_vmx_vmexit(vcpu); |
4084 | vmcs12 = get_vmcs12(vcpu); | ||
4085 | vmcs12->vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT; | 4089 | vmcs12->vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT; |
4086 | vmcs12->vm_exit_intr_info = 0; | 4090 | vmcs12->vm_exit_intr_info = 0; |
4087 | /* fall through to normal code, but now in L1, not L2 */ | 4091 | /* fall through to normal code, but now in L1, not L2 */ |
@@ -4611,6 +4615,16 @@ static int handle_invlpg(struct kvm_vcpu *vcpu) | |||
4611 | return 1; | 4615 | return 1; |
4612 | } | 4616 | } |
4613 | 4617 | ||
4618 | static int handle_rdpmc(struct kvm_vcpu *vcpu) | ||
4619 | { | ||
4620 | int err; | ||
4621 | |||
4622 | err = kvm_rdpmc(vcpu); | ||
4623 | kvm_complete_insn_gp(vcpu, err); | ||
4624 | |||
4625 | return 1; | ||
4626 | } | ||
4627 | |||
4614 | static int handle_wbinvd(struct kvm_vcpu *vcpu) | 4628 | static int handle_wbinvd(struct kvm_vcpu *vcpu) |
4615 | { | 4629 | { |
4616 | skip_emulated_instruction(vcpu); | 4630 | skip_emulated_instruction(vcpu); |
@@ -5561,6 +5575,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | |||
5561 | [EXIT_REASON_HLT] = handle_halt, | 5575 | [EXIT_REASON_HLT] = handle_halt, |
5562 | [EXIT_REASON_INVD] = handle_invd, | 5576 | [EXIT_REASON_INVD] = handle_invd, |
5563 | [EXIT_REASON_INVLPG] = handle_invlpg, | 5577 | [EXIT_REASON_INVLPG] = handle_invlpg, |
5578 | [EXIT_REASON_RDPMC] = handle_rdpmc, | ||
5564 | [EXIT_REASON_VMCALL] = handle_vmcall, | 5579 | [EXIT_REASON_VMCALL] = handle_vmcall, |
5565 | [EXIT_REASON_VMCLEAR] = handle_vmclear, | 5580 | [EXIT_REASON_VMCLEAR] = handle_vmclear, |
5566 | [EXIT_REASON_VMLAUNCH] = handle_vmlaunch, | 5581 | [EXIT_REASON_VMLAUNCH] = handle_vmlaunch, |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4c938da2ba00..14d6cadc4ba6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include "tss.h" | 26 | #include "tss.h" |
27 | #include "kvm_cache_regs.h" | 27 | #include "kvm_cache_regs.h" |
28 | #include "x86.h" | 28 | #include "x86.h" |
29 | #include "cpuid.h" | ||
29 | 30 | ||
30 | #include <linux/clocksource.h> | 31 | #include <linux/clocksource.h> |
31 | #include <linux/interrupt.h> | 32 | #include <linux/interrupt.h> |
@@ -82,15 +83,13 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); | |||
82 | #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU | 83 | #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU |
83 | 84 | ||
84 | static void update_cr8_intercept(struct kvm_vcpu *vcpu); | 85 | static void update_cr8_intercept(struct kvm_vcpu *vcpu); |
85 | static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, | ||
86 | struct kvm_cpuid_entry2 __user *entries); | ||
87 | static void process_nmi(struct kvm_vcpu *vcpu); | 86 | static void process_nmi(struct kvm_vcpu *vcpu); |
88 | 87 | ||
89 | struct kvm_x86_ops *kvm_x86_ops; | 88 | struct kvm_x86_ops *kvm_x86_ops; |
90 | EXPORT_SYMBOL_GPL(kvm_x86_ops); | 89 | EXPORT_SYMBOL_GPL(kvm_x86_ops); |
91 | 90 | ||
92 | int ignore_msrs = 0; | 91 | static bool ignore_msrs = 0; |
93 | module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); | 92 | module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); |
94 | 93 | ||
95 | bool kvm_has_tsc_control; | 94 | bool kvm_has_tsc_control; |
96 | EXPORT_SYMBOL_GPL(kvm_has_tsc_control); | 95 | EXPORT_SYMBOL_GPL(kvm_has_tsc_control); |
@@ -574,54 +573,6 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) | |||
574 | } | 573 | } |
575 | EXPORT_SYMBOL_GPL(kvm_set_xcr); | 574 | EXPORT_SYMBOL_GPL(kvm_set_xcr); |
576 | 575 | ||
577 | static bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu) | ||
578 | { | ||
579 | struct kvm_cpuid_entry2 *best; | ||
580 | |||
581 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | ||
582 | return best && (best->ecx & bit(X86_FEATURE_XSAVE)); | ||
583 | } | ||
584 | |||
585 | static bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu) | ||
586 | { | ||
587 | struct kvm_cpuid_entry2 *best; | ||
588 | |||
589 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
590 | return best && (best->ebx & bit(X86_FEATURE_SMEP)); | ||
591 | } | ||
592 | |||
593 | static bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu) | ||
594 | { | ||
595 | struct kvm_cpuid_entry2 *best; | ||
596 | |||
597 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
598 | return best && (best->ebx & bit(X86_FEATURE_FSGSBASE)); | ||
599 | } | ||
600 | |||
601 | static void update_cpuid(struct kvm_vcpu *vcpu) | ||
602 | { | ||
603 | struct kvm_cpuid_entry2 *best; | ||
604 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
605 | |||
606 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | ||
607 | if (!best) | ||
608 | return; | ||
609 | |||
610 | /* Update OSXSAVE bit */ | ||
611 | if (cpu_has_xsave && best->function == 0x1) { | ||
612 | best->ecx &= ~(bit(X86_FEATURE_OSXSAVE)); | ||
613 | if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) | ||
614 | best->ecx |= bit(X86_FEATURE_OSXSAVE); | ||
615 | } | ||
616 | |||
617 | if (apic) { | ||
618 | if (best->ecx & bit(X86_FEATURE_TSC_DEADLINE_TIMER)) | ||
619 | apic->lapic_timer.timer_mode_mask = 3 << 17; | ||
620 | else | ||
621 | apic->lapic_timer.timer_mode_mask = 1 << 17; | ||
622 | } | ||
623 | } | ||
624 | |||
625 | int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 576 | int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
626 | { | 577 | { |
627 | unsigned long old_cr4 = kvm_read_cr4(vcpu); | 578 | unsigned long old_cr4 = kvm_read_cr4(vcpu); |
@@ -655,7 +606,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
655 | kvm_mmu_reset_context(vcpu); | 606 | kvm_mmu_reset_context(vcpu); |
656 | 607 | ||
657 | if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE) | 608 | if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE) |
658 | update_cpuid(vcpu); | 609 | kvm_update_cpuid(vcpu); |
659 | 610 | ||
660 | return 0; | 611 | return 0; |
661 | } | 612 | } |
@@ -809,6 +760,21 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) | |||
809 | } | 760 | } |
810 | EXPORT_SYMBOL_GPL(kvm_get_dr); | 761 | EXPORT_SYMBOL_GPL(kvm_get_dr); |
811 | 762 | ||
763 | bool kvm_rdpmc(struct kvm_vcpu *vcpu) | ||
764 | { | ||
765 | u32 ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
766 | u64 data; | ||
767 | int err; | ||
768 | |||
769 | err = kvm_pmu_read_pmc(vcpu, ecx, &data); | ||
770 | if (err) | ||
771 | return err; | ||
772 | kvm_register_write(vcpu, VCPU_REGS_RAX, (u32)data); | ||
773 | kvm_register_write(vcpu, VCPU_REGS_RDX, data >> 32); | ||
774 | return err; | ||
775 | } | ||
776 | EXPORT_SYMBOL_GPL(kvm_rdpmc); | ||
777 | |||
812 | /* | 778 | /* |
813 | * List of msr numbers which we expose to userspace through KVM_GET_MSRS | 779 | * List of msr numbers which we expose to userspace through KVM_GET_MSRS |
814 | * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. | 780 | * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. |
@@ -1358,12 +1324,11 @@ static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data) | |||
1358 | if (page_num >= blob_size) | 1324 | if (page_num >= blob_size) |
1359 | goto out; | 1325 | goto out; |
1360 | r = -ENOMEM; | 1326 | r = -ENOMEM; |
1361 | page = kzalloc(PAGE_SIZE, GFP_KERNEL); | 1327 | page = memdup_user(blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE); |
1362 | if (!page) | 1328 | if (IS_ERR(page)) { |
1329 | r = PTR_ERR(page); | ||
1363 | goto out; | 1330 | goto out; |
1364 | r = -EFAULT; | 1331 | } |
1365 | if (copy_from_user(page, blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE)) | ||
1366 | goto out_free; | ||
1367 | if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE)) | 1332 | if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE)) |
1368 | goto out_free; | 1333 | goto out_free; |
1369 | r = 0; | 1334 | r = 0; |
@@ -1652,8 +1617,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1652 | * which we perfectly emulate ;-). Any other value should be at least | 1617 | * which we perfectly emulate ;-). Any other value should be at least |
1653 | * reported, some guests depend on them. | 1618 | * reported, some guests depend on them. |
1654 | */ | 1619 | */ |
1655 | case MSR_P6_EVNTSEL0: | ||
1656 | case MSR_P6_EVNTSEL1: | ||
1657 | case MSR_K7_EVNTSEL0: | 1620 | case MSR_K7_EVNTSEL0: |
1658 | case MSR_K7_EVNTSEL1: | 1621 | case MSR_K7_EVNTSEL1: |
1659 | case MSR_K7_EVNTSEL2: | 1622 | case MSR_K7_EVNTSEL2: |
@@ -1665,8 +1628,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1665 | /* at least RHEL 4 unconditionally writes to the perfctr registers, | 1628 | /* at least RHEL 4 unconditionally writes to the perfctr registers, |
1666 | * so we ignore writes to make it happy. | 1629 | * so we ignore writes to make it happy. |
1667 | */ | 1630 | */ |
1668 | case MSR_P6_PERFCTR0: | ||
1669 | case MSR_P6_PERFCTR1: | ||
1670 | case MSR_K7_PERFCTR0: | 1631 | case MSR_K7_PERFCTR0: |
1671 | case MSR_K7_PERFCTR1: | 1632 | case MSR_K7_PERFCTR1: |
1672 | case MSR_K7_PERFCTR2: | 1633 | case MSR_K7_PERFCTR2: |
@@ -1703,6 +1664,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1703 | default: | 1664 | default: |
1704 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) | 1665 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) |
1705 | return xen_hvm_config(vcpu, data); | 1666 | return xen_hvm_config(vcpu, data); |
1667 | if (kvm_pmu_msr(vcpu, msr)) | ||
1668 | return kvm_pmu_set_msr(vcpu, msr, data); | ||
1706 | if (!ignore_msrs) { | 1669 | if (!ignore_msrs) { |
1707 | pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", | 1670 | pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", |
1708 | msr, data); | 1671 | msr, data); |
@@ -1865,10 +1828,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1865 | case MSR_K8_SYSCFG: | 1828 | case MSR_K8_SYSCFG: |
1866 | case MSR_K7_HWCR: | 1829 | case MSR_K7_HWCR: |
1867 | case MSR_VM_HSAVE_PA: | 1830 | case MSR_VM_HSAVE_PA: |
1868 | case MSR_P6_PERFCTR0: | ||
1869 | case MSR_P6_PERFCTR1: | ||
1870 | case MSR_P6_EVNTSEL0: | ||
1871 | case MSR_P6_EVNTSEL1: | ||
1872 | case MSR_K7_EVNTSEL0: | 1831 | case MSR_K7_EVNTSEL0: |
1873 | case MSR_K7_PERFCTR0: | 1832 | case MSR_K7_PERFCTR0: |
1874 | case MSR_K8_INT_PENDING_MSG: | 1833 | case MSR_K8_INT_PENDING_MSG: |
@@ -1979,6 +1938,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1979 | data = 0xbe702111; | 1938 | data = 0xbe702111; |
1980 | break; | 1939 | break; |
1981 | default: | 1940 | default: |
1941 | if (kvm_pmu_msr(vcpu, msr)) | ||
1942 | return kvm_pmu_get_msr(vcpu, msr, pdata); | ||
1982 | if (!ignore_msrs) { | 1943 | if (!ignore_msrs) { |
1983 | pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); | 1944 | pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); |
1984 | return 1; | 1945 | return 1; |
@@ -2037,15 +1998,12 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs, | |||
2037 | if (msrs.nmsrs >= MAX_IO_MSRS) | 1998 | if (msrs.nmsrs >= MAX_IO_MSRS) |
2038 | goto out; | 1999 | goto out; |
2039 | 2000 | ||
2040 | r = -ENOMEM; | ||
2041 | size = sizeof(struct kvm_msr_entry) * msrs.nmsrs; | 2001 | size = sizeof(struct kvm_msr_entry) * msrs.nmsrs; |
2042 | entries = kmalloc(size, GFP_KERNEL); | 2002 | entries = memdup_user(user_msrs->entries, size); |
2043 | if (!entries) | 2003 | if (IS_ERR(entries)) { |
2004 | r = PTR_ERR(entries); | ||
2044 | goto out; | 2005 | goto out; |
2045 | 2006 | } | |
2046 | r = -EFAULT; | ||
2047 | if (copy_from_user(entries, user_msrs->entries, size)) | ||
2048 | goto out_free; | ||
2049 | 2007 | ||
2050 | r = n = __msr_io(vcpu, &msrs, entries, do_msr); | 2008 | r = n = __msr_io(vcpu, &msrs, entries, do_msr); |
2051 | if (r < 0) | 2009 | if (r < 0) |
@@ -2265,466 +2223,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | |||
2265 | vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu); | 2223 | vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu); |
2266 | } | 2224 | } |
2267 | 2225 | ||
2268 | static int is_efer_nx(void) | ||
2269 | { | ||
2270 | unsigned long long efer = 0; | ||
2271 | |||
2272 | rdmsrl_safe(MSR_EFER, &efer); | ||
2273 | return efer & EFER_NX; | ||
2274 | } | ||
2275 | |||
2276 | static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) | ||
2277 | { | ||
2278 | int i; | ||
2279 | struct kvm_cpuid_entry2 *e, *entry; | ||
2280 | |||
2281 | entry = NULL; | ||
2282 | for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { | ||
2283 | e = &vcpu->arch.cpuid_entries[i]; | ||
2284 | if (e->function == 0x80000001) { | ||
2285 | entry = e; | ||
2286 | break; | ||
2287 | } | ||
2288 | } | ||
2289 | if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) { | ||
2290 | entry->edx &= ~(1 << 20); | ||
2291 | printk(KERN_INFO "kvm: guest NX capability removed\n"); | ||
2292 | } | ||
2293 | } | ||
2294 | |||
2295 | /* when an old userspace process fills a new kernel module */ | ||
2296 | static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | ||
2297 | struct kvm_cpuid *cpuid, | ||
2298 | struct kvm_cpuid_entry __user *entries) | ||
2299 | { | ||
2300 | int r, i; | ||
2301 | struct kvm_cpuid_entry *cpuid_entries; | ||
2302 | |||
2303 | r = -E2BIG; | ||
2304 | if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) | ||
2305 | goto out; | ||
2306 | r = -ENOMEM; | ||
2307 | cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) * cpuid->nent); | ||
2308 | if (!cpuid_entries) | ||
2309 | goto out; | ||
2310 | r = -EFAULT; | ||
2311 | if (copy_from_user(cpuid_entries, entries, | ||
2312 | cpuid->nent * sizeof(struct kvm_cpuid_entry))) | ||
2313 | goto out_free; | ||
2314 | for (i = 0; i < cpuid->nent; i++) { | ||
2315 | vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function; | ||
2316 | vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax; | ||
2317 | vcpu->arch.cpuid_entries[i].ebx = cpuid_entries[i].ebx; | ||
2318 | vcpu->arch.cpuid_entries[i].ecx = cpuid_entries[i].ecx; | ||
2319 | vcpu->arch.cpuid_entries[i].edx = cpuid_entries[i].edx; | ||
2320 | vcpu->arch.cpuid_entries[i].index = 0; | ||
2321 | vcpu->arch.cpuid_entries[i].flags = 0; | ||
2322 | vcpu->arch.cpuid_entries[i].padding[0] = 0; | ||
2323 | vcpu->arch.cpuid_entries[i].padding[1] = 0; | ||
2324 | vcpu->arch.cpuid_entries[i].padding[2] = 0; | ||
2325 | } | ||
2326 | vcpu->arch.cpuid_nent = cpuid->nent; | ||
2327 | cpuid_fix_nx_cap(vcpu); | ||
2328 | r = 0; | ||
2329 | kvm_apic_set_version(vcpu); | ||
2330 | kvm_x86_ops->cpuid_update(vcpu); | ||
2331 | update_cpuid(vcpu); | ||
2332 | |||
2333 | out_free: | ||
2334 | vfree(cpuid_entries); | ||
2335 | out: | ||
2336 | return r; | ||
2337 | } | ||
2338 | |||
2339 | static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, | ||
2340 | struct kvm_cpuid2 *cpuid, | ||
2341 | struct kvm_cpuid_entry2 __user *entries) | ||
2342 | { | ||
2343 | int r; | ||
2344 | |||
2345 | r = -E2BIG; | ||
2346 | if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) | ||
2347 | goto out; | ||
2348 | r = -EFAULT; | ||
2349 | if (copy_from_user(&vcpu->arch.cpuid_entries, entries, | ||
2350 | cpuid->nent * sizeof(struct kvm_cpuid_entry2))) | ||
2351 | goto out; | ||
2352 | vcpu->arch.cpuid_nent = cpuid->nent; | ||
2353 | kvm_apic_set_version(vcpu); | ||
2354 | kvm_x86_ops->cpuid_update(vcpu); | ||
2355 | update_cpuid(vcpu); | ||
2356 | return 0; | ||
2357 | |||
2358 | out: | ||
2359 | return r; | ||
2360 | } | ||
2361 | |||
2362 | static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, | ||
2363 | struct kvm_cpuid2 *cpuid, | ||
2364 | struct kvm_cpuid_entry2 __user *entries) | ||
2365 | { | ||
2366 | int r; | ||
2367 | |||
2368 | r = -E2BIG; | ||
2369 | if (cpuid->nent < vcpu->arch.cpuid_nent) | ||
2370 | goto out; | ||
2371 | r = -EFAULT; | ||
2372 | if (copy_to_user(entries, &vcpu->arch.cpuid_entries, | ||
2373 | vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2))) | ||
2374 | goto out; | ||
2375 | return 0; | ||
2376 | |||
2377 | out: | ||
2378 | cpuid->nent = vcpu->arch.cpuid_nent; | ||
2379 | return r; | ||
2380 | } | ||
2381 | |||
2382 | static void cpuid_mask(u32 *word, int wordnum) | ||
2383 | { | ||
2384 | *word &= boot_cpu_data.x86_capability[wordnum]; | ||
2385 | } | ||
2386 | |||
2387 | static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, | ||
2388 | u32 index) | ||
2389 | { | ||
2390 | entry->function = function; | ||
2391 | entry->index = index; | ||
2392 | cpuid_count(entry->function, entry->index, | ||
2393 | &entry->eax, &entry->ebx, &entry->ecx, &entry->edx); | ||
2394 | entry->flags = 0; | ||
2395 | } | ||
2396 | |||
2397 | static bool supported_xcr0_bit(unsigned bit) | ||
2398 | { | ||
2399 | u64 mask = ((u64)1 << bit); | ||
2400 | |||
2401 | return mask & (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) & host_xcr0; | ||
2402 | } | ||
2403 | |||
2404 | #define F(x) bit(X86_FEATURE_##x) | ||
2405 | |||
2406 | static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | ||
2407 | u32 index, int *nent, int maxnent) | ||
2408 | { | ||
2409 | unsigned f_nx = is_efer_nx() ? F(NX) : 0; | ||
2410 | #ifdef CONFIG_X86_64 | ||
2411 | unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL) | ||
2412 | ? F(GBPAGES) : 0; | ||
2413 | unsigned f_lm = F(LM); | ||
2414 | #else | ||
2415 | unsigned f_gbpages = 0; | ||
2416 | unsigned f_lm = 0; | ||
2417 | #endif | ||
2418 | unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; | ||
2419 | |||
2420 | /* cpuid 1.edx */ | ||
2421 | const u32 kvm_supported_word0_x86_features = | ||
2422 | F(FPU) | F(VME) | F(DE) | F(PSE) | | ||
2423 | F(TSC) | F(MSR) | F(PAE) | F(MCE) | | ||
2424 | F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) | | ||
2425 | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | | ||
2426 | F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLSH) | | ||
2427 | 0 /* Reserved, DS, ACPI */ | F(MMX) | | ||
2428 | F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) | | ||
2429 | 0 /* HTT, TM, Reserved, PBE */; | ||
2430 | /* cpuid 0x80000001.edx */ | ||
2431 | const u32 kvm_supported_word1_x86_features = | ||
2432 | F(FPU) | F(VME) | F(DE) | F(PSE) | | ||
2433 | F(TSC) | F(MSR) | F(PAE) | F(MCE) | | ||
2434 | F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) | | ||
2435 | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | | ||
2436 | F(PAT) | F(PSE36) | 0 /* Reserved */ | | ||
2437 | f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | | ||
2438 | F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp | | ||
2439 | 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); | ||
2440 | /* cpuid 1.ecx */ | ||
2441 | const u32 kvm_supported_word4_x86_features = | ||
2442 | F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ | | ||
2443 | 0 /* DS-CPL, VMX, SMX, EST */ | | ||
2444 | 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | | ||
2445 | 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ | | ||
2446 | 0 /* Reserved, DCA */ | F(XMM4_1) | | ||
2447 | F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | | ||
2448 | 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) | | ||
2449 | F(F16C) | F(RDRAND); | ||
2450 | /* cpuid 0x80000001.ecx */ | ||
2451 | const u32 kvm_supported_word6_x86_features = | ||
2452 | F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ | | ||
2453 | F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | | ||
2454 | F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) | | ||
2455 | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); | ||
2456 | |||
2457 | /* cpuid 0xC0000001.edx */ | ||
2458 | const u32 kvm_supported_word5_x86_features = | ||
2459 | F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | | ||
2460 | F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) | | ||
2461 | F(PMM) | F(PMM_EN); | ||
2462 | |||
2463 | /* cpuid 7.0.ebx */ | ||
2464 | const u32 kvm_supported_word9_x86_features = | ||
2465 | F(SMEP) | F(FSGSBASE) | F(ERMS); | ||
2466 | |||
2467 | /* all calls to cpuid_count() should be made on the same cpu */ | ||
2468 | get_cpu(); | ||
2469 | do_cpuid_1_ent(entry, function, index); | ||
2470 | ++*nent; | ||
2471 | |||
2472 | switch (function) { | ||
2473 | case 0: | ||
2474 | entry->eax = min(entry->eax, (u32)0xd); | ||
2475 | break; | ||
2476 | case 1: | ||
2477 | entry->edx &= kvm_supported_word0_x86_features; | ||
2478 | cpuid_mask(&entry->edx, 0); | ||
2479 | entry->ecx &= kvm_supported_word4_x86_features; | ||
2480 | cpuid_mask(&entry->ecx, 4); | ||
2481 | /* we support x2apic emulation even if host does not support | ||
2482 | * it since we emulate x2apic in software */ | ||
2483 | entry->ecx |= F(X2APIC); | ||
2484 | break; | ||
2485 | /* function 2 entries are STATEFUL. That is, repeated cpuid commands | ||
2486 | * may return different values. This forces us to get_cpu() before | ||
2487 | * issuing the first command, and also to emulate this annoying behavior | ||
2488 | * in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */ | ||
2489 | case 2: { | ||
2490 | int t, times = entry->eax & 0xff; | ||
2491 | |||
2492 | entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; | ||
2493 | entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; | ||
2494 | for (t = 1; t < times && *nent < maxnent; ++t) { | ||
2495 | do_cpuid_1_ent(&entry[t], function, 0); | ||
2496 | entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; | ||
2497 | ++*nent; | ||
2498 | } | ||
2499 | break; | ||
2500 | } | ||
2501 | /* function 4 has additional index. */ | ||
2502 | case 4: { | ||
2503 | int i, cache_type; | ||
2504 | |||
2505 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
2506 | /* read more entries until cache_type is zero */ | ||
2507 | for (i = 1; *nent < maxnent; ++i) { | ||
2508 | cache_type = entry[i - 1].eax & 0x1f; | ||
2509 | if (!cache_type) | ||
2510 | break; | ||
2511 | do_cpuid_1_ent(&entry[i], function, i); | ||
2512 | entry[i].flags |= | ||
2513 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
2514 | ++*nent; | ||
2515 | } | ||
2516 | break; | ||
2517 | } | ||
2518 | case 7: { | ||
2519 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
2520 | /* Mask ebx against host capability word 9 */ | ||
2521 | if (index == 0) { | ||
2522 | entry->ebx &= kvm_supported_word9_x86_features; | ||
2523 | cpuid_mask(&entry->ebx, 9); | ||
2524 | } else | ||
2525 | entry->ebx = 0; | ||
2526 | entry->eax = 0; | ||
2527 | entry->ecx = 0; | ||
2528 | entry->edx = 0; | ||
2529 | break; | ||
2530 | } | ||
2531 | case 9: | ||
2532 | break; | ||
2533 | /* function 0xb has additional index. */ | ||
2534 | case 0xb: { | ||
2535 | int i, level_type; | ||
2536 | |||
2537 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
2538 | /* read more entries until level_type is zero */ | ||
2539 | for (i = 1; *nent < maxnent; ++i) { | ||
2540 | level_type = entry[i - 1].ecx & 0xff00; | ||
2541 | if (!level_type) | ||
2542 | break; | ||
2543 | do_cpuid_1_ent(&entry[i], function, i); | ||
2544 | entry[i].flags |= | ||
2545 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
2546 | ++*nent; | ||
2547 | } | ||
2548 | break; | ||
2549 | } | ||
2550 | case 0xd: { | ||
2551 | int idx, i; | ||
2552 | |||
2553 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
2554 | for (idx = 1, i = 1; *nent < maxnent && idx < 64; ++idx) { | ||
2555 | do_cpuid_1_ent(&entry[i], function, idx); | ||
2556 | if (entry[i].eax == 0 || !supported_xcr0_bit(idx)) | ||
2557 | continue; | ||
2558 | entry[i].flags |= | ||
2559 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
2560 | ++*nent; | ||
2561 | ++i; | ||
2562 | } | ||
2563 | break; | ||
2564 | } | ||
2565 | case KVM_CPUID_SIGNATURE: { | ||
2566 | char signature[12] = "KVMKVMKVM\0\0"; | ||
2567 | u32 *sigptr = (u32 *)signature; | ||
2568 | entry->eax = 0; | ||
2569 | entry->ebx = sigptr[0]; | ||
2570 | entry->ecx = sigptr[1]; | ||
2571 | entry->edx = sigptr[2]; | ||
2572 | break; | ||
2573 | } | ||
2574 | case KVM_CPUID_FEATURES: | ||
2575 | entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) | | ||
2576 | (1 << KVM_FEATURE_NOP_IO_DELAY) | | ||
2577 | (1 << KVM_FEATURE_CLOCKSOURCE2) | | ||
2578 | (1 << KVM_FEATURE_ASYNC_PF) | | ||
2579 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); | ||
2580 | |||
2581 | if (sched_info_on()) | ||
2582 | entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); | ||
2583 | |||
2584 | entry->ebx = 0; | ||
2585 | entry->ecx = 0; | ||
2586 | entry->edx = 0; | ||
2587 | break; | ||
2588 | case 0x80000000: | ||
2589 | entry->eax = min(entry->eax, 0x8000001a); | ||
2590 | break; | ||
2591 | case 0x80000001: | ||
2592 | entry->edx &= kvm_supported_word1_x86_features; | ||
2593 | cpuid_mask(&entry->edx, 1); | ||
2594 | entry->ecx &= kvm_supported_word6_x86_features; | ||
2595 | cpuid_mask(&entry->ecx, 6); | ||
2596 | break; | ||
2597 | case 0x80000008: { | ||
2598 | unsigned g_phys_as = (entry->eax >> 16) & 0xff; | ||
2599 | unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U); | ||
2600 | unsigned phys_as = entry->eax & 0xff; | ||
2601 | |||
2602 | if (!g_phys_as) | ||
2603 | g_phys_as = phys_as; | ||
2604 | entry->eax = g_phys_as | (virt_as << 8); | ||
2605 | entry->ebx = entry->edx = 0; | ||
2606 | break; | ||
2607 | } | ||
2608 | case 0x80000019: | ||
2609 | entry->ecx = entry->edx = 0; | ||
2610 | break; | ||
2611 | case 0x8000001a: | ||
2612 | break; | ||
2613 | case 0x8000001d: | ||
2614 | break; | ||
2615 | /*Add support for Centaur's CPUID instruction*/ | ||
2616 | case 0xC0000000: | ||
2617 | /*Just support up to 0xC0000004 now*/ | ||
2618 | entry->eax = min(entry->eax, 0xC0000004); | ||
2619 | break; | ||
2620 | case 0xC0000001: | ||
2621 | entry->edx &= kvm_supported_word5_x86_features; | ||
2622 | cpuid_mask(&entry->edx, 5); | ||
2623 | break; | ||
2624 | case 3: /* Processor serial number */ | ||
2625 | case 5: /* MONITOR/MWAIT */ | ||
2626 | case 6: /* Thermal management */ | ||
2627 | case 0xA: /* Architectural Performance Monitoring */ | ||
2628 | case 0x80000007: /* Advanced power management */ | ||
2629 | case 0xC0000002: | ||
2630 | case 0xC0000003: | ||
2631 | case 0xC0000004: | ||
2632 | default: | ||
2633 | entry->eax = entry->ebx = entry->ecx = entry->edx = 0; | ||
2634 | break; | ||
2635 | } | ||
2636 | |||
2637 | kvm_x86_ops->set_supported_cpuid(function, entry); | ||
2638 | |||
2639 | put_cpu(); | ||
2640 | } | ||
2641 | |||
2642 | #undef F | ||
2643 | |||
2644 | static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, | ||
2645 | struct kvm_cpuid_entry2 __user *entries) | ||
2646 | { | ||
2647 | struct kvm_cpuid_entry2 *cpuid_entries; | ||
2648 | int limit, nent = 0, r = -E2BIG; | ||
2649 | u32 func; | ||
2650 | |||
2651 | if (cpuid->nent < 1) | ||
2652 | goto out; | ||
2653 | if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) | ||
2654 | cpuid->nent = KVM_MAX_CPUID_ENTRIES; | ||
2655 | r = -ENOMEM; | ||
2656 | cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); | ||
2657 | if (!cpuid_entries) | ||
2658 | goto out; | ||
2659 | |||
2660 | do_cpuid_ent(&cpuid_entries[0], 0, 0, &nent, cpuid->nent); | ||
2661 | limit = cpuid_entries[0].eax; | ||
2662 | for (func = 1; func <= limit && nent < cpuid->nent; ++func) | ||
2663 | do_cpuid_ent(&cpuid_entries[nent], func, 0, | ||
2664 | &nent, cpuid->nent); | ||
2665 | r = -E2BIG; | ||
2666 | if (nent >= cpuid->nent) | ||
2667 | goto out_free; | ||
2668 | |||
2669 | do_cpuid_ent(&cpuid_entries[nent], 0x80000000, 0, &nent, cpuid->nent); | ||
2670 | limit = cpuid_entries[nent - 1].eax; | ||
2671 | for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func) | ||
2672 | do_cpuid_ent(&cpuid_entries[nent], func, 0, | ||
2673 | &nent, cpuid->nent); | ||
2674 | |||
2675 | |||
2676 | |||
2677 | r = -E2BIG; | ||
2678 | if (nent >= cpuid->nent) | ||
2679 | goto out_free; | ||
2680 | |||
2681 | /* Add support for Centaur's CPUID instruction. */ | ||
2682 | if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR) { | ||
2683 | do_cpuid_ent(&cpuid_entries[nent], 0xC0000000, 0, | ||
2684 | &nent, cpuid->nent); | ||
2685 | |||
2686 | r = -E2BIG; | ||
2687 | if (nent >= cpuid->nent) | ||
2688 | goto out_free; | ||
2689 | |||
2690 | limit = cpuid_entries[nent - 1].eax; | ||
2691 | for (func = 0xC0000001; | ||
2692 | func <= limit && nent < cpuid->nent; ++func) | ||
2693 | do_cpuid_ent(&cpuid_entries[nent], func, 0, | ||
2694 | &nent, cpuid->nent); | ||
2695 | |||
2696 | r = -E2BIG; | ||
2697 | if (nent >= cpuid->nent) | ||
2698 | goto out_free; | ||
2699 | } | ||
2700 | |||
2701 | do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent, | ||
2702 | cpuid->nent); | ||
2703 | |||
2704 | r = -E2BIG; | ||
2705 | if (nent >= cpuid->nent) | ||
2706 | goto out_free; | ||
2707 | |||
2708 | do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_FEATURES, 0, &nent, | ||
2709 | cpuid->nent); | ||
2710 | |||
2711 | r = -E2BIG; | ||
2712 | if (nent >= cpuid->nent) | ||
2713 | goto out_free; | ||
2714 | |||
2715 | r = -EFAULT; | ||
2716 | if (copy_to_user(entries, cpuid_entries, | ||
2717 | nent * sizeof(struct kvm_cpuid_entry2))) | ||
2718 | goto out_free; | ||
2719 | cpuid->nent = nent; | ||
2720 | r = 0; | ||
2721 | |||
2722 | out_free: | ||
2723 | vfree(cpuid_entries); | ||
2724 | out: | ||
2725 | return r; | ||
2726 | } | ||
2727 | |||
2728 | static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, | 2226 | static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, |
2729 | struct kvm_lapic_state *s) | 2227 | struct kvm_lapic_state *s) |
2730 | { | 2228 | { |
@@ -3042,13 +2540,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
3042 | r = -EINVAL; | 2540 | r = -EINVAL; |
3043 | if (!vcpu->arch.apic) | 2541 | if (!vcpu->arch.apic) |
3044 | goto out; | 2542 | goto out; |
3045 | u.lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); | 2543 | u.lapic = memdup_user(argp, sizeof(*u.lapic)); |
3046 | r = -ENOMEM; | 2544 | if (IS_ERR(u.lapic)) { |
3047 | if (!u.lapic) | 2545 | r = PTR_ERR(u.lapic); |
3048 | goto out; | ||
3049 | r = -EFAULT; | ||
3050 | if (copy_from_user(u.lapic, argp, sizeof(struct kvm_lapic_state))) | ||
3051 | goto out; | 2546 | goto out; |
2547 | } | ||
2548 | |||
3052 | r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic); | 2549 | r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic); |
3053 | if (r) | 2550 | if (r) |
3054 | goto out; | 2551 | goto out; |
@@ -3227,14 +2724,11 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
3227 | break; | 2724 | break; |
3228 | } | 2725 | } |
3229 | case KVM_SET_XSAVE: { | 2726 | case KVM_SET_XSAVE: { |
3230 | u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL); | 2727 | u.xsave = memdup_user(argp, sizeof(*u.xsave)); |
3231 | r = -ENOMEM; | 2728 | if (IS_ERR(u.xsave)) { |
3232 | if (!u.xsave) | 2729 | r = PTR_ERR(u.xsave); |
3233 | break; | 2730 | goto out; |
3234 | 2731 | } | |
3235 | r = -EFAULT; | ||
3236 | if (copy_from_user(u.xsave, argp, sizeof(struct kvm_xsave))) | ||
3237 | break; | ||
3238 | 2732 | ||
3239 | r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave); | 2733 | r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave); |
3240 | break; | 2734 | break; |
@@ -3255,15 +2749,11 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
3255 | break; | 2749 | break; |
3256 | } | 2750 | } |
3257 | case KVM_SET_XCRS: { | 2751 | case KVM_SET_XCRS: { |
3258 | u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL); | 2752 | u.xcrs = memdup_user(argp, sizeof(*u.xcrs)); |
3259 | r = -ENOMEM; | 2753 | if (IS_ERR(u.xcrs)) { |
3260 | if (!u.xcrs) | 2754 | r = PTR_ERR(u.xcrs); |
3261 | break; | 2755 | goto out; |
3262 | 2756 | } | |
3263 | r = -EFAULT; | ||
3264 | if (copy_from_user(u.xcrs, argp, | ||
3265 | sizeof(struct kvm_xcrs))) | ||
3266 | break; | ||
3267 | 2757 | ||
3268 | r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs); | 2758 | r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs); |
3269 | break; | 2759 | break; |
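All three ioctl conversions above (KVM_SET_LAPIC, KVM_SET_XSAVE, KVM_SET_XCRS) replace an open-coded kmalloc()/copy_from_user() pair with memdup_user(), which returns either the filled-in kernel copy or an ERR_PTR encoding -ENOMEM/-EFAULT. A minimal sketch of the pattern, using a hypothetical struct name:

	struct foo *f;

	f = memdup_user(argp, sizeof(*f));	/* allocate and copy in one call */
	if (IS_ERR(f))
		return PTR_ERR(f);		/* -ENOMEM or -EFAULT */
	/* ... use f ... */
	kfree(f);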
@@ -3460,16 +2950,59 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm, | |||
3460 | return 0; | 2950 | return 0; |
3461 | } | 2951 | } |
3462 | 2952 | ||
2953 | /** | ||
2954 | * write_protect_slot - write protect a slot for dirty logging | ||
2955 | * @kvm: the kvm instance | ||
2956 | * @memslot: the slot we protect | ||
2957 | * @dirty_bitmap: the bitmap indicating which pages are dirty | ||
2958 | * @nr_dirty_pages: the number of dirty pages | ||
2959 | * | ||
2960 | * We have two ways to find all sptes to protect: | ||
2961 | * 1. Use kvm_mmu_slot_remove_write_access() which walks all shadow pages and | ||
2962 | * checks ones that have a spte mapping a page in the slot. | ||
2963 | * 2. Use kvm_mmu_rmap_write_protect() for each gfn found in the bitmap. | ||
2964 | * | ||
2965 | * Generally speaking, if the number of dirty pages is small compared to the | ||
2966 | * number of shadow pages, the latter is cheaper. | ||
2967 | * | ||
2968 | * Note that letting others write into a page marked dirty in the old bitmap | ||
2969 | * via a stale TLB entry is not a problem. That page will become | ||
2970 | * write protected again when we flush the TLB, and will then be reported | ||
2971 | * dirty to user space by copying the old bitmap. | ||
2972 | */ | ||
2973 | static void write_protect_slot(struct kvm *kvm, | ||
2974 | struct kvm_memory_slot *memslot, | ||
2975 | unsigned long *dirty_bitmap, | ||
2976 | unsigned long nr_dirty_pages) | ||
2977 | { | ||
2978 | /* Not many dirty pages compared to # of shadow pages. */ | ||
2979 | if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) { | ||
2980 | unsigned long gfn_offset; | ||
2981 | |||
2982 | for_each_set_bit(gfn_offset, dirty_bitmap, memslot->npages) { | ||
2983 | unsigned long gfn = memslot->base_gfn + gfn_offset; | ||
2984 | |||
2985 | spin_lock(&kvm->mmu_lock); | ||
2986 | kvm_mmu_rmap_write_protect(kvm, gfn, memslot); | ||
2987 | spin_unlock(&kvm->mmu_lock); | ||
2988 | } | ||
2989 | kvm_flush_remote_tlbs(kvm); | ||
2990 | } else { | ||
2991 | spin_lock(&kvm->mmu_lock); | ||
2992 | kvm_mmu_slot_remove_write_access(kvm, memslot->id); | ||
2993 | spin_unlock(&kvm->mmu_lock); | ||
2994 | } | ||
2995 | } | ||
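The per-gfn branch above relies on for_each_set_bit() to visit only the dirty pages. A standalone sketch of that iteration over a hypothetical one-word bitmap:

	unsigned long bitmap[1] = { 0x5 };	/* bits 0 and 2 set: two dirty pages */
	unsigned long gfn_offset;

	for_each_set_bit(gfn_offset, bitmap, BITS_PER_LONG)
		pr_info("dirty gfn offset %lu\n", gfn_offset);	/* prints 0, then 2 */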
2996 | |||
3463 | /* | 2997 | /* |
3464 | * Get (and clear) the dirty memory log for a memory slot. | 2998 | * Get (and clear) the dirty memory log for a memory slot. |
3465 | */ | 2999 | */ |
3466 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | 3000 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, |
3467 | struct kvm_dirty_log *log) | 3001 | struct kvm_dirty_log *log) |
3468 | { | 3002 | { |
3469 | int r, i; | 3003 | int r; |
3470 | struct kvm_memory_slot *memslot; | 3004 | struct kvm_memory_slot *memslot; |
3471 | unsigned long n; | 3005 | unsigned long n, nr_dirty_pages; |
3472 | unsigned long is_dirty = 0; | ||
3473 | 3006 | ||
3474 | mutex_lock(&kvm->slots_lock); | 3007 | mutex_lock(&kvm->slots_lock); |
3475 | 3008 | ||
@@ -3477,43 +3010,41 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
3477 | if (log->slot >= KVM_MEMORY_SLOTS) | 3010 | if (log->slot >= KVM_MEMORY_SLOTS) |
3478 | goto out; | 3011 | goto out; |
3479 | 3012 | ||
3480 | memslot = &kvm->memslots->memslots[log->slot]; | 3013 | memslot = id_to_memslot(kvm->memslots, log->slot); |
3481 | r = -ENOENT; | 3014 | r = -ENOENT; |
3482 | if (!memslot->dirty_bitmap) | 3015 | if (!memslot->dirty_bitmap) |
3483 | goto out; | 3016 | goto out; |
3484 | 3017 | ||
3485 | n = kvm_dirty_bitmap_bytes(memslot); | 3018 | n = kvm_dirty_bitmap_bytes(memslot); |
3486 | 3019 | nr_dirty_pages = memslot->nr_dirty_pages; | |
3487 | for (i = 0; !is_dirty && i < n/sizeof(long); i++) | ||
3488 | is_dirty = memslot->dirty_bitmap[i]; | ||
3489 | 3020 | ||
3490 | /* If nothing is dirty, don't bother messing with page tables. */ | 3021 | /* If nothing is dirty, don't bother messing with page tables. */ |
3491 | if (is_dirty) { | 3022 | if (nr_dirty_pages) { |
3492 | struct kvm_memslots *slots, *old_slots; | 3023 | struct kvm_memslots *slots, *old_slots; |
3493 | unsigned long *dirty_bitmap; | 3024 | unsigned long *dirty_bitmap, *dirty_bitmap_head; |
3494 | 3025 | ||
3495 | dirty_bitmap = memslot->dirty_bitmap_head; | 3026 | dirty_bitmap = memslot->dirty_bitmap; |
3496 | if (memslot->dirty_bitmap == dirty_bitmap) | 3027 | dirty_bitmap_head = memslot->dirty_bitmap_head; |
3497 | dirty_bitmap += n / sizeof(long); | 3028 | if (dirty_bitmap == dirty_bitmap_head) |
3498 | memset(dirty_bitmap, 0, n); | 3029 | dirty_bitmap_head += n / sizeof(long); |
3030 | memset(dirty_bitmap_head, 0, n); | ||
3499 | 3031 | ||
3500 | r = -ENOMEM; | 3032 | r = -ENOMEM; |
3501 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | 3033 | slots = kmemdup(kvm->memslots, sizeof(*kvm->memslots), GFP_KERNEL); |
3502 | if (!slots) | 3034 | if (!slots) |
3503 | goto out; | 3035 | goto out; |
3504 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); | 3036 | |
3505 | slots->memslots[log->slot].dirty_bitmap = dirty_bitmap; | 3037 | memslot = id_to_memslot(slots, log->slot); |
3506 | slots->generation++; | 3038 | memslot->nr_dirty_pages = 0; |
3039 | memslot->dirty_bitmap = dirty_bitmap_head; | ||
3040 | update_memslots(slots, NULL); | ||
3507 | 3041 | ||
3508 | old_slots = kvm->memslots; | 3042 | old_slots = kvm->memslots; |
3509 | rcu_assign_pointer(kvm->memslots, slots); | 3043 | rcu_assign_pointer(kvm->memslots, slots); |
3510 | synchronize_srcu_expedited(&kvm->srcu); | 3044 | synchronize_srcu_expedited(&kvm->srcu); |
3511 | dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap; | ||
3512 | kfree(old_slots); | 3045 | kfree(old_slots); |
3513 | 3046 | ||
3514 | spin_lock(&kvm->mmu_lock); | 3047 | write_protect_slot(kvm, memslot, dirty_bitmap, nr_dirty_pages); |
3515 | kvm_mmu_slot_remove_write_access(kvm, log->slot); | ||
3516 | spin_unlock(&kvm->mmu_lock); | ||
3517 | 3048 | ||
3518 | r = -EFAULT; | 3049 | r = -EFAULT; |
3519 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) | 3050 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) |
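The memslots update above is the usual copy/publish/reclaim sequence for SRCU-protected data: edit a private duplicate, publish it with rcu_assign_pointer(), wait out readers of the old copy, then free it. A condensed sketch of the idiom as used here (error handling trimmed):

	struct kvm_memslots *slots, *old_slots = kvm->memslots;

	slots = kmemdup(old_slots, sizeof(*old_slots), GFP_KERNEL);
	if (!slots)
		return -ENOMEM;
	/* modify the private copy, then make it visible atomically */
	rcu_assign_pointer(kvm->memslots, slots);
	synchronize_srcu_expedited(&kvm->srcu);	/* no readers of old_slots remain */
	kfree(old_slots);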
@@ -3658,14 +3189,14 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
3658 | } | 3189 | } |
3659 | case KVM_GET_IRQCHIP: { | 3190 | case KVM_GET_IRQCHIP: { |
3660 | /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ | 3191 | /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ |
3661 | struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL); | 3192 | struct kvm_irqchip *chip; |
3662 | 3193 | ||
3663 | r = -ENOMEM; | 3194 | chip = memdup_user(argp, sizeof(*chip)); |
3664 | if (!chip) | 3195 | if (IS_ERR(chip)) { |
3196 | r = PTR_ERR(chip); | ||
3665 | goto out; | 3197 | goto out; |
3666 | r = -EFAULT; | 3198 | } |
3667 | if (copy_from_user(chip, argp, sizeof *chip)) | 3199 | |
3668 | goto get_irqchip_out; | ||
3669 | r = -ENXIO; | 3200 | r = -ENXIO; |
3670 | if (!irqchip_in_kernel(kvm)) | 3201 | if (!irqchip_in_kernel(kvm)) |
3671 | goto get_irqchip_out; | 3202 | goto get_irqchip_out; |
@@ -3684,14 +3215,14 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
3684 | } | 3215 | } |
3685 | case KVM_SET_IRQCHIP: { | 3216 | case KVM_SET_IRQCHIP: { |
3686 | /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ | 3217 | /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ |
3687 | struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL); | 3218 | struct kvm_irqchip *chip; |
3688 | 3219 | ||
3689 | r = -ENOMEM; | 3220 | chip = memdup_user(argp, sizeof(*chip)); |
3690 | if (!chip) | 3221 | if (IS_ERR(chip)) { |
3222 | r = PTR_ERR(chip); | ||
3691 | goto out; | 3223 | goto out; |
3692 | r = -EFAULT; | 3224 | } |
3693 | if (copy_from_user(chip, argp, sizeof *chip)) | 3225 | |
3694 | goto set_irqchip_out; | ||
3695 | r = -ENXIO; | 3226 | r = -ENXIO; |
3696 | if (!irqchip_in_kernel(kvm)) | 3227 | if (!irqchip_in_kernel(kvm)) |
3697 | goto set_irqchip_out; | 3228 | goto set_irqchip_out; |
@@ -3898,12 +3429,7 @@ void kvm_get_segment(struct kvm_vcpu *vcpu, | |||
3898 | kvm_x86_ops->get_segment(vcpu, var, seg); | 3429 | kvm_x86_ops->get_segment(vcpu, var, seg); |
3899 | } | 3430 | } |
3900 | 3431 | ||
3901 | static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) | 3432 | gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) |
3902 | { | ||
3903 | return gpa; | ||
3904 | } | ||
3905 | |||
3906 | static gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) | ||
3907 | { | 3433 | { |
3908 | gpa_t t_gpa; | 3434 | gpa_t t_gpa; |
3909 | struct x86_exception exception; | 3435 | struct x86_exception exception; |
@@ -4087,7 +3613,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
4087 | ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); | 3613 | ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); |
4088 | if (ret < 0) | 3614 | if (ret < 0) |
4089 | return 0; | 3615 | return 0; |
4090 | kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1); | 3616 | kvm_mmu_pte_write(vcpu, gpa, val, bytes); |
4091 | return 1; | 3617 | return 1; |
4092 | } | 3618 | } |
4093 | 3619 | ||
@@ -4324,7 +3850,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, | |||
4324 | if (!exchanged) | 3850 | if (!exchanged) |
4325 | return X86EMUL_CMPXCHG_FAILED; | 3851 | return X86EMUL_CMPXCHG_FAILED; |
4326 | 3852 | ||
4327 | kvm_mmu_pte_write(vcpu, gpa, new, bytes, 1); | 3853 | kvm_mmu_pte_write(vcpu, gpa, new, bytes); |
4328 | 3854 | ||
4329 | return X86EMUL_CONTINUE; | 3855 | return X86EMUL_CONTINUE; |
4330 | 3856 | ||
@@ -4349,32 +3875,24 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) | |||
4349 | return r; | 3875 | return r; |
4350 | } | 3876 | } |
4351 | 3877 | ||
4352 | 3878 | static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size, | |
4353 | static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt, | 3879 | unsigned short port, void *val, |
4354 | int size, unsigned short port, void *val, | 3880 | unsigned int count, bool in) |
4355 | unsigned int count) | ||
4356 | { | 3881 | { |
4357 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | 3882 | trace_kvm_pio(!in, port, size, count); |
4358 | |||
4359 | if (vcpu->arch.pio.count) | ||
4360 | goto data_avail; | ||
4361 | |||
4362 | trace_kvm_pio(0, port, size, count); | ||
4363 | 3883 | ||
4364 | vcpu->arch.pio.port = port; | 3884 | vcpu->arch.pio.port = port; |
4365 | vcpu->arch.pio.in = 1; | 3885 | vcpu->arch.pio.in = in; |
4366 | vcpu->arch.pio.count = count; | 3886 | vcpu->arch.pio.count = count; |
4367 | vcpu->arch.pio.size = size; | 3887 | vcpu->arch.pio.size = size; |
4368 | 3888 | ||
4369 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { | 3889 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { |
4370 | data_avail: | ||
4371 | memcpy(val, vcpu->arch.pio_data, size * count); | ||
4372 | vcpu->arch.pio.count = 0; | 3890 | vcpu->arch.pio.count = 0; |
4373 | return 1; | 3891 | return 1; |
4374 | } | 3892 | } |
4375 | 3893 | ||
4376 | vcpu->run->exit_reason = KVM_EXIT_IO; | 3894 | vcpu->run->exit_reason = KVM_EXIT_IO; |
4377 | vcpu->run->io.direction = KVM_EXIT_IO_IN; | 3895 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; |
4378 | vcpu->run->io.size = size; | 3896 | vcpu->run->io.size = size; |
4379 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | 3897 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; |
4380 | vcpu->run->io.count = count; | 3898 | vcpu->run->io.count = count; |
@@ -4383,36 +3901,37 @@ static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt, | |||
4383 | return 0; | 3901 | return 0; |
4384 | } | 3902 | } |
4385 | 3903 | ||
4386 | static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt, | 3904 | static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt, |
4387 | int size, unsigned short port, | 3905 | int size, unsigned short port, void *val, |
4388 | const void *val, unsigned int count) | 3906 | unsigned int count) |
4389 | { | 3907 | { |
4390 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | 3908 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); |
3909 | int ret; | ||
4391 | 3910 | ||
4392 | trace_kvm_pio(1, port, size, count); | 3911 | if (vcpu->arch.pio.count) |
4393 | 3912 | goto data_avail; | |
4394 | vcpu->arch.pio.port = port; | ||
4395 | vcpu->arch.pio.in = 0; | ||
4396 | vcpu->arch.pio.count = count; | ||
4397 | vcpu->arch.pio.size = size; | ||
4398 | |||
4399 | memcpy(vcpu->arch.pio_data, val, size * count); | ||
4400 | 3913 | ||
4401 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { | 3914 | ret = emulator_pio_in_out(vcpu, size, port, val, count, true); |
3915 | if (ret) { | ||
3916 | data_avail: | ||
3917 | memcpy(val, vcpu->arch.pio_data, size * count); | ||
4402 | vcpu->arch.pio.count = 0; | 3918 | vcpu->arch.pio.count = 0; |
4403 | return 1; | 3919 | return 1; |
4404 | } | 3920 | } |
4405 | 3921 | ||
4406 | vcpu->run->exit_reason = KVM_EXIT_IO; | ||
4407 | vcpu->run->io.direction = KVM_EXIT_IO_OUT; | ||
4408 | vcpu->run->io.size = size; | ||
4409 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | ||
4410 | vcpu->run->io.count = count; | ||
4411 | vcpu->run->io.port = port; | ||
4412 | |||
4413 | return 0; | 3922 | return 0; |
4414 | } | 3923 | } |
4415 | 3924 | ||
3925 | static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt, | ||
3926 | int size, unsigned short port, | ||
3927 | const void *val, unsigned int count) | ||
3928 | { | ||
3929 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
3930 | |||
3931 | memcpy(vcpu->arch.pio_data, val, size * count); | ||
3932 | return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false); | ||
3933 | } | ||
3934 | |||
4416 | static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) | 3935 | static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) |
4417 | { | 3936 | { |
4418 | return kvm_x86_ops->get_segment_base(vcpu, seg); | 3937 | return kvm_x86_ops->get_segment_base(vcpu, seg); |
@@ -4627,6 +4146,12 @@ static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, | |||
4627 | return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data); | 4146 | return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data); |
4628 | } | 4147 | } |
4629 | 4148 | ||
4149 | static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt, | ||
4150 | u32 pmc, u64 *pdata) | ||
4151 | { | ||
4152 | return kvm_pmu_read_pmc(emul_to_vcpu(ctxt), pmc, pdata); | ||
4153 | } | ||
4154 | |||
4630 | static void emulator_halt(struct x86_emulate_ctxt *ctxt) | 4155 | static void emulator_halt(struct x86_emulate_ctxt *ctxt) |
4631 | { | 4156 | { |
4632 | emul_to_vcpu(ctxt)->arch.halt_request = 1; | 4157 | emul_to_vcpu(ctxt)->arch.halt_request = 1; |
@@ -4679,6 +4204,7 @@ static struct x86_emulate_ops emulate_ops = { | |||
4679 | .set_dr = emulator_set_dr, | 4204 | .set_dr = emulator_set_dr, |
4680 | .set_msr = emulator_set_msr, | 4205 | .set_msr = emulator_set_msr, |
4681 | .get_msr = emulator_get_msr, | 4206 | .get_msr = emulator_get_msr, |
4207 | .read_pmc = emulator_read_pmc, | ||
4682 | .halt = emulator_halt, | 4208 | .halt = emulator_halt, |
4683 | .wbinvd = emulator_wbinvd, | 4209 | .wbinvd = emulator_wbinvd, |
4684 | .fix_hypercall = emulator_fix_hypercall, | 4210 | .fix_hypercall = emulator_fix_hypercall, |
@@ -4836,6 +4362,50 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) | |||
4836 | return false; | 4362 | return false; |
4837 | } | 4363 | } |
4838 | 4364 | ||
4365 | static bool retry_instruction(struct x86_emulate_ctxt *ctxt, | ||
4366 | unsigned long cr2, int emulation_type) | ||
4367 | { | ||
4368 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
4369 | unsigned long last_retry_eip, last_retry_addr, gpa = cr2; | ||
4370 | |||
4371 | last_retry_eip = vcpu->arch.last_retry_eip; | ||
4372 | last_retry_addr = vcpu->arch.last_retry_addr; | ||
4373 | |||
4374 | /* | ||
4375 | * If the emulation is caused by #PF and it is a non-page-table- | ||
4376 | * writing instruction, the VM exit was caused by shadow-page | ||
4377 | * protection, so we can zap the shadow page and retry this | ||
4378 | * instruction directly. | ||
4379 | * | ||
4380 | * Note: if the guest uses a non-page-table-modifying instruction | ||
4381 | * on the PDE that points to the instruction, then we will unmap | ||
4382 | * the instruction and enter an infinite loop. So, we cache the | ||
4383 | * last retried eip and the last fault address; if we see the same | ||
4384 | * eip and address again, we can break out of the potential infinite | ||
4385 | * loop. | ||
4386 | */ | ||
4387 | vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0; | ||
4388 | |||
4389 | if (!(emulation_type & EMULTYPE_RETRY)) | ||
4390 | return false; | ||
4391 | |||
4392 | if (x86_page_table_writing_insn(ctxt)) | ||
4393 | return false; | ||
4394 | |||
4395 | if (ctxt->eip == last_retry_eip && last_retry_addr == cr2) | ||
4396 | return false; | ||
4397 | |||
4398 | vcpu->arch.last_retry_eip = ctxt->eip; | ||
4399 | vcpu->arch.last_retry_addr = cr2; | ||
4400 | |||
4401 | if (!vcpu->arch.mmu.direct_map) | ||
4402 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); | ||
4403 | |||
4404 | kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); | ||
4405 | |||
4406 | return true; | ||
4407 | } | ||
4408 | |||
4839 | int x86_emulate_instruction(struct kvm_vcpu *vcpu, | 4409 | int x86_emulate_instruction(struct kvm_vcpu *vcpu, |
4840 | unsigned long cr2, | 4410 | unsigned long cr2, |
4841 | int emulation_type, | 4411 | int emulation_type, |
@@ -4877,6 +4447,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
4877 | return EMULATE_DONE; | 4447 | return EMULATE_DONE; |
4878 | } | 4448 | } |
4879 | 4449 | ||
4450 | if (retry_instruction(ctxt, cr2, emulation_type)) | ||
4451 | return EMULATE_DONE; | ||
4452 | |||
4880 | /* this is needed for vmware backdoor interface to work since it | 4453 | /* this is needed for vmware backdoor interface to work since it |
4881 | changes register values during an IO operation */ | 4454 | changes register values during an IO operation */ |
4882 | if (vcpu->arch.emulate_regs_need_sync_from_vcpu) { | 4455 | if (vcpu->arch.emulate_regs_need_sync_from_vcpu) { |
@@ -5095,17 +4668,17 @@ static void kvm_timer_init(void) | |||
5095 | 4668 | ||
5096 | static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu); | 4669 | static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu); |
5097 | 4670 | ||
5098 | static int kvm_is_in_guest(void) | 4671 | int kvm_is_in_guest(void) |
5099 | { | 4672 | { |
5100 | return percpu_read(current_vcpu) != NULL; | 4673 | return __this_cpu_read(current_vcpu) != NULL; |
5101 | } | 4674 | } |
5102 | 4675 | ||
5103 | static int kvm_is_user_mode(void) | 4676 | static int kvm_is_user_mode(void) |
5104 | { | 4677 | { |
5105 | int user_mode = 3; | 4678 | int user_mode = 3; |
5106 | 4679 | ||
5107 | if (percpu_read(current_vcpu)) | 4680 | if (__this_cpu_read(current_vcpu)) |
5108 | user_mode = kvm_x86_ops->get_cpl(percpu_read(current_vcpu)); | 4681 | user_mode = kvm_x86_ops->get_cpl(__this_cpu_read(current_vcpu)); |
5109 | 4682 | ||
5110 | return user_mode != 0; | 4683 | return user_mode != 0; |
5111 | } | 4684 | } |
@@ -5114,8 +4687,8 @@ static unsigned long kvm_get_guest_ip(void) | |||
5114 | { | 4687 | { |
5115 | unsigned long ip = 0; | 4688 | unsigned long ip = 0; |
5116 | 4689 | ||
5117 | if (percpu_read(current_vcpu)) | 4690 | if (__this_cpu_read(current_vcpu)) |
5118 | ip = kvm_rip_read(percpu_read(current_vcpu)); | 4691 | ip = kvm_rip_read(__this_cpu_read(current_vcpu)); |
5119 | 4692 | ||
5120 | return ip; | 4693 | return ip; |
5121 | } | 4694 | } |
@@ -5128,13 +4701,13 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = { | |||
5128 | 4701 | ||
5129 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu) | 4702 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu) |
5130 | { | 4703 | { |
5131 | percpu_write(current_vcpu, vcpu); | 4704 | __this_cpu_write(current_vcpu, vcpu); |
5132 | } | 4705 | } |
5133 | EXPORT_SYMBOL_GPL(kvm_before_handle_nmi); | 4706 | EXPORT_SYMBOL_GPL(kvm_before_handle_nmi); |
5134 | 4707 | ||
5135 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu) | 4708 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu) |
5136 | { | 4709 | { |
5137 | percpu_write(current_vcpu, NULL); | 4710 | __this_cpu_write(current_vcpu, NULL); |
5138 | } | 4711 | } |
5139 | EXPORT_SYMBOL_GPL(kvm_after_handle_nmi); | 4712 | EXPORT_SYMBOL_GPL(kvm_after_handle_nmi); |
5140 | 4713 | ||
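The accessor change above is mechanical: the x86-only percpu_read()/percpu_write() helpers give way to the generic __this_cpu_read()/__this_cpu_write(), with identical semantics on this path. Condensed, the pattern is (handle_guest_sample() is a hypothetical consumer):

	static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);

	__this_cpu_write(current_vcpu, vcpu);	/* entering guest NMI handling */
	if (__this_cpu_read(current_vcpu))	/* non-NULL: currently in a guest */
		handle_guest_sample();
	__this_cpu_write(current_vcpu, NULL);	/* leaving */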
@@ -5233,15 +4806,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) | |||
5233 | } | 4806 | } |
5234 | EXPORT_SYMBOL_GPL(kvm_emulate_halt); | 4807 | EXPORT_SYMBOL_GPL(kvm_emulate_halt); |
5235 | 4808 | ||
5236 | static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0, | ||
5237 | unsigned long a1) | ||
5238 | { | ||
5239 | if (is_long_mode(vcpu)) | ||
5240 | return a0; | ||
5241 | else | ||
5242 | return a0 | ((gpa_t)a1 << 32); | ||
5243 | } | ||
5244 | |||
5245 | int kvm_hv_hypercall(struct kvm_vcpu *vcpu) | 4809 | int kvm_hv_hypercall(struct kvm_vcpu *vcpu) |
5246 | { | 4810 | { |
5247 | u64 param, ingpa, outgpa, ret; | 4811 | u64 param, ingpa, outgpa, ret; |
@@ -5337,9 +4901,6 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) | |||
5337 | case KVM_HC_VAPIC_POLL_IRQ: | 4901 | case KVM_HC_VAPIC_POLL_IRQ: |
5338 | ret = 0; | 4902 | ret = 0; |
5339 | break; | 4903 | break; |
5340 | case KVM_HC_MMU_OP: | ||
5341 | r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret); | ||
5342 | break; | ||
5343 | default: | 4904 | default: |
5344 | ret = -KVM_ENOSYS; | 4905 | ret = -KVM_ENOSYS; |
5345 | break; | 4906 | break; |
@@ -5369,125 +4930,6 @@ int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) | |||
5369 | return emulator_write_emulated(ctxt, rip, instruction, 3, NULL); | 4930 | return emulator_write_emulated(ctxt, rip, instruction, 3, NULL); |
5370 | } | 4931 | } |
5371 | 4932 | ||
5372 | static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) | ||
5373 | { | ||
5374 | struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i]; | ||
5375 | int j, nent = vcpu->arch.cpuid_nent; | ||
5376 | |||
5377 | e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT; | ||
5378 | /* when no next entry is found, the current entry[i] is reselected */ | ||
5379 | for (j = i + 1; ; j = (j + 1) % nent) { | ||
5380 | struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j]; | ||
5381 | if (ej->function == e->function) { | ||
5382 | ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; | ||
5383 | return j; | ||
5384 | } | ||
5385 | } | ||
5386 | return 0; /* silence gcc, even though control never reaches here */ | ||
5387 | } | ||
5388 | |||
5389 | /* find an entry with matching function, matching index (if needed), and that | ||
5390 | * should be read next (if it's stateful) */ | ||
5391 | static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e, | ||
5392 | u32 function, u32 index) | ||
5393 | { | ||
5394 | if (e->function != function) | ||
5395 | return 0; | ||
5396 | if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index) | ||
5397 | return 0; | ||
5398 | if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) && | ||
5399 | !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT)) | ||
5400 | return 0; | ||
5401 | return 1; | ||
5402 | } | ||
5403 | |||
5404 | struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, | ||
5405 | u32 function, u32 index) | ||
5406 | { | ||
5407 | int i; | ||
5408 | struct kvm_cpuid_entry2 *best = NULL; | ||
5409 | |||
5410 | for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { | ||
5411 | struct kvm_cpuid_entry2 *e; | ||
5412 | |||
5413 | e = &vcpu->arch.cpuid_entries[i]; | ||
5414 | if (is_matching_cpuid_entry(e, function, index)) { | ||
5415 | if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) | ||
5416 | move_to_next_stateful_cpuid_entry(vcpu, i); | ||
5417 | best = e; | ||
5418 | break; | ||
5419 | } | ||
5420 | } | ||
5421 | return best; | ||
5422 | } | ||
5423 | EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry); | ||
5424 | |||
5425 | int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) | ||
5426 | { | ||
5427 | struct kvm_cpuid_entry2 *best; | ||
5428 | |||
5429 | best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0); | ||
5430 | if (!best || best->eax < 0x80000008) | ||
5431 | goto not_found; | ||
5432 | best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); | ||
5433 | if (best) | ||
5434 | return best->eax & 0xff; | ||
5435 | not_found: | ||
5436 | return 36; | ||
5437 | } | ||
5438 | |||
5439 | /* | ||
5440 | * If no match is found, check whether we exceed the vCPU's limit | ||
5441 | * and return the content of the highest valid _standard_ leaf instead. | ||
5442 | * This is to satisfy the CPUID specification. | ||
5443 | */ | ||
5444 | static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu, | ||
5445 | u32 function, u32 index) | ||
5446 | { | ||
5447 | struct kvm_cpuid_entry2 *maxlevel; | ||
5448 | |||
5449 | maxlevel = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0); | ||
5450 | if (!maxlevel || maxlevel->eax >= function) | ||
5451 | return NULL; | ||
5452 | if (function & 0x80000000) { | ||
5453 | maxlevel = kvm_find_cpuid_entry(vcpu, 0, 0); | ||
5454 | if (!maxlevel) | ||
5455 | return NULL; | ||
5456 | } | ||
5457 | return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index); | ||
5458 | } | ||
5459 | |||
5460 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) | ||
5461 | { | ||
5462 | u32 function, index; | ||
5463 | struct kvm_cpuid_entry2 *best; | ||
5464 | |||
5465 | function = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
5466 | index = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
5467 | kvm_register_write(vcpu, VCPU_REGS_RAX, 0); | ||
5468 | kvm_register_write(vcpu, VCPU_REGS_RBX, 0); | ||
5469 | kvm_register_write(vcpu, VCPU_REGS_RCX, 0); | ||
5470 | kvm_register_write(vcpu, VCPU_REGS_RDX, 0); | ||
5471 | best = kvm_find_cpuid_entry(vcpu, function, index); | ||
5472 | |||
5473 | if (!best) | ||
5474 | best = check_cpuid_limit(vcpu, function, index); | ||
5475 | |||
5476 | if (best) { | ||
5477 | kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax); | ||
5478 | kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx); | ||
5479 | kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx); | ||
5480 | kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx); | ||
5481 | } | ||
5482 | kvm_x86_ops->skip_emulated_instruction(vcpu); | ||
5483 | trace_kvm_cpuid(function, | ||
5484 | kvm_register_read(vcpu, VCPU_REGS_RAX), | ||
5485 | kvm_register_read(vcpu, VCPU_REGS_RBX), | ||
5486 | kvm_register_read(vcpu, VCPU_REGS_RCX), | ||
5487 | kvm_register_read(vcpu, VCPU_REGS_RDX)); | ||
5488 | } | ||
5489 | EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); | ||
5490 | |||
5491 | /* | 4933 | /* |
5492 | * Check if userspace requested an interrupt window, and that the | 4934 | * Check if userspace requested an interrupt window, and that the |
5493 | * interrupt window is open. | 4935 | * interrupt window is open. |
@@ -5648,6 +5090,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5648 | int r; | 5090 | int r; |
5649 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && | 5091 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && |
5650 | vcpu->run->request_interrupt_window; | 5092 | vcpu->run->request_interrupt_window; |
5093 | bool req_immediate_exit = 0; | ||
5651 | 5094 | ||
5652 | if (vcpu->requests) { | 5095 | if (vcpu->requests) { |
5653 | if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) | 5096 | if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) |
@@ -5687,7 +5130,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5687 | record_steal_time(vcpu); | 5130 | record_steal_time(vcpu); |
5688 | if (kvm_check_request(KVM_REQ_NMI, vcpu)) | 5131 | if (kvm_check_request(KVM_REQ_NMI, vcpu)) |
5689 | process_nmi(vcpu); | 5132 | process_nmi(vcpu); |
5690 | 5133 | req_immediate_exit = | |
5134 | kvm_check_request(KVM_REQ_IMMEDIATE_EXIT, vcpu); | ||
5135 | if (kvm_check_request(KVM_REQ_PMU, vcpu)) | ||
5136 | kvm_handle_pmu_event(vcpu); | ||
5137 | if (kvm_check_request(KVM_REQ_PMI, vcpu)) | ||
5138 | kvm_deliver_pmi(vcpu); | ||
5691 | } | 5139 | } |
5692 | 5140 | ||
5693 | r = kvm_mmu_reload(vcpu); | 5141 | r = kvm_mmu_reload(vcpu); |
@@ -5738,6 +5186,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5738 | 5186 | ||
5739 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | 5187 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); |
5740 | 5188 | ||
5189 | if (req_immediate_exit) | ||
5190 | smp_send_reschedule(vcpu->cpu); | ||
5191 | |||
5741 | kvm_guest_enter(); | 5192 | kvm_guest_enter(); |
5742 | 5193 | ||
5743 | if (unlikely(vcpu->arch.switch_db_regs)) { | 5194 | if (unlikely(vcpu->arch.switch_db_regs)) { |
@@ -5943,10 +5394,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
5943 | if (r <= 0) | 5394 | if (r <= 0) |
5944 | goto out; | 5395 | goto out; |
5945 | 5396 | ||
5946 | if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) | ||
5947 | kvm_register_write(vcpu, VCPU_REGS_RAX, | ||
5948 | kvm_run->hypercall.ret); | ||
5949 | |||
5950 | r = __vcpu_run(vcpu); | 5397 | r = __vcpu_run(vcpu); |
5951 | 5398 | ||
5952 | out: | 5399 | out: |
@@ -6148,7 +5595,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
6148 | mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; | 5595 | mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; |
6149 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); | 5596 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); |
6150 | if (sregs->cr4 & X86_CR4_OSXSAVE) | 5597 | if (sregs->cr4 & X86_CR4_OSXSAVE) |
6151 | update_cpuid(vcpu); | 5598 | kvm_update_cpuid(vcpu); |
6152 | 5599 | ||
6153 | idx = srcu_read_lock(&vcpu->kvm->srcu); | 5600 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
6154 | if (!is_long_mode(vcpu) && is_pae(vcpu)) { | 5601 | if (!is_long_mode(vcpu) && is_pae(vcpu)) { |
@@ -6425,6 +5872,8 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) | |||
6425 | kvm_async_pf_hash_reset(vcpu); | 5872 | kvm_async_pf_hash_reset(vcpu); |
6426 | vcpu->arch.apf.halted = false; | 5873 | vcpu->arch.apf.halted = false; |
6427 | 5874 | ||
5875 | kvm_pmu_reset(vcpu); | ||
5876 | |||
6428 | return kvm_x86_ops->vcpu_reset(vcpu); | 5877 | return kvm_x86_ops->vcpu_reset(vcpu); |
6429 | } | 5878 | } |
6430 | 5879 | ||
@@ -6473,10 +5922,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
6473 | kvm = vcpu->kvm; | 5922 | kvm = vcpu->kvm; |
6474 | 5923 | ||
6475 | vcpu->arch.emulate_ctxt.ops = &emulate_ops; | 5924 | vcpu->arch.emulate_ctxt.ops = &emulate_ops; |
6476 | vcpu->arch.walk_mmu = &vcpu->arch.mmu; | ||
6477 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; | ||
6478 | vcpu->arch.mmu.translate_gpa = translate_gpa; | ||
6479 | vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa; | ||
6480 | if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu)) | 5925 | if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu)) |
6481 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 5926 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
6482 | else | 5927 | else |
@@ -6513,6 +5958,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
6513 | goto fail_free_mce_banks; | 5958 | goto fail_free_mce_banks; |
6514 | 5959 | ||
6515 | kvm_async_pf_hash_reset(vcpu); | 5960 | kvm_async_pf_hash_reset(vcpu); |
5961 | kvm_pmu_init(vcpu); | ||
6516 | 5962 | ||
6517 | return 0; | 5963 | return 0; |
6518 | fail_free_mce_banks: | 5964 | fail_free_mce_banks: |
@@ -6531,6 +5977,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) | |||
6531 | { | 5977 | { |
6532 | int idx; | 5978 | int idx; |
6533 | 5979 | ||
5980 | kvm_pmu_destroy(vcpu); | ||
6534 | kfree(vcpu->arch.mce_banks); | 5981 | kfree(vcpu->arch.mce_banks); |
6535 | kvm_free_lapic(vcpu); | 5982 | kvm_free_lapic(vcpu); |
6536 | idx = srcu_read_lock(&vcpu->kvm->srcu); | 5983 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index d36fe237c665..cb80c293cdd8 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -33,9 +33,6 @@ static inline bool kvm_exception_is_soft(unsigned int nr) | |||
33 | return (nr == BP_VECTOR) || (nr == OF_VECTOR); | 33 | return (nr == BP_VECTOR) || (nr == OF_VECTOR); |
34 | } | 34 | } |
35 | 35 | ||
36 | struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, | ||
37 | u32 function, u32 index); | ||
38 | |||
39 | static inline bool is_protmode(struct kvm_vcpu *vcpu) | 36 | static inline bool is_protmode(struct kvm_vcpu *vcpu) |
40 | { | 37 | { |
41 | return kvm_read_cr0_bits(vcpu, X86_CR0_PE); | 38 | return kvm_read_cr0_bits(vcpu, X86_CR0_PE); |
@@ -125,4 +122,6 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, | |||
125 | gva_t addr, void *val, unsigned int bytes, | 122 | gva_t addr, void *val, unsigned int bytes, |
126 | struct x86_exception *exception); | 123 | struct x86_exception *exception); |
127 | 124 | ||
125 | extern u64 host_xcr0; | ||
126 | |||
128 | #endif | 127 | #endif |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index cf4603ba866f..642d8805bc1b 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -856,18 +856,23 @@ static void __init lguest_init_IRQ(void) | |||
856 | } | 856 | } |
857 | 857 | ||
858 | /* | 858 | /* |
859 | * With CONFIG_SPARSE_IRQ, interrupt descriptors are allocated as-needed, so | 859 | * Interrupt descriptors are allocated as-needed, but low-numbered ones are |
860 | * rather than set them in lguest_init_IRQ we are called here every time an | 860 | * reserved by the generic x86 code. So we ignore irq_alloc_desc_at if it |
861 | lguest device needs an interrupt. | 861 | tells us the irq is already used: other errors (e.g. -ENOMEM) we take |
862 | * | 862 | * seriously. |
863 | * FIXME: irq_alloc_desc_at() can fail due to lack of memory, we should | ||
864 | * pass that up! | ||
865 | */ | 863 | */ |
866 | void lguest_setup_irq(unsigned int irq) | 864 | int lguest_setup_irq(unsigned int irq) |
867 | { | 865 | { |
868 | irq_alloc_desc_at(irq, 0); | 866 | int err; |
867 | |||
868 | /* Returns -ve error or vector number. */ | ||
869 | err = irq_alloc_desc_at(irq, 0); | ||
870 | if (err < 0 && err != -EEXIST) | ||
871 | return err; | ||
872 | |||
869 | irq_set_chip_and_handler_name(irq, &lguest_irq_controller, | 873 | irq_set_chip_and_handler_name(irq, &lguest_irq_controller, |
870 | handle_level_irq, "level"); | 874 | handle_level_irq, "level"); |
875 | return 0; | ||
871 | } | 876 | } |
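With lguest_setup_irq() now returning int, callers are expected to propagate failures rather than ignore them. A minimal sketch of the assumed calling pattern (hypothetical call site):

	int err = lguest_setup_irq(irq);
	if (err)
		return err;	/* e.g. -ENOMEM; -EEXIST was already filtered above */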
872 | 877 | ||
873 | /* | 878 | /* |
diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c index 46fc4ee09fc4..88ad5fbda6e1 100644 --- a/arch/x86/lib/inat.c +++ b/arch/x86/lib/inat.c | |||
@@ -82,9 +82,16 @@ insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, | |||
82 | const insn_attr_t *table; | 82 | const insn_attr_t *table; |
83 | if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) | 83 | if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) |
84 | return 0; | 84 | return 0; |
85 | table = inat_avx_tables[vex_m][vex_p]; | 85 | /* First, check the master table */ |
86 | table = inat_avx_tables[vex_m][0]; | ||
86 | if (!table) | 87 | if (!table) |
87 | return 0; | 88 | return 0; |
89 | if (!inat_is_group(table[opcode]) && vex_p) { | ||
90 | /* If this is not a group, get attribute directly */ | ||
91 | table = inat_avx_tables[vex_m][vex_p]; | ||
92 | if (!table) | ||
93 | return 0; | ||
94 | } | ||
88 | return table[opcode]; | 95 | return table[opcode]; |
89 | } | 96 | } |
90 | 97 | ||
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 374562ed6704..5a1f9f3e3fbb 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c | |||
@@ -202,7 +202,7 @@ void insn_get_opcode(struct insn *insn) | |||
202 | m = insn_vex_m_bits(insn); | 202 | m = insn_vex_m_bits(insn); |
203 | p = insn_vex_p_bits(insn); | 203 | p = insn_vex_p_bits(insn); |
204 | insn->attr = inat_get_avx_attribute(op, m, p); | 204 | insn->attr = inat_get_avx_attribute(op, m, p); |
205 | if (!inat_accept_vex(insn->attr)) | 205 | if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr)) |
206 | insn->attr = 0; /* This instruction is bad */ | 206 | insn->attr = 0; /* This instruction is bad */ |
207 | goto end; /* VEX has only 1 byte for opcode */ | 207 | goto end; /* VEX has only 1 byte for opcode */ |
208 | } | 208 | } |
@@ -249,6 +249,8 @@ void insn_get_modrm(struct insn *insn) | |||
249 | pfx = insn_last_prefix(insn); | 249 | pfx = insn_last_prefix(insn); |
250 | insn->attr = inat_get_group_attribute(mod, pfx, | 250 | insn->attr = inat_get_group_attribute(mod, pfx, |
251 | insn->attr); | 251 | insn->attr); |
252 | if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) | ||
253 | insn->attr = 0; /* This is bad */ | ||
252 | } | 254 | } |
253 | } | 255 | } |
254 | 256 | ||
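For orientation, both hunks above sit on the common instruction-decoder path. A rough sketch of how a caller reaches them, assuming the three-argument insn_init() of this kernel generation:

	struct insn insn;

	insn_init(&insn, kaddr, 1);	/* 1 = decode as 64-bit code */
	insn_get_opcode(&insn);		/* VEX opcodes consult inat_get_avx_attribute() */
	insn_get_modrm(&insn);		/* group lookup; AVX groups are re-validated here */
	if (!insn.attr)
		pr_warn("bad instruction\n");	/* decoder rejected it */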
diff --git a/arch/x86/lib/string_32.c b/arch/x86/lib/string_32.c index 82004d2bf05e..bd59090825db 100644 --- a/arch/x86/lib/string_32.c +++ b/arch/x86/lib/string_32.c | |||
@@ -164,15 +164,13 @@ EXPORT_SYMBOL(strchr); | |||
164 | size_t strlen(const char *s) | 164 | size_t strlen(const char *s) |
165 | { | 165 | { |
166 | int d0; | 166 | int d0; |
167 | int res; | 167 | size_t res; |
168 | asm volatile("repne\n\t" | 168 | asm volatile("repne\n\t" |
169 | "scasb\n\t" | 169 | "scasb" |
170 | "notl %0\n\t" | ||
171 | "decl %0" | ||
172 | : "=c" (res), "=&D" (d0) | 170 | : "=c" (res), "=&D" (d0) |
173 | : "1" (s), "a" (0), "0" (0xffffffffu) | 171 | : "1" (s), "a" (0), "0" (0xffffffffu) |
174 | : "memory"); | 172 | : "memory"); |
175 | return res; | 173 | return ~res - 1; |
176 | } | 174 | } |
177 | EXPORT_SYMBOL(strlen); | 175 | EXPORT_SYMBOL(strlen); |
178 | #endif | 176 | #endif |
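The rewritten strlen() moves the notl/decl fixup out of the asm: repne scasb decrements %ecx once per byte examined, terminating NUL included, starting from 0xffffffffu, so the number of bytes scanned is 0xffffffffu - res == ~res and the string length is ~res - 1. A quick arithmetic check in plain C for an assumed 3-byte string (32-bit size_t, as on i386):

	/* "abc": scasb examines 'a', 'b', 'c', '\0', four decrements in total */
	unsigned int res = 0xffffffffu - 4;	/* 0xfffffffb, the value left in %ecx */
	unsigned int len = ~res - 1;		/* ~0xfffffffb == 4; 4 - 1 == 3 */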
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index a793da5e560e..5b83c51c12e0 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt | |||
@@ -1,5 +1,11 @@ | |||
1 | # x86 Opcode Maps | 1 | # x86 Opcode Maps |
2 | # | 2 | # |
3 | # This is (mostly) based on the following documentation: | ||
4 | # - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2 | ||
5 | # (#325383-040US, October 2011) | ||
6 | # - Intel(R) Advanced Vector Extensions Programming Reference | ||
7 | # (#319433-011, June 2011). | ||
8 | # | ||
3 | #<Opcode maps> | 9 | #<Opcode maps> |
4 | # Table: table-name | 10 | # Table: table-name |
5 | # Referrer: escaped-name | 11 | # Referrer: escaped-name |
@@ -15,10 +21,13 @@ | |||
15 | # EndTable | 21 | # EndTable |
16 | # | 22 | # |
17 | # AVX Superscripts | 23 | # AVX Superscripts |
18 | # (VEX): this opcode can accept VEX prefix. | 24 | # (v): this opcode requires VEX prefix. |
19 | # (oVEX): this opcode requires VEX prefix. | 25 | # (v1): this opcode only supports 128bit VEX. |
20 | # (o128): this opcode only supports 128bit VEX. | 26 | # |
21 | # (o256): this opcode only supports 256bit VEX. | 27 | # Last Prefix Superscripts |
28 | # - (66): the last prefix is 0x66 | ||
29 | # - (F3): the last prefix is 0xF3 | ||
30 | # - (F2): the last prefix is 0xF2 | ||
22 | # | 31 | # |
23 | 32 | ||
24 | Table: one byte opcode | 33 | Table: one byte opcode |
@@ -199,8 +208,8 @@ a0: MOV AL,Ob | |||
199 | a1: MOV rAX,Ov | 208 | a1: MOV rAX,Ov |
200 | a2: MOV Ob,AL | 209 | a2: MOV Ob,AL |
201 | a3: MOV Ov,rAX | 210 | a3: MOV Ov,rAX |
202 | a4: MOVS/B Xb,Yb | 211 | a4: MOVS/B Yb,Xb |
203 | a5: MOVS/W/D/Q Xv,Yv | 212 | a5: MOVS/W/D/Q Yv,Xv |
204 | a6: CMPS/B Xb,Yb | 213 | a6: CMPS/B Xb,Yb |
205 | a7: CMPS/W/D Xv,Yv | 214 | a7: CMPS/W/D Xv,Yv |
206 | a8: TEST AL,Ib | 215 | a8: TEST AL,Ib |
@@ -233,8 +242,8 @@ c0: Grp2 Eb,Ib (1A) | |||
233 | c1: Grp2 Ev,Ib (1A) | 242 | c1: Grp2 Ev,Ib (1A) |
234 | c2: RETN Iw (f64) | 243 | c2: RETN Iw (f64) |
235 | c3: RETN | 244 | c3: RETN |
236 | c4: LES Gz,Mp (i64) | 3bytes-VEX (Prefix) | 245 | c4: LES Gz,Mp (i64) | VEX+2byte (Prefix) |
237 | c5: LDS Gz,Mp (i64) | 2bytes-VEX (Prefix) | 246 | c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix) |
238 | c6: Grp11 Eb,Ib (1A) | 247 | c6: Grp11 Eb,Ib (1A) |
239 | c7: Grp11 Ev,Iz (1A) | 248 | c7: Grp11 Ev,Iz (1A) |
240 | c8: ENTER Iw,Ib | 249 | c8: ENTER Iw,Ib |
@@ -320,14 +329,19 @@ AVXcode: 1 | |||
320 | # 3DNow! uses the last imm byte as opcode extension. | 329 | # 3DNow! uses the last imm byte as opcode extension. |
321 | 0f: 3DNow! Pq,Qq,Ib | 330 | 0f: 3DNow! Pq,Qq,Ib |
322 | # 0x0f 0x10-0x1f | 331 | # 0x0f 0x10-0x1f |
323 | 10: movups Vps,Wps (VEX) | movss Vss,Wss (F3),(VEX),(o128) | movupd Vpd,Wpd (66),(VEX) | movsd Vsd,Wsd (F2),(VEX),(o128) | 332 | # NOTE: According to the Intel SDM opcode map, vmovups and vmovupd have no operands
324 | 11: movups Wps,Vps (VEX) | movss Wss,Vss (F3),(VEX),(o128) | movupd Wpd,Vpd (66),(VEX) | movsd Wsd,Vsd (F2),(VEX),(o128) | 333 | # but they actually do. Also, vmovss and vmovsd only accept 128-bit.
325 | 12: movlps Vq,Mq (VEX),(o128) | movlpd Vq,Mq (66),(VEX),(o128) | movhlps Vq,Uq (VEX),(o128) | movddup Vq,Wq (F2),(VEX) | movsldup Vq,Wq (F3),(VEX) | 334 | # MOVSS/MOVSD have too many forms (3) in the SDM. This map just shows a typical form.
326 | 13: mpvlps Mq,Vq (VEX),(o128) | movlpd Mq,Vq (66),(VEX),(o128) | 335 | # Many AVX instructions lack the v1 superscript, according to the Intel AVX Programming
327 | 14: unpcklps Vps,Wq (VEX) | unpcklpd Vpd,Wq (66),(VEX) | 336 | # Reference A.1
328 | 15: unpckhps Vps,Wq (VEX) | unpckhpd Vpd,Wq (66),(VEX) | 337 | 10: vmovups Vps,Wps | vmovupd Vpd,Wpd (66) | vmovss Vx,Hx,Wss (F3),(v1) | vmovsd Vx,Hx,Wsd (F2),(v1) |
329 | 16: movhps Vq,Mq (VEX),(o128) | movhpd Vq,Mq (66),(VEX),(o128) | movlsps Vq,Uq (VEX),(o128) | movshdup Vq,Wq (F3),(VEX) | 338 | 11: vmovups Wps,Vps | vmovupd Wpd,Vpd (66) | vmovss Wss,Hx,Vss (F3),(v1) | vmovsd Wsd,Hx,Vsd (F2),(v1) |
330 | 17: movhps Mq,Vq (VEX),(o128) | movhpd Mq,Vq (66),(VEX),(o128) | 339 | 12: vmovlps Vq,Hq,Mq (v1) | vmovhlps Vq,Hq,Uq (v1) | vmovlpd Vq,Hq,Mq (66),(v1) | vmovsldup Vx,Wx (F3) | vmovddup Vx,Wx (F2) |
340 | 13: vmovlps Mq,Vq (v1) | vmovlpd Mq,Vq (66),(v1) | ||
341 | 14: vunpcklps Vx,Hx,Wx | vunpcklpd Vx,Hx,Wx (66) | ||
342 | 15: vunpckhps Vx,Hx,Wx | vunpckhpd Vx,Hx,Wx (66) | ||
343 | 16: vmovhps Vdq,Hq,Mq (v1) | vmovlhps Vdq,Hq,Uq (v1) | vmovhpd Vdq,Hq,Mq (66),(v1) | vmovshdup Vx,Wx (F3) | ||
344 | 17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1) | ||
331 | 18: Grp16 (1A) | 345 | 18: Grp16 (1A) |
332 | 19: | 346 | 19: |
333 | 1a: | 347 | 1a: |
@@ -345,14 +359,14 @@ AVXcode: 1 | |||
345 | 25: | 359 | 25: |
346 | 26: | 360 | 26: |
347 | 27: | 361 | 27: |
348 | 28: movaps Vps,Wps (VEX) | movapd Vpd,Wpd (66),(VEX) | 362 | 28: vmovaps Vps,Wps | vmovapd Vpd,Wpd (66) |
349 | 29: movaps Wps,Vps (VEX) | movapd Wpd,Vpd (66),(VEX) | 363 | 29: vmovaps Wps,Vps | vmovapd Wpd,Vpd (66) |
350 | 2a: cvtpi2ps Vps,Qpi | cvtsi2ss Vss,Ed/q (F3),(VEX),(o128) | cvtpi2pd Vpd,Qpi (66) | cvtsi2sd Vsd,Ed/q (F2),(VEX),(o128) | 364 | 2a: cvtpi2ps Vps,Qpi | cvtpi2pd Vpd,Qpi (66) | vcvtsi2ss Vss,Hss,Ey (F3),(v1) | vcvtsi2sd Vsd,Hsd,Ey (F2),(v1) |
351 | 2b: movntps Mps,Vps (VEX) | movntpd Mpd,Vpd (66),(VEX) | 365 | 2b: vmovntps Mps,Vps | vmovntpd Mpd,Vpd (66) |
352 | 2c: cvttps2pi Ppi,Wps | cvttss2si Gd/q,Wss (F3),(VEX),(o128) | cvttpd2pi Ppi,Wpd (66) | cvttsd2si Gd/q,Wsd (F2),(VEX),(o128) | 366 | 2c: cvttps2pi Ppi,Wps | cvttpd2pi Ppi,Wpd (66) | vcvttss2si Gy,Wss (F3),(v1) | vcvttsd2si Gy,Wsd (F2),(v1) |
353 | 2d: cvtps2pi Ppi,Wps | cvtss2si Gd/q,Wss (F3),(VEX),(o128) | cvtpd2pi Qpi,Wpd (66) | cvtsd2si Gd/q,Wsd (F2),(VEX),(o128) | 367 | 2d: cvtps2pi Ppi,Wps | cvtpd2pi Qpi,Wpd (66) | vcvtss2si Gy,Wss (F3),(v1) | vcvtsd2si Gy,Wsd (F2),(v1) |
354 | 2e: ucomiss Vss,Wss (VEX),(o128) | ucomisd Vsd,Wsd (66),(VEX),(o128) | 368 | 2e: vucomiss Vss,Wss (v1) | vucomisd Vsd,Wsd (66),(v1) |
355 | 2f: comiss Vss,Wss (VEX),(o128) | comisd Vsd,Wsd (66),(VEX),(o128) | 369 | 2f: vcomiss Vss,Wss (v1) | vcomisd Vsd,Wsd (66),(v1) |
356 | # 0x0f 0x30-0x3f | 370 | # 0x0f 0x30-0x3f |
357 | 30: WRMSR | 371 | 30: WRMSR |
358 | 31: RDTSC | 372 | 31: RDTSC |
@@ -388,65 +402,66 @@ AVXcode: 1 | |||
388 | 4e: CMOVLE/NG Gv,Ev | 402 | 4e: CMOVLE/NG Gv,Ev |
389 | 4f: CMOVNLE/G Gv,Ev | 403 | 4f: CMOVNLE/G Gv,Ev |
390 | # 0x0f 0x50-0x5f | 404 | # 0x0f 0x50-0x5f |
391 | 50: movmskps Gd/q,Ups (VEX) | movmskpd Gd/q,Upd (66),(VEX) | 405 | 50: vmovmskps Gy,Ups | vmovmskpd Gy,Upd (66) |
392 | 51: sqrtps Vps,Wps (VEX) | sqrtss Vss,Wss (F3),(VEX),(o128) | sqrtpd Vpd,Wpd (66),(VEX) | sqrtsd Vsd,Wsd (F2),(VEX),(o128) | 406 | 51: vsqrtps Vps,Wps | vsqrtpd Vpd,Wpd (66) | vsqrtss Vss,Hss,Wss (F3),(v1) | vsqrtsd Vsd,Hsd,Wsd (F2),(v1) |
393 | 52: rsqrtps Vps,Wps (VEX) | rsqrtss Vss,Wss (F3),(VEX),(o128) | 407 | 52: vrsqrtps Vps,Wps | vrsqrtss Vss,Hss,Wss (F3),(v1) |
394 | 53: rcpps Vps,Wps (VEX) | rcpss Vss,Wss (F3),(VEX),(o128) | 408 | 53: vrcpps Vps,Wps | vrcpss Vss,Hss,Wss (F3),(v1) |
395 | 54: andps Vps,Wps (VEX) | andpd Vpd,Wpd (66),(VEX) | 409 | 54: vandps Vps,Hps,Wps | vandpd Vpd,Hpd,Wpd (66) |
396 | 55: andnps Vps,Wps (VEX) | andnpd Vpd,Wpd (66),(VEX) | 410 | 55: vandnps Vps,Hps,Wps | vandnpd Vpd,Hpd,Wpd (66) |
397 | 56: orps Vps,Wps (VEX) | orpd Vpd,Wpd (66),(VEX) | 411 | 56: vorps Vps,Hps,Wps | vorpd Vpd,Hpd,Wpd (66) |
398 | 57: xorps Vps,Wps (VEX) | xorpd Vpd,Wpd (66),(VEX) | 412 | 57: vxorps Vps,Hps,Wps | vxorpd Vpd,Hpd,Wpd (66) |
399 | 58: addps Vps,Wps (VEX) | addss Vss,Wss (F3),(VEX),(o128) | addpd Vpd,Wpd (66),(VEX) | addsd Vsd,Wsd (F2),(VEX),(o128) | 413 | 58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1) |
400 | 59: mulps Vps,Wps (VEX) | mulss Vss,Wss (F3),(VEX),(o128) | mulpd Vpd,Wpd (66),(VEX) | mulsd Vsd,Wsd (F2),(VEX),(o128) | 414 | 59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1) |
401 | 5a: cvtps2pd Vpd,Wps (VEX) | cvtss2sd Vsd,Wss (F3),(VEX),(o128) | cvtpd2ps Vps,Wpd (66),(VEX) | cvtsd2ss Vsd,Wsd (F2),(VEX),(o128) | 415 | 5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1) |
402 | 5b: cvtdq2ps Vps,Wdq (VEX) | cvtps2dq Vdq,Wps (66),(VEX) | cvttps2dq Vdq,Wps (F3),(VEX) | 416 | 5b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3) |
403 | 5c: subps Vps,Wps (VEX) | subss Vss,Wss (F3),(VEX),(o128) | subpd Vpd,Wpd (66),(VEX) | subsd Vsd,Wsd (F2),(VEX),(o128) | 417 | 5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1) |
404 | 5d: minps Vps,Wps (VEX) | minss Vss,Wss (F3),(VEX),(o128) | minpd Vpd,Wpd (66),(VEX) | minsd Vsd,Wsd (F2),(VEX),(o128) | 418 | 5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1) |
405 | 5e: divps Vps,Wps (VEX) | divss Vss,Wss (F3),(VEX),(o128) | divpd Vpd,Wpd (66),(VEX) | divsd Vsd,Wsd (F2),(VEX),(o128) | 419 | 5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1) |
406 | 5f: maxps Vps,Wps (VEX) | maxss Vss,Wss (F3),(VEX),(o128) | maxpd Vpd,Wpd (66),(VEX) | maxsd Vsd,Wsd (F2),(VEX),(o128) | 420 | 5f: vmaxps Vps,Hps,Wps | vmaxpd Vpd,Hpd,Wpd (66) | vmaxss Vss,Hss,Wss (F3),(v1) | vmaxsd Vsd,Hsd,Wsd (F2),(v1) |
407 | # 0x0f 0x60-0x6f | 421 | # 0x0f 0x60-0x6f |
408 | 60: punpcklbw Pq,Qd | punpcklbw Vdq,Wdq (66),(VEX),(o128) | 422 | 60: punpcklbw Pq,Qd | vpunpcklbw Vx,Hx,Wx (66),(v1) |
409 | 61: punpcklwd Pq,Qd | punpcklwd Vdq,Wdq (66),(VEX),(o128) | 423 | 61: punpcklwd Pq,Qd | vpunpcklwd Vx,Hx,Wx (66),(v1) |
410 | 62: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66),(VEX),(o128) | 424 | 62: punpckldq Pq,Qd | vpunpckldq Vx,Hx,Wx (66),(v1) |
411 | 63: packsswb Pq,Qq | packsswb Vdq,Wdq (66),(VEX),(o128) | 425 | 63: packsswb Pq,Qq | vpacksswb Vx,Hx,Wx (66),(v1) |
412 | 64: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66),(VEX),(o128) | 426 | 64: pcmpgtb Pq,Qq | vpcmpgtb Vx,Hx,Wx (66),(v1) |
413 | 65: pcmpgtw Pq,Qq | pcmpgtw Vdq,Wdq (66),(VEX),(o128) | 427 | 65: pcmpgtw Pq,Qq | vpcmpgtw Vx,Hx,Wx (66),(v1) |
414 | 66: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66),(VEX),(o128) | 428 | 66: pcmpgtd Pq,Qq | vpcmpgtd Vx,Hx,Wx (66),(v1) |
415 | 67: packuswb Pq,Qq | packuswb Vdq,Wdq (66),(VEX),(o128) | 429 | 67: packuswb Pq,Qq | vpackuswb Vx,Hx,Wx (66),(v1) |
416 | 68: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66),(VEX),(o128) | 430 | 68: punpckhbw Pq,Qd | vpunpckhbw Vx,Hx,Wx (66),(v1) |
417 | 69: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66),(VEX),(o128) | 431 | 69: punpckhwd Pq,Qd | vpunpckhwd Vx,Hx,Wx (66),(v1) |
418 | 6a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66),(VEX),(o128) | 432 | 6a: punpckhdq Pq,Qd | vpunpckhdq Vx,Hx,Wx (66),(v1) |
419 | 6b: packssdw Pq,Qd | packssdw Vdq,Wdq (66),(VEX),(o128) | 433 | 6b: packssdw Pq,Qd | vpackssdw Vx,Hx,Wx (66),(v1) |
420 | 6c: punpcklqdq Vdq,Wdq (66),(VEX),(o128) | 434 | 6c: vpunpcklqdq Vx,Hx,Wx (66),(v1) |
421 | 6d: punpckhqdq Vdq,Wdq (66),(VEX),(o128) | 435 | 6d: vpunpckhqdq Vx,Hx,Wx (66),(v1) |
422 | 6e: movd/q/ Pd,Ed/q | movd/q Vdq,Ed/q (66),(VEX),(o128) | 436 | 6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1) |
423 | 6f: movq Pq,Qq | movdqa Vdq,Wdq (66),(VEX) | movdqu Vdq,Wdq (F3),(VEX) | 437 | 6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3) |
424 | # 0x0f 0x70-0x7f | 438 | # 0x0f 0x70-0x7f |
425 | 70: pshufw Pq,Qq,Ib | pshufd Vdq,Wdq,Ib (66),(VEX),(o128) | pshufhw Vdq,Wdq,Ib (F3),(VEX),(o128) | pshuflw VdqWdq,Ib (F2),(VEX),(o128) | 439 | 70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1) |
426 | 71: Grp12 (1A) | 440 | 71: Grp12 (1A) |
427 | 72: Grp13 (1A) | 441 | 72: Grp13 (1A) |
428 | 73: Grp14 (1A) | 442 | 73: Grp14 (1A) |
429 | 74: pcmpeqb Pq,Qq | pcmpeqb Vdq,Wdq (66),(VEX),(o128) | 443 | 74: pcmpeqb Pq,Qq | vpcmpeqb Vx,Hx,Wx (66),(v1) |
430 | 75: pcmpeqw Pq,Qq | pcmpeqw Vdq,Wdq (66),(VEX),(o128) | 444 | 75: pcmpeqw Pq,Qq | vpcmpeqw Vx,Hx,Wx (66),(v1) |
431 | 76: pcmpeqd Pq,Qq | pcmpeqd Vdq,Wdq (66),(VEX),(o128) | 445 | 76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1) |
432 | 77: emms/vzeroupper/vzeroall (VEX) | 446 | # Note: Remove (v), because vzeroall and vzeroupper become emms without VEX.
433 | 78: VMREAD Ed/q,Gd/q | 447 | 77: emms | vzeroupper | vzeroall |
434 | 79: VMWRITE Gd/q,Ed/q | 448 | 78: VMREAD Ey,Gy |
449 | 79: VMWRITE Gy,Ey | ||
435 | 7a: | 450 | 7a: |
436 | 7b: | 451 | 7b: |
437 | 7c: haddps Vps,Wps (F2),(VEX) | haddpd Vpd,Wpd (66),(VEX) | 452 | 7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2) |
438 | 7d: hsubps Vps,Wps (F2),(VEX) | hsubpd Vpd,Wpd (66),(VEX) | 453 | 7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2) |
439 | 7e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66),(VEX),(o128) | movq Vq,Wq (F3),(VEX),(o128) | 454 | 7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1) |
440 | 7f: movq Qq,Pq | movdqa Wdq,Vdq (66),(VEX) | movdqu Wdq,Vdq (F3),(VEX) | 455 | 7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3) |
441 | # 0x0f 0x80-0x8f | 456 | # 0x0f 0x80-0x8f |
442 | 80: JO Jz (f64) | 457 | 80: JO Jz (f64) |
443 | 81: JNO Jz (f64) | 458 | 81: JNO Jz (f64) |
444 | 82: JB/JNAE/JC Jz (f64) | 459 | 82: JB/JC/JNAE Jz (f64) |
445 | 83: JNB/JAE/JNC Jz (f64) | 460 | 83: JAE/JNB/JNC Jz (f64) |
446 | 84: JZ/JE Jz (f64) | 461 | 84: JE/JZ Jz (f64) |
447 | 85: JNZ/JNE Jz (f64) | 462 | 85: JNE/JNZ Jz (f64) |
448 | 86: JBE/JNA Jz (f64) | 463 | 86: JBE/JNA Jz (f64) |
449 | 87: JNBE/JA Jz (f64) | 464 | 87: JA/JNBE Jz (f64) |
450 | 88: JS Jz (f64) | 465 | 88: JS Jz (f64) |
451 | 89: JNS Jz (f64) | 466 | 89: JNS Jz (f64) |
452 | 8a: JP/JPE Jz (f64) | 467 | 8a: JP/JPE Jz (f64) |
@@ -502,18 +517,18 @@ b8: JMPE | POPCNT Gv,Ev (F3) | |||
502 | b9: Grp10 (1A) | 517 | b9: Grp10 (1A) |
503 | ba: Grp8 Ev,Ib (1A) | 518 | ba: Grp8 Ev,Ib (1A) |
504 | bb: BTC Ev,Gv | 519 | bb: BTC Ev,Gv |
505 | bc: BSF Gv,Ev | 520 | bc: BSF Gv,Ev | TZCNT Gv,Ev (F3) |
506 | bd: BSR Gv,Ev | 521 | bd: BSR Gv,Ev | LZCNT Gv,Ev (F3) |
507 | be: MOVSX Gv,Eb | 522 | be: MOVSX Gv,Eb |
508 | bf: MOVSX Gv,Ew | 523 | bf: MOVSX Gv,Ew |
509 | # 0x0f 0xc0-0xcf | 524 | # 0x0f 0xc0-0xcf |
510 | c0: XADD Eb,Gb | 525 | c0: XADD Eb,Gb |
511 | c1: XADD Ev,Gv | 526 | c1: XADD Ev,Gv |
512 | c2: cmpps Vps,Wps,Ib (VEX) | cmpss Vss,Wss,Ib (F3),(VEX),(o128) | cmppd Vpd,Wpd,Ib (66),(VEX) | cmpsd Vsd,Wsd,Ib (F2),(VEX) | 527 | c2: vcmpps Vps,Hps,Wps,Ib | vcmppd Vpd,Hpd,Wpd,Ib (66) | vcmpss Vss,Hss,Wss,Ib (F3),(v1) | vcmpsd Vsd,Hsd,Wsd,Ib (F2),(v1) |
513 | c3: movnti Md/q,Gd/q | 528 | c3: movnti My,Gy |
514 | c4: pinsrw Pq,Rd/q/Mw,Ib | pinsrw Vdq,Rd/q/Mw,Ib (66),(VEX),(o128) | 529 | c4: pinsrw Pq,Ry/Mw,Ib | vpinsrw Vdq,Hdq,Ry/Mw,Ib (66),(v1) |
515 | c5: pextrw Gd,Nq,Ib | pextrw Gd,Udq,Ib (66),(VEX),(o128) | 530 | c5: pextrw Gd,Nq,Ib | vpextrw Gd,Udq,Ib (66),(v1) |
516 | c6: shufps Vps,Wps,Ib (VEX) | shufpd Vpd,Wpd,Ib (66),(VEX) | 531 | c6: vshufps Vps,Hps,Wps,Ib | vshufpd Vpd,Hpd,Wpd,Ib (66) |
517 | c7: Grp9 (1A) | 532 | c7: Grp9 (1A) |
518 | c8: BSWAP RAX/EAX/R8/R8D | 533 | c8: BSWAP RAX/EAX/R8/R8D |
519 | c9: BSWAP RCX/ECX/R9/R9D | 534 | c9: BSWAP RCX/ECX/R9/R9D |
@@ -524,55 +539,55 @@ cd: BSWAP RBP/EBP/R13/R13D | |||
524 | ce: BSWAP RSI/ESI/R14/R14D | 539 | ce: BSWAP RSI/ESI/R14/R14D |
525 | cf: BSWAP RDI/EDI/R15/R15D | 540 | cf: BSWAP RDI/EDI/R15/R15D |
526 | # 0x0f 0xd0-0xdf | 541 | # 0x0f 0xd0-0xdf |
527 | d0: addsubps Vps,Wps (F2),(VEX) | addsubpd Vpd,Wpd (66),(VEX) | 542 | d0: vaddsubpd Vpd,Hpd,Wpd (66) | vaddsubps Vps,Hps,Wps (F2) |
528 | d1: psrlw Pq,Qq | psrlw Vdq,Wdq (66),(VEX),(o128) | 543 | d1: psrlw Pq,Qq | vpsrlw Vx,Hx,Wx (66),(v1) |
529 | d2: psrld Pq,Qq | psrld Vdq,Wdq (66),(VEX),(o128) | 544 | d2: psrld Pq,Qq | vpsrld Vx,Hx,Wx (66),(v1) |
530 | d3: psrlq Pq,Qq | psrlq Vdq,Wdq (66),(VEX),(o128) | 545 | d3: psrlq Pq,Qq | vpsrlq Vx,Hx,Wx (66),(v1) |
531 | d4: paddq Pq,Qq | paddq Vdq,Wdq (66),(VEX),(o128) | 546 | d4: paddq Pq,Qq | vpaddq Vx,Hx,Wx (66),(v1) |
532 | d5: pmullw Pq,Qq | pmullw Vdq,Wdq (66),(VEX),(o128) | 547 | d5: pmullw Pq,Qq | vpmullw Vx,Hx,Wx (66),(v1) |
533 | d6: movq Wq,Vq (66),(VEX),(o128) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) | 548 | d6: vmovq Wq,Vq (66),(v1) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) |
534 | d7: pmovmskb Gd,Nq | pmovmskb Gd,Udq (66),(VEX),(o128) | 549 | d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1) |
535 | d8: psubusb Pq,Qq | psubusb Vdq,Wdq (66),(VEX),(o128) | 550 | d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1) |
536 | d9: psubusw Pq,Qq | psubusw Vdq,Wdq (66),(VEX),(o128) | 551 | d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1) |
537 | da: pminub Pq,Qq | pminub Vdq,Wdq (66),(VEX),(o128) | 552 | da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1) |
538 | db: pand Pq,Qq | pand Vdq,Wdq (66),(VEX),(o128) | 553 | db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) |
539 | dc: paddusb Pq,Qq | paddusb Vdq,Wdq (66),(VEX),(o128) | 554 | dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1) |
540 | dd: paddusw Pq,Qq | paddusw Vdq,Wdq (66),(VEX),(o128) | 555 | dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1) |
541 | de: pmaxub Pq,Qq | pmaxub Vdq,Wdq (66),(VEX),(o128) | 556 | de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1) |
542 | df: pandn Pq,Qq | pandn Vdq,Wdq (66),(VEX),(o128) | 557 | df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) |
543 | # 0x0f 0xe0-0xef | 558 | # 0x0f 0xe0-0xef |
544 | e0: pavgb Pq,Qq | pavgb Vdq,Wdq (66),(VEX),(o128) | 559 | e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1) |
545 | e1: psraw Pq,Qq | psraw Vdq,Wdq (66),(VEX),(o128) | 560 | e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1) |
546 | e2: psrad Pq,Qq | psrad Vdq,Wdq (66),(VEX),(o128) | 561 | e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1) |
547 | e3: pavgw Pq,Qq | pavgw Vdq,Wdq (66),(VEX),(o128) | 562 | e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1) |
548 | e4: pmulhuw Pq,Qq | pmulhuw Vdq,Wdq (66),(VEX),(o128) | 563 | e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1) |
549 | e5: pmulhw Pq,Qq | pmulhw Vdq,Wdq (66),(VEX),(o128) | 564 | e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1) |
550 | e6: cvtpd2dq Vdq,Wpd (F2),(VEX) | cvttpd2dq Vdq,Wpd (66),(VEX) | cvtdq2pd Vpd,Wdq (F3),(VEX) | 565 | e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2) |
551 | e7: movntq Mq,Pq | movntdq Mdq,Vdq (66),(VEX) | 566 | e7: movntq Mq,Pq | vmovntdq Mx,Vx (66) |
552 | e8: psubsb Pq,Qq | psubsb Vdq,Wdq (66),(VEX),(o128) | 567 | e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1) |
553 | e9: psubsw Pq,Qq | psubsw Vdq,Wdq (66),(VEX),(o128) | 568 | e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1) |
554 | ea: pminsw Pq,Qq | pminsw Vdq,Wdq (66),(VEX),(o128) | 569 | ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1) |
555 | eb: por Pq,Qq | por Vdq,Wdq (66),(VEX),(o128) | 570 | eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) |
556 | ec: paddsb Pq,Qq | paddsb Vdq,Wdq (66),(VEX),(o128) | 571 | ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1) |
557 | ed: paddsw Pq,Qq | paddsw Vdq,Wdq (66),(VEX),(o128) | 572 | ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1) |
558 | ee: pmaxsw Pq,Qq | pmaxsw Vdq,Wdq (66),(VEX),(o128) | 573 | ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1) |
559 | ef: pxor Pq,Qq | pxor Vdq,Wdq (66),(VEX),(o128) | 574 | ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) |
560 | # 0x0f 0xf0-0xff | 575 | # 0x0f 0xf0-0xff |
561 | f0: lddqu Vdq,Mdq (F2),(VEX) | 576 | f0: vlddqu Vx,Mx (F2) |
562 | f1: psllw Pq,Qq | psllw Vdq,Wdq (66),(VEX),(o128) | 577 | f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1) |
563 | f2: pslld Pq,Qq | pslld Vdq,Wdq (66),(VEX),(o128) | 578 | f2: pslld Pq,Qq | vpslld Vx,Hx,Wx (66),(v1) |
564 | f3: psllq Pq,Qq | psllq Vdq,Wdq (66),(VEX),(o128) | 579 | f3: psllq Pq,Qq | vpsllq Vx,Hx,Wx (66),(v1) |
565 | f4: pmuludq Pq,Qq | pmuludq Vdq,Wdq (66),(VEX),(o128) | 580 | f4: pmuludq Pq,Qq | vpmuludq Vx,Hx,Wx (66),(v1) |
566 | f5: pmaddwd Pq,Qq | pmaddwd Vdq,Wdq (66),(VEX),(o128) | 581 | f5: pmaddwd Pq,Qq | vpmaddwd Vx,Hx,Wx (66),(v1) |
567 | f6: psadbw Pq,Qq | psadbw Vdq,Wdq (66),(VEX),(o128) | 582 | f6: psadbw Pq,Qq | vpsadbw Vx,Hx,Wx (66),(v1) |
568 | f7: maskmovq Pq,Nq | maskmovdqu Vdq,Udq (66),(VEX),(o128) | 583 | f7: maskmovq Pq,Nq | vmaskmovdqu Vx,Ux (66),(v1) |
569 | f8: psubb Pq,Qq | psubb Vdq,Wdq (66),(VEX),(o128) | 584 | f8: psubb Pq,Qq | vpsubb Vx,Hx,Wx (66),(v1) |
570 | f9: psubw Pq,Qq | psubw Vdq,Wdq (66),(VEX),(o128) | 585 | f9: psubw Pq,Qq | vpsubw Vx,Hx,Wx (66),(v1) |
571 | fa: psubd Pq,Qq | psubd Vdq,Wdq (66),(VEX),(o128) | 586 | fa: psubd Pq,Qq | vpsubd Vx,Hx,Wx (66),(v1) |
572 | fb: psubq Pq,Qq | psubq Vdq,Wdq (66),(VEX),(o128) | 587 | fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1) |
573 | fc: paddb Pq,Qq | paddb Vdq,Wdq (66),(VEX),(o128) | 588 | fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1) |
574 | fd: paddw Pq,Qq | paddw Vdq,Wdq (66),(VEX),(o128) | 589 | fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1) |
575 | fe: paddd Pq,Qq | paddd Vdq,Wdq (66),(VEX),(o128) | 590 | fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1) |
576 | ff: | 591 | ff: |
577 | EndTable | 592 | EndTable |
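In the rewritten rows above and in the tables that follow, the operand notation tracks the Intel SDM Appendix A codes: H names the register selected by VEX.vvvv (the extra non-destructive source), a trailing x (Vx, Hx, Wx) means the operand is dq or qq — xmm or ymm — depending on VEX.L, (v) marks forms that require a VEX prefix, and (v1) marks forms that only support 128-bit VEX, per the legend at the top of x86-opcode-map.txt.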
578 | 593 | ||
@@ -580,155 +595,193 @@ Table: 3-byte opcode 1 (0x0f 0x38) | |||
580 | Referrer: 3-byte escape 1 | 595 | Referrer: 3-byte escape 1 |
581 | AVXcode: 2 | 596 | AVXcode: 2 |
582 | # 0x0f 0x38 0x00-0x0f | 597 | # 0x0f 0x38 0x00-0x0f |
583 | 00: pshufb Pq,Qq | pshufb Vdq,Wdq (66),(VEX),(o128) | 598 | 00: pshufb Pq,Qq | vpshufb Vx,Hx,Wx (66),(v1) |
584 | 01: phaddw Pq,Qq | phaddw Vdq,Wdq (66),(VEX),(o128) | 599 | 01: phaddw Pq,Qq | vphaddw Vx,Hx,Wx (66),(v1) |
585 | 02: phaddd Pq,Qq | phaddd Vdq,Wdq (66),(VEX),(o128) | 600 | 02: phaddd Pq,Qq | vphaddd Vx,Hx,Wx (66),(v1) |
586 | 03: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66),(VEX),(o128) | 601 | 03: phaddsw Pq,Qq | vphaddsw Vx,Hx,Wx (66),(v1) |
587 | 04: pmaddubsw Pq,Qq | pmaddubsw Vdq,Wdq (66),(VEX),(o128) | 602 | 04: pmaddubsw Pq,Qq | vpmaddubsw Vx,Hx,Wx (66),(v1) |
588 | 05: phsubw Pq,Qq | phsubw Vdq,Wdq (66),(VEX),(o128) | 603 | 05: phsubw Pq,Qq | vphsubw Vx,Hx,Wx (66),(v1) |
589 | 06: phsubd Pq,Qq | phsubd Vdq,Wdq (66),(VEX),(o128) | 604 | 06: phsubd Pq,Qq | vphsubd Vx,Hx,Wx (66),(v1) |
590 | 07: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66),(VEX),(o128) | 605 | 07: phsubsw Pq,Qq | vphsubsw Vx,Hx,Wx (66),(v1) |
591 | 08: psignb Pq,Qq | psignb Vdq,Wdq (66),(VEX),(o128) | 606 | 08: psignb Pq,Qq | vpsignb Vx,Hx,Wx (66),(v1) |
592 | 09: psignw Pq,Qq | psignw Vdq,Wdq (66),(VEX),(o128) | 607 | 09: psignw Pq,Qq | vpsignw Vx,Hx,Wx (66),(v1) |
593 | 0a: psignd Pq,Qq | psignd Vdq,Wdq (66),(VEX),(o128) | 608 | 0a: psignd Pq,Qq | vpsignd Vx,Hx,Wx (66),(v1) |
594 | 0b: pmulhrsw Pq,Qq | pmulhrsw Vdq,Wdq (66),(VEX),(o128) | 609 | 0b: pmulhrsw Pq,Qq | vpmulhrsw Vx,Hx,Wx (66),(v1) |
595 | 0c: Vpermilps /r (66),(oVEX) | 610 | 0c: vpermilps Vx,Hx,Wx (66),(v) |
596 | 0d: Vpermilpd /r (66),(oVEX) | 611 | 0d: vpermilpd Vx,Hx,Wx (66),(v) |
597 | 0e: vtestps /r (66),(oVEX) | 612 | 0e: vtestps Vx,Wx (66),(v) |
598 | 0f: vtestpd /r (66),(oVEX) | 613 | 0f: vtestpd Vx,Wx (66),(v) |
599 | # 0x0f 0x38 0x10-0x1f | 614 | # 0x0f 0x38 0x10-0x1f |
600 | 10: pblendvb Vdq,Wdq (66) | 615 | 10: pblendvb Vdq,Wdq (66) |
601 | 11: | 616 | 11: |
602 | 12: | 617 | 12: |
603 | 13: | 618 | 13: vcvtph2ps Vx,Wx,Ib (66),(v) |
604 | 14: blendvps Vdq,Wdq (66) | 619 | 14: blendvps Vdq,Wdq (66) |
605 | 15: blendvpd Vdq,Wdq (66) | 620 | 15: blendvpd Vdq,Wdq (66) |
606 | 16: | 621 | 16: vpermps Vqq,Hqq,Wqq (66),(v) |
607 | 17: ptest Vdq,Wdq (66),(VEX) | 622 | 17: vptest Vx,Wx (66) |
608 | 18: vbroadcastss /r (66),(oVEX) | 623 | 18: vbroadcastss Vx,Wd (66),(v) |
609 | 19: vbroadcastsd /r (66),(oVEX),(o256) | 624 | 19: vbroadcastsd Vqq,Wq (66),(v) |
610 | 1a: vbroadcastf128 /r (66),(oVEX),(o256) | 625 | 1a: vbroadcastf128 Vqq,Mdq (66),(v) |
611 | 1b: | 626 | 1b: |
612 | 1c: pabsb Pq,Qq | pabsb Vdq,Wdq (66),(VEX),(o128) | 627 | 1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1) |
613 | 1d: pabsw Pq,Qq | pabsw Vdq,Wdq (66),(VEX),(o128) | 628 | 1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1) |
614 | 1e: pabsd Pq,Qq | pabsd Vdq,Wdq (66),(VEX),(o128) | 629 | 1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1) |
615 | 1f: | 630 | 1f: |
616 | # 0x0f 0x38 0x20-0x2f | 631 | # 0x0f 0x38 0x20-0x2f |
617 | 20: pmovsxbw Vdq,Udq/Mq (66),(VEX),(o128) | 632 | 20: vpmovsxbw Vx,Ux/Mq (66),(v1) |
618 | 21: pmovsxbd Vdq,Udq/Md (66),(VEX),(o128) | 633 | 21: vpmovsxbd Vx,Ux/Md (66),(v1) |
619 | 22: pmovsxbq Vdq,Udq/Mw (66),(VEX),(o128) | 634 | 22: vpmovsxbq Vx,Ux/Mw (66),(v1) |
620 | 23: pmovsxwd Vdq,Udq/Mq (66),(VEX),(o128) | 635 | 23: vpmovsxwd Vx,Ux/Mq (66),(v1) |
621 | 24: pmovsxwq Vdq,Udq/Md (66),(VEX),(o128) | 636 | 24: vpmovsxwq Vx,Ux/Md (66),(v1) |
622 | 25: pmovsxdq Vdq,Udq/Mq (66),(VEX),(o128) | 637 | 25: vpmovsxdq Vx,Ux/Mq (66),(v1) |
623 | 26: | 638 | 26: |
624 | 27: | 639 | 27: |
625 | 28: pmuldq Vdq,Wdq (66),(VEX),(o128) | 640 | 28: vpmuldq Vx,Hx,Wx (66),(v1) |
626 | 29: pcmpeqq Vdq,Wdq (66),(VEX),(o128) | 641 | 29: vpcmpeqq Vx,Hx,Wx (66),(v1) |
627 | 2a: movntdqa Vdq,Mdq (66),(VEX),(o128) | 642 | 2a: vmovntdqa Vx,Mx (66),(v1) |
628 | 2b: packusdw Vdq,Wdq (66),(VEX),(o128) | 643 | 2b: vpackusdw Vx,Hx,Wx (66),(v1) |
629 | 2c: vmaskmovps(ld) /r (66),(oVEX) | 644 | 2c: vmaskmovps Vx,Hx,Mx (66),(v) |
630 | 2d: vmaskmovpd(ld) /r (66),(oVEX) | 645 | 2d: vmaskmovpd Vx,Hx,Mx (66),(v) |
631 | 2e: vmaskmovps(st) /r (66),(oVEX) | 646 | 2e: vmaskmovps Mx,Hx,Vx (66),(v) |
632 | 2f: vmaskmovpd(st) /r (66),(oVEX) | 647 | 2f: vmaskmovpd Mx,Hx,Vx (66),(v) |
633 | # 0x0f 0x38 0x30-0x3f | 648 | # 0x0f 0x38 0x30-0x3f |
634 | 30: pmovzxbw Vdq,Udq/Mq (66),(VEX),(o128) | 649 | 30: vpmovzxbw Vx,Ux/Mq (66),(v1) |
635 | 31: pmovzxbd Vdq,Udq/Md (66),(VEX),(o128) | 650 | 31: vpmovzxbd Vx,Ux/Md (66),(v1) |
636 | 32: pmovzxbq Vdq,Udq/Mw (66),(VEX),(o128) | 651 | 32: vpmovzxbq Vx,Ux/Mw (66),(v1) |
637 | 33: pmovzxwd Vdq,Udq/Mq (66),(VEX),(o128) | 652 | 33: vpmovzxwd Vx,Ux/Mq (66),(v1) |
638 | 34: pmovzxwq Vdq,Udq/Md (66),(VEX),(o128) | 653 | 34: vpmovzxwq Vx,Ux/Md (66),(v1) |
639 | 35: pmovzxdq Vdq,Udq/Mq (66),(VEX),(o128) | 654 | 35: vpmovzxdq Vx,Ux/Mq (66),(v1) |
640 | 36: | 655 | 36: vpermd Vqq,Hqq,Wqq (66),(v) |
641 | 37: pcmpgtq Vdq,Wdq (66),(VEX),(o128) | 656 | 37: vpcmpgtq Vx,Hx,Wx (66),(v1) |
642 | 38: pminsb Vdq,Wdq (66),(VEX),(o128) | 657 | 38: vpminsb Vx,Hx,Wx (66),(v1) |
643 | 39: pminsd Vdq,Wdq (66),(VEX),(o128) | 658 | 39: vpminsd Vx,Hx,Wx (66),(v1) |
644 | 3a: pminuw Vdq,Wdq (66),(VEX),(o128) | 659 | 3a: vpminuw Vx,Hx,Wx (66),(v1) |
645 | 3b: pminud Vdq,Wdq (66),(VEX),(o128) | 660 | 3b: vpminud Vx,Hx,Wx (66),(v1) |
646 | 3c: pmaxsb Vdq,Wdq (66),(VEX),(o128) | 661 | 3c: vpmaxsb Vx,Hx,Wx (66),(v1) |
647 | 3d: pmaxsd Vdq,Wdq (66),(VEX),(o128) | 662 | 3d: vpmaxsd Vx,Hx,Wx (66),(v1) |
648 | 3e: pmaxuw Vdq,Wdq (66),(VEX),(o128) | 663 | 3e: vpmaxuw Vx,Hx,Wx (66),(v1) |
649 | 3f: pmaxud Vdq,Wdq (66),(VEX),(o128) | 664 | 3f: vpmaxud Vx,Hx,Wx (66),(v1) |
650 | # 0x0f 0x38 0x40-0x8f | 665 | # 0x0f 0x38 0x40-0x8f |
651 | 40: pmulld Vdq,Wdq (66),(VEX),(o128) | 666 | 40: vpmulld Vx,Hx,Wx (66),(v1) |
652 | 41: phminposuw Vdq,Wdq (66),(VEX),(o128) | 667 | 41: vphminposuw Vdq,Wdq (66),(v1) |
653 | 80: INVEPT Gd/q,Mdq (66) | 668 | 42: |
654 | 81: INVVPID Gd/q,Mdq (66) | 669 | 43: |
670 | 44: | ||
671 | 45: vpsrlvd/q Vx,Hx,Wx (66),(v) | ||
672 | 46: vpsravd Vx,Hx,Wx (66),(v) | ||
673 | 47: vpsllvd/q Vx,Hx,Wx (66),(v) | ||
674 | # Skip 0x48-0x57 | ||
675 | 58: vpbroadcastd Vx,Wx (66),(v) | ||
676 | 59: vpbroadcastq Vx,Wx (66),(v) | ||
677 | 5a: vbroadcasti128 Vqq,Mdq (66),(v) | ||
678 | # Skip 0x5b-0x77 | ||
679 | 78: vpbroadcastb Vx,Wx (66),(v) | ||
680 | 79: vpbroadcastw Vx,Wx (66),(v) | ||
681 | # Skip 0x7a-0x7f | ||
682 | 80: INVEPT Gy,Mdq (66) | ||
683 | 81: INVVPID Gy,Mdq (66) ||
684 | 82: INVPCID Gy,Mdq (66) | ||
685 | 8c: vpmaskmovd/q Vx,Hx,Mx (66),(v) | ||
686 | 8e: vpmaskmovd/q Mx,Vx,Hx (66),(v) | ||
655 | # 0x0f 0x38 0x90-0xbf (FMA) | 687 | # 0x0f 0x38 0x90-0xbf (FMA) |
656 | 96: vfmaddsub132pd/ps /r (66),(VEX) | 688 | 90: vgatherdd/q Vx,Hx,Wx (66),(v) |
657 | 97: vfmsubadd132pd/ps /r (66),(VEX) | 689 | 91: vgatherqd/q Vx,Hx,Wx (66),(v) |
658 | 98: vfmadd132pd/ps /r (66),(VEX) | 690 | 92: vgatherdps/d Vx,Hx,Wx (66),(v) |
659 | 99: vfmadd132sd/ss /r (66),(VEX),(o128) | 691 | 93: vgatherqps/d Vx,Hx,Wx (66),(v) |
660 | 9a: vfmsub132pd/ps /r (66),(VEX) | 692 | 94: |
661 | 9b: vfmsub132sd/ss /r (66),(VEX),(o128) | 693 | 95: |
662 | 9c: vfnmadd132pd/ps /r (66),(VEX) | 694 | 96: vfmaddsub132ps/d Vx,Hx,Wx (66),(v) |
663 | 9d: vfnmadd132sd/ss /r (66),(VEX),(o128) | 695 | 97: vfmsubadd132ps/d Vx,Hx,Wx (66),(v) |
664 | 9e: vfnmsub132pd/ps /r (66),(VEX) | 696 | 98: vfmadd132ps/d Vx,Hx,Wx (66),(v) |
665 | 9f: vfnmsub132sd/ss /r (66),(VEX),(o128) | 697 | 99: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1) |
666 | a6: vfmaddsub213pd/ps /r (66),(VEX) | 698 | 9a: vfmsub132ps/d Vx,Hx,Wx (66),(v) |
667 | a7: vfmsubadd213pd/ps /r (66),(VEX) | 699 | 9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1) |
668 | a8: vfmadd213pd/ps /r (66),(VEX) | 700 | 9c: vfnmadd132ps/d Vx,Hx,Wx (66),(v) |
669 | a9: vfmadd213sd/ss /r (66),(VEX),(o128) | 701 | 9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1) |
670 | aa: vfmsub213pd/ps /r (66),(VEX) | 702 | 9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v) |
671 | ab: vfmsub213sd/ss /r (66),(VEX),(o128) | 703 | 9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1) |
672 | ac: vfnmadd213pd/ps /r (66),(VEX) | 704 | a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v) |
673 | ad: vfnmadd213sd/ss /r (66),(VEX),(o128) | 705 | a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v) |
674 | ae: vfnmsub213pd/ps /r (66),(VEX) | 706 | a8: vfmadd213ps/d Vx,Hx,Wx (66),(v) |
675 | af: vfnmsub213sd/ss /r (66),(VEX),(o128) | 707 | a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1) |
676 | b6: vfmaddsub231pd/ps /r (66),(VEX) | 708 | aa: vfmsub213ps/d Vx,Hx,Wx (66),(v) |
677 | b7: vfmsubadd231pd/ps /r (66),(VEX) | 709 | ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1) |
678 | b8: vfmadd231pd/ps /r (66),(VEX) | 710 | ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v) |
679 | b9: vfmadd231sd/ss /r (66),(VEX),(o128) | 711 | ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1) |
680 | ba: vfmsub231pd/ps /r (66),(VEX) | 712 | ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v) |
681 | bb: vfmsub231sd/ss /r (66),(VEX),(o128) | 713 | af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1) |
682 | bc: vfnmadd231pd/ps /r (66),(VEX) | 714 | b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v) |
683 | bd: vfnmadd231sd/ss /r (66),(VEX),(o128) | 715 | b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v) |
684 | be: vfnmsub231pd/ps /r (66),(VEX) | 716 | b8: vfmadd231ps/d Vx,Hx,Wx (66),(v) |
685 | bf: vfnmsub231sd/ss /r (66),(VEX),(o128) | 717 | b9: vfmadd231ss/d Vx,Hx,Wx (66),(v),(v1) |
718 | ba: vfmsub231ps/d Vx,Hx,Wx (66),(v) | ||
719 | bb: vfmsub231ss/d Vx,Hx,Wx (66),(v),(v1) | ||
720 | bc: vfnmadd231ps/d Vx,Hx,Wx (66),(v) | ||
721 | bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1) | ||
722 | be: vfnmsub231ps/d Vx,Hx,Wx (66),(v) | ||
723 | bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1) | ||
686 | # 0x0f 0x38 0xc0-0xff | 724 | # 0x0f 0x38 0xc0-0xff |
687 | db: aesimc Vdq,Wdq (66),(VEX),(o128) | 725 | db: VAESIMC Vdq,Wdq (66),(v1) |
688 | dc: aesenc Vdq,Wdq (66),(VEX),(o128) | 726 | dc: VAESENC Vdq,Hdq,Wdq (66),(v1) |
689 | dd: aesenclast Vdq,Wdq (66),(VEX),(o128) | 727 | dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1) |
690 | de: aesdec Vdq,Wdq (66),(VEX),(o128) | 728 | de: VAESDEC Vdq,Hdq,Wdq (66),(v1) |
691 | df: aesdeclast Vdq,Wdq (66),(VEX),(o128) | 729 | df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1) |
692 | f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2) | 730 | f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) |
693 | f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2) | 731 | f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) |
732 | f3: ANDN Gy,By,Ey (v) | ||
733 | f4: Grp17 (1A) | ||
734 | f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) | ||
735 | f6: MULX By,Gy,rDX,Ey (F2),(v) | ||
736 | f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) | ||
694 | EndTable | 737 | EndTable |
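The new f3–f7 rows are the BMI1/BMI2 group. For the two least self-describing entries, a minimal C sketch of the reference semantics (bit-serial and purely illustrative): PDEP deposits the low bits of the source at the positions set in the mask, and PEXT extracts the masked bits down to the low end.

    static inline unsigned long long pdep64(unsigned long long src,
                                            unsigned long long mask)
    {
            unsigned long long dst = 0;
            int i, k = 0;

            for (i = 0; i < 64; i++)
                    if (mask & (1ULL << i))
                            dst |= ((src >> k++) & 1ULL) << i;    /* scatter */
            return dst;
    }

    static inline unsigned long long pext64(unsigned long long src,
                                            unsigned long long mask)
    {
            unsigned long long dst = 0;
            int i, k = 0;

            for (i = 0; i < 64; i++)
                    if (mask & (1ULL << i))
                            dst |= ((src >> i) & 1ULL) << k++;    /* gather */
            return dst;
    }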
695 | 738 | ||
696 | Table: 3-byte opcode 2 (0x0f 0x3a) | 739 | Table: 3-byte opcode 2 (0x0f 0x3a) |
697 | Referrer: 3-byte escape 2 | 740 | Referrer: 3-byte escape 2 |
698 | AVXcode: 3 | 741 | AVXcode: 3 |
699 | # 0x0f 0x3a 0x00-0xff | 742 | # 0x0f 0x3a 0x00-0xff |
700 | 04: vpermilps /r,Ib (66),(oVEX) | 743 | 00: vpermq Vqq,Wqq,Ib (66),(v) |
701 | 05: vpermilpd /r,Ib (66),(oVEX) | 744 | 01: vpermpd Vqq,Wqq,Ib (66),(v) |
702 | 06: vperm2f128 /r,Ib (66),(oVEX),(o256) | 745 | 02: vpblendd Vx,Hx,Wx,Ib (66),(v) |
703 | 08: roundps Vdq,Wdq,Ib (66),(VEX) | 746 | 03: |
704 | 09: roundpd Vdq,Wdq,Ib (66),(VEX) | 747 | 04: vpermilps Vx,Wx,Ib (66),(v) |
705 | 0a: roundss Vss,Wss,Ib (66),(VEX),(o128) | 748 | 05: vpermilpd Vx,Wx,Ib (66),(v) |
706 | 0b: roundsd Vsd,Wsd,Ib (66),(VEX),(o128) | 749 | 06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v) |
707 | 0c: blendps Vdq,Wdq,Ib (66),(VEX) | 750 | 07: |
708 | 0d: blendpd Vdq,Wdq,Ib (66),(VEX) | 751 | 08: vroundps Vx,Wx,Ib (66) |
709 | 0e: pblendw Vdq,Wdq,Ib (66),(VEX),(o128) | 752 | 09: vroundpd Vx,Wx,Ib (66) |
710 | 0f: palignr Pq,Qq,Ib | palignr Vdq,Wdq,Ib (66),(VEX),(o128) | 753 | 0a: vroundss Vss,Wss,Ib (66),(v1) |
711 | 14: pextrb Rd/Mb,Vdq,Ib (66),(VEX),(o128) | 754 | 0b: vroundsd Vsd,Wsd,Ib (66),(v1) |
712 | 15: pextrw Rd/Mw,Vdq,Ib (66),(VEX),(o128) | 755 | 0c: vblendps Vx,Hx,Wx,Ib (66) |
713 | 16: pextrd/pextrq Ed/q,Vdq,Ib (66),(VEX),(o128) | 756 | 0d: vblendpd Vx,Hx,Wx,Ib (66) |
714 | 17: extractps Ed,Vdq,Ib (66),(VEX),(o128) | 757 | 0e: vpblendw Vx,Hx,Wx,Ib (66),(v1) |
715 | 18: vinsertf128 /r,Ib (66),(oVEX),(o256) | 758 | 0f: palignr Pq,Qq,Ib | vpalignr Vx,Hx,Wx,Ib (66),(v1) |
716 | 19: vextractf128 /r,Ib (66),(oVEX),(o256) | 759 | 14: vpextrb Rd/Mb,Vdq,Ib (66),(v1) |
717 | 20: pinsrb Vdq,Rd/q/Mb,Ib (66),(VEX),(o128) | 760 | 15: vpextrw Rd/Mw,Vdq,Ib (66),(v1) |
718 | 21: insertps Vdq,Udq/Md,Ib (66),(VEX),(o128) | 761 | 16: vpextrd/q Ey,Vdq,Ib (66),(v1) |
719 | 22: pinsrd/pinsrq Vdq,Ed/q,Ib (66),(VEX),(o128) | 762 | 17: vextractps Ed,Vdq,Ib (66),(v1) |
720 | 40: dpps Vdq,Wdq,Ib (66),(VEX) | 763 | 18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) |
721 | 41: dppd Vdq,Wdq,Ib (66),(VEX),(o128) | 764 | 19: vextractf128 Wdq,Vqq,Ib (66),(v) |
722 | 42: mpsadbw Vdq,Wdq,Ib (66),(VEX),(o128) | 765 | 1d: vcvtps2ph Wx,Vx,Ib (66),(v) |
723 | 44: pclmulq Vdq,Wdq,Ib (66),(VEX),(o128) | 766 | 20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1) |
724 | 4a: vblendvps /r,Ib (66),(oVEX) | 767 | 21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1) |
725 | 4b: vblendvpd /r,Ib (66),(oVEX) | 768 | 22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1) |
726 | 4c: vpblendvb /r,Ib (66),(oVEX),(o128) | 769 | 38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) |
727 | 60: pcmpestrm Vdq,Wdq,Ib (66),(VEX),(o128) | 770 | 39: vextracti128 Wdq,Vqq,Ib (66),(v) |
728 | 61: pcmpestri Vdq,Wdq,Ib (66),(VEX),(o128) | 771 | 40: vdpps Vx,Hx,Wx,Ib (66) |
729 | 62: pcmpistrm Vdq,Wdq,Ib (66),(VEX),(o128) | 772 | 41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1) |
730 | 63: pcmpistri Vdq,Wdq,Ib (66),(VEX),(o128) | 773 | 42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) |
731 | df: aeskeygenassist Vdq,Wdq,Ib (66),(VEX),(o128) | 774 | 44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1) |
775 | 46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v) | ||
776 | 4a: vblendvps Vx,Hx,Wx,Lx (66),(v) | ||
777 | 4b: vblendvpd Vx,Hx,Wx,Lx (66),(v) | ||
778 | 4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1) | ||
779 | 60: vpcmpestrm Vdq,Wdq,Ib (66),(v1) | ||
780 | 61: vpcmpestri Vdq,Wdq,Ib (66),(v1) | ||
781 | 62: vpcmpistrm Vdq,Wdq,Ib (66),(v1) | ||
782 | 63: vpcmpistri Vdq,Wdq,Ib (66),(v1) | ||
783 | df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) | ||
784 | f0: RORX Gy,Ey,Ib (F2),(v) | ||
732 | EndTable | 785 | EndTable |
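RORX (the f0 row above) is BMI2's flag-preserving rotate: a three-operand rotate-right by immediate that leaves EFLAGS untouched. Reference semantics of the 64-bit form as a C sketch:

    static inline unsigned long long rorx64(unsigned long long x, unsigned int imm)
    {
            imm &= 63;      /* the 64-bit form masks imm8 to 6 bits */
            return imm ? (x >> imm) | (x << (64 - imm)) : x;
    }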
733 | 786 | ||
734 | GrpTable: Grp1 | 787 | GrpTable: Grp1 |
@@ -790,7 +843,7 @@ GrpTable: Grp5 | |||
790 | 2: CALLN Ev (f64) | 843 | 2: CALLN Ev (f64) |
791 | 3: CALLF Ep | 844 | 3: CALLF Ep |
792 | 4: JMPN Ev (f64) | 845 | 4: JMPN Ev (f64) |
793 | 5: JMPF Ep | 846 | 5: JMPF Mp |
794 | 6: PUSH Ev (d64) | 847 | 6: PUSH Ev (d64) |
795 | 7: | 848 | 7: |
796 | EndTable | 849 | EndTable |
@@ -807,7 +860,7 @@ EndTable | |||
807 | GrpTable: Grp7 | 860 | GrpTable: Grp7 |
808 | 0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) | 861 | 0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) |
809 | 1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001) | 862 | 1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001) |
810 | 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | 863 | 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) |
811 | 3: LIDT Ms | 864 | 3: LIDT Ms |
812 | 4: SMSW Mw/Rv | 865 | 4: SMSW Mw/Rv |
813 | 5: | 866 | 5: |
@@ -824,44 +877,45 @@ EndTable | |||
824 | 877 | ||
825 | GrpTable: Grp9 | 878 | GrpTable: Grp9 |
826 | 1: CMPXCHG8B/16B Mq/Mdq | 879 | 1: CMPXCHG8B/16B Mq/Mdq |
827 | 6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | 880 | 6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) |
828 | 7: VMPTRST Mq | 881 | 7: VMPTRST Mq | VMPTRST Mq (F3) |
829 | EndTable | 882 | EndTable |
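The RDRAND Rv (11B) addition to Grp9 is the register-only (ModRM.mod == 11b) form of 0f c7 /6. A hedged sketch of invoking it from C without assembler support, modeled on the kernel's archrandom helpers (the helper name is illustrative); the carry flag reports whether a random value was delivered:

    static inline int rdrand_long(unsigned long *v)
    {
            unsigned char ok;

            /* 48 0f c7 f0 == rdrand %rax (x86-64) */
            asm volatile(".byte 0x48,0x0f,0xc7,0xf0; setc %1"
                         : "=a" (*v), "=qm" (ok));
            return ok;      /* non-zero iff *v holds a random value */
    }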
830 | 883 | ||
831 | GrpTable: Grp10 | 884 | GrpTable: Grp10 |
832 | EndTable | 885 | EndTable |
833 | 886 | ||
834 | GrpTable: Grp11 | 887 | GrpTable: Grp11 |
888 | # Note: the operands are given by group opcode | ||
835 | 0: MOV | 889 | 0: MOV |
836 | EndTable | 890 | EndTable |
837 | 891 | ||
838 | GrpTable: Grp12 | 892 | GrpTable: Grp12 |
839 | 2: psrlw Nq,Ib (11B) | psrlw Udq,Ib (66),(11B),(VEX),(o128) | 893 | 2: psrlw Nq,Ib (11B) | vpsrlw Hx,Ux,Ib (66),(11B),(v1) |
840 | 4: psraw Nq,Ib (11B) | psraw Udq,Ib (66),(11B),(VEX),(o128) | 894 | 4: psraw Nq,Ib (11B) | vpsraw Hx,Ux,Ib (66),(11B),(v1) |
841 | 6: psllw Nq,Ib (11B) | psllw Udq,Ib (66),(11B),(VEX),(o128) | 895 | 6: psllw Nq,Ib (11B) | vpsllw Hx,Ux,Ib (66),(11B),(v1) |
842 | EndTable | 896 | EndTable |
843 | 897 | ||
844 | GrpTable: Grp13 | 898 | GrpTable: Grp13 |
845 | 2: psrld Nq,Ib (11B) | psrld Udq,Ib (66),(11B),(VEX),(o128) | 899 | 2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1) |
846 | 4: psrad Nq,Ib (11B) | psrad Udq,Ib (66),(11B),(VEX),(o128) | 900 | 4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) |
847 | 6: pslld Nq,Ib (11B) | pslld Udq,Ib (66),(11B),(VEX),(o128) | 901 | 6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1) |
848 | EndTable | 902 | EndTable |
849 | 903 | ||
850 | GrpTable: Grp14 | 904 | GrpTable: Grp14 |
851 | 2: psrlq Nq,Ib (11B) | psrlq Udq,Ib (66),(11B),(VEX),(o128) | 905 | 2: psrlq Nq,Ib (11B) | vpsrlq Hx,Ux,Ib (66),(11B),(v1) |
852 | 3: psrldq Udq,Ib (66),(11B),(VEX),(o128) | 906 | 3: vpsrldq Hx,Ux,Ib (66),(11B),(v1) |
853 | 6: psllq Nq,Ib (11B) | psllq Udq,Ib (66),(11B),(VEX),(o128) | 907 | 6: psllq Nq,Ib (11B) | vpsllq Hx,Ux,Ib (66),(11B),(v1) |
854 | 7: pslldq Udq,Ib (66),(11B),(VEX),(o128) | 908 | 7: vpslldq Hx,Ux,Ib (66),(11B),(v1) |
855 | EndTable | 909 | EndTable |
856 | 910 | ||
857 | GrpTable: Grp15 | 911 | GrpTable: Grp15 |
858 | 0: fxsave | 912 | 0: fxsave | RDFSBASE Ry (F3),(11B) |
859 | 1: fxstor | 913 | 1: fxstor | RDGSBASE Ry (F3),(11B) |
860 | 2: ldmxcsr (VEX) | 914 | 2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B) |
861 | 3: stmxcsr (VEX) | 915 | 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) |
862 | 4: XSAVE | 916 | 4: XSAVE |
863 | 5: XRSTOR | lfence (11B) | 917 | 5: XRSTOR | lfence (11B) |
864 | 6: mfence (11B) | 918 | 6: XSAVEOPT | mfence (11B) |
865 | 7: clflush | sfence (11B) | 919 | 7: clflush | sfence (11B) |
866 | EndTable | 920 | EndTable |
867 | 921 | ||
@@ -872,6 +926,12 @@ GrpTable: Grp16 | |||
872 | 3: prefetch T2 | 926 | 3: prefetch T2 |
873 | EndTable | 927 | EndTable |
874 | 928 | ||
929 | GrpTable: Grp17 | ||
930 | 1: BLSR By,Ey (v) | ||
931 | 2: BLSMSK By,Ey (v) | ||
932 | 3: BLSI By,Ey (v) | ||
933 | EndTable | ||
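The new Grp17 entries are the three BMI1 bit-twiddling ops reached through 0f 38 f3. Their reference semantics are one-liners in C:

    static inline unsigned long blsr(unsigned long x)   { return x & (x - 1); }  /* clear lowest set bit */
    static inline unsigned long blsmsk(unsigned long x) { return x ^ (x - 1); }  /* mask through lowest set bit */
    static inline unsigned long blsi(unsigned long x)   { return x & -x; }       /* isolate lowest set bit */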
934 | |||
875 | # AMD's Prefetch Group | 935 | # AMD's Prefetch Group |
876 | GrpTable: GrpP | 936 | GrpTable: GrpP |
877 | 0: PREFETCH | 937 | 0: PREFETCH |
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 3d11327c9ab4..23d8e5fecf76 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile | |||
@@ -27,6 +27,4 @@ obj-$(CONFIG_AMD_NUMA) += amdtopology.o | |||
27 | obj-$(CONFIG_ACPI_NUMA) += srat.o | 27 | obj-$(CONFIG_ACPI_NUMA) += srat.o |
28 | obj-$(CONFIG_NUMA_EMU) += numa_emulation.o | 28 | obj-$(CONFIG_NUMA_EMU) += numa_emulation.o |
29 | 29 | ||
30 | obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o | ||
31 | |||
32 | obj-$(CONFIG_MEMTEST) += memtest.o | 30 | obj-$(CONFIG_MEMTEST) += memtest.o |
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index d0474ad2a6e5..1fb85dbe390a 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c | |||
@@ -25,7 +25,7 @@ int fixup_exception(struct pt_regs *regs) | |||
25 | if (fixup) { | 25 | if (fixup) { |
26 | /* If fixup is less than 16, it means uaccess error */ | 26 | /* If fixup is less than 16, it means uaccess error */ |
27 | if (fixup->fixup < 16) { | 27 | if (fixup->fixup < 16) { |
28 | current_thread_info()->uaccess_err = -EFAULT; | 28 | current_thread_info()->uaccess_err = 1; |
29 | regs->ip += fixup->fixup; | 29 | regs->ip += fixup->fixup; |
30 | return 1; | 30 | return 1; |
31 | } | 31 | } |
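uaccess_err becomes a plain flag; the -EFAULT is now produced at the point where the flag is read back. A hedged sketch of that consumer, modeled on the uaccess_try/uaccess_catch pair in arch/x86/include/asm/uaccess.h (exact macro bodies in the tree may differ):

    #define uaccess_try    do {                                            \
            current_thread_info()->uaccess_err = 0;                        \
            barrier();

    #define uaccess_catch(err)                                             \
            (err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0);   \
    } while (0)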
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 5db0490deb07..9d74824a708d 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -626,7 +626,7 @@ pgtable_bad(struct pt_regs *regs, unsigned long error_code, | |||
626 | 626 | ||
627 | static noinline void | 627 | static noinline void |
628 | no_context(struct pt_regs *regs, unsigned long error_code, | 628 | no_context(struct pt_regs *regs, unsigned long error_code, |
629 | unsigned long address) | 629 | unsigned long address, int signal, int si_code) |
630 | { | 630 | { |
631 | struct task_struct *tsk = current; | 631 | struct task_struct *tsk = current; |
632 | unsigned long *stackend; | 632 | unsigned long *stackend; |
@@ -634,8 +634,17 @@ no_context(struct pt_regs *regs, unsigned long error_code, | |||
634 | int sig; | 634 | int sig; |
635 | 635 | ||
636 | /* Are we prepared to handle this kernel fault? */ | 636 | /* Are we prepared to handle this kernel fault? */ |
637 | if (fixup_exception(regs)) | 637 | if (fixup_exception(regs)) { |
638 | if (current_thread_info()->sig_on_uaccess_error && signal) { | ||
639 | tsk->thread.trap_no = 14; | ||
640 | tsk->thread.error_code = error_code | PF_USER; | ||
641 | tsk->thread.cr2 = address; | ||
642 | |||
643 | /* XXX: hwpoison faults will set the wrong code. */ | ||
644 | force_sig_info_fault(signal, si_code, address, tsk, 0); | ||
645 | } | ||
638 | return; | 646 | return; |
647 | } | ||
639 | 648 | ||
640 | /* | 649 | /* |
641 | * 32-bit: | 650 | * 32-bit: |
@@ -755,7 +764,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, | |||
755 | if (is_f00f_bug(regs, address)) | 764 | if (is_f00f_bug(regs, address)) |
756 | return; | 765 | return; |
757 | 766 | ||
758 | no_context(regs, error_code, address); | 767 | no_context(regs, error_code, address, SIGSEGV, si_code); |
759 | } | 768 | } |
760 | 769 | ||
761 | static noinline void | 770 | static noinline void |
@@ -819,7 +828,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, | |||
819 | 828 | ||
820 | /* Kernel mode? Handle exceptions or die: */ | 829 | /* Kernel mode? Handle exceptions or die: */ |
821 | if (!(error_code & PF_USER)) { | 830 | if (!(error_code & PF_USER)) { |
822 | no_context(regs, error_code, address); | 831 | no_context(regs, error_code, address, SIGBUS, BUS_ADRERR); |
823 | return; | 832 | return; |
824 | } | 833 | } |
825 | 834 | ||
@@ -854,7 +863,7 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, | |||
854 | if (!(fault & VM_FAULT_RETRY)) | 863 | if (!(fault & VM_FAULT_RETRY)) |
855 | up_read(¤t->mm->mmap_sem); | 864 | up_read(¤t->mm->mmap_sem); |
856 | if (!(error_code & PF_USER)) | 865 | if (!(error_code & PF_USER)) |
857 | no_context(regs, error_code, address); | 866 | no_context(regs, error_code, address, 0, 0); |
858 | return 1; | 867 | return 1; |
859 | } | 868 | } |
860 | if (!(fault & VM_FAULT_ERROR)) | 869 | if (!(fault & VM_FAULT_ERROR)) |
@@ -864,7 +873,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, | |||
864 | /* Kernel mode? Handle exceptions or die: */ | 873 | /* Kernel mode? Handle exceptions or die: */ |
865 | if (!(error_code & PF_USER)) { | 874 | if (!(error_code & PF_USER)) { |
866 | up_read(¤t->mm->mmap_sem); | 875 | up_read(¤t->mm->mmap_sem); |
867 | no_context(regs, error_code, address); | 876 | no_context(regs, error_code, address, |
877 | SIGSEGV, SEGV_MAPERR); | ||
868 | return 1; | 878 | return 1; |
869 | } | 879 | } |
870 | 880 | ||
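The extra signal/si_code parameters let a fixed-up kernel-mode fault still be reported to the task when the thread has opted in via the new sig_on_uaccess_error flag; passing signal == 0, as the VM_FAULT_RETRY path above does, suppresses delivery entirely. A hedged sketch of an opt-in caller in the style of the vsyscall emulation code — the flag name is from this diff, the helper and its name are illustrative:

    /* hypothetical helper: a fault here raises SIGSEGV via no_context()
     * instead of only returning -EFAULT */
    static long probe_user_long(long __user *uptr, long val)
    {
            struct thread_info *ti = current_thread_info();
            int prev = ti->sig_on_uaccess_error;
            long ret;

            ti->sig_on_uaccess_error = 1;
            ret = put_user(val, uptr);
            ti->sig_on_uaccess_error = prev;

            return ret;
    }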
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 87488b93a65c..6cabf6570d64 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -3,6 +3,7 @@ | |||
3 | #include <linux/ioport.h> | 3 | #include <linux/ioport.h> |
4 | #include <linux/swap.h> | 4 | #include <linux/swap.h> |
5 | #include <linux/memblock.h> | 5 | #include <linux/memblock.h> |
6 | #include <linux/bootmem.h> /* for max_low_pfn */ | ||
6 | 7 | ||
7 | #include <asm/cacheflush.h> | 8 | #include <asm/cacheflush.h> |
8 | #include <asm/e820.h> | 9 | #include <asm/e820.h> |
@@ -15,6 +16,7 @@ | |||
15 | #include <asm/tlbflush.h> | 16 | #include <asm/tlbflush.h> |
16 | #include <asm/tlb.h> | 17 | #include <asm/tlb.h> |
17 | #include <asm/proto.h> | 18 | #include <asm/proto.h> |
19 | #include <asm/dma.h> /* for MAX_DMA_PFN */ | ||
18 | 20 | ||
19 | unsigned long __initdata pgt_buf_start; | 21 | unsigned long __initdata pgt_buf_start; |
20 | unsigned long __meminitdata pgt_buf_end; | 22 | unsigned long __meminitdata pgt_buf_end; |
@@ -67,7 +69,7 @@ static void __init find_early_table_space(unsigned long end, int use_pse, | |||
67 | good_end = max_pfn_mapped << PAGE_SHIFT; | 69 | good_end = max_pfn_mapped << PAGE_SHIFT; |
68 | 70 | ||
69 | base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE); | 71 | base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE); |
70 | if (base == MEMBLOCK_ERROR) | 72 | if (!base) |
71 | panic("Cannot find space for the kernel page tables"); | 73 | panic("Cannot find space for the kernel page tables"); |
72 | 74 | ||
73 | pgt_buf_start = base >> PAGE_SHIFT; | 75 | pgt_buf_start = base >> PAGE_SHIFT; |
@@ -80,7 +82,7 @@ static void __init find_early_table_space(unsigned long end, int use_pse, | |||
80 | 82 | ||
81 | void __init native_pagetable_reserve(u64 start, u64 end) | 83 | void __init native_pagetable_reserve(u64 start, u64 end) |
82 | { | 84 | { |
83 | memblock_x86_reserve_range(start, end, "PGTABLE"); | 85 | memblock_reserve(start, end - start); |
84 | } | 86 | } |
85 | 87 | ||
86 | struct map_range { | 88 | struct map_range { |
@@ -279,8 +281,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
279 | * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top) | 281 | * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top) |
280 | * so that they can be reused for other purposes. | 282 | * so that they can be reused for other purposes. |
281 | * | 283 | * |
282 | * On native it just means calling memblock_x86_reserve_range, on Xen it | 284 | * On native it just means calling memblock_reserve, on Xen it also |
283 | * also means marking RW the pagetable pages that we allocated before | 285 | * means marking RW the pagetable pages that we allocated before |
284 | * but that haven't been used. | 286 | * but that haven't been used. |
285 | * | 287 | * |
286 | * In fact on xen we mark RO the whole range pgt_buf_start - | 288 | * In fact on xen we mark RO the whole range pgt_buf_start - |
@@ -392,3 +394,24 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
392 | free_init_pages("initrd memory", start, PAGE_ALIGN(end)); | 394 | free_init_pages("initrd memory", start, PAGE_ALIGN(end)); |
393 | } | 395 | } |
394 | #endif | 396 | #endif |
397 | |||
398 | void __init zone_sizes_init(void) | ||
399 | { | ||
400 | unsigned long max_zone_pfns[MAX_NR_ZONES]; | ||
401 | |||
402 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); | ||
403 | |||
404 | #ifdef CONFIG_ZONE_DMA | ||
405 | max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; | ||
406 | #endif | ||
407 | #ifdef CONFIG_ZONE_DMA32 | ||
408 | max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; | ||
409 | #endif | ||
410 | max_zone_pfns[ZONE_NORMAL] = max_low_pfn; | ||
411 | #ifdef CONFIG_HIGHMEM | ||
412 | max_zone_pfns[ZONE_HIGHMEM] = max_pfn; | ||
413 | #endif | ||
414 | |||
415 | free_area_init_nodes(max_zone_pfns); | ||
416 | } | ||
417 | |||
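As a worked example of what the now-shared zone_sizes_init() hands to free_area_init_nodes() on a typical x86_64 configuration with 4 KiB pages (PAGE_SHIFT == 12), using the asm/dma.h constants included above:

    max_zone_pfns[ZONE_DMA]    = MAX_DMA_PFN;    /* 16 MiB >> 12 = 0x1000 pfns   */
    max_zone_pfns[ZONE_DMA32]  = MAX_DMA32_PFN;  /*  4 GiB >> 12 = 0x100000 pfns */
    max_zone_pfns[ZONE_NORMAL] = max_low_pfn;    /* remaining direct-mapped RAM  */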
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 29f7c6d98179..8663f6c47ccb 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -427,23 +427,17 @@ static void __init add_one_highpage_init(struct page *page) | |||
427 | void __init add_highpages_with_active_regions(int nid, | 427 | void __init add_highpages_with_active_regions(int nid, |
428 | unsigned long start_pfn, unsigned long end_pfn) | 428 | unsigned long start_pfn, unsigned long end_pfn) |
429 | { | 429 | { |
430 | struct range *range; | 430 | phys_addr_t start, end; |
431 | int nr_range; | 431 | u64 i; |
432 | int i; | 432 | |
433 | 433 | for_each_free_mem_range(i, nid, &start, &end, NULL) { | |
434 | nr_range = __get_free_all_memory_range(&range, nid, start_pfn, end_pfn); | 434 | unsigned long pfn = clamp_t(unsigned long, PFN_UP(start), |
435 | 435 | start_pfn, end_pfn); | |
436 | for (i = 0; i < nr_range; i++) { | 436 | unsigned long e_pfn = clamp_t(unsigned long, PFN_DOWN(end), |
437 | struct page *page; | 437 | start_pfn, end_pfn); |
438 | int node_pfn; | 438 | for ( ; pfn < e_pfn; pfn++) |
439 | 439 | if (pfn_valid(pfn)) | |
440 | for (node_pfn = range[i].start; node_pfn < range[i].end; | 440 | add_one_highpage_init(pfn_to_page(pfn)); |
441 | node_pfn++) { | ||
442 | if (!pfn_valid(node_pfn)) | ||
443 | continue; | ||
444 | page = pfn_to_page(node_pfn); | ||
445 | add_one_highpage_init(page); | ||
446 | } | ||
447 | } | 441 | } |
448 | } | 442 | } |
449 | #else | 443 | #else |
@@ -650,18 +644,18 @@ void __init initmem_init(void) | |||
650 | highstart_pfn = highend_pfn = max_pfn; | 644 | highstart_pfn = highend_pfn = max_pfn; |
651 | if (max_pfn > max_low_pfn) | 645 | if (max_pfn > max_low_pfn) |
652 | highstart_pfn = max_low_pfn; | 646 | highstart_pfn = max_low_pfn; |
653 | memblock_x86_register_active_regions(0, 0, highend_pfn); | ||
654 | sparse_memory_present_with_active_regions(0); | ||
655 | printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", | 647 | printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", |
656 | pages_to_mb(highend_pfn - highstart_pfn)); | 648 | pages_to_mb(highend_pfn - highstart_pfn)); |
657 | num_physpages = highend_pfn; | 649 | num_physpages = highend_pfn; |
658 | high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; | 650 | high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; |
659 | #else | 651 | #else |
660 | memblock_x86_register_active_regions(0, 0, max_low_pfn); | ||
661 | sparse_memory_present_with_active_regions(0); | ||
662 | num_physpages = max_low_pfn; | 652 | num_physpages = max_low_pfn; |
663 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; | 653 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; |
664 | #endif | 654 | #endif |
655 | |||
656 | memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0); | ||
657 | sparse_memory_present_with_active_regions(0); | ||
658 | |||
665 | #ifdef CONFIG_FLATMEM | 659 | #ifdef CONFIG_FLATMEM |
666 | max_mapnr = num_physpages; | 660 | max_mapnr = num_physpages; |
667 | #endif | 661 | #endif |
@@ -674,22 +668,6 @@ void __init initmem_init(void) | |||
674 | } | 668 | } |
675 | #endif /* !CONFIG_NEED_MULTIPLE_NODES */ | 669 | #endif /* !CONFIG_NEED_MULTIPLE_NODES */ |
676 | 670 | ||
677 | static void __init zone_sizes_init(void) | ||
678 | { | ||
679 | unsigned long max_zone_pfns[MAX_NR_ZONES]; | ||
680 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); | ||
681 | #ifdef CONFIG_ZONE_DMA | ||
682 | max_zone_pfns[ZONE_DMA] = | ||
683 | virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; | ||
684 | #endif | ||
685 | max_zone_pfns[ZONE_NORMAL] = max_low_pfn; | ||
686 | #ifdef CONFIG_HIGHMEM | ||
687 | max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; | ||
688 | #endif | ||
689 | |||
690 | free_area_init_nodes(max_zone_pfns); | ||
691 | } | ||
692 | |||
693 | void __init setup_bootmem_allocator(void) | 671 | void __init setup_bootmem_allocator(void) |
694 | { | 672 | { |
695 | printk(KERN_INFO " mapped low ram: 0 - %08lx\n", | 673 | printk(KERN_INFO " mapped low ram: 0 - %08lx\n", |
@@ -760,6 +738,17 @@ void __init mem_init(void) | |||
760 | #ifdef CONFIG_FLATMEM | 738 | #ifdef CONFIG_FLATMEM |
761 | BUG_ON(!mem_map); | 739 | BUG_ON(!mem_map); |
762 | #endif | 740 | #endif |
741 | /* | ||
742 | * With CONFIG_DEBUG_PAGEALLOC initialization of highmem pages has to | ||
743 | * be done before free_all_bootmem(). Memblock uses free low memory for | ||
744 | * temporary data (see find_range_array()) and for this purpose can use | ||
745 | * pages that were already passed to the buddy allocator, hence marked as | ||
746 | * not accessible in the page tables when compiled with | ||
747 | * CONFIG_DEBUG_PAGEALLOC. Otherwise the order of initialization is not | ||
748 | * important here. | ||
749 | */ | ||
750 | set_highmem_pages_init(); | ||
751 | |||
763 | /* this will put all low memory onto the freelists */ | 752 | /* this will put all low memory onto the freelists */ |
764 | totalram_pages += free_all_bootmem(); | 753 | totalram_pages += free_all_bootmem(); |
765 | 754 | ||
@@ -771,8 +760,6 @@ void __init mem_init(void) | |||
771 | if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) | 760 | if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) |
772 | reservedpages++; | 761 | reservedpages++; |
773 | 762 | ||
774 | set_highmem_pages_init(); | ||
775 | |||
776 | codesize = (unsigned long) &_etext - (unsigned long) &_text; | 763 | codesize = (unsigned long) &_etext - (unsigned long) &_text; |
777 | datasize = (unsigned long) &_edata - (unsigned long) &_etext; | 764 | datasize = (unsigned long) &_edata - (unsigned long) &_etext; |
778 | initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; | 765 | initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index bbaaa005bf0e..436a0309db33 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -608,21 +608,12 @@ kernel_physical_mapping_init(unsigned long start, | |||
608 | #ifndef CONFIG_NUMA | 608 | #ifndef CONFIG_NUMA |
609 | void __init initmem_init(void) | 609 | void __init initmem_init(void) |
610 | { | 610 | { |
611 | memblock_x86_register_active_regions(0, 0, max_pfn); | 611 | memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0); |
612 | } | 612 | } |
613 | #endif | 613 | #endif |
614 | 614 | ||
615 | void __init paging_init(void) | 615 | void __init paging_init(void) |
616 | { | 616 | { |
617 | unsigned long max_zone_pfns[MAX_NR_ZONES]; | ||
618 | |||
619 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); | ||
620 | #ifdef CONFIG_ZONE_DMA | ||
621 | max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; | ||
622 | #endif | ||
623 | max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; | ||
624 | max_zone_pfns[ZONE_NORMAL] = max_pfn; | ||
625 | |||
626 | sparse_memory_present_with_active_regions(MAX_NUMNODES); | 617 | sparse_memory_present_with_active_regions(MAX_NUMNODES); |
627 | sparse_init(); | 618 | sparse_init(); |
628 | 619 | ||
@@ -634,7 +625,7 @@ void __init paging_init(void) | |||
634 | */ | 625 | */ |
635 | node_clear_state(0, N_NORMAL_MEMORY); | 626 | node_clear_state(0, N_NORMAL_MEMORY); |
636 | 627 | ||
637 | free_area_init_nodes(max_zone_pfns); | 628 | zone_sizes_init(); |
638 | } | 629 | } |
639 | 630 | ||
640 | /* | 631 | /* |
diff --git a/arch/x86/mm/memblock.c b/arch/x86/mm/memblock.c deleted file mode 100644 index 992da5ec5a64..000000000000 --- a/arch/x86/mm/memblock.c +++ /dev/null | |||
@@ -1,348 +0,0 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/types.h> | ||
3 | #include <linux/init.h> | ||
4 | #include <linux/bitops.h> | ||
5 | #include <linux/memblock.h> | ||
6 | #include <linux/bootmem.h> | ||
7 | #include <linux/mm.h> | ||
8 | #include <linux/range.h> | ||
9 | |||
10 | /* Check for already reserved areas */ | ||
11 | bool __init memblock_x86_check_reserved_size(u64 *addrp, u64 *sizep, u64 align) | ||
12 | { | ||
13 | struct memblock_region *r; | ||
14 | u64 addr = *addrp, last; | ||
15 | u64 size = *sizep; | ||
16 | bool changed = false; | ||
17 | |||
18 | again: | ||
19 | last = addr + size; | ||
20 | for_each_memblock(reserved, r) { | ||
21 | if (last > r->base && addr < r->base) { | ||
22 | size = r->base - addr; | ||
23 | changed = true; | ||
24 | goto again; | ||
25 | } | ||
26 | if (last > (r->base + r->size) && addr < (r->base + r->size)) { | ||
27 | addr = round_up(r->base + r->size, align); | ||
28 | size = last - addr; | ||
29 | changed = true; | ||
30 | goto again; | ||
31 | } | ||
32 | if (last <= (r->base + r->size) && addr >= r->base) { | ||
33 | *sizep = 0; | ||
34 | return false; | ||
35 | } | ||
36 | } | ||
37 | if (changed) { | ||
38 | *addrp = addr; | ||
39 | *sizep = size; | ||
40 | } | ||
41 | return changed; | ||
42 | } | ||
43 | |||
44 | /* | ||
45 | * Find next free range after start, and size is returned in *sizep | ||
46 | */ | ||
47 | u64 __init memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align) | ||
48 | { | ||
49 | struct memblock_region *r; | ||
50 | |||
51 | for_each_memblock(memory, r) { | ||
52 | u64 ei_start = r->base; | ||
53 | u64 ei_last = ei_start + r->size; | ||
54 | u64 addr; | ||
55 | |||
56 | addr = round_up(ei_start, align); | ||
57 | if (addr < start) | ||
58 | addr = round_up(start, align); | ||
59 | if (addr >= ei_last) | ||
60 | continue; | ||
61 | *sizep = ei_last - addr; | ||
62 | while (memblock_x86_check_reserved_size(&addr, sizep, align)) | ||
63 | ; | ||
64 | |||
65 | if (*sizep) | ||
66 | return addr; | ||
67 | } | ||
68 | |||
69 | return MEMBLOCK_ERROR; | ||
70 | } | ||
71 | |||
72 | static __init struct range *find_range_array(int count) | ||
73 | { | ||
74 | u64 end, size, mem; | ||
75 | struct range *range; | ||
76 | |||
77 | size = sizeof(struct range) * count; | ||
78 | end = memblock.current_limit; | ||
79 | |||
80 | mem = memblock_find_in_range(0, end, size, sizeof(struct range)); | ||
81 | if (mem == MEMBLOCK_ERROR) | ||
82 | panic("can not find more space for range array"); | ||
83 | |||
84 | /* | ||
85 | * This range is temporary, so don't reserve it; it will not be | ||
86 | * overlapped, because we will not allocate a new buffer before | ||
87 | * we discard this one | ||
88 | */ | ||
89 | range = __va(mem); | ||
90 | memset(range, 0, size); | ||
91 | |||
92 | return range; | ||
93 | } | ||
94 | |||
95 | static void __init memblock_x86_subtract_reserved(struct range *range, int az) | ||
96 | { | ||
97 | u64 final_start, final_end; | ||
98 | struct memblock_region *r; | ||
99 | |||
100 | /* Take out the region array itself first */ | ||
101 | memblock_free_reserved_regions(); | ||
102 | |||
103 | memblock_dbg("Subtract (%ld early reservations)\n", memblock.reserved.cnt); | ||
104 | |||
105 | for_each_memblock(reserved, r) { | ||
106 | memblock_dbg(" [%010llx-%010llx]\n", (u64)r->base, (u64)r->base + r->size - 1); | ||
107 | final_start = PFN_DOWN(r->base); | ||
108 | final_end = PFN_UP(r->base + r->size); | ||
109 | if (final_start >= final_end) | ||
110 | continue; | ||
111 | subtract_range(range, az, final_start, final_end); | ||
112 | } | ||
113 | |||
114 | /* Put region array back ? */ | ||
115 | memblock_reserve_reserved_regions(); | ||
116 | } | ||
117 | |||
118 | struct count_data { | ||
119 | int nr; | ||
120 | }; | ||
121 | |||
122 | static int __init count_work_fn(unsigned long start_pfn, | ||
123 | unsigned long end_pfn, void *datax) | ||
124 | { | ||
125 | struct count_data *data = datax; | ||
126 | |||
127 | data->nr++; | ||
128 | |||
129 | return 0; | ||
130 | } | ||
131 | |||
132 | static int __init count_early_node_map(int nodeid) | ||
133 | { | ||
134 | struct count_data data; | ||
135 | |||
136 | data.nr = 0; | ||
137 | work_with_active_regions(nodeid, count_work_fn, &data); | ||
138 | |||
139 | return data.nr; | ||
140 | } | ||
141 | |||
142 | int __init __get_free_all_memory_range(struct range **rangep, int nodeid, | ||
143 | unsigned long start_pfn, unsigned long end_pfn) | ||
144 | { | ||
145 | int count; | ||
146 | struct range *range; | ||
147 | int nr_range; | ||
148 | |||
149 | count = (memblock.reserved.cnt + count_early_node_map(nodeid)) * 2; | ||
150 | |||
151 | range = find_range_array(count); | ||
152 | nr_range = 0; | ||
153 | |||
154 | /* | ||
155 | * Use early_node_map[] and memblock.reserved.region to get range array | ||
156 | * at first | ||
157 | */ | ||
158 | nr_range = add_from_early_node_map(range, count, nr_range, nodeid); | ||
159 | subtract_range(range, count, 0, start_pfn); | ||
160 | subtract_range(range, count, end_pfn, -1ULL); | ||
161 | |||
162 | memblock_x86_subtract_reserved(range, count); | ||
163 | nr_range = clean_sort_range(range, count); | ||
164 | |||
165 | *rangep = range; | ||
166 | return nr_range; | ||
167 | } | ||
168 | |||
169 | int __init get_free_all_memory_range(struct range **rangep, int nodeid) | ||
170 | { | ||
171 | unsigned long end_pfn = -1UL; | ||
172 | |||
173 | #ifdef CONFIG_X86_32 | ||
174 | end_pfn = max_low_pfn; | ||
175 | #endif | ||
176 | return __get_free_all_memory_range(rangep, nodeid, 0, end_pfn); | ||
177 | } | ||
178 | |||
179 | static u64 __init __memblock_x86_memory_in_range(u64 addr, u64 limit, bool get_free) | ||
180 | { | ||
181 | int i, count; | ||
182 | struct range *range; | ||
183 | int nr_range; | ||
184 | u64 final_start, final_end; | ||
185 | u64 free_size; | ||
186 | struct memblock_region *r; | ||
187 | |||
188 | count = (memblock.reserved.cnt + memblock.memory.cnt) * 2; | ||
189 | |||
190 | range = find_range_array(count); | ||
191 | nr_range = 0; | ||
192 | |||
193 | addr = PFN_UP(addr); | ||
194 | limit = PFN_DOWN(limit); | ||
195 | |||
196 | for_each_memblock(memory, r) { | ||
197 | final_start = PFN_UP(r->base); | ||
198 | final_end = PFN_DOWN(r->base + r->size); | ||
199 | if (final_start >= final_end) | ||
200 | continue; | ||
201 | if (final_start >= limit || final_end <= addr) | ||
202 | continue; | ||
203 | |||
204 | nr_range = add_range(range, count, nr_range, final_start, final_end); | ||
205 | } | ||
206 | subtract_range(range, count, 0, addr); | ||
207 | subtract_range(range, count, limit, -1ULL); | ||
208 | |||
209 | /* Subtract memblock.reserved.region in range ? */ | ||
210 | if (!get_free) | ||
211 | goto sort_and_count_them; | ||
212 | for_each_memblock(reserved, r) { | ||
213 | final_start = PFN_DOWN(r->base); | ||
214 | final_end = PFN_UP(r->base + r->size); | ||
215 | if (final_start >= final_end) | ||
216 | continue; | ||
217 | if (final_start >= limit || final_end <= addr) | ||
218 | continue; | ||
219 | |||
220 | subtract_range(range, count, final_start, final_end); | ||
221 | } | ||
222 | |||
223 | sort_and_count_them: | ||
224 | nr_range = clean_sort_range(range, count); | ||
225 | |||
226 | free_size = 0; | ||
227 | for (i = 0; i < nr_range; i++) | ||
228 | free_size += range[i].end - range[i].start; | ||
229 | |||
230 | return free_size << PAGE_SHIFT; | ||
231 | } | ||
232 | |||
233 | u64 __init memblock_x86_free_memory_in_range(u64 addr, u64 limit) | ||
234 | { | ||
235 | return __memblock_x86_memory_in_range(addr, limit, true); | ||
236 | } | ||
237 | |||
238 | u64 __init memblock_x86_memory_in_range(u64 addr, u64 limit) | ||
239 | { | ||
240 | return __memblock_x86_memory_in_range(addr, limit, false); | ||
241 | } | ||
242 | |||
243 | void __init memblock_x86_reserve_range(u64 start, u64 end, char *name) | ||
244 | { | ||
245 | if (start == end) | ||
246 | return; | ||
247 | |||
248 | if (WARN_ONCE(start > end, "memblock_x86_reserve_range: wrong range [%#llx, %#llx)\n", start, end)) | ||
249 | return; | ||
250 | |||
251 | memblock_dbg(" memblock_x86_reserve_range: [%#010llx-%#010llx] %16s\n", start, end - 1, name); | ||
252 | |||
253 | memblock_reserve(start, end - start); | ||
254 | } | ||
255 | |||
256 | void __init memblock_x86_free_range(u64 start, u64 end) | ||
257 | { | ||
258 | if (start == end) | ||
259 | return; | ||
260 | |||
261 | if (WARN_ONCE(start > end, "memblock_x86_free_range: wrong range [%#llx, %#llx)\n", start, end)) | ||
262 | return; | ||
263 | |||
264 | memblock_dbg(" memblock_x86_free_range: [%#010llx-%#010llx]\n", start, end - 1); | ||
265 | |||
266 | memblock_free(start, end - start); | ||
267 | } | ||
268 | |||
269 | /* | ||
270 | * Need to call this function after memblock_x86_register_active_regions, | ||
271 | * so early_node_map[] is filled already. | ||
272 | */ | ||
273 | u64 __init memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align) | ||
274 | { | ||
275 | u64 addr; | ||
276 | addr = find_memory_core_early(nid, size, align, start, end); | ||
277 | if (addr != MEMBLOCK_ERROR) | ||
278 | return addr; | ||
279 | |||
280 | /* Fallback, should already have start end within node range */ | ||
281 | return memblock_find_in_range(start, end, size, align); | ||
282 | } | ||
283 | |||
284 | /* | ||
285 | * Finds an active region in the address range from start_pfn to last_pfn and | ||
286 | * returns its range in ei_startpfn and ei_endpfn for the memblock entry. | ||
287 | */ | ||
288 | static int __init memblock_x86_find_active_region(const struct memblock_region *ei, | ||
289 | unsigned long start_pfn, | ||
290 | unsigned long last_pfn, | ||
291 | unsigned long *ei_startpfn, | ||
292 | unsigned long *ei_endpfn) | ||
293 | { | ||
294 | u64 align = PAGE_SIZE; | ||
295 | |||
296 | *ei_startpfn = round_up(ei->base, align) >> PAGE_SHIFT; | ||
297 | *ei_endpfn = round_down(ei->base + ei->size, align) >> PAGE_SHIFT; | ||
298 | |||
299 | /* Skip map entries smaller than a page */ | ||
300 | if (*ei_startpfn >= *ei_endpfn) | ||
301 | return 0; | ||
302 | |||
303 | /* Skip if map is outside the node */ | ||
304 | if (*ei_endpfn <= start_pfn || *ei_startpfn >= last_pfn) | ||
305 | return 0; | ||
306 | |||
307 | /* Check for overlaps */ | ||
308 | if (*ei_startpfn < start_pfn) | ||
309 | *ei_startpfn = start_pfn; | ||
310 | if (*ei_endpfn > last_pfn) | ||
311 | *ei_endpfn = last_pfn; | ||
312 | |||
313 | return 1; | ||
314 | } | ||
315 | |||
316 | /* Walk the memblock.memory map and register active regions within a node */ | ||
317 | void __init memblock_x86_register_active_regions(int nid, unsigned long start_pfn, | ||
318 | unsigned long last_pfn) | ||
319 | { | ||
320 | unsigned long ei_startpfn; | ||
321 | unsigned long ei_endpfn; | ||
322 | struct memblock_region *r; | ||
323 | |||
324 | for_each_memblock(memory, r) | ||
325 | if (memblock_x86_find_active_region(r, start_pfn, last_pfn, | ||
326 | &ei_startpfn, &ei_endpfn)) | ||
327 | add_active_range(nid, ei_startpfn, ei_endpfn); | ||
328 | } | ||
329 | |||
330 | /* | ||
331 | * Find the hole size (in bytes) in the memory range. | ||
332 | * @start: starting address of the memory range to scan | ||
333 | * @end: ending address of the memory range to scan | ||
334 | */ | ||
335 | u64 __init memblock_x86_hole_size(u64 start, u64 end) | ||
336 | { | ||
337 | unsigned long start_pfn = start >> PAGE_SHIFT; | ||
338 | unsigned long last_pfn = end >> PAGE_SHIFT; | ||
339 | unsigned long ei_startpfn, ei_endpfn, ram = 0; | ||
340 | struct memblock_region *r; | ||
341 | |||
342 | for_each_memblock(memory, r) | ||
343 | if (memblock_x86_find_active_region(r, start_pfn, last_pfn, | ||
344 | &ei_startpfn, &ei_endpfn)) | ||
345 | ram += ei_endpfn - ei_startpfn; | ||
346 | |||
347 | return end - start - ((u64)ram << PAGE_SHIFT); | ||
348 | } | ||
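With this file removed, every former caller moves to the generic memblock API. The rough correspondence, as visible in the conversions elsewhere in this diff (a summary, not an exhaustive mapping):

    /* memblock_x86_reserve_range(s, e, name)     ->  memblock_reserve(s, e - s)   */
    /* memblock_x86_free_range(s, e)              ->  memblock_free(s, e - s)      */
    /* memblock_x86_register_active_regions(...)  ->  memblock_set_node(...)       */
    /* __get_free_all_memory_range() loops        ->  for_each_free_mem_range()    */
    /* addr == MEMBLOCK_ERROR                     ->  !addr                        */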
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c index 92faf3a1c53e..c80b9fb95734 100644 --- a/arch/x86/mm/memtest.c +++ b/arch/x86/mm/memtest.c | |||
@@ -34,7 +34,7 @@ static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad) | |||
34 | (unsigned long long) pattern, | 34 | (unsigned long long) pattern, |
35 | (unsigned long long) start_bad, | 35 | (unsigned long long) start_bad, |
36 | (unsigned long long) end_bad); | 36 | (unsigned long long) end_bad); |
37 | memblock_x86_reserve_range(start_bad, end_bad, "BAD RAM"); | 37 | memblock_reserve(start_bad, end_bad - start_bad); |
38 | } | 38 | } |
39 | 39 | ||
40 | static void __init memtest(u64 pattern, u64 start_phys, u64 size) | 40 | static void __init memtest(u64 pattern, u64 start_phys, u64 size) |
@@ -70,24 +70,19 @@ static void __init memtest(u64 pattern, u64 start_phys, u64 size) | |||
70 | 70 | ||
71 | static void __init do_one_pass(u64 pattern, u64 start, u64 end) | 71 | static void __init do_one_pass(u64 pattern, u64 start, u64 end) |
72 | { | 72 | { |
73 | u64 size = 0; | 73 | u64 i; |
74 | 74 | phys_addr_t this_start, this_end; | |
75 | while (start < end) { | 75 | |
76 | start = memblock_x86_find_in_range_size(start, &size, 1); | 76 | for_each_free_mem_range(i, MAX_NUMNODES, &this_start, &this_end, NULL) { |
77 | 77 | this_start = clamp_t(phys_addr_t, this_start, start, end); | |
78 | /* done ? */ | 78 | this_end = clamp_t(phys_addr_t, this_end, start, end); |
79 | if (start >= end) | 79 | if (this_start < this_end) { |
80 | break; | 80 | printk(KERN_INFO " %010llx - %010llx pattern %016llx\n", |
81 | if (start + size > end) | 81 | (unsigned long long)this_start, |
82 | size = end - start; | 82 | (unsigned long long)this_end, |
83 | 83 | (unsigned long long)cpu_to_be64(pattern)); | |
84 | printk(KERN_INFO " %010llx - %010llx pattern %016llx\n", | 84 | memtest(pattern, this_start, this_end - this_start); |
85 | (unsigned long long) start, | 85 | } |
86 | (unsigned long long) start + size, | ||
87 | (unsigned long long) cpu_to_be64(pattern)); | ||
88 | memtest(pattern, start, size); | ||
89 | |||
90 | start += size; | ||
91 | } | 86 | } |
92 | } | 87 | } |
93 | 88 | ||
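memtest() itself is untouched by this hunk; as a simplified sketch of what it does with each range it is handed (the real code walks in pattern-sized steps and coalesces adjacent bad words before reserving):

    u64 *p, *start = __va(this_start), *end = __va(this_end);

    for (p = start; p < end; p++)                   /* fill with the pattern */
            *p = pattern;
    for (p = start; p < end; p++)                   /* read back and verify  */
            if (*p != pattern)
                    reserve_bad_mem(pattern, __pa(p), __pa(p) + sizeof(*p));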
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 4b5ba85eb5c9..845df6835f9f 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c | |||
@@ -75,9 +75,9 @@ static unsigned long mmap_rnd(void) | |||
75 | */ | 75 | */ |
76 | if (current->flags & PF_RANDOMIZE) { | 76 | if (current->flags & PF_RANDOMIZE) { |
77 | if (mmap_is_ia32()) | 77 | if (mmap_is_ia32()) |
78 | rnd = (long)get_random_int() % (1<<8); | 78 | rnd = get_random_int() % (1<<8); |
79 | else | 79 | else |
80 | rnd = (long)(get_random_int() % (1<<28)); | 80 | rnd = get_random_int() % (1<<28); |
81 | } | 81 | } |
82 | return rnd << PAGE_SHIFT; | 82 | return rnd << PAGE_SHIFT; |
83 | } | 83 | } |
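Dropping the (long) casts keeps the modulo in unsigned arithmetic; with the cast, a 32-bit kernel could see a negative intermediate and hence a negative remainder, corrupting the mmap base. The resulting randomization span with 4 KiB pages (PAGE_SHIFT == 12):

    /* ia32:   rnd in [0, 1<<8)   ->  base offsets up to 255 << 12,          ~1 MiB */
    /* 64-bit: rnd in [0, 1<<28)  ->  base offsets up to ((1<<28)-1) << 12,  ~1 TiB */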
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index de54b9b278a7..dc0b727742f4 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c | |||
@@ -75,8 +75,8 @@ static LIST_HEAD(trace_list); /* struct remap_trace */ | |||
75 | 75 | ||
76 | /* module parameters */ | 76 | /* module parameters */ |
77 | static unsigned long filter_offset; | 77 | static unsigned long filter_offset; |
78 | static int nommiotrace; | 78 | static bool nommiotrace; |
79 | static int trace_pc; | 79 | static bool trace_pc; |
80 | 80 | ||
81 | module_param(filter_offset, ulong, 0); | 81 | module_param(filter_offset, ulong, 0); |
82 | module_param(nommiotrace, bool, 0); | 82 | module_param(nommiotrace, bool, 0); |
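The int-to-bool switch matches moduleparam's stricter type checking: module_param(..., bool, ...) now expects the backing variable to actually be a C bool. Usage at load time is unchanged, e.g. (module name assumed for illustration):

    /* modprobe mmiotrace nommiotrace=1 trace_pc=1 */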
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index fbeaaf416610..19d3fa08b119 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c | |||
@@ -110,7 +110,7 @@ void __cpuinit numa_clear_node(int cpu) | |||
110 | * Allocate node_to_cpumask_map based on number of available nodes | 110 | * Allocate node_to_cpumask_map based on number of available nodes |
111 | * Requires node_possible_map to be valid. | 111 | * Requires node_possible_map to be valid. |
112 | * | 112 | * |
113 | * Note: node_to_cpumask() is not valid until after this is done. | 113 | * Note: cpumask_of_node() is not valid until after this is done. |
114 | * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.) | 114 | * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.) |
115 | */ | 115 | */ |
116 | void __init setup_node_to_cpumask_map(void) | 116 | void __init setup_node_to_cpumask_map(void) |
@@ -192,8 +192,6 @@ int __init numa_add_memblk(int nid, u64 start, u64 end) | |||
192 | /* Initialize NODE_DATA for a node on the local memory */ | 192 | /* Initialize NODE_DATA for a node on the local memory */ |
193 | static void __init setup_node_data(int nid, u64 start, u64 end) | 193 | static void __init setup_node_data(int nid, u64 start, u64 end) |
194 | { | 194 | { |
195 | const u64 nd_low = PFN_PHYS(MAX_DMA_PFN); | ||
196 | const u64 nd_high = PFN_PHYS(max_pfn_mapped); | ||
197 | const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE); | 195 | const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE); |
198 | bool remapped = false; | 196 | bool remapped = false; |
199 | u64 nd_pa; | 197 | u64 nd_pa; |
@@ -224,17 +222,12 @@ static void __init setup_node_data(int nid, u64 start, u64 end) | |||
224 | nd_pa = __pa(nd); | 222 | nd_pa = __pa(nd); |
225 | remapped = true; | 223 | remapped = true; |
226 | } else { | 224 | } else { |
227 | nd_pa = memblock_x86_find_in_range_node(nid, nd_low, nd_high, | 225 | nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid); |
228 | nd_size, SMP_CACHE_BYTES); | 226 | if (!nd_pa) { |
229 | if (nd_pa == MEMBLOCK_ERROR) | ||
230 | nd_pa = memblock_find_in_range(nd_low, nd_high, | ||
231 | nd_size, SMP_CACHE_BYTES); | ||
232 | if (nd_pa == MEMBLOCK_ERROR) { | ||
233 | pr_err("Cannot find %zu bytes in node %d\n", | 227 | pr_err("Cannot find %zu bytes in node %d\n", |
234 | nd_size, nid); | 228 | nd_size, nid); |
235 | return; | 229 | return; |
236 | } | 230 | } |
237 | memblock_x86_reserve_range(nd_pa, nd_pa + nd_size, "NODE_DATA"); | ||
238 | nd = __va(nd_pa); | 231 | nd = __va(nd_pa); |
239 | } | 232 | } |
240 | 233 | ||
@@ -371,8 +364,7 @@ void __init numa_reset_distance(void) | |||
371 | 364 | ||
372 | /* numa_distance could be 1LU marking allocation failure, test cnt */ | 365 | /* numa_distance could be 1LU marking allocation failure, test cnt */ |
373 | if (numa_distance_cnt) | 366 | if (numa_distance_cnt) |
374 | memblock_x86_free_range(__pa(numa_distance), | 367 | memblock_free(__pa(numa_distance), size); |
375 | __pa(numa_distance) + size); | ||
376 | numa_distance_cnt = 0; | 368 | numa_distance_cnt = 0; |
377 | numa_distance = NULL; /* enable table creation */ | 369 | numa_distance = NULL; /* enable table creation */ |
378 | } | 370 | } |
@@ -395,13 +387,13 @@ static int __init numa_alloc_distance(void) | |||
395 | 387 | ||
396 | phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), | 388 | phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), |
397 | size, PAGE_SIZE); | 389 | size, PAGE_SIZE); |
398 | if (phys == MEMBLOCK_ERROR) { | 390 | if (!phys) { |
399 | pr_warning("NUMA: Warning: can't allocate distance table!\n"); | 391 | pr_warning("NUMA: Warning: can't allocate distance table!\n"); |
400 | /* don't retry until explicitly reset */ | 392 | /* don't retry until explicitly reset */ |
401 | numa_distance = (void *)1LU; | 393 | numa_distance = (void *)1LU; |
402 | return -ENOMEM; | 394 | return -ENOMEM; |
403 | } | 395 | } |
404 | memblock_x86_reserve_range(phys, phys + size, "NUMA DIST"); | 396 | memblock_reserve(phys, size); |
405 | 397 | ||
406 | numa_distance = __va(phys); | 398 | numa_distance = __va(phys); |
407 | numa_distance_cnt = cnt; | 399 | numa_distance_cnt = cnt; |
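The same conventions recur throughout the rest of the series: memblock_find_in_range() returns 0 (not MEMBLOCK_ERROR) when no range fits, and memblock_reserve()/memblock_free() take a (base, size) pair instead of a (start, end) range plus a name string. Sketch of the updated allocate/release cycle:

    phys_addr_t phys;

    phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), size, PAGE_SIZE);
    if (!phys)
            return -ENOMEM;            /* 0 now means "no range found" */
    memblock_reserve(phys, size);      /* base + size, no name argument */
    ...
    memblock_free(phys, size);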
@@ -430,8 +422,9 @@ static int __init numa_alloc_distance(void) | |||
430 | * calls are ignored until the distance table is reset with | 422 | * calls are ignored until the distance table is reset with |
431 | * numa_reset_distance(). | 423 | * numa_reset_distance(). |
432 | * | 424 | * |
433 | * If @from or @to is higher than the highest known node at the time of | 425 | * If @from or @to is higher than the highest known node or lower than zero |
434 | * table creation or @distance doesn't make sense, the call is ignored. | 426 | * at the time of table creation or @distance doesn't make sense, the call |
427 | * is ignored. | ||
435 | * This is to allow simplification of specific NUMA config implementations. | 428 | * This is to allow simplification of specific NUMA config implementations. |
436 | */ | 429 | */ |
437 | void __init numa_set_distance(int from, int to, int distance) | 430 | void __init numa_set_distance(int from, int to, int distance) |
@@ -439,8 +432,9 @@ void __init numa_set_distance(int from, int to, int distance) | |||
439 | if (!numa_distance && numa_alloc_distance() < 0) | 432 | if (!numa_distance && numa_alloc_distance() < 0) |
440 | return; | 433 | return; |
441 | 434 | ||
442 | if (from >= numa_distance_cnt || to >= numa_distance_cnt) { | 435 | if (from >= numa_distance_cnt || to >= numa_distance_cnt || |
443 | printk_once(KERN_DEBUG "NUMA: Debug: distance out of bound, from=%d to=%d distance=%d\n", | 436 | from < 0 || to < 0) { |
437 | pr_warn_once("NUMA: Warning: node ids are out of bound, from=%d to=%d distance=%d\n", | ||
444 | from, to, distance); | 438 | from, to, distance); |
445 | return; | 439 | return; |
446 | } | 440 | } |
@@ -482,8 +476,8 @@ static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi) | |||
482 | numaram = 0; | 476 | numaram = 0; |
483 | } | 477 | } |
484 | 478 | ||
485 | e820ram = max_pfn - (memblock_x86_hole_size(0, | 479 | e820ram = max_pfn - absent_pages_in_range(0, max_pfn); |
486 | PFN_PHYS(max_pfn)) >> PAGE_SHIFT); | 480 | |
487 | /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ | 481 | /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ |
488 | if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) { | 482 | if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) { |
489 | printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n", | 483 | printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n", |
@@ -505,13 +499,10 @@ static int __init numa_register_memblks(struct numa_meminfo *mi) | |||
505 | if (WARN_ON(nodes_empty(node_possible_map))) | 499 | if (WARN_ON(nodes_empty(node_possible_map))) |
506 | return -EINVAL; | 500 | return -EINVAL; |
507 | 501 | ||
508 | for (i = 0; i < mi->nr_blks; i++) | 502 | for (i = 0; i < mi->nr_blks; i++) { |
509 | memblock_x86_register_active_regions(mi->blk[i].nid, | 503 | struct numa_memblk *mb = &mi->blk[i]; |
510 | mi->blk[i].start >> PAGE_SHIFT, | 504 | memblock_set_node(mb->start, mb->end - mb->start, mb->nid); |
511 | mi->blk[i].end >> PAGE_SHIFT); | 505 | } |
512 | |||
513 | /* for out of order entries */ | ||
514 | sort_node_map(); | ||
515 | 506 | ||
516 | /* | 507 | /* |
517 | * If sections array is gonna be used for pfn -> nid mapping, check | 508 | * If sections array is gonna be used for pfn -> nid mapping, check |
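With HAVE_MEMBLOCK_NODE_MAP selected, node ids are attached directly to memblock regions via memblock_set_node(), so the separate active-range map and sort_node_map() go away; setting MAX_NUMNODES over the whole address space (as the numa_init() hunk below does) clears the associations. Consumers walk the annotated regions with the generic iterator; a hedged sketch:

    unsigned long start_pfn, end_pfn;
    int i, nid;

    /* iterate registered memory regions with their node ids attached;
     * MAX_NUMNODES acts as the wildcard node id */
    for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
            pr_debug("node %d: PFNs %lx-%lx\n", nid, start_pfn, end_pfn);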
@@ -545,6 +536,8 @@ static int __init numa_register_memblks(struct numa_meminfo *mi) | |||
545 | setup_node_data(nid, start, end); | 536 | setup_node_data(nid, start, end); |
546 | } | 537 | } |
547 | 538 | ||
539 | /* Dump memblock with node info and return. */ | ||
540 | memblock_dump_all(); | ||
548 | return 0; | 541 | return 0; |
549 | } | 542 | } |
550 | 543 | ||
@@ -582,7 +575,7 @@ static int __init numa_init(int (*init_func)(void)) | |||
582 | nodes_clear(node_possible_map); | 575 | nodes_clear(node_possible_map); |
583 | nodes_clear(node_online_map); | 576 | nodes_clear(node_online_map); |
584 | memset(&numa_meminfo, 0, sizeof(numa_meminfo)); | 577 | memset(&numa_meminfo, 0, sizeof(numa_meminfo)); |
585 | remove_all_active_ranges(); | 578 | WARN_ON(memblock_set_node(0, ULLONG_MAX, MAX_NUMNODES)); |
586 | numa_reset_distance(); | 579 | numa_reset_distance(); |
587 | 580 | ||
588 | ret = init_func(); | 581 | ret = init_func(); |
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 3adebe7e536a..534255a36b6b 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c | |||
@@ -199,23 +199,23 @@ void __init init_alloc_remap(int nid, u64 start, u64 end) | |||
199 | 199 | ||
200 | /* allocate node memory and the lowmem remap area */ | 200 | /* allocate node memory and the lowmem remap area */ |
201 | node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES); | 201 | node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES); |
202 | if (node_pa == MEMBLOCK_ERROR) { | 202 | if (!node_pa) { |
203 | pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n", | 203 | pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n", |
204 | size, nid); | 204 | size, nid); |
205 | return; | 205 | return; |
206 | } | 206 | } |
207 | memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM"); | 207 | memblock_reserve(node_pa, size); |
208 | 208 | ||
209 | remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT, | 209 | remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT, |
210 | max_low_pfn << PAGE_SHIFT, | 210 | max_low_pfn << PAGE_SHIFT, |
211 | size, LARGE_PAGE_BYTES); | 211 | size, LARGE_PAGE_BYTES); |
212 | if (remap_pa == MEMBLOCK_ERROR) { | 212 | if (!remap_pa) { |
213 | pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n", | 213 | pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n", |
214 | size, nid); | 214 | size, nid); |
215 | memblock_x86_free_range(node_pa, node_pa + size); | 215 | memblock_free(node_pa, size); |
216 | return; | 216 | return; |
217 | } | 217 | } |
218 | memblock_x86_reserve_range(remap_pa, remap_pa + size, "KVA PG"); | 218 | memblock_reserve(remap_pa, size); |
219 | remap_va = phys_to_virt(remap_pa); | 219 | remap_va = phys_to_virt(remap_pa); |
220 | 220 | ||
221 | /* perform actual remap */ | 221 | /* perform actual remap */ |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index dd27f401f0a0..92e27119ee1a 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -19,7 +19,7 @@ unsigned long __init numa_free_all_bootmem(void) | |||
19 | for_each_online_node(i) | 19 | for_each_online_node(i) |
20 | pages += free_all_bootmem_node(NODE_DATA(i)); | 20 | pages += free_all_bootmem_node(NODE_DATA(i)); |
21 | 21 | ||
22 | pages += free_all_memory_core_early(MAX_NUMNODES); | 22 | pages += free_low_memory_core_early(MAX_NUMNODES); |
23 | 23 | ||
24 | return pages; | 24 | return pages; |
25 | } | 25 | } |
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c index d0ed086b6247..46db56845f18 100644 --- a/arch/x86/mm/numa_emulation.c +++ b/arch/x86/mm/numa_emulation.c | |||
@@ -28,6 +28,16 @@ static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi) | |||
28 | return -ENOENT; | 28 | return -ENOENT; |
29 | } | 29 | } |
30 | 30 | ||
31 | static u64 mem_hole_size(u64 start, u64 end) | ||
32 | { | ||
33 | unsigned long start_pfn = PFN_UP(start); | ||
34 | unsigned long end_pfn = PFN_DOWN(end); | ||
35 | |||
36 | if (start_pfn < end_pfn) | ||
37 | return PFN_PHYS(absent_pages_in_range(start_pfn, end_pfn)); | ||
38 | return 0; | ||
39 | } | ||
40 | |||
31 | /* | 41 | /* |
32 | * Sets up nid to range from @start to @end. The return value is -errno if | 42 | * Sets up nid to range from @start to @end. The return value is -errno if |
33 | * something went wrong, 0 otherwise. | 43 | * something went wrong, 0 otherwise. |
@@ -89,7 +99,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei, | |||
89 | * Calculate target node size. x86_32 freaks on __udivdi3() so do | 99 | * Calculate target node size. x86_32 freaks on __udivdi3() so do |
90 | * the division in ulong number of pages and convert back. | 100 | * the division in ulong number of pages and convert back. |
91 | */ | 101 | */ |
92 | size = max_addr - addr - memblock_x86_hole_size(addr, max_addr); | 102 | size = max_addr - addr - mem_hole_size(addr, max_addr); |
93 | size = PFN_PHYS((unsigned long)(size >> PAGE_SHIFT) / nr_nodes); | 103 | size = PFN_PHYS((unsigned long)(size >> PAGE_SHIFT) / nr_nodes); |
94 | 104 | ||
95 | /* | 105 | /* |
@@ -135,8 +145,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei, | |||
135 | * Continue to add memory to this fake node if its | 145 | * Continue to add memory to this fake node if its |
136 | * non-reserved memory is less than the per-node size. | 146 | * non-reserved memory is less than the per-node size. |
137 | */ | 147 | */ |
138 | while (end - start - | 148 | while (end - start - mem_hole_size(start, end) < size) { |
139 | memblock_x86_hole_size(start, end) < size) { | ||
140 | end += FAKE_NODE_MIN_SIZE; | 149 | end += FAKE_NODE_MIN_SIZE; |
141 | if (end > limit) { | 150 | if (end > limit) { |
142 | end = limit; | 151 | end = limit; |
@@ -150,7 +159,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei, | |||
150 | * this one must extend to the boundary. | 159 | * this one must extend to the boundary. |
151 | */ | 160 | */ |
152 | if (end < dma32_end && dma32_end - end - | 161 | if (end < dma32_end && dma32_end - end - |
153 | memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) | 162 | mem_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) |
154 | end = dma32_end; | 163 | end = dma32_end; |
155 | 164 | ||
156 | /* | 165 | /* |
@@ -158,8 +167,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei, | |||
158 | * next node, this one must extend to the end of the | 167 | * next node, this one must extend to the end of the |
159 | * physical node. | 168 | * physical node. |
160 | */ | 169 | */ |
161 | if (limit - end - | 170 | if (limit - end - mem_hole_size(end, limit) < size) |
162 | memblock_x86_hole_size(end, limit) < size) | ||
163 | end = limit; | 171 | end = limit; |
164 | 172 | ||
165 | ret = emu_setup_memblk(ei, pi, nid++ % nr_nodes, | 173 | ret = emu_setup_memblk(ei, pi, nid++ % nr_nodes, |
@@ -180,7 +188,7 @@ static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size) | |||
180 | { | 188 | { |
181 | u64 end = start + size; | 189 | u64 end = start + size; |
182 | 190 | ||
183 | while (end - start - memblock_x86_hole_size(start, end) < size) { | 191 | while (end - start - mem_hole_size(start, end) < size) { |
184 | end += FAKE_NODE_MIN_SIZE; | 192 | end += FAKE_NODE_MIN_SIZE; |
185 | if (end > max_addr) { | 193 | if (end > max_addr) { |
186 | end = max_addr; | 194 | end = max_addr; |
@@ -211,8 +219,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei, | |||
211 | * creates a uniform distribution of node sizes across the entire | 219 | * creates a uniform distribution of node sizes across the entire |
212 | * machine (but not necessarily over physical nodes). | 220 | * machine (but not necessarily over physical nodes). |
213 | */ | 221 | */ |
214 | min_size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / | 222 | min_size = (max_addr - addr - mem_hole_size(addr, max_addr)) / MAX_NUMNODES; |
215 | MAX_NUMNODES; | ||
216 | min_size = max(min_size, FAKE_NODE_MIN_SIZE); | 223 | min_size = max(min_size, FAKE_NODE_MIN_SIZE); |
217 | if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size) | 224 | if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size) |
218 | min_size = (min_size + FAKE_NODE_MIN_SIZE) & | 225 | min_size = (min_size + FAKE_NODE_MIN_SIZE) & |
@@ -252,7 +259,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei, | |||
252 | * this one must extend to the boundary. | 259 | * this one must extend to the boundary. |
253 | */ | 260 | */ |
254 | if (end < dma32_end && dma32_end - end - | 261 | if (end < dma32_end && dma32_end - end - |
255 | memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) | 262 | mem_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) |
256 | end = dma32_end; | 263 | end = dma32_end; |
257 | 264 | ||
258 | /* | 265 | /* |
@@ -260,8 +267,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei, | |||
260 | * next node, this one must extend to the end of the | 267 | * next node, this one must extend to the end of the |
261 | * physical node. | 268 | * physical node. |
262 | */ | 269 | */ |
263 | if (limit - end - | 270 | if (limit - end - mem_hole_size(end, limit) < size) |
264 | memblock_x86_hole_size(end, limit) < size) | ||
265 | end = limit; | 271 | end = limit; |
266 | 272 | ||
267 | ret = emu_setup_memblk(ei, pi, nid++ % MAX_NUMNODES, | 273 | ret = emu_setup_memblk(ei, pi, nid++ % MAX_NUMNODES, |
@@ -351,11 +357,11 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt) | |||
351 | 357 | ||
352 | phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), | 358 | phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), |
353 | phys_size, PAGE_SIZE); | 359 | phys_size, PAGE_SIZE); |
354 | if (phys == MEMBLOCK_ERROR) { | 360 | if (!phys) { |
355 | pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n"); | 361 | pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n"); |
356 | goto no_emu; | 362 | goto no_emu; |
357 | } | 363 | } |
358 | memblock_x86_reserve_range(phys, phys + phys_size, "TMP NUMA DIST"); | 364 | memblock_reserve(phys, phys_size); |
359 | phys_dist = __va(phys); | 365 | phys_dist = __va(phys); |
360 | 366 | ||
361 | for (i = 0; i < numa_dist_cnt; i++) | 367 | for (i = 0; i < numa_dist_cnt; i++) |
@@ -424,7 +430,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt) | |||
424 | 430 | ||
425 | /* free the copied physical distance table */ | 431 | /* free the copied physical distance table */ |
426 | if (phys_dist) | 432 | if (phys_dist) |
427 | memblock_x86_free_range(__pa(phys_dist), __pa(phys_dist) + phys_size); | 433 | memblock_free(__pa(phys_dist), phys_size); |
428 | return; | 434 | return; |
429 | 435 | ||
430 | no_emu: | 436 | no_emu: |
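For reference, the emulation paths rewritten above are exercised from the command line; assuming the long-standing numa=fake syntax, either interleave variant can be requested:

    numa=fake=8       # split RAM into 8 interleaved emulated nodes
    numa=fake=512M    # emulated nodes of 512M each (size-interleaved path)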
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index f9e526742fa1..e1ebde315210 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -998,7 +998,7 @@ out_err: | |||
998 | } | 998 | } |
999 | EXPORT_SYMBOL(set_memory_uc); | 999 | EXPORT_SYMBOL(set_memory_uc); |
1000 | 1000 | ||
1001 | int _set_memory_array(unsigned long *addr, int addrinarray, | 1001 | static int _set_memory_array(unsigned long *addr, int addrinarray, |
1002 | unsigned long new_type) | 1002 | unsigned long new_type) |
1003 | { | 1003 | { |
1004 | int i, j; | 1004 | int i, j; |
@@ -1334,12 +1334,6 @@ void kernel_map_pages(struct page *page, int numpages, int enable) | |||
1334 | } | 1334 | } |
1335 | 1335 | ||
1336 | /* | 1336 | /* |
1337 | * If page allocator is not up yet then do not call c_p_a(): | ||
1338 | */ | ||
1339 | if (!debug_pagealloc_enabled) | ||
1340 | return; | ||
1341 | |||
1342 | /* | ||
1343 | * The return value is ignored as the calls cannot fail. | 1337 | * The return value is ignored as the calls cannot fail. |
1344 | * Large pages for identity mappings are not used at boot time | 1338 | * Large pages for identity mappings are not used at boot time |
1345 | * and hence no memory allocations during large page split. | 1339 | * and hence no memory allocations during large page split. |
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index 7efd0c615d58..1c1c4f46a7c1 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c | |||
@@ -69,6 +69,12 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) | |||
69 | if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0) | 69 | if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0) |
70 | return; | 70 | return; |
71 | pxm = pa->proximity_domain; | 71 | pxm = pa->proximity_domain; |
72 | apic_id = pa->apic_id; | ||
73 | if (!cpu_has_x2apic && (apic_id >= 0xff)) { | ||
74 | printk(KERN_INFO "SRAT: PXM %u -> X2APIC 0x%04x ignored\n", | ||
75 | pxm, apic_id); | ||
76 | return; | ||
77 | } | ||
72 | node = setup_node(pxm); | 78 | node = setup_node(pxm); |
73 | if (node < 0) { | 79 | if (node < 0) { |
74 | printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm); | 80 | printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm); |
@@ -76,7 +82,6 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) | |||
76 | return; | 82 | return; |
77 | } | 83 | } |
78 | 84 | ||
79 | apic_id = pa->apic_id; | ||
80 | if (apic_id >= MAX_LOCAL_APIC) { | 85 | if (apic_id >= MAX_LOCAL_APIC) { |
81 | printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node); | 86 | printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node); |
82 | return; | 87 | return; |
diff --git a/arch/x86/oprofile/Makefile b/arch/x86/oprofile/Makefile index 446902b2a6b6..1599f568f0e2 100644 --- a/arch/x86/oprofile/Makefile +++ b/arch/x86/oprofile/Makefile | |||
@@ -4,9 +4,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ | |||
4 | oprof.o cpu_buffer.o buffer_sync.o \ | 4 | oprof.o cpu_buffer.o buffer_sync.o \ |
5 | event_buffer.o oprofile_files.o \ | 5 | event_buffer.o oprofile_files.o \ |
6 | oprofilefs.o oprofile_stats.o \ | 6 | oprofilefs.o oprofile_stats.o \ |
7 | timer_int.o ) | 7 | timer_int.o nmi_timer_int.o ) |
8 | 8 | ||
9 | oprofile-y := $(DRIVER_OBJS) init.o backtrace.o | 9 | oprofile-y := $(DRIVER_OBJS) init.o backtrace.o |
10 | oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_amd.o \ | 10 | oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_amd.o \ |
11 | op_model_ppro.o op_model_p4.o | 11 | op_model_ppro.o op_model_p4.o |
12 | oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o | ||
diff --git a/arch/x86/oprofile/init.c b/arch/x86/oprofile/init.c index f148cf652678..9e138d00ad36 100644 --- a/arch/x86/oprofile/init.c +++ b/arch/x86/oprofile/init.c | |||
@@ -16,37 +16,23 @@ | |||
16 | * with the NMI mode driver. | 16 | * with the NMI mode driver. |
17 | */ | 17 | */ |
18 | 18 | ||
19 | #ifdef CONFIG_X86_LOCAL_APIC | ||
19 | extern int op_nmi_init(struct oprofile_operations *ops); | 20 | extern int op_nmi_init(struct oprofile_operations *ops); |
20 | extern int op_nmi_timer_init(struct oprofile_operations *ops); | ||
21 | extern void op_nmi_exit(void); | 21 | extern void op_nmi_exit(void); |
22 | extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth); | 22 | #else |
23 | static int op_nmi_init(struct oprofile_operations *ops) { return -ENODEV; } | ||
24 | static void op_nmi_exit(void) { } | ||
25 | #endif | ||
23 | 26 | ||
24 | static int nmi_timer; | 27 | extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth); |
25 | 28 | ||
26 | int __init oprofile_arch_init(struct oprofile_operations *ops) | 29 | int __init oprofile_arch_init(struct oprofile_operations *ops) |
27 | { | 30 | { |
28 | int ret; | ||
29 | |||
30 | ret = -ENODEV; | ||
31 | |||
32 | #ifdef CONFIG_X86_LOCAL_APIC | ||
33 | ret = op_nmi_init(ops); | ||
34 | #endif | ||
35 | nmi_timer = (ret != 0); | ||
36 | #ifdef CONFIG_X86_IO_APIC | ||
37 | if (nmi_timer) | ||
38 | ret = op_nmi_timer_init(ops); | ||
39 | #endif | ||
40 | ops->backtrace = x86_backtrace; | 31 | ops->backtrace = x86_backtrace; |
41 | 32 | return op_nmi_init(ops); | |
42 | return ret; | ||
43 | } | 33 | } |
44 | 34 | ||
45 | |||
46 | void oprofile_arch_exit(void) | 35 | void oprofile_arch_exit(void) |
47 | { | 36 | { |
48 | #ifdef CONFIG_X86_LOCAL_APIC | 37 | op_nmi_exit(); |
49 | if (!nmi_timer) | ||
50 | op_nmi_exit(); | ||
51 | #endif | ||
52 | } | 38 | } |
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 75f9528e0372..26b8a8514ee5 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c | |||
@@ -595,24 +595,36 @@ static int __init p4_init(char **cpu_type) | |||
595 | return 0; | 595 | return 0; |
596 | } | 596 | } |
597 | 597 | ||
598 | static int force_arch_perfmon; | 598 | enum __force_cpu_type { |
599 | static int force_cpu_type(const char *str, struct kernel_param *kp) | 599 | reserved = 0, /* do not force */ |
600 | timer, | ||
601 | arch_perfmon, | ||
602 | }; | ||
603 | |||
604 | static int force_cpu_type; | ||
605 | |||
606 | static int set_cpu_type(const char *str, struct kernel_param *kp) | ||
600 | { | 607 | { |
601 | if (!strcmp(str, "arch_perfmon")) { | 608 | if (!strcmp(str, "timer")) { |
602 | force_arch_perfmon = 1; | 609 | force_cpu_type = timer; |
610 | printk(KERN_INFO "oprofile: forcing NMI timer mode\n"); | ||
611 | } else if (!strcmp(str, "arch_perfmon")) { | ||
612 | force_cpu_type = arch_perfmon; | ||
603 | printk(KERN_INFO "oprofile: forcing architectural perfmon\n"); | 613 | printk(KERN_INFO "oprofile: forcing architectural perfmon\n"); |
614 | } else { | ||
615 | force_cpu_type = 0; | ||
604 | } | 616 | } |
605 | 617 | ||
606 | return 0; | 618 | return 0; |
607 | } | 619 | } |
608 | module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0); | 620 | module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0); |
609 | 621 | ||
610 | static int __init ppro_init(char **cpu_type) | 622 | static int __init ppro_init(char **cpu_type) |
611 | { | 623 | { |
612 | __u8 cpu_model = boot_cpu_data.x86_model; | 624 | __u8 cpu_model = boot_cpu_data.x86_model; |
613 | struct op_x86_model_spec *spec = &op_ppro_spec; /* default */ | 625 | struct op_x86_model_spec *spec = &op_ppro_spec; /* default */ |
614 | 626 | ||
615 | if (force_arch_perfmon && cpu_has_arch_perfmon) | 627 | if (force_cpu_type == arch_perfmon && cpu_has_arch_perfmon) |
616 | return 0; | 628 | return 0; |
617 | 629 | ||
618 | /* | 630 | /* |
@@ -679,6 +691,9 @@ int __init op_nmi_init(struct oprofile_operations *ops) | |||
679 | if (!cpu_has_apic) | 691 | if (!cpu_has_apic) |
680 | return -ENODEV; | 692 | return -ENODEV; |
681 | 693 | ||
694 | if (force_cpu_type == timer) | ||
695 | return -ENODEV; | ||
696 | |||
682 | switch (vendor) { | 697 | switch (vendor) { |
683 | case X86_VENDOR_AMD: | 698 | case X86_VENDOR_AMD: |
684 | /* Needs to be at least an Athlon (or hammer in 32bit mode) */ | 699 | /* Needs to be at least an Athlon (or hammer in 32bit mode) */ |
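The new enum is driven by the existing cpu_type parameter; assuming the usual module-parameter spelling on the boot line, forcing a mode looks like:

    oprofile.cpu_type=timer          # refuse NMI profiling, fall back to the
                                     # generic timer/NMI-timer modes
    oprofile.cpu_type=arch_perfmon   # force architectural perfmon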
diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c deleted file mode 100644 index 7f8052cd6620..000000000000 --- a/arch/x86/oprofile/nmi_timer_int.c +++ /dev/null | |||
@@ -1,50 +0,0 @@ | |||
1 | /** | ||
2 | * @file nmi_timer_int.c | ||
3 | * | ||
4 | * @remark Copyright 2003 OProfile authors | ||
5 | * @remark Read the file COPYING | ||
6 | * | ||
7 | * @author Zwane Mwaikambo <zwane@linuxpower.ca> | ||
8 | */ | ||
9 | |||
10 | #include <linux/init.h> | ||
11 | #include <linux/smp.h> | ||
12 | #include <linux/errno.h> | ||
13 | #include <linux/oprofile.h> | ||
14 | #include <linux/rcupdate.h> | ||
15 | #include <linux/kdebug.h> | ||
16 | |||
17 | #include <asm/nmi.h> | ||
18 | #include <asm/apic.h> | ||
19 | #include <asm/ptrace.h> | ||
20 | |||
21 | static int profile_timer_exceptions_notify(unsigned int val, struct pt_regs *regs) | ||
22 | { | ||
23 | oprofile_add_sample(regs, 0); | ||
24 | return NMI_HANDLED; | ||
25 | } | ||
26 | |||
27 | static int timer_start(void) | ||
28 | { | ||
29 | if (register_nmi_handler(NMI_LOCAL, profile_timer_exceptions_notify, | ||
30 | 0, "oprofile-timer")) | ||
31 | return 1; | ||
32 | return 0; | ||
33 | } | ||
34 | |||
35 | |||
36 | static void timer_stop(void) | ||
37 | { | ||
38 | unregister_nmi_handler(NMI_LOCAL, "oprofile-timer"); | ||
39 | synchronize_sched(); /* Allow already-started NMIs to complete. */ | ||
40 | } | ||
41 | |||
42 | |||
43 | int __init op_nmi_timer_init(struct oprofile_operations *ops) | ||
44 | { | ||
45 | ops->start = timer_start; | ||
46 | ops->stop = timer_stop; | ||
47 | ops->cpu_type = "timer"; | ||
48 | printk(KERN_INFO "oprofile: using NMI timer interrupt.\n"); | ||
49 | return 0; | ||
50 | } | ||
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index 6b8759f7634e..e76e18c94a3c 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile | |||
@@ -15,11 +15,12 @@ obj-$(CONFIG_X86_VISWS) += visws.o | |||
15 | 15 | ||
16 | obj-$(CONFIG_X86_NUMAQ) += numaq_32.o | 16 | obj-$(CONFIG_X86_NUMAQ) += numaq_32.o |
17 | 17 | ||
18 | obj-$(CONFIG_X86_MRST) += mrst.o | 18 | obj-$(CONFIG_X86_INTEL_MID) += mrst.o |
19 | 19 | ||
20 | obj-y += common.o early.o | 20 | obj-y += common.o early.o |
21 | obj-y += amd_bus.o bus_numa.o | 21 | obj-y += bus_numa.o |
22 | 22 | ||
23 | obj-$(CONFIG_AMD_NB) += amd_bus.o | ||
23 | obj-$(CONFIG_PCI_CNB20LE_QUIRK) += broadcom_bus.o | 24 | obj-$(CONFIG_PCI_CNB20LE_QUIRK) += broadcom_bus.o |
24 | 25 | ||
25 | ifeq ($(CONFIG_PCI_DEBUG),y) | 26 | ifeq ($(CONFIG_PCI_DEBUG),y) |
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 404f21a3ff9e..a312e76063a7 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c | |||
@@ -12,7 +12,7 @@ struct pci_root_info { | |||
12 | char *name; | 12 | char *name; |
13 | unsigned int res_num; | 13 | unsigned int res_num; |
14 | struct resource *res; | 14 | struct resource *res; |
15 | struct pci_bus *bus; | 15 | struct list_head *resources; |
16 | int busnum; | 16 | int busnum; |
17 | }; | 17 | }; |
18 | 18 | ||
@@ -24,6 +24,12 @@ static int __init set_use_crs(const struct dmi_system_id *id) | |||
24 | return 0; | 24 | return 0; |
25 | } | 25 | } |
26 | 26 | ||
27 | static int __init set_nouse_crs(const struct dmi_system_id *id) | ||
28 | { | ||
29 | pci_use_crs = false; | ||
30 | return 0; | ||
31 | } | ||
32 | |||
27 | static const struct dmi_system_id pci_use_crs_table[] __initconst = { | 33 | static const struct dmi_system_id pci_use_crs_table[] __initconst = { |
28 | /* http://bugzilla.kernel.org/show_bug.cgi?id=14183 */ | 34 | /* http://bugzilla.kernel.org/show_bug.cgi?id=14183 */ |
29 | { | 35 | { |
@@ -54,6 +60,29 @@ static const struct dmi_system_id pci_use_crs_table[] __initconst = { | |||
54 | DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), | 60 | DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), |
55 | }, | 61 | }, |
56 | }, | 62 | }, |
63 | |||
64 | /* Now for the blacklist.. */ | ||
65 | |||
66 | /* https://bugzilla.redhat.com/show_bug.cgi?id=769657 */ | ||
67 | { | ||
68 | .callback = set_nouse_crs, | ||
69 | .ident = "Dell Studio 1557", | ||
70 | .matches = { | ||
71 | DMI_MATCH(DMI_BOARD_VENDOR, "Dell Inc."), | ||
72 | DMI_MATCH(DMI_PRODUCT_NAME, "Studio 1557"), | ||
73 | DMI_MATCH(DMI_BIOS_VERSION, "A09"), | ||
74 | }, | ||
75 | }, | ||
76 | /* https://bugzilla.redhat.com/show_bug.cgi?id=769657 */ | ||
77 | { | ||
78 | .callback = set_nouse_crs, | ||
79 | .ident = "Thinkpad SL510", | ||
80 | .matches = { | ||
81 | DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), | ||
82 | DMI_MATCH(DMI_BOARD_NAME, "2847DFG"), | ||
83 | DMI_MATCH(DMI_BIOS_VERSION, "6JET85WW (1.43 )"), | ||
84 | }, | ||
85 | }, | ||
57 | {} | 86 | {} |
58 | }; | 87 | }; |
59 | 88 | ||
@@ -149,7 +178,7 @@ setup_resource(struct acpi_resource *acpi_res, void *data) | |||
149 | struct acpi_resource_address64 addr; | 178 | struct acpi_resource_address64 addr; |
150 | acpi_status status; | 179 | acpi_status status; |
151 | unsigned long flags; | 180 | unsigned long flags; |
152 | u64 start, end; | 181 | u64 start, orig_end, end; |
153 | 182 | ||
154 | status = resource_to_addr(acpi_res, &addr); | 183 | status = resource_to_addr(acpi_res, &addr); |
155 | if (!ACPI_SUCCESS(status)) | 184 | if (!ACPI_SUCCESS(status)) |
@@ -165,7 +194,21 @@ setup_resource(struct acpi_resource *acpi_res, void *data) | |||
165 | return AE_OK; | 194 | return AE_OK; |
166 | 195 | ||
167 | start = addr.minimum + addr.translation_offset; | 196 | start = addr.minimum + addr.translation_offset; |
168 | end = addr.maximum + addr.translation_offset; | 197 | orig_end = end = addr.maximum + addr.translation_offset; |
198 | |||
199 | /* Exclude non-addressable range or non-addressable portion of range */ | ||
200 | end = min(end, (u64)iomem_resource.end); | ||
201 | if (end <= start) { | ||
202 | dev_info(&info->bridge->dev, | ||
203 | "host bridge window [%#llx-%#llx] " | ||
204 | "(ignored, not CPU addressable)\n", start, orig_end); | ||
205 | return AE_OK; | ||
206 | } else if (orig_end != end) { | ||
207 | dev_info(&info->bridge->dev, | ||
208 | "host bridge window [%#llx-%#llx] " | ||
209 | "([%#llx-%#llx] ignored, not CPU addressable)\n", | ||
210 | start, orig_end, end + 1, orig_end); | ||
211 | } | ||
169 | 212 | ||
170 | res = &info->res[info->res_num]; | 213 | res = &info->res[info->res_num]; |
171 | res->name = info->name; | 214 | res->name = info->name; |
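The clipping added above assumes a host bridge window may extend past what the CPU can address (iomem_resource ends at the CPU's physical address limit): the window is truncated to that limit, and dropped entirely when nothing addressable remains. In essence:

    end = min(end, (u64)iomem_resource.end);   /* truncate to CPU-addressable */
    if (end <= start)
            return AE_OK;                      /* window entirely unusable */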
@@ -261,23 +304,20 @@ static void add_resources(struct pci_root_info *info) | |||
261 | "ignoring host bridge window %pR (conflicts with %s %pR)\n", | 304 | "ignoring host bridge window %pR (conflicts with %s %pR)\n", |
262 | res, conflict->name, conflict); | 305 | res, conflict->name, conflict); |
263 | else | 306 | else |
264 | pci_bus_add_resource(info->bus, res, 0); | 307 | pci_add_resource(info->resources, res); |
265 | } | 308 | } |
266 | } | 309 | } |
267 | 310 | ||
268 | static void | 311 | static void |
269 | get_current_resources(struct acpi_device *device, int busnum, | 312 | get_current_resources(struct acpi_device *device, int busnum, |
270 | int domain, struct pci_bus *bus) | 313 | int domain, struct list_head *resources) |
271 | { | 314 | { |
272 | struct pci_root_info info; | 315 | struct pci_root_info info; |
273 | size_t size; | 316 | size_t size; |
274 | 317 | ||
275 | if (pci_use_crs) | ||
276 | pci_bus_remove_resources(bus); | ||
277 | |||
278 | info.bridge = device; | 318 | info.bridge = device; |
279 | info.bus = bus; | ||
280 | info.res_num = 0; | 319 | info.res_num = 0; |
320 | info.resources = resources; | ||
281 | acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_resource, | 321 | acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_resource, |
282 | &info); | 322 | &info); |
283 | if (!info.res_num) | 323 | if (!info.res_num) |
@@ -286,7 +326,7 @@ get_current_resources(struct acpi_device *device, int busnum, | |||
286 | size = sizeof(*info.res) * info.res_num; | 326 | size = sizeof(*info.res) * info.res_num; |
287 | info.res = kmalloc(size, GFP_KERNEL); | 327 | info.res = kmalloc(size, GFP_KERNEL); |
288 | if (!info.res) | 328 | if (!info.res) |
289 | goto res_alloc_fail; | 329 | return; |
290 | 330 | ||
291 | info.name = kasprintf(GFP_KERNEL, "PCI Bus %04x:%02x", domain, busnum); | 331 | info.name = kasprintf(GFP_KERNEL, "PCI Bus %04x:%02x", domain, busnum); |
292 | if (!info.name) | 332 | if (!info.name) |
@@ -301,8 +341,6 @@ get_current_resources(struct acpi_device *device, int busnum, | |||
301 | 341 | ||
302 | name_alloc_fail: | 342 | name_alloc_fail: |
303 | kfree(info.res); | 343 | kfree(info.res); |
304 | res_alloc_fail: | ||
305 | return; | ||
306 | } | 344 | } |
307 | 345 | ||
308 | struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) | 346 | struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) |
@@ -310,6 +348,7 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) | |||
310 | struct acpi_device *device = root->device; | 348 | struct acpi_device *device = root->device; |
311 | int domain = root->segment; | 349 | int domain = root->segment; |
312 | int busnum = root->secondary.start; | 350 | int busnum = root->secondary.start; |
351 | LIST_HEAD(resources); | ||
313 | struct pci_bus *bus; | 352 | struct pci_bus *bus; |
314 | struct pci_sysdata *sd; | 353 | struct pci_sysdata *sd; |
315 | int node; | 354 | int node; |
@@ -364,11 +403,15 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) | |||
364 | memcpy(bus->sysdata, sd, sizeof(*sd)); | 403 | memcpy(bus->sysdata, sd, sizeof(*sd)); |
365 | kfree(sd); | 404 | kfree(sd); |
366 | } else { | 405 | } else { |
367 | bus = pci_create_bus(NULL, busnum, &pci_root_ops, sd); | 406 | get_current_resources(device, busnum, domain, &resources); |
368 | if (bus) { | 407 | if (list_empty(&resources)) |
369 | get_current_resources(device, busnum, domain, bus); | 408 | x86_pci_root_bus_resources(busnum, &resources); |
409 | bus = pci_create_root_bus(NULL, busnum, &pci_root_ops, sd, | ||
410 | &resources); | ||
411 | if (bus) | ||
370 | bus->subordinate = pci_scan_child_bus(bus); | 412 | bus->subordinate = pci_scan_child_bus(bus); |
371 | } | 413 | else |
414 | pci_free_resource_list(&resources); | ||
372 | } | 415 | } |
373 | 416 | ||
374 | /* After the PCI-E bus has been walked and all devices discovered, | 417 | /* After the PCI-E bus has been walked and all devices discovered, |
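This is the new root-bus bring-up pattern used throughout the series: build the window list first, create the bus with it, and free the list only when creation fails. A condensed sketch of the flow above:

    LIST_HEAD(resources);
    struct pci_bus *bus;

    get_current_resources(device, busnum, domain, &resources);
    if (list_empty(&resources))
            x86_pci_root_bus_resources(busnum, &resources);   /* fallback */
    bus = pci_create_root_bus(NULL, busnum, &pci_root_ops, sd, &resources);
    if (!bus)
            pci_free_resource_list(&resources);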
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 026e4931d162..0567df3890e1 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c | |||
@@ -30,34 +30,6 @@ static struct pci_hostbridge_probe pci_probes[] __initdata = { | |||
30 | { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1300 }, | 30 | { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1300 }, |
31 | }; | 31 | }; |
32 | 32 | ||
33 | static u64 __initdata fam10h_mmconf_start; | ||
34 | static u64 __initdata fam10h_mmconf_end; | ||
35 | static void __init get_pci_mmcfg_amd_fam10h_range(void) | ||
36 | { | ||
37 | u32 address; | ||
38 | u64 base, msr; | ||
39 | unsigned segn_busn_bits; | ||
40 | |||
41 | /* assume all cpus from fam10h have mmconf */ | ||
42 | if (boot_cpu_data.x86 < 0x10) | ||
43 | return; | ||
44 | |||
45 | address = MSR_FAM10H_MMIO_CONF_BASE; | ||
46 | rdmsrl(address, msr); | ||
47 | |||
48 | /* mmconfig is not enable */ | ||
49 | if (!(msr & FAM10H_MMIO_CONF_ENABLE)) | ||
50 | return; | ||
51 | |||
52 | base = msr & (FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT); | ||
53 | |||
54 | segn_busn_bits = (msr >> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) & | ||
55 | FAM10H_MMIO_CONF_BUSRANGE_MASK; | ||
56 | |||
57 | fam10h_mmconf_start = base; | ||
58 | fam10h_mmconf_end = base + (1ULL<<(segn_busn_bits + 20)) - 1; | ||
59 | } | ||
60 | |||
61 | #define RANGE_NUM 16 | 33 | #define RANGE_NUM 16 |
62 | 34 | ||
63 | /** | 35 | /** |
@@ -85,6 +57,9 @@ static int __init early_fill_mp_bus_info(void) | |||
85 | u64 val; | 57 | u64 val; |
86 | u32 address; | 58 | u32 address; |
87 | bool found; | 59 | bool found; |
60 | struct resource fam10h_mmconf_res, *fam10h_mmconf; | ||
61 | u64 fam10h_mmconf_start; | ||
62 | u64 fam10h_mmconf_end; | ||
88 | 63 | ||
89 | if (!early_pci_allowed()) | 64 | if (!early_pci_allowed()) |
90 | return -1; | 65 | return -1; |
@@ -211,12 +186,17 @@ static int __init early_fill_mp_bus_info(void) | |||
211 | subtract_range(range, RANGE_NUM, 0, end); | 186 | subtract_range(range, RANGE_NUM, 0, end); |
212 | 187 | ||
213 | /* get mmconfig */ | 188 | /* get mmconfig */ |
214 | get_pci_mmcfg_amd_fam10h_range(); | 189 | fam10h_mmconf = amd_get_mmconfig_range(&fam10h_mmconf_res); |
215 | /* need to take out mmconf range */ | 190 | /* need to take out mmconf range */ |
216 | if (fam10h_mmconf_end) { | 191 | if (fam10h_mmconf) { |
217 | printk(KERN_DEBUG "Fam 10h mmconf [%llx, %llx]\n", fam10h_mmconf_start, fam10h_mmconf_end); | 192 | printk(KERN_DEBUG "Fam 10h mmconf %pR\n", fam10h_mmconf); |
193 | fam10h_mmconf_start = fam10h_mmconf->start; | ||
194 | fam10h_mmconf_end = fam10h_mmconf->end; | ||
218 | subtract_range(range, RANGE_NUM, fam10h_mmconf_start, | 195 | subtract_range(range, RANGE_NUM, fam10h_mmconf_start, |
219 | fam10h_mmconf_end + 1); | 196 | fam10h_mmconf_end + 1); |
197 | } else { | ||
198 | fam10h_mmconf_start = 0; | ||
199 | fam10h_mmconf_end = 0; | ||
220 | } | 200 | } |
221 | 201 | ||
222 | /* mmio resource */ | 202 | /* mmio resource */ |
@@ -403,7 +383,6 @@ static void __init pci_enable_pci_io_ecs(void) | |||
403 | ++n; | 383 | ++n; |
404 | } | 384 | } |
405 | } | 385 | } |
406 | pr_info("Extended Config Space enabled on %u nodes\n", n); | ||
407 | #endif | 386 | #endif |
408 | } | 387 | } |
409 | 388 | ||
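The open-coded MSR parsing deleted above now lives behind a shared helper; assuming the amd_nb implementation this series introduces, amd_get_mmconfig_range() fills a caller-provided struct resource from the Fam10h MMIO_CONF_BASE MSR and returns NULL when MMCONFIG is disabled:

    struct resource mmconf_res, *mmconf;

    mmconf = amd_get_mmconfig_range(&mmconf_res);
    if (mmconf)   /* carve the MMCONFIG window out of the MMIO ranges */
            subtract_range(range, RANGE_NUM, mmconf->start, mmconf->end + 1);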
diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c index ab8269b0da29..f3a7c569a403 100644 --- a/arch/x86/pci/broadcom_bus.c +++ b/arch/x86/pci/broadcom_bus.c | |||
@@ -15,10 +15,11 @@ | |||
15 | #include <linux/pci.h> | 15 | #include <linux/pci.h> |
16 | #include <linux/init.h> | 16 | #include <linux/init.h> |
17 | #include <asm/pci_x86.h> | 17 | #include <asm/pci_x86.h> |
18 | #include <asm/pci-direct.h> | ||
18 | 19 | ||
19 | #include "bus_numa.h" | 20 | #include "bus_numa.h" |
20 | 21 | ||
21 | static void __devinit cnb20le_res(struct pci_dev *dev) | 22 | static void __init cnb20le_res(u8 bus, u8 slot, u8 func) |
22 | { | 23 | { |
23 | struct pci_root_info *info; | 24 | struct pci_root_info *info; |
24 | struct resource res; | 25 | struct resource res; |
@@ -26,21 +27,12 @@ static void __devinit cnb20le_res(struct pci_dev *dev) | |||
26 | u8 fbus, lbus; | 27 | u8 fbus, lbus; |
27 | int i; | 28 | int i; |
28 | 29 | ||
29 | #ifdef CONFIG_ACPI | ||
30 | /* | ||
31 | * We should get host bridge information from ACPI unless the BIOS | ||
32 | * doesn't support it. | ||
33 | */ | ||
34 | if (acpi_os_get_root_pointer()) | ||
35 | return; | ||
36 | #endif | ||
37 | |||
38 | info = &pci_root_info[pci_root_num]; | 30 | info = &pci_root_info[pci_root_num]; |
39 | pci_root_num++; | 31 | pci_root_num++; |
40 | 32 | ||
41 | /* read the PCI bus numbers */ | 33 | /* read the PCI bus numbers */ |
42 | pci_read_config_byte(dev, 0x44, &fbus); | 34 | fbus = read_pci_config_byte(bus, slot, func, 0x44); |
43 | pci_read_config_byte(dev, 0x45, &lbus); | 35 | lbus = read_pci_config_byte(bus, slot, func, 0x45); |
44 | info->bus_min = fbus; | 36 | info->bus_min = fbus; |
45 | info->bus_max = lbus; | 37 | info->bus_max = lbus; |
46 | 38 | ||
@@ -59,8 +51,8 @@ static void __devinit cnb20le_res(struct pci_dev *dev) | |||
59 | } | 51 | } |
60 | 52 | ||
61 | /* read the non-prefetchable memory window */ | 53 | /* read the non-prefetchable memory window */ |
62 | pci_read_config_word(dev, 0xc0, &word1); | 54 | word1 = read_pci_config_16(bus, slot, func, 0xc0); |
63 | pci_read_config_word(dev, 0xc2, &word2); | 55 | word2 = read_pci_config_16(bus, slot, func, 0xc2); |
64 | if (word1 != word2) { | 56 | if (word1 != word2) { |
65 | res.start = (word1 << 16) | 0x0000; | 57 | res.start = (word1 << 16) | 0x0000; |
66 | res.end = (word2 << 16) | 0xffff; | 58 | res.end = (word2 << 16) | 0xffff; |
@@ -69,8 +61,8 @@ static void __devinit cnb20le_res(struct pci_dev *dev) | |||
69 | } | 61 | } |
70 | 62 | ||
71 | /* read the prefetchable memory window */ | 63 | /* read the prefetchable memory window */ |
72 | pci_read_config_word(dev, 0xc4, &word1); | 64 | word1 = read_pci_config_16(bus, slot, func, 0xc4); |
73 | pci_read_config_word(dev, 0xc6, &word2); | 65 | word2 = read_pci_config_16(bus, slot, func, 0xc6); |
74 | if (word1 != word2) { | 66 | if (word1 != word2) { |
75 | res.start = (word1 << 16) | 0x0000; | 67 | res.start = (word1 << 16) | 0x0000; |
76 | res.end = (word2 << 16) | 0xffff; | 68 | res.end = (word2 << 16) | 0xffff; |
@@ -79,8 +71,8 @@ static void __devinit cnb20le_res(struct pci_dev *dev) | |||
79 | } | 71 | } |
80 | 72 | ||
81 | /* read the IO port window */ | 73 | /* read the IO port window */ |
82 | pci_read_config_word(dev, 0xd0, &word1); | 74 | word1 = read_pci_config_16(bus, slot, func, 0xd0); |
83 | pci_read_config_word(dev, 0xd2, &word2); | 75 | word2 = read_pci_config_16(bus, slot, func, 0xd2); |
84 | if (word1 != word2) { | 76 | if (word1 != word2) { |
85 | res.start = word1; | 77 | res.start = word1; |
86 | res.end = word2; | 78 | res.end = word2; |
@@ -92,13 +84,37 @@ static void __devinit cnb20le_res(struct pci_dev *dev) | |||
92 | res.start = fbus; | 84 | res.start = fbus; |
93 | res.end = lbus; | 85 | res.end = lbus; |
94 | res.flags = IORESOURCE_BUS; | 86 | res.flags = IORESOURCE_BUS; |
95 | dev_info(&dev->dev, "CNB20LE PCI Host Bridge (domain %04x %pR)\n", | 87 | printk(KERN_INFO "CNB20LE PCI Host Bridge (domain 0000 %pR)\n", &res); |
96 | pci_domain_nr(dev->bus), &res); | ||
97 | 88 | ||
98 | for (i = 0; i < info->res_num; i++) | 89 | for (i = 0; i < info->res_num; i++) |
99 | dev_info(&dev->dev, "host bridge window %pR\n", &info->res[i]); | 90 | printk(KERN_INFO "host bridge window %pR\n", &info->res[i]); |
100 | } | 91 | } |
101 | 92 | ||
102 | DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_LE, | 93 | static int __init broadcom_postcore_init(void) |
103 | cnb20le_res); | 94 | { |
95 | u8 bus = 0, slot = 0; | ||
96 | u32 id; | ||
97 | u16 vendor, device; | ||
98 | |||
99 | #ifdef CONFIG_ACPI | ||
100 | /* | ||
101 | * We should get host bridge information from ACPI unless the BIOS | ||
102 | * doesn't support it. | ||
103 | */ | ||
104 | if (acpi_os_get_root_pointer()) | ||
105 | return 0; | ||
106 | #endif | ||
107 | |||
108 | id = read_pci_config(bus, slot, 0, PCI_VENDOR_ID); | ||
109 | vendor = id & 0xffff; | ||
110 | device = (id >> 16) & 0xffff; | ||
111 | |||
112 | if (vendor == PCI_VENDOR_ID_SERVERWORKS && | ||
113 | device == PCI_DEVICE_ID_SERVERWORKS_LE) { | ||
114 | cnb20le_res(bus, slot, 0); | ||
115 | cnb20le_res(bus, slot, 1); | ||
116 | } | ||
117 | return 0; | ||
118 | } | ||
104 | 119 | ||
120 | postcore_initcall(broadcom_postcore_init); | ||
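Rationale, as far as the hunks show: root-bus windows must now be known before pci_create_root_bus() runs, which is earlier than PCI fixups execute, so the probe moves from a DECLARE_PCI_FIXUP_EARLY hook to a postcore_initcall using the asm/pci-direct.h helpers (type-1 config accesses that need no struct pci_dev):

    u32 id = read_pci_config(0, 0, 0, PCI_VENDOR_ID);
    u16 vendor = id & 0xffff;
    u16 device = (id >> 16) & 0xffff;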
diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c index 64a122883896..fd3f65510e9d 100644 --- a/arch/x86/pci/bus_numa.c +++ b/arch/x86/pci/bus_numa.c | |||
@@ -7,45 +7,50 @@ | |||
7 | int pci_root_num; | 7 | int pci_root_num; |
8 | struct pci_root_info pci_root_info[PCI_ROOT_NR]; | 8 | struct pci_root_info pci_root_info[PCI_ROOT_NR]; |
9 | 9 | ||
10 | void x86_pci_root_bus_res_quirks(struct pci_bus *b) | 10 | void x86_pci_root_bus_resources(int bus, struct list_head *resources) |
11 | { | 11 | { |
12 | int i; | 12 | int i; |
13 | int j; | 13 | int j; |
14 | struct pci_root_info *info; | 14 | struct pci_root_info *info; |
15 | 15 | ||
16 | /* don't go for it if _CRS is used already */ | ||
17 | if (b->resource[0] != &ioport_resource || | ||
18 | b->resource[1] != &iomem_resource) | ||
19 | return; | ||
20 | |||
21 | if (!pci_root_num) | 16 | if (!pci_root_num) |
22 | return; | 17 | goto default_resources; |
23 | 18 | ||
24 | for (i = 0; i < pci_root_num; i++) { | 19 | for (i = 0; i < pci_root_num; i++) { |
25 | if (pci_root_info[i].bus_min == b->number) | 20 | if (pci_root_info[i].bus_min == bus) |
26 | break; | 21 | break; |
27 | } | 22 | } |
28 | 23 | ||
29 | if (i == pci_root_num) | 24 | if (i == pci_root_num) |
30 | return; | 25 | goto default_resources; |
31 | 26 | ||
32 | printk(KERN_DEBUG "PCI: peer root bus %02x res updated from pci conf\n", | 27 | printk(KERN_DEBUG "PCI: root bus %02x: hardware-probed resources\n", |
33 | b->number); | 28 | bus); |
34 | 29 | ||
35 | pci_bus_remove_resources(b); | ||
36 | info = &pci_root_info[i]; | 30 | info = &pci_root_info[i]; |
37 | for (j = 0; j < info->res_num; j++) { | 31 | for (j = 0; j < info->res_num; j++) { |
38 | struct resource *res; | 32 | struct resource *res; |
39 | struct resource *root; | 33 | struct resource *root; |
40 | 34 | ||
41 | res = &info->res[j]; | 35 | res = &info->res[j]; |
42 | pci_bus_add_resource(b, res, 0); | 36 | pci_add_resource(resources, res); |
43 | if (res->flags & IORESOURCE_IO) | 37 | if (res->flags & IORESOURCE_IO) |
44 | root = &ioport_resource; | 38 | root = &ioport_resource; |
45 | else | 39 | else |
46 | root = &iomem_resource; | 40 | root = &iomem_resource; |
47 | insert_resource(root, res); | 41 | insert_resource(root, res); |
48 | } | 42 | } |
43 | return; | ||
44 | |||
45 | default_resources: | ||
46 | /* | ||
47 | * We don't have any host bridge aperture information from the | ||
48 | * "native host bridge drivers," e.g., amd_bus or broadcom_bus, | ||
49 | * so fall back to the defaults historically used by pci_create_bus(). | ||
50 | */ | ||
51 | printk(KERN_DEBUG "PCI: root bus %02x: using default resources\n", bus); | ||
52 | pci_add_resource(resources, &ioport_resource); | ||
53 | pci_add_resource(resources, &iomem_resource); | ||
49 | } | 54 | } |
50 | 55 | ||
51 | void __devinit update_res(struct pci_root_info *info, resource_size_t start, | 56 | void __devinit update_res(struct pci_root_info *info, resource_size_t start, |
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 7962ccb4d9b2..323481e06ef8 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c | |||
@@ -164,9 +164,6 @@ void __devinit pcibios_fixup_bus(struct pci_bus *b) | |||
164 | { | 164 | { |
165 | struct pci_dev *dev; | 165 | struct pci_dev *dev; |
166 | 166 | ||
167 | /* root bus? */ | ||
168 | if (!b->parent) | ||
169 | x86_pci_root_bus_res_quirks(b); | ||
170 | pci_read_bridge_bases(b); | 167 | pci_read_bridge_bases(b); |
171 | list_for_each_entry(dev, &b->devices, bus_list) | 168 | list_for_each_entry(dev, &b->devices, bus_list) |
172 | pcibios_fixup_device_resources(dev); | 169 | pcibios_fixup_device_resources(dev); |
@@ -433,6 +430,7 @@ void __init dmi_check_pciprobe(void) | |||
433 | 430 | ||
434 | struct pci_bus * __devinit pcibios_scan_root(int busnum) | 431 | struct pci_bus * __devinit pcibios_scan_root(int busnum) |
435 | { | 432 | { |
433 | LIST_HEAD(resources); | ||
436 | struct pci_bus *bus = NULL; | 434 | struct pci_bus *bus = NULL; |
437 | struct pci_sysdata *sd; | 435 | struct pci_sysdata *sd; |
438 | 436 | ||
@@ -456,9 +454,12 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum) | |||
456 | sd->node = get_mp_bus_to_node(busnum); | 454 | sd->node = get_mp_bus_to_node(busnum); |
457 | 455 | ||
458 | printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum); | 456 | printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum); |
459 | bus = pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd); | 457 | x86_pci_root_bus_resources(busnum, &resources); |
460 | if (!bus) | 458 | bus = pci_scan_root_bus(NULL, busnum, &pci_root_ops, sd, &resources); |
459 | if (!bus) { | ||
460 | pci_free_resource_list(&resources); | ||
461 | kfree(sd); | 461 | kfree(sd); |
462 | } | ||
462 | 463 | ||
463 | return bus; | 464 | return bus; |
464 | } | 465 | } |
@@ -639,6 +640,7 @@ int pci_ext_cfg_avail(struct pci_dev *dev) | |||
639 | 640 | ||
640 | struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node) | 641 | struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node) |
641 | { | 642 | { |
643 | LIST_HEAD(resources); | ||
642 | struct pci_bus *bus = NULL; | 644 | struct pci_bus *bus = NULL; |
643 | struct pci_sysdata *sd; | 645 | struct pci_sysdata *sd; |
644 | 646 | ||
@@ -653,9 +655,12 @@ struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops, | |||
653 | return NULL; | 655 | return NULL; |
654 | } | 656 | } |
655 | sd->node = node; | 657 | sd->node = node; |
656 | bus = pci_scan_bus(busno, ops, sd); | 658 | x86_pci_root_bus_resources(busno, &resources); |
657 | if (!bus) | 659 | bus = pci_scan_root_bus(NULL, busno, ops, sd, &resources); |
660 | if (!bus) { | ||
661 | pci_free_resource_list(&resources); | ||
658 | kfree(sd); | 662 | kfree(sd); |
663 | } | ||
659 | 664 | ||
660 | return bus; | 665 | return bus; |
661 | } | 666 | } |
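Both call sites above switch to pci_scan_root_bus(), which in this era creates the root bus with the given resource list, scans it, and registers the discovered devices itself; that is why the explicit pci_bus_add_devices() calls disappear from pci_legacy_init() and pci_numaq_init() further below. The shape of the call:

    LIST_HEAD(resources);

    x86_pci_root_bus_resources(busnum, &resources);
    bus = pci_scan_root_bus(NULL, busnum, &pci_root_ops, sd, &resources);
    if (!bus)
            pci_free_resource_list(&resources);   /* list freed only on failure */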
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 794b092d01ae..91821a1a0c3a 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c | |||
@@ -254,26 +254,6 @@ void __init pcibios_resource_survey(void) | |||
254 | */ | 254 | */ |
255 | fs_initcall(pcibios_assign_resources); | 255 | fs_initcall(pcibios_assign_resources); |
256 | 256 | ||
257 | /* | ||
258 | * If we set up a device for bus mastering, we need to check the latency | ||
259 | * timer as certain crappy BIOSes forget to set it properly. | ||
260 | */ | ||
261 | unsigned int pcibios_max_latency = 255; | ||
262 | |||
263 | void pcibios_set_master(struct pci_dev *dev) | ||
264 | { | ||
265 | u8 lat; | ||
266 | pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat); | ||
267 | if (lat < 16) | ||
268 | lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency; | ||
269 | else if (lat > pcibios_max_latency) | ||
270 | lat = pcibios_max_latency; | ||
271 | else | ||
272 | return; | ||
273 | dev_printk(KERN_DEBUG, &dev->dev, "setting latency timer to %d\n", lat); | ||
274 | pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); | ||
275 | } | ||
276 | |||
277 | static const struct vm_operations_struct pci_mmap_ops = { | 257 | static const struct vm_operations_struct pci_mmap_ops = { |
278 | .access = generic_access_phys, | 258 | .access = generic_access_phys, |
279 | }; | 259 | }; |
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index 2c2aeabc2609..a1df191129d3 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c | |||
@@ -31,9 +31,6 @@ int __init pci_legacy_init(void) | |||
31 | 31 | ||
32 | printk("PCI: Probing PCI hardware\n"); | 32 | printk("PCI: Probing PCI hardware\n"); |
33 | pci_root_bus = pcibios_scan_root(0); | 33 | pci_root_bus = pcibios_scan_root(0); |
34 | if (pci_root_bus) | ||
35 | pci_bus_add_devices(pci_root_bus); | ||
36 | |||
37 | return 0; | 34 | return 0; |
38 | } | 35 | } |
39 | 36 | ||
diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c index 51abf02f9226..83e125b95ca6 100644 --- a/arch/x86/pci/numaq_32.c +++ b/arch/x86/pci/numaq_32.c | |||
@@ -153,8 +153,6 @@ int __init pci_numaq_init(void) | |||
153 | raw_pci_ops = &pci_direct_conf1_mq; | 153 | raw_pci_ops = &pci_direct_conf1_mq; |
154 | 154 | ||
155 | pci_root_bus = pcibios_scan_root(0); | 155 | pci_root_bus = pcibios_scan_root(0); |
156 | if (pci_root_bus) | ||
157 | pci_bus_add_devices(pci_root_bus); | ||
158 | if (num_online_nodes() > 1) | 156 | if (num_online_nodes() > 1) |
159 | for_each_online_node(quad) { | 157 | for_each_online_node(quad) { |
160 | if (quad == 0) | 158 | if (quad == 0) |
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index db0e9a51e611..da8fe0535ff4 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c | |||
@@ -44,7 +44,7 @@ static inline void set_bios_x(void) | |||
44 | pcibios_enabled = 1; | 44 | pcibios_enabled = 1; |
45 | set_memory_x(PAGE_OFFSET + BIOS_BEGIN, (BIOS_END - BIOS_BEGIN) >> PAGE_SHIFT); | 45 | set_memory_x(PAGE_OFFSET + BIOS_BEGIN, (BIOS_END - BIOS_BEGIN) >> PAGE_SHIFT); |
46 | if (__supported_pte_mask & _PAGE_NX) | 46 | if (__supported_pte_mask & _PAGE_NX) |
47 | printk(KERN_INFO "PCI : PCI BIOS aera is rw and x. Use pci=nobios if you want it NX.\n"); | 47 | printk(KERN_INFO "PCI : PCI BIOS area is rw and x. Use pci=nobios if you want it NX.\n"); |
48 | } | 48 | } |
49 | 49 | ||
50 | /* | 50 | /* |
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 37718f0f053d..4cf9bd0a1653 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c | |||
@@ -238,7 +238,8 @@ static efi_status_t __init phys_efi_get_time(efi_time_t *tm, | |||
238 | 238 | ||
239 | spin_lock_irqsave(&rtc_lock, flags); | 239 | spin_lock_irqsave(&rtc_lock, flags); |
240 | efi_call_phys_prelog(); | 240 | efi_call_phys_prelog(); |
241 | status = efi_call_phys2(efi_phys.get_time, tm, tc); | 241 | status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm), |
242 | virt_to_phys(tc)); | ||
242 | efi_call_phys_epilog(); | 243 | efi_call_phys_epilog(); |
243 | spin_unlock_irqrestore(&rtc_lock, flags); | 244 | spin_unlock_irqrestore(&rtc_lock, flags); |
244 | return status; | 245 | return status; |
@@ -352,8 +353,7 @@ void __init efi_memblock_x86_reserve_range(void) | |||
352 | boot_params.efi_info.efi_memdesc_size; | 353 | boot_params.efi_info.efi_memdesc_size; |
353 | memmap.desc_version = boot_params.efi_info.efi_memdesc_version; | 354 | memmap.desc_version = boot_params.efi_info.efi_memdesc_version; |
354 | memmap.desc_size = boot_params.efi_info.efi_memdesc_size; | 355 | memmap.desc_size = boot_params.efi_info.efi_memdesc_size; |
355 | memblock_x86_reserve_range(pmap, pmap + memmap.nr_map * memmap.desc_size, | 356 | memblock_reserve(pmap, memmap.nr_map * memmap.desc_size); |
356 | "EFI memmap"); | ||
357 | } | 357 | } |
358 | 358 | ||
359 | #if EFI_DEBUG | 359 | #if EFI_DEBUG |
@@ -397,16 +397,14 @@ void __init efi_reserve_boot_services(void) | |||
397 | if ((start+size >= virt_to_phys(_text) | 397 | if ((start+size >= virt_to_phys(_text) |
398 | && start <= virt_to_phys(_end)) || | 398 | && start <= virt_to_phys(_end)) || |
399 | !e820_all_mapped(start, start+size, E820_RAM) || | 399 | !e820_all_mapped(start, start+size, E820_RAM) || |
400 | memblock_x86_check_reserved_size(&start, &size, | 400 | memblock_is_region_reserved(start, size)) { |
401 | 1<<EFI_PAGE_SHIFT)) { | ||
402 | /* Could not reserve, skip it */ | 401 | /* Could not reserve, skip it */ |
403 | md->num_pages = 0; | 402 | md->num_pages = 0; |
404 | memblock_dbg(PFX "Could not reserve boot range " | 403 | memblock_dbg(PFX "Could not reserve boot range " |
405 | "[0x%010llx-0x%010llx]\n", | 404 | "[0x%010llx-0x%010llx]\n", |
406 | start, start+size-1); | 405 | start, start+size-1); |
407 | } else | 406 | } else |
408 | memblock_x86_reserve_range(start, start+size, | 407 | memblock_reserve(start, size); |
409 | "EFI Boot"); | ||
410 | } | 408 | } |
411 | } | 409 | } |
412 | 410 | ||
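The get_time fix is worth spelling out: before SetVirtualAddressMap, EFI runtime services execute with physical addressing, so any pointer handed to the efi_call_physN() wrappers must itself be a physical address. Hence the corrected call:

    status = efi_call_phys2(efi_phys.get_time,
                            virt_to_phys(tm), virt_to_phys(tc));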
diff --git a/arch/x86/platform/geode/alix.c b/arch/x86/platform/geode/alix.c index ca1973699d3d..dc5f1d32aced 100644 --- a/arch/x86/platform/geode/alix.c +++ b/arch/x86/platform/geode/alix.c | |||
@@ -27,7 +27,7 @@ | |||
27 | 27 | ||
28 | #include <asm/geode.h> | 28 | #include <asm/geode.h> |
29 | 29 | ||
30 | static int force = 0; | 30 | static bool force = 0; |
31 | module_param(force, bool, 0444); | 31 | module_param(force, bool, 0444); |
32 | /* FIXME: Award bios is not automatically detected as Alix platform */ | 32 | /* FIXME: Award bios is not automatically detected as Alix platform */ |
33 | MODULE_PARM_DESC(force, "Force detection as ALIX.2/ALIX.3 platform"); | 33 | MODULE_PARM_DESC(force, "Force detection as ALIX.2/ALIX.3 platform"); |
diff --git a/arch/x86/platform/iris/iris.c b/arch/x86/platform/iris/iris.c index 1ba7f5ed8c9b..5917eb56b313 100644 --- a/arch/x86/platform/iris/iris.c +++ b/arch/x86/platform/iris/iris.c | |||
@@ -42,7 +42,7 @@ MODULE_AUTHOR("Sébastien Hinderer <Sebastien.Hinderer@ens-lyon.org>"); | |||
42 | MODULE_DESCRIPTION("A power_off handler for Iris devices from EuroBraille"); | 42 | MODULE_DESCRIPTION("A power_off handler for Iris devices from EuroBraille"); |
43 | MODULE_SUPPORTED_DEVICE("Eurobraille/Iris"); | 43 | MODULE_SUPPORTED_DEVICE("Eurobraille/Iris"); |
44 | 44 | ||
45 | static int force; | 45 | static bool force; |
46 | 46 | ||
47 | module_param(force, bool, 0); | 47 | module_param(force, bool, 0); |
48 | MODULE_PARM_DESC(force, "Set to one to force poweroff handler installation."); | 48 | MODULE_PARM_DESC(force, "Set to one to force poweroff handler installation."); |
diff --git a/arch/x86/platform/mrst/Makefile b/arch/x86/platform/mrst/Makefile index 1ea38775a6d3..7baed5135e0f 100644 --- a/arch/x86/platform/mrst/Makefile +++ b/arch/x86/platform/mrst/Makefile | |||
@@ -1,4 +1,4 @@ | |||
1 | obj-$(CONFIG_X86_MRST) += mrst.o | 1 | obj-$(CONFIG_X86_INTEL_MID) += mrst.o |
2 | obj-$(CONFIG_X86_MRST) += vrtc.o | 2 | obj-$(CONFIG_X86_INTEL_MID) += vrtc.o |
3 | obj-$(CONFIG_EARLY_PRINTK_MRST) += early_printk_mrst.o | 3 | obj-$(CONFIG_EARLY_PRINTK_INTEL_MID) += early_printk_mrst.o |
4 | obj-$(CONFIG_X86_MRST) += pmu.o | 4 | obj-$(CONFIG_X86_MRST) += pmu.o |
diff --git a/arch/x86/platform/mrst/early_printk_mrst.c b/arch/x86/platform/mrst/early_printk_mrst.c index 25bfdbb5b130..3c6e328483c7 100644 --- a/arch/x86/platform/mrst/early_printk_mrst.c +++ b/arch/x86/platform/mrst/early_printk_mrst.c | |||
@@ -245,16 +245,24 @@ struct console early_mrst_console = { | |||
245 | * Following is the early console based on Medfield HSU (High | 245 | * Following is the early console based on Medfield HSU (High |
246 | * Speed UART) device. | 246 | * Speed UART) device. |
247 | */ | 247 | */ |
248 | #define HSU_PORT2_PADDR 0xffa28180 | 248 | #define HSU_PORT_BASE 0xffa28080 |
249 | 249 | ||
250 | static void __iomem *phsu; | 250 | static void __iomem *phsu; |
251 | 251 | ||
252 | void hsu_early_console_init(void) | 252 | void hsu_early_console_init(const char *s) |
253 | { | 253 | { |
254 | unsigned long paddr, port = 0; | ||
254 | u8 lcr; | 255 | u8 lcr; |
255 | 256 | ||
256 | phsu = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE, | 257 | /* |
257 | HSU_PORT2_PADDR); | 258 | * Select the early HSU console port if specified by user in the |
259 | * kernel command line. | ||
260 | */ | ||
261 | if (*s && !kstrtoul(s, 10, &port)) | ||
262 | port = clamp_val(port, 0, 2); | ||
263 | |||
264 | paddr = HSU_PORT_BASE + port * 0x80; | ||
265 | phsu = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE, paddr); | ||
258 | 266 | ||
259 | /* Disable FIFO */ | 267 | /* Disable FIFO */ |
260 | writeb(0x0, phsu + UART_FCR); | 268 | writeb(0x0, phsu + UART_FCR); |
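Editor's note: the port arithmetic above is easy to sanity-check — with HSU_PORT_BASE at 0xffa28080 and a 0x80 stride, port 2 lands exactly on the old hard-coded HSU_PORT2_PADDR. A throwaway sketch of the same math:

    #define HSU_PORT_BASE	0xffa28080UL

    /* port is clamped to 0..2 by the init code above */
    static unsigned long hsu_port_paddr(unsigned long port)
    {
    	return HSU_PORT_BASE + port * 0x80;
    	/* 0 -> 0xffa28080, 1 -> 0xffa28100, 2 -> 0xffa28180 (old HSU_PORT2_PADDR) */
    }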
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c index ad4ec1cb097e..475e2cd0f3c3 100644 --- a/arch/x86/platform/mrst/mrst.c +++ b/arch/x86/platform/mrst/mrst.c | |||
@@ -848,8 +848,7 @@ static void __init sfi_handle_ipc_dev(struct sfi_device_table_entry *entry) | |||
848 | if (mrst_has_msic()) | 848 | if (mrst_has_msic()) |
849 | return; | 849 | return; |
850 | 850 | ||
851 | /* ID as IRQ is a hack that will go away */ | 851 | pdev = platform_device_alloc(entry->name, 0); |
852 | pdev = platform_device_alloc(entry->name, entry->irq); | ||
853 | if (pdev == NULL) { | 852 | if (pdev == NULL) { |
854 | pr_err("out of memory for SFI platform device '%s'.\n", | 853 | pr_err("out of memory for SFI platform device '%s'.\n", |
855 | entry->name); | 854 | entry->name); |
@@ -1030,6 +1029,7 @@ static int __init pb_keys_init(void) | |||
1030 | num = sizeof(gpio_button) / sizeof(struct gpio_keys_button); | 1029 | num = sizeof(gpio_button) / sizeof(struct gpio_keys_button); |
1031 | for (i = 0; i < num; i++) { | 1030 | for (i = 0; i < num; i++) { |
1032 | gb[i].gpio = get_gpio_by_name(gb[i].desc); | 1031 | gb[i].gpio = get_gpio_by_name(gb[i].desc); |
1032 | pr_debug("info[%2d]: name = %s, gpio = %d\n", i, gb[i].desc, gb[i].gpio); | ||
1033 | if (gb[i].gpio == -1) | 1033 | if (gb[i].gpio == -1) |
1034 | continue; | 1034 | continue; |
1035 | 1035 | ||
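Editor's note: for context, the second argument to platform_device_alloc() is the platform device id, not an IRQ — passing entry->irq there was the "hack" the removed comment admitted to. The id is now pinned to 0; the IRQ is then expected to reach the device through its resources (the rest of the function is outside this hunk).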
diff --git a/arch/x86/platform/uv/uv_sysfs.c b/arch/x86/platform/uv/uv_sysfs.c index 309c70fb7759..5d4ba301e776 100644 --- a/arch/x86/platform/uv/uv_sysfs.c +++ b/arch/x86/platform/uv/uv_sysfs.c | |||
@@ -19,7 +19,7 @@ | |||
19 | * Copyright (c) Russ Anderson | 19 | * Copyright (c) Russ Anderson |
20 | */ | 20 | */ |
21 | 21 | ||
22 | #include <linux/sysdev.h> | 22 | #include <linux/device.h> |
23 | #include <asm/uv/bios.h> | 23 | #include <asm/uv/bios.h> |
24 | #include <asm/uv/uv.h> | 24 | #include <asm/uv/uv.h> |
25 | 25 | ||
diff --git a/arch/x86/syscalls/Makefile b/arch/x86/syscalls/Makefile new file mode 100644 index 000000000000..564b2476fede --- /dev/null +++ b/arch/x86/syscalls/Makefile | |||
@@ -0,0 +1,43 @@ | |||
1 | out := $(obj)/../include/generated/asm | ||
2 | |||
3 | # Create output directory if not already present | ||
4 | _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') | ||
5 | |||
6 | syscall32 := $(srctree)/$(src)/syscall_32.tbl | ||
7 | syscall64 := $(srctree)/$(src)/syscall_64.tbl | ||
8 | |||
9 | syshdr := $(srctree)/$(src)/syscallhdr.sh | ||
10 | systbl := $(srctree)/$(src)/syscalltbl.sh | ||
11 | |||
12 | quiet_cmd_syshdr = SYSHDR $@ | ||
13 | cmd_syshdr = $(CONFIG_SHELL) '$(syshdr)' $< $@ \ | ||
14 | $(syshdr_abi_$(basetarget)) $(syshdr_pfx_$(basetarget)) | ||
15 | quiet_cmd_systbl = SYSTBL $@ | ||
16 | cmd_systbl = $(CONFIG_SHELL) '$(systbl)' $< $@ | ||
17 | |||
18 | syshdr_abi_unistd_32 := i386 | ||
19 | $(out)/unistd_32.h: $(syscall32) $(syshdr) | ||
20 | $(call if_changed,syshdr) | ||
21 | |||
22 | syshdr_abi_unistd_32_ia32 := i386 | ||
23 | syshdr_pfx_unistd_32_ia32 := ia32_ | ||
24 | $(out)/unistd_32_ia32.h: $(syscall32) $(syshdr) | ||
25 | $(call if_changed,syshdr) | ||
26 | |||
27 | syshdr_abi_unistd_64 := 64 | ||
28 | $(out)/unistd_64.h: $(syscall64) $(syshdr) | ||
29 | $(call if_changed,syshdr) | ||
30 | |||
31 | $(out)/syscalls_32.h: $(syscall32) $(systbl) | ||
32 | $(call if_changed,systbl) | ||
33 | $(out)/syscalls_64.h: $(syscall64) $(systbl) | ||
34 | $(call if_changed,systbl) | ||
35 | |||
36 | syshdr-y += unistd_32.h unistd_64.h | ||
37 | syshdr-y += syscalls_32.h | ||
38 | syshdr-$(CONFIG_X86_64) += unistd_32_ia32.h | ||
39 | syshdr-$(CONFIG_X86_64) += syscalls_64.h | ||
40 | |||
41 | targets += $(syshdr-y) | ||
42 | |||
43 | all: $(addprefix $(out)/,$(targets)) | ||
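Editor's note: to make the $(basetarget) indirection concrete — when building $(out)/unistd_32.h the basetarget is unistd_32, so the syshdr command expands to roughly $(CONFIG_SHELL) syscallhdr.sh syscall_32.tbl .../unistd_32.h i386 (no prefix), while unistd_32_ia32.h gets the same i386 abi plus the ia32_ prefix from syshdr_pfx_unistd_32_ia32.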
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl new file mode 100644 index 000000000000..ce98e287c066 --- /dev/null +++ b/arch/x86/syscalls/syscall_32.tbl | |||
@@ -0,0 +1,357 @@ | |||
1 | # | ||
2 | # 32-bit system call numbers and entry vectors | ||
3 | # | ||
4 | # The format is: | ||
5 | # <number> <abi> <name> <entry point> <compat entry point> | ||
6 | # | ||
7 | # The abi is always "i386" for this file. | ||
8 | # | ||
9 | 0 i386 restart_syscall sys_restart_syscall | ||
10 | 1 i386 exit sys_exit | ||
11 | 2 i386 fork ptregs_fork stub32_fork | ||
12 | 3 i386 read sys_read | ||
13 | 4 i386 write sys_write | ||
14 | 5 i386 open sys_open compat_sys_open | ||
15 | 6 i386 close sys_close | ||
16 | 7 i386 waitpid sys_waitpid sys32_waitpid | ||
17 | 8 i386 creat sys_creat | ||
18 | 9 i386 link sys_link | ||
19 | 10 i386 unlink sys_unlink | ||
20 | 11 i386 execve ptregs_execve stub32_execve | ||
21 | 12 i386 chdir sys_chdir | ||
22 | 13 i386 time sys_time compat_sys_time | ||
23 | 14 i386 mknod sys_mknod | ||
24 | 15 i386 chmod sys_chmod | ||
25 | 16 i386 lchown sys_lchown16 | ||
26 | 17 i386 break | ||
27 | 18 i386 oldstat sys_stat | ||
28 | 19 i386 lseek sys_lseek sys32_lseek | ||
29 | 20 i386 getpid sys_getpid | ||
30 | 21 i386 mount sys_mount compat_sys_mount | ||
31 | 22 i386 umount sys_oldumount | ||
32 | 23 i386 setuid sys_setuid16 | ||
33 | 24 i386 getuid sys_getuid16 | ||
34 | 25 i386 stime sys_stime compat_sys_stime | ||
35 | 26 i386 ptrace sys_ptrace compat_sys_ptrace | ||
36 | 27 i386 alarm sys_alarm | ||
37 | 28 i386 oldfstat sys_fstat | ||
38 | 29 i386 pause sys_pause | ||
39 | 30 i386 utime sys_utime compat_sys_utime | ||
40 | 31 i386 stty | ||
41 | 32 i386 gtty | ||
42 | 33 i386 access sys_access | ||
43 | 34 i386 nice sys_nice | ||
44 | 35 i386 ftime | ||
45 | 36 i386 sync sys_sync | ||
46 | 37 i386 kill sys_kill sys32_kill | ||
47 | 38 i386 rename sys_rename | ||
48 | 39 i386 mkdir sys_mkdir | ||
49 | 40 i386 rmdir sys_rmdir | ||
50 | 41 i386 dup sys_dup | ||
51 | 42 i386 pipe sys_pipe | ||
52 | 43 i386 times sys_times compat_sys_times | ||
53 | 44 i386 prof | ||
54 | 45 i386 brk sys_brk | ||
55 | 46 i386 setgid sys_setgid16 | ||
56 | 47 i386 getgid sys_getgid16 | ||
57 | 48 i386 signal sys_signal | ||
58 | 49 i386 geteuid sys_geteuid16 | ||
59 | 50 i386 getegid sys_getegid16 | ||
60 | 51 i386 acct sys_acct | ||
61 | 52 i386 umount2 sys_umount | ||
62 | 53 i386 lock | ||
63 | 54 i386 ioctl sys_ioctl compat_sys_ioctl | ||
64 | 55 i386 fcntl sys_fcntl compat_sys_fcntl64 | ||
65 | 56 i386 mpx | ||
66 | 57 i386 setpgid sys_setpgid | ||
67 | 58 i386 ulimit | ||
68 | 59 i386 oldolduname sys_olduname | ||
69 | 60 i386 umask sys_umask | ||
70 | 61 i386 chroot sys_chroot | ||
71 | 62 i386 ustat sys_ustat compat_sys_ustat | ||
72 | 63 i386 dup2 sys_dup2 | ||
73 | 64 i386 getppid sys_getppid | ||
74 | 65 i386 getpgrp sys_getpgrp | ||
75 | 66 i386 setsid sys_setsid | ||
76 | 67 i386 sigaction sys_sigaction sys32_sigaction | ||
77 | 68 i386 sgetmask sys_sgetmask | ||
78 | 69 i386 ssetmask sys_ssetmask | ||
79 | 70 i386 setreuid sys_setreuid16 | ||
80 | 71 i386 setregid sys_setregid16 | ||
81 | 72 i386 sigsuspend sys_sigsuspend sys32_sigsuspend | ||
82 | 73 i386 sigpending sys_sigpending compat_sys_sigpending | ||
83 | 74 i386 sethostname sys_sethostname | ||
84 | 75 i386 setrlimit sys_setrlimit compat_sys_setrlimit | ||
85 | 76 i386 getrlimit sys_old_getrlimit compat_sys_old_getrlimit | ||
86 | 77 i386 getrusage sys_getrusage compat_sys_getrusage | ||
87 | 78 i386 gettimeofday sys_gettimeofday compat_sys_gettimeofday | ||
88 | 79 i386 settimeofday sys_settimeofday compat_sys_settimeofday | ||
89 | 80 i386 getgroups sys_getgroups16 | ||
90 | 81 i386 setgroups sys_setgroups16 | ||
91 | 82 i386 select sys_old_select compat_sys_old_select | ||
92 | 83 i386 symlink sys_symlink | ||
93 | 84 i386 oldlstat sys_lstat | ||
94 | 85 i386 readlink sys_readlink | ||
95 | 86 i386 uselib sys_uselib | ||
96 | 87 i386 swapon sys_swapon | ||
97 | 88 i386 reboot sys_reboot | ||
98 | 89 i386 readdir sys_old_readdir compat_sys_old_readdir | ||
99 | 90 i386 mmap sys_old_mmap sys32_mmap | ||
100 | 91 i386 munmap sys_munmap | ||
101 | 92 i386 truncate sys_truncate | ||
102 | 93 i386 ftruncate sys_ftruncate | ||
103 | 94 i386 fchmod sys_fchmod | ||
104 | 95 i386 fchown sys_fchown16 | ||
105 | 96 i386 getpriority sys_getpriority | ||
106 | 97 i386 setpriority sys_setpriority | ||
107 | 98 i386 profil | ||
108 | 99 i386 statfs sys_statfs compat_sys_statfs | ||
109 | 100 i386 fstatfs sys_fstatfs compat_sys_fstatfs | ||
110 | 101 i386 ioperm sys_ioperm | ||
111 | 102 i386 socketcall sys_socketcall compat_sys_socketcall | ||
112 | 103 i386 syslog sys_syslog | ||
113 | 104 i386 setitimer sys_setitimer compat_sys_setitimer | ||
114 | 105 i386 getitimer sys_getitimer compat_sys_getitimer | ||
115 | 106 i386 stat sys_newstat compat_sys_newstat | ||
116 | 107 i386 lstat sys_newlstat compat_sys_newlstat | ||
117 | 108 i386 fstat sys_newfstat compat_sys_newfstat | ||
118 | 109 i386 olduname sys_uname | ||
119 | 110 i386 iopl ptregs_iopl stub32_iopl | ||
120 | 111 i386 vhangup sys_vhangup | ||
121 | 112 i386 idle | ||
122 | 113 i386 vm86old ptregs_vm86old sys32_vm86_warning | ||
123 | 114 i386 wait4 sys_wait4 compat_sys_wait4 | ||
124 | 115 i386 swapoff sys_swapoff | ||
125 | 116 i386 sysinfo sys_sysinfo compat_sys_sysinfo | ||
126 | 117 i386 ipc sys_ipc sys32_ipc | ||
127 | 118 i386 fsync sys_fsync | ||
128 | 119 i386 sigreturn ptregs_sigreturn stub32_sigreturn | ||
129 | 120 i386 clone ptregs_clone stub32_clone | ||
130 | 121 i386 setdomainname sys_setdomainname | ||
131 | 122 i386 uname sys_newuname | ||
132 | 123 i386 modify_ldt sys_modify_ldt | ||
133 | 124 i386 adjtimex sys_adjtimex compat_sys_adjtimex | ||
134 | 125 i386 mprotect sys_mprotect sys32_mprotect | ||
135 | 126 i386 sigprocmask sys_sigprocmask compat_sys_sigprocmask | ||
136 | 127 i386 create_module | ||
137 | 128 i386 init_module sys_init_module | ||
138 | 129 i386 delete_module sys_delete_module | ||
139 | 130 i386 get_kernel_syms | ||
140 | 131 i386 quotactl sys_quotactl sys32_quotactl | ||
141 | 132 i386 getpgid sys_getpgid | ||
142 | 133 i386 fchdir sys_fchdir | ||
143 | 134 i386 bdflush sys_bdflush | ||
144 | 135 i386 sysfs sys_sysfs | ||
145 | 136 i386 personality sys_personality | ||
146 | 137 i386 afs_syscall | ||
147 | 138 i386 setfsuid sys_setfsuid16 | ||
148 | 139 i386 setfsgid sys_setfsgid16 | ||
149 | 140 i386 _llseek sys_llseek | ||
150 | 141 i386 getdents sys_getdents compat_sys_getdents | ||
151 | 142 i386 _newselect sys_select compat_sys_select | ||
152 | 143 i386 flock sys_flock | ||
153 | 144 i386 msync sys_msync | ||
154 | 145 i386 readv sys_readv compat_sys_readv | ||
155 | 146 i386 writev sys_writev compat_sys_writev | ||
156 | 147 i386 getsid sys_getsid | ||
157 | 148 i386 fdatasync sys_fdatasync | ||
158 | 149 i386 _sysctl sys_sysctl compat_sys_sysctl | ||
159 | 150 i386 mlock sys_mlock | ||
160 | 151 i386 munlock sys_munlock | ||
161 | 152 i386 mlockall sys_mlockall | ||
162 | 153 i386 munlockall sys_munlockall | ||
163 | 154 i386 sched_setparam sys_sched_setparam | ||
164 | 155 i386 sched_getparam sys_sched_getparam | ||
165 | 156 i386 sched_setscheduler sys_sched_setscheduler | ||
166 | 157 i386 sched_getscheduler sys_sched_getscheduler | ||
167 | 158 i386 sched_yield sys_sched_yield | ||
168 | 159 i386 sched_get_priority_max sys_sched_get_priority_max | ||
169 | 160 i386 sched_get_priority_min sys_sched_get_priority_min | ||
170 | 161 i386 sched_rr_get_interval sys_sched_rr_get_interval sys32_sched_rr_get_interval | ||
171 | 162 i386 nanosleep sys_nanosleep compat_sys_nanosleep | ||
172 | 163 i386 mremap sys_mremap | ||
173 | 164 i386 setresuid sys_setresuid16 | ||
174 | 165 i386 getresuid sys_getresuid16 | ||
175 | 166 i386 vm86 ptregs_vm86 sys32_vm86_warning | ||
176 | 167 i386 query_module | ||
177 | 168 i386 poll sys_poll | ||
178 | 169 i386 nfsservctl | ||
179 | 170 i386 setresgid sys_setresgid16 | ||
180 | 171 i386 getresgid sys_getresgid16 | ||
181 | 172 i386 prctl sys_prctl | ||
182 | 173 i386 rt_sigreturn ptregs_rt_sigreturn stub32_rt_sigreturn | ||
183 | 174 i386 rt_sigaction sys_rt_sigaction sys32_rt_sigaction | ||
184 | 175 i386 rt_sigprocmask sys_rt_sigprocmask sys32_rt_sigprocmask | ||
185 | 176 i386 rt_sigpending sys_rt_sigpending sys32_rt_sigpending | ||
186 | 177 i386 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait | ||
187 | 178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo sys32_rt_sigqueueinfo | ||
188 | 179 i386 rt_sigsuspend sys_rt_sigsuspend | ||
189 | 180 i386 pread64 sys_pread64 sys32_pread | ||
190 | 181 i386 pwrite64 sys_pwrite64 sys32_pwrite | ||
191 | 182 i386 chown sys_chown16 | ||
192 | 183 i386 getcwd sys_getcwd | ||
193 | 184 i386 capget sys_capget | ||
194 | 185 i386 capset sys_capset | ||
195 | 186 i386 sigaltstack ptregs_sigaltstack stub32_sigaltstack | ||
196 | 187 i386 sendfile sys_sendfile sys32_sendfile | ||
197 | 188 i386 getpmsg | ||
198 | 189 i386 putpmsg | ||
199 | 190 i386 vfork ptregs_vfork stub32_vfork | ||
200 | 191 i386 ugetrlimit sys_getrlimit compat_sys_getrlimit | ||
201 | 192 i386 mmap2 sys_mmap_pgoff | ||
202 | 193 i386 truncate64 sys_truncate64 sys32_truncate64 | ||
203 | 194 i386 ftruncate64 sys_ftruncate64 sys32_ftruncate64 | ||
204 | 195 i386 stat64 sys_stat64 sys32_stat64 | ||
205 | 196 i386 lstat64 sys_lstat64 sys32_lstat64 | ||
206 | 197 i386 fstat64 sys_fstat64 sys32_fstat64 | ||
207 | 198 i386 lchown32 sys_lchown | ||
208 | 199 i386 getuid32 sys_getuid | ||
209 | 200 i386 getgid32 sys_getgid | ||
210 | 201 i386 geteuid32 sys_geteuid | ||
211 | 202 i386 getegid32 sys_getegid | ||
212 | 203 i386 setreuid32 sys_setreuid | ||
213 | 204 i386 setregid32 sys_setregid | ||
214 | 205 i386 getgroups32 sys_getgroups | ||
215 | 206 i386 setgroups32 sys_setgroups | ||
216 | 207 i386 fchown32 sys_fchown | ||
217 | 208 i386 setresuid32 sys_setresuid | ||
218 | 209 i386 getresuid32 sys_getresuid | ||
219 | 210 i386 setresgid32 sys_setresgid | ||
220 | 211 i386 getresgid32 sys_getresgid | ||
221 | 212 i386 chown32 sys_chown | ||
222 | 213 i386 setuid32 sys_setuid | ||
223 | 214 i386 setgid32 sys_setgid | ||
224 | 215 i386 setfsuid32 sys_setfsuid | ||
225 | 216 i386 setfsgid32 sys_setfsgid | ||
226 | 217 i386 pivot_root sys_pivot_root | ||
227 | 218 i386 mincore sys_mincore | ||
228 | 219 i386 madvise sys_madvise | ||
229 | 220 i386 getdents64 sys_getdents64 compat_sys_getdents64 | ||
230 | 221 i386 fcntl64 sys_fcntl64 compat_sys_fcntl64 | ||
231 | # 222 is unused | ||
232 | # 223 is unused | ||
233 | 224 i386 gettid sys_gettid | ||
234 | 225 i386 readahead sys_readahead sys32_readahead | ||
235 | 226 i386 setxattr sys_setxattr | ||
236 | 227 i386 lsetxattr sys_lsetxattr | ||
237 | 228 i386 fsetxattr sys_fsetxattr | ||
238 | 229 i386 getxattr sys_getxattr | ||
239 | 230 i386 lgetxattr sys_lgetxattr | ||
240 | 231 i386 fgetxattr sys_fgetxattr | ||
241 | 232 i386 listxattr sys_listxattr | ||
242 | 233 i386 llistxattr sys_llistxattr | ||
243 | 234 i386 flistxattr sys_flistxattr | ||
244 | 235 i386 removexattr sys_removexattr | ||
245 | 236 i386 lremovexattr sys_lremovexattr | ||
246 | 237 i386 fremovexattr sys_fremovexattr | ||
247 | 238 i386 tkill sys_tkill | ||
248 | 239 i386 sendfile64 sys_sendfile64 | ||
249 | 240 i386 futex sys_futex compat_sys_futex | ||
250 | 241 i386 sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity | ||
251 | 242 i386 sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity | ||
252 | 243 i386 set_thread_area sys_set_thread_area | ||
253 | 244 i386 get_thread_area sys_get_thread_area | ||
254 | 245 i386 io_setup sys_io_setup compat_sys_io_setup | ||
255 | 246 i386 io_destroy sys_io_destroy | ||
256 | 247 i386 io_getevents sys_io_getevents compat_sys_io_getevents | ||
257 | 248 i386 io_submit sys_io_submit compat_sys_io_submit | ||
258 | 249 i386 io_cancel sys_io_cancel | ||
259 | 250 i386 fadvise64 sys_fadvise64 sys32_fadvise64 | ||
260 | # 251 is available for reuse (was briefly sys_set_zone_reclaim) | ||
261 | 252 i386 exit_group sys_exit_group | ||
262 | 253 i386 lookup_dcookie sys_lookup_dcookie sys32_lookup_dcookie | ||
263 | 254 i386 epoll_create sys_epoll_create | ||
264 | 255 i386 epoll_ctl sys_epoll_ctl | ||
265 | 256 i386 epoll_wait sys_epoll_wait | ||
266 | 257 i386 remap_file_pages sys_remap_file_pages | ||
267 | 258 i386 set_tid_address sys_set_tid_address | ||
268 | 259 i386 timer_create sys_timer_create compat_sys_timer_create | ||
269 | 260 i386 timer_settime sys_timer_settime compat_sys_timer_settime | ||
270 | 261 i386 timer_gettime sys_timer_gettime compat_sys_timer_gettime | ||
271 | 262 i386 timer_getoverrun sys_timer_getoverrun | ||
272 | 263 i386 timer_delete sys_timer_delete | ||
273 | 264 i386 clock_settime sys_clock_settime compat_sys_clock_settime | ||
274 | 265 i386 clock_gettime sys_clock_gettime compat_sys_clock_gettime | ||
275 | 266 i386 clock_getres sys_clock_getres compat_sys_clock_getres | ||
276 | 267 i386 clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep | ||
277 | 268 i386 statfs64 sys_statfs64 compat_sys_statfs64 | ||
278 | 269 i386 fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 | ||
279 | 270 i386 tgkill sys_tgkill | ||
280 | 271 i386 utimes sys_utimes compat_sys_utimes | ||
281 | 272 i386 fadvise64_64 sys_fadvise64_64 sys32_fadvise64_64 | ||
282 | 273 i386 vserver | ||
283 | 274 i386 mbind sys_mbind | ||
284 | 275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy | ||
285 | 276 i386 set_mempolicy sys_set_mempolicy | ||
286 | 277 i386 mq_open sys_mq_open compat_sys_mq_open | ||
287 | 278 i386 mq_unlink sys_mq_unlink | ||
288 | 279 i386 mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend | ||
289 | 280 i386 mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive | ||
290 | 281 i386 mq_notify sys_mq_notify compat_sys_mq_notify | ||
291 | 282 i386 mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr | ||
292 | 283 i386 kexec_load sys_kexec_load compat_sys_kexec_load | ||
293 | 284 i386 waitid sys_waitid compat_sys_waitid | ||
294 | # 285 sys_setaltroot | ||
295 | 286 i386 add_key sys_add_key | ||
296 | 287 i386 request_key sys_request_key | ||
297 | 288 i386 keyctl sys_keyctl | ||
298 | 289 i386 ioprio_set sys_ioprio_set | ||
299 | 290 i386 ioprio_get sys_ioprio_get | ||
300 | 291 i386 inotify_init sys_inotify_init | ||
301 | 292 i386 inotify_add_watch sys_inotify_add_watch | ||
302 | 293 i386 inotify_rm_watch sys_inotify_rm_watch | ||
303 | 294 i386 migrate_pages sys_migrate_pages | ||
304 | 295 i386 openat sys_openat compat_sys_openat | ||
305 | 296 i386 mkdirat sys_mkdirat | ||
306 | 297 i386 mknodat sys_mknodat | ||
307 | 298 i386 fchownat sys_fchownat | ||
308 | 299 i386 futimesat sys_futimesat compat_sys_futimesat | ||
309 | 300 i386 fstatat64 sys_fstatat64 sys32_fstatat | ||
310 | 301 i386 unlinkat sys_unlinkat | ||
311 | 302 i386 renameat sys_renameat | ||
312 | 303 i386 linkat sys_linkat | ||
313 | 304 i386 symlinkat sys_symlinkat | ||
314 | 305 i386 readlinkat sys_readlinkat | ||
315 | 306 i386 fchmodat sys_fchmodat | ||
316 | 307 i386 faccessat sys_faccessat | ||
317 | 308 i386 pselect6 sys_pselect6 compat_sys_pselect6 | ||
318 | 309 i386 ppoll sys_ppoll compat_sys_ppoll | ||
319 | 310 i386 unshare sys_unshare | ||
320 | 311 i386 set_robust_list sys_set_robust_list compat_sys_set_robust_list | ||
321 | 312 i386 get_robust_list sys_get_robust_list compat_sys_get_robust_list | ||
322 | 313 i386 splice sys_splice | ||
323 | 314 i386 sync_file_range sys_sync_file_range sys32_sync_file_range | ||
324 | 315 i386 tee sys_tee | ||
325 | 316 i386 vmsplice sys_vmsplice compat_sys_vmsplice | ||
326 | 317 i386 move_pages sys_move_pages compat_sys_move_pages | ||
327 | 318 i386 getcpu sys_getcpu | ||
328 | 319 i386 epoll_pwait sys_epoll_pwait | ||
329 | 320 i386 utimensat sys_utimensat compat_sys_utimensat | ||
330 | 321 i386 signalfd sys_signalfd compat_sys_signalfd | ||
331 | 322 i386 timerfd_create sys_timerfd_create | ||
332 | 323 i386 eventfd sys_eventfd | ||
333 | 324 i386 fallocate sys_fallocate sys32_fallocate | ||
334 | 325 i386 timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime | ||
335 | 326 i386 timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime | ||
336 | 327 i386 signalfd4 sys_signalfd4 compat_sys_signalfd4 | ||
337 | 328 i386 eventfd2 sys_eventfd2 | ||
338 | 329 i386 epoll_create1 sys_epoll_create1 | ||
339 | 330 i386 dup3 sys_dup3 | ||
340 | 331 i386 pipe2 sys_pipe2 | ||
341 | 332 i386 inotify_init1 sys_inotify_init1 | ||
342 | 333 i386 preadv sys_preadv compat_sys_preadv | ||
343 | 334 i386 pwritev sys_pwritev compat_sys_pwritev | ||
344 | 335 i386 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo | ||
345 | 336 i386 perf_event_open sys_perf_event_open | ||
346 | 337 i386 recvmmsg sys_recvmmsg compat_sys_recvmmsg | ||
347 | 338 i386 fanotify_init sys_fanotify_init | ||
348 | 339 i386 fanotify_mark sys_fanotify_mark sys32_fanotify_mark | ||
349 | 340 i386 prlimit64 sys_prlimit64 | ||
350 | 341 i386 name_to_handle_at sys_name_to_handle_at | ||
351 | 342 i386 open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at | ||
352 | 343 i386 clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime | ||
353 | 344 i386 syncfs sys_syncfs | ||
354 | 345 i386 sendmmsg sys_sendmmsg compat_sys_sendmmsg | ||
355 | 346 i386 setns sys_setns | ||
356 | 347 i386 process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv | ||
357 | 348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev | ||
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl new file mode 100644 index 000000000000..b440a8f7eefa --- /dev/null +++ b/arch/x86/syscalls/syscall_64.tbl | |||
@@ -0,0 +1,320 @@ | |||
1 | # | ||
2 | # 64-bit system call numbers and entry vectors | ||
3 | # | ||
4 | # The format is: | ||
5 | # <number> <abi> <name> <entry point> | ||
6 | # | ||
7 | # The abi is always "64" for this file (for now). | ||
8 | # | ||
9 | 0 64 read sys_read | ||
10 | 1 64 write sys_write | ||
11 | 2 64 open sys_open | ||
12 | 3 64 close sys_close | ||
13 | 4 64 stat sys_newstat | ||
14 | 5 64 fstat sys_newfstat | ||
15 | 6 64 lstat sys_newlstat | ||
16 | 7 64 poll sys_poll | ||
17 | 8 64 lseek sys_lseek | ||
18 | 9 64 mmap sys_mmap | ||
19 | 10 64 mprotect sys_mprotect | ||
20 | 11 64 munmap sys_munmap | ||
21 | 12 64 brk sys_brk | ||
22 | 13 64 rt_sigaction sys_rt_sigaction | ||
23 | 14 64 rt_sigprocmask sys_rt_sigprocmask | ||
24 | 15 64 rt_sigreturn stub_rt_sigreturn | ||
25 | 16 64 ioctl sys_ioctl | ||
26 | 17 64 pread64 sys_pread64 | ||
27 | 18 64 pwrite64 sys_pwrite64 | ||
28 | 19 64 readv sys_readv | ||
29 | 20 64 writev sys_writev | ||
30 | 21 64 access sys_access | ||
31 | 22 64 pipe sys_pipe | ||
32 | 23 64 select sys_select | ||
33 | 24 64 sched_yield sys_sched_yield | ||
34 | 25 64 mremap sys_mremap | ||
35 | 26 64 msync sys_msync | ||
36 | 27 64 mincore sys_mincore | ||
37 | 28 64 madvise sys_madvise | ||
38 | 29 64 shmget sys_shmget | ||
39 | 30 64 shmat sys_shmat | ||
40 | 31 64 shmctl sys_shmctl | ||
41 | 32 64 dup sys_dup | ||
42 | 33 64 dup2 sys_dup2 | ||
43 | 34 64 pause sys_pause | ||
44 | 35 64 nanosleep sys_nanosleep | ||
45 | 36 64 getitimer sys_getitimer | ||
46 | 37 64 alarm sys_alarm | ||
47 | 38 64 setitimer sys_setitimer | ||
48 | 39 64 getpid sys_getpid | ||
49 | 40 64 sendfile sys_sendfile64 | ||
50 | 41 64 socket sys_socket | ||
51 | 42 64 connect sys_connect | ||
52 | 43 64 accept sys_accept | ||
53 | 44 64 sendto sys_sendto | ||
54 | 45 64 recvfrom sys_recvfrom | ||
55 | 46 64 sendmsg sys_sendmsg | ||
56 | 47 64 recvmsg sys_recvmsg | ||
57 | 48 64 shutdown sys_shutdown | ||
58 | 49 64 bind sys_bind | ||
59 | 50 64 listen sys_listen | ||
60 | 51 64 getsockname sys_getsockname | ||
61 | 52 64 getpeername sys_getpeername | ||
62 | 53 64 socketpair sys_socketpair | ||
63 | 54 64 setsockopt sys_setsockopt | ||
64 | 55 64 getsockopt sys_getsockopt | ||
65 | 56 64 clone stub_clone | ||
66 | 57 64 fork stub_fork | ||
67 | 58 64 vfork stub_vfork | ||
68 | 59 64 execve stub_execve | ||
69 | 60 64 exit sys_exit | ||
70 | 61 64 wait4 sys_wait4 | ||
71 | 62 64 kill sys_kill | ||
72 | 63 64 uname sys_newuname | ||
73 | 64 64 semget sys_semget | ||
74 | 65 64 semop sys_semop | ||
75 | 66 64 semctl sys_semctl | ||
76 | 67 64 shmdt sys_shmdt | ||
77 | 68 64 msgget sys_msgget | ||
78 | 69 64 msgsnd sys_msgsnd | ||
79 | 70 64 msgrcv sys_msgrcv | ||
80 | 71 64 msgctl sys_msgctl | ||
81 | 72 64 fcntl sys_fcntl | ||
82 | 73 64 flock sys_flock | ||
83 | 74 64 fsync sys_fsync | ||
84 | 75 64 fdatasync sys_fdatasync | ||
85 | 76 64 truncate sys_truncate | ||
86 | 77 64 ftruncate sys_ftruncate | ||
87 | 78 64 getdents sys_getdents | ||
88 | 79 64 getcwd sys_getcwd | ||
89 | 80 64 chdir sys_chdir | ||
90 | 81 64 fchdir sys_fchdir | ||
91 | 82 64 rename sys_rename | ||
92 | 83 64 mkdir sys_mkdir | ||
93 | 84 64 rmdir sys_rmdir | ||
94 | 85 64 creat sys_creat | ||
95 | 86 64 link sys_link | ||
96 | 87 64 unlink sys_unlink | ||
97 | 88 64 symlink sys_symlink | ||
98 | 89 64 readlink sys_readlink | ||
99 | 90 64 chmod sys_chmod | ||
100 | 91 64 fchmod sys_fchmod | ||
101 | 92 64 chown sys_chown | ||
102 | 93 64 fchown sys_fchown | ||
103 | 94 64 lchown sys_lchown | ||
104 | 95 64 umask sys_umask | ||
105 | 96 64 gettimeofday sys_gettimeofday | ||
106 | 97 64 getrlimit sys_getrlimit | ||
107 | 98 64 getrusage sys_getrusage | ||
108 | 99 64 sysinfo sys_sysinfo | ||
109 | 100 64 times sys_times | ||
110 | 101 64 ptrace sys_ptrace | ||
111 | 102 64 getuid sys_getuid | ||
112 | 103 64 syslog sys_syslog | ||
113 | 104 64 getgid sys_getgid | ||
114 | 105 64 setuid sys_setuid | ||
115 | 106 64 setgid sys_setgid | ||
116 | 107 64 geteuid sys_geteuid | ||
117 | 108 64 getegid sys_getegid | ||
118 | 109 64 setpgid sys_setpgid | ||
119 | 110 64 getppid sys_getppid | ||
120 | 111 64 getpgrp sys_getpgrp | ||
121 | 112 64 setsid sys_setsid | ||
122 | 113 64 setreuid sys_setreuid | ||
123 | 114 64 setregid sys_setregid | ||
124 | 115 64 getgroups sys_getgroups | ||
125 | 116 64 setgroups sys_setgroups | ||
126 | 117 64 setresuid sys_setresuid | ||
127 | 118 64 getresuid sys_getresuid | ||
128 | 119 64 setresgid sys_setresgid | ||
129 | 120 64 getresgid sys_getresgid | ||
130 | 121 64 getpgid sys_getpgid | ||
131 | 122 64 setfsuid sys_setfsuid | ||
132 | 123 64 setfsgid sys_setfsgid | ||
133 | 124 64 getsid sys_getsid | ||
134 | 125 64 capget sys_capget | ||
135 | 126 64 capset sys_capset | ||
136 | 127 64 rt_sigpending sys_rt_sigpending | ||
137 | 128 64 rt_sigtimedwait sys_rt_sigtimedwait | ||
138 | 129 64 rt_sigqueueinfo sys_rt_sigqueueinfo | ||
139 | 130 64 rt_sigsuspend sys_rt_sigsuspend | ||
140 | 131 64 sigaltstack stub_sigaltstack | ||
141 | 132 64 utime sys_utime | ||
142 | 133 64 mknod sys_mknod | ||
143 | 134 64 uselib | ||
144 | 135 64 personality sys_personality | ||
145 | 136 64 ustat sys_ustat | ||
146 | 137 64 statfs sys_statfs | ||
147 | 138 64 fstatfs sys_fstatfs | ||
148 | 139 64 sysfs sys_sysfs | ||
149 | 140 64 getpriority sys_getpriority | ||
150 | 141 64 setpriority sys_setpriority | ||
151 | 142 64 sched_setparam sys_sched_setparam | ||
152 | 143 64 sched_getparam sys_sched_getparam | ||
153 | 144 64 sched_setscheduler sys_sched_setscheduler | ||
154 | 145 64 sched_getscheduler sys_sched_getscheduler | ||
155 | 146 64 sched_get_priority_max sys_sched_get_priority_max | ||
156 | 147 64 sched_get_priority_min sys_sched_get_priority_min | ||
157 | 148 64 sched_rr_get_interval sys_sched_rr_get_interval | ||
158 | 149 64 mlock sys_mlock | ||
159 | 150 64 munlock sys_munlock | ||
160 | 151 64 mlockall sys_mlockall | ||
161 | 152 64 munlockall sys_munlockall | ||
162 | 153 64 vhangup sys_vhangup | ||
163 | 154 64 modify_ldt sys_modify_ldt | ||
164 | 155 64 pivot_root sys_pivot_root | ||
165 | 156 64 _sysctl sys_sysctl | ||
166 | 157 64 prctl sys_prctl | ||
167 | 158 64 arch_prctl sys_arch_prctl | ||
168 | 159 64 adjtimex sys_adjtimex | ||
169 | 160 64 setrlimit sys_setrlimit | ||
170 | 161 64 chroot sys_chroot | ||
171 | 162 64 sync sys_sync | ||
172 | 163 64 acct sys_acct | ||
173 | 164 64 settimeofday sys_settimeofday | ||
174 | 165 64 mount sys_mount | ||
175 | 166 64 umount2 sys_umount | ||
176 | 167 64 swapon sys_swapon | ||
177 | 168 64 swapoff sys_swapoff | ||
178 | 169 64 reboot sys_reboot | ||
179 | 170 64 sethostname sys_sethostname | ||
180 | 171 64 setdomainname sys_setdomainname | ||
181 | 172 64 iopl stub_iopl | ||
182 | 173 64 ioperm sys_ioperm | ||
183 | 174 64 create_module | ||
184 | 175 64 init_module sys_init_module | ||
185 | 176 64 delete_module sys_delete_module | ||
186 | 177 64 get_kernel_syms | ||
187 | 178 64 query_module | ||
188 | 179 64 quotactl sys_quotactl | ||
189 | 180 64 nfsservctl | ||
190 | 181 64 getpmsg | ||
191 | 182 64 putpmsg | ||
192 | 183 64 afs_syscall | ||
193 | 184 64 tuxcall | ||
194 | 185 64 security | ||
195 | 186 64 gettid sys_gettid | ||
196 | 187 64 readahead sys_readahead | ||
197 | 188 64 setxattr sys_setxattr | ||
198 | 189 64 lsetxattr sys_lsetxattr | ||
199 | 190 64 fsetxattr sys_fsetxattr | ||
200 | 191 64 getxattr sys_getxattr | ||
201 | 192 64 lgetxattr sys_lgetxattr | ||
202 | 193 64 fgetxattr sys_fgetxattr | ||
203 | 194 64 listxattr sys_listxattr | ||
204 | 195 64 llistxattr sys_llistxattr | ||
205 | 196 64 flistxattr sys_flistxattr | ||
206 | 197 64 removexattr sys_removexattr | ||
207 | 198 64 lremovexattr sys_lremovexattr | ||
208 | 199 64 fremovexattr sys_fremovexattr | ||
209 | 200 64 tkill sys_tkill | ||
210 | 201 64 time sys_time | ||
211 | 202 64 futex sys_futex | ||
212 | 203 64 sched_setaffinity sys_sched_setaffinity | ||
213 | 204 64 sched_getaffinity sys_sched_getaffinity | ||
214 | 205 64 set_thread_area | ||
215 | 206 64 io_setup sys_io_setup | ||
216 | 207 64 io_destroy sys_io_destroy | ||
217 | 208 64 io_getevents sys_io_getevents | ||
218 | 209 64 io_submit sys_io_submit | ||
219 | 210 64 io_cancel sys_io_cancel | ||
220 | 211 64 get_thread_area | ||
221 | 212 64 lookup_dcookie sys_lookup_dcookie | ||
222 | 213 64 epoll_create sys_epoll_create | ||
223 | 214 64 epoll_ctl_old | ||
224 | 215 64 epoll_wait_old | ||
225 | 216 64 remap_file_pages sys_remap_file_pages | ||
226 | 217 64 getdents64 sys_getdents64 | ||
227 | 218 64 set_tid_address sys_set_tid_address | ||
228 | 219 64 restart_syscall sys_restart_syscall | ||
229 | 220 64 semtimedop sys_semtimedop | ||
230 | 221 64 fadvise64 sys_fadvise64 | ||
231 | 222 64 timer_create sys_timer_create | ||
232 | 223 64 timer_settime sys_timer_settime | ||
233 | 224 64 timer_gettime sys_timer_gettime | ||
234 | 225 64 timer_getoverrun sys_timer_getoverrun | ||
235 | 226 64 timer_delete sys_timer_delete | ||
236 | 227 64 clock_settime sys_clock_settime | ||
237 | 228 64 clock_gettime sys_clock_gettime | ||
238 | 229 64 clock_getres sys_clock_getres | ||
239 | 230 64 clock_nanosleep sys_clock_nanosleep | ||
240 | 231 64 exit_group sys_exit_group | ||
241 | 232 64 epoll_wait sys_epoll_wait | ||
242 | 233 64 epoll_ctl sys_epoll_ctl | ||
243 | 234 64 tgkill sys_tgkill | ||
244 | 235 64 utimes sys_utimes | ||
245 | 236 64 vserver | ||
246 | 237 64 mbind sys_mbind | ||
247 | 238 64 set_mempolicy sys_set_mempolicy | ||
248 | 239 64 get_mempolicy sys_get_mempolicy | ||
249 | 240 64 mq_open sys_mq_open | ||
250 | 241 64 mq_unlink sys_mq_unlink | ||
251 | 242 64 mq_timedsend sys_mq_timedsend | ||
252 | 243 64 mq_timedreceive sys_mq_timedreceive | ||
253 | 244 64 mq_notify sys_mq_notify | ||
254 | 245 64 mq_getsetattr sys_mq_getsetattr | ||
255 | 246 64 kexec_load sys_kexec_load | ||
256 | 247 64 waitid sys_waitid | ||
257 | 248 64 add_key sys_add_key | ||
258 | 249 64 request_key sys_request_key | ||
259 | 250 64 keyctl sys_keyctl | ||
260 | 251 64 ioprio_set sys_ioprio_set | ||
261 | 252 64 ioprio_get sys_ioprio_get | ||
262 | 253 64 inotify_init sys_inotify_init | ||
263 | 254 64 inotify_add_watch sys_inotify_add_watch | ||
264 | 255 64 inotify_rm_watch sys_inotify_rm_watch | ||
265 | 256 64 migrate_pages sys_migrate_pages | ||
266 | 257 64 openat sys_openat | ||
267 | 258 64 mkdirat sys_mkdirat | ||
268 | 259 64 mknodat sys_mknodat | ||
269 | 260 64 fchownat sys_fchownat | ||
270 | 261 64 futimesat sys_futimesat | ||
271 | 262 64 newfstatat sys_newfstatat | ||
272 | 263 64 unlinkat sys_unlinkat | ||
273 | 264 64 renameat sys_renameat | ||
274 | 265 64 linkat sys_linkat | ||
275 | 266 64 symlinkat sys_symlinkat | ||
276 | 267 64 readlinkat sys_readlinkat | ||
277 | 268 64 fchmodat sys_fchmodat | ||
278 | 269 64 faccessat sys_faccessat | ||
279 | 270 64 pselect6 sys_pselect6 | ||
280 | 271 64 ppoll sys_ppoll | ||
281 | 272 64 unshare sys_unshare | ||
282 | 273 64 set_robust_list sys_set_robust_list | ||
283 | 274 64 get_robust_list sys_get_robust_list | ||
284 | 275 64 splice sys_splice | ||
285 | 276 64 tee sys_tee | ||
286 | 277 64 sync_file_range sys_sync_file_range | ||
287 | 278 64 vmsplice sys_vmsplice | ||
288 | 279 64 move_pages sys_move_pages | ||
289 | 280 64 utimensat sys_utimensat | ||
290 | 281 64 epoll_pwait sys_epoll_pwait | ||
291 | 282 64 signalfd sys_signalfd | ||
292 | 283 64 timerfd_create sys_timerfd_create | ||
293 | 284 64 eventfd sys_eventfd | ||
294 | 285 64 fallocate sys_fallocate | ||
295 | 286 64 timerfd_settime sys_timerfd_settime | ||
296 | 287 64 timerfd_gettime sys_timerfd_gettime | ||
297 | 288 64 accept4 sys_accept4 | ||
298 | 289 64 signalfd4 sys_signalfd4 | ||
299 | 290 64 eventfd2 sys_eventfd2 | ||
300 | 291 64 epoll_create1 sys_epoll_create1 | ||
301 | 292 64 dup3 sys_dup3 | ||
302 | 293 64 pipe2 sys_pipe2 | ||
303 | 294 64 inotify_init1 sys_inotify_init1 | ||
304 | 295 64 preadv sys_preadv | ||
305 | 296 64 pwritev sys_pwritev | ||
306 | 297 64 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo | ||
307 | 298 64 perf_event_open sys_perf_event_open | ||
308 | 299 64 recvmmsg sys_recvmmsg | ||
309 | 300 64 fanotify_init sys_fanotify_init | ||
310 | 301 64 fanotify_mark sys_fanotify_mark | ||
311 | 302 64 prlimit64 sys_prlimit64 | ||
312 | 303 64 name_to_handle_at sys_name_to_handle_at | ||
313 | 304 64 open_by_handle_at sys_open_by_handle_at | ||
314 | 305 64 clock_adjtime sys_clock_adjtime | ||
315 | 306 64 syncfs sys_syncfs | ||
316 | 307 64 sendmmsg sys_sendmmsg | ||
317 | 308 64 setns sys_setns | ||
318 | 309 64 getcpu sys_getcpu | ||
319 | 310 64 process_vm_readv sys_process_vm_readv | ||
320 | 311 64 process_vm_writev sys_process_vm_writev | ||
diff --git a/arch/x86/syscalls/syscallhdr.sh b/arch/x86/syscalls/syscallhdr.sh new file mode 100644 index 000000000000..31fd5f1f38f7 --- /dev/null +++ b/arch/x86/syscalls/syscallhdr.sh | |||
@@ -0,0 +1,27 @@ | |||
1 | #!/bin/sh | ||
2 | |||
3 | in="$1" | ||
4 | out="$2" | ||
5 | my_abis=`echo "($3)" | tr ',' '|'` | ||
6 | prefix="$4" | ||
7 | offset="$5" | ||
8 | |||
9 | fileguard=_ASM_X86_`basename "$out" | sed \ | ||
10 | -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/' \ | ||
11 | -e 's/[^A-Z0-9_]/_/g' -e 's/__/_/g'` | ||
12 | grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | ( | ||
13 | echo "#ifndef ${fileguard}" | ||
14 | echo "#define ${fileguard} 1" | ||
15 | echo "" | ||
16 | |||
17 | while read nr abi name entry ; do | ||
18 | if [ -z "$offset" ]; then | ||
19 | echo "#define __NR_${prefix}${name} $nr" | ||
20 | else | ||
21 | echo "#define __NR_${prefix}${name} ($offset + $nr)" | ||
22 | fi | ||
23 | done | ||
24 | |||
25 | echo "" | ||
26 | echo "#endif /* ${fileguard} */" | ||
27 | ) > "$out" | ||
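Editor's note: given the i386 table above, the generated header is just a guard plus one define per row; a sketch of the first lines of unistd_32.h as this script would emit them (guard name derived from the output file name):

    #ifndef _ASM_X86_UNISTD_32_H
    #define _ASM_X86_UNISTD_32_H 1

    #define __NR_restart_syscall 0
    #define __NR_exit 1
    #define __NR_fork 2
    /* ... */

    #endif /* _ASM_X86_UNISTD_32_H */

With the ia32_ prefix the same row becomes "#define __NR_ia32_exit 1", and a fifth (offset) argument would instead wrap it as "($offset + 1)", exactly as the script's two branches show.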
diff --git a/arch/x86/syscalls/syscalltbl.sh b/arch/x86/syscalls/syscalltbl.sh new file mode 100644 index 000000000000..0e7f8ec071e7 --- /dev/null +++ b/arch/x86/syscalls/syscalltbl.sh | |||
@@ -0,0 +1,15 @@ | |||
1 | #!/bin/sh | ||
2 | |||
3 | in="$1" | ||
4 | out="$2" | ||
5 | |||
6 | grep '^[0-9]' "$in" | sort -n | ( | ||
7 | while read nr abi name entry compat; do | ||
8 | abi=`echo "$abi" | tr '[a-z]' '[A-Z]'` | ||
9 | if [ -n "$compat" ]; then | ||
10 | echo "__SYSCALL_${abi}($nr, $entry, $compat)" | ||
11 | elif [ -n "$entry" ]; then | ||
12 | echo "__SYSCALL_${abi}($nr, $entry, $entry)" | ||
13 | fi | ||
14 | done | ||
15 | ) > "$out" | ||
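Editor's note: the table generator is symmetric — each row becomes one __SYSCALL_<ABI>() invocation, the entry point is doubled into the compat slot when the row has none, and rows with no entry point at all (the unimplemented slots such as create_module) emit nothing. A sketch of the output against the 64-bit table, plus the consumer-side macro it meets in sys_call_table_64.c further below:

    __SYSCALL_64(0, sys_read, sys_read)	/* compat slot filled from entry */
    __SYSCALL_64(1, sys_write, sys_write)
    /* ...rows like "174 64 create_module" are skipped... */

    #define __SYSCALL_64(nr, sym, compat) [nr] = sym,	/* one consumer's shape */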
diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile index f82082677337..d511aa97533a 100644 --- a/arch/x86/tools/Makefile +++ b/arch/x86/tools/Makefile | |||
@@ -18,14 +18,21 @@ chkobjdump = $(srctree)/arch/x86/tools/chkobjdump.awk | |||
18 | quiet_cmd_posttest = TEST $@ | 18 | quiet_cmd_posttest = TEST $@ |
19 | cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(distill_awk) | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose) | 19 | cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(distill_awk) | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose) |
20 | 20 | ||
21 | posttest: $(obj)/test_get_len vmlinux | 21 | quiet_cmd_sanitytest = TEST $@ |
22 | cmd_sanitytest = $(obj)/insn_sanity $(posttest_64bit) -m 1000000 | ||
23 | |||
24 | posttest: $(obj)/test_get_len vmlinux $(obj)/insn_sanity | ||
22 | $(call cmd,posttest) | 25 | $(call cmd,posttest) |
26 | $(call cmd,sanitytest) | ||
23 | 27 | ||
24 | hostprogs-y := test_get_len | 28 | hostprogs-y += test_get_len insn_sanity |
25 | 29 | ||
26 | # -I needed for generated C source and C source which is in the kernel tree. | 30 | # -I needed for generated C source and C source which is in the kernel tree. |
27 | HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ | 31 | HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ |
28 | 32 | ||
33 | HOSTCFLAGS_insn_sanity.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ | ||
34 | |||
29 | # Dependencies are also needed. | 35 | # Dependencies are also needed. |
30 | $(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c | 36 | $(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c |
31 | 37 | ||
38 | $(obj)/insn_sanity.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c | ||
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk index eaf11f52fc0b..5f6a5b6c3a15 100644 --- a/arch/x86/tools/gen-insn-attr-x86.awk +++ b/arch/x86/tools/gen-insn-attr-x86.awk | |||
@@ -47,7 +47,7 @@ BEGIN { | |||
47 | sep_expr = "^\\|$" | 47 | sep_expr = "^\\|$" |
48 | group_expr = "^Grp[0-9A-Za-z]+" | 48 | group_expr = "^Grp[0-9A-Za-z]+" |
49 | 49 | ||
50 | imm_expr = "^[IJAO][a-z]" | 50 | imm_expr = "^[IJAOL][a-z]" |
51 | imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" | 51 | imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" |
52 | imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" | 52 | imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" |
53 | imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)" | 53 | imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)" |
@@ -59,6 +59,7 @@ BEGIN { | |||
59 | imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)" | 59 | imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)" |
60 | imm_flag["Ob"] = "INAT_MOFFSET" | 60 | imm_flag["Ob"] = "INAT_MOFFSET" |
61 | imm_flag["Ov"] = "INAT_MOFFSET" | 61 | imm_flag["Ov"] = "INAT_MOFFSET" |
62 | imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" | ||
62 | 63 | ||
63 | modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])" | 64 | modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])" |
64 | force64_expr = "\\([df]64\\)" | 65 | force64_expr = "\\([df]64\\)" |
@@ -70,8 +71,12 @@ BEGIN { | |||
70 | lprefix3_expr = "\\(F2\\)" | 71 | lprefix3_expr = "\\(F2\\)" |
71 | max_lprefix = 4 | 72 | max_lprefix = 4 |
72 | 73 | ||
73 | vexok_expr = "\\(VEX\\)" | 74 | # All opcodes starting with lower-case 'v' or with (v1) superscript |
74 | vexonly_expr = "\\(oVEX\\)" | 75 | # accept the VEX prefix |
76 | vexok_opcode_expr = "^v.*" | ||
77 | vexok_expr = "\\(v1\\)" | ||
78 | # All opcodes with (v) superscript support *only* the VEX prefix | ||
79 | vexonly_expr = "\\(v\\)" | ||
75 | 80 | ||
76 | prefix_expr = "\\(Prefix\\)" | 81 | prefix_expr = "\\(Prefix\\)" |
77 | prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" | 82 | prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" |
@@ -85,8 +90,8 @@ BEGIN { | |||
85 | prefix_num["SEG=GS"] = "INAT_PFX_GS" | 90 | prefix_num["SEG=GS"] = "INAT_PFX_GS" |
86 | prefix_num["SEG=SS"] = "INAT_PFX_SS" | 91 | prefix_num["SEG=SS"] = "INAT_PFX_SS" |
87 | prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" | 92 | prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" |
88 | prefix_num["2bytes-VEX"] = "INAT_PFX_VEX2" | 93 | prefix_num["VEX+1byte"] = "INAT_PFX_VEX2" |
89 | prefix_num["3bytes-VEX"] = "INAT_PFX_VEX3" | 94 | prefix_num["VEX+2byte"] = "INAT_PFX_VEX3" |
90 | 95 | ||
91 | clear_vars() | 96 | clear_vars() |
92 | } | 97 | } |
@@ -310,12 +315,10 @@ function convert_operands(count,opnd, i,j,imm,mod) | |||
310 | if (match(opcode, fpu_expr)) | 315 | if (match(opcode, fpu_expr)) |
311 | flags = add_flags(flags, "INAT_MODRM") | 316 | flags = add_flags(flags, "INAT_MODRM") |
312 | 317 | ||
313 | # check VEX only code | 318 | # check VEX codes |
314 | if (match(ext, vexonly_expr)) | 319 | if (match(ext, vexonly_expr)) |
315 | flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") | 320 | flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") |
316 | 321 | else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr)) | |
317 | # check VEX only code | ||
318 | if (match(ext, vexok_expr)) | ||
319 | flags = add_flags(flags, "INAT_VEXOK") | 322 | flags = add_flags(flags, "INAT_VEXOK") |
320 | 323 | ||
321 | # check prefixes | 324 | # check prefixes |
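Editor's note: the net effect of the rewritten classification is that the old (VEX)/(oVEX) markers are gone — an opcode whose mnemonic starts with 'v' (the AVX naming convention, e.g. vmovups) now picks up INAT_VEXOK from its name alone, a (v1) superscript marks VEX-capable non-'v' entries, and only a bare (v) superscript additionally sets INAT_VEXONLY.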
diff --git a/arch/x86/tools/insn_sanity.c b/arch/x86/tools/insn_sanity.c new file mode 100644 index 000000000000..cc2f8c131286 --- /dev/null +++ b/arch/x86/tools/insn_sanity.c | |||
@@ -0,0 +1,275 @@ | |||
1 | /* | ||
2 | * x86 decoder sanity test - based on test_get_insn.c | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright (C) IBM Corporation, 2009 | ||
19 | * Copyright (C) Hitachi, Ltd., 2011 | ||
20 | */ | ||
21 | |||
22 | #include <stdlib.h> | ||
23 | #include <stdio.h> | ||
24 | #include <string.h> | ||
25 | #include <assert.h> | ||
26 | #include <unistd.h> | ||
27 | #include <sys/types.h> | ||
28 | #include <sys/stat.h> | ||
29 | #include <fcntl.h> | ||
30 | |||
31 | #define unlikely(cond) (cond) | ||
32 | #define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0])) | ||
33 | |||
34 | #include <asm/insn.h> | ||
35 | #include <inat.c> | ||
36 | #include <insn.c> | ||
37 | |||
38 | /* | ||
39 | * Test of instruction analysis against tampering. | ||
40 | * Feed random binary to instruction decoder and ensure not to | ||
41 | * access out-of-instruction-buffer. | ||
42 | */ | ||
43 | |||
44 | #define DEFAULT_MAX_ITER 10000 | ||
45 | #define INSN_NOP 0x90 | ||
46 | |||
47 | static const char *prog; /* Program name */ | ||
48 | static int verbose; /* Verbosity */ | ||
49 | static int x86_64; /* x86-64 bit mode flag */ | ||
50 | static unsigned int seed; /* Random seed */ | ||
51 | static unsigned long iter_start; /* Start of iteration number */ | ||
52 | static unsigned long iter_end = DEFAULT_MAX_ITER; /* End of iteration number */ | ||
53 | static FILE *input_file; /* Input file name */ | ||
54 | |||
55 | static void usage(const char *err) | ||
56 | { | ||
57 | if (err) | ||
58 | fprintf(stderr, "Error: %s\n\n", err); | ||
59 | fprintf(stderr, "Usage: %s [-y|-n|-v] [-s seed[,no]] [-m max] [-i input]\n", prog); | ||
60 | fprintf(stderr, "\t-y 64bit mode\n"); | ||
61 | fprintf(stderr, "\t-n 32bit mode\n"); | ||
62 | fprintf(stderr, "\t-v Verbosity (-vv dumps any decoded result)\n"); | ||
63 | fprintf(stderr, "\t-s Give a random seed (and iteration number)\n"); | ||
64 | fprintf(stderr, "\t-m Give a maximum iteration number\n"); | ||
65 | fprintf(stderr, "\t-i Give an input file with hex instruction bytes\n"); | ||
66 | exit(1); | ||
67 | } | ||
68 | |||
69 | static void dump_field(FILE *fp, const char *name, const char *indent, | ||
70 | struct insn_field *field) | ||
71 | { | ||
72 | fprintf(fp, "%s.%s = {\n", indent, name); | ||
73 | fprintf(fp, "%s\t.value = %d, bytes[] = {%x, %x, %x, %x},\n", | ||
74 | indent, field->value, field->bytes[0], field->bytes[1], | ||
75 | field->bytes[2], field->bytes[3]); | ||
76 | fprintf(fp, "%s\t.got = %d, .nbytes = %d},\n", indent, | ||
77 | field->got, field->nbytes); | ||
78 | } | ||
79 | |||
80 | static void dump_insn(FILE *fp, struct insn *insn) | ||
81 | { | ||
82 | fprintf(fp, "Instruction = {\n"); | ||
83 | dump_field(fp, "prefixes", "\t", &insn->prefixes); | ||
84 | dump_field(fp, "rex_prefix", "\t", &insn->rex_prefix); | ||
85 | dump_field(fp, "vex_prefix", "\t", &insn->vex_prefix); | ||
86 | dump_field(fp, "opcode", "\t", &insn->opcode); | ||
87 | dump_field(fp, "modrm", "\t", &insn->modrm); | ||
88 | dump_field(fp, "sib", "\t", &insn->sib); | ||
89 | dump_field(fp, "displacement", "\t", &insn->displacement); | ||
90 | dump_field(fp, "immediate1", "\t", &insn->immediate1); | ||
91 | dump_field(fp, "immediate2", "\t", &insn->immediate2); | ||
92 | fprintf(fp, "\t.attr = %x, .opnd_bytes = %d, .addr_bytes = %d,\n", | ||
93 | insn->attr, insn->opnd_bytes, insn->addr_bytes); | ||
94 | fprintf(fp, "\t.length = %d, .x86_64 = %d, .kaddr = %p}\n", | ||
95 | insn->length, insn->x86_64, insn->kaddr); | ||
96 | } | ||
97 | |||
98 | static void dump_stream(FILE *fp, const char *msg, unsigned long nr_iter, | ||
99 | unsigned char *insn_buf, struct insn *insn) | ||
100 | { | ||
101 | int i; | ||
102 | |||
103 | fprintf(fp, "%s:\n", msg); | ||
104 | |||
105 | dump_insn(fp, insn); | ||
106 | |||
107 | fprintf(fp, "You can reproduce this with the command(s) below:\n"); | ||
108 | |||
109 | /* Input a decoded instruction sequence directly */ | ||
110 | fprintf(fp, " $ echo "); | ||
111 | for (i = 0; i < MAX_INSN_SIZE; i++) | ||
112 | fprintf(fp, " %02x", insn_buf[i]); | ||
113 | fprintf(fp, " | %s -i -\n", prog); | ||
114 | |||
115 | if (!input_file) { | ||
116 | fprintf(fp, "Or:\n"); | ||
117 | /* Give a seed and iteration number */ | ||
118 | fprintf(fp, " $ %s -s 0x%x,%lu\n", prog, seed, nr_iter); | ||
119 | } | ||
120 | } | ||
121 | |||
122 | static void init_random_seed(void) | ||
123 | { | ||
124 | int fd; | ||
125 | |||
126 | fd = open("/dev/urandom", O_RDONLY); | ||
127 | if (fd < 0) | ||
128 | goto fail; | ||
129 | |||
130 | if (read(fd, &seed, sizeof(seed)) != sizeof(seed)) | ||
131 | goto fail; | ||
132 | |||
133 | close(fd); | ||
134 | return; | ||
135 | fail: | ||
136 | usage("Failed to open /dev/urandom"); | ||
137 | } | ||
138 | |||
139 | /* Read given instruction sequence from the input file */ | ||
140 | static int read_next_insn(unsigned char *insn_buf) | ||
141 | { | ||
142 | char buf[256] = "", *tmp; | ||
143 | int i; | ||
144 | |||
145 | tmp = fgets(buf, ARRAY_SIZE(buf), input_file); | ||
146 | if (tmp == NULL || feof(input_file)) | ||
147 | return 0; | ||
148 | |||
149 | for (i = 0; i < MAX_INSN_SIZE; i++) { | ||
150 | insn_buf[i] = (unsigned char)strtoul(tmp, &tmp, 16); | ||
151 | if (*tmp != ' ') | ||
152 | break; | ||
153 | } | ||
154 | |||
155 | return i; | ||
156 | } | ||
157 | |||
158 | static int generate_insn(unsigned char *insn_buf) | ||
159 | { | ||
160 | int i; | ||
161 | |||
162 | if (input_file) | ||
163 | return read_next_insn(insn_buf); | ||
164 | |||
165 | /* Fills buffer with random binary up to MAX_INSN_SIZE */ | ||
166 | for (i = 0; i < MAX_INSN_SIZE - 1; i += 2) | ||
167 | *(unsigned short *)(&insn_buf[i]) = random() & 0xffff; | ||
168 | |||
169 | while (i < MAX_INSN_SIZE) | ||
170 | insn_buf[i++] = random() & 0xff; | ||
171 | |||
172 | return i; | ||
173 | } | ||
174 | |||
175 | static void parse_args(int argc, char **argv) | ||
176 | { | ||
177 | int c; | ||
178 | char *tmp = NULL; | ||
179 | int set_seed = 0; | ||
180 | |||
181 | prog = argv[0]; | ||
182 | while ((c = getopt(argc, argv, "ynvs:m:i:")) != -1) { | ||
183 | switch (c) { | ||
184 | case 'y': | ||
185 | x86_64 = 1; | ||
186 | break; | ||
187 | case 'n': | ||
188 | x86_64 = 0; | ||
189 | break; | ||
190 | case 'v': | ||
191 | verbose++; | ||
192 | break; | ||
193 | case 'i': | ||
194 | if (strcmp("-", optarg) == 0) | ||
195 | input_file = stdin; | ||
196 | else | ||
197 | input_file = fopen(optarg, "r"); | ||
198 | if (!input_file) | ||
199 | usage("Failed to open input file"); | ||
200 | break; | ||
201 | case 's': | ||
202 | seed = (unsigned int)strtoul(optarg, &tmp, 0); | ||
203 | if (*tmp == ',') { | ||
204 | optarg = tmp + 1; | ||
205 | iter_start = strtoul(optarg, &tmp, 0); | ||
206 | } | ||
207 | if (*tmp != '\0' || tmp == optarg) | ||
208 | usage("Failed to parse seed"); | ||
209 | set_seed = 1; | ||
210 | break; | ||
211 | case 'm': | ||
212 | iter_end = strtoul(optarg, &tmp, 0); | ||
213 | if (*tmp != '\0' || tmp == optarg) | ||
214 | usage("Failed to parse max_iter"); | ||
215 | break; | ||
216 | default: | ||
217 | usage(NULL); | ||
218 | } | ||
219 | } | ||
220 | |||
221 | /* Check errors */ | ||
222 | if (iter_end < iter_start) | ||
223 | usage("Max iteration number must be bigger than the start iteration number"); | ||
224 | |||
225 | if (set_seed && input_file) | ||
226 | usage("Don't use input file (-i) with random seed (-s)"); | ||
227 | |||
228 | /* Initialize random seed */ | ||
229 | if (!input_file) { | ||
230 | if (!set_seed) /* No seed is given */ | ||
231 | init_random_seed(); | ||
232 | srand(seed); | ||
233 | } | ||
234 | } | ||
235 | |||
236 | int main(int argc, char **argv) | ||
237 | { | ||
238 | struct insn insn; | ||
239 | int insns = 0; | ||
240 | int errors = 0; | ||
241 | unsigned long i; | ||
242 | unsigned char insn_buf[MAX_INSN_SIZE * 2]; | ||
243 | |||
244 | parse_args(argc, argv); | ||
245 | |||
246 | /* Prepare stop bytes with NOPs */ | ||
247 | memset(insn_buf + MAX_INSN_SIZE, INSN_NOP, MAX_INSN_SIZE); | ||
248 | |||
249 | for (i = 0; i < iter_end; i++) { | ||
250 | if (generate_insn(insn_buf) <= 0) | ||
251 | break; | ||
252 | |||
253 | if (i < iter_start) /* Skip to given iteration number */ | ||
254 | continue; | ||
255 | |||
256 | /* Decode an instruction */ | ||
257 | insn_init(&insn, insn_buf, x86_64); | ||
258 | insn_get_length(&insn); | ||
259 | |||
260 | if (insn.next_byte <= insn.kaddr || | ||
261 | insn.kaddr + MAX_INSN_SIZE < insn.next_byte) { | ||
262 | /* Access out-of-range memory */ | ||
263 | dump_stream(stderr, "Error: Found an access violation", i, insn_buf, &insn); | ||
264 | errors++; | ||
265 | } else if (verbose && !insn_complete(&insn)) | ||
266 | dump_stream(stdout, "Info: Found an undecodable input", i, insn_buf, &insn); | ||
267 | else if (verbose >= 2) | ||
268 | dump_insn(stdout, &insn); | ||
269 | insns++; | ||
270 | } | ||
271 | |||
272 | fprintf(stdout, "%s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n", (errors) ? "Failure" : "Success", insns, (input_file) ? "given" : "random", errors, seed); | ||
273 | |||
274 | return errors ? 1 : 0; | ||
275 | } | ||
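Editor's note: the tool closes its own reproduction loop — on a failure, dump_stream() prints both replay paths, either piping the hex bytes back through "insn_sanity -i -" or rerunning with "-s seed,iteration", and the tools/Makefile hunk above wires it into posttest as "insn_sanity $(posttest_64bit) -m 1000000". The pass/fail criterion is the bounds invariant checked in main(): after insn_get_length(), insn.next_byte must lie strictly above insn.kaddr and no further than insn.kaddr + MAX_INSN_SIZE.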
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index 1d97bd84b6fb..b2b54d2edf53 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig | |||
@@ -6,14 +6,6 @@ menu "UML-specific options" | |||
6 | 6 | ||
7 | menu "Host processor type and features" | 7 | menu "Host processor type and features" |
8 | 8 | ||
9 | config CMPXCHG_LOCAL | ||
10 | bool | ||
11 | default n | ||
12 | |||
13 | config CMPXCHG_DOUBLE | ||
14 | bool | ||
15 | default n | ||
16 | |||
17 | source "arch/x86/Kconfig.cpu" | 9 | source "arch/x86/Kconfig.cpu" |
18 | 10 | ||
19 | endmenu | 11 | endmenu |
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile index 8fb58400e415..5d065b2222d3 100644 --- a/arch/x86/um/Makefile +++ b/arch/x86/um/Makefile | |||
@@ -37,7 +37,8 @@ subarch-$(CONFIG_MODULES) += ../kernel/module.o | |||
37 | USER_OBJS := bugs_$(BITS).o ptrace_user.o fault.o | 37 | USER_OBJS := bugs_$(BITS).o ptrace_user.o fault.o |
38 | 38 | ||
39 | extra-y += user-offsets.s | 39 | extra-y += user-offsets.s |
40 | $(obj)/user-offsets.s: c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) | 40 | $(obj)/user-offsets.s: c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) \ |
41 | -Iarch/x86/include/generated | ||
41 | 42 | ||
42 | UNPROFILE_OBJS := stub_segv.o | 43 | UNPROFILE_OBJS := stub_segv.o |
43 | CFLAGS_stub_segv.o := $(CFLAGS_NO_HARDENING) | 44 | CFLAGS_stub_segv.o := $(CFLAGS_NO_HARDENING) |
diff --git a/arch/x86/um/shared/sysdep/ptrace.h b/arch/x86/um/shared/sysdep/ptrace.h index 711b1621747f..5ef9344a8b24 100644 --- a/arch/x86/um/shared/sysdep/ptrace.h +++ b/arch/x86/um/shared/sysdep/ptrace.h | |||
@@ -3,3 +3,8 @@ | |||
3 | #else | 3 | #else |
4 | #include "ptrace_64.h" | 4 | #include "ptrace_64.h" |
5 | #endif | 5 | #endif |
6 | |||
7 | static inline long regs_return_value(struct uml_pt_regs *regs) | ||
8 | { | ||
9 | return UPT_SYSCALL_RET(regs); | ||
10 | } | ||
diff --git a/arch/x86/um/sys_call_table_32.S b/arch/x86/um/sys_call_table_32.S deleted file mode 100644 index a7ca80d2dceb..000000000000 --- a/arch/x86/um/sys_call_table_32.S +++ /dev/null | |||
@@ -1,26 +0,0 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | /* Steal i386 syscall table for our purposes, but with some slight changes.*/ | ||
3 | |||
4 | #define sys_iopl sys_ni_syscall | ||
5 | #define sys_ioperm sys_ni_syscall | ||
6 | |||
7 | #define sys_vm86old sys_ni_syscall | ||
8 | #define sys_vm86 sys_ni_syscall | ||
9 | |||
10 | #define old_mmap sys_old_mmap | ||
11 | |||
12 | #define ptregs_fork sys_fork | ||
13 | #define ptregs_execve sys_execve | ||
14 | #define ptregs_iopl sys_iopl | ||
15 | #define ptregs_vm86old sys_vm86old | ||
16 | #define ptregs_clone sys_clone | ||
17 | #define ptregs_vm86 sys_vm86 | ||
18 | #define ptregs_sigaltstack sys_sigaltstack | ||
19 | #define ptregs_vfork sys_vfork | ||
20 | |||
21 | .section .rodata,"a" | ||
22 | |||
23 | #include "../kernel/syscall_table_32.S" | ||
24 | |||
25 | ENTRY(syscall_table_size) | ||
26 | .long .-sys_call_table | ||
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c new file mode 100644 index 000000000000..416bd40c0eba --- /dev/null +++ b/arch/x86/um/sys_call_table_32.c | |||
@@ -0,0 +1,55 @@ | |||
1 | /* | ||
2 | * System call table for UML/i386, copied from arch/x86/kernel/syscall_*.c | ||
3 | * with some changes for UML. | ||
4 | */ | ||
5 | |||
6 | #include <linux/linkage.h> | ||
7 | #include <linux/sys.h> | ||
8 | #include <linux/cache.h> | ||
9 | #include <generated/user_constants.h> | ||
10 | |||
11 | #define __NO_STUBS | ||
12 | |||
13 | /* | ||
14 | * Below you can see, in terms of #define's, the differences between the i386 | ||
15 | * and the UML syscall table. | ||
16 | */ | ||
17 | |||
18 | /* Not going to be implemented by UML, since we have no hardware. */ | ||
19 | #define sys_iopl sys_ni_syscall | ||
20 | #define sys_ioperm sys_ni_syscall | ||
21 | |||
22 | #define sys_vm86old sys_ni_syscall | ||
23 | #define sys_vm86 sys_ni_syscall | ||
24 | |||
25 | #define old_mmap sys_old_mmap | ||
26 | |||
27 | #define ptregs_fork sys_fork | ||
28 | #define ptregs_execve sys_execve | ||
29 | #define ptregs_iopl sys_iopl | ||
30 | #define ptregs_vm86old sys_vm86old | ||
31 | #define ptregs_clone sys_clone | ||
32 | #define ptregs_vm86 sys_vm86 | ||
33 | #define ptregs_sigaltstack sys_sigaltstack | ||
34 | #define ptregs_vfork sys_vfork | ||
35 | |||
36 | #define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ; | ||
37 | #include <asm/syscalls_32.h> | ||
38 | |||
39 | #undef __SYSCALL_I386 | ||
40 | #define __SYSCALL_I386(nr, sym, compat) [ nr ] = sym, | ||
41 | |||
42 | typedef void (*sys_call_ptr_t)(void); | ||
43 | |||
44 | extern void sys_ni_syscall(void); | ||
45 | |||
46 | const sys_call_ptr_t sys_call_table[] __cacheline_aligned = { | ||
47 | /* | ||
48 | * Smells like a compiler bug -- it doesn't work | ||
49 | * when the & below is removed. | ||
50 | */ | ||
51 | [0 ... __NR_syscall_max] = &sys_ni_syscall, | ||
52 | #include <asm/syscalls_32.h> | ||
53 | }; | ||
54 | |||
55 | int syscall_table_size = sizeof(sys_call_table); | ||
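The table above leans on two GNU C extensions: a designated range initializer pre-fills every slot with sys_ni_syscall, and the per-entry initializers that __SYSCALL_I386 expands to afterwards override that default (GCC allows the override, warning only under -Woverride-init). A standalone sketch of the pattern, with made-up handlers standing in for the real syscalls:

    #include <stdio.h>

    #define NR_MAX 7    /* stand-in for __NR_syscall_max */

    typedef void (*sys_call_ptr_t)(void);

    static void ni_syscall(void) { puts("ENOSYS"); }
    static void do_read(void)    { puts("read");   }
    static void do_write(void)   { puts("write");  }

    static const sys_call_ptr_t table[NR_MAX + 1] = {
        /* default every slot first ... */
        [0 ... NR_MAX] = &ni_syscall,
        /* ... then let the later designated initializers win,
         * as the included <asm/syscalls_32.h> entries do */
        [3] = do_read,
        [4] = do_write,
    };

    int main(void)
    {
        table[3]();    /* read   */
        table[5]();    /* ENOSYS */
        return 0;
    }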
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c index 99522f78b162..fe626c3ba01b 100644 --- a/arch/x86/um/sys_call_table_64.c +++ b/arch/x86/um/sys_call_table_64.c | |||
@@ -1,11 +1,12 @@ | |||
1 | /* | 1 | /* |
2 | * System call table for UML/x86-64, copied from arch/x86_64/kernel/syscall.c | 2 | * System call table for UML/x86-64, copied from arch/x86/kernel/syscall_*.c |
3 | * with some changes for UML. | 3 | * with some changes for UML. |
4 | */ | 4 | */ |
5 | 5 | ||
6 | #include <linux/linkage.h> | 6 | #include <linux/linkage.h> |
7 | #include <linux/sys.h> | 7 | #include <linux/sys.h> |
8 | #include <linux/cache.h> | 8 | #include <linux/cache.h> |
9 | #include <generated/user_constants.h> | ||
9 | 10 | ||
10 | #define __NO_STUBS | 11 | #define __NO_STUBS |
11 | 12 | ||
@@ -34,31 +35,23 @@ | |||
34 | #define stub_sigaltstack sys_sigaltstack | 35 | #define stub_sigaltstack sys_sigaltstack |
35 | #define stub_rt_sigreturn sys_rt_sigreturn | 36 | #define stub_rt_sigreturn sys_rt_sigreturn |
36 | 37 | ||
37 | #define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; | 38 | #define __SYSCALL_64(nr, sym, compat) extern asmlinkage void sym(void) ; |
38 | #undef _ASM_X86_UNISTD_64_H | 39 | #include <asm/syscalls_64.h> |
39 | #include "../../x86/include/asm/unistd_64.h" | ||
40 | 40 | ||
41 | #undef __SYSCALL | 41 | #undef __SYSCALL_64 |
42 | #define __SYSCALL(nr, sym) [ nr ] = sym, | 42 | #define __SYSCALL_64(nr, sym, compat) [ nr ] = sym, |
43 | #undef _ASM_X86_UNISTD_64_H | ||
44 | 43 | ||
45 | typedef void (*sys_call_ptr_t)(void); | 44 | typedef void (*sys_call_ptr_t)(void); |
46 | 45 | ||
47 | extern void sys_ni_syscall(void); | 46 | extern void sys_ni_syscall(void); |
48 | 47 | ||
49 | /* | 48 | const sys_call_ptr_t sys_call_table[] __cacheline_aligned = { |
50 | * We used to have a trick here which made sure that holes in the | 49 | /* |
51 | * x86_64 table were filled in with sys_ni_syscall, but a comment in | 50 | * Smells like a compiler bug -- it doesn't work |
52 | * unistd_64.h says that holes aren't allowed, so the trick was | 51 | * when the & below is removed. |
53 | * removed. | 52 | */ |
54 | * The trick looked like this | 53 | [0 ... __NR_syscall_max] = &sys_ni_syscall, |
55 | * [0 ... UM_NR_syscall_max] = &sys_ni_syscall | 54 | #include <asm/syscalls_64.h> |
56 | * before including unistd_64.h - the later initializations overwrote | ||
57 | * the sys_ni_syscall filler. | ||
58 | */ | ||
59 | |||
60 | sys_call_ptr_t sys_call_table[] __cacheline_aligned = { | ||
61 | #include <asm/unistd_64.h> | ||
62 | }; | 55 | }; |
63 | 56 | ||
64 | int syscall_table_size = sizeof(sys_call_table); | 57 | int syscall_table_size = sizeof(sys_call_table); |
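Both table files now use the same double-include trick: the generated <asm/syscalls_64.h> (or syscalls_32.h) expands __SYSCALL_64()/__SYSCALL_I386() once per system call, so including it first with a declaration-shaped definition and again with an initializer-shaped one derives both the extern declarations and the table body from a single list. A self-contained sketch, with a local macro list standing in for the generated header (the real macros also carry a compat argument, omitted here):

    #include <stdio.h>

    /* stand-in for the generated header: one X(nr, sym) per system call */
    #define SYSCALL_LIST(X) \
        X(0, my_read)       \
        X(1, my_write)

    /* first expansion: declarations */
    #define __SYSCALL_64(nr, sym) static void sym(void);
    SYSCALL_LIST(__SYSCALL_64)
    #undef __SYSCALL_64

    /* second expansion: table entries */
    #define __SYSCALL_64(nr, sym) [nr] = sym,
    static void (*const table[])(void) = {
        SYSCALL_LIST(__SYSCALL_64)
    };
    #undef __SYSCALL_64

    static void my_read(void)  { puts("read");  }
    static void my_write(void) { puts("write"); }

    int main(void)
    {
        table[0]();
        table[1]();
        return 0;
    }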
diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c index ca49be8ddd0c..5edf4f4bbf53 100644 --- a/arch/x86/um/user-offsets.c +++ b/arch/x86/um/user-offsets.c | |||
@@ -8,6 +8,18 @@ | |||
8 | #include <asm/ptrace.h> | 8 | #include <asm/ptrace.h> |
9 | #include <asm/types.h> | 9 | #include <asm/types.h> |
10 | 10 | ||
11 | #ifdef __i386__ | ||
12 | #define __SYSCALL_I386(nr, sym, compat) [nr] = 1, | ||
13 | static char syscalls[] = { | ||
14 | #include <asm/syscalls_32.h> | ||
15 | }; | ||
16 | #else | ||
17 | #define __SYSCALL_64(nr, sym, compat) [nr] = 1, | ||
18 | static char syscalls[] = { | ||
19 | #include <asm/syscalls_64.h> | ||
20 | }; | ||
21 | #endif | ||
22 | |||
11 | #define DEFINE(sym, val) \ | 23 | #define DEFINE(sym, val) \ |
12 | asm volatile("\n->" #sym " %0 " #val : : "i" (val)) | 24 | asm volatile("\n->" #sym " %0 " #val : : "i" (val)) |
13 | 25 | ||
@@ -77,4 +89,7 @@ void foo(void) | |||
77 | DEFINE(UM_PROT_READ, PROT_READ); | 89 | DEFINE(UM_PROT_READ, PROT_READ); |
78 | DEFINE(UM_PROT_WRITE, PROT_WRITE); | 90 | DEFINE(UM_PROT_WRITE, PROT_WRITE); |
79 | DEFINE(UM_PROT_EXEC, PROT_EXEC); | 91 | DEFINE(UM_PROT_EXEC, PROT_EXEC); |
92 | |||
93 | DEFINE(__NR_syscall_max, sizeof(syscalls) - 1); | ||
94 | DEFINE(NR_syscalls, sizeof(syscalls)); | ||
80 | } | 95 | } |
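The new syscalls[] array in user-offsets.c exists only to be measured: every defined syscall number marks its slot with a 1, so the array's length is the highest number plus one, and the two DEFINE()s can emit __NR_syscall_max and NR_syscalls as compile-time constants into the generated user_constants.h. The counting trick in isolation, with a sparse hypothetical table:

    #include <stdio.h>

    /* each defined syscall number marks its slot; the array length is then
     * highest_nr + 1, holes included */
    static const char syscalls[] = {
        [0]  = 1,
        [1]  = 1,
        [59] = 1,
    };

    int main(void)
    {
        printf("__NR_syscall_max = %zu\n", sizeof(syscalls) - 1); /* 59 */
        printf("NR_syscalls      = %zu\n", sizeof(syscalls));     /* 60 */
        return 0;
    }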
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 26c731a106af..fdce49c7aff6 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig | |||
@@ -29,7 +29,8 @@ config XEN_PVHVM | |||
29 | 29 | ||
30 | config XEN_MAX_DOMAIN_MEMORY | 30 | config XEN_MAX_DOMAIN_MEMORY |
31 | int | 31 | int |
32 | default 128 | 32 | default 500 if X86_64 |
33 | default 64 if X86_32 | ||
33 | depends on XEN | 34 | depends on XEN |
34 | help | 35 | help |
35 | This only affects the sizing of some bss arrays, the unused | 36 | This only affects the sizing of some bss arrays, the unused |
@@ -48,3 +49,4 @@ config XEN_DEBUG_FS | |||
48 | help | 49 | help |
49 | Enable statistics output and various tuning options in debugfs. | 50 | Enable statistics output and various tuning options in debugfs. |
50 | Enabling this option may incur a significant performance overhead. | 51 | Enabling this option may incur a significant performance overhead. |
52 | |||
diff --git a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c index 7c0fedd98ea0..ef1db1900d86 100644 --- a/arch/x86/xen/debugfs.c +++ b/arch/x86/xen/debugfs.c | |||
@@ -109,7 +109,7 @@ static const struct file_operations u32_array_fops = { | |||
109 | .llseek = no_llseek, | 109 | .llseek = no_llseek, |
110 | }; | 110 | }; |
111 | 111 | ||
112 | struct dentry *xen_debugfs_create_u32_array(const char *name, mode_t mode, | 112 | struct dentry *xen_debugfs_create_u32_array(const char *name, umode_t mode, |
113 | struct dentry *parent, | 113 | struct dentry *parent, |
114 | u32 *array, unsigned elements) | 114 | u32 *array, unsigned elements) |
115 | { | 115 | { |
diff --git a/arch/x86/xen/debugfs.h b/arch/x86/xen/debugfs.h index e28132084832..78d25499be5b 100644 --- a/arch/x86/xen/debugfs.h +++ b/arch/x86/xen/debugfs.h | |||
@@ -3,7 +3,7 @@ | |||
3 | 3 | ||
4 | struct dentry * __init xen_init_debugfs(void); | 4 | struct dentry * __init xen_init_debugfs(void); |
5 | 5 | ||
6 | struct dentry *xen_debugfs_create_u32_array(const char *name, mode_t mode, | 6 | struct dentry *xen_debugfs_create_u32_array(const char *name, umode_t mode, |
7 | struct dentry *parent, | 7 | struct dentry *parent, |
8 | u32 *array, unsigned elements); | 8 | u32 *array, unsigned elements); |
9 | 9 | ||
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 1f928659c338..12eb07bfb267 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -1215,8 +1215,6 @@ asmlinkage void __init xen_start_kernel(void) | |||
1215 | local_irq_disable(); | 1215 | local_irq_disable(); |
1216 | early_boot_irqs_disabled = true; | 1216 | early_boot_irqs_disabled = true; |
1217 | 1217 | ||
1218 | memblock_init(); | ||
1219 | |||
1220 | xen_raw_console_write("mapping kernel into physical memory\n"); | 1218 | xen_raw_console_write("mapping kernel into physical memory\n"); |
1221 | pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); | 1219 | pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); |
1222 | xen_ident_map_ISA(); | 1220 | xen_ident_map_ISA(); |
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c index 5a40d24ba331..3a5f55d51907 100644 --- a/arch/x86/xen/grant-table.c +++ b/arch/x86/xen/grant-table.c | |||
@@ -54,6 +54,20 @@ static int map_pte_fn(pte_t *pte, struct page *pmd_page, | |||
54 | return 0; | 54 | return 0; |
55 | } | 55 | } |
56 | 56 | ||
57 | /* | ||
58 | * This function is used to map shared frames that store grant status. It | ||
59 | * differs from map_pte_fn above in that the frame type here is uint64_t. | ||
60 | */ | ||
61 | static int map_pte_fn_status(pte_t *pte, struct page *pmd_page, | ||
62 | unsigned long addr, void *data) | ||
63 | { | ||
64 | uint64_t **frames = (uint64_t **)data; | ||
65 | |||
66 | set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL)); | ||
67 | (*frames)++; | ||
68 | return 0; | ||
69 | } | ||
70 | |||
57 | static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, | 71 | static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, |
58 | unsigned long addr, void *data) | 72 | unsigned long addr, void *data) |
59 | { | 73 | { |
@@ -64,10 +78,10 @@ static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, | |||
64 | 78 | ||
65 | int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, | 79 | int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, |
66 | unsigned long max_nr_gframes, | 80 | unsigned long max_nr_gframes, |
67 | struct grant_entry **__shared) | 81 | void **__shared) |
68 | { | 82 | { |
69 | int rc; | 83 | int rc; |
70 | struct grant_entry *shared = *__shared; | 84 | void *shared = *__shared; |
71 | 85 | ||
72 | if (shared == NULL) { | 86 | if (shared == NULL) { |
73 | struct vm_struct *area = | 87 | struct vm_struct *area = |
@@ -83,8 +97,30 @@ int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, | |||
83 | return rc; | 97 | return rc; |
84 | } | 98 | } |
85 | 99 | ||
86 | void arch_gnttab_unmap_shared(struct grant_entry *shared, | 100 | int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes, |
87 | unsigned long nr_gframes) | 101 | unsigned long max_nr_gframes, |
102 | grant_status_t **__shared) | ||
103 | { | ||
104 | int rc; | ||
105 | grant_status_t *shared = *__shared; | ||
106 | |||
107 | if (shared == NULL) { | ||
108 | /* No need to have alloc_vm_area() hand back the PTEs; | ||
109 | * apply_to_page_range() walks and fills them anyhow. */ | ||
110 | struct vm_struct *area = | ||
111 | alloc_vm_area(PAGE_SIZE * max_nr_gframes, NULL); | ||
112 | BUG_ON(area == NULL); | ||
113 | shared = area->addr; | ||
114 | *__shared = shared; | ||
115 | } | ||
116 | |||
117 | rc = apply_to_page_range(&init_mm, (unsigned long)shared, | ||
118 | PAGE_SIZE * nr_gframes, | ||
119 | map_pte_fn_status, &frames); | ||
120 | return rc; | ||
121 | } | ||
122 | |||
123 | void arch_gnttab_unmap(void *shared, unsigned long nr_gframes) | ||
88 | { | 124 | { |
89 | apply_to_page_range(&init_mm, (unsigned long)shared, | 125 | apply_to_page_range(&init_mm, (unsigned long)shared, |
90 | PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL); | 126 | PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL); |
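arch_gnttab_map_status() reuses the apply_to_page_range() shape already used for the shared frames: the callback runs once per page and pulls the next frame through a pointer-to-pointer cursor, advancing it itself. A userspace sketch of that idiom, with a trivial page walker standing in for apply_to_page_range() and made-up frame numbers:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE 4096UL

    /* stand-in for apply_to_page_range(): call fn once per page */
    typedef int (*pte_fn_t)(unsigned long addr, void *data);

    static int for_each_page(unsigned long start, unsigned long pages,
                             pte_fn_t fn, void *data)
    {
        for (unsigned long i = 0; i < pages; i++) {
            int rc = fn(start + i * PAGE_SIZE, data);
            if (rc)
                return rc;
        }
        return 0;
    }

    /* mirrors map_pte_fn_status(): consume the next frame through the
     * cursor and advance it one entry per page */
    static int map_status_frame(unsigned long addr, void *data)
    {
        uint64_t **frames = data;

        printf("va %#lx -> status frame %llu\n", addr,
               (unsigned long long)(*frames)[0]);
        (*frames)++;
        return 0;
    }

    int main(void)
    {
        uint64_t frames[] = { 10, 11, 12 };
        uint64_t *cursor = frames;

        return for_each_page(0x100000, 3, map_status_frame, &cursor);
    }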
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 87f6673b1207..58a0e46c404d 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -1774,10 +1774,8 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, | |||
1774 | __xen_write_cr3(true, __pa(pgd)); | 1774 | __xen_write_cr3(true, __pa(pgd)); |
1775 | xen_mc_issue(PARAVIRT_LAZY_CPU); | 1775 | xen_mc_issue(PARAVIRT_LAZY_CPU); |
1776 | 1776 | ||
1777 | memblock_x86_reserve_range(__pa(xen_start_info->pt_base), | 1777 | memblock_reserve(__pa(xen_start_info->pt_base), |
1778 | __pa(xen_start_info->pt_base + | 1778 | xen_start_info->nr_pt_frames * PAGE_SIZE); |
1779 | xen_start_info->nr_pt_frames * PAGE_SIZE), | ||
1780 | "XEN PAGETABLES"); | ||
1781 | 1779 | ||
1782 | return pgd; | 1780 | return pgd; |
1783 | } | 1781 | } |
@@ -1853,10 +1851,8 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, | |||
1853 | PFN_DOWN(__pa(initial_page_table))); | 1851 | PFN_DOWN(__pa(initial_page_table))); |
1854 | xen_write_cr3(__pa(initial_page_table)); | 1852 | xen_write_cr3(__pa(initial_page_table)); |
1855 | 1853 | ||
1856 | memblock_x86_reserve_range(__pa(xen_start_info->pt_base), | 1854 | memblock_reserve(__pa(xen_start_info->pt_base), |
1857 | __pa(xen_start_info->pt_base + | 1855 | xen_start_info->nr_pt_frames * PAGE_SIZE); |
1858 | xen_start_info->nr_pt_frames * PAGE_SIZE), | ||
1859 | "XEN PAGETABLES"); | ||
1860 | 1856 | ||
1861 | return initial_page_table; | 1857 | return initial_page_table; |
1862 | } | 1858 | } |
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index b2c7179fa263..e03c63692176 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -75,7 +75,7 @@ static void __init xen_add_extra_mem(u64 start, u64 size) | |||
75 | if (i == XEN_EXTRA_MEM_MAX_REGIONS) | 75 | if (i == XEN_EXTRA_MEM_MAX_REGIONS) |
76 | printk(KERN_WARNING "Warning: not enough extra memory regions\n"); | 76 | printk(KERN_WARNING "Warning: not enough extra memory regions\n"); |
77 | 77 | ||
78 | memblock_x86_reserve_range(start, start + size, "XEN EXTRA"); | 78 | memblock_reserve(start, size); |
79 | 79 | ||
80 | xen_max_p2m_pfn = PFN_DOWN(start + size); | 80 | xen_max_p2m_pfn = PFN_DOWN(start + size); |
81 | 81 | ||
@@ -311,9 +311,8 @@ char * __init xen_memory_setup(void) | |||
311 | * - xen_start_info | 311 | * - xen_start_info |
312 | * See comment above "struct start_info" in <xen/interface/xen.h> | 312 | * See comment above "struct start_info" in <xen/interface/xen.h> |
313 | */ | 313 | */ |
314 | memblock_x86_reserve_range(__pa(xen_start_info->mfn_list), | 314 | memblock_reserve(__pa(xen_start_info->mfn_list), |
315 | __pa(xen_start_info->pt_base), | 315 | xen_start_info->pt_base - xen_start_info->mfn_list); |
316 | "XEN START INFO"); | ||
317 | 316 | ||
318 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | 317 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); |
319 | 318 | ||
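The memblock conversions in mmu.c and setup.c all follow one pattern: memblock_x86_reserve_range(start, end, name) becomes memblock_reserve(start, size), so each call site now passes a byte count instead of an end address. In the mfn_list case the count is taken as the difference of two virtual addresses, which works because both pointers sit in the same linear mapping, so the virtual span equals the physical one. A toy sketch of the conversion arithmetic (addresses are hypothetical, and the reserve function is a print-only stand-in):

    #include <stdio.h>

    /* stand-in for the new API: (base, size), no label argument */
    static void memblock_reserve(unsigned long base, unsigned long size)
    {
        printf("reserved [%#lx, %#lx)\n", base, base + size);
    }

    int main(void)
    {
        /* hypothetical stand-ins for xen_start_info->mfn_list/pt_base */
        unsigned long mfn_list = 0xc0100000UL;
        unsigned long pt_base  = 0xc0140000UL;

        /* old: memblock_x86_reserve_range(start, end, "XEN START INFO")
         * new: size = end - start */
        memblock_reserve(mfn_list, pt_base - mfn_list);
        return 0;
    }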