author	H. Peter Anvin <hpa@linux.intel.com>	2012-01-19 15:56:50 -0500
committer	H. Peter Anvin <hpa@linux.intel.com>	2012-01-19 15:56:50 -0500
commit	282f445a779ed76fca9884fe377bf56a3088b208 (patch)
tree	d9abcf526baee0100672851e0a8894c19e762a39 /arch/x86
parent	68f30fbee19cc67849b9fa8e153ede70758afe81 (diff)
parent	90a4c0f51e8e44111a926be6f4c87af3938a79c3 (diff)

Merge remote-tracking branch 'linus/master' into x86/urgent

Diffstat (limited to 'arch/x86')
153 files changed, 8721 insertions, 4145 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index db190faffba1..864cc6e6ac8e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -60,8 +60,12 @@ config X86
 	select PERF_EVENTS
 	select HAVE_PERF_EVENTS_NMI
 	select ANON_INODES
+	select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386
+	select HAVE_CMPXCHG_LOCAL if !M386
+	select HAVE_CMPXCHG_DOUBLE
 	select HAVE_ARCH_KMEMCHECK
 	select HAVE_USER_RETURN_NOTIFIER
+	select ARCH_BINFMT_ELF_RANDOMIZE_PIE
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_TEXT_POKE_SMP
 	select HAVE_GENERIC_HARDIRQS
@@ -77,6 +81,7 @@ config X86
 	select HAVE_BPF_JIT if (X86_64 && NET)
 	select CLKEVT_I8253
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
+	select GENERIC_IOMAP
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS)
@@ -132,9 +137,6 @@ config NEED_SG_DMA_LENGTH
 config GENERIC_ISA_DMA
 	def_bool ISA_DMA_API
 
-config GENERIC_IOMAP
-	def_bool y
-
 config GENERIC_BUG
 	def_bool y
 	depends on BUG
@@ -421,12 +423,14 @@ config X86_MRST
 	depends on PCI
 	depends on PCI_GOANY
 	depends on X86_IO_APIC
+	select X86_INTEL_MID
+	select SFI
+	select DW_APB_TIMER
 	select APB_TIMER
 	select I2C
 	select SPI
 	select INTEL_SCU_IPC
 	select X86_PLATFORM_DEVICES
-	select X86_INTEL_MID
 	---help---
 	  Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin
 	  Internet Device(MID) platform. Moorestown consists of two chips:
@@ -435,6 +439,26 @@ config X86_MRST
 	  nor standard legacy replacement devices/features. e.g. Moorestown does
 	  not contain i8259, i8254, HPET, legacy BIOS, most of the io ports.
 
+config X86_MDFLD
+	bool "Medfield MID platform"
+	depends on PCI
+	depends on PCI_GOANY
+	depends on X86_IO_APIC
+	select X86_INTEL_MID
+	select SFI
+	select DW_APB_TIMER
+	select APB_TIMER
+	select I2C
+	select SPI
+	select INTEL_SCU_IPC
+	select X86_PLATFORM_DEVICES
+	---help---
+	  Medfield is Intel's Low Power Intel Architecture (LPIA) based Moblin
+	  Internet Device(MID) platform.
+	  Unlike standard x86 PCs, Medfield does not have many legacy devices
+	  nor standard legacy replacement devices/features. e.g. Medfield does
+	  not contain i8259, i8254, HPET, legacy BIOS, most of the io ports.
+
 endif
 
 config X86_RDC321X
@@ -632,7 +656,7 @@ config X86_SUMMIT_NUMA
 
 config X86_CYCLONE_TIMER
 	def_bool y
-	depends on X86_32_NON_STANDARD
+	depends on X86_SUMMIT
 
 source "arch/x86/Kconfig.cpu"
 
@@ -660,9 +684,10 @@ config HPET_EMULATE_RTC
 	depends on HPET_TIMER && (RTC=y || RTC=m || RTC_DRV_CMOS=m || RTC_DRV_CMOS=y)
 
 config APB_TIMER
-	def_bool y if MRST
-	prompt "Langwell APB Timer Support" if X86_MRST
+	def_bool y if X86_INTEL_MID
+	prompt "Intel MID APB Timer Support" if X86_INTEL_MID
 	select DW_APB_TIMER
+	depends on X86_INTEL_MID && SFI
 	help
 	  APB timer is the replacement for 8254, HPET on X86 MID platforms.
 	  The APBT provides a stable time base on SMP
@@ -1490,6 +1515,13 @@ config EFI
 	  resultant kernel should continue to boot on existing non-EFI
 	  platforms.
 
+config EFI_STUB
+	bool "EFI stub support"
+	depends on EFI
+	---help---
+	  This kernel feature allows a bzImage to be loaded directly
+	  by EFI firmware without the use of a bootloader.
+
 config SECCOMP
 	def_bool y
 	prompt "Enable seccomp to safely compute untrusted bytecode"
@@ -1742,7 +1774,7 @@ source "drivers/sfi/Kconfig"
 
 config X86_APM_BOOT
 	def_bool y
-	depends on APM || APM_MODULE
+	depends on APM
 
 menuconfig APM
 	tristate "APM (Advanced Power Management) BIOS support"
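The EFI_STUB entry above is the user-visible switch for the EFI boot stub added later in this patch: header.S grows an MS-DOS "MZ" signature and a PE/COFF header, so the very same bzImage doubles as an EFI application. As a rough illustration (not part of the patch) of what a loader checks when probing such an image, here is a small host-side C sketch; the file layout follows the usual PE convention that header.S emits below — "MZ" at offset 0, and the 32-bit offset of the "PE\0\0" header stored at offset 0x3c.

#include <stdio.h>
#include <stdint.h>
#include <string.h>

int main(int argc, char **argv)
{
	uint8_t hdr[0x40];
	uint32_t pe_off;
	FILE *f;

	if (argc < 2)
		return 1;
	f = fopen(argv[1], "rb");
	if (!f || fread(hdr, 1, sizeof(hdr), f) != sizeof(hdr))
		return 1;

	/* Offset 0: "MZ"; offset 0x3c: little-endian offset of "PE\0\0". */
	if (hdr[0] == 'M' && hdr[1] == 'Z') {
		memcpy(&pe_off, &hdr[0x3c], sizeof(pe_off));
		printf("MZ signature found, PE header expected at offset %#x\n",
		       pe_off);
	} else {
		printf("no MZ signature; plain bzImage\n");
	}
	fclose(f);
	return 0;
}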
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index e3ca7e0d858c..3c57033e2211 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -309,12 +309,6 @@ config X86_INTERNODE_CACHE_SHIFT
 config X86_CMPXCHG
 	def_bool X86_64 || (X86_32 && !M386)
 
-config CMPXCHG_LOCAL
-	def_bool X86_64 || (X86_32 && !M386)
-
-config CMPXCHG_DOUBLE
-	def_bool y
-
 config X86_L1_CACHE_SHIFT
 	int
 	default "7" if MPENTIUM4 || MPSC
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index bf56e1793272..e46c2147397f 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -43,9 +43,9 @@ config EARLY_PRINTK
 	  with klogd/syslogd or the X server. You should normally N here,
 	  unless you want to debug such a crash.
 
-config EARLY_PRINTK_MRST
-	bool "Early printk for MRST platform support"
-	depends on EARLY_PRINTK && X86_MRST
+config EARLY_PRINTK_INTEL_MID
+	bool "Early printk for Intel MID platform support"
+	depends on EARLY_PRINTK && X86_INTEL_MID
 
 config EARLY_PRINTK_DBGP
 	bool "Early printk via EHCI debug port"
@@ -63,8 +63,11 @@ config DEBUG_STACKOVERFLOW
 	bool "Check for stack overflows"
 	depends on DEBUG_KERNEL
 	---help---
-	  This option will cause messages to be printed if free stack space
-	  drops below a certain limit.
+	  Say Y here if you want to check the overflows of kernel, IRQ
+	  and exception stacks. This option will cause messages of the
+	  stacks in detail when free stack space drops below a certain
+	  limit.
+	  If in doubt, say "N".
 
 config X86_PTDUMP
 	bool "Export kernel pagetable layout to userspace via debugfs"
@@ -284,4 +287,16 @@ config DEBUG_STRICT_USER_COPY_CHECKS
 
 	  If unsure, or if you run an older (pre 4.4) gcc, say N.
 
+config DEBUG_NMI_SELFTEST
+	bool "NMI Selftest"
+	depends on DEBUG_KERNEL && X86_LOCAL_APIC
+	---help---
+	  Enabling this option turns on a quick NMI selftest to verify
+	  that the NMI behaves correctly.
+
+	  This might help diagnose strange hangs that rely on NMI to
+	  function properly.
+
+	  If unsure, say N.
+
 endmenu
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index b02e509072a7..209ba1294592 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -118,6 +118,12 @@ KBUILD_CFLAGS += $(mflags-y)
 KBUILD_AFLAGS += $(mflags-y)
 
 ###
+# Syscall table generation
+
+archheaders:
+	$(Q)$(MAKE) $(build)=arch/x86/syscalls all
+
+###
 # Kernel objects
 
 head-y := arch/x86/kernel/head_$(BITS).o
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 09664efb9cee..b123b9a8f5b3 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -23,7 +23,15 @@ LDFLAGS_vmlinux := -T
 
 hostprogs-y	:= mkpiggy
 
-$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o $(obj)/piggy.o FORCE
+VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
+	$(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \
+	$(obj)/piggy.o
+
+ifeq ($(CONFIG_EFI_STUB), y)
+	VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o
+endif
+
+$(obj)/vmlinux: $(VMLINUX_OBJS) FORCE
 	$(call if_changed,ld)
 	@:
 
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
new file mode 100644
index 000000000000..fec216f4fbc3
--- /dev/null
+++ b/arch/x86/boot/compressed/eboot.c
@@ -0,0 +1,1022 @@
+/* -----------------------------------------------------------------------
+ *
+ *   Copyright 2011 Intel Corporation; author Matt Fleming
+ *
+ *   This file is part of the Linux kernel, and is made available under
+ *   the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+#include <linux/efi.h>
+#include <asm/efi.h>
+#include <asm/setup.h>
+#include <asm/desc.h>
+
+#include "eboot.h"
+
+static efi_system_table_t *sys_table;
+
+static efi_status_t __get_map(efi_memory_desc_t **map, unsigned long *map_size,
+			      unsigned long *desc_size)
+{
+	efi_memory_desc_t *m = NULL;
+	efi_status_t status;
+	unsigned long key;
+	u32 desc_version;
+
+	*map_size = sizeof(*m) * 32;
+again:
+	/*
+	 * Add an additional efi_memory_desc_t because we're doing an
+	 * allocation which may be in a new descriptor region.
+	 */
+	*map_size += sizeof(*m);
+	status = efi_call_phys3(sys_table->boottime->allocate_pool,
+				EFI_LOADER_DATA, *map_size, (void **)&m);
+	if (status != EFI_SUCCESS)
+		goto fail;
+
+	status = efi_call_phys5(sys_table->boottime->get_memory_map, map_size,
+				m, &key, desc_size, &desc_version);
+	if (status == EFI_BUFFER_TOO_SMALL) {
+		efi_call_phys1(sys_table->boottime->free_pool, m);
+		goto again;
+	}
+
+	if (status != EFI_SUCCESS)
+		efi_call_phys1(sys_table->boottime->free_pool, m);
+
+fail:
+	*map = m;
+	return status;
+}
+
+/*
+ * Allocate at the highest possible address that is not above 'max'.
+ */
+static efi_status_t high_alloc(unsigned long size, unsigned long align,
+			       unsigned long *addr, unsigned long max)
+{
+	unsigned long map_size, desc_size;
+	efi_memory_desc_t *map;
+	efi_status_t status;
+	unsigned long nr_pages;
+	u64 max_addr = 0;
+	int i;
+
+	status = __get_map(&map, &map_size, &desc_size);
+	if (status != EFI_SUCCESS)
+		goto fail;
+
+	nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
+again:
+	for (i = 0; i < map_size / desc_size; i++) {
+		efi_memory_desc_t *desc;
+		unsigned long m = (unsigned long)map;
+		u64 start, end;
+
+		desc = (efi_memory_desc_t *)(m + (i * desc_size));
+		if (desc->type != EFI_CONVENTIONAL_MEMORY)
+			continue;
+
+		if (desc->num_pages < nr_pages)
+			continue;
+
+		start = desc->phys_addr;
+		end = start + desc->num_pages * (1UL << EFI_PAGE_SHIFT);
+
+		if ((start + size) > end || (start + size) > max)
+			continue;
+
+		if (end - size > max)
+			end = max;
+
+		if (round_down(end - size, align) < start)
+			continue;
+
+		start = round_down(end - size, align);
+
+		/*
+		 * Don't allocate at 0x0. It will confuse code that
+		 * checks pointers against NULL.
+		 */
+		if (start == 0x0)
+			continue;
+
+		if (start > max_addr)
+			max_addr = start;
+	}
+
+	if (!max_addr)
+		status = EFI_NOT_FOUND;
+	else {
+		status = efi_call_phys4(sys_table->boottime->allocate_pages,
+					EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA,
+					nr_pages, &max_addr);
+		if (status != EFI_SUCCESS) {
+			max = max_addr;
+			max_addr = 0;
+			goto again;
+		}
+
+		*addr = max_addr;
+	}
+
+free_pool:
+	efi_call_phys1(sys_table->boottime->free_pool, map);
+
+fail:
+	return status;
+}
+
+/*
+ * Allocate at the lowest possible address.
+ */
+static efi_status_t low_alloc(unsigned long size, unsigned long align,
+			      unsigned long *addr)
+{
+	unsigned long map_size, desc_size;
+	efi_memory_desc_t *map;
+	efi_status_t status;
+	unsigned long nr_pages;
+	int i;
+
+	status = __get_map(&map, &map_size, &desc_size);
+	if (status != EFI_SUCCESS)
+		goto fail;
+
+	nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
+	for (i = 0; i < map_size / desc_size; i++) {
+		efi_memory_desc_t *desc;
+		unsigned long m = (unsigned long)map;
+		u64 start, end;
+
+		desc = (efi_memory_desc_t *)(m + (i * desc_size));
+
+		if (desc->type != EFI_CONVENTIONAL_MEMORY)
+			continue;
+
+		if (desc->num_pages < nr_pages)
+			continue;
+
+		start = desc->phys_addr;
+		end = start + desc->num_pages * (1UL << EFI_PAGE_SHIFT);
+
+		/*
+		 * Don't allocate at 0x0. It will confuse code that
+		 * checks pointers against NULL. Skip the first 8
+		 * bytes so we start at a nice even number.
+		 */
+		if (start == 0x0)
+			start += 8;
+
+		start = round_up(start, align);
+		if ((start + size) > end)
+			continue;
+
+		status = efi_call_phys4(sys_table->boottime->allocate_pages,
+					EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA,
+					nr_pages, &start);
+		if (status == EFI_SUCCESS) {
+			*addr = start;
+			break;
+		}
+	}
+
+	if (i == map_size / desc_size)
+		status = EFI_NOT_FOUND;
+
+free_pool:
+	efi_call_phys1(sys_table->boottime->free_pool, map);
+fail:
+	return status;
+}
+
+static void low_free(unsigned long size, unsigned long addr)
+{
+	unsigned long nr_pages;
+
+	nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
+	efi_call_phys2(sys_table->boottime->free_pages, addr, size);
+}
+
+static void find_bits(unsigned long mask, u8 *pos, u8 *size)
+{
+	u8 first, len;
+
+	first = 0;
+	len = 0;
+
+	if (mask) {
+		while (!(mask & 0x1)) {
+			mask = mask >> 1;
+			first++;
+		}
+
+		while (mask & 0x1) {
+			mask = mask >> 1;
+			len++;
+		}
+	}
+
+	*pos = first;
+	*size = len;
+}
+
+/*
+ * See if we have Graphics Output Protocol
+ */
+static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
+			      unsigned long size)
+{
+	struct efi_graphics_output_protocol *gop, *first_gop;
+	struct efi_pixel_bitmask pixel_info;
+	unsigned long nr_gops;
+	efi_status_t status;
+	void **gop_handle;
+	u16 width, height;
+	u32 fb_base, fb_size;
+	u32 pixels_per_scan_line;
+	int pixel_format;
+	int i;
+
+	status = efi_call_phys3(sys_table->boottime->allocate_pool,
+				EFI_LOADER_DATA, size, &gop_handle);
+	if (status != EFI_SUCCESS)
+		return status;
+
+	status = efi_call_phys5(sys_table->boottime->locate_handle,
+				EFI_LOCATE_BY_PROTOCOL, proto,
+				NULL, &size, gop_handle);
+	if (status != EFI_SUCCESS)
+		goto free_handle;
+
+	first_gop = NULL;
+
+	nr_gops = size / sizeof(void *);
+	for (i = 0; i < nr_gops; i++) {
+		struct efi_graphics_output_mode_info *info;
+		efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID;
+		void *pciio;
+		void *h = gop_handle[i];
+
+		status = efi_call_phys3(sys_table->boottime->handle_protocol,
+					h, proto, &gop);
+		if (status != EFI_SUCCESS)
+			continue;
+
+		efi_call_phys3(sys_table->boottime->handle_protocol,
+			       h, &pciio_proto, &pciio);
+
+		status = efi_call_phys4(gop->query_mode, gop,
+					gop->mode->mode, &size, &info);
+		if (status == EFI_SUCCESS && (!first_gop || pciio)) {
+			/*
+			 * Apple provide GOPs that are not backed by
+			 * real hardware (they're used to handle
+			 * multiple displays). The workaround is to
+			 * search for a GOP implementing the PCIIO
+			 * protocol, and if one isn't found, to just
+			 * fallback to the first GOP.
+			 */
+			width = info->horizontal_resolution;
+			height = info->vertical_resolution;
+			fb_base = gop->mode->frame_buffer_base;
+			fb_size = gop->mode->frame_buffer_size;
+			pixel_format = info->pixel_format;
+			pixel_info = info->pixel_information;
+			pixels_per_scan_line = info->pixels_per_scan_line;
+
+			/*
+			 * Once we've found a GOP supporting PCIIO,
+			 * don't bother looking any further.
+			 */
+			if (pciio)
+				break;
+
+			first_gop = gop;
+		}
+	}
+
+	/* Did we find any GOPs? */
+	if (!first_gop)
+		goto free_handle;
+
+	/* EFI framebuffer */
+	si->orig_video_isVGA = VIDEO_TYPE_EFI;
+
+	si->lfb_width = width;
+	si->lfb_height = height;
+	si->lfb_base = fb_base;
+	si->lfb_size = fb_size;
+	si->pages = 1;
+
+	if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) {
+		si->lfb_depth = 32;
+		si->lfb_linelength = pixels_per_scan_line * 4;
+		si->red_size = 8;
+		si->red_pos = 0;
+		si->green_size = 8;
+		si->green_pos = 8;
+		si->blue_size = 8;
+		si->blue_pos = 16;
+		si->rsvd_size = 8;
+		si->rsvd_pos = 24;
+	} else if (pixel_format == PIXEL_BGR_RESERVED_8BIT_PER_COLOR) {
+		si->lfb_depth = 32;
+		si->lfb_linelength = pixels_per_scan_line * 4;
+		si->red_size = 8;
+		si->red_pos = 16;
+		si->green_size = 8;
+		si->green_pos = 8;
+		si->blue_size = 8;
+		si->blue_pos = 0;
+		si->rsvd_size = 8;
+		si->rsvd_pos = 24;
+	} else if (pixel_format == PIXEL_BIT_MASK) {
+		find_bits(pixel_info.red_mask, &si->red_pos, &si->red_size);
+		find_bits(pixel_info.green_mask, &si->green_pos,
+			  &si->green_size);
+		find_bits(pixel_info.blue_mask, &si->blue_pos, &si->blue_size);
+		find_bits(pixel_info.reserved_mask, &si->rsvd_pos,
+			  &si->rsvd_size);
+		si->lfb_depth = si->red_size + si->green_size +
+			si->blue_size + si->rsvd_size;
+		si->lfb_linelength = (pixels_per_scan_line * si->lfb_depth) / 8;
+	} else {
+		si->lfb_depth = 4;
+		si->lfb_linelength = si->lfb_width / 2;
+		si->red_size = 0;
+		si->red_pos = 0;
+		si->green_size = 0;
+		si->green_pos = 0;
+		si->blue_size = 0;
+		si->blue_pos = 0;
+		si->rsvd_size = 0;
+		si->rsvd_pos = 0;
+	}
+
+free_handle:
+	efi_call_phys1(sys_table->boottime->free_pool, gop_handle);
+	return status;
+}
+
+/*
+ * See if we have Universal Graphics Adapter (UGA) protocol
+ */
+static efi_status_t setup_uga(struct screen_info *si, efi_guid_t *uga_proto,
+			      unsigned long size)
+{
+	struct efi_uga_draw_protocol *uga, *first_uga;
+	unsigned long nr_ugas;
+	efi_status_t status;
+	u32 width, height;
+	void **uga_handle = NULL;
+	int i;
+
+	status = efi_call_phys3(sys_table->boottime->allocate_pool,
+				EFI_LOADER_DATA, size, &uga_handle);
+	if (status != EFI_SUCCESS)
+		return status;
+
+	status = efi_call_phys5(sys_table->boottime->locate_handle,
+				EFI_LOCATE_BY_PROTOCOL, uga_proto,
+				NULL, &size, uga_handle);
+	if (status != EFI_SUCCESS)
+		goto free_handle;
+
+	first_uga = NULL;
+
+	nr_ugas = size / sizeof(void *);
+	for (i = 0; i < nr_ugas; i++) {
+		efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID;
+		void *handle = uga_handle[i];
+		u32 w, h, depth, refresh;
+		void *pciio;
+
+		status = efi_call_phys3(sys_table->boottime->handle_protocol,
+					handle, uga_proto, &uga);
+		if (status != EFI_SUCCESS)
+			continue;
+
+		efi_call_phys3(sys_table->boottime->handle_protocol,
+			       handle, &pciio_proto, &pciio);
+
+		status = efi_call_phys5(uga->get_mode, uga, &w, &h,
+					&depth, &refresh);
+		if (status == EFI_SUCCESS && (!first_uga || pciio)) {
+			width = w;
+			height = h;
+
+			/*
+			 * Once we've found a UGA supporting PCIIO,
+			 * don't bother looking any further.
+			 */
+			if (pciio)
+				break;
+
+			first_uga = uga;
+		}
+	}
+
+	if (!first_uga)
+		goto free_handle;
+
+	/* EFI framebuffer */
+	si->orig_video_isVGA = VIDEO_TYPE_EFI;
+
+	si->lfb_depth = 32;
+	si->lfb_width = width;
+	si->lfb_height = height;
+
+	si->red_size = 8;
+	si->red_pos = 16;
+	si->green_size = 8;
+	si->green_pos = 8;
+	si->blue_size = 8;
+	si->blue_pos = 0;
+	si->rsvd_size = 8;
+	si->rsvd_pos = 24;
+
+
+free_handle:
+	efi_call_phys1(sys_table->boottime->free_pool, uga_handle);
+	return status;
+}
+
+void setup_graphics(struct boot_params *boot_params)
+{
+	efi_guid_t graphics_proto = EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID;
+	struct screen_info *si;
+	efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID;
+	efi_status_t status;
+	unsigned long size;
+	void **gop_handle = NULL;
+	void **uga_handle = NULL;
+
+	si = &boot_params->screen_info;
+	memset(si, 0, sizeof(*si));
+
+	size = 0;
+	status = efi_call_phys5(sys_table->boottime->locate_handle,
+				EFI_LOCATE_BY_PROTOCOL, &graphics_proto,
+				NULL, &size, gop_handle);
+	if (status == EFI_BUFFER_TOO_SMALL)
+		status = setup_gop(si, &graphics_proto, size);
+
+	if (status != EFI_SUCCESS) {
+		size = 0;
+		status = efi_call_phys5(sys_table->boottime->locate_handle,
+					EFI_LOCATE_BY_PROTOCOL, &uga_proto,
+					NULL, &size, uga_handle);
+		if (status == EFI_BUFFER_TOO_SMALL)
+			setup_uga(si, &uga_proto, size);
+	}
+}
+
+struct initrd {
+	efi_file_handle_t *handle;
+	u64 size;
+};
+
+/*
+ * Check the cmdline for a LILO-style initrd= arguments.
+ *
+ * We only support loading an initrd from the same filesystem as the
+ * kernel image.
+ */
+static efi_status_t handle_ramdisks(efi_loaded_image_t *image,
+				    struct setup_header *hdr)
+{
+	struct initrd *initrds;
+	unsigned long initrd_addr;
+	efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID;
+	u64 initrd_total;
+	efi_file_io_interface_t *io;
+	efi_file_handle_t *fh;
+	efi_status_t status;
+	int nr_initrds;
+	char *str;
+	int i, j, k;
+
+	initrd_addr = 0;
+	initrd_total = 0;
+
+	str = (char *)(unsigned long)hdr->cmd_line_ptr;
+
+	j = 0;			/* See close_handles */
+
+	if (!str || !*str)
+		return EFI_SUCCESS;
+
+	for (nr_initrds = 0; *str; nr_initrds++) {
+		str = strstr(str, "initrd=");
+		if (!str)
+			break;
+
+		str += 7;
+
+		/* Skip any leading slashes */
+		while (*str == '/' || *str == '\\')
+			str++;
+
+		while (*str && *str != ' ' && *str != '\n')
+			str++;
+	}
+
+	if (!nr_initrds)
+		return EFI_SUCCESS;
+
+	status = efi_call_phys3(sys_table->boottime->allocate_pool,
+				EFI_LOADER_DATA,
+				nr_initrds * sizeof(*initrds),
+				&initrds);
+	if (status != EFI_SUCCESS)
+		goto fail;
+
+	str = (char *)(unsigned long)hdr->cmd_line_ptr;
+	for (i = 0; i < nr_initrds; i++) {
+		struct initrd *initrd;
+		efi_file_handle_t *h;
+		efi_file_info_t *info;
+		efi_char16_t filename[256];
+		unsigned long info_sz;
+		efi_guid_t info_guid = EFI_FILE_INFO_ID;
+		efi_char16_t *p;
+		u64 file_sz;
+
+		str = strstr(str, "initrd=");
+		if (!str)
+			break;
+
+		str += 7;
+
+		initrd = &initrds[i];
+		p = filename;
+
+		/* Skip any leading slashes */
+		while (*str == '/' || *str == '\\')
+			str++;
+
+		while (*str && *str != ' ' && *str != '\n') {
+			if (p >= filename + sizeof(filename))
+				break;
+
+			*p++ = *str++;
+		}
+
+		*p = '\0';
+
+		/* Only open the volume once. */
+		if (!i) {
+			efi_boot_services_t *boottime;
+
+			boottime = sys_table->boottime;
+
+			status = efi_call_phys3(boottime->handle_protocol,
+					image->device_handle, &fs_proto, &io);
+			if (status != EFI_SUCCESS)
+				goto free_initrds;
+
+			status = efi_call_phys2(io->open_volume, io, &fh);
+			if (status != EFI_SUCCESS)
+				goto free_initrds;
+		}
+
+		status = efi_call_phys5(fh->open, fh, &h, filename,
+					EFI_FILE_MODE_READ, (u64)0);
+		if (status != EFI_SUCCESS)
+			goto close_handles;
+
+		initrd->handle = h;
+
+		info_sz = 0;
+		status = efi_call_phys4(h->get_info, h, &info_guid,
+					&info_sz, NULL);
+		if (status != EFI_BUFFER_TOO_SMALL)
+			goto close_handles;
+
+grow:
+		status = efi_call_phys3(sys_table->boottime->allocate_pool,
+					EFI_LOADER_DATA, info_sz, &info);
+		if (status != EFI_SUCCESS)
+			goto close_handles;
+
+		status = efi_call_phys4(h->get_info, h, &info_guid,
+					&info_sz, info);
+		if (status == EFI_BUFFER_TOO_SMALL) {
+			efi_call_phys1(sys_table->boottime->free_pool, info);
+			goto grow;
+		}
+
+		file_sz = info->file_size;
+		efi_call_phys1(sys_table->boottime->free_pool, info);
+
+		if (status != EFI_SUCCESS)
+			goto close_handles;
+
+		initrd->size = file_sz;
+		initrd_total += file_sz;
+	}
+
+	if (initrd_total) {
+		unsigned long addr;
+
+		/*
+		 * Multiple initrd's need to be at consecutive
+		 * addresses in memory, so allocate enough memory for
+		 * all the initrd's.
+		 */
+		status = high_alloc(initrd_total, 0x1000,
+				    &initrd_addr, hdr->initrd_addr_max);
+		if (status != EFI_SUCCESS)
+			goto close_handles;
+
+		/* We've run out of free low memory. */
+		if (initrd_addr > hdr->initrd_addr_max) {
+			status = EFI_INVALID_PARAMETER;
+			goto free_initrd_total;
+		}
+
+		addr = initrd_addr;
+		for (j = 0; j < nr_initrds; j++) {
+			u64 size;
+
+			size = initrds[j].size;
+			while (size) {
+				u64 chunksize;
+				if (size > EFI_READ_CHUNK_SIZE)
+					chunksize = EFI_READ_CHUNK_SIZE;
+				else
+					chunksize = size;
+				status = efi_call_phys3(fh->read,
+							initrds[j].handle,
+							&chunksize, addr);
+				if (status != EFI_SUCCESS)
+					goto free_initrd_total;
+				addr += chunksize;
+				size -= chunksize;
+			}
+
+			efi_call_phys1(fh->close, initrds[j].handle);
+		}
+
+	}
+
+	efi_call_phys1(sys_table->boottime->free_pool, initrds);
+
+	hdr->ramdisk_image = initrd_addr;
+	hdr->ramdisk_size = initrd_total;
+
+	return status;
+
+free_initrd_total:
+	low_free(initrd_total, initrd_addr);
+
+close_handles:
+	for (k = j; k < nr_initrds; k++)
+		efi_call_phys1(fh->close, initrds[k].handle);
+free_initrds:
+	efi_call_phys1(sys_table->boottime->free_pool, initrds);
+fail:
+	hdr->ramdisk_image = 0;
+	hdr->ramdisk_size = 0;
+
+	return status;
+}
+
+/*
+ * Because the x86 boot code expects to be passed a boot_params we
+ * need to create one ourselves (usually the bootloader would create
+ * one for us).
+ */
+static efi_status_t make_boot_params(struct boot_params *boot_params,
+				     efi_loaded_image_t *image,
+				     void *handle)
+{
+	struct efi_info *efi = &boot_params->efi_info;
+	struct apm_bios_info *bi = &boot_params->apm_bios_info;
+	struct sys_desc_table *sdt = &boot_params->sys_desc_table;
+	struct e820entry *e820_map = &boot_params->e820_map[0];
+	struct e820entry *prev = NULL;
+	struct setup_header *hdr = &boot_params->hdr;
+	unsigned long size, key, desc_size, _size;
+	efi_memory_desc_t *mem_map;
+	void *options = image->load_options;
+	u32 load_options_size = image->load_options_size / 2; /* ASCII */
+	int options_size = 0;
+	efi_status_t status;
+	__u32 desc_version;
+	unsigned long cmdline;
+	u8 nr_entries;
+	u16 *s2;
+	u8 *s1;
+	int i;
+
+	hdr->type_of_loader = 0x21;
+
+	/* Convert unicode cmdline to ascii */
+	cmdline = 0;
+	s2 = (u16 *)options;
+
+	if (s2) {
+		while (*s2 && *s2 != '\n' && options_size < load_options_size) {
+			s2++;
+			options_size++;
+		}
+
+		if (options_size) {
+			if (options_size > hdr->cmdline_size)
+				options_size = hdr->cmdline_size;
+
+			options_size++;	/* NUL termination */
+
+			status = low_alloc(options_size, 1, &cmdline);
+			if (status != EFI_SUCCESS)
+				goto fail;
+
+			s1 = (u8 *)(unsigned long)cmdline;
+			s2 = (u16 *)options;
+
+			for (i = 0; i < options_size - 1; i++)
+				*s1++ = *s2++;
+
+			*s1 = '\0';
+		}
+	}
+
+	hdr->cmd_line_ptr = cmdline;
+
+	hdr->ramdisk_image = 0;
+	hdr->ramdisk_size = 0;
+
+	status = handle_ramdisks(image, hdr);
+	if (status != EFI_SUCCESS)
+		goto free_cmdline;
+
+	setup_graphics(boot_params);
+
+	/* Clear APM BIOS info */
+	memset(bi, 0, sizeof(*bi));
+
+	memset(sdt, 0, sizeof(*sdt));
+
+	memcpy(&efi->efi_loader_signature, EFI_LOADER_SIGNATURE, sizeof(__u32));
+
+	size = sizeof(*mem_map) * 32;
+
+again:
+	size += sizeof(*mem_map);
+	_size = size;
+	status = low_alloc(size, 1, (unsigned long *)&mem_map);
+	if (status != EFI_SUCCESS)
+		goto free_cmdline;
+
+	status = efi_call_phys5(sys_table->boottime->get_memory_map, &size,
+				mem_map, &key, &desc_size, &desc_version);
+	if (status == EFI_BUFFER_TOO_SMALL) {
+		low_free(_size, (unsigned long)mem_map);
+		goto again;
+	}
+
+	if (status != EFI_SUCCESS)
+		goto free_mem_map;
+
+	efi->efi_systab = (unsigned long)sys_table;
+	efi->efi_memdesc_size = desc_size;
+	efi->efi_memdesc_version = desc_version;
+	efi->efi_memmap = (unsigned long)mem_map;
+	efi->efi_memmap_size = size;
+
+#ifdef CONFIG_X86_64
+	efi->efi_systab_hi = (unsigned long)sys_table >> 32;
+	efi->efi_memmap_hi = (unsigned long)mem_map >> 32;
+#endif
+
+	/* Might as well exit boot services now */
+	status = efi_call_phys2(sys_table->boottime->exit_boot_services,
+				handle, key);
+	if (status != EFI_SUCCESS)
+		goto free_mem_map;
+
+	/* Historic? */
+	boot_params->alt_mem_k = 32 * 1024;
+
+	/*
+	 * Convert the EFI memory map to E820.
+	 */
+	nr_entries = 0;
+	for (i = 0; i < size / desc_size; i++) {
+		efi_memory_desc_t *d;
+		unsigned int e820_type = 0;
+		unsigned long m = (unsigned long)mem_map;
+
+		d = (efi_memory_desc_t *)(m + (i * desc_size));
+		switch (d->type) {
+		case EFI_RESERVED_TYPE:
+		case EFI_RUNTIME_SERVICES_CODE:
+		case EFI_RUNTIME_SERVICES_DATA:
+		case EFI_MEMORY_MAPPED_IO:
+		case EFI_MEMORY_MAPPED_IO_PORT_SPACE:
+		case EFI_PAL_CODE:
+			e820_type = E820_RESERVED;
+			break;
+
+		case EFI_UNUSABLE_MEMORY:
+			e820_type = E820_UNUSABLE;
+			break;
+
+		case EFI_ACPI_RECLAIM_MEMORY:
+			e820_type = E820_ACPI;
+			break;
+
+		case EFI_LOADER_CODE:
+		case EFI_LOADER_DATA:
+		case EFI_BOOT_SERVICES_CODE:
+		case EFI_BOOT_SERVICES_DATA:
+		case EFI_CONVENTIONAL_MEMORY:
+			e820_type = E820_RAM;
+			break;
+
+		case EFI_ACPI_MEMORY_NVS:
+			e820_type = E820_NVS;
+			break;
+
+		default:
+			continue;
+		}
+
+		/* Merge adjacent mappings */
+		if (prev && prev->type == e820_type &&
+		    (prev->addr + prev->size) == d->phys_addr)
+			prev->size += d->num_pages << 12;
+		else {
+			e820_map->addr = d->phys_addr;
+			e820_map->size = d->num_pages << 12;
+			e820_map->type = e820_type;
+			prev = e820_map++;
+			nr_entries++;
+		}
+	}
+
+	boot_params->e820_entries = nr_entries;
+
+	return EFI_SUCCESS;
+
+free_mem_map:
+	low_free(_size, (unsigned long)mem_map);
+free_cmdline:
+	if (options_size)
+		low_free(options_size, hdr->cmd_line_ptr);
+fail:
+	return status;
+}
+
+/*
+ * On success we return a pointer to a boot_params structure, and NULL
+ * on failure.
+ */
+struct boot_params *efi_main(void *handle, efi_system_table_t *_table)
+{
+	struct boot_params *boot_params;
+	unsigned long start, nr_pages;
+	struct desc_ptr *gdt, *idt;
+	efi_loaded_image_t *image;
+	struct setup_header *hdr;
+	efi_status_t status;
+	efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID;
+	struct desc_struct *desc;
+
+	sys_table = _table;
+
+	/* Check if we were booted by the EFI firmware */
+	if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
+		goto fail;
+
+	status = efi_call_phys3(sys_table->boottime->handle_protocol,
+				handle, &proto, (void *)&image);
+	if (status != EFI_SUCCESS)
+		goto fail;
+
+	status = low_alloc(0x4000, 1, (unsigned long *)&boot_params);
+	if (status != EFI_SUCCESS)
+		goto fail;
+
+	memset(boot_params, 0x0, 0x4000);
+
+	/* Copy first two sectors to boot_params */
+	memcpy(boot_params, image->image_base, 1024);
+
+	hdr = &boot_params->hdr;
+
+	/*
+	 * The EFI firmware loader could have placed the kernel image
+	 * anywhere in memory, but the kernel has various restrictions
+	 * on the max physical address it can run at. Attempt to move
+	 * the kernel to boot_params.pref_address, or as low as
+	 * possible.
+	 */
+	start = hdr->pref_address;
+	nr_pages = round_up(hdr->init_size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
+
+	status = efi_call_phys4(sys_table->boottime->allocate_pages,
+				EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA,
+				nr_pages, &start);
+	if (status != EFI_SUCCESS) {
+		status = low_alloc(hdr->init_size, hdr->kernel_alignment,
+				   &start);
+		if (status != EFI_SUCCESS)
+			goto fail;
+	}
+
+	hdr->code32_start = (__u32)start;
+	hdr->pref_address = (__u64)(unsigned long)image->image_base;
+
+	memcpy((void *)start, image->image_base, image->image_size);
+
+	status = efi_call_phys3(sys_table->boottime->allocate_pool,
+				EFI_LOADER_DATA, sizeof(*gdt),
+				(void **)&gdt);
+	if (status != EFI_SUCCESS)
+		goto fail;
+
+	gdt->size = 0x800;
+	status = low_alloc(gdt->size, 8, (unsigned long *)&gdt->address);
+	if (status != EFI_SUCCESS)
+		goto fail;
+
+	status = efi_call_phys3(sys_table->boottime->allocate_pool,
+				EFI_LOADER_DATA, sizeof(*idt),
+				(void **)&idt);
+	if (status != EFI_SUCCESS)
+		goto fail;
+
+	idt->size = 0;
+	idt->address = 0;
+
+	status = make_boot_params(boot_params, image, handle);
+	if (status != EFI_SUCCESS)
+		goto fail;
+
+	memset((char *)gdt->address, 0x0, gdt->size);
+	desc = (struct desc_struct *)gdt->address;
+
+	/* The first GDT is a dummy and the second is unused. */
+	desc += 2;
+
+	desc->limit0 = 0xffff;
+	desc->base0 = 0x0000;
+	desc->base1 = 0x0000;
+	desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ;
+	desc->s = DESC_TYPE_CODE_DATA;
+	desc->dpl = 0;
+	desc->p = 1;
+	desc->limit = 0xf;
+	desc->avl = 0;
+	desc->l = 0;
+	desc->d = SEG_OP_SIZE_32BIT;
+	desc->g = SEG_GRANULARITY_4KB;
+	desc->base2 = 0x00;
+
+	desc++;
+	desc->limit0 = 0xffff;
+	desc->base0 = 0x0000;
+	desc->base1 = 0x0000;
+	desc->type = SEG_TYPE_DATA | SEG_TYPE_READ_WRITE;
+	desc->s = DESC_TYPE_CODE_DATA;
+	desc->dpl = 0;
+	desc->p = 1;
+	desc->limit = 0xf;
+	desc->avl = 0;
+	desc->l = 0;
+	desc->d = SEG_OP_SIZE_32BIT;
+	desc->g = SEG_GRANULARITY_4KB;
+	desc->base2 = 0x00;
+
+#ifdef CONFIG_X86_64
+	/* Task segment value */
+	desc++;
+	desc->limit0 = 0x0000;
+	desc->base0 = 0x0000;
+	desc->base1 = 0x0000;
+	desc->type = SEG_TYPE_TSS;
+	desc->s = 0;
+	desc->dpl = 0;
+	desc->p = 1;
+	desc->limit = 0x0;
+	desc->avl = 0;
+	desc->l = 0;
+	desc->d = 0;
+	desc->g = SEG_GRANULARITY_4KB;
+	desc->base2 = 0x00;
+#endif /* CONFIG_X86_64 */
+
+	asm volatile ("lidt %0" : : "m" (*idt));
+	asm volatile ("lgdt %0" : : "m" (*gdt));
+
+	asm volatile("cli");
+
+	return boot_params;
+fail:
+	return NULL;
+}
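__get_map() above is an instance of the standard UEFI two-call pattern: call GetMemoryMap(), and when the answer is EFI_BUFFER_TOO_SMALL, free the buffer, enlarge it and retry — growing by one extra descriptor each round, because the allocation itself can split a free region and enlarge the map. A freestanding C sketch of the same idiom follows; fake_get_memory_map() is a made-up stand-in for the firmware call, and the constants are illustrative only.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define EFI_SUCCESS          0
#define EFI_BUFFER_TOO_SMALL 5
#define DESC_SIZE            40	/* assumed descriptor size for the sketch */

/* Hypothetical stand-in for GetMemoryMap(): reports the required size
 * in *size and fails until the caller's buffer is large enough. */
static int fake_get_memory_map(void *buf, size_t *size)
{
	static const size_t needed = 48 * DESC_SIZE;

	if (*size < needed) {
		*size = needed;
		return EFI_BUFFER_TOO_SMALL;
	}
	memset(buf, 0, needed);
	*size = needed;
	return EFI_SUCCESS;
}

int main(void)
{
	size_t size = 32 * DESC_SIZE;	/* initial guess, as in __get_map() */
	void *map;
	int status;

	for (;;) {
		/* Grow by one descriptor: the allocation below may itself
		 * split a free region into two. */
		size += DESC_SIZE;
		map = malloc(size);
		if (!map)
			return 1;
		status = fake_get_memory_map(map, &size);
		if (status != EFI_BUFFER_TOO_SMALL)
			break;
		free(map);	/* too small: retry with the reported size */
	}
	printf("status=%d, map of %zu bytes\n", status, size);
	free(map);
	return 0;
}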
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h
new file mode 100644
index 000000000000..39251663e65b
--- /dev/null
+++ b/arch/x86/boot/compressed/eboot.h
@@ -0,0 +1,61 @@
+#ifndef BOOT_COMPRESSED_EBOOT_H
+#define BOOT_COMPRESSED_EBOOT_H
+
+#define SEG_TYPE_DATA		(0 << 3)
+#define SEG_TYPE_READ_WRITE	(1 << 1)
+#define SEG_TYPE_CODE		(1 << 3)
+#define SEG_TYPE_EXEC_READ	(1 << 1)
+#define SEG_TYPE_TSS		((1 << 3) | (1 << 0))
+#define SEG_OP_SIZE_32BIT	(1 << 0)
+#define SEG_GRANULARITY_4KB	(1 << 0)
+
+#define DESC_TYPE_CODE_DATA	(1 << 0)
+
+#define EFI_PAGE_SIZE		(1UL << EFI_PAGE_SHIFT)
+#define EFI_READ_CHUNK_SIZE	(1024 * 1024)
+
+#define PIXEL_RGB_RESERVED_8BIT_PER_COLOR	0
+#define PIXEL_BGR_RESERVED_8BIT_PER_COLOR	1
+#define PIXEL_BIT_MASK				2
+#define PIXEL_BLT_ONLY				3
+#define PIXEL_FORMAT_MAX			4
+
+struct efi_pixel_bitmask {
+	u32 red_mask;
+	u32 green_mask;
+	u32 blue_mask;
+	u32 reserved_mask;
+};
+
+struct efi_graphics_output_mode_info {
+	u32 version;
+	u32 horizontal_resolution;
+	u32 vertical_resolution;
+	int pixel_format;
+	struct efi_pixel_bitmask pixel_information;
+	u32 pixels_per_scan_line;
+} __packed;
+
+struct efi_graphics_output_protocol_mode {
+	u32 max_mode;
+	u32 mode;
+	unsigned long info;
+	unsigned long size_of_info;
+	u64 frame_buffer_base;
+	unsigned long frame_buffer_size;
+} __packed;
+
+struct efi_graphics_output_protocol {
+	void *query_mode;
+	unsigned long set_mode;
+	unsigned long blt;
+	struct efi_graphics_output_protocol_mode *mode;
+};
+
+struct efi_uga_draw_protocol {
+	void *get_mode;
+	void *set_mode;
+	void *blt;
+};
+
+#endif /* BOOT_COMPRESSED_EBOOT_H */
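The PIXEL_BIT_MASK format above describes each color channel with an arbitrary bitmask, which find_bits() in eboot.c decomposes into a bit position and width for screen_info. A worked example of that decomposition on the red channel of a classic XRGB8888 layout (the function body mirrors find_bits(); the mask value is just an example):

#include <stdio.h>

/* Same logic as find_bits() in eboot.c: locate the first set bit, then
 * count the contiguous run of set bits that follows it. */
static void find_bits(unsigned long mask, unsigned char *pos,
		      unsigned char *size)
{
	unsigned char first = 0, len = 0;

	if (mask) {
		while (!(mask & 0x1)) {
			mask >>= 1;
			first++;
		}
		while (mask & 0x1) {
			mask >>= 1;
			len++;
		}
	}
	*pos = first;
	*size = len;
}

int main(void)
{
	unsigned char pos, size;

	find_bits(0x00ff0000, &pos, &size);	/* red mask in XRGB8888 */
	printf("red: pos=%u size=%u\n", pos, size);	/* prints pos=16 size=8 */
	return 0;
}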
diff --git a/arch/x86/boot/compressed/efi_stub_32.S b/arch/x86/boot/compressed/efi_stub_32.S
new file mode 100644
index 000000000000..a53440e81d52
--- /dev/null
+++ b/arch/x86/boot/compressed/efi_stub_32.S
@@ -0,0 +1,86 @@
+/*
+ * EFI call stub for IA32.
+ *
+ * This stub allows us to make EFI calls in physical mode with interrupts
+ * turned off. Note that this implementation is different from the one in
+ * arch/x86/platform/efi/efi_stub_32.S because we're _already_ in physical
+ * mode at this point.
+ */
+
+#include <linux/linkage.h>
+#include <asm/page_types.h>
+
+/*
+ * efi_call_phys(void *, ...) is a function with variable parameters.
+ * All the callers of this function assure that all the parameters are 4-bytes.
+ */
+
+/*
+ * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save.
+ * So we'd better save all of them at the beginning of this function and restore
+ * at the end no matter how many we use, because we can not assure EFI runtime
+ * service functions will comply with gcc calling convention, too.
+ */
+
+.text
+ENTRY(efi_call_phys)
+	/*
+	 * 0. The function can only be called in Linux kernel. So CS has been
+	 * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found
+	 * the values of these registers are the same. And, the corresponding
+	 * GDT entries are identical. So I will do nothing about segment reg
+	 * and GDT, but change GDT base register in prelog and epilog.
+	 */
+
+	/*
+	 * 1. Because we haven't been relocated by this point we need to
+	 * use relative addressing.
+	 */
+	call	1f
+1:	popl	%edx
+	subl	$1b, %edx
+
+	/*
+	 * 2. Now on the top of stack is the return
+	 * address in the caller of efi_call_phys(), then parameter 1,
+	 * parameter 2, ..., param n. To make things easy, we save the return
+	 * address of efi_call_phys in a global variable.
+	 */
+	popl	%ecx
+	movl	%ecx, saved_return_addr(%edx)
+	/* get the function pointer into ECX*/
+	popl	%ecx
+	movl	%ecx, efi_rt_function_ptr(%edx)
+
+	/*
+	 * 3. Call the physical function.
+	 */
+	call	*%ecx
+
+	/*
+	 * 4. Balance the stack. And because EAX contain the return value,
+	 * we'd better not clobber it. We need to calculate our address
+	 * again because %ecx and %edx are not preserved across EFI function
+	 * calls.
+	 */
+	call	1f
+1:	popl	%edx
+	subl	$1b, %edx
+
+	movl	efi_rt_function_ptr(%edx), %ecx
+	pushl	%ecx
+
+	/*
+	 * 10. Push the saved return address onto the stack and return.
+	 */
+	movl	saved_return_addr(%edx), %ecx
+	pushl	%ecx
+	ret
+ENDPROC(efi_call_phys)
+.previous
+
+.data
+saved_return_addr:
+	.long 0
+efi_rt_function_ptr:
+	.long 0
diff --git a/arch/x86/boot/compressed/efi_stub_64.S b/arch/x86/boot/compressed/efi_stub_64.S
new file mode 100644
index 000000000000..cedc60de86eb
--- /dev/null
+++ b/arch/x86/boot/compressed/efi_stub_64.S
@@ -0,0 +1 @@
+#include "../../platform/efi/efi_stub_64.S"
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 67a655a39ce4..a0559930a180 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -32,6 +32,28 @@
 
 	__HEAD
 ENTRY(startup_32)
+#ifdef CONFIG_EFI_STUB
+	/*
+	 * We don't need the return address, so set up the stack so
+	 * efi_main() can find its arguments.
+	 */
+	add	$0x4, %esp
+
+	call	efi_main
+	cmpl	$0, %eax
+	je	preferred_addr
+	movl	%eax, %esi
+	call	1f
+1:
+	popl	%eax
+	subl	$1b, %eax
+	subl	BP_pref_address(%esi), %eax
+	add	BP_code32_start(%esi), %eax
+	leal	preferred_addr(%eax), %eax
+	jmp	*%eax
+
+preferred_addr:
+#endif
 	cld
 	/*
 	 * Test KEEP_SEGMENTS flag to see if the bootloader is asking
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 35af09d13dc1..558d76ce23bc 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -199,6 +199,26 @@ ENTRY(startup_64)
 	 * an identity mapped page table being provided that maps our
 	 * entire text+data+bss and hopefully all of memory.
 	 */
+#ifdef CONFIG_EFI_STUB
+	pushq	%rsi
+	mov	%rcx, %rdi
+	mov	%rdx, %rsi
+	call	efi_main
+	popq	%rsi
+	cmpq	$0,%rax
+	je	preferred_addr
+	movq	%rax,%rsi
+	call	1f
+1:
+	popq	%rax
+	subq	$1b, %rax
+	subq	BP_pref_address(%rsi), %rax
+	add	BP_code32_start(%esi), %eax
+	leaq	preferred_addr(%rax), %rax
+	jmp	*%rax
+
+preferred_addr:
+#endif
 
 	/* Setup data segments. */
 	xorl	%eax, %eax
diff --git a/arch/x86/boot/compressed/string.c b/arch/x86/boot/compressed/string.c
index 19b3e693cd72..ffb9c5c9d748 100644
--- a/arch/x86/boot/compressed/string.c
+++ b/arch/x86/boot/compressed/string.c
@@ -1,2 +1,11 @@
 #include "misc.h"
+
+int memcmp(const void *s1, const void *s2, size_t len)
+{
+	u8 diff;
+	asm("repe; cmpsb; setnz %0"
+	    : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
+	return diff;
+}
+
 #include "../string.c"
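One property of the memcmp above is worth noting: setnz yields only 0 or 1, so it reports equal/not-equal but never the ordering that the C library's memcmp provides. That is sufficient for its callers here, which only test for equality (GUID and signature comparisons). A host-side demonstration of the same technique — boot_memcmp is a hypothetical name, and x86 with GNU C inline asm is assumed:

#include <stdio.h>
#include <string.h>	/* libc memcmp, for contrast */

/* Same instruction sequence as the boot memcmp above. */
static int boot_memcmp(const void *s1, const void *s2, size_t len)
{
	unsigned char diff;

	asm("repe; cmpsb; setnz %0"
	    : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
	return diff;
}

int main(void)
{
	printf("%d\n", boot_memcmp("abc", "abc", 3));	/* 0: equal */
	printf("%d\n", boot_memcmp("abc", "abd", 3));	/* 1: different */
	printf("%d\n", memcmp("abc", "abd", 3));	/* < 0: libc also orders */
	return 0;
}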
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index bdb4d458ec8c..f1bbeeb09148 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S | |||
@@ -45,6 +45,11 @@ SYSSEG = 0x1000 /* historical load address >> 4 */ | |||
45 | 45 | ||
46 | .global bootsect_start | 46 | .global bootsect_start |
47 | bootsect_start: | 47 | bootsect_start: |
48 | #ifdef CONFIG_EFI_STUB | ||
49 | # "MZ", MS-DOS header | ||
50 | .byte 0x4d | ||
51 | .byte 0x5a | ||
52 | #endif | ||
48 | 53 | ||
49 | # Normalize the start address | 54 | # Normalize the start address |
50 | ljmp $BOOTSEG, $start2 | 55 | ljmp $BOOTSEG, $start2 |
@@ -79,6 +84,14 @@ bs_die: | |||
79 | # invoke the BIOS reset code... | 84 | # invoke the BIOS reset code... |
80 | ljmp $0xf000,$0xfff0 | 85 | ljmp $0xf000,$0xfff0 |
81 | 86 | ||
87 | #ifdef CONFIG_EFI_STUB | ||
88 | .org 0x3c | ||
89 | # | ||
90 | # Offset to the PE header. | ||
91 | # | ||
92 | .long pe_header | ||
93 | #endif /* CONFIG_EFI_STUB */ | ||
94 | |||
82 | .section ".bsdata", "a" | 95 | .section ".bsdata", "a" |
83 | bugger_off_msg: | 96 | bugger_off_msg: |
84 | .ascii "Direct booting from floppy is no longer supported.\r\n" | 97 | .ascii "Direct booting from floppy is no longer supported.\r\n" |
@@ -87,6 +100,141 @@ bugger_off_msg: | |||
87 | .ascii "Remove disk and press any key to reboot . . .\r\n" | 100 | .ascii "Remove disk and press any key to reboot . . .\r\n" |
88 | .byte 0 | 101 | .byte 0 |
89 | 102 | ||
103 | #ifdef CONFIG_EFI_STUB | ||
104 | pe_header: | ||
105 | .ascii "PE" | ||
106 | .word 0 | ||
107 | |||
108 | coff_header: | ||
109 | #ifdef CONFIG_X86_32 | ||
110 | .word 0x14c # i386 | ||
111 | #else | ||
112 | .word 0x8664 # x86-64 | ||
113 | #endif | ||
114 | .word 2 # nr_sections | ||
115 | .long 0 # TimeDateStamp | ||
116 | .long 0 # PointerToSymbolTable | ||
117 | .long 1 # NumberOfSymbols | ||
118 | .word section_table - optional_header # SizeOfOptionalHeader | ||
119 | #ifdef CONFIG_X86_32 | ||
120 | .word 0x306 # Characteristics. | ||
121 | # IMAGE_FILE_32BIT_MACHINE | | ||
122 | # IMAGE_FILE_DEBUG_STRIPPED | | ||
123 | # IMAGE_FILE_EXECUTABLE_IMAGE | | ||
124 | # IMAGE_FILE_LINE_NUMS_STRIPPED | ||
125 | #else | ||
126 | .word 0x206 # Characteristics | ||
127 | # IMAGE_FILE_DEBUG_STRIPPED | | ||
128 | # IMAGE_FILE_EXECUTABLE_IMAGE | | ||
129 | # IMAGE_FILE_LINE_NUMS_STRIPPED | ||
130 | #endif | ||
131 | |||
132 | optional_header: | ||
133 | #ifdef CONFIG_X86_32 | ||
134 | .word 0x10b # PE32 format | ||
135 | #else | ||
136 | .word 0x20b # PE32+ format | ||
137 | #endif | ||
138 | .byte 0x02 # MajorLinkerVersion | ||
139 | .byte 0x14 # MinorLinkerVersion | ||
140 | |||
141 | # Filled in by build.c | ||
142 | .long 0 # SizeOfCode | ||
143 | |||
144 | .long 0 # SizeOfInitializedData | ||
145 | .long 0 # SizeOfUninitializedData | ||
146 | |||
147 | # Filled in by build.c | ||
148 | .long 0x0000 # AddressOfEntryPoint | ||
149 | |||
150 | .long 0x0000 # BaseOfCode | ||
151 | #ifdef CONFIG_X86_32 | ||
152 | .long 0 # data | ||
153 | #endif | ||
154 | |||
155 | extra_header_fields: | ||
156 | #ifdef CONFIG_X86_32 | ||
157 | .long 0 # ImageBase | ||
158 | #else | ||
159 | .quad 0 # ImageBase | ||
160 | #endif | ||
161 | .long 0x1000 # SectionAlignment | ||
162 | .long 0x200 # FileAlignment | ||
163 | .word 0 # MajorOperatingSystemVersion | ||
164 | .word 0 # MinorOperatingSystemVersion | ||
165 | .word 0 # MajorImageVersion | ||
166 | .word 0 # MinorImageVersion | ||
167 | .word 0 # MajorSubsystemVersion | ||
168 | .word 0 # MinorSubsystemVersion | ||
169 | .long 0 # Win32VersionValue | ||
170 | |||
171 | # | ||
172 | # The size of the bzImage is written in tools/build.c | ||
173 | # | ||
174 | .long 0 # SizeOfImage | ||
175 | |||
176 | .long 0x200 # SizeOfHeaders | ||
177 | .long 0 # CheckSum | ||
178 | .word 0xa # Subsystem (EFI application) | ||
179 | .word 0 # DllCharacteristics | ||
180 | #ifdef CONFIG_X86_32 | ||
181 | .long 0 # SizeOfStackReserve | ||
182 | .long 0 # SizeOfStackCommit | ||
183 | .long 0 # SizeOfHeapReserve | ||
184 | .long 0 # SizeOfHeapCommit | ||
185 | #else | ||
186 | .quad 0 # SizeOfStackReserve | ||
187 | .quad 0 # SizeOfStackCommit | ||
188 | .quad 0 # SizeOfHeapReserve | ||
189 | .quad 0 # SizeOfHeapCommit | ||
190 | #endif | ||
191 | .long 0 # LoaderFlags | ||
192 | .long 0x1 # NumberOfRvaAndSizes | ||
193 | |||
194 | .quad 0 # ExportTable | ||
195 | .quad 0 # ImportTable | ||
196 | .quad 0 # ResourceTable | ||
197 | .quad 0 # ExceptionTable | ||
198 | .quad 0 # CertificationTable | ||
199 | .quad 0 # BaseRelocationTable | ||
200 | |||
201 | # Section table | ||
202 | section_table: | ||
203 | .ascii ".text" | ||
204 | .byte 0 | ||
205 | .byte 0 | ||
206 | .byte 0 | ||
207 | .long 0 | ||
208 | .long 0x0 # startup_{32,64} | ||
209 | .long 0 # Size of initialized data | ||
210 | # on disk | ||
211 | .long 0x0 # startup_{32,64} | ||
212 | .long 0 # PointerToRelocations | ||
213 | .long 0 # PointerToLineNumbers | ||
214 | .word 0 # NumberOfRelocations | ||
215 | .word 0 # NumberOfLineNumbers | ||
216 | .long 0x60500020 # Characteristics (section flags) | ||
217 | |||
218 | # | ||
219 | # The EFI application loader requires a relocation section | ||
220 | # because EFI applications are relocatable and not having | ||
221 | # this section seems to confuse it. But since we don't need | ||
222 | # the loader to fixup any relocs for us just fill it with a | ||
223 | # single dummy reloc. | ||
224 | # | ||
225 | .ascii ".reloc" | ||
226 | .byte 0 | ||
227 | .byte 0 | ||
228 | .long reloc_end - reloc_start | ||
229 | .long reloc_start | ||
230 | .long reloc_end - reloc_start # SizeOfRawData | ||
231 | .long reloc_start # PointerToRawData | ||
232 | .long 0 # PointerToRelocations | ||
233 | .long 0 # PointerToLineNumbers | ||
234 | .word 0 # NumberOfRelocations | ||
235 | .word 0 # NumberOfLineNumbers | ||
236 | .long 0x42100040 # Characteristics (section flags) | ||
237 | #endif /* CONFIG_EFI_STUB */ | ||
90 | 238 | ||
91 | # Kernel attributes; used by setup. This is part 1 of the | 239 | # Kernel attributes; used by setup. This is part 1 of the |
92 | # header, from the old boot sector. | 240 | # header, from the old boot sector. |
@@ -318,3 +466,13 @@ die: | |||
318 | setup_corrupt: | 466 | setup_corrupt: |
319 | .byte 7 | 467 | .byte 7 |
320 | .string "No setup signature found...\n" | 468 | .string "No setup signature found...\n" |
469 | |||
470 | .data | ||
471 | dummy: .long 0 | ||
472 | |||
473 | .section .reloc | ||
474 | reloc_start: | ||
475 | .long dummy - reloc_start | ||
476 | .long 10 | ||
477 | .word 0 | ||
478 | reloc_end: | ||
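Taken together, the "MZ" bytes at offset 0 and the four-byte pointer at offset 0x3c let UEFI firmware parse the bzImage as a PE/COFF executable, while BIOS boot is unaffected because the real-mode ljmp still follows immediately; the dummy entry in the .reloc section exists only to satisfy loaders that insist on a relocation table. A minimal host-side sketch (hypothetical checker, not part of the patch; little-endian host assumed) that verifies the two signatures in a built bzImage:

#include <stdio.h>
#include <stdint.h>

int main(int argc, char **argv)
{
	uint8_t buf[0x40];
	uint32_t pe_off, sig;
	FILE *f;

	if (argc < 2 || !(f = fopen(argv[1], "rb")))
		return 1;
	if (fread(buf, 1, sizeof(buf), f) != sizeof(buf))
		return 1;
	if (buf[0] != 0x4d || buf[1] != 0x5a) {		/* "MZ" */
		printf("no MZ header\n");
		return 1;
	}
	pe_off = *(uint32_t *)&buf[0x3c];	/* pointer written above */
	fseek(f, pe_off, SEEK_SET);
	if (fread(&sig, 4, 1, f) != 1)
		return 1;
	printf("PE header at %#x: %s\n", pe_off,
	       sig == 0x4550 ? "valid" : "bad signature");	/* "PE\0\0" */
	fclose(f);
	return 0;
}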
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index 3cbc4058dd26..574dedfe2890 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c | |||
@@ -111,3 +111,38 @@ unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int bas | |||
111 | 111 | ||
112 | return result; | 112 | return result; |
113 | } | 113 | } |
114 | |||
115 | /** | ||
116 | * strlen - Find the length of a string | ||
117 | * @s: The string to be sized | ||
118 | */ | ||
119 | size_t strlen(const char *s) | ||
120 | { | ||
121 | const char *sc; | ||
122 | |||
123 | for (sc = s; *sc != '\0'; ++sc) | ||
124 | /* nothing */; | ||
125 | return sc - s; | ||
126 | } | ||
127 | |||
128 | /** | ||
129 | * strstr - Find the first substring in a %NUL terminated string | ||
130 | * @s1: The string to be searched | ||
131 | * @s2: The string to search for | ||
132 | */ | ||
133 | char *strstr(const char *s1, const char *s2) | ||
134 | { | ||
135 | size_t l1, l2; | ||
136 | |||
137 | l2 = strlen(s2); | ||
138 | if (!l2) | ||
139 | return (char *)s1; | ||
140 | l1 = strlen(s1); | ||
141 | while (l1 >= l2) { | ||
142 | l1--; | ||
143 | if (!memcmp(s1, s2, l2)) | ||
144 | return (char *)s1; | ||
145 | s1++; | ||
146 | } | ||
147 | return NULL; | ||
148 | } | ||
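The boot stub is freestanding and cannot link against the C library, hence these local helpers; strstr() bounds its scan by the remaining haystack length and delegates the comparison to memcmp(), so an empty needle matches at the start and a needle longer than the remaining text can never match. A small host-side self-test of those semantics (purely illustrative, not from the patch):

#include <assert.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
	const char *s = "console=uart8250";

	assert(strstr(s, "") == s);		/* empty needle: haystack */
	assert(strstr(s, "uart") == s + 8);	/* match inside */
	assert(strstr(s, "uart82500") == NULL);	/* needle longer than rest */
	printf("strstr semantics ok\n");
	return 0;
}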
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index fdc60a0b3c20..4e9bd6bcafa6 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c | |||
@@ -135,6 +135,9 @@ static void usage(void) | |||
135 | 135 | ||
136 | int main(int argc, char ** argv) | 136 | int main(int argc, char ** argv) |
137 | { | 137 | { |
138 | #ifdef CONFIG_EFI_STUB | ||
139 | unsigned int file_sz, pe_header; | ||
140 | #endif | ||
138 | unsigned int i, sz, setup_sectors; | 141 | unsigned int i, sz, setup_sectors; |
139 | int c; | 142 | int c; |
140 | u32 sys_size; | 143 | u32 sys_size; |
@@ -194,6 +197,42 @@ int main(int argc, char ** argv) | |||
194 | buf[0x1f6] = sys_size >> 16; | 197 | buf[0x1f6] = sys_size >> 16; |
195 | buf[0x1f7] = sys_size >> 24; | 198 | buf[0x1f7] = sys_size >> 24; |
196 | 199 | ||
200 | #ifdef CONFIG_EFI_STUB | ||
201 | file_sz = sz + i + ((sys_size * 16) - sz); | ||
202 | |||
203 | pe_header = *(unsigned int *)&buf[0x3c]; | ||
204 | |||
205 | /* Size of code */ | ||
206 | *(unsigned int *)&buf[pe_header + 0x1c] = file_sz; | ||
207 | |||
208 | /* Size of image */ | ||
209 | *(unsigned int *)&buf[pe_header + 0x50] = file_sz; | ||
210 | |||
211 | #ifdef CONFIG_X86_32 | ||
212 | /* Address of entry point */ | ||
213 | *(unsigned int *)&buf[pe_header + 0x28] = i; | ||
214 | |||
215 | /* .text size */ | ||
216 | *(unsigned int *)&buf[pe_header + 0xb0] = file_sz; | ||
217 | |||
218 | /* .text size of initialised data */ | ||
219 | *(unsigned int *)&buf[pe_header + 0xb8] = file_sz; | ||
220 | #else | ||
221 | /* | ||
222 | * Address of entry point. startup_32 is at the beginning and | ||
223 | * the 64-bit entry point (startup_64) is always 512 bytes | ||
224 | * after. | ||
225 | */ | ||
226 | *(unsigned int *)&buf[pe_header + 0x28] = i + 512; | ||
227 | |||
228 | /* .text size */ | ||
229 | *(unsigned int *)&buf[pe_header + 0xc0] = file_sz; | ||
230 | |||
231 | /* .text size of initialised data */ | ||
232 | *(unsigned int *)&buf[pe_header + 0xc8] = file_sz; | ||
233 | #endif /* CONFIG_X86_32 */ | ||
234 | #endif /* CONFIG_EFI_STUB */ | ||
235 | |||
197 | crc = partial_crc32(buf, i, crc); | 236 | crc = partial_crc32(buf, i, crc); |
198 | if (fwrite(buf, 1, i, stdout) != i) | 237 | if (fwrite(buf, 1, i, stdout) != i) |
199 | die("Writing setup failed"); | 238 | die("Writing setup failed"); |
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 3537d4b91f74..2b0b9631474b 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile | |||
@@ -5,12 +5,14 @@ | |||
5 | obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o | 5 | obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o |
6 | obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o | 6 | obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o |
7 | obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o | 7 | obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o |
8 | obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o | ||
8 | 9 | ||
9 | obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o | 10 | obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o |
10 | obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o | 11 | obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o |
11 | obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o | 12 | obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o |
12 | obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o | 13 | obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o |
13 | obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o | 14 | obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o |
15 | obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o | ||
14 | obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o | 16 | obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o |
15 | obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o | 17 | obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o |
16 | 18 | ||
@@ -20,12 +22,14 @@ obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o | |||
20 | aes-i586-y := aes-i586-asm_32.o aes_glue.o | 22 | aes-i586-y := aes-i586-asm_32.o aes_glue.o |
21 | twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o | 23 | twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o |
22 | salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o | 24 | salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o |
25 | serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o | ||
23 | 26 | ||
24 | aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o | 27 | aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o |
25 | blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o | 28 | blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o |
26 | twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o | 29 | twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o |
27 | twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o | 30 | twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o |
28 | salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o | 31 | salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o |
32 | serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o | ||
29 | 33 | ||
30 | aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o | 34 | aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o |
31 | 35 | ||
diff --git a/arch/x86/crypto/serpent-sse2-i586-asm_32.S b/arch/x86/crypto/serpent-sse2-i586-asm_32.S new file mode 100644 index 000000000000..4e37677ca851 --- /dev/null +++ b/arch/x86/crypto/serpent-sse2-i586-asm_32.S | |||
@@ -0,0 +1,638 @@ | |||
1 | /* | ||
2 | * Serpent Cipher 4-way parallel algorithm (i586/SSE2) | ||
3 | * | ||
4 | * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * Based on crypto/serpent.c by | ||
7 | * Copyright (C) 2002 Dag Arne Osvik <osvik@ii.uib.no> | ||
8 | * 2003 Herbert Valerio Riedel <hvr@gnu.org> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
23 | * USA | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | .file "serpent-sse2-i586-asm_32.S" | ||
28 | .text | ||
29 | |||
30 | #define arg_ctx 4 | ||
31 | #define arg_dst 8 | ||
32 | #define arg_src 12 | ||
33 | #define arg_xor 16 | ||
34 | |||
35 | /********************************************************************** | ||
36 | 4-way SSE2 serpent | ||
37 | **********************************************************************/ | ||
38 | #define CTX %edx | ||
39 | |||
40 | #define RA %xmm0 | ||
41 | #define RB %xmm1 | ||
42 | #define RC %xmm2 | ||
43 | #define RD %xmm3 | ||
44 | #define RE %xmm4 | ||
45 | |||
46 | #define RT0 %xmm5 | ||
47 | #define RT1 %xmm6 | ||
48 | |||
49 | #define RNOT %xmm7 | ||
50 | |||
51 | #define get_key(i, j, t) \ | ||
52 | movd (4*(i)+(j))*4(CTX), t; \ | ||
53 | pshufd $0, t, t; | ||
54 | |||
55 | #define K(x0, x1, x2, x3, x4, i) \ | ||
56 | get_key(i, 0, x4); \ | ||
57 | get_key(i, 1, RT0); \ | ||
58 | get_key(i, 2, RT1); \ | ||
59 | pxor x4, x0; \ | ||
60 | pxor RT0, x1; \ | ||
61 | pxor RT1, x2; \ | ||
62 | get_key(i, 3, x4); \ | ||
63 | pxor x4, x3; | ||
64 | |||
65 | #define LK(x0, x1, x2, x3, x4, i) \ | ||
66 | movdqa x0, x4; \ | ||
67 | pslld $13, x0; \ | ||
68 | psrld $(32 - 13), x4; \ | ||
69 | por x4, x0; \ | ||
70 | pxor x0, x1; \ | ||
71 | movdqa x2, x4; \ | ||
72 | pslld $3, x2; \ | ||
73 | psrld $(32 - 3), x4; \ | ||
74 | por x4, x2; \ | ||
75 | pxor x2, x1; \ | ||
76 | movdqa x1, x4; \ | ||
77 | pslld $1, x1; \ | ||
78 | psrld $(32 - 1), x4; \ | ||
79 | por x4, x1; \ | ||
80 | movdqa x0, x4; \ | ||
81 | pslld $3, x4; \ | ||
82 | pxor x2, x3; \ | ||
83 | pxor x4, x3; \ | ||
84 | movdqa x3, x4; \ | ||
85 | pslld $7, x3; \ | ||
86 | psrld $(32 - 7), x4; \ | ||
87 | por x4, x3; \ | ||
88 | movdqa x1, x4; \ | ||
89 | pslld $7, x4; \ | ||
90 | pxor x1, x0; \ | ||
91 | pxor x3, x0; \ | ||
92 | pxor x3, x2; \ | ||
93 | pxor x4, x2; \ | ||
94 | movdqa x0, x4; \ | ||
95 | get_key(i, 1, RT0); \ | ||
96 | pxor RT0, x1; \ | ||
97 | get_key(i, 3, RT0); \ | ||
98 | pxor RT0, x3; \ | ||
99 | pslld $5, x0; \ | ||
100 | psrld $(32 - 5), x4; \ | ||
101 | por x4, x0; \ | ||
102 | movdqa x2, x4; \ | ||
103 | pslld $22, x2; \ | ||
104 | psrld $(32 - 22), x4; \ | ||
105 | por x4, x2; \ | ||
106 | get_key(i, 0, RT0); \ | ||
107 | pxor RT0, x0; \ | ||
108 | get_key(i, 2, RT0); \ | ||
109 | pxor RT0, x2; | ||
110 | |||
111 | #define KL(x0, x1, x2, x3, x4, i) \ | ||
112 | K(x0, x1, x2, x3, x4, i); \ | ||
113 | movdqa x0, x4; \ | ||
114 | psrld $5, x0; \ | ||
115 | pslld $(32 - 5), x4; \ | ||
116 | por x4, x0; \ | ||
117 | movdqa x2, x4; \ | ||
118 | psrld $22, x2; \ | ||
119 | pslld $(32 - 22), x4; \ | ||
120 | por x4, x2; \ | ||
121 | pxor x3, x2; \ | ||
122 | pxor x3, x0; \ | ||
123 | movdqa x1, x4; \ | ||
124 | pslld $7, x4; \ | ||
125 | pxor x1, x0; \ | ||
126 | pxor x4, x2; \ | ||
127 | movdqa x1, x4; \ | ||
128 | psrld $1, x1; \ | ||
129 | pslld $(32 - 1), x4; \ | ||
130 | por x4, x1; \ | ||
131 | movdqa x3, x4; \ | ||
132 | psrld $7, x3; \ | ||
133 | pslld $(32 - 7), x4; \ | ||
134 | por x4, x3; \ | ||
135 | pxor x0, x1; \ | ||
136 | movdqa x0, x4; \ | ||
137 | pslld $3, x4; \ | ||
138 | pxor x4, x3; \ | ||
139 | movdqa x0, x4; \ | ||
140 | psrld $13, x0; \ | ||
141 | pslld $(32 - 13), x4; \ | ||
142 | por x4, x0; \ | ||
143 | pxor x2, x1; \ | ||
144 | pxor x2, x3; \ | ||
145 | movdqa x2, x4; \ | ||
146 | psrld $3, x2; \ | ||
147 | pslld $(32 - 3), x4; \ | ||
148 | por x4, x2; | ||
149 | |||
150 | #define S0(x0, x1, x2, x3, x4) \ | ||
151 | movdqa x3, x4; \ | ||
152 | por x0, x3; \ | ||
153 | pxor x4, x0; \ | ||
154 | pxor x2, x4; \ | ||
155 | pxor RNOT, x4; \ | ||
156 | pxor x1, x3; \ | ||
157 | pand x0, x1; \ | ||
158 | pxor x4, x1; \ | ||
159 | pxor x0, x2; \ | ||
160 | pxor x3, x0; \ | ||
161 | por x0, x4; \ | ||
162 | pxor x2, x0; \ | ||
163 | pand x1, x2; \ | ||
164 | pxor x2, x3; \ | ||
165 | pxor RNOT, x1; \ | ||
166 | pxor x4, x2; \ | ||
167 | pxor x2, x1; | ||
168 | |||
169 | #define S1(x0, x1, x2, x3, x4) \ | ||
170 | movdqa x1, x4; \ | ||
171 | pxor x0, x1; \ | ||
172 | pxor x3, x0; \ | ||
173 | pxor RNOT, x3; \ | ||
174 | pand x1, x4; \ | ||
175 | por x1, x0; \ | ||
176 | pxor x2, x3; \ | ||
177 | pxor x3, x0; \ | ||
178 | pxor x3, x1; \ | ||
179 | pxor x4, x3; \ | ||
180 | por x4, x1; \ | ||
181 | pxor x2, x4; \ | ||
182 | pand x0, x2; \ | ||
183 | pxor x1, x2; \ | ||
184 | por x0, x1; \ | ||
185 | pxor RNOT, x0; \ | ||
186 | pxor x2, x0; \ | ||
187 | pxor x1, x4; | ||
188 | |||
189 | #define S2(x0, x1, x2, x3, x4) \ | ||
190 | pxor RNOT, x3; \ | ||
191 | pxor x0, x1; \ | ||
192 | movdqa x0, x4; \ | ||
193 | pand x2, x0; \ | ||
194 | pxor x3, x0; \ | ||
195 | por x4, x3; \ | ||
196 | pxor x1, x2; \ | ||
197 | pxor x1, x3; \ | ||
198 | pand x0, x1; \ | ||
199 | pxor x2, x0; \ | ||
200 | pand x3, x2; \ | ||
201 | por x1, x3; \ | ||
202 | pxor RNOT, x0; \ | ||
203 | pxor x0, x3; \ | ||
204 | pxor x0, x4; \ | ||
205 | pxor x2, x0; \ | ||
206 | por x2, x1; | ||
207 | |||
208 | #define S3(x0, x1, x2, x3, x4) \ | ||
209 | movdqa x1, x4; \ | ||
210 | pxor x3, x1; \ | ||
211 | por x0, x3; \ | ||
212 | pand x0, x4; \ | ||
213 | pxor x2, x0; \ | ||
214 | pxor x1, x2; \ | ||
215 | pand x3, x1; \ | ||
216 | pxor x3, x2; \ | ||
217 | por x4, x0; \ | ||
218 | pxor x3, x4; \ | ||
219 | pxor x0, x1; \ | ||
220 | pand x3, x0; \ | ||
221 | pand x4, x3; \ | ||
222 | pxor x2, x3; \ | ||
223 | por x1, x4; \ | ||
224 | pand x1, x2; \ | ||
225 | pxor x3, x4; \ | ||
226 | pxor x3, x0; \ | ||
227 | pxor x2, x3; | ||
228 | |||
229 | #define S4(x0, x1, x2, x3, x4) \ | ||
230 | movdqa x3, x4; \ | ||
231 | pand x0, x3; \ | ||
232 | pxor x4, x0; \ | ||
233 | pxor x2, x3; \ | ||
234 | por x4, x2; \ | ||
235 | pxor x1, x0; \ | ||
236 | pxor x3, x4; \ | ||
237 | por x0, x2; \ | ||
238 | pxor x1, x2; \ | ||
239 | pand x0, x1; \ | ||
240 | pxor x4, x1; \ | ||
241 | pand x2, x4; \ | ||
242 | pxor x3, x2; \ | ||
243 | pxor x0, x4; \ | ||
244 | por x1, x3; \ | ||
245 | pxor RNOT, x1; \ | ||
246 | pxor x0, x3; | ||
247 | |||
248 | #define S5(x0, x1, x2, x3, x4) \ | ||
249 | movdqa x1, x4; \ | ||
250 | por x0, x1; \ | ||
251 | pxor x1, x2; \ | ||
252 | pxor RNOT, x3; \ | ||
253 | pxor x0, x4; \ | ||
254 | pxor x2, x0; \ | ||
255 | pand x4, x1; \ | ||
256 | por x3, x4; \ | ||
257 | pxor x0, x4; \ | ||
258 | pand x3, x0; \ | ||
259 | pxor x3, x1; \ | ||
260 | pxor x2, x3; \ | ||
261 | pxor x1, x0; \ | ||
262 | pand x4, x2; \ | ||
263 | pxor x2, x1; \ | ||
264 | pand x0, x2; \ | ||
265 | pxor x2, x3; | ||
266 | |||
267 | #define S6(x0, x1, x2, x3, x4) \ | ||
268 | movdqa x1, x4; \ | ||
269 | pxor x0, x3; \ | ||
270 | pxor x2, x1; \ | ||
271 | pxor x0, x2; \ | ||
272 | pand x3, x0; \ | ||
273 | por x3, x1; \ | ||
274 | pxor RNOT, x4; \ | ||
275 | pxor x1, x0; \ | ||
276 | pxor x2, x1; \ | ||
277 | pxor x4, x3; \ | ||
278 | pxor x0, x4; \ | ||
279 | pand x0, x2; \ | ||
280 | pxor x1, x4; \ | ||
281 | pxor x3, x2; \ | ||
282 | pand x1, x3; \ | ||
283 | pxor x0, x3; \ | ||
284 | pxor x2, x1; | ||
285 | |||
286 | #define S7(x0, x1, x2, x3, x4) \ | ||
287 | pxor RNOT, x1; \ | ||
288 | movdqa x1, x4; \ | ||
289 | pxor RNOT, x0; \ | ||
290 | pand x2, x1; \ | ||
291 | pxor x3, x1; \ | ||
292 | por x4, x3; \ | ||
293 | pxor x2, x4; \ | ||
294 | pxor x3, x2; \ | ||
295 | pxor x0, x3; \ | ||
296 | por x1, x0; \ | ||
297 | pand x0, x2; \ | ||
298 | pxor x4, x0; \ | ||
299 | pxor x3, x4; \ | ||
300 | pand x0, x3; \ | ||
301 | pxor x1, x4; \ | ||
302 | pxor x4, x2; \ | ||
303 | pxor x1, x3; \ | ||
304 | por x0, x4; \ | ||
305 | pxor x1, x4; | ||
306 | |||
307 | #define SI0(x0, x1, x2, x3, x4) \ | ||
308 | movdqa x3, x4; \ | ||
309 | pxor x0, x1; \ | ||
310 | por x1, x3; \ | ||
311 | pxor x1, x4; \ | ||
312 | pxor RNOT, x0; \ | ||
313 | pxor x3, x2; \ | ||
314 | pxor x0, x3; \ | ||
315 | pand x1, x0; \ | ||
316 | pxor x2, x0; \ | ||
317 | pand x3, x2; \ | ||
318 | pxor x4, x3; \ | ||
319 | pxor x3, x2; \ | ||
320 | pxor x3, x1; \ | ||
321 | pand x0, x3; \ | ||
322 | pxor x0, x1; \ | ||
323 | pxor x2, x0; \ | ||
324 | pxor x3, x4; | ||
325 | |||
326 | #define SI1(x0, x1, x2, x3, x4) \ | ||
327 | pxor x3, x1; \ | ||
328 | movdqa x0, x4; \ | ||
329 | pxor x2, x0; \ | ||
330 | pxor RNOT, x2; \ | ||
331 | por x1, x4; \ | ||
332 | pxor x3, x4; \ | ||
333 | pand x1, x3; \ | ||
334 | pxor x2, x1; \ | ||
335 | pand x4, x2; \ | ||
336 | pxor x1, x4; \ | ||
337 | por x3, x1; \ | ||
338 | pxor x0, x3; \ | ||
339 | pxor x0, x2; \ | ||
340 | por x4, x0; \ | ||
341 | pxor x4, x2; \ | ||
342 | pxor x0, x1; \ | ||
343 | pxor x1, x4; | ||
344 | |||
345 | #define SI2(x0, x1, x2, x3, x4) \ | ||
346 | pxor x1, x2; \ | ||
347 | movdqa x3, x4; \ | ||
348 | pxor RNOT, x3; \ | ||
349 | por x2, x3; \ | ||
350 | pxor x4, x2; \ | ||
351 | pxor x0, x4; \ | ||
352 | pxor x1, x3; \ | ||
353 | por x2, x1; \ | ||
354 | pxor x0, x2; \ | ||
355 | pxor x4, x1; \ | ||
356 | por x3, x4; \ | ||
357 | pxor x3, x2; \ | ||
358 | pxor x2, x4; \ | ||
359 | pand x1, x2; \ | ||
360 | pxor x3, x2; \ | ||
361 | pxor x4, x3; \ | ||
362 | pxor x0, x4; | ||
363 | |||
364 | #define SI3(x0, x1, x2, x3, x4) \ | ||
365 | pxor x1, x2; \ | ||
366 | movdqa x1, x4; \ | ||
367 | pand x2, x1; \ | ||
368 | pxor x0, x1; \ | ||
369 | por x4, x0; \ | ||
370 | pxor x3, x4; \ | ||
371 | pxor x3, x0; \ | ||
372 | por x1, x3; \ | ||
373 | pxor x2, x1; \ | ||
374 | pxor x3, x1; \ | ||
375 | pxor x2, x0; \ | ||
376 | pxor x3, x2; \ | ||
377 | pand x1, x3; \ | ||
378 | pxor x0, x1; \ | ||
379 | pand x2, x0; \ | ||
380 | pxor x3, x4; \ | ||
381 | pxor x0, x3; \ | ||
382 | pxor x1, x0; | ||
383 | |||
384 | #define SI4(x0, x1, x2, x3, x4) \ | ||
385 | pxor x3, x2; \ | ||
386 | movdqa x0, x4; \ | ||
387 | pand x1, x0; \ | ||
388 | pxor x2, x0; \ | ||
389 | por x3, x2; \ | ||
390 | pxor RNOT, x4; \ | ||
391 | pxor x0, x1; \ | ||
392 | pxor x2, x0; \ | ||
393 | pand x4, x2; \ | ||
394 | pxor x0, x2; \ | ||
395 | por x4, x0; \ | ||
396 | pxor x3, x0; \ | ||
397 | pand x2, x3; \ | ||
398 | pxor x3, x4; \ | ||
399 | pxor x1, x3; \ | ||
400 | pand x0, x1; \ | ||
401 | pxor x1, x4; \ | ||
402 | pxor x3, x0; | ||
403 | |||
404 | #define SI5(x0, x1, x2, x3, x4) \ | ||
405 | movdqa x1, x4; \ | ||
406 | por x2, x1; \ | ||
407 | pxor x4, x2; \ | ||
408 | pxor x3, x1; \ | ||
409 | pand x4, x3; \ | ||
410 | pxor x3, x2; \ | ||
411 | por x0, x3; \ | ||
412 | pxor RNOT, x0; \ | ||
413 | pxor x2, x3; \ | ||
414 | por x0, x2; \ | ||
415 | pxor x1, x4; \ | ||
416 | pxor x4, x2; \ | ||
417 | pand x0, x4; \ | ||
418 | pxor x1, x0; \ | ||
419 | pxor x3, x1; \ | ||
420 | pand x2, x0; \ | ||
421 | pxor x3, x2; \ | ||
422 | pxor x2, x0; \ | ||
423 | pxor x4, x2; \ | ||
424 | pxor x3, x4; | ||
425 | |||
426 | #define SI6(x0, x1, x2, x3, x4) \ | ||
427 | pxor x2, x0; \ | ||
428 | movdqa x0, x4; \ | ||
429 | pand x3, x0; \ | ||
430 | pxor x3, x2; \ | ||
431 | pxor x2, x0; \ | ||
432 | pxor x1, x3; \ | ||
433 | por x4, x2; \ | ||
434 | pxor x3, x2; \ | ||
435 | pand x0, x3; \ | ||
436 | pxor RNOT, x0; \ | ||
437 | pxor x1, x3; \ | ||
438 | pand x2, x1; \ | ||
439 | pxor x0, x4; \ | ||
440 | pxor x4, x3; \ | ||
441 | pxor x2, x4; \ | ||
442 | pxor x1, x0; \ | ||
443 | pxor x0, x2; | ||
444 | |||
445 | #define SI7(x0, x1, x2, x3, x4) \ | ||
446 | movdqa x3, x4; \ | ||
447 | pand x0, x3; \ | ||
448 | pxor x2, x0; \ | ||
449 | por x4, x2; \ | ||
450 | pxor x1, x4; \ | ||
451 | pxor RNOT, x0; \ | ||
452 | por x3, x1; \ | ||
453 | pxor x0, x4; \ | ||
454 | pand x2, x0; \ | ||
455 | pxor x1, x0; \ | ||
456 | pand x2, x1; \ | ||
457 | pxor x2, x3; \ | ||
458 | pxor x3, x4; \ | ||
459 | pand x3, x2; \ | ||
460 | por x0, x3; \ | ||
461 | pxor x4, x1; \ | ||
462 | pxor x4, x3; \ | ||
463 | pand x0, x4; \ | ||
464 | pxor x2, x4; | ||
465 | |||
466 | #define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \ | ||
467 | movdqa x2, t3; \ | ||
468 | movdqa x0, t1; \ | ||
469 | unpcklps x3, t3; \ | ||
470 | movdqa x0, t2; \ | ||
471 | unpcklps x1, t1; \ | ||
472 | unpckhps x1, t2; \ | ||
473 | movdqa t3, x1; \ | ||
474 | unpckhps x3, x2; \ | ||
475 | movdqa t1, x0; \ | ||
476 | movhlps t1, x1; \ | ||
477 | movdqa t2, t1; \ | ||
478 | movlhps t3, x0; \ | ||
479 | movlhps x2, t1; \ | ||
480 | movhlps t2, x2; \ | ||
481 | movdqa x2, x3; \ | ||
482 | movdqa t1, x2; | ||
483 | |||
484 | #define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \ | ||
485 | movdqu (0*4*4)(in), x0; \ | ||
486 | movdqu (1*4*4)(in), x1; \ | ||
487 | movdqu (2*4*4)(in), x2; \ | ||
488 | movdqu (3*4*4)(in), x3; \ | ||
489 | \ | ||
490 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) | ||
491 | |||
492 | #define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ | ||
493 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
494 | \ | ||
495 | movdqu x0, (0*4*4)(out); \ | ||
496 | movdqu x1, (1*4*4)(out); \ | ||
497 | movdqu x2, (2*4*4)(out); \ | ||
498 | movdqu x3, (3*4*4)(out); | ||
499 | |||
500 | #define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ | ||
501 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
502 | \ | ||
503 | movdqu (0*4*4)(out), t0; \ | ||
504 | pxor t0, x0; \ | ||
505 | movdqu x0, (0*4*4)(out); \ | ||
506 | movdqu (1*4*4)(out), t0; \ | ||
507 | pxor t0, x1; \ | ||
508 | movdqu x1, (1*4*4)(out); \ | ||
509 | movdqu (2*4*4)(out), t0; \ | ||
510 | pxor t0, x2; \ | ||
511 | movdqu x2, (2*4*4)(out); \ | ||
512 | movdqu (3*4*4)(out), t0; \ | ||
513 | pxor t0, x3; \ | ||
514 | movdqu x3, (3*4*4)(out); | ||
515 | |||
516 | .align 8 | ||
517 | .global __serpent_enc_blk_4way | ||
518 | .type __serpent_enc_blk_4way,@function; | ||
519 | |||
520 | __serpent_enc_blk_4way: | ||
521 | /* input: | ||
522 | * arg_ctx(%esp): ctx, CTX | ||
523 | * arg_dst(%esp): dst | ||
524 | * arg_src(%esp): src | ||
525 | * arg_xor(%esp): bool, if true: xor output | ||
526 | */ | ||
527 | |||
528 | pcmpeqd RNOT, RNOT; | ||
529 | |||
530 | movl arg_ctx(%esp), CTX; | ||
531 | |||
532 | movl arg_src(%esp), %eax; | ||
533 | read_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); | ||
534 | |||
535 | K(RA, RB, RC, RD, RE, 0); | ||
536 | S0(RA, RB, RC, RD, RE); LK(RC, RB, RD, RA, RE, 1); | ||
537 | S1(RC, RB, RD, RA, RE); LK(RE, RD, RA, RC, RB, 2); | ||
538 | S2(RE, RD, RA, RC, RB); LK(RB, RD, RE, RC, RA, 3); | ||
539 | S3(RB, RD, RE, RC, RA); LK(RC, RA, RD, RB, RE, 4); | ||
540 | S4(RC, RA, RD, RB, RE); LK(RA, RD, RB, RE, RC, 5); | ||
541 | S5(RA, RD, RB, RE, RC); LK(RC, RA, RD, RE, RB, 6); | ||
542 | S6(RC, RA, RD, RE, RB); LK(RD, RB, RA, RE, RC, 7); | ||
543 | S7(RD, RB, RA, RE, RC); LK(RC, RA, RE, RD, RB, 8); | ||
544 | S0(RC, RA, RE, RD, RB); LK(RE, RA, RD, RC, RB, 9); | ||
545 | S1(RE, RA, RD, RC, RB); LK(RB, RD, RC, RE, RA, 10); | ||
546 | S2(RB, RD, RC, RE, RA); LK(RA, RD, RB, RE, RC, 11); | ||
547 | S3(RA, RD, RB, RE, RC); LK(RE, RC, RD, RA, RB, 12); | ||
548 | S4(RE, RC, RD, RA, RB); LK(RC, RD, RA, RB, RE, 13); | ||
549 | S5(RC, RD, RA, RB, RE); LK(RE, RC, RD, RB, RA, 14); | ||
550 | S6(RE, RC, RD, RB, RA); LK(RD, RA, RC, RB, RE, 15); | ||
551 | S7(RD, RA, RC, RB, RE); LK(RE, RC, RB, RD, RA, 16); | ||
552 | S0(RE, RC, RB, RD, RA); LK(RB, RC, RD, RE, RA, 17); | ||
553 | S1(RB, RC, RD, RE, RA); LK(RA, RD, RE, RB, RC, 18); | ||
554 | S2(RA, RD, RE, RB, RC); LK(RC, RD, RA, RB, RE, 19); | ||
555 | S3(RC, RD, RA, RB, RE); LK(RB, RE, RD, RC, RA, 20); | ||
556 | S4(RB, RE, RD, RC, RA); LK(RE, RD, RC, RA, RB, 21); | ||
557 | S5(RE, RD, RC, RA, RB); LK(RB, RE, RD, RA, RC, 22); | ||
558 | S6(RB, RE, RD, RA, RC); LK(RD, RC, RE, RA, RB, 23); | ||
559 | S7(RD, RC, RE, RA, RB); LK(RB, RE, RA, RD, RC, 24); | ||
560 | S0(RB, RE, RA, RD, RC); LK(RA, RE, RD, RB, RC, 25); | ||
561 | S1(RA, RE, RD, RB, RC); LK(RC, RD, RB, RA, RE, 26); | ||
562 | S2(RC, RD, RB, RA, RE); LK(RE, RD, RC, RA, RB, 27); | ||
563 | S3(RE, RD, RC, RA, RB); LK(RA, RB, RD, RE, RC, 28); | ||
564 | S4(RA, RB, RD, RE, RC); LK(RB, RD, RE, RC, RA, 29); | ||
565 | S5(RB, RD, RE, RC, RA); LK(RA, RB, RD, RC, RE, 30); | ||
566 | S6(RA, RB, RD, RC, RE); LK(RD, RE, RB, RC, RA, 31); | ||
567 | S7(RD, RE, RB, RC, RA); K(RA, RB, RC, RD, RE, 32); | ||
568 | |||
569 | movl arg_dst(%esp), %eax; | ||
570 | |||
571 | cmpb $0, arg_xor(%esp); | ||
572 | jnz __enc_xor4; | ||
573 | |||
574 | write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); | ||
575 | |||
576 | ret; | ||
577 | |||
578 | __enc_xor4: | ||
579 | xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); | ||
580 | |||
581 | ret; | ||
582 | |||
583 | .align 8 | ||
584 | .global serpent_dec_blk_4way | ||
585 | .type serpent_dec_blk_4way,@function; | ||
586 | |||
587 | serpent_dec_blk_4way: | ||
588 | /* input: | ||
589 | * arg_ctx(%esp): ctx, CTX | ||
590 | * arg_dst(%esp): dst | ||
591 | * arg_src(%esp): src | ||
592 | */ | ||
593 | |||
594 | pcmpeqd RNOT, RNOT; | ||
595 | |||
596 | movl arg_ctx(%esp), CTX; | ||
597 | |||
598 | movl arg_src(%esp), %eax; | ||
599 | read_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); | ||
600 | |||
601 | K(RA, RB, RC, RD, RE, 32); | ||
602 | SI7(RA, RB, RC, RD, RE); KL(RB, RD, RA, RE, RC, 31); | ||
603 | SI6(RB, RD, RA, RE, RC); KL(RA, RC, RE, RB, RD, 30); | ||
604 | SI5(RA, RC, RE, RB, RD); KL(RC, RD, RA, RE, RB, 29); | ||
605 | SI4(RC, RD, RA, RE, RB); KL(RC, RA, RB, RE, RD, 28); | ||
606 | SI3(RC, RA, RB, RE, RD); KL(RB, RC, RD, RE, RA, 27); | ||
607 | SI2(RB, RC, RD, RE, RA); KL(RC, RA, RE, RD, RB, 26); | ||
608 | SI1(RC, RA, RE, RD, RB); KL(RB, RA, RE, RD, RC, 25); | ||
609 | SI0(RB, RA, RE, RD, RC); KL(RE, RC, RA, RB, RD, 24); | ||
610 | SI7(RE, RC, RA, RB, RD); KL(RC, RB, RE, RD, RA, 23); | ||
611 | SI6(RC, RB, RE, RD, RA); KL(RE, RA, RD, RC, RB, 22); | ||
612 | SI5(RE, RA, RD, RC, RB); KL(RA, RB, RE, RD, RC, 21); | ||
613 | SI4(RA, RB, RE, RD, RC); KL(RA, RE, RC, RD, RB, 20); | ||
614 | SI3(RA, RE, RC, RD, RB); KL(RC, RA, RB, RD, RE, 19); | ||
615 | SI2(RC, RA, RB, RD, RE); KL(RA, RE, RD, RB, RC, 18); | ||
616 | SI1(RA, RE, RD, RB, RC); KL(RC, RE, RD, RB, RA, 17); | ||
617 | SI0(RC, RE, RD, RB, RA); KL(RD, RA, RE, RC, RB, 16); | ||
618 | SI7(RD, RA, RE, RC, RB); KL(RA, RC, RD, RB, RE, 15); | ||
619 | SI6(RA, RC, RD, RB, RE); KL(RD, RE, RB, RA, RC, 14); | ||
620 | SI5(RD, RE, RB, RA, RC); KL(RE, RC, RD, RB, RA, 13); | ||
621 | SI4(RE, RC, RD, RB, RA); KL(RE, RD, RA, RB, RC, 12); | ||
622 | SI3(RE, RD, RA, RB, RC); KL(RA, RE, RC, RB, RD, 11); | ||
623 | SI2(RA, RE, RC, RB, RD); KL(RE, RD, RB, RC, RA, 10); | ||
624 | SI1(RE, RD, RB, RC, RA); KL(RA, RD, RB, RC, RE, 9); | ||
625 | SI0(RA, RD, RB, RC, RE); KL(RB, RE, RD, RA, RC, 8); | ||
626 | SI7(RB, RE, RD, RA, RC); KL(RE, RA, RB, RC, RD, 7); | ||
627 | SI6(RE, RA, RB, RC, RD); KL(RB, RD, RC, RE, RA, 6); | ||
628 | SI5(RB, RD, RC, RE, RA); KL(RD, RA, RB, RC, RE, 5); | ||
629 | SI4(RD, RA, RB, RC, RE); KL(RD, RB, RE, RC, RA, 4); | ||
630 | SI3(RD, RB, RE, RC, RA); KL(RE, RD, RA, RC, RB, 3); | ||
631 | SI2(RE, RD, RA, RC, RB); KL(RD, RB, RC, RA, RE, 2); | ||
632 | SI1(RD, RB, RC, RA, RE); KL(RE, RB, RC, RA, RD, 1); | ||
633 | SI0(RE, RB, RC, RA, RD); K(RC, RD, RB, RE, RA, 0); | ||
634 | |||
635 | movl arg_dst(%esp), %eax; | ||
636 | write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA); | ||
637 | |||
638 | ret; | ||
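In the listing above, get_key() broadcasts one 32-bit subkey word into all four lanes of an xmm register with movd/pshufd, so a single pxor mixes that word into four blocks at once, and LK()/KL() interleave the key mixing with Serpent's linear transformation applied lane-wise. The rotate counts (13, 3, 1, 7, 5, 22) and shifts (3, 7) match the standard scalar definition; for reference, a scalar C version of the transformation the SSE2 sequence implements per lane (shown only to make the asm easier to follow):

#include <stdint.h>

static inline uint32_t rol32(uint32_t v, int n)
{
	return (v << n) | (v >> (32 - n));
}

/* Serpent forward linear transformation, one 32-bit lane */
static void serpent_lt(uint32_t *x0, uint32_t *x1, uint32_t *x2,
		       uint32_t *x3)
{
	*x0 = rol32(*x0, 13);
	*x2 = rol32(*x2, 3);
	*x1 ^= *x0 ^ *x2;
	*x3 ^= *x2 ^ (*x0 << 3);
	*x1 = rol32(*x1, 1);
	*x3 = rol32(*x3, 7);
	*x0 ^= *x1 ^ *x3;
	*x2 ^= *x3 ^ (*x1 << 7);
	*x0 = rol32(*x0, 5);
	*x2 = rol32(*x2, 22);
}

KL() in the asm is the inverse of this sequence, used on the decryption path.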
diff --git a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S new file mode 100644 index 000000000000..7f24a1540821 --- /dev/null +++ b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S | |||
@@ -0,0 +1,761 @@ | |||
1 | /* | ||
2 | * Serpent Cipher 8-way parallel algorithm (x86_64/SSE2) | ||
3 | * | ||
4 | * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * Based on crypto/serpent.c by | ||
7 | * Copyright (C) 2002 Dag Arne Osvik <osvik@ii.uib.no> | ||
8 | * 2003 Herbert Valerio Riedel <hvr@gnu.org> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
23 | * USA | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | .file "serpent-sse2-x86_64-asm_64.S" | ||
28 | .text | ||
29 | |||
30 | #define CTX %rdi | ||
31 | |||
32 | /********************************************************************** | ||
33 | 8-way SSE2 serpent | ||
34 | **********************************************************************/ | ||
35 | #define RA1 %xmm0 | ||
36 | #define RB1 %xmm1 | ||
37 | #define RC1 %xmm2 | ||
38 | #define RD1 %xmm3 | ||
39 | #define RE1 %xmm4 | ||
40 | |||
41 | #define RA2 %xmm5 | ||
42 | #define RB2 %xmm6 | ||
43 | #define RC2 %xmm7 | ||
44 | #define RD2 %xmm8 | ||
45 | #define RE2 %xmm9 | ||
46 | |||
47 | #define RNOT %xmm10 | ||
48 | |||
49 | #define RK0 %xmm11 | ||
50 | #define RK1 %xmm12 | ||
51 | #define RK2 %xmm13 | ||
52 | #define RK3 %xmm14 | ||
53 | |||
54 | #define S0_1(x0, x1, x2, x3, x4) \ | ||
55 | movdqa x3, x4; \ | ||
56 | por x0, x3; \ | ||
57 | pxor x4, x0; \ | ||
58 | pxor x2, x4; \ | ||
59 | pxor RNOT, x4; \ | ||
60 | pxor x1, x3; \ | ||
61 | pand x0, x1; \ | ||
62 | pxor x4, x1; \ | ||
63 | pxor x0, x2; | ||
64 | #define S0_2(x0, x1, x2, x3, x4) \ | ||
65 | pxor x3, x0; \ | ||
66 | por x0, x4; \ | ||
67 | pxor x2, x0; \ | ||
68 | pand x1, x2; \ | ||
69 | pxor x2, x3; \ | ||
70 | pxor RNOT, x1; \ | ||
71 | pxor x4, x2; \ | ||
72 | pxor x2, x1; | ||
73 | |||
74 | #define S1_1(x0, x1, x2, x3, x4) \ | ||
75 | movdqa x1, x4; \ | ||
76 | pxor x0, x1; \ | ||
77 | pxor x3, x0; \ | ||
78 | pxor RNOT, x3; \ | ||
79 | pand x1, x4; \ | ||
80 | por x1, x0; \ | ||
81 | pxor x2, x3; \ | ||
82 | pxor x3, x0; \ | ||
83 | pxor x3, x1; | ||
84 | #define S1_2(x0, x1, x2, x3, x4) \ | ||
85 | pxor x4, x3; \ | ||
86 | por x4, x1; \ | ||
87 | pxor x2, x4; \ | ||
88 | pand x0, x2; \ | ||
89 | pxor x1, x2; \ | ||
90 | por x0, x1; \ | ||
91 | pxor RNOT, x0; \ | ||
92 | pxor x2, x0; \ | ||
93 | pxor x1, x4; | ||
94 | |||
95 | #define S2_1(x0, x1, x2, x3, x4) \ | ||
96 | pxor RNOT, x3; \ | ||
97 | pxor x0, x1; \ | ||
98 | movdqa x0, x4; \ | ||
99 | pand x2, x0; \ | ||
100 | pxor x3, x0; \ | ||
101 | por x4, x3; \ | ||
102 | pxor x1, x2; \ | ||
103 | pxor x1, x3; \ | ||
104 | pand x0, x1; | ||
105 | #define S2_2(x0, x1, x2, x3, x4) \ | ||
106 | pxor x2, x0; \ | ||
107 | pand x3, x2; \ | ||
108 | por x1, x3; \ | ||
109 | pxor RNOT, x0; \ | ||
110 | pxor x0, x3; \ | ||
111 | pxor x0, x4; \ | ||
112 | pxor x2, x0; \ | ||
113 | por x2, x1; | ||
114 | |||
115 | #define S3_1(x0, x1, x2, x3, x4) \ | ||
116 | movdqa x1, x4; \ | ||
117 | pxor x3, x1; \ | ||
118 | por x0, x3; \ | ||
119 | pand x0, x4; \ | ||
120 | pxor x2, x0; \ | ||
121 | pxor x1, x2; \ | ||
122 | pand x3, x1; \ | ||
123 | pxor x3, x2; \ | ||
124 | por x4, x0; \ | ||
125 | pxor x3, x4; | ||
126 | #define S3_2(x0, x1, x2, x3, x4) \ | ||
127 | pxor x0, x1; \ | ||
128 | pand x3, x0; \ | ||
129 | pand x4, x3; \ | ||
130 | pxor x2, x3; \ | ||
131 | por x1, x4; \ | ||
132 | pand x1, x2; \ | ||
133 | pxor x3, x4; \ | ||
134 | pxor x3, x0; \ | ||
135 | pxor x2, x3; | ||
136 | |||
137 | #define S4_1(x0, x1, x2, x3, x4) \ | ||
138 | movdqa x3, x4; \ | ||
139 | pand x0, x3; \ | ||
140 | pxor x4, x0; \ | ||
141 | pxor x2, x3; \ | ||
142 | por x4, x2; \ | ||
143 | pxor x1, x0; \ | ||
144 | pxor x3, x4; \ | ||
145 | por x0, x2; \ | ||
146 | pxor x1, x2; | ||
147 | #define S4_2(x0, x1, x2, x3, x4) \ | ||
148 | pand x0, x1; \ | ||
149 | pxor x4, x1; \ | ||
150 | pand x2, x4; \ | ||
151 | pxor x3, x2; \ | ||
152 | pxor x0, x4; \ | ||
153 | por x1, x3; \ | ||
154 | pxor RNOT, x1; \ | ||
155 | pxor x0, x3; | ||
156 | |||
157 | #define S5_1(x0, x1, x2, x3, x4) \ | ||
158 | movdqa x1, x4; \ | ||
159 | por x0, x1; \ | ||
160 | pxor x1, x2; \ | ||
161 | pxor RNOT, x3; \ | ||
162 | pxor x0, x4; \ | ||
163 | pxor x2, x0; \ | ||
164 | pand x4, x1; \ | ||
165 | por x3, x4; \ | ||
166 | pxor x0, x4; | ||
167 | #define S5_2(x0, x1, x2, x3, x4) \ | ||
168 | pand x3, x0; \ | ||
169 | pxor x3, x1; \ | ||
170 | pxor x2, x3; \ | ||
171 | pxor x1, x0; \ | ||
172 | pand x4, x2; \ | ||
173 | pxor x2, x1; \ | ||
174 | pand x0, x2; \ | ||
175 | pxor x2, x3; | ||
176 | |||
177 | #define S6_1(x0, x1, x2, x3, x4) \ | ||
178 | movdqa x1, x4; \ | ||
179 | pxor x0, x3; \ | ||
180 | pxor x2, x1; \ | ||
181 | pxor x0, x2; \ | ||
182 | pand x3, x0; \ | ||
183 | por x3, x1; \ | ||
184 | pxor RNOT, x4; \ | ||
185 | pxor x1, x0; \ | ||
186 | pxor x2, x1; | ||
187 | #define S6_2(x0, x1, x2, x3, x4) \ | ||
188 | pxor x4, x3; \ | ||
189 | pxor x0, x4; \ | ||
190 | pand x0, x2; \ | ||
191 | pxor x1, x4; \ | ||
192 | pxor x3, x2; \ | ||
193 | pand x1, x3; \ | ||
194 | pxor x0, x3; \ | ||
195 | pxor x2, x1; | ||
196 | |||
197 | #define S7_1(x0, x1, x2, x3, x4) \ | ||
198 | pxor RNOT, x1; \ | ||
199 | movdqa x1, x4; \ | ||
200 | pxor RNOT, x0; \ | ||
201 | pand x2, x1; \ | ||
202 | pxor x3, x1; \ | ||
203 | por x4, x3; \ | ||
204 | pxor x2, x4; \ | ||
205 | pxor x3, x2; \ | ||
206 | pxor x0, x3; \ | ||
207 | por x1, x0; | ||
208 | #define S7_2(x0, x1, x2, x3, x4) \ | ||
209 | pand x0, x2; \ | ||
210 | pxor x4, x0; \ | ||
211 | pxor x3, x4; \ | ||
212 | pand x0, x3; \ | ||
213 | pxor x1, x4; \ | ||
214 | pxor x4, x2; \ | ||
215 | pxor x1, x3; \ | ||
216 | por x0, x4; \ | ||
217 | pxor x1, x4; | ||
218 | |||
219 | #define SI0_1(x0, x1, x2, x3, x4) \ | ||
220 | movdqa x3, x4; \ | ||
221 | pxor x0, x1; \ | ||
222 | por x1, x3; \ | ||
223 | pxor x1, x4; \ | ||
224 | pxor RNOT, x0; \ | ||
225 | pxor x3, x2; \ | ||
226 | pxor x0, x3; \ | ||
227 | pand x1, x0; \ | ||
228 | pxor x2, x0; | ||
229 | #define SI0_2(x0, x1, x2, x3, x4) \ | ||
230 | pand x3, x2; \ | ||
231 | pxor x4, x3; \ | ||
232 | pxor x3, x2; \ | ||
233 | pxor x3, x1; \ | ||
234 | pand x0, x3; \ | ||
235 | pxor x0, x1; \ | ||
236 | pxor x2, x0; \ | ||
237 | pxor x3, x4; | ||
238 | |||
239 | #define SI1_1(x0, x1, x2, x3, x4) \ | ||
240 | pxor x3, x1; \ | ||
241 | movdqa x0, x4; \ | ||
242 | pxor x2, x0; \ | ||
243 | pxor RNOT, x2; \ | ||
244 | por x1, x4; \ | ||
245 | pxor x3, x4; \ | ||
246 | pand x1, x3; \ | ||
247 | pxor x2, x1; \ | ||
248 | pand x4, x2; | ||
249 | #define SI1_2(x0, x1, x2, x3, x4) \ | ||
250 | pxor x1, x4; \ | ||
251 | por x3, x1; \ | ||
252 | pxor x0, x3; \ | ||
253 | pxor x0, x2; \ | ||
254 | por x4, x0; \ | ||
255 | pxor x4, x2; \ | ||
256 | pxor x0, x1; \ | ||
257 | pxor x1, x4; | ||
258 | |||
259 | #define SI2_1(x0, x1, x2, x3, x4) \ | ||
260 | pxor x1, x2; \ | ||
261 | movdqa x3, x4; \ | ||
262 | pxor RNOT, x3; \ | ||
263 | por x2, x3; \ | ||
264 | pxor x4, x2; \ | ||
265 | pxor x0, x4; \ | ||
266 | pxor x1, x3; \ | ||
267 | por x2, x1; \ | ||
268 | pxor x0, x2; | ||
269 | #define SI2_2(x0, x1, x2, x3, x4) \ | ||
270 | pxor x4, x1; \ | ||
271 | por x3, x4; \ | ||
272 | pxor x3, x2; \ | ||
273 | pxor x2, x4; \ | ||
274 | pand x1, x2; \ | ||
275 | pxor x3, x2; \ | ||
276 | pxor x4, x3; \ | ||
277 | pxor x0, x4; | ||
278 | |||
279 | #define SI3_1(x0, x1, x2, x3, x4) \ | ||
280 | pxor x1, x2; \ | ||
281 | movdqa x1, x4; \ | ||
282 | pand x2, x1; \ | ||
283 | pxor x0, x1; \ | ||
284 | por x4, x0; \ | ||
285 | pxor x3, x4; \ | ||
286 | pxor x3, x0; \ | ||
287 | por x1, x3; \ | ||
288 | pxor x2, x1; | ||
289 | #define SI3_2(x0, x1, x2, x3, x4) \ | ||
290 | pxor x3, x1; \ | ||
291 | pxor x2, x0; \ | ||
292 | pxor x3, x2; \ | ||
293 | pand x1, x3; \ | ||
294 | pxor x0, x1; \ | ||
295 | pand x2, x0; \ | ||
296 | pxor x3, x4; \ | ||
297 | pxor x0, x3; \ | ||
298 | pxor x1, x0; | ||
299 | |||
300 | #define SI4_1(x0, x1, x2, x3, x4) \ | ||
301 | pxor x3, x2; \ | ||
302 | movdqa x0, x4; \ | ||
303 | pand x1, x0; \ | ||
304 | pxor x2, x0; \ | ||
305 | por x3, x2; \ | ||
306 | pxor RNOT, x4; \ | ||
307 | pxor x0, x1; \ | ||
308 | pxor x2, x0; \ | ||
309 | pand x4, x2; | ||
310 | #define SI4_2(x0, x1, x2, x3, x4) \ | ||
311 | pxor x0, x2; \ | ||
312 | por x4, x0; \ | ||
313 | pxor x3, x0; \ | ||
314 | pand x2, x3; \ | ||
315 | pxor x3, x4; \ | ||
316 | pxor x1, x3; \ | ||
317 | pand x0, x1; \ | ||
318 | pxor x1, x4; \ | ||
319 | pxor x3, x0; | ||
320 | |||
321 | #define SI5_1(x0, x1, x2, x3, x4) \ | ||
322 | movdqa x1, x4; \ | ||
323 | por x2, x1; \ | ||
324 | pxor x4, x2; \ | ||
325 | pxor x3, x1; \ | ||
326 | pand x4, x3; \ | ||
327 | pxor x3, x2; \ | ||
328 | por x0, x3; \ | ||
329 | pxor RNOT, x0; \ | ||
330 | pxor x2, x3; \ | ||
331 | por x0, x2; | ||
332 | #define SI5_2(x0, x1, x2, x3, x4) \ | ||
333 | pxor x1, x4; \ | ||
334 | pxor x4, x2; \ | ||
335 | pand x0, x4; \ | ||
336 | pxor x1, x0; \ | ||
337 | pxor x3, x1; \ | ||
338 | pand x2, x0; \ | ||
339 | pxor x3, x2; \ | ||
340 | pxor x2, x0; \ | ||
341 | pxor x4, x2; \ | ||
342 | pxor x3, x4; | ||
343 | |||
344 | #define SI6_1(x0, x1, x2, x3, x4) \ | ||
345 | pxor x2, x0; \ | ||
346 | movdqa x0, x4; \ | ||
347 | pand x3, x0; \ | ||
348 | pxor x3, x2; \ | ||
349 | pxor x2, x0; \ | ||
350 | pxor x1, x3; \ | ||
351 | por x4, x2; \ | ||
352 | pxor x3, x2; \ | ||
353 | pand x0, x3; | ||
354 | #define SI6_2(x0, x1, x2, x3, x4) \ | ||
355 | pxor RNOT, x0; \ | ||
356 | pxor x1, x3; \ | ||
357 | pand x2, x1; \ | ||
358 | pxor x0, x4; \ | ||
359 | pxor x4, x3; \ | ||
360 | pxor x2, x4; \ | ||
361 | pxor x1, x0; \ | ||
362 | pxor x0, x2; | ||
363 | |||
364 | #define SI7_1(x0, x1, x2, x3, x4) \ | ||
365 | movdqa x3, x4; \ | ||
366 | pand x0, x3; \ | ||
367 | pxor x2, x0; \ | ||
368 | por x4, x2; \ | ||
369 | pxor x1, x4; \ | ||
370 | pxor RNOT, x0; \ | ||
371 | por x3, x1; \ | ||
372 | pxor x0, x4; \ | ||
373 | pand x2, x0; \ | ||
374 | pxor x1, x0; | ||
375 | #define SI7_2(x0, x1, x2, x3, x4) \ | ||
376 | pand x2, x1; \ | ||
377 | pxor x2, x3; \ | ||
378 | pxor x3, x4; \ | ||
379 | pand x3, x2; \ | ||
380 | por x0, x3; \ | ||
381 | pxor x4, x1; \ | ||
382 | pxor x4, x3; \ | ||
383 | pand x0, x4; \ | ||
384 | pxor x2, x4; | ||
385 | |||
386 | #define get_key(i, j, t) \ | ||
387 | movd (4*(i)+(j))*4(CTX), t; \ | ||
388 | pshufd $0, t, t; | ||
389 | |||
390 | #define K2(x0, x1, x2, x3, x4, i) \ | ||
391 | get_key(i, 0, RK0); \ | ||
392 | get_key(i, 1, RK1); \ | ||
393 | get_key(i, 2, RK2); \ | ||
394 | get_key(i, 3, RK3); \ | ||
395 | pxor RK0, x0 ## 1; \ | ||
396 | pxor RK1, x1 ## 1; \ | ||
397 | pxor RK2, x2 ## 1; \ | ||
398 | pxor RK3, x3 ## 1; \ | ||
399 | pxor RK0, x0 ## 2; \ | ||
400 | pxor RK1, x1 ## 2; \ | ||
401 | pxor RK2, x2 ## 2; \ | ||
402 | pxor RK3, x3 ## 2; | ||
403 | |||
404 | #define LK2(x0, x1, x2, x3, x4, i) \ | ||
405 | movdqa x0 ## 1, x4 ## 1; \ | ||
406 | pslld $13, x0 ## 1; \ | ||
407 | psrld $(32 - 13), x4 ## 1; \ | ||
408 | por x4 ## 1, x0 ## 1; \ | ||
409 | pxor x0 ## 1, x1 ## 1; \ | ||
410 | movdqa x2 ## 1, x4 ## 1; \ | ||
411 | pslld $3, x2 ## 1; \ | ||
412 | psrld $(32 - 3), x4 ## 1; \ | ||
413 | por x4 ## 1, x2 ## 1; \ | ||
414 | pxor x2 ## 1, x1 ## 1; \ | ||
415 | movdqa x0 ## 2, x4 ## 2; \ | ||
416 | pslld $13, x0 ## 2; \ | ||
417 | psrld $(32 - 13), x4 ## 2; \ | ||
418 | por x4 ## 2, x0 ## 2; \ | ||
419 | pxor x0 ## 2, x1 ## 2; \ | ||
420 | movdqa x2 ## 2, x4 ## 2; \ | ||
421 | pslld $3, x2 ## 2; \ | ||
422 | psrld $(32 - 3), x4 ## 2; \ | ||
423 | por x4 ## 2, x2 ## 2; \ | ||
424 | pxor x2 ## 2, x1 ## 2; \ | ||
425 | movdqa x1 ## 1, x4 ## 1; \ | ||
426 | pslld $1, x1 ## 1; \ | ||
427 | psrld $(32 - 1), x4 ## 1; \ | ||
428 | por x4 ## 1, x1 ## 1; \ | ||
429 | movdqa x0 ## 1, x4 ## 1; \ | ||
430 | pslld $3, x4 ## 1; \ | ||
431 | pxor x2 ## 1, x3 ## 1; \ | ||
432 | pxor x4 ## 1, x3 ## 1; \ | ||
433 | movdqa x3 ## 1, x4 ## 1; \ | ||
434 | get_key(i, 1, RK1); \ | ||
435 | movdqa x1 ## 2, x4 ## 2; \ | ||
436 | pslld $1, x1 ## 2; \ | ||
437 | psrld $(32 - 1), x4 ## 2; \ | ||
438 | por x4 ## 2, x1 ## 2; \ | ||
439 | movdqa x0 ## 2, x4 ## 2; \ | ||
440 | pslld $3, x4 ## 2; \ | ||
441 | pxor x2 ## 2, x3 ## 2; \ | ||
442 | pxor x4 ## 2, x3 ## 2; \ | ||
443 | movdqa x3 ## 2, x4 ## 2; \ | ||
444 | get_key(i, 3, RK3); \ | ||
445 | pslld $7, x3 ## 1; \ | ||
446 | psrld $(32 - 7), x4 ## 1; \ | ||
447 | por x4 ## 1, x3 ## 1; \ | ||
448 | movdqa x1 ## 1, x4 ## 1; \ | ||
449 | pslld $7, x4 ## 1; \ | ||
450 | pxor x1 ## 1, x0 ## 1; \ | ||
451 | pxor x3 ## 1, x0 ## 1; \ | ||
452 | pxor x3 ## 1, x2 ## 1; \ | ||
453 | pxor x4 ## 1, x2 ## 1; \ | ||
454 | get_key(i, 0, RK0); \ | ||
455 | pslld $7, x3 ## 2; \ | ||
456 | psrld $(32 - 7), x4 ## 2; \ | ||
457 | por x4 ## 2, x3 ## 2; \ | ||
458 | movdqa x1 ## 2, x4 ## 2; \ | ||
459 | pslld $7, x4 ## 2; \ | ||
460 | pxor x1 ## 2, x0 ## 2; \ | ||
461 | pxor x3 ## 2, x0 ## 2; \ | ||
462 | pxor x3 ## 2, x2 ## 2; \ | ||
463 | pxor x4 ## 2, x2 ## 2; \ | ||
464 | get_key(i, 2, RK2); \ | ||
465 | pxor RK1, x1 ## 1; \ | ||
466 | pxor RK3, x3 ## 1; \ | ||
467 | movdqa x0 ## 1, x4 ## 1; \ | ||
468 | pslld $5, x0 ## 1; \ | ||
469 | psrld $(32 - 5), x4 ## 1; \ | ||
470 | por x4 ## 1, x0 ## 1; \ | ||
471 | movdqa x2 ## 1, x4 ## 1; \ | ||
472 | pslld $22, x2 ## 1; \ | ||
473 | psrld $(32 - 22), x4 ## 1; \ | ||
474 | por x4 ## 1, x2 ## 1; \ | ||
475 | pxor RK0, x0 ## 1; \ | ||
476 | pxor RK2, x2 ## 1; \ | ||
477 | pxor RK1, x1 ## 2; \ | ||
478 | pxor RK3, x3 ## 2; \ | ||
479 | movdqa x0 ## 2, x4 ## 2; \ | ||
480 | pslld $5, x0 ## 2; \ | ||
481 | psrld $(32 - 5), x4 ## 2; \ | ||
482 | por x4 ## 2, x0 ## 2; \ | ||
483 | movdqa x2 ## 2, x4 ## 2; \ | ||
484 | pslld $22, x2 ## 2; \ | ||
485 | psrld $(32 - 22), x4 ## 2; \ | ||
486 | por x4 ## 2, x2 ## 2; \ | ||
487 | pxor RK0, x0 ## 2; \ | ||
488 | pxor RK2, x2 ## 2; | ||
489 | |||
490 | #define KL2(x0, x1, x2, x3, x4, i) \ | ||
491 | pxor RK0, x0 ## 1; \ | ||
492 | pxor RK2, x2 ## 1; \ | ||
493 | movdqa x0 ## 1, x4 ## 1; \ | ||
494 | psrld $5, x0 ## 1; \ | ||
495 | pslld $(32 - 5), x4 ## 1; \ | ||
496 | por x4 ## 1, x0 ## 1; \ | ||
497 | pxor RK3, x3 ## 1; \ | ||
498 | pxor RK1, x1 ## 1; \ | ||
499 | movdqa x2 ## 1, x4 ## 1; \ | ||
500 | psrld $22, x2 ## 1; \ | ||
501 | pslld $(32 - 22), x4 ## 1; \ | ||
502 | por x4 ## 1, x2 ## 1; \ | ||
503 | pxor x3 ## 1, x2 ## 1; \ | ||
504 | pxor RK0, x0 ## 2; \ | ||
505 | pxor RK2, x2 ## 2; \ | ||
506 | movdqa x0 ## 2, x4 ## 2; \ | ||
507 | psrld $5, x0 ## 2; \ | ||
508 | pslld $(32 - 5), x4 ## 2; \ | ||
509 | por x4 ## 2, x0 ## 2; \ | ||
510 | pxor RK3, x3 ## 2; \ | ||
511 | pxor RK1, x1 ## 2; \ | ||
512 | movdqa x2 ## 2, x4 ## 2; \ | ||
513 | psrld $22, x2 ## 2; \ | ||
514 | pslld $(32 - 22), x4 ## 2; \ | ||
515 | por x4 ## 2, x2 ## 2; \ | ||
516 | pxor x3 ## 2, x2 ## 2; \ | ||
517 | pxor x3 ## 1, x0 ## 1; \ | ||
518 | movdqa x1 ## 1, x4 ## 1; \ | ||
519 | pslld $7, x4 ## 1; \ | ||
520 | pxor x1 ## 1, x0 ## 1; \ | ||
521 | pxor x4 ## 1, x2 ## 1; \ | ||
522 | movdqa x1 ## 1, x4 ## 1; \ | ||
523 | psrld $1, x1 ## 1; \ | ||
524 | pslld $(32 - 1), x4 ## 1; \ | ||
525 | por x4 ## 1, x1 ## 1; \ | ||
526 | pxor x3 ## 2, x0 ## 2; \ | ||
527 | movdqa x1 ## 2, x4 ## 2; \ | ||
528 | pslld $7, x4 ## 2; \ | ||
529 | pxor x1 ## 2, x0 ## 2; \ | ||
530 | pxor x4 ## 2, x2 ## 2; \ | ||
531 | movdqa x1 ## 2, x4 ## 2; \ | ||
532 | psrld $1, x1 ## 2; \ | ||
533 | pslld $(32 - 1), x4 ## 2; \ | ||
534 | por x4 ## 2, x1 ## 2; \ | ||
535 | movdqa x3 ## 1, x4 ## 1; \ | ||
536 | psrld $7, x3 ## 1; \ | ||
537 | pslld $(32 - 7), x4 ## 1; \ | ||
538 | por x4 ## 1, x3 ## 1; \ | ||
539 | pxor x0 ## 1, x1 ## 1; \ | ||
540 | movdqa x0 ## 1, x4 ## 1; \ | ||
541 | pslld $3, x4 ## 1; \ | ||
542 | pxor x4 ## 1, x3 ## 1; \ | ||
543 | movdqa x0 ## 1, x4 ## 1; \ | ||
544 | movdqa x3 ## 2, x4 ## 2; \ | ||
545 | psrld $7, x3 ## 2; \ | ||
546 | pslld $(32 - 7), x4 ## 2; \ | ||
547 | por x4 ## 2, x3 ## 2; \ | ||
548 | pxor x0 ## 2, x1 ## 2; \ | ||
549 | movdqa x0 ## 2, x4 ## 2; \ | ||
550 | pslld $3, x4 ## 2; \ | ||
551 | pxor x4 ## 2, x3 ## 2; \ | ||
552 | movdqa x0 ## 2, x4 ## 2; \ | ||
553 | psrld $13, x0 ## 1; \ | ||
554 | pslld $(32 - 13), x4 ## 1; \ | ||
555 | por x4 ## 1, x0 ## 1; \ | ||
556 | pxor x2 ## 1, x1 ## 1; \ | ||
557 | pxor x2 ## 1, x3 ## 1; \ | ||
558 | movdqa x2 ## 1, x4 ## 1; \ | ||
559 | psrld $3, x2 ## 1; \ | ||
560 | pslld $(32 - 3), x4 ## 1; \ | ||
561 | por x4 ## 1, x2 ## 1; \ | ||
562 | psrld $13, x0 ## 2; \ | ||
563 | pslld $(32 - 13), x4 ## 2; \ | ||
564 | por x4 ## 2, x0 ## 2; \ | ||
565 | pxor x2 ## 2, x1 ## 2; \ | ||
566 | pxor x2 ## 2, x3 ## 2; \ | ||
567 | movdqa x2 ## 2, x4 ## 2; \ | ||
568 | psrld $3, x2 ## 2; \ | ||
569 | pslld $(32 - 3), x4 ## 2; \ | ||
570 | por x4 ## 2, x2 ## 2; | ||
571 | |||
572 | #define S(SBOX, x0, x1, x2, x3, x4) \ | ||
573 | SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
574 | SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
575 | SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ | ||
576 | SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); | ||
577 | |||
578 | #define SP(SBOX, x0, x1, x2, x3, x4, i) \ | ||
579 | get_key(i, 0, RK0); \ | ||
580 | SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
581 | get_key(i, 2, RK2); \ | ||
582 | SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ | ||
583 | get_key(i, 3, RK3); \ | ||
584 | SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
585 | get_key(i, 1, RK1); \ | ||
586 | SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ | ||
587 | |||
588 | #define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \ | ||
589 | movdqa x2, t3; \ | ||
590 | movdqa x0, t1; \ | ||
591 | unpcklps x3, t3; \ | ||
592 | movdqa x0, t2; \ | ||
593 | unpcklps x1, t1; \ | ||
594 | unpckhps x1, t2; \ | ||
595 | movdqa t3, x1; \ | ||
596 | unpckhps x3, x2; \ | ||
597 | movdqa t1, x0; \ | ||
598 | movhlps t1, x1; \ | ||
599 | movdqa t2, t1; \ | ||
600 | movlhps t3, x0; \ | ||
601 | movlhps x2, t1; \ | ||
602 | movhlps t2, x2; \ | ||
603 | movdqa x2, x3; \ | ||
604 | movdqa t1, x2; | ||
605 | |||
606 | #define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \ | ||
607 | movdqu (0*4*4)(in), x0; \ | ||
608 | movdqu (1*4*4)(in), x1; \ | ||
609 | movdqu (2*4*4)(in), x2; \ | ||
610 | movdqu (3*4*4)(in), x3; \ | ||
611 | \ | ||
612 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) | ||
613 | |||
614 | #define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ | ||
615 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
616 | \ | ||
617 | movdqu x0, (0*4*4)(out); \ | ||
618 | movdqu x1, (1*4*4)(out); \ | ||
619 | movdqu x2, (2*4*4)(out); \ | ||
620 | movdqu x3, (3*4*4)(out); | ||
621 | |||
622 | #define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ | ||
623 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
624 | \ | ||
625 | movdqu (0*4*4)(out), t0; \ | ||
626 | pxor t0, x0; \ | ||
627 | movdqu x0, (0*4*4)(out); \ | ||
628 | movdqu (1*4*4)(out), t0; \ | ||
629 | pxor t0, x1; \ | ||
630 | movdqu x1, (1*4*4)(out); \ | ||
631 | movdqu (2*4*4)(out), t0; \ | ||
632 | pxor t0, x2; \ | ||
633 | movdqu x2, (2*4*4)(out); \ | ||
634 | movdqu (3*4*4)(out), t0; \ | ||
635 | pxor t0, x3; \ | ||
636 | movdqu x3, (3*4*4)(out); | ||
637 | |||
638 | .align 8 | ||
639 | .global __serpent_enc_blk_8way | ||
640 | .type __serpent_enc_blk_8way,@function; | ||
641 | |||
642 | __serpent_enc_blk_8way: | ||
643 | /* input: | ||
644 | * %rdi: ctx, CTX | ||
645 | * %rsi: dst | ||
646 | * %rdx: src | ||
647 | * %rcx: bool, if true: xor output | ||
648 | */ | ||
649 | |||
650 | pcmpeqd RNOT, RNOT; | ||
651 | |||
652 | leaq (4*4*4)(%rdx), %rax; | ||
653 | read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
654 | read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
655 | |||
656 | K2(RA, RB, RC, RD, RE, 0); | ||
657 | S(S0, RA, RB, RC, RD, RE); LK2(RC, RB, RD, RA, RE, 1); | ||
658 | S(S1, RC, RB, RD, RA, RE); LK2(RE, RD, RA, RC, RB, 2); | ||
659 | S(S2, RE, RD, RA, RC, RB); LK2(RB, RD, RE, RC, RA, 3); | ||
660 | S(S3, RB, RD, RE, RC, RA); LK2(RC, RA, RD, RB, RE, 4); | ||
661 | S(S4, RC, RA, RD, RB, RE); LK2(RA, RD, RB, RE, RC, 5); | ||
662 | S(S5, RA, RD, RB, RE, RC); LK2(RC, RA, RD, RE, RB, 6); | ||
663 | S(S6, RC, RA, RD, RE, RB); LK2(RD, RB, RA, RE, RC, 7); | ||
664 | S(S7, RD, RB, RA, RE, RC); LK2(RC, RA, RE, RD, RB, 8); | ||
665 | S(S0, RC, RA, RE, RD, RB); LK2(RE, RA, RD, RC, RB, 9); | ||
666 | S(S1, RE, RA, RD, RC, RB); LK2(RB, RD, RC, RE, RA, 10); | ||
667 | S(S2, RB, RD, RC, RE, RA); LK2(RA, RD, RB, RE, RC, 11); | ||
668 | S(S3, RA, RD, RB, RE, RC); LK2(RE, RC, RD, RA, RB, 12); | ||
669 | S(S4, RE, RC, RD, RA, RB); LK2(RC, RD, RA, RB, RE, 13); | ||
670 | S(S5, RC, RD, RA, RB, RE); LK2(RE, RC, RD, RB, RA, 14); | ||
671 | S(S6, RE, RC, RD, RB, RA); LK2(RD, RA, RC, RB, RE, 15); | ||
672 | S(S7, RD, RA, RC, RB, RE); LK2(RE, RC, RB, RD, RA, 16); | ||
673 | S(S0, RE, RC, RB, RD, RA); LK2(RB, RC, RD, RE, RA, 17); | ||
674 | S(S1, RB, RC, RD, RE, RA); LK2(RA, RD, RE, RB, RC, 18); | ||
675 | S(S2, RA, RD, RE, RB, RC); LK2(RC, RD, RA, RB, RE, 19); | ||
676 | S(S3, RC, RD, RA, RB, RE); LK2(RB, RE, RD, RC, RA, 20); | ||
677 | S(S4, RB, RE, RD, RC, RA); LK2(RE, RD, RC, RA, RB, 21); | ||
678 | S(S5, RE, RD, RC, RA, RB); LK2(RB, RE, RD, RA, RC, 22); | ||
679 | S(S6, RB, RE, RD, RA, RC); LK2(RD, RC, RE, RA, RB, 23); | ||
680 | S(S7, RD, RC, RE, RA, RB); LK2(RB, RE, RA, RD, RC, 24); | ||
681 | S(S0, RB, RE, RA, RD, RC); LK2(RA, RE, RD, RB, RC, 25); | ||
682 | S(S1, RA, RE, RD, RB, RC); LK2(RC, RD, RB, RA, RE, 26); | ||
683 | S(S2, RC, RD, RB, RA, RE); LK2(RE, RD, RC, RA, RB, 27); | ||
684 | S(S3, RE, RD, RC, RA, RB); LK2(RA, RB, RD, RE, RC, 28); | ||
685 | S(S4, RA, RB, RD, RE, RC); LK2(RB, RD, RE, RC, RA, 29); | ||
686 | S(S5, RB, RD, RE, RC, RA); LK2(RA, RB, RD, RC, RE, 30); | ||
687 | S(S6, RA, RB, RD, RC, RE); LK2(RD, RE, RB, RC, RA, 31); | ||
688 | S(S7, RD, RE, RB, RC, RA); K2(RA, RB, RC, RD, RE, 32); | ||
689 | |||
690 | leaq (4*4*4)(%rsi), %rax; | ||
691 | |||
692 | testb %cl, %cl; | ||
693 | jnz __enc_xor8; | ||
694 | |||
695 | write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
696 | write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
697 | |||
698 | ret; | ||
699 | |||
700 | __enc_xor8: | ||
701 | xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
702 | xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
703 | |||
704 | ret; | ||
705 | |||
706 | .align 8 | ||
707 | .global serpent_dec_blk_8way | ||
708 | .type serpent_dec_blk_8way,@function; | ||
709 | |||
710 | serpent_dec_blk_8way: | ||
711 | /* input: | ||
712 | * %rdi: ctx, CTX | ||
713 | * %rsi: dst | ||
714 | * %rdx: src | ||
715 | */ | ||
716 | |||
717 | pcmpeqd RNOT, RNOT; | ||
718 | |||
719 | leaq (4*4*4)(%rdx), %rax; | ||
720 | read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
721 | read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
722 | |||
723 | K2(RA, RB, RC, RD, RE, 32); | ||
724 | SP(SI7, RA, RB, RC, RD, RE, 31); KL2(RB, RD, RA, RE, RC, 31); | ||
725 | SP(SI6, RB, RD, RA, RE, RC, 30); KL2(RA, RC, RE, RB, RD, 30); | ||
726 | SP(SI5, RA, RC, RE, RB, RD, 29); KL2(RC, RD, RA, RE, RB, 29); | ||
727 | SP(SI4, RC, RD, RA, RE, RB, 28); KL2(RC, RA, RB, RE, RD, 28); | ||
728 | SP(SI3, RC, RA, RB, RE, RD, 27); KL2(RB, RC, RD, RE, RA, 27); | ||
729 | SP(SI2, RB, RC, RD, RE, RA, 26); KL2(RC, RA, RE, RD, RB, 26); | ||
730 | SP(SI1, RC, RA, RE, RD, RB, 25); KL2(RB, RA, RE, RD, RC, 25); | ||
731 | SP(SI0, RB, RA, RE, RD, RC, 24); KL2(RE, RC, RA, RB, RD, 24); | ||
732 | SP(SI7, RE, RC, RA, RB, RD, 23); KL2(RC, RB, RE, RD, RA, 23); | ||
733 | SP(SI6, RC, RB, RE, RD, RA, 22); KL2(RE, RA, RD, RC, RB, 22); | ||
734 | SP(SI5, RE, RA, RD, RC, RB, 21); KL2(RA, RB, RE, RD, RC, 21); | ||
735 | SP(SI4, RA, RB, RE, RD, RC, 20); KL2(RA, RE, RC, RD, RB, 20); | ||
736 | SP(SI3, RA, RE, RC, RD, RB, 19); KL2(RC, RA, RB, RD, RE, 19); | ||
737 | SP(SI2, RC, RA, RB, RD, RE, 18); KL2(RA, RE, RD, RB, RC, 18); | ||
738 | SP(SI1, RA, RE, RD, RB, RC, 17); KL2(RC, RE, RD, RB, RA, 17); | ||
739 | SP(SI0, RC, RE, RD, RB, RA, 16); KL2(RD, RA, RE, RC, RB, 16); | ||
740 | SP(SI7, RD, RA, RE, RC, RB, 15); KL2(RA, RC, RD, RB, RE, 15); | ||
741 | SP(SI6, RA, RC, RD, RB, RE, 14); KL2(RD, RE, RB, RA, RC, 14); | ||
742 | SP(SI5, RD, RE, RB, RA, RC, 13); KL2(RE, RC, RD, RB, RA, 13); | ||
743 | SP(SI4, RE, RC, RD, RB, RA, 12); KL2(RE, RD, RA, RB, RC, 12); | ||
744 | SP(SI3, RE, RD, RA, RB, RC, 11); KL2(RA, RE, RC, RB, RD, 11); | ||
745 | SP(SI2, RA, RE, RC, RB, RD, 10); KL2(RE, RD, RB, RC, RA, 10); | ||
746 | SP(SI1, RE, RD, RB, RC, RA, 9); KL2(RA, RD, RB, RC, RE, 9); | ||
747 | SP(SI0, RA, RD, RB, RC, RE, 8); KL2(RB, RE, RD, RA, RC, 8); | ||
748 | SP(SI7, RB, RE, RD, RA, RC, 7); KL2(RE, RA, RB, RC, RD, 7); | ||
749 | SP(SI6, RE, RA, RB, RC, RD, 6); KL2(RB, RD, RC, RE, RA, 6); | ||
750 | SP(SI5, RB, RD, RC, RE, RA, 5); KL2(RD, RA, RB, RC, RE, 5); | ||
751 | SP(SI4, RD, RA, RB, RC, RE, 4); KL2(RD, RB, RE, RC, RA, 4); | ||
752 | SP(SI3, RD, RB, RE, RC, RA, 3); KL2(RE, RD, RA, RC, RB, 3); | ||
753 | SP(SI2, RE, RD, RA, RC, RB, 2); KL2(RD, RB, RC, RA, RE, 2); | ||
754 | SP(SI1, RD, RB, RC, RA, RE, 1); KL2(RE, RB, RC, RA, RD, 1); | ||
755 | S(SI0, RE, RB, RC, RA, RD); K2(RC, RD, RB, RE, RA, 0); | ||
756 | |||
757 | leaq (4*4*4)(%rsi), %rax; | ||
758 | write_blocks(%rsi, RC1, RD1, RB1, RE1, RK0, RK1, RK2); | ||
759 | write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2); | ||
760 | |||
761 | ret; | ||
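The x86-64 variant doubles the width of the i586 one by running two 4-way streams side by side: the ## 1/## 2 token pasting in K2/LK2/KL2 expands every operation for both register sets (RA1..RE1 and RA2..RE2), which fits only because x86-64 exposes sixteen xmm registers. read_blocks()/write_blocks() bracket the rounds with a 4x4 word transpose so each xmm register holds the same word of four different blocks. A scalar model of that transpose (illustrative only; the asm achieves it with unpcklps/unpckhps/movlhps/movhlps):

#include <stdint.h>

/* x[i][j]: word j of block i before the call, word i of block j after */
static void transpose_4x4(uint32_t x[4][4])
{
	uint32_t t;
	int i, j;

	for (i = 0; i < 4; i++)
		for (j = i + 1; j < 4; j++) {
			t = x[i][j];
			x[i][j] = x[j][i];
			x[j][i] = t;
		}
}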
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c new file mode 100644 index 000000000000..7955a9b76b91 --- /dev/null +++ b/arch/x86/crypto/serpent_sse2_glue.c | |||
@@ -0,0 +1,1070 @@ | |||
1 | /* | ||
2 | * Glue Code for SSE2 assembler versions of Serpent Cipher | ||
3 | * | ||
4 | * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * Glue code based on aesni-intel_glue.c by: | ||
7 | * Copyright (C) 2008, Intel Corp. | ||
8 | * Author: Huang Ying <ying.huang@intel.com> | ||
9 | * | ||
10 | * CBC & ECB parts based on code (crypto/cbc.c, ecb.c) by: | ||
11 | * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> | ||
12 | * CTR part based on code (crypto/ctr.c) by: | ||
13 | * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com> | ||
14 | * | ||
15 | * This program is free software; you can redistribute it and/or modify | ||
16 | * it under the terms of the GNU General Public License as published by | ||
17 | * the Free Software Foundation; either version 2 of the License, or | ||
18 | * (at your option) any later version. | ||
19 | * | ||
20 | * This program is distributed in the hope that it will be useful, | ||
21 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
22 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
23 | * GNU General Public License for more details. | ||
24 | * | ||
25 | * You should have received a copy of the GNU General Public License | ||
26 | * along with this program; if not, write to the Free Software | ||
27 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
28 | * USA | ||
29 | * | ||
30 | */ | ||
31 | |||
32 | #include <linux/module.h> | ||
33 | #include <linux/hardirq.h> | ||
34 | #include <linux/types.h> | ||
35 | #include <linux/crypto.h> | ||
36 | #include <linux/err.h> | ||
37 | #include <crypto/algapi.h> | ||
38 | #include <crypto/serpent.h> | ||
39 | #include <crypto/cryptd.h> | ||
40 | #include <crypto/b128ops.h> | ||
41 | #include <crypto/ctr.h> | ||
42 | #include <crypto/lrw.h> | ||
43 | #include <crypto/xts.h> | ||
44 | #include <asm/i387.h> | ||
45 | #include <asm/serpent.h> | ||
46 | #include <crypto/scatterwalk.h> | ||
47 | #include <linux/workqueue.h> | ||
48 | #include <linux/spinlock.h> | ||
49 | |||
50 | struct async_serpent_ctx { | ||
51 | struct cryptd_ablkcipher *cryptd_tfm; | ||
52 | }; | ||
53 | |||
54 | static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) | ||
55 | { | ||
56 | if (fpu_enabled) | ||
57 | return true; | ||
58 | |||
59 | /* SSE2 is used only when the chunk to be processed is large enough, | ||
60 | * so do not enable the FPU until it is necessary. | ||
61 | */ | ||
62 | if (nbytes < SERPENT_BLOCK_SIZE * SERPENT_PARALLEL_BLOCKS) | ||
63 | return false; | ||
64 | |||
65 | kernel_fpu_begin(); | ||
66 | return true; | ||
67 | } | ||
68 | |||
69 | static inline void serpent_fpu_end(bool fpu_enabled) | ||
70 | { | ||
71 | if (fpu_enabled) | ||
72 | kernel_fpu_end(); | ||
73 | } | ||
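serpent_fpu_begin() claims the kernel FPU lazily: SSE2 state is taken only once a walk chunk is large enough for the 8-way path, and the returned flag threads through the caller's loop so that kernel_fpu_begin()/kernel_fpu_end() run at most once per request. A condensed sketch of the intended pattern (hypothetical caller, mirroring ecb_crypt() below):

        bool fpu_enabled = false;

        while ((nbytes = walk->nbytes)) {
                fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes);
                /* 8-way SSE2 path if enabled, scalar fallback otherwise */
                nbytes = process_chunk(walk);   /* hypothetical helper */
        }
        serpent_fpu_end(fpu_enabled);           /* single teardown */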
74 | |||
75 | static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | ||
76 | bool enc) | ||
77 | { | ||
78 | bool fpu_enabled = false; | ||
79 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
80 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
81 | unsigned int nbytes; | ||
82 | int err; | ||
83 | |||
84 | err = blkcipher_walk_virt(desc, walk); | ||
85 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
86 | |||
87 | while ((nbytes = walk->nbytes)) { | ||
88 | u8 *wsrc = walk->src.virt.addr; | ||
89 | u8 *wdst = walk->dst.virt.addr; | ||
90 | |||
91 | fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); | ||
92 | |||
93 | /* Process multi-block batch */ | ||
94 | if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { | ||
95 | do { | ||
96 | if (enc) | ||
97 | serpent_enc_blk_xway(ctx, wdst, wsrc); | ||
98 | else | ||
99 | serpent_dec_blk_xway(ctx, wdst, wsrc); | ||
100 | |||
101 | wsrc += bsize * SERPENT_PARALLEL_BLOCKS; | ||
102 | wdst += bsize * SERPENT_PARALLEL_BLOCKS; | ||
103 | nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; | ||
104 | } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); | ||
105 | |||
106 | if (nbytes < bsize) | ||
107 | goto done; | ||
108 | } | ||
109 | |||
110 | /* Handle leftovers */ | ||
111 | do { | ||
112 | if (enc) | ||
113 | __serpent_encrypt(ctx, wdst, wsrc); | ||
114 | else | ||
115 | __serpent_decrypt(ctx, wdst, wsrc); | ||
116 | |||
117 | wsrc += bsize; | ||
118 | wdst += bsize; | ||
119 | nbytes -= bsize; | ||
120 | } while (nbytes >= bsize); | ||
121 | |||
122 | done: | ||
123 | err = blkcipher_walk_done(desc, walk, nbytes); | ||
124 | } | ||
125 | |||
126 | serpent_fpu_end(fpu_enabled); | ||
127 | return err; | ||
128 | } | ||
129 | |||
130 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
131 | struct scatterlist *src, unsigned int nbytes) | ||
132 | { | ||
133 | struct blkcipher_walk walk; | ||
134 | |||
135 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
136 | return ecb_crypt(desc, &walk, true); | ||
137 | } | ||
138 | |||
139 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
140 | struct scatterlist *src, unsigned int nbytes) | ||
141 | { | ||
142 | struct blkcipher_walk walk; | ||
143 | |||
144 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
145 | return ecb_crypt(desc, &walk, false); | ||
146 | } | ||
147 | |||
148 | static struct crypto_alg blk_ecb_alg = { | ||
149 | .cra_name = "__ecb-serpent-sse2", | ||
150 | .cra_driver_name = "__driver-ecb-serpent-sse2", | ||
151 | .cra_priority = 0, | ||
152 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
153 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
154 | .cra_ctxsize = sizeof(struct serpent_ctx), | ||
155 | .cra_alignmask = 0, | ||
156 | .cra_type = &crypto_blkcipher_type, | ||
157 | .cra_module = THIS_MODULE, | ||
158 | .cra_list = LIST_HEAD_INIT(blk_ecb_alg.cra_list), | ||
159 | .cra_u = { | ||
160 | .blkcipher = { | ||
161 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
162 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
163 | .setkey = serpent_setkey, | ||
164 | .encrypt = ecb_encrypt, | ||
165 | .decrypt = ecb_decrypt, | ||
166 | }, | ||
167 | }, | ||
168 | }; | ||
169 | |||
170 | static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, | ||
171 | struct blkcipher_walk *walk) | ||
172 | { | ||
173 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
174 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
175 | unsigned int nbytes = walk->nbytes; | ||
176 | u128 *src = (u128 *)walk->src.virt.addr; | ||
177 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
178 | u128 *iv = (u128 *)walk->iv; | ||
179 | |||
180 | do { | ||
181 | u128_xor(dst, src, iv); | ||
182 | __serpent_encrypt(ctx, (u8 *)dst, (u8 *)dst); | ||
183 | iv = dst; | ||
184 | |||
185 | src += 1; | ||
186 | dst += 1; | ||
187 | nbytes -= bsize; | ||
188 | } while (nbytes >= bsize); | ||
189 | |||
190 | *(u128 *)walk->iv = *iv; | ||
191 | return nbytes; | ||
192 | } | ||
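CBC encryption is inherently serial, with each ciphertext block feeding the next, so there is nothing for the 8-way path to batch and __cbc_encrypt() stays on single-block __serpent_encrypt() calls. With C[-1] = IV:

        C[i] = E_K(P[i] XOR C[i-1])

The copy into walk->iv at the end publishes the last ciphertext block as the chaining value for the next walk chunk.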
193 | |||
194 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
195 | struct scatterlist *src, unsigned int nbytes) | ||
196 | { | ||
197 | struct blkcipher_walk walk; | ||
198 | int err; | ||
199 | |||
200 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
201 | err = blkcipher_walk_virt(desc, &walk); | ||
202 | |||
203 | while ((nbytes = walk.nbytes)) { | ||
204 | nbytes = __cbc_encrypt(desc, &walk); | ||
205 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
206 | } | ||
207 | |||
208 | return err; | ||
209 | } | ||
210 | |||
211 | static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, | ||
212 | struct blkcipher_walk *walk) | ||
213 | { | ||
214 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
215 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
216 | unsigned int nbytes = walk->nbytes; | ||
217 | u128 *src = (u128 *)walk->src.virt.addr; | ||
218 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
219 | u128 ivs[SERPENT_PARALLEL_BLOCKS - 1]; | ||
220 | u128 last_iv; | ||
221 | int i; | ||
222 | |||
223 | /* Start of the last block. */ | ||
224 | src += nbytes / bsize - 1; | ||
225 | dst += nbytes / bsize - 1; | ||
226 | |||
227 | last_iv = *src; | ||
228 | |||
229 | /* Process multi-block batch */ | ||
230 | if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { | ||
231 | do { | ||
232 | nbytes -= bsize * (SERPENT_PARALLEL_BLOCKS - 1); | ||
233 | src -= SERPENT_PARALLEL_BLOCKS - 1; | ||
234 | dst -= SERPENT_PARALLEL_BLOCKS - 1; | ||
235 | |||
236 | for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++) | ||
237 | ivs[i] = src[i]; | ||
238 | |||
239 | serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); | ||
240 | |||
241 | for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++) | ||
242 | u128_xor(dst + (i + 1), dst + (i + 1), ivs + i); | ||
243 | |||
244 | nbytes -= bsize; | ||
245 | if (nbytes < bsize) | ||
246 | goto done; | ||
247 | |||
248 | u128_xor(dst, dst, src - 1); | ||
249 | src -= 1; | ||
250 | dst -= 1; | ||
251 | } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); | ||
252 | |||
253 | if (nbytes < bsize) | ||
254 | goto done; | ||
255 | } | ||
256 | |||
257 | /* Handle leftovers */ | ||
258 | for (;;) { | ||
259 | __serpent_decrypt(ctx, (u8 *)dst, (u8 *)src); | ||
260 | |||
261 | nbytes -= bsize; | ||
262 | if (nbytes < bsize) | ||
263 | break; | ||
264 | |||
265 | u128_xor(dst, dst, src - 1); | ||
266 | src -= 1; | ||
267 | dst -= 1; | ||
268 | } | ||
269 | |||
270 | done: | ||
271 | u128_xor(dst, dst, (u128 *)walk->iv); | ||
272 | *(u128 *)walk->iv = last_iv; | ||
273 | |||
274 | return nbytes; | ||
275 | } | ||
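Decryption, by contrast, parallelizes freely, since every plaintext block depends only on ciphertext that is already in hand:

        P[i] = D_K(C[i]) XOR C[i-1]        (with C[-1] = IV)

Walking from the last block backwards lets the function work in place: ivs[] snapshots the chaining blocks C[i-1] before serpent_dec_blk_xway() overwrites the batch in the dst == src case, and last_iv preserves the final ciphertext so it can become the next chunk's IV.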
276 | |||
277 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
278 | struct scatterlist *src, unsigned int nbytes) | ||
279 | { | ||
280 | bool fpu_enabled = false; | ||
281 | struct blkcipher_walk walk; | ||
282 | int err; | ||
283 | |||
284 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
285 | err = blkcipher_walk_virt(desc, &walk); | ||
286 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
287 | |||
288 | while ((nbytes = walk.nbytes)) { | ||
289 | fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); | ||
290 | nbytes = __cbc_decrypt(desc, &walk); | ||
291 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
292 | } | ||
293 | |||
294 | serpent_fpu_end(fpu_enabled); | ||
295 | return err; | ||
296 | } | ||
297 | |||
298 | static struct crypto_alg blk_cbc_alg = { | ||
299 | .cra_name = "__cbc-serpent-sse2", | ||
300 | .cra_driver_name = "__driver-cbc-serpent-sse2", | ||
301 | .cra_priority = 0, | ||
302 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
303 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
304 | .cra_ctxsize = sizeof(struct serpent_ctx), | ||
305 | .cra_alignmask = 0, | ||
306 | .cra_type = &crypto_blkcipher_type, | ||
307 | .cra_module = THIS_MODULE, | ||
308 | .cra_list = LIST_HEAD_INIT(blk_cbc_alg.cra_list), | ||
309 | .cra_u = { | ||
310 | .blkcipher = { | ||
311 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
312 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
313 | .setkey = serpent_setkey, | ||
314 | .encrypt = cbc_encrypt, | ||
315 | .decrypt = cbc_decrypt, | ||
316 | }, | ||
317 | }, | ||
318 | }; | ||
319 | |||
320 | static inline void u128_to_be128(be128 *dst, const u128 *src) | ||
321 | { | ||
322 | dst->a = cpu_to_be64(src->a); | ||
323 | dst->b = cpu_to_be64(src->b); | ||
324 | } | ||
325 | |||
326 | static inline void be128_to_u128(u128 *dst, const be128 *src) | ||
327 | { | ||
328 | dst->a = be64_to_cpu(src->a); | ||
329 | dst->b = be64_to_cpu(src->b); | ||
330 | } | ||
331 | |||
332 | static inline void u128_inc(u128 *i) | ||
333 | { | ||
334 | i->b++; | ||
335 | if (!i->b) | ||
336 | i->a++; | ||
337 | } | ||
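u128_inc() treats the (a, b) pair as one 128-bit counter held in native words: b is the low half, and a is bumped only when b wraps to zero. A hypothetical self-check of the carry (not part of the patch):

        u128 ctr = { .a = 0, .b = ~0ULL };

        u128_inc(&ctr);        /* ctr.b wraps to 0 and carries: ctr.a == 1 */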
338 | |||
339 | static void ctr_crypt_final(struct blkcipher_desc *desc, | ||
340 | struct blkcipher_walk *walk) | ||
341 | { | ||
342 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
343 | u8 *ctrblk = walk->iv; | ||
344 | u8 keystream[SERPENT_BLOCK_SIZE]; | ||
345 | u8 *src = walk->src.virt.addr; | ||
346 | u8 *dst = walk->dst.virt.addr; | ||
347 | unsigned int nbytes = walk->nbytes; | ||
348 | |||
349 | __serpent_encrypt(ctx, keystream, ctrblk); | ||
350 | crypto_xor(keystream, src, nbytes); | ||
351 | memcpy(dst, keystream, nbytes); | ||
352 | |||
353 | crypto_inc(ctrblk, SERPENT_BLOCK_SIZE); | ||
354 | } | ||
355 | |||
356 | static unsigned int __ctr_crypt(struct blkcipher_desc *desc, | ||
357 | struct blkcipher_walk *walk) | ||
358 | { | ||
359 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
360 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
361 | unsigned int nbytes = walk->nbytes; | ||
362 | u128 *src = (u128 *)walk->src.virt.addr; | ||
363 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
364 | u128 ctrblk; | ||
365 | be128 ctrblocks[SERPENT_PARALLEL_BLOCKS]; | ||
366 | int i; | ||
367 | |||
368 | be128_to_u128(&ctrblk, (be128 *)walk->iv); | ||
369 | |||
370 | /* Process multi-block batch */ | ||
371 | if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { | ||
372 | do { | ||
373 | /* create ctrblks for parallel encrypt */ | ||
374 | for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) { | ||
375 | if (dst != src) | ||
376 | dst[i] = src[i]; | ||
377 | |||
378 | u128_to_be128(&ctrblocks[i], &ctrblk); | ||
379 | u128_inc(&ctrblk); | ||
380 | } | ||
381 | |||
382 | serpent_enc_blk_xway_xor(ctx, (u8 *)dst, | ||
383 | (u8 *)ctrblocks); | ||
384 | |||
385 | src += SERPENT_PARALLEL_BLOCKS; | ||
386 | dst += SERPENT_PARALLEL_BLOCKS; | ||
387 | nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; | ||
388 | } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); | ||
389 | |||
390 | if (nbytes < bsize) | ||
391 | goto done; | ||
392 | } | ||
393 | |||
394 | /* Handle leftovers */ | ||
395 | do { | ||
396 | if (dst != src) | ||
397 | *dst = *src; | ||
398 | |||
399 | u128_to_be128(&ctrblocks[0], &ctrblk); | ||
400 | u128_inc(&ctrblk); | ||
401 | |||
402 | __serpent_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); | ||
403 | u128_xor(dst, dst, (u128 *)ctrblocks); | ||
404 | |||
405 | src += 1; | ||
406 | dst += 1; | ||
407 | nbytes -= bsize; | ||
408 | } while (nbytes >= bsize); | ||
409 | |||
410 | done: | ||
411 | u128_to_be128((be128 *)walk->iv, &ctrblk); | ||
412 | return nbytes; | ||
413 | } | ||
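CTR mode turns the cipher into a pure keystream generator, which is why encrypt and decrypt are the same operation:

        C[i] = P[i] XOR E_K(ctr + i)       (ctr = IV read as a big-endian 128-bit integer)

The counter is kept as a native u128 so u128_inc() stays cheap, and each value is converted back with u128_to_be128() before encryption because the counter block the cipher sees is big-endian; serpent_enc_blk_xway_xor() then encrypts the eight counter blocks and XORs the keystream into dst in a single pass.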
414 | |||
415 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
416 | struct scatterlist *src, unsigned int nbytes) | ||
417 | { | ||
418 | bool fpu_enabled = false; | ||
419 | struct blkcipher_walk walk; | ||
420 | int err; | ||
421 | |||
422 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
423 | err = blkcipher_walk_virt_block(desc, &walk, SERPENT_BLOCK_SIZE); | ||
424 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
425 | |||
426 | while ((nbytes = walk.nbytes) >= SERPENT_BLOCK_SIZE) { | ||
427 | fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); | ||
428 | nbytes = __ctr_crypt(desc, &walk); | ||
429 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
430 | } | ||
431 | |||
432 | serpent_fpu_end(fpu_enabled); | ||
433 | |||
434 | if (walk.nbytes) { | ||
435 | ctr_crypt_final(desc, &walk); | ||
436 | err = blkcipher_walk_done(desc, &walk, 0); | ||
437 | } | ||
438 | |||
439 | return err; | ||
440 | } | ||
441 | |||
442 | static struct crypto_alg blk_ctr_alg = { | ||
443 | .cra_name = "__ctr-serpent-sse2", | ||
444 | .cra_driver_name = "__driver-ctr-serpent-sse2", | ||
445 | .cra_priority = 0, | ||
446 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
447 | .cra_blocksize = 1, | ||
448 | .cra_ctxsize = sizeof(struct serpent_ctx), | ||
449 | .cra_alignmask = 0, | ||
450 | .cra_type = &crypto_blkcipher_type, | ||
451 | .cra_module = THIS_MODULE, | ||
452 | .cra_list = LIST_HEAD_INIT(blk_ctr_alg.cra_list), | ||
453 | .cra_u = { | ||
454 | .blkcipher = { | ||
455 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
456 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
457 | .ivsize = SERPENT_BLOCK_SIZE, | ||
458 | .setkey = serpent_setkey, | ||
459 | .encrypt = ctr_crypt, | ||
460 | .decrypt = ctr_crypt, | ||
461 | }, | ||
462 | }, | ||
463 | }; | ||
464 | |||
465 | struct crypt_priv { | ||
466 | struct serpent_ctx *ctx; | ||
467 | bool fpu_enabled; | ||
468 | }; | ||
469 | |||
470 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
471 | { | ||
472 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
473 | struct crypt_priv *ctx = priv; | ||
474 | int i; | ||
475 | |||
476 | ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); | ||
477 | |||
478 | if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { | ||
479 | serpent_enc_blk_xway(ctx->ctx, srcdst, srcdst); | ||
480 | return; | ||
481 | } | ||
482 | |||
483 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
484 | __serpent_encrypt(ctx->ctx, srcdst, srcdst); | ||
485 | } | ||
486 | |||
487 | static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
488 | { | ||
489 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
490 | struct crypt_priv *ctx = priv; | ||
491 | int i; | ||
492 | |||
493 | ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); | ||
494 | |||
495 | if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { | ||
496 | serpent_dec_blk_xway(ctx->ctx, srcdst, srcdst); | ||
497 | return; | ||
498 | } | ||
499 | |||
500 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
501 | __serpent_decrypt(ctx->ctx, srcdst, srcdst); | ||
502 | } | ||
503 | |||
504 | struct serpent_lrw_ctx { | ||
505 | struct lrw_table_ctx lrw_table; | ||
506 | struct serpent_ctx serpent_ctx; | ||
507 | }; | ||
508 | |||
509 | static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
510 | unsigned int keylen) | ||
511 | { | ||
512 | struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | ||
513 | int err; | ||
514 | |||
515 | err = __serpent_setkey(&ctx->serpent_ctx, key, keylen - | ||
516 | SERPENT_BLOCK_SIZE); | ||
517 | if (err) | ||
518 | return err; | ||
519 | |||
520 | return lrw_init_table(&ctx->lrw_table, key + keylen - | ||
521 | SERPENT_BLOCK_SIZE); | ||
522 | } | ||
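An lrw(serpent) key carries the block-cipher key followed by one extra block of tweak material, which is why blk_lrw_alg below adds SERPENT_BLOCK_SIZE to both keysize bounds. The layout implied by the setkey above:

        /* layout of an lrw(serpent) key of keylen bytes (derived, not
         * spelled out in the patch):
         *   key[0 .. keylen-17]         Serpent key (up to 32 bytes)
         *   key[keylen-16 .. keylen-1]  16-byte LRW tweak key
         */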
523 | |||
524 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
525 | struct scatterlist *src, unsigned int nbytes) | ||
526 | { | ||
527 | struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
528 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
529 | struct crypt_priv crypt_ctx = { | ||
530 | .ctx = &ctx->serpent_ctx, | ||
531 | .fpu_enabled = false, | ||
532 | }; | ||
533 | struct lrw_crypt_req req = { | ||
534 | .tbuf = buf, | ||
535 | .tbuflen = sizeof(buf), | ||
536 | |||
537 | .table_ctx = &ctx->lrw_table, | ||
538 | .crypt_ctx = &crypt_ctx, | ||
539 | .crypt_fn = encrypt_callback, | ||
540 | }; | ||
541 | int ret; | ||
542 | |||
543 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
544 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
545 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
546 | |||
547 | return ret; | ||
548 | } | ||
549 | |||
550 | static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
551 | struct scatterlist *src, unsigned int nbytes) | ||
552 | { | ||
553 | struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
554 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
555 | struct crypt_priv crypt_ctx = { | ||
556 | .ctx = &ctx->serpent_ctx, | ||
557 | .fpu_enabled = false, | ||
558 | }; | ||
559 | struct lrw_crypt_req req = { | ||
560 | .tbuf = buf, | ||
561 | .tbuflen = sizeof(buf), | ||
562 | |||
563 | .table_ctx = &ctx->lrw_table, | ||
564 | .crypt_ctx = &crypt_ctx, | ||
565 | .crypt_fn = decrypt_callback, | ||
566 | }; | ||
567 | int ret; | ||
568 | |||
569 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
570 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
571 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
572 | |||
573 | return ret; | ||
574 | } | ||
575 | |||
576 | static void lrw_exit_tfm(struct crypto_tfm *tfm) | ||
577 | { | ||
578 | struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | ||
579 | |||
580 | lrw_free_table(&ctx->lrw_table); | ||
581 | } | ||
582 | |||
583 | static struct crypto_alg blk_lrw_alg = { | ||
584 | .cra_name = "__lrw-serpent-sse2", | ||
585 | .cra_driver_name = "__driver-lrw-serpent-sse2", | ||
586 | .cra_priority = 0, | ||
587 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
588 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
589 | .cra_ctxsize = sizeof(struct serpent_lrw_ctx), | ||
590 | .cra_alignmask = 0, | ||
591 | .cra_type = &crypto_blkcipher_type, | ||
592 | .cra_module = THIS_MODULE, | ||
593 | .cra_list = LIST_HEAD_INIT(blk_lrw_alg.cra_list), | ||
594 | .cra_exit = lrw_exit_tfm, | ||
595 | .cra_u = { | ||
596 | .blkcipher = { | ||
597 | .min_keysize = SERPENT_MIN_KEY_SIZE + | ||
598 | SERPENT_BLOCK_SIZE, | ||
599 | .max_keysize = SERPENT_MAX_KEY_SIZE + | ||
600 | SERPENT_BLOCK_SIZE, | ||
601 | .ivsize = SERPENT_BLOCK_SIZE, | ||
602 | .setkey = lrw_serpent_setkey, | ||
603 | .encrypt = lrw_encrypt, | ||
604 | .decrypt = lrw_decrypt, | ||
605 | }, | ||
606 | }, | ||
607 | }; | ||
608 | |||
609 | struct serpent_xts_ctx { | ||
610 | struct serpent_ctx tweak_ctx; | ||
611 | struct serpent_ctx crypt_ctx; | ||
612 | }; | ||
613 | |||
614 | static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
615 | unsigned int keylen) | ||
616 | { | ||
617 | struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm); | ||
618 | u32 *flags = &tfm->crt_flags; | ||
619 | int err; | ||
620 | |||
621 | /* The key consists of two keys of equal size concatenated; | ||
622 | * therefore its length must be even. | ||
623 | */ | ||
624 | if (keylen % 2) { | ||
625 | *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
626 | return -EINVAL; | ||
627 | } | ||
628 | |||
629 | /* first half of xts-key is for crypt */ | ||
630 | err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2); | ||
631 | if (err) | ||
632 | return err; | ||
633 | |||
634 | /* second half of xts-key is for tweak */ | ||
635 | return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); | ||
636 | } | ||
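XTS simply halves the supplied key, e.g. a 64-byte xts(serpent) key yields two independent 32-byte Serpent keys, which is where the "* 2" keysize bounds in blk_xts_alg below come from:

        /* layout of an xts(serpent) key of keylen bytes */
        /*   key[0 .. keylen/2 - 1]       data-unit (crypt) key */
        /*   key[keylen/2 .. keylen - 1]  tweak-encryption key  */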
637 | |||
638 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
639 | struct scatterlist *src, unsigned int nbytes) | ||
640 | { | ||
641 | struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
642 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
643 | struct crypt_priv crypt_ctx = { | ||
644 | .ctx = &ctx->crypt_ctx, | ||
645 | .fpu_enabled = false, | ||
646 | }; | ||
647 | struct xts_crypt_req req = { | ||
648 | .tbuf = buf, | ||
649 | .tbuflen = sizeof(buf), | ||
650 | |||
651 | .tweak_ctx = &ctx->tweak_ctx, | ||
652 | .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt), | ||
653 | .crypt_ctx = &crypt_ctx, | ||
654 | .crypt_fn = encrypt_callback, | ||
655 | }; | ||
656 | int ret; | ||
657 | |||
658 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
659 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
660 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
661 | |||
662 | return ret; | ||
663 | } | ||
664 | |||
665 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
666 | struct scatterlist *src, unsigned int nbytes) | ||
667 | { | ||
668 | struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
669 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
670 | struct crypt_priv crypt_ctx = { | ||
671 | .ctx = &ctx->crypt_ctx, | ||
672 | .fpu_enabled = false, | ||
673 | }; | ||
674 | struct xts_crypt_req req = { | ||
675 | .tbuf = buf, | ||
676 | .tbuflen = sizeof(buf), | ||
677 | |||
678 | .tweak_ctx = &ctx->tweak_ctx, | ||
679 | .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt), | ||
680 | .crypt_ctx = &crypt_ctx, | ||
681 | .crypt_fn = decrypt_callback, | ||
682 | }; | ||
683 | int ret; | ||
684 | |||
685 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
686 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
687 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
688 | |||
689 | return ret; | ||
690 | } | ||
691 | |||
692 | static struct crypto_alg blk_xts_alg = { | ||
693 | .cra_name = "__xts-serpent-sse2", | ||
694 | .cra_driver_name = "__driver-xts-serpent-sse2", | ||
695 | .cra_priority = 0, | ||
696 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
697 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
698 | .cra_ctxsize = sizeof(struct serpent_xts_ctx), | ||
699 | .cra_alignmask = 0, | ||
700 | .cra_type = &crypto_blkcipher_type, | ||
701 | .cra_module = THIS_MODULE, | ||
702 | .cra_list = LIST_HEAD_INIT(blk_xts_alg.cra_list), | ||
703 | .cra_u = { | ||
704 | .blkcipher = { | ||
705 | .min_keysize = SERPENT_MIN_KEY_SIZE * 2, | ||
706 | .max_keysize = SERPENT_MAX_KEY_SIZE * 2, | ||
707 | .ivsize = SERPENT_BLOCK_SIZE, | ||
708 | .setkey = xts_serpent_setkey, | ||
709 | .encrypt = xts_encrypt, | ||
710 | .decrypt = xts_decrypt, | ||
711 | }, | ||
712 | }, | ||
713 | }; | ||
714 | |||
715 | static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, | ||
716 | unsigned int key_len) | ||
717 | { | ||
718 | struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
719 | struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base; | ||
720 | int err; | ||
721 | |||
722 | crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); | ||
723 | crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm) | ||
724 | & CRYPTO_TFM_REQ_MASK); | ||
725 | err = crypto_ablkcipher_setkey(child, key, key_len); | ||
726 | crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child) | ||
727 | & CRYPTO_TFM_RES_MASK); | ||
728 | return err; | ||
729 | } | ||
730 | |||
731 | static int __ablk_encrypt(struct ablkcipher_request *req) | ||
732 | { | ||
733 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); | ||
734 | struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
735 | struct blkcipher_desc desc; | ||
736 | |||
737 | desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); | ||
738 | desc.info = req->info; | ||
739 | desc.flags = 0; | ||
740 | |||
741 | return crypto_blkcipher_crt(desc.tfm)->encrypt( | ||
742 | &desc, req->dst, req->src, req->nbytes); | ||
743 | } | ||
744 | |||
745 | static int ablk_encrypt(struct ablkcipher_request *req) | ||
746 | { | ||
747 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); | ||
748 | struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
749 | |||
750 | if (!irq_fpu_usable()) { | ||
751 | struct ablkcipher_request *cryptd_req = | ||
752 | ablkcipher_request_ctx(req); | ||
753 | |||
754 | memcpy(cryptd_req, req, sizeof(*req)); | ||
755 | ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
756 | |||
757 | return crypto_ablkcipher_encrypt(cryptd_req); | ||
758 | } else { | ||
759 | return __ablk_encrypt(req); | ||
760 | } | ||
761 | } | ||
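The irq_fpu_usable() test is what makes the SSE2 code safe from any context: when the FPU cannot be touched here (for instance in an interrupt that preempted a task with live user FPU state), the request is copied and retargeted at the cryptd workqueue, which re-enters the same internal blkcipher in process context. A hypothetical caller-side view (error handling omitted; these are standard crypto API calls of this era):

        struct crypto_ablkcipher *tfm;

        tfm = crypto_alloc_ablkcipher("cbc(serpent)", 0, 0);
        crypto_ablkcipher_setkey(tfm, key, SERPENT_MAX_KEY_SIZE);
        /* each request then takes the direct SSE2 path whenever
           irq_fpu_usable() allows, and the cryptd worker otherwise */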
762 | |||
763 | static int ablk_decrypt(struct ablkcipher_request *req) | ||
764 | { | ||
765 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); | ||
766 | struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
767 | |||
768 | if (!irq_fpu_usable()) { | ||
769 | struct ablkcipher_request *cryptd_req = | ||
770 | ablkcipher_request_ctx(req); | ||
771 | |||
772 | memcpy(cryptd_req, req, sizeof(*req)); | ||
773 | ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
774 | |||
775 | return crypto_ablkcipher_decrypt(cryptd_req); | ||
776 | } else { | ||
777 | struct blkcipher_desc desc; | ||
778 | |||
779 | desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); | ||
780 | desc.info = req->info; | ||
781 | desc.flags = 0; | ||
782 | |||
783 | return crypto_blkcipher_crt(desc.tfm)->decrypt( | ||
784 | &desc, req->dst, req->src, req->nbytes); | ||
785 | } | ||
786 | } | ||
787 | |||
788 | static void ablk_exit(struct crypto_tfm *tfm) | ||
789 | { | ||
790 | struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm); | ||
791 | |||
792 | cryptd_free_ablkcipher(ctx->cryptd_tfm); | ||
793 | } | ||
794 | |||
795 | static void ablk_init_common(struct crypto_tfm *tfm, | ||
796 | struct cryptd_ablkcipher *cryptd_tfm) | ||
797 | { | ||
798 | struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm); | ||
799 | |||
800 | ctx->cryptd_tfm = cryptd_tfm; | ||
801 | tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) + | ||
802 | crypto_ablkcipher_reqsize(&cryptd_tfm->base); | ||
803 | } | ||
804 | |||
805 | static int ablk_ecb_init(struct crypto_tfm *tfm) | ||
806 | { | ||
807 | struct cryptd_ablkcipher *cryptd_tfm; | ||
808 | |||
809 | cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ecb-serpent-sse2", 0, 0); | ||
810 | if (IS_ERR(cryptd_tfm)) | ||
811 | return PTR_ERR(cryptd_tfm); | ||
812 | ablk_init_common(tfm, cryptd_tfm); | ||
813 | return 0; | ||
814 | } | ||
815 | |||
816 | static struct crypto_alg ablk_ecb_alg = { | ||
817 | .cra_name = "ecb(serpent)", | ||
818 | .cra_driver_name = "ecb-serpent-sse2", | ||
819 | .cra_priority = 400, | ||
820 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
821 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
822 | .cra_ctxsize = sizeof(struct async_serpent_ctx), | ||
823 | .cra_alignmask = 0, | ||
824 | .cra_type = &crypto_ablkcipher_type, | ||
825 | .cra_module = THIS_MODULE, | ||
826 | .cra_list = LIST_HEAD_INIT(ablk_ecb_alg.cra_list), | ||
827 | .cra_init = ablk_ecb_init, | ||
828 | .cra_exit = ablk_exit, | ||
829 | .cra_u = { | ||
830 | .ablkcipher = { | ||
831 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
832 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
833 | .setkey = ablk_set_key, | ||
834 | .encrypt = ablk_encrypt, | ||
835 | .decrypt = ablk_decrypt, | ||
836 | }, | ||
837 | }, | ||
838 | }; | ||
839 | |||
840 | static int ablk_cbc_init(struct crypto_tfm *tfm) | ||
841 | { | ||
842 | struct cryptd_ablkcipher *cryptd_tfm; | ||
843 | |||
844 | cryptd_tfm = cryptd_alloc_ablkcipher("__driver-cbc-serpent-sse2", 0, 0); | ||
845 | if (IS_ERR(cryptd_tfm)) | ||
846 | return PTR_ERR(cryptd_tfm); | ||
847 | ablk_init_common(tfm, cryptd_tfm); | ||
848 | return 0; | ||
849 | } | ||
850 | |||
851 | static struct crypto_alg ablk_cbc_alg = { | ||
852 | .cra_name = "cbc(serpent)", | ||
853 | .cra_driver_name = "cbc-serpent-sse2", | ||
854 | .cra_priority = 400, | ||
855 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
856 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
857 | .cra_ctxsize = sizeof(struct async_serpent_ctx), | ||
858 | .cra_alignmask = 0, | ||
859 | .cra_type = &crypto_ablkcipher_type, | ||
860 | .cra_module = THIS_MODULE, | ||
861 | .cra_list = LIST_HEAD_INIT(ablk_cbc_alg.cra_list), | ||
862 | .cra_init = ablk_cbc_init, | ||
863 | .cra_exit = ablk_exit, | ||
864 | .cra_u = { | ||
865 | .ablkcipher = { | ||
866 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
867 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
868 | .ivsize = SERPENT_BLOCK_SIZE, | ||
869 | .setkey = ablk_set_key, | ||
870 | .encrypt = __ablk_encrypt, | ||
871 | .decrypt = ablk_decrypt, | ||
872 | }, | ||
873 | }, | ||
874 | }; | ||
875 | |||
876 | static int ablk_ctr_init(struct crypto_tfm *tfm) | ||
877 | { | ||
878 | struct cryptd_ablkcipher *cryptd_tfm; | ||
879 | |||
880 | cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ctr-serpent-sse2", 0, 0); | ||
881 | if (IS_ERR(cryptd_tfm)) | ||
882 | return PTR_ERR(cryptd_tfm); | ||
883 | ablk_init_common(tfm, cryptd_tfm); | ||
884 | return 0; | ||
885 | } | ||
886 | |||
887 | static struct crypto_alg ablk_ctr_alg = { | ||
888 | .cra_name = "ctr(serpent)", | ||
889 | .cra_driver_name = "ctr-serpent-sse2", | ||
890 | .cra_priority = 400, | ||
891 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
892 | .cra_blocksize = 1, | ||
893 | .cra_ctxsize = sizeof(struct async_serpent_ctx), | ||
894 | .cra_alignmask = 0, | ||
895 | .cra_type = &crypto_ablkcipher_type, | ||
896 | .cra_module = THIS_MODULE, | ||
897 | .cra_list = LIST_HEAD_INIT(ablk_ctr_alg.cra_list), | ||
898 | .cra_init = ablk_ctr_init, | ||
899 | .cra_exit = ablk_exit, | ||
900 | .cra_u = { | ||
901 | .ablkcipher = { | ||
902 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
903 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
904 | .ivsize = SERPENT_BLOCK_SIZE, | ||
905 | .setkey = ablk_set_key, | ||
906 | .encrypt = ablk_encrypt, | ||
907 | .decrypt = ablk_encrypt, | ||
908 | .geniv = "chainiv", | ||
909 | }, | ||
910 | }, | ||
911 | }; | ||
912 | |||
913 | static int ablk_lrw_init(struct crypto_tfm *tfm) | ||
914 | { | ||
915 | struct cryptd_ablkcipher *cryptd_tfm; | ||
916 | |||
917 | cryptd_tfm = cryptd_alloc_ablkcipher("__driver-lrw-serpent-sse2", 0, 0); | ||
918 | if (IS_ERR(cryptd_tfm)) | ||
919 | return PTR_ERR(cryptd_tfm); | ||
920 | ablk_init_common(tfm, cryptd_tfm); | ||
921 | return 0; | ||
922 | } | ||
923 | |||
924 | static struct crypto_alg ablk_lrw_alg = { | ||
925 | .cra_name = "lrw(serpent)", | ||
926 | .cra_driver_name = "lrw-serpent-sse2", | ||
927 | .cra_priority = 400, | ||
928 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
929 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
930 | .cra_ctxsize = sizeof(struct async_serpent_ctx), | ||
931 | .cra_alignmask = 0, | ||
932 | .cra_type = &crypto_ablkcipher_type, | ||
933 | .cra_module = THIS_MODULE, | ||
934 | .cra_list = LIST_HEAD_INIT(ablk_lrw_alg.cra_list), | ||
935 | .cra_init = ablk_lrw_init, | ||
936 | .cra_exit = ablk_exit, | ||
937 | .cra_u = { | ||
938 | .ablkcipher = { | ||
939 | .min_keysize = SERPENT_MIN_KEY_SIZE + | ||
940 | SERPENT_BLOCK_SIZE, | ||
941 | .max_keysize = SERPENT_MAX_KEY_SIZE + | ||
942 | SERPENT_BLOCK_SIZE, | ||
943 | .ivsize = SERPENT_BLOCK_SIZE, | ||
944 | .setkey = ablk_set_key, | ||
945 | .encrypt = ablk_encrypt, | ||
946 | .decrypt = ablk_decrypt, | ||
947 | }, | ||
948 | }, | ||
949 | }; | ||
950 | |||
951 | static int ablk_xts_init(struct crypto_tfm *tfm) | ||
952 | { | ||
953 | struct cryptd_ablkcipher *cryptd_tfm; | ||
954 | |||
955 | cryptd_tfm = cryptd_alloc_ablkcipher("__driver-xts-serpent-sse2", 0, 0); | ||
956 | if (IS_ERR(cryptd_tfm)) | ||
957 | return PTR_ERR(cryptd_tfm); | ||
958 | ablk_init_common(tfm, cryptd_tfm); | ||
959 | return 0; | ||
960 | } | ||
961 | |||
962 | static struct crypto_alg ablk_xts_alg = { | ||
963 | .cra_name = "xts(serpent)", | ||
964 | .cra_driver_name = "xts-serpent-sse2", | ||
965 | .cra_priority = 400, | ||
966 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
967 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
968 | .cra_ctxsize = sizeof(struct async_serpent_ctx), | ||
969 | .cra_alignmask = 0, | ||
970 | .cra_type = &crypto_ablkcipher_type, | ||
971 | .cra_module = THIS_MODULE, | ||
972 | .cra_list = LIST_HEAD_INIT(ablk_xts_alg.cra_list), | ||
973 | .cra_init = ablk_xts_init, | ||
974 | .cra_exit = ablk_exit, | ||
975 | .cra_u = { | ||
976 | .ablkcipher = { | ||
977 | .min_keysize = SERPENT_MIN_KEY_SIZE * 2, | ||
978 | .max_keysize = SERPENT_MAX_KEY_SIZE * 2, | ||
979 | .ivsize = SERPENT_BLOCK_SIZE, | ||
980 | .setkey = ablk_set_key, | ||
981 | .encrypt = ablk_encrypt, | ||
982 | .decrypt = ablk_decrypt, | ||
983 | }, | ||
984 | }, | ||
985 | }; | ||
986 | |||
987 | static int __init serpent_sse2_init(void) | ||
988 | { | ||
989 | int err; | ||
990 | |||
991 | if (!cpu_has_xmm2) { | ||
992 | printk(KERN_INFO "SSE2 instructions are not detected.\n"); | ||
993 | return -ENODEV; | ||
994 | } | ||
995 | |||
996 | err = crypto_register_alg(&blk_ecb_alg); | ||
997 | if (err) | ||
998 | goto blk_ecb_err; | ||
999 | err = crypto_register_alg(&blk_cbc_alg); | ||
1000 | if (err) | ||
1001 | goto blk_cbc_err; | ||
1002 | err = crypto_register_alg(&blk_ctr_alg); | ||
1003 | if (err) | ||
1004 | goto blk_ctr_err; | ||
1005 | err = crypto_register_alg(&ablk_ecb_alg); | ||
1006 | if (err) | ||
1007 | goto ablk_ecb_err; | ||
1008 | err = crypto_register_alg(&ablk_cbc_alg); | ||
1009 | if (err) | ||
1010 | goto ablk_cbc_err; | ||
1011 | err = crypto_register_alg(&ablk_ctr_alg); | ||
1012 | if (err) | ||
1013 | goto ablk_ctr_err; | ||
1014 | err = crypto_register_alg(&blk_lrw_alg); | ||
1015 | if (err) | ||
1016 | goto blk_lrw_err; | ||
1017 | err = crypto_register_alg(&ablk_lrw_alg); | ||
1018 | if (err) | ||
1019 | goto ablk_lrw_err; | ||
1020 | err = crypto_register_alg(&blk_xts_alg); | ||
1021 | if (err) | ||
1022 | goto blk_xts_err; | ||
1023 | err = crypto_register_alg(&ablk_xts_alg); | ||
1024 | if (err) | ||
1025 | goto ablk_xts_err; | ||
1026 | return err; | ||
1027 | |||
1028 | crypto_unregister_alg(&ablk_xts_alg); | ||
1029 | ablk_xts_err: | ||
1030 | crypto_unregister_alg(&blk_xts_alg); | ||
1031 | blk_xts_err: | ||
1032 | crypto_unregister_alg(&ablk_lrw_alg); | ||
1033 | ablk_lrw_err: | ||
1034 | crypto_unregister_alg(&blk_lrw_alg); | ||
1035 | blk_lrw_err: | ||
1036 | crypto_unregister_alg(&ablk_ctr_alg); | ||
1037 | ablk_ctr_err: | ||
1038 | crypto_unregister_alg(&ablk_cbc_alg); | ||
1039 | ablk_cbc_err: | ||
1040 | crypto_unregister_alg(&ablk_ecb_alg); | ||
1041 | ablk_ecb_err: | ||
1042 | crypto_unregister_alg(&blk_ctr_alg); | ||
1043 | blk_ctr_err: | ||
1044 | crypto_unregister_alg(&blk_cbc_alg); | ||
1045 | blk_cbc_err: | ||
1046 | crypto_unregister_alg(&blk_ecb_alg); | ||
1047 | blk_ecb_err: | ||
1048 | return err; | ||
1049 | } | ||
1050 | |||
1051 | static void __exit serpent_sse2_exit(void) | ||
1052 | { | ||
1053 | crypto_unregister_alg(&ablk_xts_alg); | ||
1054 | crypto_unregister_alg(&blk_xts_alg); | ||
1055 | crypto_unregister_alg(&ablk_lrw_alg); | ||
1056 | crypto_unregister_alg(&blk_lrw_alg); | ||
1057 | crypto_unregister_alg(&ablk_ctr_alg); | ||
1058 | crypto_unregister_alg(&ablk_cbc_alg); | ||
1059 | crypto_unregister_alg(&ablk_ecb_alg); | ||
1060 | crypto_unregister_alg(&blk_ctr_alg); | ||
1061 | crypto_unregister_alg(&blk_cbc_alg); | ||
1062 | crypto_unregister_alg(&blk_ecb_alg); | ||
1063 | } | ||
1064 | |||
1065 | module_init(serpent_sse2_init); | ||
1066 | module_exit(serpent_sse2_exit); | ||
1067 | |||
1068 | MODULE_DESCRIPTION("Serpent Cipher Algorithm, SSE2 optimized"); | ||
1069 | MODULE_LICENSE("GPL"); | ||
1070 | MODULE_ALIAS("serpent"); | ||
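Only the five priority-400 algorithms are meant to be found by name; the internal "__ecb/__cbc/..." entries register at priority 0 and exist solely for cryptd to wrap. Since the crypto API resolves a cra_name to its highest-priority implementation, these registrations shadow slower implementations of the same names:

        cra_name          cra_driver_name        priority
        ecb(serpent)      ecb-serpent-sse2       400
        cbc(serpent)      cbc-serpent-sse2       400
        ctr(serpent)      ctr-serpent-sse2       400
        lrw(serpent)      lrw-serpent-sse2       400
        xts(serpent)      xts-serpent-sse2       400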
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index 5ede9c444c3e..7fee8c152f93 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c | |||
@@ -32,6 +32,8 @@ | |||
32 | #include <crypto/algapi.h> | 32 | #include <crypto/algapi.h> |
33 | #include <crypto/twofish.h> | 33 | #include <crypto/twofish.h> |
34 | #include <crypto/b128ops.h> | 34 | #include <crypto/b128ops.h> |
35 | #include <crypto/lrw.h> | ||
36 | #include <crypto/xts.h> | ||
35 | 37 | ||
36 | /* regular block cipher functions from twofish_x86_64 module */ | 38 | /* regular block cipher functions from twofish_x86_64 module */ |
37 | asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, | 39 | asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, |
@@ -432,6 +434,209 @@ static struct crypto_alg blk_ctr_alg = { | |||
432 | }, | 434 | }, |
433 | }; | 435 | }; |
434 | 436 | ||
437 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
438 | { | ||
439 | const unsigned int bsize = TF_BLOCK_SIZE; | ||
440 | struct twofish_ctx *ctx = priv; | ||
441 | int i; | ||
442 | |||
443 | if (nbytes == 3 * bsize) { | ||
444 | twofish_enc_blk_3way(ctx, srcdst, srcdst); | ||
445 | return; | ||
446 | } | ||
447 | |||
448 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
449 | twofish_enc_blk(ctx, srcdst, srcdst); | ||
450 | } | ||
451 | |||
452 | static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
453 | { | ||
454 | const unsigned int bsize = TF_BLOCK_SIZE; | ||
455 | struct twofish_ctx *ctx = priv; | ||
456 | int i; | ||
457 | |||
458 | if (nbytes == 3 * bsize) { | ||
459 | twofish_dec_blk_3way(ctx, srcdst, srcdst); | ||
460 | return; | ||
461 | } | ||
462 | |||
463 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
464 | twofish_dec_blk(ctx, srcdst, srcdst); | ||
465 | } | ||
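Unlike the serpent glue above, these callbacks need no FPU bracketing (the 3-way code is plain integer assembler), and the scratch buffers passed in below are sized to the 3-way parallelism:

        tbuflen = sizeof(be128[3]) = 3 * 16 bytes = 3 * TF_BLOCK_SIZE

so lrw_crypt()/xts_crypt() hand the callbacks at most three blocks at a time, making nbytes == 3 * bsize the only batched case.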
466 | |||
467 | struct twofish_lrw_ctx { | ||
468 | struct lrw_table_ctx lrw_table; | ||
469 | struct twofish_ctx twofish_ctx; | ||
470 | }; | ||
471 | |||
472 | static int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
473 | unsigned int keylen) | ||
474 | { | ||
475 | struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | ||
476 | int err; | ||
477 | |||
478 | err = __twofish_setkey(&ctx->twofish_ctx, key, keylen - TF_BLOCK_SIZE, | ||
479 | &tfm->crt_flags); | ||
480 | if (err) | ||
481 | return err; | ||
482 | |||
483 | return lrw_init_table(&ctx->lrw_table, key + keylen - TF_BLOCK_SIZE); | ||
484 | } | ||
485 | |||
486 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
487 | struct scatterlist *src, unsigned int nbytes) | ||
488 | { | ||
489 | struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
490 | be128 buf[3]; | ||
491 | struct lrw_crypt_req req = { | ||
492 | .tbuf = buf, | ||
493 | .tbuflen = sizeof(buf), | ||
494 | |||
495 | .table_ctx = &ctx->lrw_table, | ||
496 | .crypt_ctx = &ctx->twofish_ctx, | ||
497 | .crypt_fn = encrypt_callback, | ||
498 | }; | ||
499 | |||
500 | return lrw_crypt(desc, dst, src, nbytes, &req); | ||
501 | } | ||
502 | |||
503 | static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
504 | struct scatterlist *src, unsigned int nbytes) | ||
505 | { | ||
506 | struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
507 | be128 buf[3]; | ||
508 | struct lrw_crypt_req req = { | ||
509 | .tbuf = buf, | ||
510 | .tbuflen = sizeof(buf), | ||
511 | |||
512 | .table_ctx = &ctx->lrw_table, | ||
513 | .crypt_ctx = &ctx->twofish_ctx, | ||
514 | .crypt_fn = decrypt_callback, | ||
515 | }; | ||
516 | |||
517 | return lrw_crypt(desc, dst, src, nbytes, &req); | ||
518 | } | ||
519 | |||
520 | static void lrw_exit_tfm(struct crypto_tfm *tfm) | ||
521 | { | ||
522 | struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | ||
523 | |||
524 | lrw_free_table(&ctx->lrw_table); | ||
525 | } | ||
526 | |||
527 | static struct crypto_alg blk_lrw_alg = { | ||
528 | .cra_name = "lrw(twofish)", | ||
529 | .cra_driver_name = "lrw-twofish-3way", | ||
530 | .cra_priority = 300, | ||
531 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
532 | .cra_blocksize = TF_BLOCK_SIZE, | ||
533 | .cra_ctxsize = sizeof(struct twofish_lrw_ctx), | ||
534 | .cra_alignmask = 0, | ||
535 | .cra_type = &crypto_blkcipher_type, | ||
536 | .cra_module = THIS_MODULE, | ||
537 | .cra_list = LIST_HEAD_INIT(blk_lrw_alg.cra_list), | ||
538 | .cra_exit = lrw_exit_tfm, | ||
539 | .cra_u = { | ||
540 | .blkcipher = { | ||
541 | .min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE, | ||
542 | .max_keysize = TF_MAX_KEY_SIZE + TF_BLOCK_SIZE, | ||
543 | .ivsize = TF_BLOCK_SIZE, | ||
544 | .setkey = lrw_twofish_setkey, | ||
545 | .encrypt = lrw_encrypt, | ||
546 | .decrypt = lrw_decrypt, | ||
547 | }, | ||
548 | }, | ||
549 | }; | ||
550 | |||
551 | struct twofish_xts_ctx { | ||
552 | struct twofish_ctx tweak_ctx; | ||
553 | struct twofish_ctx crypt_ctx; | ||
554 | }; | ||
555 | |||
556 | static int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
557 | unsigned int keylen) | ||
558 | { | ||
559 | struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm); | ||
560 | u32 *flags = &tfm->crt_flags; | ||
561 | int err; | ||
562 | |||
563 | /* The key consists of two keys of equal size concatenated; | ||
564 | * therefore its length must be even. | ||
565 | */ | ||
566 | if (keylen % 2) { | ||
567 | *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
568 | return -EINVAL; | ||
569 | } | ||
570 | |||
571 | /* first half of xts-key is for crypt */ | ||
572 | err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); | ||
573 | if (err) | ||
574 | return err; | ||
575 | |||
576 | /* second half of xts-key is for tweak */ | ||
577 | return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, | ||
578 | flags); | ||
579 | } | ||
580 | |||
581 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
582 | struct scatterlist *src, unsigned int nbytes) | ||
583 | { | ||
584 | struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
585 | be128 buf[3]; | ||
586 | struct xts_crypt_req req = { | ||
587 | .tbuf = buf, | ||
588 | .tbuflen = sizeof(buf), | ||
589 | |||
590 | .tweak_ctx = &ctx->tweak_ctx, | ||
591 | .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), | ||
592 | .crypt_ctx = &ctx->crypt_ctx, | ||
593 | .crypt_fn = encrypt_callback, | ||
594 | }; | ||
595 | |||
596 | return xts_crypt(desc, dst, src, nbytes, &req); | ||
597 | } | ||
598 | |||
599 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
600 | struct scatterlist *src, unsigned int nbytes) | ||
601 | { | ||
602 | struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
603 | be128 buf[3]; | ||
604 | struct xts_crypt_req req = { | ||
605 | .tbuf = buf, | ||
606 | .tbuflen = sizeof(buf), | ||
607 | |||
608 | .tweak_ctx = &ctx->tweak_ctx, | ||
609 | .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), | ||
610 | .crypt_ctx = &ctx->crypt_ctx, | ||
611 | .crypt_fn = decrypt_callback, | ||
612 | }; | ||
613 | |||
614 | return xts_crypt(desc, dst, src, nbytes, &req); | ||
615 | } | ||
616 | |||
617 | static struct crypto_alg blk_xts_alg = { | ||
618 | .cra_name = "xts(twofish)", | ||
619 | .cra_driver_name = "xts-twofish-3way", | ||
620 | .cra_priority = 300, | ||
621 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
622 | .cra_blocksize = TF_BLOCK_SIZE, | ||
623 | .cra_ctxsize = sizeof(struct twofish_xts_ctx), | ||
624 | .cra_alignmask = 0, | ||
625 | .cra_type = &crypto_blkcipher_type, | ||
626 | .cra_module = THIS_MODULE, | ||
627 | .cra_list = LIST_HEAD_INIT(blk_xts_alg.cra_list), | ||
628 | .cra_u = { | ||
629 | .blkcipher = { | ||
630 | .min_keysize = TF_MIN_KEY_SIZE * 2, | ||
631 | .max_keysize = TF_MAX_KEY_SIZE * 2, | ||
632 | .ivsize = TF_BLOCK_SIZE, | ||
633 | .setkey = xts_twofish_setkey, | ||
634 | .encrypt = xts_encrypt, | ||
635 | .decrypt = xts_decrypt, | ||
636 | }, | ||
637 | }, | ||
638 | }; | ||
639 | |||
435 | int __init init(void) | 640 | int __init init(void) |
436 | { | 641 | { |
437 | int err; | 642 | int err; |
@@ -445,9 +650,20 @@ int __init init(void) | |||
445 | err = crypto_register_alg(&blk_ctr_alg); | 650 | err = crypto_register_alg(&blk_ctr_alg); |
446 | if (err) | 651 | if (err) |
447 | goto ctr_err; | 652 | goto ctr_err; |
653 | err = crypto_register_alg(&blk_lrw_alg); | ||
654 | if (err) | ||
655 | goto blk_lrw_err; | ||
656 | err = crypto_register_alg(&blk_xts_alg); | ||
657 | if (err) | ||
658 | goto blk_xts_err; | ||
448 | 659 | ||
449 | return 0; | 660 | return 0; |
450 | 661 | ||
662 | crypto_unregister_alg(&blk_xts_alg); | ||
663 | blk_xts_err: | ||
664 | crypto_unregister_alg(&blk_lrw_alg); | ||
665 | blk_lrw_err: | ||
666 | crypto_unregister_alg(&blk_ctr_alg); | ||
451 | ctr_err: | 667 | ctr_err: |
452 | crypto_unregister_alg(&blk_cbc_alg); | 668 | crypto_unregister_alg(&blk_cbc_alg); |
453 | cbc_err: | 669 | cbc_err: |
@@ -458,6 +674,8 @@ ecb_err: | |||
458 | 674 | ||
459 | void __exit fini(void) | 675 | void __exit fini(void) |
460 | { | 676 | { |
677 | crypto_unregister_alg(&blk_xts_alg); | ||
678 | crypto_unregister_alg(&blk_lrw_alg); | ||
461 | crypto_unregister_alg(&blk_ctr_alg); | 679 | crypto_unregister_alg(&blk_ctr_alg); |
462 | crypto_unregister_alg(&blk_cbc_alg); | 680 | crypto_unregister_alg(&blk_cbc_alg); |
463 | crypto_unregister_alg(&blk_ecb_alg); | 681 | crypto_unregister_alg(&blk_ecb_alg); |
diff --git a/arch/x86/ia32/Makefile b/arch/x86/ia32/Makefile index 52d0ccfcf6ea..455646e0e532 100644 --- a/arch/x86/ia32/Makefile +++ b/arch/x86/ia32/Makefile | |||
@@ -3,6 +3,7 @@ | |||
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o | 5 | obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o |
6 | obj-$(CONFIG_IA32_EMULATION) += nosyscall.o syscall_ia32.o | ||
6 | 7 | ||
7 | sysv-$(CONFIG_SYSVIPC) := ipc32.o | 8 | sysv-$(CONFIG_SYSVIPC) := ipc32.o |
8 | obj-$(CONFIG_IA32_EMULATION) += $(sysv-y) | 9 | obj-$(CONFIG_IA32_EMULATION) += $(sysv-y) |
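The new syscall_ia32.o replaces the hand-maintained ia32_sys_call_table that this diff deletes from ia32entry.S further below (the long .quad table): the table moves into C and is filled from a generated header, so each syscall is listed exactly once. A sketch of the table-from-header technique (identifier names assumed; the real file is arch/x86/ia32/syscall_ia32.c):

        /* first pass: declare every compat entry point */
        #define __SYSCALL_I386(nr, sym, compat) extern void compat(void);
        #include <asm/syscalls_32.h>
        #undef __SYSCALL_I386

        /* second pass: emit one designated initializer per syscall */
        #define __SYSCALL_I386(nr, sym, compat) [nr] = compat,

        typedef void (*sys_call_ptr_t)(void);
        extern void compat_ni_syscall(void);

        const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max + 1] = {
                /* holes default to the ENOSYS stub */
                [0 ... __NR_ia32_syscall_max] = compat_ni_syscall,
        #include <asm/syscalls_32.h>
        };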
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 3e274564f6bf..e3e734005e19 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <asm/segment.h> | 14 | #include <asm/segment.h> |
15 | #include <asm/irqflags.h> | 15 | #include <asm/irqflags.h> |
16 | #include <linux/linkage.h> | 16 | #include <linux/linkage.h> |
17 | #include <linux/err.h> | ||
17 | 18 | ||
18 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | 19 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ |
19 | #include <linux/elf-em.h> | 20 | #include <linux/elf-em.h> |
@@ -27,8 +28,6 @@ | |||
27 | 28 | ||
28 | .section .entry.text, "ax" | 29 | .section .entry.text, "ax" |
29 | 30 | ||
30 | #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) | ||
31 | |||
32 | .macro IA32_ARG_FIXUP noebp=0 | 31 | .macro IA32_ARG_FIXUP noebp=0 |
33 | movl %edi,%r8d | 32 | movl %edi,%r8d |
34 | .if \noebp | 33 | .if \noebp |
@@ -191,7 +190,7 @@ sysexit_from_sys_call: | |||
191 | movl %ebx,%edx /* 3rd arg: 1st syscall arg */ | 190 | movl %ebx,%edx /* 3rd arg: 1st syscall arg */ |
192 | movl %eax,%esi /* 2nd arg: syscall number */ | 191 | movl %eax,%esi /* 2nd arg: syscall number */ |
193 | movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */ | 192 | movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */ |
194 | call audit_syscall_entry | 193 | call __audit_syscall_entry |
195 | movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */ | 194 | movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */ |
196 | cmpq $(IA32_NR_syscalls-1),%rax | 195 | cmpq $(IA32_NR_syscalls-1),%rax |
197 | ja ia32_badsys | 196 | ja ia32_badsys |
@@ -208,12 +207,13 @@ sysexit_from_sys_call: | |||
208 | TRACE_IRQS_ON | 207 | TRACE_IRQS_ON |
209 | sti | 208 | sti |
210 | movl %eax,%esi /* second arg, syscall return value */ | 209 | movl %eax,%esi /* second arg, syscall return value */ |
211 | cmpl $0,%eax /* is it < 0? */ | 210 | cmpl $-MAX_ERRNO,%eax /* is it an error ? */ |
212 | setl %al /* 1 if so, 0 if not */ | 211 | jbe 1f |
212 | movslq %eax, %rsi /* if error sign extend to 64 bits */ | ||
213 | 1: setbe %al /* 1 on success, 0 on error */ | ||
213 | movzbl %al,%edi /* zero-extend that into %edi */ | 214 | movzbl %al,%edi /* zero-extend that into %edi */ |
214 | inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ | 215 | call __audit_syscall_exit |
215 | call audit_syscall_exit | 216 | movq RAX-ARGOFFSET(%rsp),%rax /* reload syscall return value */ |
216 | movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall return value */ | ||
217 | movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi | 217 | movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi |
218 | cli | 218 | cli |
219 | TRACE_IRQS_OFF | 219 | TRACE_IRQS_OFF |
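The rewritten exit-audit test stops treating every negative return as a failure: only values in the errno range [-MAX_ERRNO, -1] count, so large "negative-looking" successes (a mmap() address with the top bit set, say) are audited correctly, and real errors are additionally sign-extended into the 64-bit return_code argument. A rough C equivalent of the sequence above (a sketch, not the kernel's code; field names are assumed, and __audit_syscall_exit(int success, long return_code) comes from the audit rework this merge pulls in):

        u32 eax = regs->ax;                    /* 32-bit syscall return   */
        long ret = eax;                        /* movl: zero-extend       */
        int success = eax <= (u32)-MAX_ERRNO;  /* outside the errno range */

        if (!success)
                ret = (s32)eax;                /* movslq: sign-extend the -errno */
        __audit_syscall_exit(success, ret);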
@@ -447,9 +447,6 @@ ia32_badsys: | |||
447 | movq $-ENOSYS,%rax | 447 | movq $-ENOSYS,%rax |
448 | jmp ia32_sysret | 448 | jmp ia32_sysret |
449 | 449 | ||
450 | quiet_ni_syscall: | ||
451 | movq $-ENOSYS,%rax | ||
452 | ret | ||
453 | CFI_ENDPROC | 450 | CFI_ENDPROC |
454 | 451 | ||
455 | .macro PTREGSCALL label, func, arg | 452 | .macro PTREGSCALL label, func, arg |
@@ -494,357 +491,3 @@ ia32_ptregs_common: | |||
494 | jmp ia32_sysret /* misbalances the return cache */ | 491 | jmp ia32_sysret /* misbalances the return cache */ |
495 | CFI_ENDPROC | 492 | CFI_ENDPROC |
496 | END(ia32_ptregs_common) | 493 | END(ia32_ptregs_common) |
497 | |||
498 | .section .rodata,"a" | ||
499 | .align 8 | ||
500 | ia32_sys_call_table: | ||
501 | .quad sys_restart_syscall | ||
502 | .quad sys_exit | ||
503 | .quad stub32_fork | ||
504 | .quad sys_read | ||
505 | .quad sys_write | ||
506 | .quad compat_sys_open /* 5 */ | ||
507 | .quad sys_close | ||
508 | .quad sys32_waitpid | ||
509 | .quad sys_creat | ||
510 | .quad sys_link | ||
511 | .quad sys_unlink /* 10 */ | ||
512 | .quad stub32_execve | ||
513 | .quad sys_chdir | ||
514 | .quad compat_sys_time | ||
515 | .quad sys_mknod | ||
516 | .quad sys_chmod /* 15 */ | ||
517 | .quad sys_lchown16 | ||
518 | .quad quiet_ni_syscall /* old break syscall holder */ | ||
519 | .quad sys_stat | ||
520 | .quad sys32_lseek | ||
521 | .quad sys_getpid /* 20 */ | ||
522 | .quad compat_sys_mount /* mount */ | ||
523 | .quad sys_oldumount /* old_umount */ | ||
524 | .quad sys_setuid16 | ||
525 | .quad sys_getuid16 | ||
526 | .quad compat_sys_stime /* stime */ /* 25 */ | ||
527 | .quad compat_sys_ptrace /* ptrace */ | ||
528 | .quad sys_alarm | ||
529 | .quad sys_fstat /* (old)fstat */ | ||
530 | .quad sys_pause | ||
531 | .quad compat_sys_utime /* 30 */ | ||
532 | .quad quiet_ni_syscall /* old stty syscall holder */ | ||
533 | .quad quiet_ni_syscall /* old gtty syscall holder */ | ||
534 | .quad sys_access | ||
535 | .quad sys_nice | ||
536 | .quad quiet_ni_syscall /* 35 */ /* old ftime syscall holder */ | ||
537 | .quad sys_sync | ||
538 | .quad sys32_kill | ||
539 | .quad sys_rename | ||
540 | .quad sys_mkdir | ||
541 | .quad sys_rmdir /* 40 */ | ||
542 | .quad sys_dup | ||
543 | .quad sys_pipe | ||
544 | .quad compat_sys_times | ||
545 | .quad quiet_ni_syscall /* old prof syscall holder */ | ||
546 | .quad sys_brk /* 45 */ | ||
547 | .quad sys_setgid16 | ||
548 | .quad sys_getgid16 | ||
549 | .quad sys_signal | ||
550 | .quad sys_geteuid16 | ||
551 | .quad sys_getegid16 /* 50 */ | ||
552 | .quad sys_acct | ||
553 | .quad sys_umount /* new_umount */ | ||
554 | .quad quiet_ni_syscall /* old lock syscall holder */ | ||
555 | .quad compat_sys_ioctl | ||
556 | .quad compat_sys_fcntl64 /* 55 */ | ||
557 | .quad quiet_ni_syscall /* old mpx syscall holder */ | ||
558 | .quad sys_setpgid | ||
559 | .quad quiet_ni_syscall /* old ulimit syscall holder */ | ||
560 | .quad sys_olduname | ||
561 | .quad sys_umask /* 60 */ | ||
562 | .quad sys_chroot | ||
563 | .quad compat_sys_ustat | ||
564 | .quad sys_dup2 | ||
565 | .quad sys_getppid | ||
566 | .quad sys_getpgrp /* 65 */ | ||
567 | .quad sys_setsid | ||
568 | .quad sys32_sigaction | ||
569 | .quad sys_sgetmask | ||
570 | .quad sys_ssetmask | ||
571 | .quad sys_setreuid16 /* 70 */ | ||
572 | .quad sys_setregid16 | ||
573 | .quad sys32_sigsuspend | ||
574 | .quad compat_sys_sigpending | ||
575 | .quad sys_sethostname | ||
576 | .quad compat_sys_setrlimit /* 75 */ | ||
577 | .quad compat_sys_old_getrlimit /* old_getrlimit */ | ||
578 | .quad compat_sys_getrusage | ||
579 | .quad compat_sys_gettimeofday | ||
580 | .quad compat_sys_settimeofday | ||
581 | .quad sys_getgroups16 /* 80 */ | ||
582 | .quad sys_setgroups16 | ||
583 | .quad compat_sys_old_select | ||
584 | .quad sys_symlink | ||
585 | .quad sys_lstat | ||
586 | .quad sys_readlink /* 85 */ | ||
587 | .quad sys_uselib | ||
588 | .quad sys_swapon | ||
589 | .quad sys_reboot | ||
590 | .quad compat_sys_old_readdir | ||
591 | .quad sys32_mmap /* 90 */ | ||
592 | .quad sys_munmap | ||
593 | .quad sys_truncate | ||
594 | .quad sys_ftruncate | ||
595 | .quad sys_fchmod | ||
596 | .quad sys_fchown16 /* 95 */ | ||
597 | .quad sys_getpriority | ||
598 | .quad sys_setpriority | ||
599 | .quad quiet_ni_syscall /* old profil syscall holder */ | ||
600 | .quad compat_sys_statfs | ||
601 | .quad compat_sys_fstatfs /* 100 */ | ||
602 | .quad sys_ioperm | ||
603 | .quad compat_sys_socketcall | ||
604 | .quad sys_syslog | ||
605 | .quad compat_sys_setitimer | ||
606 | .quad compat_sys_getitimer /* 105 */ | ||
607 | .quad compat_sys_newstat | ||
608 | .quad compat_sys_newlstat | ||
609 | .quad compat_sys_newfstat | ||
610 | .quad sys_uname | ||
611 | .quad stub32_iopl /* 110 */ | ||
612 | .quad sys_vhangup | ||
613 | .quad quiet_ni_syscall /* old "idle" system call */ | ||
614 | .quad sys32_vm86_warning /* vm86old */ | ||
615 | .quad compat_sys_wait4 | ||
616 | .quad sys_swapoff /* 115 */ | ||
617 | .quad compat_sys_sysinfo | ||
618 | .quad sys32_ipc | ||
619 | .quad sys_fsync | ||
620 | .quad stub32_sigreturn | ||
621 | .quad stub32_clone /* 120 */ | ||
622 | .quad sys_setdomainname | ||
623 | .quad sys_newuname | ||
624 | .quad sys_modify_ldt | ||
625 | .quad compat_sys_adjtimex | ||
626 | .quad sys32_mprotect /* 125 */ | ||
627 | .quad compat_sys_sigprocmask | ||
628 | .quad quiet_ni_syscall /* create_module */ | ||
629 | .quad sys_init_module | ||
630 | .quad sys_delete_module | ||
631 | .quad quiet_ni_syscall /* 130 get_kernel_syms */ | ||
632 | .quad sys32_quotactl | ||
633 | .quad sys_getpgid | ||
634 | .quad sys_fchdir | ||
635 | .quad quiet_ni_syscall /* bdflush */ | ||
636 | .quad sys_sysfs /* 135 */ | ||
637 | .quad sys_personality | ||
638 | .quad quiet_ni_syscall /* for afs_syscall */ | ||
639 | .quad sys_setfsuid16 | ||
640 | .quad sys_setfsgid16 | ||
641 | .quad sys_llseek /* 140 */ | ||
642 | .quad compat_sys_getdents | ||
643 | .quad compat_sys_select | ||
644 | .quad sys_flock | ||
645 | .quad sys_msync | ||
646 | .quad compat_sys_readv /* 145 */ | ||
647 | .quad compat_sys_writev | ||
648 | .quad sys_getsid | ||
649 | .quad sys_fdatasync | ||
650 | .quad compat_sys_sysctl /* sysctl */ | ||
651 | .quad sys_mlock /* 150 */ | ||
652 | .quad sys_munlock | ||
653 | .quad sys_mlockall | ||
654 | .quad sys_munlockall | ||
655 | .quad sys_sched_setparam | ||
656 | .quad sys_sched_getparam /* 155 */ | ||
657 | .quad sys_sched_setscheduler | ||
658 | .quad sys_sched_getscheduler | ||
659 | .quad sys_sched_yield | ||
660 | .quad sys_sched_get_priority_max | ||
661 | .quad sys_sched_get_priority_min /* 160 */ | ||
662 | .quad sys32_sched_rr_get_interval | ||
663 | .quad compat_sys_nanosleep | ||
664 | .quad sys_mremap | ||
665 | .quad sys_setresuid16 | ||
666 | .quad sys_getresuid16 /* 165 */ | ||
667 | .quad sys32_vm86_warning /* vm86 */ | ||
668 | .quad quiet_ni_syscall /* query_module */ | ||
669 | .quad sys_poll | ||
670 | .quad quiet_ni_syscall /* old nfsservctl */ | ||
671 | .quad sys_setresgid16 /* 170 */ | ||
672 | .quad sys_getresgid16 | ||
673 | .quad sys_prctl | ||
674 | .quad stub32_rt_sigreturn | ||
675 | .quad sys32_rt_sigaction | ||
676 | .quad sys32_rt_sigprocmask /* 175 */ | ||
677 | .quad sys32_rt_sigpending | ||
678 | .quad compat_sys_rt_sigtimedwait | ||
679 | .quad sys32_rt_sigqueueinfo | ||
680 | .quad sys_rt_sigsuspend | ||
681 | .quad sys32_pread /* 180 */ | ||
682 | .quad sys32_pwrite | ||
683 | .quad sys_chown16 | ||
684 | .quad sys_getcwd | ||
685 | .quad sys_capget | ||
686 | .quad sys_capset | ||
687 | .quad stub32_sigaltstack | ||
688 | .quad sys32_sendfile | ||
689 | .quad quiet_ni_syscall /* streams1 */ | ||
690 | .quad quiet_ni_syscall /* streams2 */ | ||
691 | .quad stub32_vfork /* 190 */ | ||
692 | .quad compat_sys_getrlimit | ||
693 | .quad sys_mmap_pgoff | ||
694 | .quad sys32_truncate64 | ||
695 | .quad sys32_ftruncate64 | ||
696 | .quad sys32_stat64 /* 195 */ | ||
697 | .quad sys32_lstat64 | ||
698 | .quad sys32_fstat64 | ||
699 | .quad sys_lchown | ||
700 | .quad sys_getuid | ||
701 | .quad sys_getgid /* 200 */ | ||
702 | .quad sys_geteuid | ||
703 | .quad sys_getegid | ||
704 | .quad sys_setreuid | ||
705 | .quad sys_setregid | ||
706 | .quad sys_getgroups /* 205 */ | ||
707 | .quad sys_setgroups | ||
708 | .quad sys_fchown | ||
709 | .quad sys_setresuid | ||
710 | .quad sys_getresuid | ||
711 | .quad sys_setresgid /* 210 */ | ||
712 | .quad sys_getresgid | ||
713 | .quad sys_chown | ||
714 | .quad sys_setuid | ||
715 | .quad sys_setgid | ||
716 | .quad sys_setfsuid /* 215 */ | ||
717 | .quad sys_setfsgid | ||
718 | .quad sys_pivot_root | ||
719 | .quad sys_mincore | ||
720 | .quad sys_madvise | ||
721 | .quad compat_sys_getdents64 /* 220 getdents64 */ | ||
722 | .quad compat_sys_fcntl64 | ||
723 | .quad quiet_ni_syscall /* tux */ | ||
724 | .quad quiet_ni_syscall /* security */ | ||
725 | .quad sys_gettid | ||
726 | .quad sys32_readahead /* 225 */ | ||
727 | .quad sys_setxattr | ||
728 | .quad sys_lsetxattr | ||
729 | .quad sys_fsetxattr | ||
730 | .quad sys_getxattr | ||
731 | .quad sys_lgetxattr /* 230 */ | ||
732 | .quad sys_fgetxattr | ||
733 | .quad sys_listxattr | ||
734 | .quad sys_llistxattr | ||
735 | .quad sys_flistxattr | ||
736 | .quad sys_removexattr /* 235 */ | ||
737 | .quad sys_lremovexattr | ||
738 | .quad sys_fremovexattr | ||
739 | .quad sys_tkill | ||
740 | .quad sys_sendfile64 | ||
741 | .quad compat_sys_futex /* 240 */ | ||
742 | .quad compat_sys_sched_setaffinity | ||
743 | .quad compat_sys_sched_getaffinity | ||
744 | .quad sys_set_thread_area | ||
745 | .quad sys_get_thread_area | ||
746 | .quad compat_sys_io_setup /* 245 */ | ||
747 | .quad sys_io_destroy | ||
748 | .quad compat_sys_io_getevents | ||
749 | .quad compat_sys_io_submit | ||
750 | .quad sys_io_cancel | ||
751 | .quad sys32_fadvise64 /* 250 */ | ||
752 | .quad quiet_ni_syscall /* free_huge_pages */ | ||
753 | .quad sys_exit_group | ||
754 | .quad sys32_lookup_dcookie | ||
755 | .quad sys_epoll_create | ||
756 | .quad sys_epoll_ctl /* 255 */ | ||
757 | .quad sys_epoll_wait | ||
758 | .quad sys_remap_file_pages | ||
759 | .quad sys_set_tid_address | ||
760 | .quad compat_sys_timer_create | ||
761 | .quad compat_sys_timer_settime /* 260 */ | ||
762 | .quad compat_sys_timer_gettime | ||
763 | .quad sys_timer_getoverrun | ||
764 | .quad sys_timer_delete | ||
765 | .quad compat_sys_clock_settime | ||
766 | .quad compat_sys_clock_gettime /* 265 */ | ||
767 | .quad compat_sys_clock_getres | ||
768 | .quad compat_sys_clock_nanosleep | ||
769 | .quad compat_sys_statfs64 | ||
770 | .quad compat_sys_fstatfs64 | ||
771 | .quad sys_tgkill /* 270 */ | ||
772 | .quad compat_sys_utimes | ||
773 | .quad sys32_fadvise64_64 | ||
774 | .quad quiet_ni_syscall /* sys_vserver */ | ||
775 | .quad sys_mbind | ||
776 | .quad compat_sys_get_mempolicy /* 275 */ | ||
777 | .quad sys_set_mempolicy | ||
778 | .quad compat_sys_mq_open | ||
779 | .quad sys_mq_unlink | ||
780 | .quad compat_sys_mq_timedsend | ||
781 | .quad compat_sys_mq_timedreceive /* 280 */ | ||
782 | .quad compat_sys_mq_notify | ||
783 | .quad compat_sys_mq_getsetattr | ||
784 | .quad compat_sys_kexec_load /* reserved for kexec */ | ||
785 | .quad compat_sys_waitid | ||
786 | .quad quiet_ni_syscall /* 285: sys_altroot */ | ||
787 | .quad sys_add_key | ||
788 | .quad sys_request_key | ||
789 | .quad sys_keyctl | ||
790 | .quad sys_ioprio_set | ||
791 | .quad sys_ioprio_get /* 290 */ | ||
792 | .quad sys_inotify_init | ||
793 | .quad sys_inotify_add_watch | ||
794 | .quad sys_inotify_rm_watch | ||
795 | .quad sys_migrate_pages | ||
796 | .quad compat_sys_openat /* 295 */ | ||
797 | .quad sys_mkdirat | ||
798 | .quad sys_mknodat | ||
799 | .quad sys_fchownat | ||
800 | .quad compat_sys_futimesat | ||
801 | .quad sys32_fstatat /* 300 */ | ||
802 | .quad sys_unlinkat | ||
803 | .quad sys_renameat | ||
804 | .quad sys_linkat | ||
805 | .quad sys_symlinkat | ||
806 | .quad sys_readlinkat /* 305 */ | ||
807 | .quad sys_fchmodat | ||
808 | .quad sys_faccessat | ||
809 | .quad compat_sys_pselect6 | ||
810 | .quad compat_sys_ppoll | ||
811 | .quad sys_unshare /* 310 */ | ||
812 | .quad compat_sys_set_robust_list | ||
813 | .quad compat_sys_get_robust_list | ||
814 | .quad sys_splice | ||
815 | .quad sys32_sync_file_range | ||
816 | .quad sys_tee /* 315 */ | ||
817 | .quad compat_sys_vmsplice | ||
818 | .quad compat_sys_move_pages | ||
819 | .quad sys_getcpu | ||
820 | .quad sys_epoll_pwait | ||
821 | .quad compat_sys_utimensat /* 320 */ | ||
822 | .quad compat_sys_signalfd | ||
823 | .quad sys_timerfd_create | ||
824 | .quad sys_eventfd | ||
825 | .quad sys32_fallocate | ||
826 | .quad compat_sys_timerfd_settime /* 325 */ | ||
827 | .quad compat_sys_timerfd_gettime | ||
828 | .quad compat_sys_signalfd4 | ||
829 | .quad sys_eventfd2 | ||
830 | .quad sys_epoll_create1 | ||
831 | .quad sys_dup3 /* 330 */ | ||
832 | .quad sys_pipe2 | ||
833 | .quad sys_inotify_init1 | ||
834 | .quad compat_sys_preadv | ||
835 | .quad compat_sys_pwritev | ||
836 | .quad compat_sys_rt_tgsigqueueinfo /* 335 */ | ||
837 | .quad sys_perf_event_open | ||
838 | .quad compat_sys_recvmmsg | ||
839 | .quad sys_fanotify_init | ||
840 | .quad sys32_fanotify_mark | ||
841 | .quad sys_prlimit64 /* 340 */ | ||
842 | .quad sys_name_to_handle_at | ||
843 | .quad compat_sys_open_by_handle_at | ||
844 | .quad compat_sys_clock_adjtime | ||
845 | .quad sys_syncfs | ||
846 | .quad compat_sys_sendmmsg /* 345 */ | ||
847 | .quad sys_setns | ||
848 | .quad compat_sys_process_vm_readv | ||
849 | .quad compat_sys_process_vm_writev | ||
850 | ia32_syscall_end: | ||
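The ia32_syscall_end label exists so the entry code can size the table at assembly time (one .quad per entry, so the entry count is the byte span divided by 8) and bounds-check the incoming syscall number before the indirect call. A minimal user-space sketch of that dispatch shape follows; do_read, ni_syscall and friends are illustrative stand-ins, not the kernel's symbols.

#include <errno.h>
#include <stdio.h>

typedef long (*handler_t)(void);

static long do_read(void)  { return 0; }
static long do_write(void) { return 1; }
static long ni_syscall(void) { return -ENOSYS; }

static const handler_t table[] = { do_read, do_write };
#define NR_HANDLERS (sizeof(table) / sizeof(table[0]))

static long dispatch(unsigned long nr)
{
        if (nr >= NR_HANDLERS)  /* the entry code does this with a cmp/ja pair */
                return ni_syscall();
        return table[nr]();     /* the asm equivalent is an indirect call into the table */
}

int main(void)
{
        printf("%ld %ld %ld\n", dispatch(0), dispatch(1), dispatch(99));
        return 0;
}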
diff --git a/arch/x86/ia32/nosyscall.c b/arch/x86/ia32/nosyscall.c new file mode 100644 index 000000000000..51ecd5b4e787 --- /dev/null +++ b/arch/x86/ia32/nosyscall.c | |||
@@ -0,0 +1,7 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/errno.h> | ||
3 | |||
4 | long compat_ni_syscall(void) | ||
5 | { | ||
6 | return -ENOSYS; | ||
7 | } | ||
diff --git a/arch/x86/ia32/syscall_ia32.c b/arch/x86/ia32/syscall_ia32.c new file mode 100644 index 000000000000..4754ba0f5d9f --- /dev/null +++ b/arch/x86/ia32/syscall_ia32.c | |||
@@ -0,0 +1,25 @@ | |||
1 | /* System call table for ia32 emulation. */ | ||
2 | |||
3 | #include <linux/linkage.h> | ||
4 | #include <linux/sys.h> | ||
5 | #include <linux/cache.h> | ||
6 | #include <asm/asm-offsets.h> | ||
7 | |||
8 | #define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void compat(void) ; | ||
9 | #include <asm/syscalls_32.h> | ||
10 | #undef __SYSCALL_I386 | ||
11 | |||
12 | #define __SYSCALL_I386(nr, sym, compat) [nr] = compat, | ||
13 | |||
14 | typedef void (*sys_call_ptr_t)(void); | ||
15 | |||
16 | extern void compat_ni_syscall(void); | ||
17 | |||
18 | const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = { | ||
19 | /* | ||
20 | * Smells like a compiler bug -- it doesn't work | ||
21 | * when the & below is removed. | ||
22 | */ | ||
23 | [0 ... __NR_ia32_syscall_max] = &compat_ni_syscall, | ||
24 | #include <asm/syscalls_32.h> | ||
25 | }; | ||
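The new C table replaces the hand-maintained assembly list by expanding the same generated header twice: first with __SYSCALL_I386 emitting extern declarations, then again emitting designated initializers, with GCC's [0 ... N] range initializer pre-filling every slot with compat_ni_syscall so holes in the numbering fall through to -ENOSYS. A self-contained sketch of the same x-macro technique; the entry list is inlined here rather than generated, and all names are illustrative.

#include <stdio.h>

#define ENTRY_LIST \
        ENTRY(0, say_zero) \
        ENTRY(2, say_two)

/* First expansion: declarations. */
#define ENTRY(nr, fn) static void fn(void);
ENTRY_LIST
#undef ENTRY

static void say_zero(void) { puts("zero"); }
static void say_two(void)  { puts("two"); }
static void say_none(void) { puts("unimplemented"); }

#define MAX_NR 3

/* Second expansion: designated initializers. The GNU range initializer
 * fills every slot with the fallback; the listed entries override it. */
#define ENTRY(nr, fn) [nr] = fn,
static void (*const table[MAX_NR + 1])(void) = {
        [0 ... MAX_NR] = say_none,
        ENTRY_LIST
};
#undef ENTRY

int main(void)
{
        for (int i = 0; i <= MAX_NR; i++)
                table[i]();     /* prints: zero, unimplemented, two, unimplemented */
        return 0;
}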
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 6fa90a845e4c..b57e6a43a37a 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild | |||
@@ -19,7 +19,8 @@ header-y += processor-flags.h | |||
19 | header-y += ptrace-abi.h | 19 | header-y += ptrace-abi.h |
20 | header-y += sigcontext32.h | 20 | header-y += sigcontext32.h |
21 | header-y += ucontext.h | 21 | header-y += ucontext.h |
22 | header-y += unistd_32.h | ||
23 | header-y += unistd_64.h | ||
24 | header-y += vm86.h | 22 | header-y += vm86.h |
25 | header-y += vsyscall.h | 23 | header-y += vsyscall.h |
24 | |||
25 | genhdr-y += unistd_32.h | ||
26 | genhdr-y += unistd_64.h | ||
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 8e41071704a5..49ad773f4b9f 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h | |||
@@ -1,6 +1,7 @@ | |||
1 | #ifndef _ASM_X86_AMD_NB_H | 1 | #ifndef _ASM_X86_AMD_NB_H |
2 | #define _ASM_X86_AMD_NB_H | 2 | #define _ASM_X86_AMD_NB_H |
3 | 3 | ||
4 | #include <linux/ioport.h> | ||
4 | #include <linux/pci.h> | 5 | #include <linux/pci.h> |
5 | 6 | ||
6 | struct amd_nb_bus_dev_range { | 7 | struct amd_nb_bus_dev_range { |
@@ -13,6 +14,7 @@ extern const struct pci_device_id amd_nb_misc_ids[]; | |||
13 | extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[]; | 14 | extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[]; |
14 | 15 | ||
15 | extern bool early_is_amd_nb(u32 value); | 16 | extern bool early_is_amd_nb(u32 value); |
17 | extern struct resource *amd_get_mmconfig_range(struct resource *res); | ||
16 | extern int amd_cache_northbridges(void); | 18 | extern int amd_cache_northbridges(void); |
17 | extern void amd_flush_garts(void); | 19 | extern void amd_flush_garts(void); |
18 | extern int amd_numa_init(void); | 20 | extern int amd_numa_init(void); |
diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h index e020d88ec02d..2f90c51cc49d 100644 --- a/arch/x86/include/asm/bootparam.h +++ b/arch/x86/include/asm/bootparam.h | |||
@@ -64,6 +64,8 @@ struct setup_header { | |||
64 | __u32 payload_offset; | 64 | __u32 payload_offset; |
65 | __u32 payload_length; | 65 | __u32 payload_length; |
66 | __u64 setup_data; | 66 | __u64 setup_data; |
67 | __u64 pref_address; | ||
68 | __u32 init_size; | ||
67 | } __attribute__((packed)); | 69 | } __attribute__((packed)); |
68 | 70 | ||
69 | struct sys_desc_table { | 71 | struct sys_desc_table { |
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index f3444f700f36..17c5d4bdee5e 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
@@ -197,7 +197,10 @@ | |||
197 | 197 | ||
198 | /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ | 198 | /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ |
199 | #define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ | 199 | #define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ |
200 | #define X86_FEATURE_BMI1 (9*32+ 3) /* 1st group bit manipulation extensions */ | ||
201 | #define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */ | ||
200 | #define X86_FEATURE_SMEP (9*32+ 7) /* Supervisor Mode Execution Protection */ | 202 | #define X86_FEATURE_SMEP (9*32+ 7) /* Supervisor Mode Execution Protection */ |
203 | #define X86_FEATURE_BMI2 (9*32+ 8) /* 2nd group bit manipulation extensions */ | ||
201 | #define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */ | 204 | #define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */ |
202 | 205 | ||
203 | #if defined(__KERNEL__) && !defined(__ASSEMBLY__) | 206 | #if defined(__KERNEL__) && !defined(__ASSEMBLY__) |
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 078ad0caefc6..b903d5ea3941 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h | |||
@@ -101,6 +101,28 @@ extern void aout_dump_debugregs(struct user *dump); | |||
101 | 101 | ||
102 | extern void hw_breakpoint_restore(void); | 102 | extern void hw_breakpoint_restore(void); |
103 | 103 | ||
104 | #ifdef CONFIG_X86_64 | ||
105 | DECLARE_PER_CPU(int, debug_stack_usage); | ||
106 | static inline void debug_stack_usage_inc(void) | ||
107 | { | ||
108 | __get_cpu_var(debug_stack_usage)++; | ||
109 | } | ||
110 | static inline void debug_stack_usage_dec(void) | ||
111 | { | ||
112 | __get_cpu_var(debug_stack_usage)--; | ||
113 | } | ||
114 | int is_debug_stack(unsigned long addr); | ||
115 | void debug_stack_set_zero(void); | ||
116 | void debug_stack_reset(void); | ||
117 | #else /* !X86_64 */ | ||
118 | static inline int is_debug_stack(unsigned long addr) { return 0; } | ||
119 | static inline void debug_stack_set_zero(void) { } | ||
120 | static inline void debug_stack_reset(void) { } | ||
121 | static inline void debug_stack_usage_inc(void) { } | ||
122 | static inline void debug_stack_usage_dec(void) { } | ||
123 | #endif /* X86_64 */ | ||
124 | |||
125 | |||
104 | #endif /* __KERNEL__ */ | 126 | #endif /* __KERNEL__ */ |
105 | 127 | ||
106 | #endif /* _ASM_X86_DEBUGREG_H */ | 128 | #endif /* _ASM_X86_DEBUGREG_H */ |
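The 32-bit branch supplies empty stubs with the same signatures, the usual pattern for keeping #ifdef CONFIG_X86_64 out of every caller; the compiler simply discards the no-op calls. A tiny sketch of that pattern, using an ordinary int in place of the per-CPU variable and CONFIG_DEMO_64 as a stand-in config switch:

#include <stdio.h>

#define CONFIG_DEMO_64 1        /* stand-in for CONFIG_X86_64; remove to test the stubs */

#ifdef CONFIG_DEMO_64
static int debug_nesting;       /* plain int standing in for the per-CPU variable */
static inline void nesting_inc(void) { debug_nesting++; }
static inline void nesting_dec(void) { debug_nesting--; }
static inline int nesting_read(void) { return debug_nesting; }
#else
static inline void nesting_inc(void) { }
static inline void nesting_dec(void) { }
static inline int nesting_read(void) { return 0; }
#endif

int main(void)
{
        nesting_inc();          /* callers compile unchanged in both configurations */
        printf("nesting = %d\n", nesting_read());
        nesting_dec();
        return 0;
}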
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 41935fadfdfc..e95822d683f4 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h | |||
@@ -35,6 +35,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in | |||
35 | 35 | ||
36 | extern struct desc_ptr idt_descr; | 36 | extern struct desc_ptr idt_descr; |
37 | extern gate_desc idt_table[]; | 37 | extern gate_desc idt_table[]; |
38 | extern struct desc_ptr nmi_idt_descr; | ||
39 | extern gate_desc nmi_idt_table[]; | ||
38 | 40 | ||
39 | struct gdt_page { | 41 | struct gdt_page { |
40 | struct desc_struct gdt[GDT_ENTRIES]; | 42 | struct desc_struct gdt[GDT_ENTRIES]; |
@@ -307,6 +309,16 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit) | |||
307 | desc->limit = (limit >> 16) & 0xf; | 309 | desc->limit = (limit >> 16) & 0xf; |
308 | } | 310 | } |
309 | 311 | ||
312 | #ifdef CONFIG_X86_64 | ||
313 | static inline void set_nmi_gate(int gate, void *addr) | ||
314 | { | ||
315 | gate_desc s; | ||
316 | |||
317 | pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS); | ||
318 | write_idt_entry(nmi_idt_table, gate, &s); | ||
319 | } | ||
320 | #endif | ||
321 | |||
310 | static inline void _set_gate(int gate, unsigned type, void *addr, | 322 | static inline void _set_gate(int gate, unsigned type, void *addr, |
311 | unsigned dpl, unsigned ist, unsigned seg) | 323 | unsigned dpl, unsigned ist, unsigned seg) |
312 | { | 324 | { |
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 7093e4a6a0bc..844f735fd63a 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h | |||
@@ -3,6 +3,8 @@ | |||
3 | 3 | ||
4 | #ifdef CONFIG_X86_32 | 4 | #ifdef CONFIG_X86_32 |
5 | 5 | ||
6 | #define EFI_LOADER_SIGNATURE "EL32" | ||
7 | |||
6 | extern unsigned long asmlinkage efi_call_phys(void *, ...); | 8 | extern unsigned long asmlinkage efi_call_phys(void *, ...); |
7 | 9 | ||
8 | #define efi_call_phys0(f) efi_call_phys(f) | 10 | #define efi_call_phys0(f) efi_call_phys(f) |
@@ -37,6 +39,8 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...); | |||
37 | 39 | ||
38 | #else /* !CONFIG_X86_32 */ | 40 | #else /* !CONFIG_X86_32 */ |
39 | 41 | ||
42 | #define EFI_LOADER_SIGNATURE "EL64" | ||
43 | |||
40 | extern u64 efi_call0(void *fp); | 44 | extern u64 efi_call0(void *fp); |
41 | extern u64 efi_call1(void *fp, u64 arg1); | 45 | extern u64 efi_call1(void *fp, u64 arg1); |
42 | extern u64 efi_call2(void *fp, u64 arg1, u64 arg2); | 46 | extern u64 efi_call2(void *fp, u64 arg1, u64 arg2); |
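The loader signature is a bare 4-byte tag ("EL32"/"EL64", no NUL terminator), so a consumer has to compare exactly four bytes. A hedged sketch of such a check; the function name and call site are assumptions, only the two signature strings come from the header above.

#include <stdio.h>
#include <string.h>

static const char *efi_kind(const char *sig)
{
        if (!memcmp(sig, "EL32", 4))
                return "32-bit EFI loader";
        if (!memcmp(sig, "EL64", 4))
                return "64-bit EFI loader";
        return "not an EFI loader";
}

int main(void)
{
        printf("%s\n", efi_kind("EL64"));
        return 0;
}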
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 460c74e4852c..4da3c0c4c974 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h | |||
@@ -117,7 +117,7 @@ enum fixed_addresses { | |||
117 | #endif | 117 | #endif |
118 | FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */ | 118 | FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */ |
119 | FIX_TEXT_POKE0, /* first page is last, because allocation is backward */ | 119 | FIX_TEXT_POKE0, /* first page is last, because allocation is backward */ |
120 | #ifdef CONFIG_X86_MRST | 120 | #ifdef CONFIG_X86_INTEL_MID |
121 | FIX_LNW_VRTC, | 121 | FIX_LNW_VRTC, |
122 | #endif | 122 | #endif |
123 | __end_of_permanent_fixed_addresses, | 123 | __end_of_permanent_fixed_addresses, |
diff --git a/arch/x86/include/asm/ia32_unistd.h b/arch/x86/include/asm/ia32_unistd.h index 976f6ecd2ce6..b0d5716ca1e4 100644 --- a/arch/x86/include/asm/ia32_unistd.h +++ b/arch/x86/include/asm/ia32_unistd.h | |||
@@ -2,17 +2,10 @@ | |||
2 | #define _ASM_X86_IA32_UNISTD_H | 2 | #define _ASM_X86_IA32_UNISTD_H |
3 | 3 | ||
4 | /* | 4 | /* |
5 | * This file contains the system call numbers of the ia32 port, | 5 | * This file contains the system call numbers of the ia32 compat ABI, |
6 | * this is for the kernel only. | 6 | * this is for the kernel only. |
7 | * Only add syscalls here where some part of the kernel needs to know | ||
8 | * the number. This should be otherwise in sync with asm-x86/unistd_32.h. -AK | ||
9 | */ | 7 | */ |
10 | 8 | #define __SYSCALL_ia32_NR(x) (x) | |
11 | #define __NR_ia32_restart_syscall 0 | 9 | #include <asm/unistd_32_ia32.h> |
12 | #define __NR_ia32_exit 1 | ||
13 | #define __NR_ia32_read 3 | ||
14 | #define __NR_ia32_write 4 | ||
15 | #define __NR_ia32_sigreturn 119 | ||
16 | #define __NR_ia32_rt_sigreturn 173 | ||
17 | 10 | ||
18 | #endif /* _ASM_X86_IA32_UNISTD_H */ | 11 | #endif /* _ASM_X86_IA32_UNISTD_H */ |
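The hand-kept constants give way to a generated unistd_32_ia32.h, with __SYSCALL_ia32_NR() left as an identity mapping so a different consumer could offset or remap the numbers by redefining it first. A sketch of what the generated pattern plausibly looks like, using the two numbers visible in the removed lines (restart_syscall = 0, exit = 1); the rest of the generated file's contents are not shown here.

#include <stdio.h>

#define __SYSCALL_ia32_NR(x) (x)        /* identity mapping, as in the header above */

/* Two entries in the style the generated header presumably uses. */
#define __NR_ia32_restart_syscall (__SYSCALL_ia32_NR(0))
#define __NR_ia32_exit            (__SYSCALL_ia32_NR(1))

int main(void)
{
        printf("%d %d\n", __NR_ia32_restart_syscall, __NR_ia32_exit);
        return 0;
}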
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h index 8dbe353e41e1..adcc0ae73d09 100644 --- a/arch/x86/include/asm/init.h +++ b/arch/x86/include/asm/init.h | |||
@@ -5,6 +5,8 @@ | |||
5 | extern void __init early_ioremap_page_table_range_init(void); | 5 | extern void __init early_ioremap_page_table_range_init(void); |
6 | #endif | 6 | #endif |
7 | 7 | ||
8 | extern void __init zone_sizes_init(void); | ||
9 | |||
8 | extern unsigned long __init | 10 | extern unsigned long __init |
9 | kernel_physical_mapping_init(unsigned long start, | 11 | kernel_physical_mapping_init(unsigned long start, |
10 | unsigned long end, | 12 | unsigned long end, |
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index 345c99cef152..dffc38ee6255 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h | |||
@@ -5,6 +5,7 @@ extern struct dma_map_ops nommu_dma_ops; | |||
5 | extern int force_iommu, no_iommu; | 5 | extern int force_iommu, no_iommu; |
6 | extern int iommu_detected; | 6 | extern int iommu_detected; |
7 | extern int iommu_pass_through; | 7 | extern int iommu_pass_through; |
8 | extern int iommu_group_mf; | ||
8 | 9 | ||
9 | /* 10 seconds */ | 10 | /* 10 seconds */ |
10 | #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) | 11 | #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) |
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index a026507893e9..ab4092e3214e 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h | |||
@@ -181,6 +181,7 @@ struct x86_emulate_ops { | |||
181 | int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); | 181 | int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); |
182 | int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); | 182 | int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); |
183 | int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); | 183 | int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); |
184 | int (*read_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc, u64 *pdata); | ||
184 | void (*halt)(struct x86_emulate_ctxt *ctxt); | 185 | void (*halt)(struct x86_emulate_ctxt *ctxt); |
185 | void (*wbinvd)(struct x86_emulate_ctxt *ctxt); | 186 | void (*wbinvd)(struct x86_emulate_ctxt *ctxt); |
186 | int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt); | 187 | int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt); |
@@ -364,6 +365,7 @@ enum x86_intercept { | |||
364 | #endif | 365 | #endif |
365 | 366 | ||
366 | int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len); | 367 | int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len); |
368 | bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt); | ||
367 | #define EMULATION_FAILED -1 | 369 | #define EMULATION_FAILED -1 |
368 | #define EMULATION_OK 0 | 370 | #define EMULATION_OK 0 |
369 | #define EMULATION_RESTART 1 | 371 | #define EMULATION_RESTART 1 |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b4973f4dab98..52d6640a5ca1 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -16,10 +16,12 @@ | |||
16 | #include <linux/mmu_notifier.h> | 16 | #include <linux/mmu_notifier.h> |
17 | #include <linux/tracepoint.h> | 17 | #include <linux/tracepoint.h> |
18 | #include <linux/cpumask.h> | 18 | #include <linux/cpumask.h> |
19 | #include <linux/irq_work.h> | ||
19 | 20 | ||
20 | #include <linux/kvm.h> | 21 | #include <linux/kvm.h> |
21 | #include <linux/kvm_para.h> | 22 | #include <linux/kvm_para.h> |
22 | #include <linux/kvm_types.h> | 23 | #include <linux/kvm_types.h> |
24 | #include <linux/perf_event.h> | ||
23 | 25 | ||
24 | #include <asm/pvclock-abi.h> | 26 | #include <asm/pvclock-abi.h> |
25 | #include <asm/desc.h> | 27 | #include <asm/desc.h> |
@@ -31,6 +33,8 @@ | |||
31 | #define KVM_MEMORY_SLOTS 32 | 33 | #define KVM_MEMORY_SLOTS 32 |
32 | /* memory slots that does not exposed to userspace */ | 34 | /* memory slots that does not exposed to userspace */ |
33 | #define KVM_PRIVATE_MEM_SLOTS 4 | 35 | #define KVM_PRIVATE_MEM_SLOTS 4 |
36 | #define KVM_MEM_SLOTS_NUM (KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) | ||
37 | |||
34 | #define KVM_MMIO_SIZE 16 | 38 | #define KVM_MMIO_SIZE 16 |
35 | 39 | ||
36 | #define KVM_PIO_PAGE_OFFSET 1 | 40 | #define KVM_PIO_PAGE_OFFSET 1 |
@@ -228,7 +232,7 @@ struct kvm_mmu_page { | |||
228 | * One bit set per slot which has memory | 232 | * One bit set per slot which has memory |
229 | * in this shadow page. | 233 | * in this shadow page. |
230 | */ | 234 | */ |
231 | DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); | 235 | DECLARE_BITMAP(slot_bitmap, KVM_MEM_SLOTS_NUM); |
232 | bool unsync; | 236 | bool unsync; |
233 | int root_count; /* Currently serving as active root */ | 237 | int root_count; /* Currently serving as active root */ |
234 | unsigned int unsync_children; | 238 | unsigned int unsync_children; |
@@ -239,14 +243,9 @@ struct kvm_mmu_page { | |||
239 | int clear_spte_count; | 243 | int clear_spte_count; |
240 | #endif | 244 | #endif |
241 | 245 | ||
242 | struct rcu_head rcu; | 246 | int write_flooding_count; |
243 | }; | ||
244 | 247 | ||
245 | struct kvm_pv_mmu_op_buffer { | 248 | struct rcu_head rcu; |
246 | void *ptr; | ||
247 | unsigned len; | ||
248 | unsigned processed; | ||
249 | char buf[512] __aligned(sizeof(long)); | ||
250 | }; | 249 | }; |
251 | 250 | ||
252 | struct kvm_pio_request { | 251 | struct kvm_pio_request { |
@@ -294,6 +293,37 @@ struct kvm_mmu { | |||
294 | u64 pdptrs[4]; /* pae */ | 293 | u64 pdptrs[4]; /* pae */ |
295 | }; | 294 | }; |
296 | 295 | ||
296 | enum pmc_type { | ||
297 | KVM_PMC_GP = 0, | ||
298 | KVM_PMC_FIXED, | ||
299 | }; | ||
300 | |||
301 | struct kvm_pmc { | ||
302 | enum pmc_type type; | ||
303 | u8 idx; | ||
304 | u64 counter; | ||
305 | u64 eventsel; | ||
306 | struct perf_event *perf_event; | ||
307 | struct kvm_vcpu *vcpu; | ||
308 | }; | ||
309 | |||
310 | struct kvm_pmu { | ||
311 | unsigned nr_arch_gp_counters; | ||
312 | unsigned nr_arch_fixed_counters; | ||
313 | unsigned available_event_types; | ||
314 | u64 fixed_ctr_ctrl; | ||
315 | u64 global_ctrl; | ||
316 | u64 global_status; | ||
317 | u64 global_ovf_ctrl; | ||
318 | u64 counter_bitmask[2]; | ||
319 | u64 global_ctrl_mask; | ||
320 | u8 version; | ||
321 | struct kvm_pmc gp_counters[X86_PMC_MAX_GENERIC]; | ||
322 | struct kvm_pmc fixed_counters[X86_PMC_MAX_FIXED]; | ||
323 | struct irq_work irq_work; | ||
324 | u64 reprogram_pmi; | ||
325 | }; | ||
326 | |||
297 | struct kvm_vcpu_arch { | 327 | struct kvm_vcpu_arch { |
298 | /* | 328 | /* |
299 | * rip and regs accesses must go through | 329 | * rip and regs accesses must go through |
@@ -345,19 +375,10 @@ struct kvm_vcpu_arch { | |||
345 | */ | 375 | */ |
346 | struct kvm_mmu *walk_mmu; | 376 | struct kvm_mmu *walk_mmu; |
347 | 377 | ||
348 | /* only needed in kvm_pv_mmu_op() path, but it's hot so | ||
349 | * put it here to avoid allocation */ | ||
350 | struct kvm_pv_mmu_op_buffer mmu_op_buffer; | ||
351 | |||
352 | struct kvm_mmu_memory_cache mmu_pte_list_desc_cache; | 378 | struct kvm_mmu_memory_cache mmu_pte_list_desc_cache; |
353 | struct kvm_mmu_memory_cache mmu_page_cache; | 379 | struct kvm_mmu_memory_cache mmu_page_cache; |
354 | struct kvm_mmu_memory_cache mmu_page_header_cache; | 380 | struct kvm_mmu_memory_cache mmu_page_header_cache; |
355 | 381 | ||
356 | gfn_t last_pt_write_gfn; | ||
357 | int last_pt_write_count; | ||
358 | u64 *last_pte_updated; | ||
359 | gfn_t last_pte_gfn; | ||
360 | |||
361 | struct fpu guest_fpu; | 382 | struct fpu guest_fpu; |
362 | u64 xcr0; | 383 | u64 xcr0; |
363 | 384 | ||
@@ -436,6 +457,8 @@ struct kvm_vcpu_arch { | |||
436 | unsigned access; | 457 | unsigned access; |
437 | gfn_t mmio_gfn; | 458 | gfn_t mmio_gfn; |
438 | 459 | ||
460 | struct kvm_pmu pmu; | ||
461 | |||
439 | /* used for guest single stepping over the given code position */ | 462 | /* used for guest single stepping over the given code position */ |
440 | unsigned long singlestep_rip; | 463 | unsigned long singlestep_rip; |
441 | 464 | ||
@@ -444,6 +467,9 @@ struct kvm_vcpu_arch { | |||
444 | 467 | ||
445 | cpumask_var_t wbinvd_dirty_mask; | 468 | cpumask_var_t wbinvd_dirty_mask; |
446 | 469 | ||
470 | unsigned long last_retry_eip; | ||
471 | unsigned long last_retry_addr; | ||
472 | |||
447 | struct { | 473 | struct { |
448 | bool halted; | 474 | bool halted; |
449 | gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)]; | 475 | gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)]; |
@@ -459,7 +485,6 @@ struct kvm_arch { | |||
459 | unsigned int n_requested_mmu_pages; | 485 | unsigned int n_requested_mmu_pages; |
460 | unsigned int n_max_mmu_pages; | 486 | unsigned int n_max_mmu_pages; |
461 | unsigned int indirect_shadow_pages; | 487 | unsigned int indirect_shadow_pages; |
462 | atomic_t invlpg_counter; | ||
463 | struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; | 488 | struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; |
464 | /* | 489 | /* |
465 | * Hash table of struct kvm_mmu_page. | 490 | * Hash table of struct kvm_mmu_page. |
@@ -660,6 +685,8 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | |||
660 | 685 | ||
661 | int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); | 686 | int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); |
662 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); | 687 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); |
688 | int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn, | ||
689 | struct kvm_memory_slot *slot); | ||
663 | void kvm_mmu_zap_all(struct kvm *kvm); | 690 | void kvm_mmu_zap_all(struct kvm *kvm); |
664 | unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); | 691 | unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); |
665 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); | 692 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); |
@@ -668,8 +695,6 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3); | |||
668 | 695 | ||
669 | int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | 696 | int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, |
670 | const void *val, int bytes); | 697 | const void *val, int bytes); |
671 | int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes, | ||
672 | gpa_t addr, unsigned long *ret); | ||
673 | u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); | 698 | u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); |
674 | 699 | ||
675 | extern bool tdp_enabled; | 700 | extern bool tdp_enabled; |
@@ -692,6 +717,7 @@ enum emulation_result { | |||
692 | #define EMULTYPE_NO_DECODE (1 << 0) | 717 | #define EMULTYPE_NO_DECODE (1 << 0) |
693 | #define EMULTYPE_TRAP_UD (1 << 1) | 718 | #define EMULTYPE_TRAP_UD (1 << 1) |
694 | #define EMULTYPE_SKIP (1 << 2) | 719 | #define EMULTYPE_SKIP (1 << 2) |
720 | #define EMULTYPE_RETRY (1 << 3) | ||
695 | int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, | 721 | int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, |
696 | int emulation_type, void *insn, int insn_len); | 722 | int emulation_type, void *insn, int insn_len); |
697 | 723 | ||
@@ -734,6 +760,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); | |||
734 | 760 | ||
735 | unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu); | 761 | unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu); |
736 | void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); | 762 | void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); |
763 | bool kvm_rdpmc(struct kvm_vcpu *vcpu); | ||
737 | 764 | ||
738 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); | 765 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); |
739 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); | 766 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); |
@@ -754,13 +781,14 @@ int fx_init(struct kvm_vcpu *vcpu); | |||
754 | 781 | ||
755 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); | 782 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); |
756 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 783 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
757 | const u8 *new, int bytes, | 784 | const u8 *new, int bytes); |
758 | bool guest_initiated); | 785 | int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); |
759 | int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); | 786 | int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); |
760 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); | 787 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); |
761 | int kvm_mmu_load(struct kvm_vcpu *vcpu); | 788 | int kvm_mmu_load(struct kvm_vcpu *vcpu); |
762 | void kvm_mmu_unload(struct kvm_vcpu *vcpu); | 789 | void kvm_mmu_unload(struct kvm_vcpu *vcpu); |
763 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); | 790 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); |
791 | gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access); | ||
764 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, | 792 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, |
765 | struct x86_exception *exception); | 793 | struct x86_exception *exception); |
766 | gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, | 794 | gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, |
@@ -782,6 +810,11 @@ void kvm_disable_tdp(void); | |||
782 | int complete_pio(struct kvm_vcpu *vcpu); | 810 | int complete_pio(struct kvm_vcpu *vcpu); |
783 | bool kvm_check_iopl(struct kvm_vcpu *vcpu); | 811 | bool kvm_check_iopl(struct kvm_vcpu *vcpu); |
784 | 812 | ||
813 | static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) | ||
814 | { | ||
815 | return gpa; | ||
816 | } | ||
817 | |||
785 | static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) | 818 | static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) |
786 | { | 819 | { |
787 | struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); | 820 | struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); |
@@ -894,4 +927,17 @@ extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); | |||
894 | 927 | ||
895 | void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err); | 928 | void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err); |
896 | 929 | ||
930 | int kvm_is_in_guest(void); | ||
931 | |||
932 | void kvm_pmu_init(struct kvm_vcpu *vcpu); | ||
933 | void kvm_pmu_destroy(struct kvm_vcpu *vcpu); | ||
934 | void kvm_pmu_reset(struct kvm_vcpu *vcpu); | ||
935 | void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu); | ||
936 | bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr); | ||
937 | int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data); | ||
938 | int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data); | ||
939 | int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data); | ||
940 | void kvm_handle_pmu_event(struct kvm_vcpu *vcpu); | ||
941 | void kvm_deliver_pmi(struct kvm_vcpu *vcpu); | ||
942 | |||
897 | #endif /* _ASM_X86_KVM_HOST_H */ | 943 | #endif /* _ASM_X86_KVM_HOST_H */ |
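In the new kvm_pmu state, global_ctrl mirrors the architectural IA32_PERF_GLOBAL_CTRL register, where bit N gates counter N, and global_ctrl_mask marks which bits the guest may set. A small illustration of that bit-per-counter gating; this is the bitmask semantics only, not KVM code.

#include <stdint.h>
#include <stdio.h>

#define NR_COUNTERS 4

static int counter_enabled(uint64_t global_ctrl, unsigned int idx)
{
        return (global_ctrl >> idx) & 1;        /* bit N gates counter N */
}

int main(void)
{
        uint64_t global_ctrl = 0x5;     /* counters 0 and 2 enabled */
        unsigned int i;

        for (i = 0; i < NR_COUNTERS; i++)
                printf("counter %u: %s\n", i,
                       counter_enabled(global_ctrl, i) ? "on" : "off");
        return 0;
}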
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 6add827381c9..6aefb14cbbc5 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
@@ -151,7 +151,7 @@ static inline void enable_p5_mce(void) {} | |||
151 | 151 | ||
152 | void mce_setup(struct mce *m); | 152 | void mce_setup(struct mce *m); |
153 | void mce_log(struct mce *m); | 153 | void mce_log(struct mce *m); |
154 | DECLARE_PER_CPU(struct sys_device, mce_sysdev); | 154 | extern struct device *mce_device[CONFIG_NR_CPUS]; |
155 | 155 | ||
156 | /* | 156 | /* |
157 | * Maximum banks number. | 157 | * Maximum banks number. |
diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h index 93f79094c224..0a0a95460434 100644 --- a/arch/x86/include/asm/mrst.h +++ b/arch/x86/include/asm/mrst.h | |||
@@ -67,7 +67,7 @@ extern struct console early_mrst_console; | |||
67 | extern void mrst_early_console_init(void); | 67 | extern void mrst_early_console_init(void); |
68 | 68 | ||
69 | extern struct console early_hsu_console; | 69 | extern struct console early_hsu_console; |
70 | extern void hsu_early_console_init(void); | 70 | extern void hsu_early_console_init(const char *); |
71 | 71 | ||
72 | extern void intel_scu_devices_create(void); | 72 | extern void intel_scu_devices_create(void); |
73 | extern void intel_scu_devices_destroy(void); | 73 | extern void intel_scu_devices_destroy(void); |
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index d498943b906c..df75d07571ce 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h | |||
@@ -112,19 +112,28 @@ static inline void x86_teardown_msi_irq(unsigned int irq) | |||
112 | { | 112 | { |
113 | x86_msi.teardown_msi_irq(irq); | 113 | x86_msi.teardown_msi_irq(irq); |
114 | } | 114 | } |
115 | static inline void x86_restore_msi_irqs(struct pci_dev *dev, int irq) | ||
116 | { | ||
117 | x86_msi.restore_msi_irqs(dev, irq); | ||
118 | } | ||
115 | #define arch_setup_msi_irqs x86_setup_msi_irqs | 119 | #define arch_setup_msi_irqs x86_setup_msi_irqs |
116 | #define arch_teardown_msi_irqs x86_teardown_msi_irqs | 120 | #define arch_teardown_msi_irqs x86_teardown_msi_irqs |
117 | #define arch_teardown_msi_irq x86_teardown_msi_irq | 121 | #define arch_teardown_msi_irq x86_teardown_msi_irq |
122 | #define arch_restore_msi_irqs x86_restore_msi_irqs | ||
118 | /* implemented in arch/x86/kernel/apic/io_apic. */ | 123 | /* implemented in arch/x86/kernel/apic/io_apic. */ |
119 | int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); | 124 | int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); |
120 | void native_teardown_msi_irq(unsigned int irq); | 125 | void native_teardown_msi_irq(unsigned int irq); |
126 | void native_restore_msi_irqs(struct pci_dev *dev, int irq); | ||
121 | /* default to the implementation in drivers/lib/msi.c */ | 127 | /* default to the implementation in drivers/lib/msi.c */ |
122 | #define HAVE_DEFAULT_MSI_TEARDOWN_IRQS | 128 | #define HAVE_DEFAULT_MSI_TEARDOWN_IRQS |
129 | #define HAVE_DEFAULT_MSI_RESTORE_IRQS | ||
123 | void default_teardown_msi_irqs(struct pci_dev *dev); | 130 | void default_teardown_msi_irqs(struct pci_dev *dev); |
131 | void default_restore_msi_irqs(struct pci_dev *dev, int irq); | ||
124 | #else | 132 | #else |
125 | #define native_setup_msi_irqs NULL | 133 | #define native_setup_msi_irqs NULL |
126 | #define native_teardown_msi_irq NULL | 134 | #define native_teardown_msi_irq NULL |
127 | #define default_teardown_msi_irqs NULL | 135 | #define default_teardown_msi_irqs NULL |
136 | #define default_restore_msi_irqs NULL | ||
128 | #endif | 137 | #endif |
129 | 138 | ||
130 | #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) | 139 | #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) |
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index e38197806853..b3a531746026 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h | |||
@@ -44,8 +44,6 @@ enum pci_bf_sort_state { | |||
44 | 44 | ||
45 | /* pci-i386.c */ | 45 | /* pci-i386.c */ |
46 | 46 | ||
47 | extern unsigned int pcibios_max_latency; | ||
48 | |||
49 | void pcibios_resource_survey(void); | 47 | void pcibios_resource_survey(void); |
50 | void pcibios_set_cache_line_size(void); | 48 | void pcibios_set_cache_line_size(void); |
51 | 49 | ||
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 529bf07e8067..7a11910a63c4 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h | |||
@@ -414,22 +414,6 @@ do { \ | |||
414 | #define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval) | 414 | #define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval) |
415 | #define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval) | 415 | #define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval) |
416 | 416 | ||
417 | #define irqsafe_cpu_add_1(pcp, val) percpu_add_op((pcp), val) | ||
418 | #define irqsafe_cpu_add_2(pcp, val) percpu_add_op((pcp), val) | ||
419 | #define irqsafe_cpu_add_4(pcp, val) percpu_add_op((pcp), val) | ||
420 | #define irqsafe_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) | ||
421 | #define irqsafe_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) | ||
422 | #define irqsafe_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) | ||
423 | #define irqsafe_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val) | ||
424 | #define irqsafe_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val) | ||
425 | #define irqsafe_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val) | ||
426 | #define irqsafe_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val) | ||
427 | #define irqsafe_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) | ||
428 | #define irqsafe_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) | ||
429 | #define irqsafe_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval) | ||
430 | #define irqsafe_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval) | ||
431 | #define irqsafe_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval) | ||
432 | |||
433 | #ifndef CONFIG_M386 | 417 | #ifndef CONFIG_M386 |
434 | #define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) | 418 | #define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) |
435 | #define __this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) | 419 | #define __this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) |
@@ -445,9 +429,6 @@ do { \ | |||
445 | #define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | 429 | #define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) |
446 | #define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | 430 | #define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) |
447 | 431 | ||
448 | #define irqsafe_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
449 | #define irqsafe_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
450 | #define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
451 | #endif /* !CONFIG_M386 */ | 432 | #endif /* !CONFIG_M386 */ |
452 | 433 | ||
453 | #ifdef CONFIG_X86_CMPXCHG64 | 434 | #ifdef CONFIG_X86_CMPXCHG64 |
@@ -464,7 +445,6 @@ do { \ | |||
464 | 445 | ||
465 | #define __this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double | 446 | #define __this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double |
466 | #define this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double | 447 | #define this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double |
467 | #define irqsafe_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double | ||
468 | #endif /* CONFIG_X86_CMPXCHG64 */ | 448 | #endif /* CONFIG_X86_CMPXCHG64 */ |
469 | 449 | ||
470 | /* | 450 | /* |
@@ -492,13 +472,6 @@ do { \ | |||
492 | #define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) | 472 | #define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) |
493 | #define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | 473 | #define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) |
494 | 474 | ||
495 | #define irqsafe_cpu_add_8(pcp, val) percpu_add_op((pcp), val) | ||
496 | #define irqsafe_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) | ||
497 | #define irqsafe_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) | ||
498 | #define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) | ||
499 | #define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) | ||
500 | #define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
501 | |||
502 | /* | 475 | /* |
503 | * Pretty complex macro to generate cmpxchg16 instruction. The instruction | 476 | * Pretty complex macro to generate cmpxchg16 instruction. The instruction |
504 | * is not supported on early AMD64 processors so we must be able to emulate | 477 | * is not supported on early AMD64 processors so we must be able to emulate |
@@ -521,7 +494,6 @@ do { \ | |||
521 | 494 | ||
522 | #define __this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double | 495 | #define __this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double |
523 | #define this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double | 496 | #define this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double |
524 | #define irqsafe_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double | ||
525 | 497 | ||
526 | #endif | 498 | #endif |
527 | 499 | ||
diff --git a/arch/x86/include/asm/serpent.h b/arch/x86/include/asm/serpent.h new file mode 100644 index 000000000000..d3ef63fe0c81 --- /dev/null +++ b/arch/x86/include/asm/serpent.h | |||
@@ -0,0 +1,63 @@ | |||
1 | #ifndef ASM_X86_SERPENT_H | ||
2 | #define ASM_X86_SERPENT_H | ||
3 | |||
4 | #include <linux/crypto.h> | ||
5 | #include <crypto/serpent.h> | ||
6 | |||
7 | #ifdef CONFIG_X86_32 | ||
8 | |||
9 | #define SERPENT_PARALLEL_BLOCKS 4 | ||
10 | |||
11 | asmlinkage void __serpent_enc_blk_4way(struct serpent_ctx *ctx, u8 *dst, | ||
12 | const u8 *src, bool xor); | ||
13 | asmlinkage void serpent_dec_blk_4way(struct serpent_ctx *ctx, u8 *dst, | ||
14 | const u8 *src); | ||
15 | |||
16 | static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, | ||
17 | const u8 *src) | ||
18 | { | ||
19 | __serpent_enc_blk_4way(ctx, dst, src, false); | ||
20 | } | ||
21 | |||
22 | static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst, | ||
23 | const u8 *src) | ||
24 | { | ||
25 | __serpent_enc_blk_4way(ctx, dst, src, true); | ||
26 | } | ||
27 | |||
28 | static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, | ||
29 | const u8 *src) | ||
30 | { | ||
31 | serpent_dec_blk_4way(ctx, dst, src); | ||
32 | } | ||
33 | |||
34 | #else | ||
35 | |||
36 | #define SERPENT_PARALLEL_BLOCKS 8 | ||
37 | |||
38 | asmlinkage void __serpent_enc_blk_8way(struct serpent_ctx *ctx, u8 *dst, | ||
39 | const u8 *src, bool xor); | ||
40 | asmlinkage void serpent_dec_blk_8way(struct serpent_ctx *ctx, u8 *dst, | ||
41 | const u8 *src); | ||
42 | |||
43 | static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, | ||
44 | const u8 *src) | ||
45 | { | ||
46 | __serpent_enc_blk_8way(ctx, dst, src, false); | ||
47 | } | ||
48 | |||
49 | static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst, | ||
50 | const u8 *src) | ||
51 | { | ||
52 | __serpent_enc_blk_8way(ctx, dst, src, true); | ||
53 | } | ||
54 | |||
55 | static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, | ||
56 | const u8 *src) | ||
57 | { | ||
58 | serpent_dec_blk_8way(ctx, dst, src); | ||
59 | } | ||
60 | |||
61 | #endif | ||
62 | |||
63 | #endif | ||
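The xway wrappers give 32-bit (4-way) and 64-bit (8-way) builds a uniform many-blocks-at-a-time entry point, so glue code can be written once against SERPENT_PARALLEL_BLOCKS. A sketch of the batching shape such glue presumably takes: a wide path plus a scalar tail. process_xway()/process_one() and the XOR transform are placeholders, not the serpent primitives.

#include <stdio.h>
#include <stddef.h>

#define PARALLEL_BLOCKS 4
#define BLOCK_SIZE 16

static void process_xway(unsigned char *dst, const unsigned char *src)
{
        for (int i = 0; i < PARALLEL_BLOCKS * BLOCK_SIZE; i++)
                dst[i] = src[i] ^ 0xAA;         /* placeholder transform */
}

static void process_one(unsigned char *dst, const unsigned char *src)
{
        for (int i = 0; i < BLOCK_SIZE; i++)
                dst[i] = src[i] ^ 0xAA;
}

static void process_all(unsigned char *dst, const unsigned char *src,
                        size_t nblocks)
{
        while (nblocks >= PARALLEL_BLOCKS) {    /* wide path, N blocks at once */
                process_xway(dst, src);
                src += PARALLEL_BLOCKS * BLOCK_SIZE;
                dst += PARALLEL_BLOCKS * BLOCK_SIZE;
                nblocks -= PARALLEL_BLOCKS;
        }
        while (nblocks--) {                     /* scalar tail */
                process_one(dst, src);
                src += BLOCK_SIZE;
                dst += BLOCK_SIZE;
        }
}

int main(void)
{
        unsigned char in[7 * BLOCK_SIZE] = { 0 }, out[7 * BLOCK_SIZE];

        process_all(out, in, 7);        /* one wide pass plus three tail blocks */
        printf("%02x\n", out[0]);       /* 0xaa */
        return 0;
}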
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 9756551ec760..d0f19f9fb846 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h | |||
@@ -47,7 +47,7 @@ extern void reserve_standard_io_resources(void); | |||
47 | extern void i386_reserve_resources(void); | 47 | extern void i386_reserve_resources(void); |
48 | extern void setup_default_timer_irq(void); | 48 | extern void setup_default_timer_irq(void); |
49 | 49 | ||
50 | #ifdef CONFIG_X86_MRST | 50 | #ifdef CONFIG_X86_INTEL_MID |
51 | extern void x86_mrst_early_setup(void); | 51 | extern void x86_mrst_early_setup(void); |
52 | #else | 52 | #else |
53 | static inline void x86_mrst_early_setup(void) { } | 53 | static inline void x86_mrst_early_setup(void) { } |
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 73b11bc0ae6f..0434c400287c 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h | |||
@@ -225,5 +225,11 @@ extern int hard_smp_processor_id(void); | |||
225 | 225 | ||
226 | #endif /* CONFIG_X86_LOCAL_APIC */ | 226 | #endif /* CONFIG_X86_LOCAL_APIC */ |
227 | 227 | ||
228 | #ifdef CONFIG_DEBUG_NMI_SELFTEST | ||
229 | extern void nmi_selftest(void); | ||
230 | #else | ||
231 | #define nmi_selftest() do { } while (0) | ||
232 | #endif | ||
233 | |||
228 | #endif /* __ASSEMBLY__ */ | 234 | #endif /* __ASSEMBLY__ */ |
229 | #endif /* _ASM_X86_SMP_H */ | 235 | #endif /* _ASM_X86_SMP_H */ |
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index c4a348f7bd43..d962e5652a73 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h | |||
@@ -15,6 +15,7 @@ | |||
15 | 15 | ||
16 | #include <linux/sched.h> | 16 | #include <linux/sched.h> |
17 | #include <linux/err.h> | 17 | #include <linux/err.h> |
18 | #include <asm/asm-offsets.h> /* For NR_syscalls */ | ||
18 | 19 | ||
19 | extern const unsigned long sys_call_table[]; | 20 | extern const unsigned long sys_call_table[]; |
20 | 21 | ||
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 56a63ff7665e..bc817cd8b443 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h | |||
@@ -91,7 +91,6 @@ struct thread_info { | |||
91 | #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ | 91 | #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ |
92 | #define TIF_DEBUG 21 /* uses debug registers */ | 92 | #define TIF_DEBUG 21 /* uses debug registers */ |
93 | #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ | 93 | #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ |
94 | #define TIF_FREEZE 23 /* is freezing for suspend */ | ||
95 | #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ | 94 | #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ |
96 | #define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */ | 95 | #define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */ |
97 | #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ | 96 | #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ |
@@ -113,7 +112,6 @@ struct thread_info { | |||
113 | #define _TIF_FORK (1 << TIF_FORK) | 112 | #define _TIF_FORK (1 << TIF_FORK) |
114 | #define _TIF_DEBUG (1 << TIF_DEBUG) | 113 | #define _TIF_DEBUG (1 << TIF_DEBUG) |
115 | #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) | 114 | #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) |
116 | #define _TIF_FREEZE (1 << TIF_FREEZE) | ||
117 | #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) | 115 | #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) |
118 | #define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP) | 116 | #define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP) |
119 | #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) | 117 | #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) |
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 800f77c60051..b9676ae37ada 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h | |||
@@ -172,7 +172,7 @@ static inline void arch_fix_phys_package_id(int num, u32 slot) | |||
172 | } | 172 | } |
173 | 173 | ||
174 | struct pci_bus; | 174 | struct pci_bus; |
175 | void x86_pci_root_bus_res_quirks(struct pci_bus *b); | 175 | void x86_pci_root_bus_resources(int bus, struct list_head *resources); |
176 | 176 | ||
177 | #ifdef CONFIG_SMP | 177 | #ifdef CONFIG_SMP |
178 | #define mc_capable() ((boot_cpu_data.x86_max_cores > 1) && \ | 178 | #define mc_capable() ((boot_cpu_data.x86_max_cores > 1) && \ |
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index 2a58ed3e51d8..b4a3db7ce140 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h | |||
@@ -1,13 +1,59 @@ | |||
1 | #ifndef _ASM_X86_UNISTD_H | ||
2 | #define _ASM_X86_UNISTD_H 1 | ||
3 | |||
1 | #ifdef __KERNEL__ | 4 | #ifdef __KERNEL__ |
2 | # ifdef CONFIG_X86_32 | 5 | # ifdef CONFIG_X86_32 |
3 | # include "unistd_32.h" | 6 | |
7 | # include <asm/unistd_32.h> | ||
8 | # define __ARCH_WANT_IPC_PARSE_VERSION | ||
9 | # define __ARCH_WANT_STAT64 | ||
10 | # define __ARCH_WANT_SYS_OLD_MMAP | ||
11 | # define __ARCH_WANT_SYS_OLD_SELECT | ||
12 | |||
4 | # else | 13 | # else |
5 | # include "unistd_64.h" | 14 | |
15 | # include <asm/unistd_64.h> | ||
16 | # define __ARCH_WANT_COMPAT_SYS_TIME | ||
17 | |||
6 | # endif | 18 | # endif |
19 | |||
20 | # define __ARCH_WANT_OLD_READDIR | ||
21 | # define __ARCH_WANT_OLD_STAT | ||
22 | # define __ARCH_WANT_SYS_ALARM | ||
23 | # define __ARCH_WANT_SYS_FADVISE64 | ||
24 | # define __ARCH_WANT_SYS_GETHOSTNAME | ||
25 | # define __ARCH_WANT_SYS_GETPGRP | ||
26 | # define __ARCH_WANT_SYS_LLSEEK | ||
27 | # define __ARCH_WANT_SYS_NICE | ||
28 | # define __ARCH_WANT_SYS_OLDUMOUNT | ||
29 | # define __ARCH_WANT_SYS_OLD_GETRLIMIT | ||
30 | # define __ARCH_WANT_SYS_OLD_UNAME | ||
31 | # define __ARCH_WANT_SYS_PAUSE | ||
32 | # define __ARCH_WANT_SYS_RT_SIGACTION | ||
33 | # define __ARCH_WANT_SYS_RT_SIGSUSPEND | ||
34 | # define __ARCH_WANT_SYS_SGETMASK | ||
35 | # define __ARCH_WANT_SYS_SIGNAL | ||
36 | # define __ARCH_WANT_SYS_SIGPENDING | ||
37 | # define __ARCH_WANT_SYS_SIGPROCMASK | ||
38 | # define __ARCH_WANT_SYS_SOCKETCALL | ||
39 | # define __ARCH_WANT_SYS_TIME | ||
40 | # define __ARCH_WANT_SYS_UTIME | ||
41 | # define __ARCH_WANT_SYS_WAITPID | ||
42 | |||
43 | /* | ||
44 | * "Conditional" syscalls | ||
45 | * | ||
46 | * What we want is __attribute__((weak,alias("sys_ni_syscall"))), | ||
47 | * but it doesn't work on all toolchains, so we just do it by hand | ||
48 | */ | ||
49 | # define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") | ||
50 | |||
7 | #else | 51 | #else |
8 | # ifdef __i386__ | 52 | # ifdef __i386__ |
9 | # include "unistd_32.h" | 53 | # include <asm/unistd_32.h> |
10 | # else | 54 | # else |
11 | # include "unistd_64.h" | 55 | # include <asm/unistd_64.h> |
12 | # endif | 56 | # endif |
13 | #endif | 57 | #endif |
58 | |||
59 | #endif /* _ASM_X86_UNISTD_H */ | ||
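cond_syscall() moves here with the kernel's long-standing trick: declare the symbol weak and alias it to sys_ni_syscall, so configurations that compile a syscall out still link, while a real (strong) definition wins when present. The same trick demonstrated stand-alone, assuming GCC on an ELF target such as x86-64 Linux; maybe_impl and fallback are illustrative names.

#include <errno.h>
#include <stdio.h>

long fallback(void)
{
        return -ENOSYS;
}

/* Same shape as cond_syscall(): weak symbol, aliased by hand in asm. */
#define cond_symbol(x) asm(".weak " #x "\n\t.set " #x ",fallback")
cond_symbol(maybe_impl);

long maybe_impl(void);  /* no strong definition here, so it binds to fallback() */

int main(void)
{
        printf("%ld\n", maybe_impl());  /* -38 (-ENOSYS) on x86 Linux */
        return 0;
}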
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h deleted file mode 100644 index 599c77d38f33..000000000000 --- a/arch/x86/include/asm/unistd_32.h +++ /dev/null | |||
@@ -1,401 +0,0 @@ | |||
1 | #ifndef _ASM_X86_UNISTD_32_H | ||
2 | #define _ASM_X86_UNISTD_32_H | ||
3 | |||
4 | /* | ||
5 | * This file contains the system call numbers. | ||
6 | */ | ||
7 | |||
8 | #define __NR_restart_syscall 0 | ||
9 | #define __NR_exit 1 | ||
10 | #define __NR_fork 2 | ||
11 | #define __NR_read 3 | ||
12 | #define __NR_write 4 | ||
13 | #define __NR_open 5 | ||
14 | #define __NR_close 6 | ||
15 | #define __NR_waitpid 7 | ||
16 | #define __NR_creat 8 | ||
17 | #define __NR_link 9 | ||
18 | #define __NR_unlink 10 | ||
19 | #define __NR_execve 11 | ||
20 | #define __NR_chdir 12 | ||
21 | #define __NR_time 13 | ||
22 | #define __NR_mknod 14 | ||
23 | #define __NR_chmod 15 | ||
24 | #define __NR_lchown 16 | ||
25 | #define __NR_break 17 | ||
26 | #define __NR_oldstat 18 | ||
27 | #define __NR_lseek 19 | ||
28 | #define __NR_getpid 20 | ||
29 | #define __NR_mount 21 | ||
30 | #define __NR_umount 22 | ||
31 | #define __NR_setuid 23 | ||
32 | #define __NR_getuid 24 | ||
33 | #define __NR_stime 25 | ||
34 | #define __NR_ptrace 26 | ||
35 | #define __NR_alarm 27 | ||
36 | #define __NR_oldfstat 28 | ||
37 | #define __NR_pause 29 | ||
38 | #define __NR_utime 30 | ||
39 | #define __NR_stty 31 | ||
40 | #define __NR_gtty 32 | ||
41 | #define __NR_access 33 | ||
42 | #define __NR_nice 34 | ||
43 | #define __NR_ftime 35 | ||
44 | #define __NR_sync 36 | ||
45 | #define __NR_kill 37 | ||
46 | #define __NR_rename 38 | ||
47 | #define __NR_mkdir 39 | ||
48 | #define __NR_rmdir 40 | ||
49 | #define __NR_dup 41 | ||
50 | #define __NR_pipe 42 | ||
51 | #define __NR_times 43 | ||
52 | #define __NR_prof 44 | ||
53 | #define __NR_brk 45 | ||
54 | #define __NR_setgid 46 | ||
55 | #define __NR_getgid 47 | ||
56 | #define __NR_signal 48 | ||
57 | #define __NR_geteuid 49 | ||
58 | #define __NR_getegid 50 | ||
59 | #define __NR_acct 51 | ||
60 | #define __NR_umount2 52 | ||
61 | #define __NR_lock 53 | ||
62 | #define __NR_ioctl 54 | ||
63 | #define __NR_fcntl 55 | ||
64 | #define __NR_mpx 56 | ||
65 | #define __NR_setpgid 57 | ||
66 | #define __NR_ulimit 58 | ||
67 | #define __NR_oldolduname 59 | ||
68 | #define __NR_umask 60 | ||
69 | #define __NR_chroot 61 | ||
70 | #define __NR_ustat 62 | ||
71 | #define __NR_dup2 63 | ||
72 | #define __NR_getppid 64 | ||
73 | #define __NR_getpgrp 65 | ||
74 | #define __NR_setsid 66 | ||
75 | #define __NR_sigaction 67 | ||
76 | #define __NR_sgetmask 68 | ||
77 | #define __NR_ssetmask 69 | ||
78 | #define __NR_setreuid 70 | ||
79 | #define __NR_setregid 71 | ||
80 | #define __NR_sigsuspend 72 | ||
81 | #define __NR_sigpending 73 | ||
82 | #define __NR_sethostname 74 | ||
83 | #define __NR_setrlimit 75 | ||
84 | #define __NR_getrlimit 76 /* Back compatible 2Gig limited rlimit */ | ||
85 | #define __NR_getrusage 77 | ||
86 | #define __NR_gettimeofday 78 | ||
87 | #define __NR_settimeofday 79 | ||
88 | #define __NR_getgroups 80 | ||
89 | #define __NR_setgroups 81 | ||
90 | #define __NR_select 82 | ||
91 | #define __NR_symlink 83 | ||
92 | #define __NR_oldlstat 84 | ||
93 | #define __NR_readlink 85 | ||
94 | #define __NR_uselib 86 | ||
95 | #define __NR_swapon 87 | ||
96 | #define __NR_reboot 88 | ||
97 | #define __NR_readdir 89 | ||
98 | #define __NR_mmap 90 | ||
99 | #define __NR_munmap 91 | ||
100 | #define __NR_truncate 92 | ||
101 | #define __NR_ftruncate 93 | ||
102 | #define __NR_fchmod 94 | ||
103 | #define __NR_fchown 95 | ||
104 | #define __NR_getpriority 96 | ||
105 | #define __NR_setpriority 97 | ||
106 | #define __NR_profil 98 | ||
107 | #define __NR_statfs 99 | ||
108 | #define __NR_fstatfs 100 | ||
109 | #define __NR_ioperm 101 | ||
110 | #define __NR_socketcall 102 | ||
111 | #define __NR_syslog 103 | ||
112 | #define __NR_setitimer 104 | ||
113 | #define __NR_getitimer 105 | ||
114 | #define __NR_stat 106 | ||
115 | #define __NR_lstat 107 | ||
116 | #define __NR_fstat 108 | ||
117 | #define __NR_olduname 109 | ||
118 | #define __NR_iopl 110 | ||
119 | #define __NR_vhangup 111 | ||
120 | #define __NR_idle 112 | ||
121 | #define __NR_vm86old 113 | ||
122 | #define __NR_wait4 114 | ||
123 | #define __NR_swapoff 115 | ||
124 | #define __NR_sysinfo 116 | ||
125 | #define __NR_ipc 117 | ||
126 | #define __NR_fsync 118 | ||
127 | #define __NR_sigreturn 119 | ||
128 | #define __NR_clone 120 | ||
129 | #define __NR_setdomainname 121 | ||
130 | #define __NR_uname 122 | ||
131 | #define __NR_modify_ldt 123 | ||
132 | #define __NR_adjtimex 124 | ||
133 | #define __NR_mprotect 125 | ||
134 | #define __NR_sigprocmask 126 | ||
135 | #define __NR_create_module 127 | ||
136 | #define __NR_init_module 128 | ||
137 | #define __NR_delete_module 129 | ||
138 | #define __NR_get_kernel_syms 130 | ||
139 | #define __NR_quotactl 131 | ||
140 | #define __NR_getpgid 132 | ||
141 | #define __NR_fchdir 133 | ||
142 | #define __NR_bdflush 134 | ||
143 | #define __NR_sysfs 135 | ||
144 | #define __NR_personality 136 | ||
145 | #define __NR_afs_syscall 137 /* Syscall for Andrew File System */ | ||
146 | #define __NR_setfsuid 138 | ||
147 | #define __NR_setfsgid 139 | ||
148 | #define __NR__llseek 140 | ||
149 | #define __NR_getdents 141 | ||
150 | #define __NR__newselect 142 | ||
151 | #define __NR_flock 143 | ||
152 | #define __NR_msync 144 | ||
153 | #define __NR_readv 145 | ||
154 | #define __NR_writev 146 | ||
155 | #define __NR_getsid 147 | ||
156 | #define __NR_fdatasync 148 | ||
157 | #define __NR__sysctl 149 | ||
158 | #define __NR_mlock 150 | ||
159 | #define __NR_munlock 151 | ||
160 | #define __NR_mlockall 152 | ||
161 | #define __NR_munlockall 153 | ||
162 | #define __NR_sched_setparam 154 | ||
163 | #define __NR_sched_getparam 155 | ||
164 | #define __NR_sched_setscheduler 156 | ||
165 | #define __NR_sched_getscheduler 157 | ||
166 | #define __NR_sched_yield 158 | ||
167 | #define __NR_sched_get_priority_max 159 | ||
168 | #define __NR_sched_get_priority_min 160 | ||
169 | #define __NR_sched_rr_get_interval 161 | ||
170 | #define __NR_nanosleep 162 | ||
171 | #define __NR_mremap 163 | ||
172 | #define __NR_setresuid 164 | ||
173 | #define __NR_getresuid 165 | ||
174 | #define __NR_vm86 166 | ||
175 | #define __NR_query_module 167 | ||
176 | #define __NR_poll 168 | ||
177 | #define __NR_nfsservctl 169 | ||
178 | #define __NR_setresgid 170 | ||
179 | #define __NR_getresgid 171 | ||
180 | #define __NR_prctl 172 | ||
181 | #define __NR_rt_sigreturn 173 | ||
182 | #define __NR_rt_sigaction 174 | ||
183 | #define __NR_rt_sigprocmask 175 | ||
184 | #define __NR_rt_sigpending 176 | ||
185 | #define __NR_rt_sigtimedwait 177 | ||
186 | #define __NR_rt_sigqueueinfo 178 | ||
187 | #define __NR_rt_sigsuspend 179 | ||
188 | #define __NR_pread64 180 | ||
189 | #define __NR_pwrite64 181 | ||
190 | #define __NR_chown 182 | ||
191 | #define __NR_getcwd 183 | ||
192 | #define __NR_capget 184 | ||
193 | #define __NR_capset 185 | ||
194 | #define __NR_sigaltstack 186 | ||
195 | #define __NR_sendfile 187 | ||
196 | #define __NR_getpmsg 188 /* some people actually want streams */ | ||
197 | #define __NR_putpmsg 189 /* some people actually want streams */ | ||
198 | #define __NR_vfork 190 | ||
199 | #define __NR_ugetrlimit 191 /* SuS-compliant getrlimit */ | ||
200 | #define __NR_mmap2 192 | ||
201 | #define __NR_truncate64 193 | ||
202 | #define __NR_ftruncate64 194 | ||
203 | #define __NR_stat64 195 | ||
204 | #define __NR_lstat64 196 | ||
205 | #define __NR_fstat64 197 | ||
206 | #define __NR_lchown32 198 | ||
207 | #define __NR_getuid32 199 | ||
208 | #define __NR_getgid32 200 | ||
209 | #define __NR_geteuid32 201 | ||
210 | #define __NR_getegid32 202 | ||
211 | #define __NR_setreuid32 203 | ||
212 | #define __NR_setregid32 204 | ||
213 | #define __NR_getgroups32 205 | ||
214 | #define __NR_setgroups32 206 | ||
215 | #define __NR_fchown32 207 | ||
216 | #define __NR_setresuid32 208 | ||
217 | #define __NR_getresuid32 209 | ||
218 | #define __NR_setresgid32 210 | ||
219 | #define __NR_getresgid32 211 | ||
220 | #define __NR_chown32 212 | ||
221 | #define __NR_setuid32 213 | ||
222 | #define __NR_setgid32 214 | ||
223 | #define __NR_setfsuid32 215 | ||
224 | #define __NR_setfsgid32 216 | ||
225 | #define __NR_pivot_root 217 | ||
226 | #define __NR_mincore 218 | ||
227 | #define __NR_madvise 219 | ||
228 | #define __NR_madvise1 219 /* delete when C lib stub is removed */ | ||
229 | #define __NR_getdents64 220 | ||
230 | #define __NR_fcntl64 221 | ||
231 | /* 223 is unused */ | ||
232 | #define __NR_gettid 224 | ||
233 | #define __NR_readahead 225 | ||
234 | #define __NR_setxattr 226 | ||
235 | #define __NR_lsetxattr 227 | ||
236 | #define __NR_fsetxattr 228 | ||
237 | #define __NR_getxattr 229 | ||
238 | #define __NR_lgetxattr 230 | ||
239 | #define __NR_fgetxattr 231 | ||
240 | #define __NR_listxattr 232 | ||
241 | #define __NR_llistxattr 233 | ||
242 | #define __NR_flistxattr 234 | ||
243 | #define __NR_removexattr 235 | ||
244 | #define __NR_lremovexattr 236 | ||
245 | #define __NR_fremovexattr 237 | ||
246 | #define __NR_tkill 238 | ||
247 | #define __NR_sendfile64 239 | ||
248 | #define __NR_futex 240 | ||
249 | #define __NR_sched_setaffinity 241 | ||
250 | #define __NR_sched_getaffinity 242 | ||
251 | #define __NR_set_thread_area 243 | ||
252 | #define __NR_get_thread_area 244 | ||
253 | #define __NR_io_setup 245 | ||
254 | #define __NR_io_destroy 246 | ||
255 | #define __NR_io_getevents 247 | ||
256 | #define __NR_io_submit 248 | ||
257 | #define __NR_io_cancel 249 | ||
258 | #define __NR_fadvise64 250 | ||
259 | /* 251 is available for reuse (was briefly sys_set_zone_reclaim) */ | ||
260 | #define __NR_exit_group 252 | ||
261 | #define __NR_lookup_dcookie 253 | ||
262 | #define __NR_epoll_create 254 | ||
263 | #define __NR_epoll_ctl 255 | ||
264 | #define __NR_epoll_wait 256 | ||
265 | #define __NR_remap_file_pages 257 | ||
266 | #define __NR_set_tid_address 258 | ||
267 | #define __NR_timer_create 259 | ||
268 | #define __NR_timer_settime (__NR_timer_create+1) | ||
269 | #define __NR_timer_gettime (__NR_timer_create+2) | ||
270 | #define __NR_timer_getoverrun (__NR_timer_create+3) | ||
271 | #define __NR_timer_delete (__NR_timer_create+4) | ||
272 | #define __NR_clock_settime (__NR_timer_create+5) | ||
273 | #define __NR_clock_gettime (__NR_timer_create+6) | ||
274 | #define __NR_clock_getres (__NR_timer_create+7) | ||
275 | #define __NR_clock_nanosleep (__NR_timer_create+8) | ||
276 | #define __NR_statfs64 268 | ||
277 | #define __NR_fstatfs64 269 | ||
278 | #define __NR_tgkill 270 | ||
279 | #define __NR_utimes 271 | ||
280 | #define __NR_fadvise64_64 272 | ||
281 | #define __NR_vserver 273 | ||
282 | #define __NR_mbind 274 | ||
283 | #define __NR_get_mempolicy 275 | ||
284 | #define __NR_set_mempolicy 276 | ||
285 | #define __NR_mq_open 277 | ||
286 | #define __NR_mq_unlink (__NR_mq_open+1) | ||
287 | #define __NR_mq_timedsend (__NR_mq_open+2) | ||
288 | #define __NR_mq_timedreceive (__NR_mq_open+3) | ||
289 | #define __NR_mq_notify (__NR_mq_open+4) | ||
290 | #define __NR_mq_getsetattr (__NR_mq_open+5) | ||
291 | #define __NR_kexec_load 283 | ||
292 | #define __NR_waitid 284 | ||
293 | /* #define __NR_sys_setaltroot 285 */ | ||
294 | #define __NR_add_key 286 | ||
295 | #define __NR_request_key 287 | ||
296 | #define __NR_keyctl 288 | ||
297 | #define __NR_ioprio_set 289 | ||
298 | #define __NR_ioprio_get 290 | ||
299 | #define __NR_inotify_init 291 | ||
300 | #define __NR_inotify_add_watch 292 | ||
301 | #define __NR_inotify_rm_watch 293 | ||
302 | #define __NR_migrate_pages 294 | ||
303 | #define __NR_openat 295 | ||
304 | #define __NR_mkdirat 296 | ||
305 | #define __NR_mknodat 297 | ||
306 | #define __NR_fchownat 298 | ||
307 | #define __NR_futimesat 299 | ||
308 | #define __NR_fstatat64 300 | ||
309 | #define __NR_unlinkat 301 | ||
310 | #define __NR_renameat 302 | ||
311 | #define __NR_linkat 303 | ||
312 | #define __NR_symlinkat 304 | ||
313 | #define __NR_readlinkat 305 | ||
314 | #define __NR_fchmodat 306 | ||
315 | #define __NR_faccessat 307 | ||
316 | #define __NR_pselect6 308 | ||
317 | #define __NR_ppoll 309 | ||
318 | #define __NR_unshare 310 | ||
319 | #define __NR_set_robust_list 311 | ||
320 | #define __NR_get_robust_list 312 | ||
321 | #define __NR_splice 313 | ||
322 | #define __NR_sync_file_range 314 | ||
323 | #define __NR_tee 315 | ||
324 | #define __NR_vmsplice 316 | ||
325 | #define __NR_move_pages 317 | ||
326 | #define __NR_getcpu 318 | ||
327 | #define __NR_epoll_pwait 319 | ||
328 | #define __NR_utimensat 320 | ||
329 | #define __NR_signalfd 321 | ||
330 | #define __NR_timerfd_create 322 | ||
331 | #define __NR_eventfd 323 | ||
332 | #define __NR_fallocate 324 | ||
333 | #define __NR_timerfd_settime 325 | ||
334 | #define __NR_timerfd_gettime 326 | ||
335 | #define __NR_signalfd4 327 | ||
336 | #define __NR_eventfd2 328 | ||
337 | #define __NR_epoll_create1 329 | ||
338 | #define __NR_dup3 330 | ||
339 | #define __NR_pipe2 331 | ||
340 | #define __NR_inotify_init1 332 | ||
341 | #define __NR_preadv 333 | ||
342 | #define __NR_pwritev 334 | ||
343 | #define __NR_rt_tgsigqueueinfo 335 | ||
344 | #define __NR_perf_event_open 336 | ||
345 | #define __NR_recvmmsg 337 | ||
346 | #define __NR_fanotify_init 338 | ||
347 | #define __NR_fanotify_mark 339 | ||
348 | #define __NR_prlimit64 340 | ||
349 | #define __NR_name_to_handle_at 341 | ||
350 | #define __NR_open_by_handle_at 342 | ||
351 | #define __NR_clock_adjtime 343 | ||
352 | #define __NR_syncfs 344 | ||
353 | #define __NR_sendmmsg 345 | ||
354 | #define __NR_setns 346 | ||
355 | #define __NR_process_vm_readv 347 | ||
356 | #define __NR_process_vm_writev 348 | ||
357 | |||
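Each __NR_* constant above is the 32-bit userspace ABI: the number a process loads into %eax before entering the kernel via int $0x80 (or the vsyscall page). A minimal sketch of exercising one of these numbers from C, assuming a Linux host where glibc's <sys/syscall.h> mirrors this header:

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    int main(void)
    {
            /* SYS_getpid is 20 on 32-bit x86, matching __NR_getpid above */
            long pid = syscall(SYS_getpid);
            printf("pid via raw syscall: %ld\n", pid);
            return 0;
    }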
358 | #ifdef __KERNEL__ | ||
359 | |||
360 | #define NR_syscalls 349 | ||
361 | |||
362 | #define __ARCH_WANT_IPC_PARSE_VERSION | ||
363 | #define __ARCH_WANT_OLD_READDIR | ||
364 | #define __ARCH_WANT_OLD_STAT | ||
365 | #define __ARCH_WANT_STAT64 | ||
366 | #define __ARCH_WANT_SYS_ALARM | ||
367 | #define __ARCH_WANT_SYS_GETHOSTNAME | ||
368 | #define __ARCH_WANT_SYS_IPC | ||
369 | #define __ARCH_WANT_SYS_PAUSE | ||
370 | #define __ARCH_WANT_SYS_SGETMASK | ||
371 | #define __ARCH_WANT_SYS_SIGNAL | ||
372 | #define __ARCH_WANT_SYS_TIME | ||
373 | #define __ARCH_WANT_SYS_UTIME | ||
374 | #define __ARCH_WANT_SYS_WAITPID | ||
375 | #define __ARCH_WANT_SYS_SOCKETCALL | ||
376 | #define __ARCH_WANT_SYS_FADVISE64 | ||
377 | #define __ARCH_WANT_SYS_GETPGRP | ||
378 | #define __ARCH_WANT_SYS_LLSEEK | ||
379 | #define __ARCH_WANT_SYS_NICE | ||
380 | #define __ARCH_WANT_SYS_OLD_GETRLIMIT | ||
381 | #define __ARCH_WANT_SYS_OLD_UNAME | ||
382 | #define __ARCH_WANT_SYS_OLD_MMAP | ||
383 | #define __ARCH_WANT_SYS_OLD_SELECT | ||
384 | #define __ARCH_WANT_SYS_OLDUMOUNT | ||
385 | #define __ARCH_WANT_SYS_SIGPENDING | ||
386 | #define __ARCH_WANT_SYS_SIGPROCMASK | ||
387 | #define __ARCH_WANT_SYS_RT_SIGACTION | ||
388 | #define __ARCH_WANT_SYS_RT_SIGSUSPEND | ||
389 | |||
390 | /* | ||
391 | * "Conditional" syscalls | ||
392 | * | ||
393 | * What we want is __attribute__((weak,alias("sys_ni_syscall"))), | ||
394 | * but it doesn't work on all toolchains, so we just do it by hand | ||
395 | */ | ||
396 | #ifndef cond_syscall | ||
397 | #define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") | ||
398 | #endif | ||
399 | |||
400 | #endif /* __KERNEL__ */ | ||
401 | #endif /* _ASM_X86_UNISTD_32_H */ | ||
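The cond_syscall() asm above hand-rolls a weak alias because the attribute form was unreliable on some toolchains. For reference, a sketch of the attribute it emulates (sys_ni_syscall here is a local stand-in, not the kernel's definition):

    /* fallback that reports "not implemented", like the kernel's
     * sys_ni_syscall returning -ENOSYS */
    long sys_ni_syscall(void) { return -38; /* -ENOSYS */ }

    /* resolves to sys_ni_syscall unless a strong definition of
     * sys_example is linked in */
    long sys_example(void) __attribute__((weak, alias("sys_ni_syscall")));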
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h deleted file mode 100644 index 0431f193c3f2..000000000000 --- a/arch/x86/include/asm/unistd_64.h +++ /dev/null | |||
@@ -1,732 +0,0 @@ | |||
1 | #ifndef _ASM_X86_UNISTD_64_H | ||
2 | #define _ASM_X86_UNISTD_64_H | ||
3 | |||
4 | #ifndef __SYSCALL | ||
5 | #define __SYSCALL(a, b) | ||
6 | #endif | ||
7 | |||
8 | /* | ||
9 | * This file contains the system call numbers. | ||
10 | * | ||
11 | * Note: holes are not allowed. | ||
12 | */ | ||
13 | |||
14 | /* at least 8 syscalls per cacheline */ | ||
15 | #define __NR_read 0 | ||
16 | __SYSCALL(__NR_read, sys_read) | ||
17 | #define __NR_write 1 | ||
18 | __SYSCALL(__NR_write, sys_write) | ||
19 | #define __NR_open 2 | ||
20 | __SYSCALL(__NR_open, sys_open) | ||
21 | #define __NR_close 3 | ||
22 | __SYSCALL(__NR_close, sys_close) | ||
23 | #define __NR_stat 4 | ||
24 | __SYSCALL(__NR_stat, sys_newstat) | ||
25 | #define __NR_fstat 5 | ||
26 | __SYSCALL(__NR_fstat, sys_newfstat) | ||
27 | #define __NR_lstat 6 | ||
28 | __SYSCALL(__NR_lstat, sys_newlstat) | ||
29 | #define __NR_poll 7 | ||
30 | __SYSCALL(__NR_poll, sys_poll) | ||
31 | |||
32 | #define __NR_lseek 8 | ||
33 | __SYSCALL(__NR_lseek, sys_lseek) | ||
34 | #define __NR_mmap 9 | ||
35 | __SYSCALL(__NR_mmap, sys_mmap) | ||
36 | #define __NR_mprotect 10 | ||
37 | __SYSCALL(__NR_mprotect, sys_mprotect) | ||
38 | #define __NR_munmap 11 | ||
39 | __SYSCALL(__NR_munmap, sys_munmap) | ||
40 | #define __NR_brk 12 | ||
41 | __SYSCALL(__NR_brk, sys_brk) | ||
42 | #define __NR_rt_sigaction 13 | ||
43 | __SYSCALL(__NR_rt_sigaction, sys_rt_sigaction) | ||
44 | #define __NR_rt_sigprocmask 14 | ||
45 | __SYSCALL(__NR_rt_sigprocmask, sys_rt_sigprocmask) | ||
46 | #define __NR_rt_sigreturn 15 | ||
47 | __SYSCALL(__NR_rt_sigreturn, stub_rt_sigreturn) | ||
48 | |||
49 | #define __NR_ioctl 16 | ||
50 | __SYSCALL(__NR_ioctl, sys_ioctl) | ||
51 | #define __NR_pread64 17 | ||
52 | __SYSCALL(__NR_pread64, sys_pread64) | ||
53 | #define __NR_pwrite64 18 | ||
54 | __SYSCALL(__NR_pwrite64, sys_pwrite64) | ||
55 | #define __NR_readv 19 | ||
56 | __SYSCALL(__NR_readv, sys_readv) | ||
57 | #define __NR_writev 20 | ||
58 | __SYSCALL(__NR_writev, sys_writev) | ||
59 | #define __NR_access 21 | ||
60 | __SYSCALL(__NR_access, sys_access) | ||
61 | #define __NR_pipe 22 | ||
62 | __SYSCALL(__NR_pipe, sys_pipe) | ||
63 | #define __NR_select 23 | ||
64 | __SYSCALL(__NR_select, sys_select) | ||
65 | |||
66 | #define __NR_sched_yield 24 | ||
67 | __SYSCALL(__NR_sched_yield, sys_sched_yield) | ||
68 | #define __NR_mremap 25 | ||
69 | __SYSCALL(__NR_mremap, sys_mremap) | ||
70 | #define __NR_msync 26 | ||
71 | __SYSCALL(__NR_msync, sys_msync) | ||
72 | #define __NR_mincore 27 | ||
73 | __SYSCALL(__NR_mincore, sys_mincore) | ||
74 | #define __NR_madvise 28 | ||
75 | __SYSCALL(__NR_madvise, sys_madvise) | ||
76 | #define __NR_shmget 29 | ||
77 | __SYSCALL(__NR_shmget, sys_shmget) | ||
78 | #define __NR_shmat 30 | ||
79 | __SYSCALL(__NR_shmat, sys_shmat) | ||
80 | #define __NR_shmctl 31 | ||
81 | __SYSCALL(__NR_shmctl, sys_shmctl) | ||
82 | |||
83 | #define __NR_dup 32 | ||
84 | __SYSCALL(__NR_dup, sys_dup) | ||
85 | #define __NR_dup2 33 | ||
86 | __SYSCALL(__NR_dup2, sys_dup2) | ||
87 | #define __NR_pause 34 | ||
88 | __SYSCALL(__NR_pause, sys_pause) | ||
89 | #define __NR_nanosleep 35 | ||
90 | __SYSCALL(__NR_nanosleep, sys_nanosleep) | ||
91 | #define __NR_getitimer 36 | ||
92 | __SYSCALL(__NR_getitimer, sys_getitimer) | ||
93 | #define __NR_alarm 37 | ||
94 | __SYSCALL(__NR_alarm, sys_alarm) | ||
95 | #define __NR_setitimer 38 | ||
96 | __SYSCALL(__NR_setitimer, sys_setitimer) | ||
97 | #define __NR_getpid 39 | ||
98 | __SYSCALL(__NR_getpid, sys_getpid) | ||
99 | |||
100 | #define __NR_sendfile 40 | ||
101 | __SYSCALL(__NR_sendfile, sys_sendfile64) | ||
102 | #define __NR_socket 41 | ||
103 | __SYSCALL(__NR_socket, sys_socket) | ||
104 | #define __NR_connect 42 | ||
105 | __SYSCALL(__NR_connect, sys_connect) | ||
106 | #define __NR_accept 43 | ||
107 | __SYSCALL(__NR_accept, sys_accept) | ||
108 | #define __NR_sendto 44 | ||
109 | __SYSCALL(__NR_sendto, sys_sendto) | ||
110 | #define __NR_recvfrom 45 | ||
111 | __SYSCALL(__NR_recvfrom, sys_recvfrom) | ||
112 | #define __NR_sendmsg 46 | ||
113 | __SYSCALL(__NR_sendmsg, sys_sendmsg) | ||
114 | #define __NR_recvmsg 47 | ||
115 | __SYSCALL(__NR_recvmsg, sys_recvmsg) | ||
116 | |||
117 | #define __NR_shutdown 48 | ||
118 | __SYSCALL(__NR_shutdown, sys_shutdown) | ||
119 | #define __NR_bind 49 | ||
120 | __SYSCALL(__NR_bind, sys_bind) | ||
121 | #define __NR_listen 50 | ||
122 | __SYSCALL(__NR_listen, sys_listen) | ||
123 | #define __NR_getsockname 51 | ||
124 | __SYSCALL(__NR_getsockname, sys_getsockname) | ||
125 | #define __NR_getpeername 52 | ||
126 | __SYSCALL(__NR_getpeername, sys_getpeername) | ||
127 | #define __NR_socketpair 53 | ||
128 | __SYSCALL(__NR_socketpair, sys_socketpair) | ||
129 | #define __NR_setsockopt 54 | ||
130 | __SYSCALL(__NR_setsockopt, sys_setsockopt) | ||
131 | #define __NR_getsockopt 55 | ||
132 | __SYSCALL(__NR_getsockopt, sys_getsockopt) | ||
133 | |||
134 | #define __NR_clone 56 | ||
135 | __SYSCALL(__NR_clone, stub_clone) | ||
136 | #define __NR_fork 57 | ||
137 | __SYSCALL(__NR_fork, stub_fork) | ||
138 | #define __NR_vfork 58 | ||
139 | __SYSCALL(__NR_vfork, stub_vfork) | ||
140 | #define __NR_execve 59 | ||
141 | __SYSCALL(__NR_execve, stub_execve) | ||
142 | #define __NR_exit 60 | ||
143 | __SYSCALL(__NR_exit, sys_exit) | ||
144 | #define __NR_wait4 61 | ||
145 | __SYSCALL(__NR_wait4, sys_wait4) | ||
146 | #define __NR_kill 62 | ||
147 | __SYSCALL(__NR_kill, sys_kill) | ||
148 | #define __NR_uname 63 | ||
149 | __SYSCALL(__NR_uname, sys_newuname) | ||
150 | |||
151 | #define __NR_semget 64 | ||
152 | __SYSCALL(__NR_semget, sys_semget) | ||
153 | #define __NR_semop 65 | ||
154 | __SYSCALL(__NR_semop, sys_semop) | ||
155 | #define __NR_semctl 66 | ||
156 | __SYSCALL(__NR_semctl, sys_semctl) | ||
157 | #define __NR_shmdt 67 | ||
158 | __SYSCALL(__NR_shmdt, sys_shmdt) | ||
159 | #define __NR_msgget 68 | ||
160 | __SYSCALL(__NR_msgget, sys_msgget) | ||
161 | #define __NR_msgsnd 69 | ||
162 | __SYSCALL(__NR_msgsnd, sys_msgsnd) | ||
163 | #define __NR_msgrcv 70 | ||
164 | __SYSCALL(__NR_msgrcv, sys_msgrcv) | ||
165 | #define __NR_msgctl 71 | ||
166 | __SYSCALL(__NR_msgctl, sys_msgctl) | ||
167 | |||
168 | #define __NR_fcntl 72 | ||
169 | __SYSCALL(__NR_fcntl, sys_fcntl) | ||
170 | #define __NR_flock 73 | ||
171 | __SYSCALL(__NR_flock, sys_flock) | ||
172 | #define __NR_fsync 74 | ||
173 | __SYSCALL(__NR_fsync, sys_fsync) | ||
174 | #define __NR_fdatasync 75 | ||
175 | __SYSCALL(__NR_fdatasync, sys_fdatasync) | ||
176 | #define __NR_truncate 76 | ||
177 | __SYSCALL(__NR_truncate, sys_truncate) | ||
178 | #define __NR_ftruncate 77 | ||
179 | __SYSCALL(__NR_ftruncate, sys_ftruncate) | ||
180 | #define __NR_getdents 78 | ||
181 | __SYSCALL(__NR_getdents, sys_getdents) | ||
182 | #define __NR_getcwd 79 | ||
183 | __SYSCALL(__NR_getcwd, sys_getcwd) | ||
184 | |||
185 | #define __NR_chdir 80 | ||
186 | __SYSCALL(__NR_chdir, sys_chdir) | ||
187 | #define __NR_fchdir 81 | ||
188 | __SYSCALL(__NR_fchdir, sys_fchdir) | ||
189 | #define __NR_rename 82 | ||
190 | __SYSCALL(__NR_rename, sys_rename) | ||
191 | #define __NR_mkdir 83 | ||
192 | __SYSCALL(__NR_mkdir, sys_mkdir) | ||
193 | #define __NR_rmdir 84 | ||
194 | __SYSCALL(__NR_rmdir, sys_rmdir) | ||
195 | #define __NR_creat 85 | ||
196 | __SYSCALL(__NR_creat, sys_creat) | ||
197 | #define __NR_link 86 | ||
198 | __SYSCALL(__NR_link, sys_link) | ||
199 | #define __NR_unlink 87 | ||
200 | __SYSCALL(__NR_unlink, sys_unlink) | ||
201 | |||
202 | #define __NR_symlink 88 | ||
203 | __SYSCALL(__NR_symlink, sys_symlink) | ||
204 | #define __NR_readlink 89 | ||
205 | __SYSCALL(__NR_readlink, sys_readlink) | ||
206 | #define __NR_chmod 90 | ||
207 | __SYSCALL(__NR_chmod, sys_chmod) | ||
208 | #define __NR_fchmod 91 | ||
209 | __SYSCALL(__NR_fchmod, sys_fchmod) | ||
210 | #define __NR_chown 92 | ||
211 | __SYSCALL(__NR_chown, sys_chown) | ||
212 | #define __NR_fchown 93 | ||
213 | __SYSCALL(__NR_fchown, sys_fchown) | ||
214 | #define __NR_lchown 94 | ||
215 | __SYSCALL(__NR_lchown, sys_lchown) | ||
216 | #define __NR_umask 95 | ||
217 | __SYSCALL(__NR_umask, sys_umask) | ||
218 | |||
219 | #define __NR_gettimeofday 96 | ||
220 | __SYSCALL(__NR_gettimeofday, sys_gettimeofday) | ||
221 | #define __NR_getrlimit 97 | ||
222 | __SYSCALL(__NR_getrlimit, sys_getrlimit) | ||
223 | #define __NR_getrusage 98 | ||
224 | __SYSCALL(__NR_getrusage, sys_getrusage) | ||
225 | #define __NR_sysinfo 99 | ||
226 | __SYSCALL(__NR_sysinfo, sys_sysinfo) | ||
227 | #define __NR_times 100 | ||
228 | __SYSCALL(__NR_times, sys_times) | ||
229 | #define __NR_ptrace 101 | ||
230 | __SYSCALL(__NR_ptrace, sys_ptrace) | ||
231 | #define __NR_getuid 102 | ||
232 | __SYSCALL(__NR_getuid, sys_getuid) | ||
233 | #define __NR_syslog 103 | ||
234 | __SYSCALL(__NR_syslog, sys_syslog) | ||
235 | |||
236 | /* at the very end, the stuff that never runs during benchmarks */ | ||
237 | #define __NR_getgid 104 | ||
238 | __SYSCALL(__NR_getgid, sys_getgid) | ||
239 | #define __NR_setuid 105 | ||
240 | __SYSCALL(__NR_setuid, sys_setuid) | ||
241 | #define __NR_setgid 106 | ||
242 | __SYSCALL(__NR_setgid, sys_setgid) | ||
243 | #define __NR_geteuid 107 | ||
244 | __SYSCALL(__NR_geteuid, sys_geteuid) | ||
245 | #define __NR_getegid 108 | ||
246 | __SYSCALL(__NR_getegid, sys_getegid) | ||
247 | #define __NR_setpgid 109 | ||
248 | __SYSCALL(__NR_setpgid, sys_setpgid) | ||
249 | #define __NR_getppid 110 | ||
250 | __SYSCALL(__NR_getppid, sys_getppid) | ||
251 | #define __NR_getpgrp 111 | ||
252 | __SYSCALL(__NR_getpgrp, sys_getpgrp) | ||
253 | |||
254 | #define __NR_setsid 112 | ||
255 | __SYSCALL(__NR_setsid, sys_setsid) | ||
256 | #define __NR_setreuid 113 | ||
257 | __SYSCALL(__NR_setreuid, sys_setreuid) | ||
258 | #define __NR_setregid 114 | ||
259 | __SYSCALL(__NR_setregid, sys_setregid) | ||
260 | #define __NR_getgroups 115 | ||
261 | __SYSCALL(__NR_getgroups, sys_getgroups) | ||
262 | #define __NR_setgroups 116 | ||
263 | __SYSCALL(__NR_setgroups, sys_setgroups) | ||
264 | #define __NR_setresuid 117 | ||
265 | __SYSCALL(__NR_setresuid, sys_setresuid) | ||
266 | #define __NR_getresuid 118 | ||
267 | __SYSCALL(__NR_getresuid, sys_getresuid) | ||
268 | #define __NR_setresgid 119 | ||
269 | __SYSCALL(__NR_setresgid, sys_setresgid) | ||
270 | |||
271 | #define __NR_getresgid 120 | ||
272 | __SYSCALL(__NR_getresgid, sys_getresgid) | ||
273 | #define __NR_getpgid 121 | ||
274 | __SYSCALL(__NR_getpgid, sys_getpgid) | ||
275 | #define __NR_setfsuid 122 | ||
276 | __SYSCALL(__NR_setfsuid, sys_setfsuid) | ||
277 | #define __NR_setfsgid 123 | ||
278 | __SYSCALL(__NR_setfsgid, sys_setfsgid) | ||
279 | #define __NR_getsid 124 | ||
280 | __SYSCALL(__NR_getsid, sys_getsid) | ||
281 | #define __NR_capget 125 | ||
282 | __SYSCALL(__NR_capget, sys_capget) | ||
283 | #define __NR_capset 126 | ||
284 | __SYSCALL(__NR_capset, sys_capset) | ||
285 | |||
286 | #define __NR_rt_sigpending 127 | ||
287 | __SYSCALL(__NR_rt_sigpending, sys_rt_sigpending) | ||
288 | #define __NR_rt_sigtimedwait 128 | ||
289 | __SYSCALL(__NR_rt_sigtimedwait, sys_rt_sigtimedwait) | ||
290 | #define __NR_rt_sigqueueinfo 129 | ||
291 | __SYSCALL(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo) | ||
292 | #define __NR_rt_sigsuspend 130 | ||
293 | __SYSCALL(__NR_rt_sigsuspend, sys_rt_sigsuspend) | ||
294 | #define __NR_sigaltstack 131 | ||
295 | __SYSCALL(__NR_sigaltstack, stub_sigaltstack) | ||
296 | #define __NR_utime 132 | ||
297 | __SYSCALL(__NR_utime, sys_utime) | ||
298 | #define __NR_mknod 133 | ||
299 | __SYSCALL(__NR_mknod, sys_mknod) | ||
300 | |||
301 | /* Only needed for a.out */ | ||
302 | #define __NR_uselib 134 | ||
303 | __SYSCALL(__NR_uselib, sys_ni_syscall) | ||
304 | #define __NR_personality 135 | ||
305 | __SYSCALL(__NR_personality, sys_personality) | ||
306 | |||
307 | #define __NR_ustat 136 | ||
308 | __SYSCALL(__NR_ustat, sys_ustat) | ||
309 | #define __NR_statfs 137 | ||
310 | __SYSCALL(__NR_statfs, sys_statfs) | ||
311 | #define __NR_fstatfs 138 | ||
312 | __SYSCALL(__NR_fstatfs, sys_fstatfs) | ||
313 | #define __NR_sysfs 139 | ||
314 | __SYSCALL(__NR_sysfs, sys_sysfs) | ||
315 | |||
316 | #define __NR_getpriority 140 | ||
317 | __SYSCALL(__NR_getpriority, sys_getpriority) | ||
318 | #define __NR_setpriority 141 | ||
319 | __SYSCALL(__NR_setpriority, sys_setpriority) | ||
320 | #define __NR_sched_setparam 142 | ||
321 | __SYSCALL(__NR_sched_setparam, sys_sched_setparam) | ||
322 | #define __NR_sched_getparam 143 | ||
323 | __SYSCALL(__NR_sched_getparam, sys_sched_getparam) | ||
324 | #define __NR_sched_setscheduler 144 | ||
325 | __SYSCALL(__NR_sched_setscheduler, sys_sched_setscheduler) | ||
326 | #define __NR_sched_getscheduler 145 | ||
327 | __SYSCALL(__NR_sched_getscheduler, sys_sched_getscheduler) | ||
328 | #define __NR_sched_get_priority_max 146 | ||
329 | __SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max) | ||
330 | #define __NR_sched_get_priority_min 147 | ||
331 | __SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min) | ||
332 | #define __NR_sched_rr_get_interval 148 | ||
333 | __SYSCALL(__NR_sched_rr_get_interval, sys_sched_rr_get_interval) | ||
334 | |||
335 | #define __NR_mlock 149 | ||
336 | __SYSCALL(__NR_mlock, sys_mlock) | ||
337 | #define __NR_munlock 150 | ||
338 | __SYSCALL(__NR_munlock, sys_munlock) | ||
339 | #define __NR_mlockall 151 | ||
340 | __SYSCALL(__NR_mlockall, sys_mlockall) | ||
341 | #define __NR_munlockall 152 | ||
342 | __SYSCALL(__NR_munlockall, sys_munlockall) | ||
343 | |||
344 | #define __NR_vhangup 153 | ||
345 | __SYSCALL(__NR_vhangup, sys_vhangup) | ||
346 | |||
347 | #define __NR_modify_ldt 154 | ||
348 | __SYSCALL(__NR_modify_ldt, sys_modify_ldt) | ||
349 | |||
350 | #define __NR_pivot_root 155 | ||
351 | __SYSCALL(__NR_pivot_root, sys_pivot_root) | ||
352 | |||
353 | #define __NR__sysctl 156 | ||
354 | __SYSCALL(__NR__sysctl, sys_sysctl) | ||
355 | |||
356 | #define __NR_prctl 157 | ||
357 | __SYSCALL(__NR_prctl, sys_prctl) | ||
358 | #define __NR_arch_prctl 158 | ||
359 | __SYSCALL(__NR_arch_prctl, sys_arch_prctl) | ||
360 | |||
361 | #define __NR_adjtimex 159 | ||
362 | __SYSCALL(__NR_adjtimex, sys_adjtimex) | ||
363 | |||
364 | #define __NR_setrlimit 160 | ||
365 | __SYSCALL(__NR_setrlimit, sys_setrlimit) | ||
366 | |||
367 | #define __NR_chroot 161 | ||
368 | __SYSCALL(__NR_chroot, sys_chroot) | ||
369 | |||
370 | #define __NR_sync 162 | ||
371 | __SYSCALL(__NR_sync, sys_sync) | ||
372 | |||
373 | #define __NR_acct 163 | ||
374 | __SYSCALL(__NR_acct, sys_acct) | ||
375 | |||
376 | #define __NR_settimeofday 164 | ||
377 | __SYSCALL(__NR_settimeofday, sys_settimeofday) | ||
378 | |||
379 | #define __NR_mount 165 | ||
380 | __SYSCALL(__NR_mount, sys_mount) | ||
381 | #define __NR_umount2 166 | ||
382 | __SYSCALL(__NR_umount2, sys_umount) | ||
383 | |||
384 | #define __NR_swapon 167 | ||
385 | __SYSCALL(__NR_swapon, sys_swapon) | ||
386 | #define __NR_swapoff 168 | ||
387 | __SYSCALL(__NR_swapoff, sys_swapoff) | ||
388 | |||
389 | #define __NR_reboot 169 | ||
390 | __SYSCALL(__NR_reboot, sys_reboot) | ||
391 | |||
392 | #define __NR_sethostname 170 | ||
393 | __SYSCALL(__NR_sethostname, sys_sethostname) | ||
394 | #define __NR_setdomainname 171 | ||
395 | __SYSCALL(__NR_setdomainname, sys_setdomainname) | ||
396 | |||
397 | #define __NR_iopl 172 | ||
398 | __SYSCALL(__NR_iopl, stub_iopl) | ||
399 | #define __NR_ioperm 173 | ||
400 | __SYSCALL(__NR_ioperm, sys_ioperm) | ||
401 | |||
402 | #define __NR_create_module 174 | ||
403 | __SYSCALL(__NR_create_module, sys_ni_syscall) | ||
404 | #define __NR_init_module 175 | ||
405 | __SYSCALL(__NR_init_module, sys_init_module) | ||
406 | #define __NR_delete_module 176 | ||
407 | __SYSCALL(__NR_delete_module, sys_delete_module) | ||
408 | #define __NR_get_kernel_syms 177 | ||
409 | __SYSCALL(__NR_get_kernel_syms, sys_ni_syscall) | ||
410 | #define __NR_query_module 178 | ||
411 | __SYSCALL(__NR_query_module, sys_ni_syscall) | ||
412 | |||
413 | #define __NR_quotactl 179 | ||
414 | __SYSCALL(__NR_quotactl, sys_quotactl) | ||
415 | |||
416 | #define __NR_nfsservctl 180 | ||
417 | __SYSCALL(__NR_nfsservctl, sys_ni_syscall) | ||
418 | |||
419 | /* reserved for LiS/STREAMS */ | ||
420 | #define __NR_getpmsg 181 | ||
421 | __SYSCALL(__NR_getpmsg, sys_ni_syscall) | ||
422 | #define __NR_putpmsg 182 | ||
423 | __SYSCALL(__NR_putpmsg, sys_ni_syscall) | ||
424 | |||
425 | /* reserved for AFS */ | ||
426 | #define __NR_afs_syscall 183 | ||
427 | __SYSCALL(__NR_afs_syscall, sys_ni_syscall) | ||
428 | |||
429 | /* reserved for tux */ | ||
430 | #define __NR_tuxcall 184 | ||
431 | __SYSCALL(__NR_tuxcall, sys_ni_syscall) | ||
432 | |||
433 | #define __NR_security 185 | ||
434 | __SYSCALL(__NR_security, sys_ni_syscall) | ||
435 | |||
436 | #define __NR_gettid 186 | ||
437 | __SYSCALL(__NR_gettid, sys_gettid) | ||
438 | |||
439 | #define __NR_readahead 187 | ||
440 | __SYSCALL(__NR_readahead, sys_readahead) | ||
441 | #define __NR_setxattr 188 | ||
442 | __SYSCALL(__NR_setxattr, sys_setxattr) | ||
443 | #define __NR_lsetxattr 189 | ||
444 | __SYSCALL(__NR_lsetxattr, sys_lsetxattr) | ||
445 | #define __NR_fsetxattr 190 | ||
446 | __SYSCALL(__NR_fsetxattr, sys_fsetxattr) | ||
447 | #define __NR_getxattr 191 | ||
448 | __SYSCALL(__NR_getxattr, sys_getxattr) | ||
449 | #define __NR_lgetxattr 192 | ||
450 | __SYSCALL(__NR_lgetxattr, sys_lgetxattr) | ||
451 | #define __NR_fgetxattr 193 | ||
452 | __SYSCALL(__NR_fgetxattr, sys_fgetxattr) | ||
453 | #define __NR_listxattr 194 | ||
454 | __SYSCALL(__NR_listxattr, sys_listxattr) | ||
455 | #define __NR_llistxattr 195 | ||
456 | __SYSCALL(__NR_llistxattr, sys_llistxattr) | ||
457 | #define __NR_flistxattr 196 | ||
458 | __SYSCALL(__NR_flistxattr, sys_flistxattr) | ||
459 | #define __NR_removexattr 197 | ||
460 | __SYSCALL(__NR_removexattr, sys_removexattr) | ||
461 | #define __NR_lremovexattr 198 | ||
462 | __SYSCALL(__NR_lremovexattr, sys_lremovexattr) | ||
463 | #define __NR_fremovexattr 199 | ||
464 | __SYSCALL(__NR_fremovexattr, sys_fremovexattr) | ||
465 | #define __NR_tkill 200 | ||
466 | __SYSCALL(__NR_tkill, sys_tkill) | ||
467 | #define __NR_time 201 | ||
468 | __SYSCALL(__NR_time, sys_time) | ||
469 | #define __NR_futex 202 | ||
470 | __SYSCALL(__NR_futex, sys_futex) | ||
471 | #define __NR_sched_setaffinity 203 | ||
472 | __SYSCALL(__NR_sched_setaffinity, sys_sched_setaffinity) | ||
473 | #define __NR_sched_getaffinity 204 | ||
474 | __SYSCALL(__NR_sched_getaffinity, sys_sched_getaffinity) | ||
475 | #define __NR_set_thread_area 205 | ||
476 | __SYSCALL(__NR_set_thread_area, sys_ni_syscall) /* use arch_prctl */ | ||
477 | #define __NR_io_setup 206 | ||
478 | __SYSCALL(__NR_io_setup, sys_io_setup) | ||
479 | #define __NR_io_destroy 207 | ||
480 | __SYSCALL(__NR_io_destroy, sys_io_destroy) | ||
481 | #define __NR_io_getevents 208 | ||
482 | __SYSCALL(__NR_io_getevents, sys_io_getevents) | ||
483 | #define __NR_io_submit 209 | ||
484 | __SYSCALL(__NR_io_submit, sys_io_submit) | ||
485 | #define __NR_io_cancel 210 | ||
486 | __SYSCALL(__NR_io_cancel, sys_io_cancel) | ||
487 | #define __NR_get_thread_area 211 | ||
488 | __SYSCALL(__NR_get_thread_area, sys_ni_syscall) /* use arch_prctl */ | ||
489 | #define __NR_lookup_dcookie 212 | ||
490 | __SYSCALL(__NR_lookup_dcookie, sys_lookup_dcookie) | ||
491 | #define __NR_epoll_create 213 | ||
492 | __SYSCALL(__NR_epoll_create, sys_epoll_create) | ||
493 | #define __NR_epoll_ctl_old 214 | ||
494 | __SYSCALL(__NR_epoll_ctl_old, sys_ni_syscall) | ||
495 | #define __NR_epoll_wait_old 215 | ||
496 | __SYSCALL(__NR_epoll_wait_old, sys_ni_syscall) | ||
497 | #define __NR_remap_file_pages 216 | ||
498 | __SYSCALL(__NR_remap_file_pages, sys_remap_file_pages) | ||
499 | #define __NR_getdents64 217 | ||
500 | __SYSCALL(__NR_getdents64, sys_getdents64) | ||
501 | #define __NR_set_tid_address 218 | ||
502 | __SYSCALL(__NR_set_tid_address, sys_set_tid_address) | ||
503 | #define __NR_restart_syscall 219 | ||
504 | __SYSCALL(__NR_restart_syscall, sys_restart_syscall) | ||
505 | #define __NR_semtimedop 220 | ||
506 | __SYSCALL(__NR_semtimedop, sys_semtimedop) | ||
507 | #define __NR_fadvise64 221 | ||
508 | __SYSCALL(__NR_fadvise64, sys_fadvise64) | ||
509 | #define __NR_timer_create 222 | ||
510 | __SYSCALL(__NR_timer_create, sys_timer_create) | ||
511 | #define __NR_timer_settime 223 | ||
512 | __SYSCALL(__NR_timer_settime, sys_timer_settime) | ||
513 | #define __NR_timer_gettime 224 | ||
514 | __SYSCALL(__NR_timer_gettime, sys_timer_gettime) | ||
515 | #define __NR_timer_getoverrun 225 | ||
516 | __SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun) | ||
517 | #define __NR_timer_delete 226 | ||
518 | __SYSCALL(__NR_timer_delete, sys_timer_delete) | ||
519 | #define __NR_clock_settime 227 | ||
520 | __SYSCALL(__NR_clock_settime, sys_clock_settime) | ||
521 | #define __NR_clock_gettime 228 | ||
522 | __SYSCALL(__NR_clock_gettime, sys_clock_gettime) | ||
523 | #define __NR_clock_getres 229 | ||
524 | __SYSCALL(__NR_clock_getres, sys_clock_getres) | ||
525 | #define __NR_clock_nanosleep 230 | ||
526 | __SYSCALL(__NR_clock_nanosleep, sys_clock_nanosleep) | ||
527 | #define __NR_exit_group 231 | ||
528 | __SYSCALL(__NR_exit_group, sys_exit_group) | ||
529 | #define __NR_epoll_wait 232 | ||
530 | __SYSCALL(__NR_epoll_wait, sys_epoll_wait) | ||
531 | #define __NR_epoll_ctl 233 | ||
532 | __SYSCALL(__NR_epoll_ctl, sys_epoll_ctl) | ||
533 | #define __NR_tgkill 234 | ||
534 | __SYSCALL(__NR_tgkill, sys_tgkill) | ||
535 | #define __NR_utimes 235 | ||
536 | __SYSCALL(__NR_utimes, sys_utimes) | ||
537 | #define __NR_vserver 236 | ||
538 | __SYSCALL(__NR_vserver, sys_ni_syscall) | ||
539 | #define __NR_mbind 237 | ||
540 | __SYSCALL(__NR_mbind, sys_mbind) | ||
541 | #define __NR_set_mempolicy 238 | ||
542 | __SYSCALL(__NR_set_mempolicy, sys_set_mempolicy) | ||
543 | #define __NR_get_mempolicy 239 | ||
544 | __SYSCALL(__NR_get_mempolicy, sys_get_mempolicy) | ||
545 | #define __NR_mq_open 240 | ||
546 | __SYSCALL(__NR_mq_open, sys_mq_open) | ||
547 | #define __NR_mq_unlink 241 | ||
548 | __SYSCALL(__NR_mq_unlink, sys_mq_unlink) | ||
549 | #define __NR_mq_timedsend 242 | ||
550 | __SYSCALL(__NR_mq_timedsend, sys_mq_timedsend) | ||
551 | #define __NR_mq_timedreceive 243 | ||
552 | __SYSCALL(__NR_mq_timedreceive, sys_mq_timedreceive) | ||
553 | #define __NR_mq_notify 244 | ||
554 | __SYSCALL(__NR_mq_notify, sys_mq_notify) | ||
555 | #define __NR_mq_getsetattr 245 | ||
556 | __SYSCALL(__NR_mq_getsetattr, sys_mq_getsetattr) | ||
557 | #define __NR_kexec_load 246 | ||
558 | __SYSCALL(__NR_kexec_load, sys_kexec_load) | ||
559 | #define __NR_waitid 247 | ||
560 | __SYSCALL(__NR_waitid, sys_waitid) | ||
561 | #define __NR_add_key 248 | ||
562 | __SYSCALL(__NR_add_key, sys_add_key) | ||
563 | #define __NR_request_key 249 | ||
564 | __SYSCALL(__NR_request_key, sys_request_key) | ||
565 | #define __NR_keyctl 250 | ||
566 | __SYSCALL(__NR_keyctl, sys_keyctl) | ||
567 | #define __NR_ioprio_set 251 | ||
568 | __SYSCALL(__NR_ioprio_set, sys_ioprio_set) | ||
569 | #define __NR_ioprio_get 252 | ||
570 | __SYSCALL(__NR_ioprio_get, sys_ioprio_get) | ||
571 | #define __NR_inotify_init 253 | ||
572 | __SYSCALL(__NR_inotify_init, sys_inotify_init) | ||
573 | #define __NR_inotify_add_watch 254 | ||
574 | __SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch) | ||
575 | #define __NR_inotify_rm_watch 255 | ||
576 | __SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch) | ||
577 | #define __NR_migrate_pages 256 | ||
578 | __SYSCALL(__NR_migrate_pages, sys_migrate_pages) | ||
579 | #define __NR_openat 257 | ||
580 | __SYSCALL(__NR_openat, sys_openat) | ||
581 | #define __NR_mkdirat 258 | ||
582 | __SYSCALL(__NR_mkdirat, sys_mkdirat) | ||
583 | #define __NR_mknodat 259 | ||
584 | __SYSCALL(__NR_mknodat, sys_mknodat) | ||
585 | #define __NR_fchownat 260 | ||
586 | __SYSCALL(__NR_fchownat, sys_fchownat) | ||
587 | #define __NR_futimesat 261 | ||
588 | __SYSCALL(__NR_futimesat, sys_futimesat) | ||
589 | #define __NR_newfstatat 262 | ||
590 | __SYSCALL(__NR_newfstatat, sys_newfstatat) | ||
591 | #define __NR_unlinkat 263 | ||
592 | __SYSCALL(__NR_unlinkat, sys_unlinkat) | ||
593 | #define __NR_renameat 264 | ||
594 | __SYSCALL(__NR_renameat, sys_renameat) | ||
595 | #define __NR_linkat 265 | ||
596 | __SYSCALL(__NR_linkat, sys_linkat) | ||
597 | #define __NR_symlinkat 266 | ||
598 | __SYSCALL(__NR_symlinkat, sys_symlinkat) | ||
599 | #define __NR_readlinkat 267 | ||
600 | __SYSCALL(__NR_readlinkat, sys_readlinkat) | ||
601 | #define __NR_fchmodat 268 | ||
602 | __SYSCALL(__NR_fchmodat, sys_fchmodat) | ||
603 | #define __NR_faccessat 269 | ||
604 | __SYSCALL(__NR_faccessat, sys_faccessat) | ||
605 | #define __NR_pselect6 270 | ||
606 | __SYSCALL(__NR_pselect6, sys_pselect6) | ||
607 | #define __NR_ppoll 271 | ||
608 | __SYSCALL(__NR_ppoll, sys_ppoll) | ||
609 | #define __NR_unshare 272 | ||
610 | __SYSCALL(__NR_unshare, sys_unshare) | ||
611 | #define __NR_set_robust_list 273 | ||
612 | __SYSCALL(__NR_set_robust_list, sys_set_robust_list) | ||
613 | #define __NR_get_robust_list 274 | ||
614 | __SYSCALL(__NR_get_robust_list, sys_get_robust_list) | ||
615 | #define __NR_splice 275 | ||
616 | __SYSCALL(__NR_splice, sys_splice) | ||
617 | #define __NR_tee 276 | ||
618 | __SYSCALL(__NR_tee, sys_tee) | ||
619 | #define __NR_sync_file_range 277 | ||
620 | __SYSCALL(__NR_sync_file_range, sys_sync_file_range) | ||
621 | #define __NR_vmsplice 278 | ||
622 | __SYSCALL(__NR_vmsplice, sys_vmsplice) | ||
623 | #define __NR_move_pages 279 | ||
624 | __SYSCALL(__NR_move_pages, sys_move_pages) | ||
625 | #define __NR_utimensat 280 | ||
626 | __SYSCALL(__NR_utimensat, sys_utimensat) | ||
627 | #define __NR_epoll_pwait 281 | ||
628 | __SYSCALL(__NR_epoll_pwait, sys_epoll_pwait) | ||
629 | #define __NR_signalfd 282 | ||
630 | __SYSCALL(__NR_signalfd, sys_signalfd) | ||
631 | #define __NR_timerfd_create 283 | ||
632 | __SYSCALL(__NR_timerfd_create, sys_timerfd_create) | ||
633 | #define __NR_eventfd 284 | ||
634 | __SYSCALL(__NR_eventfd, sys_eventfd) | ||
635 | #define __NR_fallocate 285 | ||
636 | __SYSCALL(__NR_fallocate, sys_fallocate) | ||
637 | #define __NR_timerfd_settime 286 | ||
638 | __SYSCALL(__NR_timerfd_settime, sys_timerfd_settime) | ||
639 | #define __NR_timerfd_gettime 287 | ||
640 | __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime) | ||
641 | #define __NR_accept4 288 | ||
642 | __SYSCALL(__NR_accept4, sys_accept4) | ||
643 | #define __NR_signalfd4 289 | ||
644 | __SYSCALL(__NR_signalfd4, sys_signalfd4) | ||
645 | #define __NR_eventfd2 290 | ||
646 | __SYSCALL(__NR_eventfd2, sys_eventfd2) | ||
647 | #define __NR_epoll_create1 291 | ||
648 | __SYSCALL(__NR_epoll_create1, sys_epoll_create1) | ||
649 | #define __NR_dup3 292 | ||
650 | __SYSCALL(__NR_dup3, sys_dup3) | ||
651 | #define __NR_pipe2 293 | ||
652 | __SYSCALL(__NR_pipe2, sys_pipe2) | ||
653 | #define __NR_inotify_init1 294 | ||
654 | __SYSCALL(__NR_inotify_init1, sys_inotify_init1) | ||
655 | #define __NR_preadv 295 | ||
656 | __SYSCALL(__NR_preadv, sys_preadv) | ||
657 | #define __NR_pwritev 296 | ||
658 | __SYSCALL(__NR_pwritev, sys_pwritev) | ||
659 | #define __NR_rt_tgsigqueueinfo 297 | ||
660 | __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) | ||
661 | #define __NR_perf_event_open 298 | ||
662 | __SYSCALL(__NR_perf_event_open, sys_perf_event_open) | ||
663 | #define __NR_recvmmsg 299 | ||
664 | __SYSCALL(__NR_recvmmsg, sys_recvmmsg) | ||
665 | #define __NR_fanotify_init 300 | ||
666 | __SYSCALL(__NR_fanotify_init, sys_fanotify_init) | ||
667 | #define __NR_fanotify_mark 301 | ||
668 | __SYSCALL(__NR_fanotify_mark, sys_fanotify_mark) | ||
669 | #define __NR_prlimit64 302 | ||
670 | __SYSCALL(__NR_prlimit64, sys_prlimit64) | ||
671 | #define __NR_name_to_handle_at 303 | ||
672 | __SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at) | ||
673 | #define __NR_open_by_handle_at 304 | ||
674 | __SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at) | ||
675 | #define __NR_clock_adjtime 305 | ||
676 | __SYSCALL(__NR_clock_adjtime, sys_clock_adjtime) | ||
677 | #define __NR_syncfs 306 | ||
678 | __SYSCALL(__NR_syncfs, sys_syncfs) | ||
679 | #define __NR_sendmmsg 307 | ||
680 | __SYSCALL(__NR_sendmmsg, sys_sendmmsg) | ||
681 | #define __NR_setns 308 | ||
682 | __SYSCALL(__NR_setns, sys_setns) | ||
683 | #define __NR_getcpu 309 | ||
684 | __SYSCALL(__NR_getcpu, sys_getcpu) | ||
685 | #define __NR_process_vm_readv 310 | ||
686 | __SYSCALL(__NR_process_vm_readv, sys_process_vm_readv) | ||
687 | #define __NR_process_vm_writev 311 | ||
688 | __SYSCALL(__NR_process_vm_writev, sys_process_vm_writev) | ||
689 | |||
690 | #ifndef __NO_STUBS | ||
691 | #define __ARCH_WANT_OLD_READDIR | ||
692 | #define __ARCH_WANT_OLD_STAT | ||
693 | #define __ARCH_WANT_SYS_ALARM | ||
694 | #define __ARCH_WANT_SYS_GETHOSTNAME | ||
695 | #define __ARCH_WANT_SYS_PAUSE | ||
696 | #define __ARCH_WANT_SYS_SGETMASK | ||
697 | #define __ARCH_WANT_SYS_SIGNAL | ||
698 | #define __ARCH_WANT_SYS_UTIME | ||
699 | #define __ARCH_WANT_SYS_WAITPID | ||
700 | #define __ARCH_WANT_SYS_SOCKETCALL | ||
701 | #define __ARCH_WANT_SYS_FADVISE64 | ||
702 | #define __ARCH_WANT_SYS_GETPGRP | ||
703 | #define __ARCH_WANT_SYS_LLSEEK | ||
704 | #define __ARCH_WANT_SYS_NICE | ||
705 | #define __ARCH_WANT_SYS_OLD_GETRLIMIT | ||
706 | #define __ARCH_WANT_SYS_OLD_UNAME | ||
707 | #define __ARCH_WANT_SYS_OLDUMOUNT | ||
708 | #define __ARCH_WANT_SYS_SIGPENDING | ||
709 | #define __ARCH_WANT_SYS_SIGPROCMASK | ||
710 | #define __ARCH_WANT_SYS_RT_SIGACTION | ||
711 | #define __ARCH_WANT_SYS_RT_SIGSUSPEND | ||
712 | #define __ARCH_WANT_SYS_TIME | ||
713 | #define __ARCH_WANT_COMPAT_SYS_TIME | ||
714 | #endif /* __NO_STUBS */ | ||
715 | |||
716 | #ifdef __KERNEL__ | ||
717 | |||
718 | #ifndef COMPILE_OFFSETS | ||
719 | #include <asm/asm-offsets.h> | ||
720 | #define NR_syscalls (__NR_syscall_max + 1) | ||
721 | #endif | ||
722 | |||
723 | /* | ||
724 | * "Conditional" syscalls | ||
725 | * | ||
726 | * What we want is __attribute__((weak,alias("sys_ni_syscall"))), | ||
727 | * but it doesn't work on all toolchains, so we just do it by hand | ||
728 | */ | ||
729 | #define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") | ||
730 | #endif /* __KERNEL__ */ | ||
731 | |||
732 | #endif /* _ASM_X86_UNISTD_64_H */ | ||
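The empty __SYSCALL(a, b) default at the top of this header is what made it double as a table generator: a consumer redefines the macro and re-includes the header. A sketch of the idiom, modeled on arch/x86/kernel/syscall_64.c of this era (types simplified; __NR_syscall_max comes from the generated asm-offsets.h):

    #define __SYSCALL(nr, sym) extern void sym(void);
    #include <asm/unistd_64.h>              /* first pass: declarations */

    #undef __SYSCALL
    #undef _ASM_X86_UNISTD_64_H             /* defeat the include guard */
    #define __SYSCALL(nr, sym) [nr] = sym,

    typedef void (*sys_call_ptr_t)(void);
    extern void sys_ni_syscall(void);

    const sys_call_ptr_t sys_call_table[__NR_syscall_max + 1] = {
            [0 ... __NR_syscall_max] = sys_ni_syscall,  /* GNU range init */
    #include <asm/unistd_64.h>              /* second pass: table entries */
    };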
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 1ac860a09849..517d4767ffdd 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h | |||
@@ -179,6 +179,7 @@ struct x86_msi_ops { | |||
179 | int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type); | 179 | int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type); |
180 | void (*teardown_msi_irq)(unsigned int irq); | 180 | void (*teardown_msi_irq)(unsigned int irq); |
181 | void (*teardown_msi_irqs)(struct pci_dev *dev); | 181 | void (*teardown_msi_irqs)(struct pci_dev *dev); |
182 | void (*restore_msi_irqs)(struct pci_dev *dev, int irq); | ||
182 | }; | 183 | }; |
183 | 184 | ||
184 | extern struct x86_init_ops x86_init; | 185 | extern struct x86_init_ops x86_init; |
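The new restore_msi_irqs hook follows the usual x86 ops-struct pattern: a default lives in the x86_msi ops table and a platform can patch in its own at boot (restoring MSI state after resume is the motivating case). A hedged sketch of an override; the function name and body are illustrative, not actual platform code:

    #include <linux/pci.h>
    #include <asm/x86_init.h>

    /* hypothetical hook: re-arm a device's MSI state after resume */
    static void example_restore_msi_irqs(struct pci_dev *dev, int irq)
    {
            /* platform-specific reprogramming of dev's MSI registers */
    }

    static void __init example_platform_setup(void)
    {
            x86_msi.restore_msi_irqs = example_restore_msi_irqs;
    }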
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 8baca3c4871c..5369059c07a9 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -25,7 +25,8 @@ obj-$(CONFIG_IRQ_WORK) += irq_work.o | |||
25 | obj-y += probe_roms.o | 25 | obj-y += probe_roms.o |
26 | obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o | 26 | obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o |
27 | obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o | 27 | obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o |
28 | obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o | 28 | obj-y += syscall_$(BITS).o |
29 | obj-$(CONFIG_X86_64) += vsyscall_64.o | ||
29 | obj-$(CONFIG_X86_64) += vsyscall_emu_64.o | 30 | obj-$(CONFIG_X86_64) += vsyscall_emu_64.o |
30 | obj-y += bootflag.o e820.o | 31 | obj-y += bootflag.o e820.o |
31 | obj-y += pci-dma.o quirks.o topology.o kdebugfs.o | 32 | obj-y += pci-dma.o quirks.o topology.o kdebugfs.o |
@@ -80,6 +81,7 @@ obj-$(CONFIG_APB_TIMER) += apb_timer.o | |||
80 | obj-$(CONFIG_AMD_NB) += amd_nb.o | 81 | obj-$(CONFIG_AMD_NB) += amd_nb.o |
81 | obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o | 82 | obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o |
82 | obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o | 83 | obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o |
84 | obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o | ||
83 | 85 | ||
84 | obj-$(CONFIG_KVM_GUEST) += kvm.o | 86 | obj-$(CONFIG_KVM_GUEST) += kvm.o |
85 | obj-$(CONFIG_KVM_CLOCK) += kvmclock.o | 87 | obj-$(CONFIG_KVM_CLOCK) += kvmclock.o |
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 013c1810ce72..be16854591cc 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c | |||
@@ -119,6 +119,37 @@ bool __init early_is_amd_nb(u32 device) | |||
119 | return false; | 119 | return false; |
120 | } | 120 | } |
121 | 121 | ||
122 | struct resource *amd_get_mmconfig_range(struct resource *res) | ||
123 | { | ||
124 | u32 address; | ||
125 | u64 base, msr; | ||
126 | unsigned segn_busn_bits; | ||
127 | |||
128 | if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) | ||
129 | return NULL; | ||
130 | |||
131 | /* assume all CPUs from family 10h onward have mmconfig */ | ||
132 | if (boot_cpu_data.x86 < 0x10) | ||
133 | return NULL; | ||
134 | |||
135 | address = MSR_FAM10H_MMIO_CONF_BASE; | ||
136 | rdmsrl(address, msr); | ||
137 | |||
138 | /* mmconfig is not enabled */ | ||
139 | if (!(msr & FAM10H_MMIO_CONF_ENABLE)) | ||
140 | return NULL; | ||
141 | |||
142 | base = msr & (FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT); | ||
143 | |||
144 | segn_busn_bits = (msr >> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) & | ||
145 | FAM10H_MMIO_CONF_BUSRANGE_MASK; | ||
146 | |||
147 | res->flags = IORESOURCE_MEM; | ||
148 | res->start = base; | ||
149 | res->end = base + (1ULL<<(segn_busn_bits + 20)) - 1; | ||
150 | return res; | ||
151 | } | ||
152 | |||
122 | int amd_get_subcaches(int cpu) | 153 | int amd_get_subcaches(int cpu) |
123 | { | 154 | { |
124 | struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link; | 155 | struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link; |
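A worked check of the size arithmetic in amd_get_mmconfig_range(): each PCI bus owns 1 MiB (2^20 bytes) of MMCONFIG space and the BUSRANGE field encodes the bus count as a power of two, hence 1ULL << (segn_busn_bits + 20). A standalone sketch with assumed field values:

    #include <stdio.h>

    int main(void)
    {
            unsigned segn_busn_bits = 8;             /* assumed: 2^8 = 256 buses */
            unsigned long long base = 0xe0000000ULL; /* assumed MSR base field */
            unsigned long long size = 1ULL << (segn_busn_bits + 20);

            /* 256 buses * 1 MiB = 256 MiB window */
            printf("start=%#llx end=%#llx (%llu MiB)\n",
                   base, base + size - 1, size >> 20);
            return 0;
    }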
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index a46bd383953c..f76623cbe263 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
@@ -383,21 +383,21 @@ static int ignore_sys_suspend; | |||
383 | static int ignore_normal_resume; | 383 | static int ignore_normal_resume; |
384 | static int bounce_interval __read_mostly = DEFAULT_BOUNCE_INTERVAL; | 384 | static int bounce_interval __read_mostly = DEFAULT_BOUNCE_INTERVAL; |
385 | 385 | ||
386 | static int debug __read_mostly; | 386 | static bool debug __read_mostly; |
387 | static int smp __read_mostly; | 387 | static bool smp __read_mostly; |
388 | static int apm_disabled = -1; | 388 | static int apm_disabled = -1; |
389 | #ifdef CONFIG_SMP | 389 | #ifdef CONFIG_SMP |
390 | static int power_off; | 390 | static bool power_off; |
391 | #else | 391 | #else |
392 | static int power_off = 1; | 392 | static bool power_off = 1; |
393 | #endif | 393 | #endif |
394 | static int realmode_power_off; | 394 | static bool realmode_power_off; |
395 | #ifdef CONFIG_APM_ALLOW_INTS | 395 | #ifdef CONFIG_APM_ALLOW_INTS |
396 | static int allow_ints = 1; | 396 | static bool allow_ints = 1; |
397 | #else | 397 | #else |
398 | static int allow_ints; | 398 | static bool allow_ints; |
399 | #endif | 399 | #endif |
400 | static int broken_psr; | 400 | static bool broken_psr; |
401 | 401 | ||
402 | static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue); | 402 | static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue); |
403 | static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); | 403 | static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); |
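The int-to-bool flips above track the tree-wide cleanup in which module_param(..., bool, ...) started demanding a genuine bool variable. A sketch of the pairing that motivates the type change (the MODULE_PARM_DESC text is assumed, not quoted from apm_32.c):

    #include <linux/module.h>

    static bool debug __read_mostly;
    /* module_param(..., bool, ...) type-checks its first argument;
     * an int here would now fail to build */
    module_param(debug, bool, 0644);
    MODULE_PARM_DESC(debug, "enable APM debug output");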
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 4f13fafc5264..68de2dc962ec 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c | |||
@@ -67,4 +67,6 @@ void common(void) { | |||
67 | OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); | 67 | OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); |
68 | OFFSET(BP_version, boot_params, hdr.version); | 68 | OFFSET(BP_version, boot_params, hdr.version); |
69 | OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment); | 69 | OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment); |
70 | OFFSET(BP_pref_address, boot_params, hdr.pref_address); | ||
71 | OFFSET(BP_code32_start, boot_params, hdr.code32_start); | ||
70 | } | 72 | } |
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 395a10e68067..85d98ab15cdc 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c | |||
@@ -3,6 +3,11 @@ | |||
3 | #include <linux/lguest.h> | 3 | #include <linux/lguest.h> |
4 | #include "../../../drivers/lguest/lg.h" | 4 | #include "../../../drivers/lguest/lg.h" |
5 | 5 | ||
6 | #define __SYSCALL_I386(nr, sym, compat) [nr] = 1, | ||
7 | static char syscalls[] = { | ||
8 | #include <asm/syscalls_32.h> | ||
9 | }; | ||
10 | |||
6 | /* workaround for a warning with -Wmissing-prototypes */ | 11 | /* workaround for a warning with -Wmissing-prototypes */ |
7 | void foo(void); | 12 | void foo(void); |
8 | 13 | ||
@@ -76,4 +81,7 @@ void foo(void) | |||
76 | OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode); | 81 | OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode); |
77 | OFFSET(LGUEST_PAGES_regs, lguest_pages, regs); | 82 | OFFSET(LGUEST_PAGES_regs, lguest_pages, regs); |
78 | #endif | 83 | #endif |
84 | BLANK(); | ||
85 | DEFINE(__NR_syscall_max, sizeof(syscalls) - 1); | ||
86 | DEFINE(NR_syscalls, sizeof(syscalls)); | ||
79 | } | 87 | } |
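The syscalls[] trick above is worth spelling out: __SYSCALL_I386 expands every entry to [nr] = 1, and C's designated initializers size the array to one past the highest index used, so sizeof() yields the syscall count with no hand-maintained constant. A minimal standalone illustration:

    #include <stdio.h>

    /* stand-in for the generated [nr] = 1 entries */
    static char syscalls[] = { [0] = 1, [7] = 1, [348] = 1 };

    int main(void)
    {
            /* sizeof == 349: one past the largest designated index */
            printf("NR_syscalls = %zu\n", sizeof(syscalls));
            printf("__NR_syscall_max = %zu\n", sizeof(syscalls) - 1);
            return 0;
    }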
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index e72a1194af22..834e897b1e25 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c | |||
@@ -1,11 +1,12 @@ | |||
1 | #include <asm/ia32.h> | 1 | #include <asm/ia32.h> |
2 | 2 | ||
3 | #define __NO_STUBS 1 | 3 | #define __SYSCALL_64(nr, sym, compat) [nr] = 1, |
4 | #undef __SYSCALL | 4 | static char syscalls_64[] = { |
5 | #undef _ASM_X86_UNISTD_64_H | 5 | #include <asm/syscalls_64.h> |
6 | #define __SYSCALL(nr, sym) [nr] = 1, | 6 | }; |
7 | static char syscalls[] = { | 7 | #define __SYSCALL_I386(nr, sym, compat) [nr] = 1, |
8 | #include <asm/unistd.h> | 8 | static char syscalls_ia32[] = { |
9 | #include <asm/syscalls_32.h> | ||
9 | }; | 10 | }; |
10 | 11 | ||
11 | int main(void) | 12 | int main(void) |
@@ -72,7 +73,11 @@ int main(void) | |||
72 | OFFSET(TSS_ist, tss_struct, x86_tss.ist); | 73 | OFFSET(TSS_ist, tss_struct, x86_tss.ist); |
73 | BLANK(); | 74 | BLANK(); |
74 | 75 | ||
75 | DEFINE(__NR_syscall_max, sizeof(syscalls) - 1); | 76 | DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1); |
77 | DEFINE(NR_syscalls, sizeof(syscalls_64)); | ||
78 | |||
79 | DEFINE(__NR_ia32_syscall_max, sizeof(syscalls_ia32) - 1); | ||
80 | DEFINE(IA32_NR_syscalls, sizeof(syscalls_ia32)); | ||
76 | 81 | ||
77 | return 0; | 82 | return 0; |
78 | } | 83 | } |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 850f2963a420..d43cad74f166 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -1021,6 +1021,8 @@ __setup("clearcpuid=", setup_disablecpuid); | |||
1021 | 1021 | ||
1022 | #ifdef CONFIG_X86_64 | 1022 | #ifdef CONFIG_X86_64 |
1023 | struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; | 1023 | struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; |
1024 | struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1, | ||
1025 | (unsigned long) nmi_idt_table }; | ||
1024 | 1026 | ||
1025 | DEFINE_PER_CPU_FIRST(union irq_stack_union, | 1027 | DEFINE_PER_CPU_FIRST(union irq_stack_union, |
1026 | irq_stack_union) __aligned(PAGE_SIZE); | 1028 | irq_stack_union) __aligned(PAGE_SIZE); |
@@ -1085,6 +1087,26 @@ unsigned long kernel_eflags; | |||
1085 | */ | 1087 | */ |
1086 | DEFINE_PER_CPU(struct orig_ist, orig_ist); | 1088 | DEFINE_PER_CPU(struct orig_ist, orig_ist); |
1087 | 1089 | ||
1090 | static DEFINE_PER_CPU(unsigned long, debug_stack_addr); | ||
1091 | DEFINE_PER_CPU(int, debug_stack_usage); | ||
1092 | |||
1093 | int is_debug_stack(unsigned long addr) | ||
1094 | { | ||
1095 | return __get_cpu_var(debug_stack_usage) || | ||
1096 | (addr <= __get_cpu_var(debug_stack_addr) && | ||
1097 | addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); | ||
1098 | } | ||
1099 | |||
1100 | void debug_stack_set_zero(void) | ||
1101 | { | ||
1102 | load_idt((const struct desc_ptr *)&nmi_idt_descr); | ||
1103 | } | ||
1104 | |||
1105 | void debug_stack_reset(void) | ||
1106 | { | ||
1107 | load_idt((const struct desc_ptr *)&idt_descr); | ||
1108 | } | ||
1109 | |||
1088 | #else /* CONFIG_X86_64 */ | 1110 | #else /* CONFIG_X86_64 */ |
1089 | 1111 | ||
1090 | DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; | 1112 | DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; |
@@ -1212,6 +1234,8 @@ void __cpuinit cpu_init(void) | |||
1212 | estacks += exception_stack_sizes[v]; | 1234 | estacks += exception_stack_sizes[v]; |
1213 | oist->ist[v] = t->x86_tss.ist[v] = | 1235 | oist->ist[v] = t->x86_tss.ist[v] = |
1214 | (unsigned long)estacks; | 1236 | (unsigned long)estacks; |
1237 | if (v == DEBUG_STACK-1) | ||
1238 | per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks; | ||
1215 | } | 1239 | } |
1216 | } | 1240 | } |
1217 | 1241 | ||
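The range test in is_debug_stack() reads naturally once you recall that x86 stacks grow down: debug_stack_addr holds the top of the per-CPU debug stack, so an address is on it exactly when it lies in (top - DEBUG_STKSZ, top]. The predicate, restated standalone:

    /* true when addr is on a downward-growing stack whose highest
     * address is 'top' and whose size is 'size' */
    static int on_stack(unsigned long addr, unsigned long top,
                        unsigned long size)
    {
            return addr <= top && addr > top - size;
    }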
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index a3b0811693c9..6b45e5e7a901 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -844,8 +844,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) | |||
844 | 844 | ||
845 | #include <linux/kobject.h> | 845 | #include <linux/kobject.h> |
846 | #include <linux/sysfs.h> | 846 | #include <linux/sysfs.h> |
847 | 847 | #include <linux/cpu.h> | |
848 | extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */ | ||
849 | 848 | ||
850 | /* pointer to kobject for cpuX/cache */ | 849 | /* pointer to kobject for cpuX/cache */ |
851 | static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject); | 850 | static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject); |
@@ -1073,9 +1072,9 @@ err_out: | |||
1073 | static DECLARE_BITMAP(cache_dev_map, NR_CPUS); | 1072 | static DECLARE_BITMAP(cache_dev_map, NR_CPUS); |
1074 | 1073 | ||
1075 | /* Add/Remove cache interface for CPU device */ | 1074 | /* Add/Remove cache interface for CPU device */ |
1076 | static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | 1075 | static int __cpuinit cache_add_dev(struct device *dev) |
1077 | { | 1076 | { |
1078 | unsigned int cpu = sys_dev->id; | 1077 | unsigned int cpu = dev->id; |
1079 | unsigned long i, j; | 1078 | unsigned long i, j; |
1080 | struct _index_kobject *this_object; | 1079 | struct _index_kobject *this_object; |
1081 | struct _cpuid4_info *this_leaf; | 1080 | struct _cpuid4_info *this_leaf; |
@@ -1087,7 +1086,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | |||
1087 | 1086 | ||
1088 | retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu), | 1087 | retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu), |
1089 | &ktype_percpu_entry, | 1088 | &ktype_percpu_entry, |
1090 | &sys_dev->kobj, "%s", "cache"); | 1089 | &dev->kobj, "%s", "cache"); |
1091 | if (retval < 0) { | 1090 | if (retval < 0) { |
1092 | cpuid4_cache_sysfs_exit(cpu); | 1091 | cpuid4_cache_sysfs_exit(cpu); |
1093 | return retval; | 1092 | return retval; |
@@ -1124,9 +1123,9 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | |||
1124 | return 0; | 1123 | return 0; |
1125 | } | 1124 | } |
1126 | 1125 | ||
1127 | static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) | 1126 | static void __cpuinit cache_remove_dev(struct device *dev) |
1128 | { | 1127 | { |
1129 | unsigned int cpu = sys_dev->id; | 1128 | unsigned int cpu = dev->id; |
1130 | unsigned long i; | 1129 | unsigned long i; |
1131 | 1130 | ||
1132 | if (per_cpu(ici_cpuid4_info, cpu) == NULL) | 1131 | if (per_cpu(ici_cpuid4_info, cpu) == NULL) |
@@ -1145,17 +1144,17 @@ static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, | |||
1145 | unsigned long action, void *hcpu) | 1144 | unsigned long action, void *hcpu) |
1146 | { | 1145 | { |
1147 | unsigned int cpu = (unsigned long)hcpu; | 1146 | unsigned int cpu = (unsigned long)hcpu; |
1148 | struct sys_device *sys_dev; | 1147 | struct device *dev; |
1149 | 1148 | ||
1150 | sys_dev = get_cpu_sysdev(cpu); | 1149 | dev = get_cpu_device(cpu); |
1151 | switch (action) { | 1150 | switch (action) { |
1152 | case CPU_ONLINE: | 1151 | case CPU_ONLINE: |
1153 | case CPU_ONLINE_FROZEN: | 1152 | case CPU_ONLINE_FROZEN: |
1154 | cache_add_dev(sys_dev); | 1153 | cache_add_dev(dev); |
1155 | break; | 1154 | break; |
1156 | case CPU_DEAD: | 1155 | case CPU_DEAD: |
1157 | case CPU_DEAD_FROZEN: | 1156 | case CPU_DEAD_FROZEN: |
1158 | cache_remove_dev(sys_dev); | 1157 | cache_remove_dev(dev); |
1159 | break; | 1158 | break; |
1160 | } | 1159 | } |
1161 | return NOTIFY_OK; | 1160 | return NOTIFY_OK; |
@@ -1174,9 +1173,9 @@ static int __cpuinit cache_sysfs_init(void) | |||
1174 | 1173 | ||
1175 | for_each_online_cpu(i) { | 1174 | for_each_online_cpu(i) { |
1176 | int err; | 1175 | int err; |
1177 | struct sys_device *sys_dev = get_cpu_sysdev(i); | 1176 | struct device *dev = get_cpu_device(i); |
1178 | 1177 | ||
1179 | err = cache_add_dev(sys_dev); | 1178 | err = cache_add_dev(dev); |
1180 | if (err) | 1179 | if (err) |
1181 | return err; | 1180 | return err; |
1182 | } | 1181 | } |
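
Note on the conversion above: the CPU hierarchy moved from the old sysdev layer to regular struct device objects, so the ad-hoc extern for cpu_sysdev_class is replaced by <linux/cpu.h>, which declares get_cpu_device(). A minimal, hedged sketch of the resulting lookup pattern (the example_ helper is illustrative, not part of the patch):

    #include <linux/cpu.h>      /* get_cpu_device() */
    #include <linux/device.h>
    #include <linux/errno.h>

    static int example_attach(unsigned int cpu)
    {
            struct device *dev = get_cpu_device(cpu); /* was get_cpu_sysdev() */

            if (!dev)   /* CPU not (yet) registered with the driver core */
                    return -ENODEV;

            /* new sysfs objects are parented on &dev->kobj, e.g. cpuN/cache */
            return 0;
    }
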
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index fefcc69ee8b5..ed44c8a65858 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h | |||
@@ -1,4 +1,4 @@ | |||
1 | #include <linux/sysdev.h> | 1 | #include <linux/device.h> |
2 | #include <asm/mce.h> | 2 | #include <asm/mce.h> |
3 | 3 | ||
4 | enum severity_level { | 4 | enum severity_level { |
@@ -17,7 +17,7 @@ enum severity_level { | |||
17 | struct mce_bank { | 17 | struct mce_bank { |
18 | u64 ctl; /* subevents to enable */ | 18 | u64 ctl; /* subevents to enable */ |
19 | unsigned char init; /* initialise bank? */ | 19 | unsigned char init; /* initialise bank? */ |
20 | struct sysdev_attribute attr; /* sysdev attribute */ | 20 | struct device_attribute attr; /* device attribute */ |
21 | char attrname[ATTR_LEN]; /* attribute name */ | 21 | char attrname[ATTR_LEN]; /* attribute name */ |
22 | }; | 22 | }; |
23 | 23 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index cbe82b5918ce..5a11ae2e9e91 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -19,7 +19,7 @@ | |||
19 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
20 | #include <linux/percpu.h> | 20 | #include <linux/percpu.h> |
21 | #include <linux/string.h> | 21 | #include <linux/string.h> |
22 | #include <linux/sysdev.h> | 22 | #include <linux/device.h> |
23 | #include <linux/syscore_ops.h> | 23 | #include <linux/syscore_ops.h> |
24 | #include <linux/delay.h> | 24 | #include <linux/delay.h> |
25 | #include <linux/ctype.h> | 25 | #include <linux/ctype.h> |
@@ -1818,7 +1818,7 @@ static struct syscore_ops mce_syscore_ops = { | |||
1818 | }; | 1818 | }; |
1819 | 1819 | ||
1820 | /* | 1820 | /* |
1821 | * mce_sysdev: Sysfs support | 1821 | * mce_device: Sysfs support |
1822 | */ | 1822 | */ |
1823 | 1823 | ||
1824 | static void mce_cpu_restart(void *data) | 1824 | static void mce_cpu_restart(void *data) |
@@ -1854,27 +1854,28 @@ static void mce_enable_ce(void *all) | |||
1854 | __mcheck_cpu_init_timer(); | 1854 | __mcheck_cpu_init_timer(); |
1855 | } | 1855 | } |
1856 | 1856 | ||
1857 | static struct sysdev_class mce_sysdev_class = { | 1857 | static struct bus_type mce_subsys = { |
1858 | .name = "machinecheck", | 1858 | .name = "machinecheck", |
1859 | .dev_name = "machinecheck", | ||
1859 | }; | 1860 | }; |
1860 | 1861 | ||
1861 | DEFINE_PER_CPU(struct sys_device, mce_sysdev); | 1862 | struct device *mce_device[CONFIG_NR_CPUS]; |
1862 | 1863 | ||
1863 | __cpuinitdata | 1864 | __cpuinitdata |
1864 | void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); | 1865 | void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); |
1865 | 1866 | ||
1866 | static inline struct mce_bank *attr_to_bank(struct sysdev_attribute *attr) | 1867 | static inline struct mce_bank *attr_to_bank(struct device_attribute *attr) |
1867 | { | 1868 | { |
1868 | return container_of(attr, struct mce_bank, attr); | 1869 | return container_of(attr, struct mce_bank, attr); |
1869 | } | 1870 | } |
1870 | 1871 | ||
1871 | static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, | 1872 | static ssize_t show_bank(struct device *s, struct device_attribute *attr, |
1872 | char *buf) | 1873 | char *buf) |
1873 | { | 1874 | { |
1874 | return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl); | 1875 | return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl); |
1875 | } | 1876 | } |
1876 | 1877 | ||
1877 | static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, | 1878 | static ssize_t set_bank(struct device *s, struct device_attribute *attr, |
1878 | const char *buf, size_t size) | 1879 | const char *buf, size_t size) |
1879 | { | 1880 | { |
1880 | u64 new; | 1881 | u64 new; |
@@ -1889,14 +1890,14 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, | |||
1889 | } | 1890 | } |
1890 | 1891 | ||
1891 | static ssize_t | 1892 | static ssize_t |
1892 | show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf) | 1893 | show_trigger(struct device *s, struct device_attribute *attr, char *buf) |
1893 | { | 1894 | { |
1894 | strcpy(buf, mce_helper); | 1895 | strcpy(buf, mce_helper); |
1895 | strcat(buf, "\n"); | 1896 | strcat(buf, "\n"); |
1896 | return strlen(mce_helper) + 1; | 1897 | return strlen(mce_helper) + 1; |
1897 | } | 1898 | } |
1898 | 1899 | ||
1899 | static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, | 1900 | static ssize_t set_trigger(struct device *s, struct device_attribute *attr, |
1900 | const char *buf, size_t siz) | 1901 | const char *buf, size_t siz) |
1901 | { | 1902 | { |
1902 | char *p; | 1903 | char *p; |
@@ -1911,8 +1912,8 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, | |||
1911 | return strlen(mce_helper) + !!p; | 1912 | return strlen(mce_helper) + !!p; |
1912 | } | 1913 | } |
1913 | 1914 | ||
1914 | static ssize_t set_ignore_ce(struct sys_device *s, | 1915 | static ssize_t set_ignore_ce(struct device *s, |
1915 | struct sysdev_attribute *attr, | 1916 | struct device_attribute *attr, |
1916 | const char *buf, size_t size) | 1917 | const char *buf, size_t size) |
1917 | { | 1918 | { |
1918 | u64 new; | 1919 | u64 new; |
@@ -1935,8 +1936,8 @@ static ssize_t set_ignore_ce(struct sys_device *s, | |||
1935 | return size; | 1936 | return size; |
1936 | } | 1937 | } |
1937 | 1938 | ||
1938 | static ssize_t set_cmci_disabled(struct sys_device *s, | 1939 | static ssize_t set_cmci_disabled(struct device *s, |
1939 | struct sysdev_attribute *attr, | 1940 | struct device_attribute *attr, |
1940 | const char *buf, size_t size) | 1941 | const char *buf, size_t size) |
1941 | { | 1942 | { |
1942 | u64 new; | 1943 | u64 new; |
@@ -1958,108 +1959,117 @@ static ssize_t set_cmci_disabled(struct sys_device *s, | |||
1958 | return size; | 1959 | return size; |
1959 | } | 1960 | } |
1960 | 1961 | ||
1961 | static ssize_t store_int_with_restart(struct sys_device *s, | 1962 | static ssize_t store_int_with_restart(struct device *s, |
1962 | struct sysdev_attribute *attr, | 1963 | struct device_attribute *attr, |
1963 | const char *buf, size_t size) | 1964 | const char *buf, size_t size) |
1964 | { | 1965 | { |
1965 | ssize_t ret = sysdev_store_int(s, attr, buf, size); | 1966 | ssize_t ret = device_store_int(s, attr, buf, size); |
1966 | mce_restart(); | 1967 | mce_restart(); |
1967 | return ret; | 1968 | return ret; |
1968 | } | 1969 | } |
1969 | 1970 | ||
1970 | static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); | 1971 | static DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger); |
1971 | static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); | 1972 | static DEVICE_INT_ATTR(tolerant, 0644, tolerant); |
1972 | static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout); | 1973 | static DEVICE_INT_ATTR(monarch_timeout, 0644, monarch_timeout); |
1973 | static SYSDEV_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce); | 1974 | static DEVICE_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce); |
1974 | 1975 | ||
1975 | static struct sysdev_ext_attribute attr_check_interval = { | 1976 | static struct dev_ext_attribute dev_attr_check_interval = { |
1976 | _SYSDEV_ATTR(check_interval, 0644, sysdev_show_int, | 1977 | __ATTR(check_interval, 0644, device_show_int, store_int_with_restart), |
1977 | store_int_with_restart), | ||
1978 | &check_interval | 1978 | &check_interval |
1979 | }; | 1979 | }; |
1980 | 1980 | ||
1981 | static struct sysdev_ext_attribute attr_ignore_ce = { | 1981 | static struct dev_ext_attribute dev_attr_ignore_ce = { |
1982 | _SYSDEV_ATTR(ignore_ce, 0644, sysdev_show_int, set_ignore_ce), | 1982 | __ATTR(ignore_ce, 0644, device_show_int, set_ignore_ce), |
1983 | &mce_ignore_ce | 1983 | &mce_ignore_ce |
1984 | }; | 1984 | }; |
1985 | 1985 | ||
1986 | static struct sysdev_ext_attribute attr_cmci_disabled = { | 1986 | static struct dev_ext_attribute dev_attr_cmci_disabled = { |
1987 | _SYSDEV_ATTR(cmci_disabled, 0644, sysdev_show_int, set_cmci_disabled), | 1987 | __ATTR(cmci_disabled, 0644, device_show_int, set_cmci_disabled), |
1988 | &mce_cmci_disabled | 1988 | &mce_cmci_disabled |
1989 | }; | 1989 | }; |
1990 | 1990 | ||
1991 | static struct sysdev_attribute *mce_sysdev_attrs[] = { | 1991 | static struct device_attribute *mce_device_attrs[] = { |
1992 | &attr_tolerant.attr, | 1992 | &dev_attr_tolerant.attr, |
1993 | &attr_check_interval.attr, | 1993 | &dev_attr_check_interval.attr, |
1994 | &attr_trigger, | 1994 | &dev_attr_trigger, |
1995 | &attr_monarch_timeout.attr, | 1995 | &dev_attr_monarch_timeout.attr, |
1996 | &attr_dont_log_ce.attr, | 1996 | &dev_attr_dont_log_ce.attr, |
1997 | &attr_ignore_ce.attr, | 1997 | &dev_attr_ignore_ce.attr, |
1998 | &attr_cmci_disabled.attr, | 1998 | &dev_attr_cmci_disabled.attr, |
1999 | NULL | 1999 | NULL |
2000 | }; | 2000 | }; |
2001 | 2001 | ||
2002 | static cpumask_var_t mce_sysdev_initialized; | 2002 | static cpumask_var_t mce_device_initialized; |
2003 | 2003 | ||
2004 | /* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */ | 2004 | static void mce_device_release(struct device *dev) |
2005 | static __cpuinit int mce_sysdev_create(unsigned int cpu) | ||
2006 | { | 2005 | { |
2007 | struct sys_device *sysdev = &per_cpu(mce_sysdev, cpu); | 2006 | kfree(dev); |
2007 | } | ||
2008 | |||
2009 | /* Per cpu device init. All of the cpus still share the same ctrl bank: */ | ||
2010 | static __cpuinit int mce_device_create(unsigned int cpu) | ||
2011 | { | ||
2012 | struct device *dev; | ||
2008 | int err; | 2013 | int err; |
2009 | int i, j; | 2014 | int i, j; |
2010 | 2015 | ||
2011 | if (!mce_available(&boot_cpu_data)) | 2016 | if (!mce_available(&boot_cpu_data)) |
2012 | return -EIO; | 2017 | return -EIO; |
2013 | 2018 | ||
2014 | memset(&sysdev->kobj, 0, sizeof(struct kobject)); | 2019 | dev = kzalloc(sizeof *dev, GFP_KERNEL); |
2015 | sysdev->id = cpu; | 2020 | if (!dev) |
2016 | sysdev->cls = &mce_sysdev_class; | 2021 | return -ENOMEM; |
2022 | dev->id = cpu; | ||
2023 | dev->bus = &mce_subsys; | ||
2024 | dev->release = &mce_device_release; | ||
2017 | 2025 | ||
2018 | err = sysdev_register(sysdev); | 2026 | err = device_register(dev); |
2019 | if (err) | 2027 | if (err) |
2020 | return err; | 2028 | return err; |
2021 | 2029 | ||
2022 | for (i = 0; mce_sysdev_attrs[i]; i++) { | 2030 | for (i = 0; mce_device_attrs[i]; i++) { |
2023 | err = sysdev_create_file(sysdev, mce_sysdev_attrs[i]); | 2031 | err = device_create_file(dev, mce_device_attrs[i]); |
2024 | if (err) | 2032 | if (err) |
2025 | goto error; | 2033 | goto error; |
2026 | } | 2034 | } |
2027 | for (j = 0; j < banks; j++) { | 2035 | for (j = 0; j < banks; j++) { |
2028 | err = sysdev_create_file(sysdev, &mce_banks[j].attr); | 2036 | err = device_create_file(dev, &mce_banks[j].attr); |
2029 | if (err) | 2037 | if (err) |
2030 | goto error2; | 2038 | goto error2; |
2031 | } | 2039 | } |
2032 | cpumask_set_cpu(cpu, mce_sysdev_initialized); | 2040 | cpumask_set_cpu(cpu, mce_device_initialized); |
2041 | mce_device[cpu] = dev; | ||
2033 | 2042 | ||
2034 | return 0; | 2043 | return 0; |
2035 | error2: | 2044 | error2: |
2036 | while (--j >= 0) | 2045 | while (--j >= 0) |
2037 | sysdev_remove_file(sysdev, &mce_banks[j].attr); | 2046 | device_remove_file(dev, &mce_banks[j].attr); |
2038 | error: | 2047 | error: |
2039 | while (--i >= 0) | 2048 | while (--i >= 0) |
2040 | sysdev_remove_file(sysdev, mce_sysdev_attrs[i]); | 2049 | device_remove_file(dev, mce_device_attrs[i]); |
2041 | 2050 | ||
2042 | sysdev_unregister(sysdev); | 2051 | device_unregister(dev); |
2043 | 2052 | ||
2044 | return err; | 2053 | return err; |
2045 | } | 2054 | } |
2046 | 2055 | ||
2047 | static __cpuinit void mce_sysdev_remove(unsigned int cpu) | 2056 | static __cpuinit void mce_device_remove(unsigned int cpu) |
2048 | { | 2057 | { |
2049 | struct sys_device *sysdev = &per_cpu(mce_sysdev, cpu); | 2058 | struct device *dev = mce_device[cpu]; |
2050 | int i; | 2059 | int i; |
2051 | 2060 | ||
2052 | if (!cpumask_test_cpu(cpu, mce_sysdev_initialized)) | 2061 | if (!cpumask_test_cpu(cpu, mce_device_initialized)) |
2053 | return; | 2062 | return; |
2054 | 2063 | ||
2055 | for (i = 0; mce_sysdev_attrs[i]; i++) | 2064 | for (i = 0; mce_device_attrs[i]; i++) |
2056 | sysdev_remove_file(sysdev, mce_sysdev_attrs[i]); | 2065 | device_remove_file(dev, mce_device_attrs[i]); |
2057 | 2066 | ||
2058 | for (i = 0; i < banks; i++) | 2067 | for (i = 0; i < banks; i++) |
2059 | sysdev_remove_file(sysdev, &mce_banks[i].attr); | 2068 | device_remove_file(dev, &mce_banks[i].attr); |
2060 | 2069 | ||
2061 | sysdev_unregister(sysdev); | 2070 | device_unregister(dev); |
2062 | cpumask_clear_cpu(cpu, mce_sysdev_initialized); | 2071 | cpumask_clear_cpu(cpu, mce_device_initialized); |
2072 | mce_device[cpu] = NULL; | ||
2063 | } | 2073 | } |
2064 | 2074 | ||
2065 | /* Make sure there are no machine checks on offlined CPUs. */ | 2075 | /* Make sure there are no machine checks on offlined CPUs. */ |
@@ -2109,7 +2119,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
2109 | switch (action) { | 2119 | switch (action) { |
2110 | case CPU_ONLINE: | 2120 | case CPU_ONLINE: |
2111 | case CPU_ONLINE_FROZEN: | 2121 | case CPU_ONLINE_FROZEN: |
2112 | mce_sysdev_create(cpu); | 2122 | mce_device_create(cpu); |
2113 | if (threshold_cpu_callback) | 2123 | if (threshold_cpu_callback) |
2114 | threshold_cpu_callback(action, cpu); | 2124 | threshold_cpu_callback(action, cpu); |
2115 | break; | 2125 | break; |
@@ -2117,7 +2127,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
2117 | case CPU_DEAD_FROZEN: | 2127 | case CPU_DEAD_FROZEN: |
2118 | if (threshold_cpu_callback) | 2128 | if (threshold_cpu_callback) |
2119 | threshold_cpu_callback(action, cpu); | 2129 | threshold_cpu_callback(action, cpu); |
2120 | mce_sysdev_remove(cpu); | 2130 | mce_device_remove(cpu); |
2121 | break; | 2131 | break; |
2122 | case CPU_DOWN_PREPARE: | 2132 | case CPU_DOWN_PREPARE: |
2123 | case CPU_DOWN_PREPARE_FROZEN: | 2133 | case CPU_DOWN_PREPARE_FROZEN: |
@@ -2151,7 +2161,7 @@ static __init void mce_init_banks(void) | |||
2151 | 2161 | ||
2152 | for (i = 0; i < banks; i++) { | 2162 | for (i = 0; i < banks; i++) { |
2153 | struct mce_bank *b = &mce_banks[i]; | 2163 | struct mce_bank *b = &mce_banks[i]; |
2154 | struct sysdev_attribute *a = &b->attr; | 2164 | struct device_attribute *a = &b->attr; |
2155 | 2165 | ||
2156 | sysfs_attr_init(&a->attr); | 2166 | sysfs_attr_init(&a->attr); |
2157 | a->attr.name = b->attrname; | 2167 | a->attr.name = b->attrname; |
@@ -2171,16 +2181,16 @@ static __init int mcheck_init_device(void) | |||
2171 | if (!mce_available(&boot_cpu_data)) | 2181 | if (!mce_available(&boot_cpu_data)) |
2172 | return -EIO; | 2182 | return -EIO; |
2173 | 2183 | ||
2174 | zalloc_cpumask_var(&mce_sysdev_initialized, GFP_KERNEL); | 2184 | zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL); |
2175 | 2185 | ||
2176 | mce_init_banks(); | 2186 | mce_init_banks(); |
2177 | 2187 | ||
2178 | err = sysdev_class_register(&mce_sysdev_class); | 2188 | err = subsys_system_register(&mce_subsys, NULL); |
2179 | if (err) | 2189 | if (err) |
2180 | return err; | 2190 | return err; |
2181 | 2191 | ||
2182 | for_each_online_cpu(i) { | 2192 | for_each_online_cpu(i) { |
2183 | err = mce_sysdev_create(i); | 2193 | err = mce_device_create(i); |
2184 | if (err) | 2194 | if (err) |
2185 | return err; | 2195 | return err; |
2186 | } | 2196 | } |
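
Two consequences of this hunk deserve a note. First, the per-CPU machinecheck device is now allocated with kzalloc(), and the driver core requires any dynamically allocated struct device to supply a ->release callback; that is why mce_device_release() simply kfree()s the device. Second, integer tunables are exported through struct dev_ext_attribute, whose ->var pointer is what the generic device_show_int()/device_store_int() helpers dereference. A minimal sketch of the pattern, assuming only <linux/device.h> (example_interval is an illustrative variable, not from the patch):

    #include <linux/device.h>
    #include <linux/slab.h>

    /* dynamically allocated devices must free themselves in ->release */
    static void example_release(struct device *dev)
    {
            kfree(dev);
    }

    static int example_interval = 300; /* illustrative tunable */

    /* defines dev_attr_example_interval; reads and writes go through the
     * generic device_show_int()/device_store_int() helpers via ->var */
    static DEVICE_INT_ATTR(example_interval, 0644, example_interval);
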
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 1d76872b6a45..786e76a86322 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
@@ -17,7 +17,6 @@ | |||
17 | #include <linux/notifier.h> | 17 | #include <linux/notifier.h> |
18 | #include <linux/kobject.h> | 18 | #include <linux/kobject.h> |
19 | #include <linux/percpu.h> | 19 | #include <linux/percpu.h> |
20 | #include <linux/sysdev.h> | ||
21 | #include <linux/errno.h> | 20 | #include <linux/errno.h> |
22 | #include <linux/sched.h> | 21 | #include <linux/sched.h> |
23 | #include <linux/sysfs.h> | 22 | #include <linux/sysfs.h> |
@@ -524,6 +523,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
524 | { | 523 | { |
525 | int i, err = 0; | 524 | int i, err = 0; |
526 | struct threshold_bank *b = NULL; | 525 | struct threshold_bank *b = NULL; |
526 | struct device *dev = mce_device[cpu]; | ||
527 | char name[32]; | 527 | char name[32]; |
528 | 528 | ||
529 | sprintf(name, "threshold_bank%i", bank); | 529 | sprintf(name, "threshold_bank%i", bank); |
@@ -544,8 +544,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
544 | if (!b) | 544 | if (!b) |
545 | goto out; | 545 | goto out; |
546 | 546 | ||
547 | err = sysfs_create_link(&per_cpu(mce_sysdev, cpu).kobj, | 547 | err = sysfs_create_link(&dev->kobj, b->kobj, name); |
548 | b->kobj, name); | ||
549 | if (err) | 548 | if (err) |
550 | goto out; | 549 | goto out; |
551 | 550 | ||
@@ -566,7 +565,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
566 | goto out; | 565 | goto out; |
567 | } | 566 | } |
568 | 567 | ||
569 | b->kobj = kobject_create_and_add(name, &per_cpu(mce_sysdev, cpu).kobj); | 568 | b->kobj = kobject_create_and_add(name, &dev->kobj); |
570 | if (!b->kobj) | 569 | if (!b->kobj) |
571 | goto out_free; | 570 | goto out_free; |
572 | 571 | ||
@@ -586,8 +585,9 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
586 | if (i == cpu) | 585 | if (i == cpu) |
587 | continue; | 586 | continue; |
588 | 587 | ||
589 | err = sysfs_create_link(&per_cpu(mce_sysdev, i).kobj, | 588 | dev = mce_device[i]; |
590 | b->kobj, name); | 589 | if (dev) |
590 | err = sysfs_create_link(&dev->kobj, b->kobj, name); | ||
591 | if (err) | 591 | if (err) |
592 | goto out; | 592 | goto out; |
593 | 593 | ||
@@ -650,6 +650,7 @@ static void deallocate_threshold_block(unsigned int cpu, | |||
650 | static void threshold_remove_bank(unsigned int cpu, int bank) | 650 | static void threshold_remove_bank(unsigned int cpu, int bank) |
651 | { | 651 | { |
652 | struct threshold_bank *b; | 652 | struct threshold_bank *b; |
653 | struct device *dev; | ||
653 | char name[32]; | 654 | char name[32]; |
654 | int i = 0; | 655 | int i = 0; |
655 | 656 | ||
@@ -664,7 +665,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) | |||
664 | #ifdef CONFIG_SMP | 665 | #ifdef CONFIG_SMP |
665 | /* sibling symlink */ | 666 | /* sibling symlink */ |
666 | if (shared_bank[bank] && b->blocks->cpu != cpu) { | 667 | if (shared_bank[bank] && b->blocks->cpu != cpu) { |
667 | sysfs_remove_link(&per_cpu(mce_sysdev, cpu).kobj, name); | 668 | sysfs_remove_link(&mce_device[cpu]->kobj, name); |
668 | per_cpu(threshold_banks, cpu)[bank] = NULL; | 669 | per_cpu(threshold_banks, cpu)[bank] = NULL; |
669 | 670 | ||
670 | return; | 671 | return; |
@@ -676,7 +677,9 @@ static void threshold_remove_bank(unsigned int cpu, int bank) | |||
676 | if (i == cpu) | 677 | if (i == cpu) |
677 | continue; | 678 | continue; |
678 | 679 | ||
679 | sysfs_remove_link(&per_cpu(mce_sysdev, i).kobj, name); | 680 | dev = mce_device[i]; |
681 | if (dev) | ||
682 | sysfs_remove_link(&dev->kobj, name); | ||
680 | per_cpu(threshold_banks, i)[bank] = NULL; | 683 | per_cpu(threshold_banks, i)[bank] = NULL; |
681 | } | 684 | } |
682 | 685 | ||
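
The extra NULL checks here are not cosmetic: under the old code every CPU owned a statically allocated per-CPU sys_device, whereas mce_device[i] can now legitimately be NULL if mce_device_create() failed or has not yet run for that CPU. A hedged sketch of the resulting access pattern (the example_ helper is ours):

    #include <linux/device.h>
    #include <linux/sysfs.h>

    extern struct device *mce_device[]; /* filled in by mce_device_create() */

    static void example_remove_link(unsigned int cpu, const char *name)
    {
            struct device *dev = mce_device[cpu];

            if (dev) /* may be absent if device creation failed */
                    sysfs_remove_link(&dev->kobj, name);
    }
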
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 39c6089891e4..67bb17a37a0a 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -19,7 +19,6 @@ | |||
19 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
20 | #include <linux/percpu.h> | 20 | #include <linux/percpu.h> |
21 | #include <linux/export.h> | 21 | #include <linux/export.h> |
22 | #include <linux/sysdev.h> | ||
23 | #include <linux/types.h> | 22 | #include <linux/types.h> |
24 | #include <linux/init.h> | 23 | #include <linux/init.h> |
25 | #include <linux/smp.h> | 24 | #include <linux/smp.h> |
@@ -69,16 +68,16 @@ static atomic_t therm_throt_en = ATOMIC_INIT(0); | |||
69 | static u32 lvtthmr_init __read_mostly; | 68 | static u32 lvtthmr_init __read_mostly; |
70 | 69 | ||
71 | #ifdef CONFIG_SYSFS | 70 | #ifdef CONFIG_SYSFS |
72 | #define define_therm_throt_sysdev_one_ro(_name) \ | 71 | #define define_therm_throt_device_one_ro(_name) \ |
73 | static SYSDEV_ATTR(_name, 0444, \ | 72 | static DEVICE_ATTR(_name, 0444, \ |
74 | therm_throt_sysdev_show_##_name, \ | 73 | therm_throt_device_show_##_name, \ |
75 | NULL) \ | 74 | NULL) \ |
76 | 75 | ||
77 | #define define_therm_throt_sysdev_show_func(event, name) \ | 76 | #define define_therm_throt_device_show_func(event, name) \ |
78 | \ | 77 | \ |
79 | static ssize_t therm_throt_sysdev_show_##event##_##name( \ | 78 | static ssize_t therm_throt_device_show_##event##_##name( \ |
80 | struct sys_device *dev, \ | 79 | struct device *dev, \ |
81 | struct sysdev_attribute *attr, \ | 80 | struct device_attribute *attr, \ |
82 | char *buf) \ | 81 | char *buf) \ |
83 | { \ | 82 | { \ |
84 | unsigned int cpu = dev->id; \ | 83 | unsigned int cpu = dev->id; \ |
@@ -95,20 +94,20 @@ static ssize_t therm_throt_sysdev_show_##event##_##name( \ | |||
95 | return ret; \ | 94 | return ret; \ |
96 | } | 95 | } |
97 | 96 | ||
98 | define_therm_throt_sysdev_show_func(core_throttle, count); | 97 | define_therm_throt_device_show_func(core_throttle, count); |
99 | define_therm_throt_sysdev_one_ro(core_throttle_count); | 98 | define_therm_throt_device_one_ro(core_throttle_count); |
100 | 99 | ||
101 | define_therm_throt_sysdev_show_func(core_power_limit, count); | 100 | define_therm_throt_device_show_func(core_power_limit, count); |
102 | define_therm_throt_sysdev_one_ro(core_power_limit_count); | 101 | define_therm_throt_device_one_ro(core_power_limit_count); |
103 | 102 | ||
104 | define_therm_throt_sysdev_show_func(package_throttle, count); | 103 | define_therm_throt_device_show_func(package_throttle, count); |
105 | define_therm_throt_sysdev_one_ro(package_throttle_count); | 104 | define_therm_throt_device_one_ro(package_throttle_count); |
106 | 105 | ||
107 | define_therm_throt_sysdev_show_func(package_power_limit, count); | 106 | define_therm_throt_device_show_func(package_power_limit, count); |
108 | define_therm_throt_sysdev_one_ro(package_power_limit_count); | 107 | define_therm_throt_device_one_ro(package_power_limit_count); |
109 | 108 | ||
110 | static struct attribute *thermal_throttle_attrs[] = { | 109 | static struct attribute *thermal_throttle_attrs[] = { |
111 | &attr_core_throttle_count.attr, | 110 | &dev_attr_core_throttle_count.attr, |
112 | NULL | 111 | NULL |
113 | }; | 112 | }; |
114 | 113 | ||
@@ -223,36 +222,36 @@ static int thresh_event_valid(int event) | |||
223 | 222 | ||
224 | #ifdef CONFIG_SYSFS | 223 | #ifdef CONFIG_SYSFS |
225 | /* Add/Remove thermal_throttle interface for CPU device: */ | 224 | /* Add/Remove thermal_throttle interface for CPU device: */ |
226 | static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev, | 225 | static __cpuinit int thermal_throttle_add_dev(struct device *dev, |
227 | unsigned int cpu) | 226 | unsigned int cpu) |
228 | { | 227 | { |
229 | int err; | 228 | int err; |
230 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 229 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
231 | 230 | ||
232 | err = sysfs_create_group(&sys_dev->kobj, &thermal_attr_group); | 231 | err = sysfs_create_group(&dev->kobj, &thermal_attr_group); |
233 | if (err) | 232 | if (err) |
234 | return err; | 233 | return err; |
235 | 234 | ||
236 | if (cpu_has(c, X86_FEATURE_PLN)) | 235 | if (cpu_has(c, X86_FEATURE_PLN)) |
237 | err = sysfs_add_file_to_group(&sys_dev->kobj, | 236 | err = sysfs_add_file_to_group(&dev->kobj, |
238 | &attr_core_power_limit_count.attr, | 237 | &dev_attr_core_power_limit_count.attr, |
239 | thermal_attr_group.name); | 238 | thermal_attr_group.name); |
240 | if (cpu_has(c, X86_FEATURE_PTS)) { | 239 | if (cpu_has(c, X86_FEATURE_PTS)) { |
241 | err = sysfs_add_file_to_group(&sys_dev->kobj, | 240 | err = sysfs_add_file_to_group(&dev->kobj, |
242 | &attr_package_throttle_count.attr, | 241 | &dev_attr_package_throttle_count.attr, |
243 | thermal_attr_group.name); | 242 | thermal_attr_group.name); |
244 | if (cpu_has(c, X86_FEATURE_PLN)) | 243 | if (cpu_has(c, X86_FEATURE_PLN)) |
245 | err = sysfs_add_file_to_group(&sys_dev->kobj, | 244 | err = sysfs_add_file_to_group(&dev->kobj, |
246 | &attr_package_power_limit_count.attr, | 245 | &dev_attr_package_power_limit_count.attr, |
247 | thermal_attr_group.name); | 246 | thermal_attr_group.name); |
248 | } | 247 | } |
249 | 248 | ||
250 | return err; | 249 | return err; |
251 | } | 250 | } |
252 | 251 | ||
253 | static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) | 252 | static __cpuinit void thermal_throttle_remove_dev(struct device *dev) |
254 | { | 253 | { |
255 | sysfs_remove_group(&sys_dev->kobj, &thermal_attr_group); | 254 | sysfs_remove_group(&dev->kobj, &thermal_attr_group); |
256 | } | 255 | } |
257 | 256 | ||
258 | /* Mutex protecting device creation against CPU hotplug: */ | 257 | /* Mutex protecting device creation against CPU hotplug: */ |
@@ -265,16 +264,16 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb, | |||
265 | void *hcpu) | 264 | void *hcpu) |
266 | { | 265 | { |
267 | unsigned int cpu = (unsigned long)hcpu; | 266 | unsigned int cpu = (unsigned long)hcpu; |
268 | struct sys_device *sys_dev; | 267 | struct device *dev; |
269 | int err = 0; | 268 | int err = 0; |
270 | 269 | ||
271 | sys_dev = get_cpu_sysdev(cpu); | 270 | dev = get_cpu_device(cpu); |
272 | 271 | ||
273 | switch (action) { | 272 | switch (action) { |
274 | case CPU_UP_PREPARE: | 273 | case CPU_UP_PREPARE: |
275 | case CPU_UP_PREPARE_FROZEN: | 274 | case CPU_UP_PREPARE_FROZEN: |
276 | mutex_lock(&therm_cpu_lock); | 275 | mutex_lock(&therm_cpu_lock); |
277 | err = thermal_throttle_add_dev(sys_dev, cpu); | 276 | err = thermal_throttle_add_dev(dev, cpu); |
278 | mutex_unlock(&therm_cpu_lock); | 277 | mutex_unlock(&therm_cpu_lock); |
279 | WARN_ON(err); | 278 | WARN_ON(err); |
280 | break; | 279 | break; |
@@ -283,7 +282,7 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb, | |||
283 | case CPU_DEAD: | 282 | case CPU_DEAD: |
284 | case CPU_DEAD_FROZEN: | 283 | case CPU_DEAD_FROZEN: |
285 | mutex_lock(&therm_cpu_lock); | 284 | mutex_lock(&therm_cpu_lock); |
286 | thermal_throttle_remove_dev(sys_dev); | 285 | thermal_throttle_remove_dev(dev); |
287 | mutex_unlock(&therm_cpu_lock); | 286 | mutex_unlock(&therm_cpu_lock); |
288 | break; | 287 | break; |
289 | } | 288 | } |
@@ -310,7 +309,7 @@ static __init int thermal_throttle_init_device(void) | |||
310 | #endif | 309 | #endif |
311 | /* connect live CPUs to sysfs */ | 310 | /* connect live CPUs to sysfs */ |
312 | for_each_online_cpu(cpu) { | 311 | for_each_online_cpu(cpu) { |
313 | err = thermal_throttle_add_dev(get_cpu_sysdev(cpu), cpu); | 312 | err = thermal_throttle_add_dev(get_cpu_device(cpu), cpu); |
314 | WARN_ON(err); | 313 | WARN_ON(err); |
315 | } | 314 | } |
316 | #ifdef CONFIG_HOTPLUG_CPU | 315 | #ifdef CONFIG_HOTPLUG_CPU |
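
The renames in thermal_throttle_attrs follow directly from the attribute macros: SYSDEV_ATTR(foo, ...) declared a variable named attr_foo, while DEVICE_ATTR(foo, ...) declares dev_attr_foo. A small sketch of the convention, with illustrative names:

    #include <linux/device.h>
    #include <linux/kernel.h>

    static ssize_t example_show(struct device *dev,
                                struct device_attribute *attr, char *buf)
    {
            /* for CPU devices, dev->id is the CPU number */
            return sprintf(buf, "%u\n", dev->id);
    }

    /* declares "struct device_attribute dev_attr_example", hence the
     * &dev_attr_*.attr entries in the attribute array above */
    static DEVICE_ATTR(example, 0444, example_show, NULL);
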
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 212a6a42527c..a524353d93f2 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c | |||
@@ -177,7 +177,7 @@ static struct notifier_block __refdata cpuid_class_cpu_notifier = | |||
177 | .notifier_call = cpuid_class_cpu_callback, | 177 | .notifier_call = cpuid_class_cpu_callback, |
178 | }; | 178 | }; |
179 | 179 | ||
180 | static char *cpuid_devnode(struct device *dev, mode_t *mode) | 180 | static char *cpuid_devnode(struct device *dev, umode_t *mode) |
181 | { | 181 | { |
182 | return kasprintf(GFP_KERNEL, "cpu/%u/cpuid", MINOR(dev->devt)); | 182 | return kasprintf(GFP_KERNEL, "cpu/%u/cpuid", MINOR(dev->devt)); |
183 | } | 183 | } |
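
The mode_t to umode_t switch is part of a tree-wide cleanup: file modes inside the kernel are now carried in the dedicated umode_t type rather than the userspace-facing mode_t, and the class ->devnode callback signature changed accordingly. A hedged sketch of such a callback (the defaults shown are illustrative):

    #include <linux/device.h>

    static char *example_devnode(struct device *dev, umode_t *mode)
    {
            if (mode)
                    *mode = 0600; /* illustrative default permissions */

            return NULL; /* NULL keeps the default device node name */
    }
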
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 8071e2f3d6eb..62d61e9976eb 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/acpi.h> | 19 | #include <linux/acpi.h> |
20 | #include <linux/firmware-map.h> | 20 | #include <linux/firmware-map.h> |
21 | #include <linux/memblock.h> | 21 | #include <linux/memblock.h> |
22 | #include <linux/sort.h> | ||
22 | 23 | ||
23 | #include <asm/e820.h> | 24 | #include <asm/e820.h> |
24 | #include <asm/proto.h> | 25 | #include <asm/proto.h> |
@@ -227,22 +228,38 @@ void __init e820_print_map(char *who) | |||
227 | * ____________________33__ | 228 | * ____________________33__ |
228 | * ______________________4_ | 229 | * ______________________4_ |
229 | */ | 230 | */ |
231 | struct change_member { | ||
232 | struct e820entry *pbios; /* pointer to original bios entry */ | ||
233 | unsigned long long addr; /* address for this change point */ | ||
234 | }; | ||
235 | |||
236 | static int __init cpcompare(const void *a, const void *b) | ||
237 | { | ||
238 | struct change_member * const *app = a, * const *bpp = b; | ||
239 | const struct change_member *ap = *app, *bp = *bpp; | ||
240 | |||
241 | /* | ||
242 | * Inputs are pointers to two elements of change_point[]. If their | ||
243 | * addresses are unequal, their difference dominates. If the addresses | ||
244 | * are equal, then consider one that represents the end of its region | ||
245 | * to be greater than one that does not. | ||
246 | */ | ||
247 | if (ap->addr != bp->addr) | ||
248 | return ap->addr > bp->addr ? 1 : -1; | ||
249 | |||
250 | return (ap->addr != ap->pbios->addr) - (bp->addr != bp->pbios->addr); | ||
251 | } | ||
230 | 252 | ||
231 | int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, | 253 | int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, |
232 | u32 *pnr_map) | 254 | u32 *pnr_map) |
233 | { | 255 | { |
234 | struct change_member { | ||
235 | struct e820entry *pbios; /* pointer to original bios entry */ | ||
236 | unsigned long long addr; /* address for this change point */ | ||
237 | }; | ||
238 | static struct change_member change_point_list[2*E820_X_MAX] __initdata; | 256 | static struct change_member change_point_list[2*E820_X_MAX] __initdata; |
239 | static struct change_member *change_point[2*E820_X_MAX] __initdata; | 257 | static struct change_member *change_point[2*E820_X_MAX] __initdata; |
240 | static struct e820entry *overlap_list[E820_X_MAX] __initdata; | 258 | static struct e820entry *overlap_list[E820_X_MAX] __initdata; |
241 | static struct e820entry new_bios[E820_X_MAX] __initdata; | 259 | static struct e820entry new_bios[E820_X_MAX] __initdata; |
242 | struct change_member *change_tmp; | ||
243 | unsigned long current_type, last_type; | 260 | unsigned long current_type, last_type; |
244 | unsigned long long last_addr; | 261 | unsigned long long last_addr; |
245 | int chgidx, still_changing; | 262 | int chgidx; |
246 | int overlap_entries; | 263 | int overlap_entries; |
247 | int new_bios_entry; | 264 | int new_bios_entry; |
248 | int old_nr, new_nr, chg_nr; | 265 | int old_nr, new_nr, chg_nr; |
@@ -279,35 +296,7 @@ int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, | |||
279 | chg_nr = chgidx; | 296 | chg_nr = chgidx; |
280 | 297 | ||
281 | /* sort change-point list by memory addresses (low -> high) */ | 298 | /* sort change-point list by memory addresses (low -> high) */ |
282 | still_changing = 1; | 299 | sort(change_point, chg_nr, sizeof *change_point, cpcompare, NULL); |
283 | while (still_changing) { | ||
284 | still_changing = 0; | ||
285 | for (i = 1; i < chg_nr; i++) { | ||
286 | unsigned long long curaddr, lastaddr; | ||
287 | unsigned long long curpbaddr, lastpbaddr; | ||
288 | |||
289 | curaddr = change_point[i]->addr; | ||
290 | lastaddr = change_point[i - 1]->addr; | ||
291 | curpbaddr = change_point[i]->pbios->addr; | ||
292 | lastpbaddr = change_point[i - 1]->pbios->addr; | ||
293 | |||
294 | /* | ||
295 | * swap entries, when: | ||
296 | * | ||
297 | * curaddr > lastaddr or | ||
298 | * curaddr == lastaddr and curaddr == curpbaddr and | ||
299 | * lastaddr != lastpbaddr | ||
300 | */ | ||
301 | if (curaddr < lastaddr || | ||
302 | (curaddr == lastaddr && curaddr == curpbaddr && | ||
303 | lastaddr != lastpbaddr)) { | ||
304 | change_tmp = change_point[i]; | ||
305 | change_point[i] = change_point[i-1]; | ||
306 | change_point[i-1] = change_tmp; | ||
307 | still_changing = 1; | ||
308 | } | ||
309 | } | ||
310 | } | ||
311 | 300 | ||
312 | /* create a new bios memory map, removing overlaps */ | 301 | /* create a new bios memory map, removing overlaps */ |
313 | overlap_entries = 0; /* number of entries in the overlap table */ | 302 | overlap_entries = 0; /* number of entries in the overlap table */ |
@@ -714,7 +703,7 @@ void __init e820_mark_nosave_regions(unsigned long limit_pfn) | |||
714 | } | 703 | } |
715 | #endif | 704 | #endif |
716 | 705 | ||
717 | #ifdef CONFIG_HIBERNATION | 706 | #ifdef CONFIG_ACPI |
718 | /** | 707 | /** |
719 | * Mark ACPI NVS memory region, so that we can save/restore it during | 708 | * Mark ACPI NVS memory region, so that we can save/restore it during |
720 | * hibernation and the subsequent resume. | 709 | * hibernation and the subsequent resume. |
@@ -727,7 +716,7 @@ static int __init e820_mark_nvs_memory(void) | |||
727 | struct e820entry *ei = &e820.map[i]; | 716 | struct e820entry *ei = &e820.map[i]; |
728 | 717 | ||
729 | if (ei->type == E820_NVS) | 718 | if (ei->type == E820_NVS) |
730 | suspend_nvs_register(ei->addr, ei->size); | 719 | acpi_nvs_register(ei->addr, ei->size); |
731 | } | 720 | } |
732 | 721 | ||
733 | return 0; | 722 | return 0; |
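
The open-coded bubble sort over the change-point array is replaced by the kernel's generic sort from lib/sort.c; cpcompare() encodes the old swap condition as a comparator (at equal addresses, a point that ends its region sorts after one that starts it). A minimal, hedged sketch of the sort() API itself (the span type is illustrative):

    #include <linux/sort.h>

    struct span {
            unsigned long long addr;
    };

    static int span_cmp(const void *a, const void *b)
    {
            const struct span *sa = a, *sb = b;

            if (sa->addr != sb->addr)
                    return sa->addr > sb->addr ? 1 : -1;

            return 0; /* equal keys compare equal */
    }

    /* usage: sort(spans, nr, sizeof(*spans), span_cmp, NULL);
     * a NULL swap callback selects the generic byte-wise swap */
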
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index cd28a350f7f9..9b9f18b49918 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c | |||
@@ -240,14 +240,14 @@ static int __init setup_early_printk(char *buf) | |||
240 | if (!strncmp(buf, "xen", 3)) | 240 | if (!strncmp(buf, "xen", 3)) |
241 | early_console_register(&xenboot_console, keep); | 241 | early_console_register(&xenboot_console, keep); |
242 | #endif | 242 | #endif |
243 | #ifdef CONFIG_EARLY_PRINTK_MRST | 243 | #ifdef CONFIG_EARLY_PRINTK_INTEL_MID |
244 | if (!strncmp(buf, "mrst", 4)) { | 244 | if (!strncmp(buf, "mrst", 4)) { |
245 | mrst_early_console_init(); | 245 | mrst_early_console_init(); |
246 | early_console_register(&early_mrst_console, keep); | 246 | early_console_register(&early_mrst_console, keep); |
247 | } | 247 | } |
248 | 248 | ||
249 | if (!strncmp(buf, "hsu", 3)) { | 249 | if (!strncmp(buf, "hsu", 3)) { |
250 | hsu_early_console_init(); | 250 | hsu_early_console_init(buf + 3); |
251 | early_console_register(&early_hsu_console, keep); | 251 | early_console_register(&early_hsu_console, keep); |
252 | } | 252 | } |
253 | #endif | 253 | #endif |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 22d0e21b4dd7..79d97e68f042 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -42,6 +42,7 @@ | |||
42 | */ | 42 | */ |
43 | 43 | ||
44 | #include <linux/linkage.h> | 44 | #include <linux/linkage.h> |
45 | #include <linux/err.h> | ||
45 | #include <asm/thread_info.h> | 46 | #include <asm/thread_info.h> |
46 | #include <asm/irqflags.h> | 47 | #include <asm/irqflags.h> |
47 | #include <asm/errno.h> | 48 | #include <asm/errno.h> |
@@ -81,8 +82,6 @@ | |||
81 | * enough to patch inline, increasing performance. | 82 | * enough to patch inline, increasing performance. |
82 | */ | 83 | */ |
83 | 84 | ||
84 | #define nr_syscalls ((syscall_table_size)/4) | ||
85 | |||
86 | #ifdef CONFIG_PREEMPT | 85 | #ifdef CONFIG_PREEMPT |
87 | #define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF | 86 | #define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF |
88 | #else | 87 | #else |
@@ -423,7 +422,7 @@ sysenter_past_esp: | |||
423 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) | 422 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) |
424 | jnz sysenter_audit | 423 | jnz sysenter_audit |
425 | sysenter_do_call: | 424 | sysenter_do_call: |
426 | cmpl $(nr_syscalls), %eax | 425 | cmpl $(NR_syscalls), %eax |
427 | jae syscall_badsys | 426 | jae syscall_badsys |
428 | call *sys_call_table(,%eax,4) | 427 | call *sys_call_table(,%eax,4) |
429 | movl %eax,PT_EAX(%esp) | 428 | movl %eax,PT_EAX(%esp) |
@@ -455,7 +454,7 @@ sysenter_audit: | |||
455 | movl %ebx,%ecx /* 3rd arg: 1st syscall arg */ | 454 | movl %ebx,%ecx /* 3rd arg: 1st syscall arg */ |
456 | movl %eax,%edx /* 2nd arg: syscall number */ | 455 | movl %eax,%edx /* 2nd arg: syscall number */ |
457 | movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */ | 456 | movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */ |
458 | call audit_syscall_entry | 457 | call __audit_syscall_entry |
459 | pushl_cfi %ebx | 458 | pushl_cfi %ebx |
460 | movl PT_EAX(%esp),%eax /* reload syscall number */ | 459 | movl PT_EAX(%esp),%eax /* reload syscall number */ |
461 | jmp sysenter_do_call | 460 | jmp sysenter_do_call |
@@ -466,11 +465,10 @@ sysexit_audit: | |||
466 | TRACE_IRQS_ON | 465 | TRACE_IRQS_ON |
467 | ENABLE_INTERRUPTS(CLBR_ANY) | 466 | ENABLE_INTERRUPTS(CLBR_ANY) |
468 | movl %eax,%edx /* second arg, syscall return value */ | 467 | movl %eax,%edx /* second arg, syscall return value */ |
469 | cmpl $0,%eax /* is it < 0? */ | 468 | cmpl $-MAX_ERRNO,%eax /* is it an error ? */ |
470 | setl %al /* 1 if so, 0 if not */ | 469 | setbe %al /* 1 if so, 0 if not */ |
471 | movzbl %al,%eax /* zero-extend that */ | 470 | movzbl %al,%eax /* zero-extend that */ |
472 | inc %eax /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ | 471 | call __audit_syscall_exit |
473 | call audit_syscall_exit | ||
474 | DISABLE_INTERRUPTS(CLBR_ANY) | 472 | DISABLE_INTERRUPTS(CLBR_ANY) |
475 | TRACE_IRQS_OFF | 473 | TRACE_IRQS_OFF |
476 | movl TI_flags(%ebp), %ecx | 474 | movl TI_flags(%ebp), %ecx |
@@ -504,7 +502,7 @@ ENTRY(system_call) | |||
504 | # system call tracing in operation / emulation | 502 | # system call tracing in operation / emulation |
505 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) | 503 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) |
506 | jnz syscall_trace_entry | 504 | jnz syscall_trace_entry |
507 | cmpl $(nr_syscalls), %eax | 505 | cmpl $(NR_syscalls), %eax |
508 | jae syscall_badsys | 506 | jae syscall_badsys |
509 | syscall_call: | 507 | syscall_call: |
510 | call *sys_call_table(,%eax,4) | 508 | call *sys_call_table(,%eax,4) |
@@ -654,7 +652,7 @@ syscall_trace_entry: | |||
654 | movl %esp, %eax | 652 | movl %esp, %eax |
655 | call syscall_trace_enter | 653 | call syscall_trace_enter |
656 | /* What it returned is what we'll actually use. */ | 654 | /* What it returned is what we'll actually use. */ |
657 | cmpl $(nr_syscalls), %eax | 655 | cmpl $(NR_syscalls), %eax |
658 | jnae syscall_call | 656 | jnae syscall_call |
659 | jmp syscall_exit | 657 | jmp syscall_exit |
660 | END(syscall_trace_entry) | 658 | END(syscall_trace_entry) |
@@ -694,29 +692,28 @@ END(syscall_badsys) | |||
694 | * System calls that need a pt_regs pointer. | 692 | * System calls that need a pt_regs pointer. |
695 | */ | 693 | */ |
696 | #define PTREGSCALL0(name) \ | 694 | #define PTREGSCALL0(name) \ |
697 | ALIGN; \ | 695 | ENTRY(ptregs_##name) ; \ |
698 | ptregs_##name: \ | ||
699 | leal 4(%esp),%eax; \ | 696 | leal 4(%esp),%eax; \ |
700 | jmp sys_##name; | 697 | jmp sys_##name; \ |
698 | ENDPROC(ptregs_##name) | ||
701 | 699 | ||
702 | #define PTREGSCALL1(name) \ | 700 | #define PTREGSCALL1(name) \ |
703 | ALIGN; \ | 701 | ENTRY(ptregs_##name) ; \ |
704 | ptregs_##name: \ | ||
705 | leal 4(%esp),%edx; \ | 702 | leal 4(%esp),%edx; \ |
706 | movl (PT_EBX+4)(%esp),%eax; \ | 703 | movl (PT_EBX+4)(%esp),%eax; \ |
707 | jmp sys_##name; | 704 | jmp sys_##name; \ |
705 | ENDPROC(ptregs_##name) | ||
708 | 706 | ||
709 | #define PTREGSCALL2(name) \ | 707 | #define PTREGSCALL2(name) \ |
710 | ALIGN; \ | 708 | ENTRY(ptregs_##name) ; \ |
711 | ptregs_##name: \ | ||
712 | leal 4(%esp),%ecx; \ | 709 | leal 4(%esp),%ecx; \ |
713 | movl (PT_ECX+4)(%esp),%edx; \ | 710 | movl (PT_ECX+4)(%esp),%edx; \ |
714 | movl (PT_EBX+4)(%esp),%eax; \ | 711 | movl (PT_EBX+4)(%esp),%eax; \ |
715 | jmp sys_##name; | 712 | jmp sys_##name; \ |
713 | ENDPROC(ptregs_##name) | ||
716 | 714 | ||
717 | #define PTREGSCALL3(name) \ | 715 | #define PTREGSCALL3(name) \ |
718 | ALIGN; \ | 716 | ENTRY(ptregs_##name) ; \ |
719 | ptregs_##name: \ | ||
720 | CFI_STARTPROC; \ | 717 | CFI_STARTPROC; \ |
721 | leal 4(%esp),%eax; \ | 718 | leal 4(%esp),%eax; \ |
722 | pushl_cfi %eax; \ | 719 | pushl_cfi %eax; \ |
@@ -741,8 +738,7 @@ PTREGSCALL2(vm86) | |||
741 | PTREGSCALL1(vm86old) | 738 | PTREGSCALL1(vm86old) |
742 | 739 | ||
743 | /* Clone is an oddball. The 4th arg is in %edi */ | 740 | /* Clone is an oddball. The 4th arg is in %edi */ |
744 | ALIGN; | 741 | ENTRY(ptregs_clone) |
745 | ptregs_clone: | ||
746 | CFI_STARTPROC | 742 | CFI_STARTPROC |
747 | leal 4(%esp),%eax | 743 | leal 4(%esp),%eax |
748 | pushl_cfi %eax | 744 | pushl_cfi %eax |
@@ -1213,11 +1209,6 @@ return_to_handler: | |||
1213 | jmp *%ecx | 1209 | jmp *%ecx |
1214 | #endif | 1210 | #endif |
1215 | 1211 | ||
1216 | .section .rodata,"a" | ||
1217 | #include "syscall_table_32.S" | ||
1218 | |||
1219 | syscall_table_size=(.-sys_call_table) | ||
1220 | |||
1221 | /* | 1212 | /* |
1222 | * Some functions should be protected against kprobes | 1213 | * Some functions should be protected against kprobes |
1223 | */ | 1214 | */ |
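
Beyond moving the syscall table out of this file, two changes stand out: the locally computed nr_syscalls is replaced by the generated NR_syscalls constant, and the audit exit path now classifies the return value against -MAX_ERRNO instead of testing for any negative number, since syscalls such as mmap() can legally return values that look negative. In C, the test that the new cmpl/setbe sequence approximates is the standard errno-window check (a hedged sketch):

    #include <linux/err.h>

    /* only values in [-MAX_ERRNO, -1], i.e. the top 4095 addresses,
     * encode an errno; everything else is a successful return */
    static inline int example_is_error(unsigned long ret)
    {
            return IS_ERR_VALUE(ret);
    }
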
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index a20e1cb9dc87..3fe8239fd8fb 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -55,6 +55,7 @@ | |||
55 | #include <asm/paravirt.h> | 55 | #include <asm/paravirt.h> |
56 | #include <asm/ftrace.h> | 56 | #include <asm/ftrace.h> |
57 | #include <asm/percpu.h> | 57 | #include <asm/percpu.h> |
58 | #include <linux/err.h> | ||
58 | 59 | ||
59 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | 60 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ |
60 | #include <linux/elf-em.h> | 61 | #include <linux/elf-em.h> |
@@ -548,7 +549,7 @@ badsys: | |||
548 | #ifdef CONFIG_AUDITSYSCALL | 549 | #ifdef CONFIG_AUDITSYSCALL |
549 | /* | 550 | /* |
550 | * Fast path for syscall audit without full syscall trace. | 551 | * Fast path for syscall audit without full syscall trace. |
551 | * We just call audit_syscall_entry() directly, and then | 552 | * We just call __audit_syscall_entry() directly, and then |
552 | * jump back to the normal fast path. | 553 | * jump back to the normal fast path. |
553 | */ | 554 | */ |
554 | auditsys: | 555 | auditsys: |
@@ -558,22 +559,21 @@ auditsys: | |||
558 | movq %rdi,%rdx /* 3rd arg: 1st syscall arg */ | 559 | movq %rdi,%rdx /* 3rd arg: 1st syscall arg */ |
559 | movq %rax,%rsi /* 2nd arg: syscall number */ | 560 | movq %rax,%rsi /* 2nd arg: syscall number */ |
560 | movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */ | 561 | movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */ |
561 | call audit_syscall_entry | 562 | call __audit_syscall_entry |
562 | LOAD_ARGS 0 /* reload call-clobbered registers */ | 563 | LOAD_ARGS 0 /* reload call-clobbered registers */ |
563 | jmp system_call_fastpath | 564 | jmp system_call_fastpath |
564 | 565 | ||
565 | /* | 566 | /* |
566 | * Return fast path for syscall audit. Call audit_syscall_exit() | 567 | * Return fast path for syscall audit. Call __audit_syscall_exit() |
567 | * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT | 568 | * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT |
568 | * masked off. | 569 | * masked off. |
569 | */ | 570 | */ |
570 | sysret_audit: | 571 | sysret_audit: |
571 | movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */ | 572 | movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */ |
572 | cmpq $0,%rsi /* is it < 0? */ | 573 | cmpq $-MAX_ERRNO,%rsi /* is it < -MAX_ERRNO? */ |
573 | setl %al /* 1 if so, 0 if not */ | 574 | setbe %al /* 1 if so, 0 if not */ |
574 | movzbl %al,%edi /* zero-extend that into %edi */ | 575 | movzbl %al,%edi /* zero-extend that into %edi */ |
575 | inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ | 576 | call __audit_syscall_exit |
576 | call audit_syscall_exit | ||
577 | movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi | 577 | movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi |
578 | jmp sysret_check | 578 | jmp sysret_check |
579 | #endif /* CONFIG_AUDITSYSCALL */ | 579 | #endif /* CONFIG_AUDITSYSCALL */ |
@@ -1480,62 +1480,214 @@ ENTRY(error_exit) | |||
1480 | CFI_ENDPROC | 1480 | CFI_ENDPROC |
1481 | END(error_exit) | 1481 | END(error_exit) |
1482 | 1482 | ||
1483 | /* | ||
1484 | * Test if a given stack is an NMI stack or not. | ||
1485 | */ | ||
1486 | .macro test_in_nmi reg stack nmi_ret normal_ret | ||
1487 | cmpq %\reg, \stack | ||
1488 | ja \normal_ret | ||
1489 | subq $EXCEPTION_STKSZ, %\reg | ||
1490 | cmpq %\reg, \stack | ||
1491 | jb \normal_ret | ||
1492 | jmp \nmi_ret | ||
1493 | .endm | ||
1483 | 1494 | ||
1484 | /* runs on exception stack */ | 1495 | /* runs on exception stack */ |
1485 | ENTRY(nmi) | 1496 | ENTRY(nmi) |
1486 | INTR_FRAME | 1497 | INTR_FRAME |
1487 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1498 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1488 | pushq_cfi $-1 | 1499 | /* |
1500 | * We allow breakpoints in NMIs. If a breakpoint occurs, then | ||
1501 | * the iretq it performs will take us out of NMI context. | ||
1502 | * This means that we can have nested NMIs where the next | ||
1503 | * NMI is using the top of the stack of the previous NMI. We | ||
1504 | * can't let it execute because the nested NMI will corrupt the | ||
1505 | * stack of the previous NMI. NMI handlers are not re-entrant | ||
1506 | * anyway. | ||
1507 | * | ||
1508 | * To handle this case we do the following: | ||
1509 | * Check the a special location on the stack that contains | ||
1510 | * a variable that is set when NMIs are executing. | ||
1511 | * The interrupted task's stack is also checked to see if it | ||
1512 | * is an NMI stack. | ||
1513 | * If the variable is not set and the stack is not the NMI | ||
1514 | * stack then: | ||
1515 | * o Set the special variable on the stack | ||
1516 | * o Copy the interrupt frame into a "saved" location on the stack | ||
1517 | * o Copy the interrupt frame into a "copy" location on the stack | ||
1518 | * o Continue processing the NMI | ||
1519 | * If the variable is set or the previous stack is the NMI stack: | ||
1520 | * o Modify the "copy" location to jump to the repeat_nmi | ||
1521 | * o return back to the first NMI | ||
1522 | * | ||
1523 | * Now on exit of the first NMI, we first clear the stack variable. | ||
1524 | * The NMI stack will tell any nested NMIs at that point that it is | ||
1525 | * nested. Then we pop the stack normally with iret, and if there was | ||
1526 | * a nested NMI that updated the copy interrupt stack frame, a | ||
1527 | * jump will be made to the repeat_nmi code that will handle the second | ||
1528 | * NMI. | ||
1529 | */ | ||
1530 | |||
1531 | /* Use %rdx as our temp variable throughout */ | ||
1532 | pushq_cfi %rdx | ||
1533 | |||
1534 | /* | ||
1535 | * Check the special variable on the stack to see if NMIs are | ||
1536 | * executing. | ||
1537 | */ | ||
1538 | cmp $1, -8(%rsp) | ||
1539 | je nested_nmi | ||
1540 | |||
1541 | /* | ||
1542 | * Now test if the previous stack was an NMI stack. | ||
1543 | * We need the double check. We check the NMI stack to satisfy the | ||
1544 | * race when the first NMI clears the variable before returning. | ||
1545 | * We check the variable because the first NMI could be in a | ||
1546 | * breakpoint routine using a breakpoint stack. | ||
1547 | */ | ||
1548 | lea 6*8(%rsp), %rdx | ||
1549 | test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi | ||
1550 | |||
1551 | nested_nmi: | ||
1552 | /* | ||
1553 | * Do nothing if we interrupted the fixup in repeat_nmi. | ||
1554 | * It's about to repeat the NMI handler, so we are fine | ||
1555 | * with ignoring this one. | ||
1556 | */ | ||
1557 | movq $repeat_nmi, %rdx | ||
1558 | cmpq 8(%rsp), %rdx | ||
1559 | ja 1f | ||
1560 | movq $end_repeat_nmi, %rdx | ||
1561 | cmpq 8(%rsp), %rdx | ||
1562 | ja nested_nmi_out | ||
1563 | |||
1564 | 1: | ||
1565 | /* Set up the interrupted NMIs stack to jump to repeat_nmi */ | ||
1566 | leaq -6*8(%rsp), %rdx | ||
1567 | movq %rdx, %rsp | ||
1568 | CFI_ADJUST_CFA_OFFSET 6*8 | ||
1569 | pushq_cfi $__KERNEL_DS | ||
1570 | pushq_cfi %rdx | ||
1571 | pushfq_cfi | ||
1572 | pushq_cfi $__KERNEL_CS | ||
1573 | pushq_cfi $repeat_nmi | ||
1574 | |||
1575 | /* Put stack back */ | ||
1576 | addq $(11*8), %rsp | ||
1577 | CFI_ADJUST_CFA_OFFSET -11*8 | ||
1578 | |||
1579 | nested_nmi_out: | ||
1580 | popq_cfi %rdx | ||
1581 | |||
1582 | /* No need to check faults here */ | ||
1583 | INTERRUPT_RETURN | ||
1584 | |||
1585 | first_nmi: | ||
1586 | /* | ||
1587 | * Because nested NMIs will use the pushed location that we | ||
1588 | * stored in rdx, we must keep that space available. | ||
1589 | * Here's what our stack frame will look like: | ||
1590 | * +-------------------------+ | ||
1591 | * | original SS | | ||
1592 | * | original Return RSP | | ||
1593 | * | original RFLAGS | | ||
1594 | * | original CS | | ||
1595 | * | original RIP | | ||
1596 | * +-------------------------+ | ||
1597 | * | temp storage for rdx | | ||
1598 | * +-------------------------+ | ||
1599 | * | NMI executing variable | | ||
1600 | * +-------------------------+ | ||
1601 | * | Saved SS | | ||
1602 | * | Saved Return RSP | | ||
1603 | * | Saved RFLAGS | | ||
1604 | * | Saved CS | | ||
1605 | * | Saved RIP | | ||
1606 | * +-------------------------+ | ||
1607 | * | copied SS | | ||
1608 | * | copied Return RSP | | ||
1609 | * | copied RFLAGS | | ||
1610 | * | copied CS | | ||
1611 | * | copied RIP | | ||
1612 | * +-------------------------+ | ||
1613 | * | pt_regs | | ||
1614 | * +-------------------------+ | ||
1615 | * | ||
1616 | * The saved RIP is used to fix up the copied RIP that a nested | ||
1617 | * NMI may zero out. The original stack frame and the temp storage | ||
1618 | * are also used by nested NMIs and cannot be trusted on exit. | ||
1619 | */ | ||
1620 | /* Set the NMI executing variable on the stack. */ | ||
1621 | pushq_cfi $1 | ||
1622 | |||
1623 | /* Copy the stack frame to the Saved frame */ | ||
1624 | .rept 5 | ||
1625 | pushq_cfi 6*8(%rsp) | ||
1626 | .endr | ||
1627 | |||
1628 | /* Make another copy, this one may be modified by nested NMIs */ | ||
1629 | .rept 5 | ||
1630 | pushq_cfi 4*8(%rsp) | ||
1631 | .endr | ||
1632 | |||
1633 | /* Do not pop rdx, nested NMIs will corrupt it */ | ||
1634 | movq 11*8(%rsp), %rdx | ||
1635 | |||
1636 | /* | ||
1637 | * Everything below this point can be preempted by a nested | ||
1638 | * NMI if the first NMI took an exception. Repeated NMIs | ||
1639 | * caused by an exception and nested NMI will start here, and | ||
1640 | * can still be preempted by another NMI. | ||
1641 | */ | ||
1642 | restart_nmi: | ||
1643 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ | ||
1489 | subq $ORIG_RAX-R15, %rsp | 1644 | subq $ORIG_RAX-R15, %rsp |
1490 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 | 1645 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
1646 | /* | ||
1647 | * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit | ||
1648 | * as we should not be calling schedule in NMI context. | ||
1649 | * Even with normal interrupts enabled. An NMI should not be | ||
1650 | * setting NEED_RESCHED or anything that normal interrupts and | ||
1651 | * exceptions might do. | ||
1652 | */ | ||
1491 | call save_paranoid | 1653 | call save_paranoid |
1492 | DEFAULT_FRAME 0 | 1654 | DEFAULT_FRAME 0 |
1493 | /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ | 1655 | /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ |
1494 | movq %rsp,%rdi | 1656 | movq %rsp,%rdi |
1495 | movq $-1,%rsi | 1657 | movq $-1,%rsi |
1496 | call do_nmi | 1658 | call do_nmi |
1497 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
1498 | /* paranoidexit; without TRACE_IRQS_OFF */ | ||
1499 | /* ebx: no swapgs flag */ | ||
1500 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
1501 | testl %ebx,%ebx /* swapgs needed? */ | 1659 | testl %ebx,%ebx /* swapgs needed? */ |
1502 | jnz nmi_restore | 1660 | jnz nmi_restore |
1503 | testl $3,CS(%rsp) | ||
1504 | jnz nmi_userspace | ||
1505 | nmi_swapgs: | 1661 | nmi_swapgs: |
1506 | SWAPGS_UNSAFE_STACK | 1662 | SWAPGS_UNSAFE_STACK |
1507 | nmi_restore: | 1663 | nmi_restore: |
1508 | RESTORE_ALL 8 | 1664 | RESTORE_ALL 8 |
1665 | /* Clear the NMI executing stack variable */ | ||
1666 | movq $0, 10*8(%rsp) | ||
1509 | jmp irq_return | 1667 | jmp irq_return |
1510 | nmi_userspace: | ||
1511 | GET_THREAD_INFO(%rcx) | ||
1512 | movl TI_flags(%rcx),%ebx | ||
1513 | andl $_TIF_WORK_MASK,%ebx | ||
1514 | jz nmi_swapgs | ||
1515 | movq %rsp,%rdi /* &pt_regs */ | ||
1516 | call sync_regs | ||
1517 | movq %rax,%rsp /* switch stack for scheduling */ | ||
1518 | testl $_TIF_NEED_RESCHED,%ebx | ||
1519 | jnz nmi_schedule | ||
1520 | movl %ebx,%edx /* arg3: thread flags */ | ||
1521 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
1522 | xorl %esi,%esi /* arg2: oldset */ | ||
1523 | movq %rsp,%rdi /* arg1: &pt_regs */ | ||
1524 | call do_notify_resume | ||
1525 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
1526 | jmp nmi_userspace | ||
1527 | nmi_schedule: | ||
1528 | ENABLE_INTERRUPTS(CLBR_ANY) | ||
1529 | call schedule | ||
1530 | DISABLE_INTERRUPTS(CLBR_ANY) | ||
1531 | jmp nmi_userspace | ||
1532 | CFI_ENDPROC | ||
1533 | #else | ||
1534 | jmp paranoid_exit | ||
1535 | CFI_ENDPROC | 1668 | CFI_ENDPROC |
1536 | #endif | ||
1537 | END(nmi) | 1669 | END(nmi) |
1538 | 1670 | ||
1671 | /* | ||
1672 | * If an NMI hit an iret because of an exception or breakpoint, | ||
1673 | * it can lose its NMI context, and a nested NMI may come in. | ||
1674 | * In that case, the nested NMI will change the preempted NMI's | ||
1675 | * stack to jump to here when it does the final iret. | ||
1676 | */ | ||
1677 | repeat_nmi: | ||
1678 | INTR_FRAME | ||
1679 | /* Update the stack variable to say we are still in NMI */ | ||
1680 | movq $1, 5*8(%rsp) | ||
1681 | |||
1682 | /* copy the saved stack back to copy stack */ | ||
1683 | .rept 5 | ||
1684 | pushq_cfi 4*8(%rsp) | ||
1685 | .endr | ||
1686 | |||
1687 | jmp restart_nmi | ||
1688 | CFI_ENDPROC | ||
1689 | end_repeat_nmi: | ||
1690 | |||
1539 | ENTRY(ignore_sysret) | 1691 | ENTRY(ignore_sysret) |
1540 | CFI_STARTPROC | 1692 | CFI_STARTPROC |
1541 | mov $-ENOSYS,%eax | 1693 | mov $-ENOSYS,%eax |
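
The nested-NMI machinery above is easiest to follow as a state machine: the first NMI stamps an "executing" word onto its stack and keeps two copies of the hardware frame; any NMI that arrives while the word is set (or while the interrupted stack pointer lies inside the NMI stack) merely retargets the outer NMI's "copy" frame at repeat_nmi and returns. A deliberately simplified, userspace-runnable C model of just that decision (everything here is illustrative; the authoritative logic is the assembly above):

    #include <stdio.h>

    static int nmi_executing;                 /* models the on-stack flag */
    static const char *copied_return = "interrupted code";

    static void nmi_handler(int hits_breakpoint);

    /* stands in for the breakpoint whose iret re-opens the NMI window */
    static void breakpoint(void)
    {
            nmi_handler(0);                   /* a second NMI lands here */
    }

    static void nmi_handler(int hits_breakpoint)
    {
            if (nmi_executing) {
                    /* nested case: retarget the outer NMI's return frame */
                    copied_return = "repeat_nmi";
                    return;
            }

            nmi_executing = 1;
            if (hits_breakpoint)
                    breakpoint();
            nmi_executing = 0;

            /* the real iret would now land in repeat_nmi, re-running the handler */
            printf("iret to: %s\n", copied_return);
    }

    int main(void)
    {
            nmi_handler(1);                   /* prints "iret to: repeat_nmi" */
            return 0;
    }
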
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index e11e39478a49..40f4eb3766d1 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -417,6 +417,10 @@ ENTRY(phys_base) | |||
417 | ENTRY(idt_table) | 417 | ENTRY(idt_table) |
418 | .skip IDT_ENTRIES * 16 | 418 | .skip IDT_ENTRIES * 16 |
419 | 419 | ||
420 | .align L1_CACHE_BYTES | ||
421 | ENTRY(nmi_idt_table) | ||
422 | .skip IDT_ENTRIES * 16 | ||
423 | |||
420 | __PAGE_ALIGNED_BSS | 424 | __PAGE_ALIGNED_BSS |
421 | .align PAGE_SIZE | 425 | .align PAGE_SIZE |
422 | ENTRY(empty_zero_page) | 426 | ENTRY(empty_zero_page) |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 07b0a56a754d..ad0de0c2714e 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -2,7 +2,6 @@ | |||
2 | #include <linux/clockchips.h> | 2 | #include <linux/clockchips.h> |
3 | #include <linux/interrupt.h> | 3 | #include <linux/interrupt.h> |
4 | #include <linux/export.h> | 4 | #include <linux/export.h> |
5 | #include <linux/sysdev.h> | ||
6 | #include <linux/delay.h> | 5 | #include <linux/delay.h> |
7 | #include <linux/errno.h> | 6 | #include <linux/errno.h> |
8 | #include <linux/i8253.h> | 7 | #include <linux/i8253.h> |
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 72090705a656..40fc86161d92 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c | |||
@@ -28,6 +28,9 @@ DEFINE_PER_CPU(struct pt_regs *, irq_regs); | |||
28 | EXPORT_PER_CPU_SYMBOL(irq_regs); | 28 | EXPORT_PER_CPU_SYMBOL(irq_regs); |
29 | 29 | ||
30 | #ifdef CONFIG_DEBUG_STACKOVERFLOW | 30 | #ifdef CONFIG_DEBUG_STACKOVERFLOW |
31 | |||
32 | int sysctl_panic_on_stackoverflow __read_mostly; | ||
33 | |||
31 | /* Debugging check for stack overflow: is there less than 1KB free? */ | 34 | /* Debugging check for stack overflow: is there less than 1KB free? */ |
32 | static int check_stack_overflow(void) | 35 | static int check_stack_overflow(void) |
33 | { | 36 | { |
@@ -43,6 +46,8 @@ static void print_stack_overflow(void) | |||
43 | { | 46 | { |
44 | printk(KERN_WARNING "low stack detected by irq handler\n"); | 47 | printk(KERN_WARNING "low stack detected by irq handler\n"); |
45 | dump_stack(); | 48 | dump_stack(); |
49 | if (sysctl_panic_on_stackoverflow) | ||
50 | panic("low stack detected by irq handler - check messages\n"); | ||
46 | } | 51 | } |
47 | 52 | ||
48 | #else | 53 | #else |
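The new sysctl_panic_on_stackoverflow flag upgrades the existing warning into a hard stop. Assuming the usual wiring of such knobs through kernel/sysctl.c (not part of this hunk), it would surface as kernel.panic_on_stackoverflow and be enabled at runtime with something like sysctl -w kernel.panic_on_stackoverflow=1; it defaults to 0, so behavior is unchanged unless explicitly requested.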
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 69bca468c47a..d04d3ecded62 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c | |||
@@ -26,6 +26,8 @@ EXPORT_PER_CPU_SYMBOL(irq_stat); | |||
26 | DEFINE_PER_CPU(struct pt_regs *, irq_regs); | 26 | DEFINE_PER_CPU(struct pt_regs *, irq_regs); |
27 | EXPORT_PER_CPU_SYMBOL(irq_regs); | 27 | EXPORT_PER_CPU_SYMBOL(irq_regs); |
28 | 28 | ||
29 | int sysctl_panic_on_stackoverflow; | ||
30 | |||
29 | /* | 31 | /* |
30 | * Probabilistic stack overflow check: | 32 | * Probabilistic stack overflow check: |
31 | * | 33 | * |
@@ -36,18 +38,39 @@ EXPORT_PER_CPU_SYMBOL(irq_regs); | |||
36 | static inline void stack_overflow_check(struct pt_regs *regs) | 38 | static inline void stack_overflow_check(struct pt_regs *regs) |
37 | { | 39 | { |
38 | #ifdef CONFIG_DEBUG_STACKOVERFLOW | 40 | #ifdef CONFIG_DEBUG_STACKOVERFLOW |
41 | #define STACK_TOP_MARGIN 128 | ||
42 | struct orig_ist *oist; | ||
43 | u64 irq_stack_top, irq_stack_bottom; | ||
44 | u64 estack_top, estack_bottom; | ||
39 | u64 curbase = (u64)task_stack_page(current); | 45 | u64 curbase = (u64)task_stack_page(current); |
40 | 46 | ||
41 | if (user_mode_vm(regs)) | 47 | if (user_mode_vm(regs)) |
42 | return; | 48 | return; |
43 | 49 | ||
44 | WARN_ONCE(regs->sp >= curbase && | 50 | if (regs->sp >= curbase + sizeof(struct thread_info) + |
45 | regs->sp <= curbase + THREAD_SIZE && | 51 | sizeof(struct pt_regs) + STACK_TOP_MARGIN && |
46 | regs->sp < curbase + sizeof(struct thread_info) + | 52 | regs->sp <= curbase + THREAD_SIZE) |
47 | sizeof(struct pt_regs) + 128, | 53 | return; |
54 | |||
55 | irq_stack_top = (u64)__get_cpu_var(irq_stack_union.irq_stack) + | ||
56 | STACK_TOP_MARGIN; | ||
57 | irq_stack_bottom = (u64)__get_cpu_var(irq_stack_ptr); | ||
58 | if (regs->sp >= irq_stack_top && regs->sp <= irq_stack_bottom) | ||
59 | return; | ||
60 | |||
61 | oist = &__get_cpu_var(orig_ist); | ||
62 | estack_top = (u64)oist->ist[0] - EXCEPTION_STKSZ + STACK_TOP_MARGIN; | ||
63 | estack_bottom = (u64)oist->ist[N_EXCEPTION_STACKS - 1]; | ||
64 | if (regs->sp >= estack_top && regs->sp <= estack_bottom) | ||
65 | return; | ||
66 | |||
67 | WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n", | ||
68 | current->comm, curbase, regs->sp, | ||
69 | irq_stack_top, irq_stack_bottom, | ||
70 | estack_top, estack_bottom); | ||
48 | 71 | ||
49 | "do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n", | 72 | if (sysctl_panic_on_stackoverflow) |
50 | current->comm, curbase, regs->sp); | 73 | panic("low stack detected by irq handler - check messages\n"); |
51 | #endif | 74 | #endif |
52 | } | 75 | } |
53 | 76 | ||
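The reworked 64-bit check above replaces one heuristic on the task stack with three explicit windows (task stack, per-CPU IRQ stack, exception stacks), each guarded by a 128-byte top margin, warning only when the stack pointer matches none of them. A condensed sketch of that structure, with the bounds passed in as plain values rather than read from per-CPU data:

    #define STACK_TOP_MARGIN 128

    struct stack_range {
            unsigned long top;    /* lowest usable address plus margin */
            unsigned long bottom; /* highest usable address */
    };

    /* Return 1 if sp lies inside any known-good stack window. */
    static int sp_in_known_stack(unsigned long sp,
                                 const struct stack_range *r, int n)
    {
            int i;

            for (i = 0; i < n; i++)
                    if (sp >= r[i].top && sp <= r[i].bottom)
                            return 1;
            return 0; /* outside every window: report the overflow */
    }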
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index b3300e6bacef..313fb5cddbce 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -9,7 +9,7 @@ | |||
9 | #include <linux/kprobes.h> | 9 | #include <linux/kprobes.h> |
10 | #include <linux/init.h> | 10 | #include <linux/init.h> |
11 | #include <linux/kernel_stat.h> | 11 | #include <linux/kernel_stat.h> |
12 | #include <linux/sysdev.h> | 12 | #include <linux/device.h> |
13 | #include <linux/bitops.h> | 13 | #include <linux/bitops.h> |
14 | #include <linux/acpi.h> | 14 | #include <linux/acpi.h> |
15 | #include <linux/io.h> | 15 | #include <linux/io.h> |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index a9c2116001d6..f0c6fd6f176b 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -39,8 +39,6 @@ | |||
39 | #include <asm/desc.h> | 39 | #include <asm/desc.h> |
40 | #include <asm/tlbflush.h> | 40 | #include <asm/tlbflush.h> |
41 | 41 | ||
42 | #define MMU_QUEUE_SIZE 1024 | ||
43 | |||
44 | static int kvmapf = 1; | 42 | static int kvmapf = 1; |
45 | 43 | ||
46 | static int parse_no_kvmapf(char *arg) | 44 | static int parse_no_kvmapf(char *arg) |
@@ -60,21 +58,10 @@ static int parse_no_stealacc(char *arg) | |||
60 | 58 | ||
61 | early_param("no-steal-acc", parse_no_stealacc); | 59 | early_param("no-steal-acc", parse_no_stealacc); |
62 | 60 | ||
63 | struct kvm_para_state { | ||
64 | u8 mmu_queue[MMU_QUEUE_SIZE]; | ||
65 | int mmu_queue_len; | ||
66 | }; | ||
67 | |||
68 | static DEFINE_PER_CPU(struct kvm_para_state, para_state); | ||
69 | static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); | 61 | static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); |
70 | static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64); | 62 | static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64); |
71 | static int has_steal_clock = 0; | 63 | static int has_steal_clock = 0; |
72 | 64 | ||
73 | static struct kvm_para_state *kvm_para_state(void) | ||
74 | { | ||
75 | return &per_cpu(para_state, raw_smp_processor_id()); | ||
76 | } | ||
77 | |||
78 | /* | 65 | /* |
79 | * No need for any "IO delay" on KVM | 66 | * No need for any "IO delay" on KVM |
80 | */ | 67 | */ |
@@ -271,151 +258,6 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
271 | } | 258 | } |
272 | } | 259 | } |
273 | 260 | ||
274 | static void kvm_mmu_op(void *buffer, unsigned len) | ||
275 | { | ||
276 | int r; | ||
277 | unsigned long a1, a2; | ||
278 | |||
279 | do { | ||
280 | a1 = __pa(buffer); | ||
281 | a2 = 0; /* on i386 __pa() always returns <4G */ | ||
282 | r = kvm_hypercall3(KVM_HC_MMU_OP, len, a1, a2); | ||
283 | buffer += r; | ||
284 | len -= r; | ||
285 | } while (len); | ||
286 | } | ||
287 | |||
288 | static void mmu_queue_flush(struct kvm_para_state *state) | ||
289 | { | ||
290 | if (state->mmu_queue_len) { | ||
291 | kvm_mmu_op(state->mmu_queue, state->mmu_queue_len); | ||
292 | state->mmu_queue_len = 0; | ||
293 | } | ||
294 | } | ||
295 | |||
296 | static void kvm_deferred_mmu_op(void *buffer, int len) | ||
297 | { | ||
298 | struct kvm_para_state *state = kvm_para_state(); | ||
299 | |||
300 | if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU) { | ||
301 | kvm_mmu_op(buffer, len); | ||
302 | return; | ||
303 | } | ||
304 | if (state->mmu_queue_len + len > sizeof state->mmu_queue) | ||
305 | mmu_queue_flush(state); | ||
306 | memcpy(state->mmu_queue + state->mmu_queue_len, buffer, len); | ||
307 | state->mmu_queue_len += len; | ||
308 | } | ||
309 | |||
310 | static void kvm_mmu_write(void *dest, u64 val) | ||
311 | { | ||
312 | __u64 pte_phys; | ||
313 | struct kvm_mmu_op_write_pte wpte; | ||
314 | |||
315 | #ifdef CONFIG_HIGHPTE | ||
316 | struct page *page; | ||
317 | unsigned long dst = (unsigned long) dest; | ||
318 | |||
319 | page = kmap_atomic_to_page(dest); | ||
320 | pte_phys = page_to_pfn(page); | ||
321 | pte_phys <<= PAGE_SHIFT; | ||
322 | pte_phys += (dst & ~(PAGE_MASK)); | ||
323 | #else | ||
324 | pte_phys = (unsigned long)__pa(dest); | ||
325 | #endif | ||
326 | wpte.header.op = KVM_MMU_OP_WRITE_PTE; | ||
327 | wpte.pte_val = val; | ||
328 | wpte.pte_phys = pte_phys; | ||
329 | |||
330 | kvm_deferred_mmu_op(&wpte, sizeof wpte); | ||
331 | } | ||
332 | |||
333 | /* | ||
334 | * We only need to hook operations that are MMU writes. We hook these so that | ||
335 | * we can use lazy MMU mode to batch these operations. We could probably | ||
336 | * improve the performance of the host code if we used some of the information | ||
337 | * here to simplify processing of batched writes. | ||
338 | */ | ||
339 | static void kvm_set_pte(pte_t *ptep, pte_t pte) | ||
340 | { | ||
341 | kvm_mmu_write(ptep, pte_val(pte)); | ||
342 | } | ||
343 | |||
344 | static void kvm_set_pte_at(struct mm_struct *mm, unsigned long addr, | ||
345 | pte_t *ptep, pte_t pte) | ||
346 | { | ||
347 | kvm_mmu_write(ptep, pte_val(pte)); | ||
348 | } | ||
349 | |||
350 | static void kvm_set_pmd(pmd_t *pmdp, pmd_t pmd) | ||
351 | { | ||
352 | kvm_mmu_write(pmdp, pmd_val(pmd)); | ||
353 | } | ||
354 | |||
355 | #if PAGETABLE_LEVELS >= 3 | ||
356 | #ifdef CONFIG_X86_PAE | ||
357 | static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte) | ||
358 | { | ||
359 | kvm_mmu_write(ptep, pte_val(pte)); | ||
360 | } | ||
361 | |||
362 | static void kvm_pte_clear(struct mm_struct *mm, | ||
363 | unsigned long addr, pte_t *ptep) | ||
364 | { | ||
365 | kvm_mmu_write(ptep, 0); | ||
366 | } | ||
367 | |||
368 | static void kvm_pmd_clear(pmd_t *pmdp) | ||
369 | { | ||
370 | kvm_mmu_write(pmdp, 0); | ||
371 | } | ||
372 | #endif | ||
373 | |||
374 | static void kvm_set_pud(pud_t *pudp, pud_t pud) | ||
375 | { | ||
376 | kvm_mmu_write(pudp, pud_val(pud)); | ||
377 | } | ||
378 | |||
379 | #if PAGETABLE_LEVELS == 4 | ||
380 | static void kvm_set_pgd(pgd_t *pgdp, pgd_t pgd) | ||
381 | { | ||
382 | kvm_mmu_write(pgdp, pgd_val(pgd)); | ||
383 | } | ||
384 | #endif | ||
385 | #endif /* PAGETABLE_LEVELS >= 3 */ | ||
386 | |||
387 | static void kvm_flush_tlb(void) | ||
388 | { | ||
389 | struct kvm_mmu_op_flush_tlb ftlb = { | ||
390 | .header.op = KVM_MMU_OP_FLUSH_TLB, | ||
391 | }; | ||
392 | |||
393 | kvm_deferred_mmu_op(&ftlb, sizeof ftlb); | ||
394 | } | ||
395 | |||
396 | static void kvm_release_pt(unsigned long pfn) | ||
397 | { | ||
398 | struct kvm_mmu_op_release_pt rpt = { | ||
399 | .header.op = KVM_MMU_OP_RELEASE_PT, | ||
400 | .pt_phys = (u64)pfn << PAGE_SHIFT, | ||
401 | }; | ||
402 | |||
403 | kvm_mmu_op(&rpt, sizeof rpt); | ||
404 | } | ||
405 | |||
406 | static void kvm_enter_lazy_mmu(void) | ||
407 | { | ||
408 | paravirt_enter_lazy_mmu(); | ||
409 | } | ||
410 | |||
411 | static void kvm_leave_lazy_mmu(void) | ||
412 | { | ||
413 | struct kvm_para_state *state = kvm_para_state(); | ||
414 | |||
415 | mmu_queue_flush(state); | ||
416 | paravirt_leave_lazy_mmu(); | ||
417 | } | ||
418 | |||
419 | static void __init paravirt_ops_setup(void) | 261 | static void __init paravirt_ops_setup(void) |
420 | { | 262 | { |
421 | pv_info.name = "KVM"; | 263 | pv_info.name = "KVM"; |
@@ -424,29 +266,6 @@ static void __init paravirt_ops_setup(void) | |||
424 | if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) | 266 | if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) |
425 | pv_cpu_ops.io_delay = kvm_io_delay; | 267 | pv_cpu_ops.io_delay = kvm_io_delay; |
426 | 268 | ||
427 | if (kvm_para_has_feature(KVM_FEATURE_MMU_OP)) { | ||
428 | pv_mmu_ops.set_pte = kvm_set_pte; | ||
429 | pv_mmu_ops.set_pte_at = kvm_set_pte_at; | ||
430 | pv_mmu_ops.set_pmd = kvm_set_pmd; | ||
431 | #if PAGETABLE_LEVELS >= 3 | ||
432 | #ifdef CONFIG_X86_PAE | ||
433 | pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic; | ||
434 | pv_mmu_ops.pte_clear = kvm_pte_clear; | ||
435 | pv_mmu_ops.pmd_clear = kvm_pmd_clear; | ||
436 | #endif | ||
437 | pv_mmu_ops.set_pud = kvm_set_pud; | ||
438 | #if PAGETABLE_LEVELS == 4 | ||
439 | pv_mmu_ops.set_pgd = kvm_set_pgd; | ||
440 | #endif | ||
441 | #endif | ||
442 | pv_mmu_ops.flush_tlb_user = kvm_flush_tlb; | ||
443 | pv_mmu_ops.release_pte = kvm_release_pt; | ||
444 | pv_mmu_ops.release_pmd = kvm_release_pt; | ||
445 | pv_mmu_ops.release_pud = kvm_release_pt; | ||
446 | |||
447 | pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu; | ||
448 | pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu; | ||
449 | } | ||
450 | #ifdef CONFIG_X86_IO_APIC | 269 | #ifdef CONFIG_X86_IO_APIC |
451 | no_timer_check = 1; | 270 | no_timer_check = 1; |
452 | #endif | 271 | #endif |
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 9302e2d0eb4b..fda91c307104 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c | |||
@@ -292,8 +292,8 @@ static int reload_for_cpu(int cpu) | |||
292 | return err; | 292 | return err; |
293 | } | 293 | } |
294 | 294 | ||
295 | static ssize_t reload_store(struct sys_device *dev, | 295 | static ssize_t reload_store(struct device *dev, |
296 | struct sysdev_attribute *attr, | 296 | struct device_attribute *attr, |
297 | const char *buf, size_t size) | 297 | const char *buf, size_t size) |
298 | { | 298 | { |
299 | unsigned long val; | 299 | unsigned long val; |
@@ -318,30 +318,30 @@ static ssize_t reload_store(struct sys_device *dev, | |||
318 | return ret; | 318 | return ret; |
319 | } | 319 | } |
320 | 320 | ||
321 | static ssize_t version_show(struct sys_device *dev, | 321 | static ssize_t version_show(struct device *dev, |
322 | struct sysdev_attribute *attr, char *buf) | 322 | struct device_attribute *attr, char *buf) |
323 | { | 323 | { |
324 | struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; | 324 | struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; |
325 | 325 | ||
326 | return sprintf(buf, "0x%x\n", uci->cpu_sig.rev); | 326 | return sprintf(buf, "0x%x\n", uci->cpu_sig.rev); |
327 | } | 327 | } |
328 | 328 | ||
329 | static ssize_t pf_show(struct sys_device *dev, | 329 | static ssize_t pf_show(struct device *dev, |
330 | struct sysdev_attribute *attr, char *buf) | 330 | struct device_attribute *attr, char *buf) |
331 | { | 331 | { |
332 | struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; | 332 | struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; |
333 | 333 | ||
334 | return sprintf(buf, "0x%x\n", uci->cpu_sig.pf); | 334 | return sprintf(buf, "0x%x\n", uci->cpu_sig.pf); |
335 | } | 335 | } |
336 | 336 | ||
337 | static SYSDEV_ATTR(reload, 0200, NULL, reload_store); | 337 | static DEVICE_ATTR(reload, 0200, NULL, reload_store); |
338 | static SYSDEV_ATTR(version, 0400, version_show, NULL); | 338 | static DEVICE_ATTR(version, 0400, version_show, NULL); |
339 | static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL); | 339 | static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL); |
340 | 340 | ||
341 | static struct attribute *mc_default_attrs[] = { | 341 | static struct attribute *mc_default_attrs[] = { |
342 | &attr_reload.attr, | 342 | &dev_attr_reload.attr, |
343 | &attr_version.attr, | 343 | &dev_attr_version.attr, |
344 | &attr_processor_flags.attr, | 344 | &dev_attr_processor_flags.attr, |
345 | NULL | 345 | NULL |
346 | }; | 346 | }; |
347 | 347 | ||
@@ -405,43 +405,45 @@ static enum ucode_state microcode_update_cpu(int cpu) | |||
405 | return ustate; | 405 | return ustate; |
406 | } | 406 | } |
407 | 407 | ||
408 | static int mc_sysdev_add(struct sys_device *sys_dev) | 408 | static int mc_device_add(struct device *dev, struct subsys_interface *sif) |
409 | { | 409 | { |
410 | int err, cpu = sys_dev->id; | 410 | int err, cpu = dev->id; |
411 | 411 | ||
412 | if (!cpu_online(cpu)) | 412 | if (!cpu_online(cpu)) |
413 | return 0; | 413 | return 0; |
414 | 414 | ||
415 | pr_debug("CPU%d added\n", cpu); | 415 | pr_debug("CPU%d added\n", cpu); |
416 | 416 | ||
417 | err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); | 417 | err = sysfs_create_group(&dev->kobj, &mc_attr_group); |
418 | if (err) | 418 | if (err) |
419 | return err; | 419 | return err; |
420 | 420 | ||
421 | if (microcode_init_cpu(cpu) == UCODE_ERROR) { | 421 | if (microcode_init_cpu(cpu) == UCODE_ERROR) { |
422 | sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); | 422 | sysfs_remove_group(&dev->kobj, &mc_attr_group); |
423 | return -EINVAL; | 423 | return -EINVAL; |
424 | } | 424 | } |
425 | 425 | ||
426 | return err; | 426 | return err; |
427 | } | 427 | } |
428 | 428 | ||
429 | static int mc_sysdev_remove(struct sys_device *sys_dev) | 429 | static int mc_device_remove(struct device *dev, struct subsys_interface *sif) |
430 | { | 430 | { |
431 | int cpu = sys_dev->id; | 431 | int cpu = dev->id; |
432 | 432 | ||
433 | if (!cpu_online(cpu)) | 433 | if (!cpu_online(cpu)) |
434 | return 0; | 434 | return 0; |
435 | 435 | ||
436 | pr_debug("CPU%d removed\n", cpu); | 436 | pr_debug("CPU%d removed\n", cpu); |
437 | microcode_fini_cpu(cpu); | 437 | microcode_fini_cpu(cpu); |
438 | sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); | 438 | sysfs_remove_group(&dev->kobj, &mc_attr_group); |
439 | return 0; | 439 | return 0; |
440 | } | 440 | } |
441 | 441 | ||
442 | static struct sysdev_driver mc_sysdev_driver = { | 442 | static struct subsys_interface mc_cpu_interface = { |
443 | .add = mc_sysdev_add, | 443 | .name = "microcode", |
444 | .remove = mc_sysdev_remove, | 444 | .subsys = &cpu_subsys, |
445 | .add_dev = mc_device_add, | ||
446 | .remove_dev = mc_device_remove, | ||
445 | }; | 447 | }; |
446 | 448 | ||
447 | /** | 449 | /** |
@@ -464,9 +466,9 @@ static __cpuinit int | |||
464 | mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) | 466 | mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) |
465 | { | 467 | { |
466 | unsigned int cpu = (unsigned long)hcpu; | 468 | unsigned int cpu = (unsigned long)hcpu; |
467 | struct sys_device *sys_dev; | 469 | struct device *dev; |
468 | 470 | ||
469 | sys_dev = get_cpu_sysdev(cpu); | 471 | dev = get_cpu_device(cpu); |
470 | switch (action) { | 472 | switch (action) { |
471 | case CPU_ONLINE: | 473 | case CPU_ONLINE: |
472 | case CPU_ONLINE_FROZEN: | 474 | case CPU_ONLINE_FROZEN: |
@@ -474,13 +476,13 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) | |||
474 | case CPU_DOWN_FAILED: | 476 | case CPU_DOWN_FAILED: |
475 | case CPU_DOWN_FAILED_FROZEN: | 477 | case CPU_DOWN_FAILED_FROZEN: |
476 | pr_debug("CPU%d added\n", cpu); | 478 | pr_debug("CPU%d added\n", cpu); |
477 | if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) | 479 | if (sysfs_create_group(&dev->kobj, &mc_attr_group)) |
478 | pr_err("Failed to create group for CPU%d\n", cpu); | 480 | pr_err("Failed to create group for CPU%d\n", cpu); |
479 | break; | 481 | break; |
480 | case CPU_DOWN_PREPARE: | 482 | case CPU_DOWN_PREPARE: |
481 | case CPU_DOWN_PREPARE_FROZEN: | 483 | case CPU_DOWN_PREPARE_FROZEN: |
482 | /* Suspend is in progress, only remove the interface */ | 484 | /* Suspend is in progress, only remove the interface */ |
483 | sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); | 485 | sysfs_remove_group(&dev->kobj, &mc_attr_group); |
484 | pr_debug("CPU%d removed\n", cpu); | 486 | pr_debug("CPU%d removed\n", cpu); |
485 | break; | 487 | break; |
486 | 488 | ||
@@ -525,7 +527,7 @@ static int __init microcode_init(void) | |||
525 | get_online_cpus(); | 527 | get_online_cpus(); |
526 | mutex_lock(µcode_mutex); | 528 | mutex_lock(µcode_mutex); |
527 | 529 | ||
528 | error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); | 530 | error = subsys_interface_register(&mc_cpu_interface); |
529 | 531 | ||
530 | mutex_unlock(µcode_mutex); | 532 | mutex_unlock(µcode_mutex); |
531 | put_online_cpus(); | 533 | put_online_cpus(); |
@@ -535,7 +537,7 @@ static int __init microcode_init(void) | |||
535 | 537 | ||
536 | error = microcode_dev_init(); | 538 | error = microcode_dev_init(); |
537 | if (error) | 539 | if (error) |
538 | goto out_sysdev_driver; | 540 | goto out_driver; |
539 | 541 | ||
540 | register_syscore_ops(&mc_syscore_ops); | 542 | register_syscore_ops(&mc_syscore_ops); |
541 | register_hotcpu_notifier(&mc_cpu_notifier); | 543 | register_hotcpu_notifier(&mc_cpu_notifier); |
@@ -545,11 +547,11 @@ static int __init microcode_init(void) | |||
545 | 547 | ||
546 | return 0; | 548 | return 0; |
547 | 549 | ||
548 | out_sysdev_driver: | 550 | out_driver: |
549 | get_online_cpus(); | 551 | get_online_cpus(); |
550 | mutex_lock(µcode_mutex); | 552 | mutex_lock(µcode_mutex); |
551 | 553 | ||
552 | sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); | 554 | subsys_interface_unregister(&mc_cpu_interface); |
553 | 555 | ||
554 | mutex_unlock(µcode_mutex); | 556 | mutex_unlock(µcode_mutex); |
555 | put_online_cpus(); | 557 | put_online_cpus(); |
@@ -573,7 +575,7 @@ static void __exit microcode_exit(void) | |||
573 | get_online_cpus(); | 575 | get_online_cpus(); |
574 | mutex_lock(µcode_mutex); | 576 | mutex_lock(µcode_mutex); |
575 | 577 | ||
576 | sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); | 578 | subsys_interface_unregister(&mc_cpu_interface); |
577 | 579 | ||
578 | mutex_unlock(µcode_mutex); | 580 | mutex_unlock(µcode_mutex); |
579 | put_online_cpus(); | 581 | put_online_cpus(); |
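The shape of the conversion above, reduced to a skeleton: a struct subsys_interface names the subsystem it attaches to and supplies per-device add/remove callbacks, and a single subsys_interface_register() call replaces the old sysdev driver registration. A minimal sketch with a hypothetical "example" interface, using only the fields and calls visible in the diff:

    #include <linux/init.h>
    #include <linux/device.h>
    #include <linux/cpu.h>

    static int example_add(struct device *dev, struct subsys_interface *sif)
    {
            /* invoked for every device already in the subsystem and for
             * any that appear later */
            return 0;
    }

    static int example_remove(struct device *dev, struct subsys_interface *sif)
    {
            return 0;
    }

    static struct subsys_interface example_interface = {
            .name       = "example",
            .subsys     = &cpu_subsys,   /* attach to the CPU subsystem */
            .add_dev    = example_add,
            .remove_dev = example_remove,
    };

    static int __init example_init(void)
    {
            return subsys_interface_register(&example_interface);
    }

    static void example_exit(void)
    {
            subsys_interface_unregister(&example_interface);
    }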
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 12fcbe2c143e..96356762a51d 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c | |||
@@ -236,7 +236,7 @@ static struct notifier_block __refdata msr_class_cpu_notifier = { | |||
236 | .notifier_call = msr_class_cpu_callback, | 236 | .notifier_call = msr_class_cpu_callback, |
237 | }; | 237 | }; |
238 | 238 | ||
239 | static char *msr_devnode(struct device *dev, mode_t *mode) | 239 | static char *msr_devnode(struct device *dev, umode_t *mode) |
240 | { | 240 | { |
241 | return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt)); | 241 | return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt)); |
242 | } | 242 | } |
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index e88f37b58ddd..47acaf319165 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c | |||
@@ -405,9 +405,108 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) | |||
405 | unknown_nmi_error(reason, regs); | 405 | unknown_nmi_error(reason, regs); |
406 | } | 406 | } |
407 | 407 | ||
408 | /* | ||
409 | * NMIs can hit breakpoints, which will cause the CPU to lose | ||
410 | * its NMI context when the breakpoint does an iret. | ||
411 | */ | ||
412 | #ifdef CONFIG_X86_32 | ||
413 | /* | ||
414 | * For i386, NMIs use the same stack as the kernel, and we can | ||
415 | * add a workaround to the iret problem in C. Simply have 3 states | ||
416 | * the NMI can be in. | ||
417 | * | ||
418 | * 1) not running | ||
419 | * 2) executing | ||
420 | * 3) latched | ||
421 | * | ||
422 | * When no NMI is in progress, it is in the "not running" state. | ||
423 | * When an NMI comes in, it goes into the "executing" state. | ||
424 | * Normally, if another NMI is triggered, it does not interrupt | ||
425 | * the running NMI and the HW will simply latch it so that when | ||
426 | * the first NMI finishes, it will restart the second NMI. | ||
427 | * (Note, the latch is binary, thus multiple NMIs triggering, | ||
428 | * when one is running, are ignored. Only one NMI is restarted.) | ||
429 | * | ||
430 | * If an NMI hits a breakpoint that executes an iret, another | ||
431 | * NMI can preempt it. We do not want to allow this new NMI | ||
432 | * to run, but we want to execute it when the first one finishes. | ||
433 | * We set the state to "latched", and the first NMI will perform | ||
434 | * a cmpxchg on the state, and if it doesn't successfully | ||
435 | * reset the state to "not running" it will restart the next | ||
436 | * NMI. | ||
437 | */ | ||
438 | enum nmi_states { | ||
439 | NMI_NOT_RUNNING, | ||
440 | NMI_EXECUTING, | ||
441 | NMI_LATCHED, | ||
442 | }; | ||
443 | static DEFINE_PER_CPU(enum nmi_states, nmi_state); | ||
444 | |||
445 | #define nmi_nesting_preprocess(regs) \ | ||
446 | do { \ | ||
447 | if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) { \ | ||
448 | __get_cpu_var(nmi_state) = NMI_LATCHED; \ | ||
449 | return; \ | ||
450 | } \ | ||
451 | nmi_restart: \ | ||
452 | __get_cpu_var(nmi_state) = NMI_EXECUTING; \ | ||
453 | } while (0) | ||
454 | |||
455 | #define nmi_nesting_postprocess() \ | ||
456 | do { \ | ||
457 | if (cmpxchg(&__get_cpu_var(nmi_state), \ | ||
458 | NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING) \ | ||
459 | goto nmi_restart; \ | ||
460 | } while (0) | ||
461 | #else /* x86_64 */ | ||
462 | /* | ||
463 | * In x86_64 things are a bit more difficult. This has the same problem | ||
464 | * where an NMI hitting a breakpoint that calls iret will remove the | ||
465 | * NMI context, allowing a nested NMI to enter. What makes this more | ||
466 | * difficult is that both NMIs and breakpoints have their own stack. | ||
467 | * When a new NMI or breakpoint is executed, the stack is set to a fixed | ||
468 | * point. If an NMI is nested, it will have its stack set at that same | ||
469 | * fixed address that the first NMI had, and will start corrupting the | ||
470 | * stack. This is handled in entry_64.S, but the same problem exists with | ||
471 | * the breakpoint stack. | ||
472 | * | ||
473 | * If a breakpoint is being processed on the debug stack and an NMI | ||
474 | * comes in that also hits a breakpoint, the stack pointer | ||
475 | * will be set to the same fixed address as the breakpoint that was | ||
476 | * interrupted, causing that stack to be corrupted. To handle this case, | ||
477 | * check if the stack that was interrupted is the debug stack, and if | ||
478 | * so, change the IDT so that new breakpoints will use the current stack | ||
479 | * and not switch to the fixed address. On return of the NMI, switch back | ||
480 | * to the original IDT. | ||
481 | */ | ||
482 | static DEFINE_PER_CPU(int, update_debug_stack); | ||
483 | |||
484 | static inline void nmi_nesting_preprocess(struct pt_regs *regs) | ||
485 | { | ||
486 | /* | ||
487 | * If we interrupted a breakpoint, it is possible that | ||
488 | * the nmi handler will have breakpoints too. We need to | ||
489 | * change the IDT such that breakpoints that happen here | ||
490 | * continue to use the NMI stack. | ||
491 | */ | ||
492 | if (unlikely(is_debug_stack(regs->sp))) { | ||
493 | debug_stack_set_zero(); | ||
494 | __get_cpu_var(update_debug_stack) = 1; | ||
495 | } | ||
496 | } | ||
497 | |||
498 | static inline void nmi_nesting_postprocess(void) | ||
499 | { | ||
500 | if (unlikely(__get_cpu_var(update_debug_stack))) | ||
501 | debug_stack_reset(); | ||
502 | } | ||
503 | #endif | ||
504 | |||
408 | dotraplinkage notrace __kprobes void | 505 | dotraplinkage notrace __kprobes void |
409 | do_nmi(struct pt_regs *regs, long error_code) | 506 | do_nmi(struct pt_regs *regs, long error_code) |
410 | { | 507 | { |
508 | nmi_nesting_preprocess(regs); | ||
509 | |||
411 | nmi_enter(); | 510 | nmi_enter(); |
412 | 511 | ||
413 | inc_irq_stat(__nmi_count); | 512 | inc_irq_stat(__nmi_count); |
@@ -416,6 +515,9 @@ do_nmi(struct pt_regs *regs, long error_code) | |||
416 | default_do_nmi(regs); | 515 | default_do_nmi(regs); |
417 | 516 | ||
418 | nmi_exit(); | 517 | nmi_exit(); |
518 | |||
519 | /* On i386, may loop back to preprocess */ | ||
520 | nmi_nesting_postprocess(); | ||
419 | } | 521 | } |
420 | 522 | ||
421 | void stop_nmi(void) | 523 | void stop_nmi(void) |
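The i386 three-state scheme can be exercised outside the kernel. A self-contained single-threaded model (user-space C with a GCC builtin standing in for the kernel's cmpxchg): handle_nmi() plays the role of do_nmi, and the compare-and-swap in the exit path loops whenever a "latched" request slipped in while the handler ran.

    #include <stdio.h>

    enum nmi_states { NMI_NOT_RUNNING, NMI_EXECUTING, NMI_LATCHED };

    static enum nmi_states state = NMI_NOT_RUNNING;

    static void handle_nmi(void)
    {
            if (state != NMI_NOT_RUNNING) {
                    state = NMI_LATCHED;  /* remember it, replay it later */
                    return;
            }
    restart:
            state = NMI_EXECUTING;

            /* ...real NMI work; a nested "NMI" may set NMI_LATCHED here... */

            /* atomically EXECUTING -> NOT_RUNNING; failure means latched */
            if (!__sync_bool_compare_and_swap(&state, NMI_EXECUTING,
                                              NMI_NOT_RUNNING))
                    goto restart;         /* run the latched NMI now */
    }

    int main(void)
    {
            handle_nmi();
            printf("state=%d\n", state);  /* 0 == NMI_NOT_RUNNING */
            return 0;
    }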
diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c new file mode 100644 index 000000000000..0d01a8ea4e11 --- /dev/null +++ b/arch/x86/kernel/nmi_selftest.c | |||
@@ -0,0 +1,180 @@ | |||
1 | /* | ||
2 | * arch/x86/kernel/nmi_selftest.c | ||
3 | * | ||
4 | * Testsuite for NMI: IPIs | ||
5 | * | ||
6 | * Started by Don Zickus: | ||
7 | * (using lib/locking-selftest.c as a guide) | ||
8 | * | ||
9 | * Copyright (C) 2011 Red Hat, Inc., Don Zickus <dzickus@redhat.com> | ||
10 | */ | ||
11 | |||
12 | #include <linux/smp.h> | ||
13 | #include <linux/cpumask.h> | ||
14 | #include <linux/delay.h> | ||
15 | |||
16 | #include <asm/apic.h> | ||
17 | #include <asm/nmi.h> | ||
18 | |||
19 | #define SUCCESS 0 | ||
20 | #define FAILURE 1 | ||
21 | #define TIMEOUT 2 | ||
22 | |||
23 | static int nmi_fail; | ||
24 | |||
25 | /* check to see if NMI IPIs work on this machine */ | ||
26 | static DECLARE_BITMAP(nmi_ipi_mask, NR_CPUS) __read_mostly; | ||
27 | |||
28 | static int testcase_total; | ||
29 | static int testcase_successes; | ||
30 | static int expected_testcase_failures; | ||
31 | static int unexpected_testcase_failures; | ||
32 | static int unexpected_testcase_unknowns; | ||
33 | |||
34 | static int nmi_unk_cb(unsigned int val, struct pt_regs *regs) | ||
35 | { | ||
36 | unexpected_testcase_unknowns++; | ||
37 | return NMI_HANDLED; | ||
38 | } | ||
39 | |||
40 | static void init_nmi_testsuite(void) | ||
41 | { | ||
42 | /* trap all the unknown NMIs we may generate */ | ||
43 | register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk"); | ||
44 | } | ||
45 | |||
46 | static void cleanup_nmi_testsuite(void) | ||
47 | { | ||
48 | unregister_nmi_handler(NMI_UNKNOWN, "nmi_selftest_unk"); | ||
49 | } | ||
50 | |||
51 | static int test_nmi_ipi_callback(unsigned int val, struct pt_regs *regs) | ||
52 | { | ||
53 | int cpu = raw_smp_processor_id(); | ||
54 | |||
55 | if (cpumask_test_and_clear_cpu(cpu, to_cpumask(nmi_ipi_mask))) | ||
56 | return NMI_HANDLED; | ||
57 | |||
58 | return NMI_DONE; | ||
59 | } | ||
60 | |||
61 | static void test_nmi_ipi(struct cpumask *mask) | ||
62 | { | ||
63 | unsigned long timeout; | ||
64 | |||
65 | if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback, | ||
66 | NMI_FLAG_FIRST, "nmi_selftest")) { | ||
67 | nmi_fail = FAILURE; | ||
68 | return; | ||
69 | } | ||
70 | |||
71 | /* sync above data before sending NMI */ | ||
72 | wmb(); | ||
73 | |||
74 | apic->send_IPI_mask(mask, NMI_VECTOR); | ||
75 | |||
76 | /* Don't wait longer than a second */ | ||
77 | timeout = USEC_PER_SEC; | ||
78 | while (!cpumask_empty(mask) && timeout--) | ||
79 | udelay(1); | ||
80 | |||
81 | /* What happens if we timeout, do we still unregister?? */ | ||
82 | unregister_nmi_handler(NMI_LOCAL, "nmi_selftest"); | ||
83 | |||
84 | if (!timeout) | ||
85 | nmi_fail = TIMEOUT; | ||
86 | return; | ||
87 | } | ||
88 | |||
89 | static void remote_ipi(void) | ||
90 | { | ||
91 | cpumask_copy(to_cpumask(nmi_ipi_mask), cpu_online_mask); | ||
92 | cpumask_clear_cpu(smp_processor_id(), to_cpumask(nmi_ipi_mask)); | ||
93 | if (!cpumask_empty(to_cpumask(nmi_ipi_mask))) | ||
94 | test_nmi_ipi(to_cpumask(nmi_ipi_mask)); | ||
95 | } | ||
96 | |||
97 | static void local_ipi(void) | ||
98 | { | ||
99 | cpumask_clear(to_cpumask(nmi_ipi_mask)); | ||
100 | cpumask_set_cpu(smp_processor_id(), to_cpumask(nmi_ipi_mask)); | ||
101 | test_nmi_ipi(to_cpumask(nmi_ipi_mask)); | ||
102 | } | ||
103 | |||
104 | static void reset_nmi(void) | ||
105 | { | ||
106 | nmi_fail = 0; | ||
107 | } | ||
108 | |||
109 | static void dotest(void (*testcase_fn)(void), int expected) | ||
110 | { | ||
111 | testcase_fn(); | ||
112 | /* | ||
113 | * Filter out expected failures: | ||
114 | */ | ||
115 | if (nmi_fail != expected) { | ||
116 | unexpected_testcase_failures++; | ||
117 | |||
118 | if (nmi_fail == FAILURE) | ||
119 | printk("FAILED |"); | ||
120 | else if (nmi_fail == TIMEOUT) | ||
121 | printk("TIMEOUT|"); | ||
122 | else | ||
123 | printk("ERROR |"); | ||
124 | dump_stack(); | ||
125 | } else { | ||
126 | testcase_successes++; | ||
127 | printk(" ok |"); | ||
128 | } | ||
129 | testcase_total++; | ||
130 | |||
131 | reset_nmi(); | ||
132 | } | ||
133 | |||
134 | static inline void print_testname(const char *testname) | ||
135 | { | ||
136 | printk("%12s:", testname); | ||
137 | } | ||
138 | |||
139 | void nmi_selftest(void) | ||
140 | { | ||
141 | init_nmi_testsuite(); | ||
142 | |||
143 | /* | ||
144 | * Run the testsuite: | ||
145 | */ | ||
146 | printk("----------------\n"); | ||
147 | printk("| NMI testsuite:\n"); | ||
148 | printk("--------------------\n"); | ||
149 | |||
150 | print_testname("remote IPI"); | ||
151 | dotest(remote_ipi, SUCCESS); | ||
152 | printk("\n"); | ||
153 | print_testname("local IPI"); | ||
154 | dotest(local_ipi, SUCCESS); | ||
155 | printk("\n"); | ||
156 | |||
157 | cleanup_nmi_testsuite(); | ||
158 | |||
159 | if (unexpected_testcase_failures) { | ||
160 | printk("--------------------\n"); | ||
161 | printk("BUG: %3d unexpected failures (out of %3d) - debugging disabled! |\n", | ||
162 | unexpected_testcase_failures, testcase_total); | ||
163 | printk("-----------------------------------------------------------------\n"); | ||
164 | } else if (expected_testcase_failures && testcase_successes) { | ||
165 | printk("--------------------\n"); | ||
166 | printk("%3d out of %3d testcases failed, as expected. |\n", | ||
167 | expected_testcase_failures, testcase_total); | ||
168 | printk("----------------------------------------------------\n"); | ||
169 | } else if (expected_testcase_failures && !testcase_successes) { | ||
170 | printk("--------------------\n"); | ||
171 | printk("All %3d testcases failed, as expected. |\n", | ||
172 | expected_testcase_failures); | ||
173 | printk("----------------------------------------\n"); | ||
174 | } else { | ||
175 | printk("--------------------\n"); | ||
176 | printk("Good, all %3d testcases passed! |\n", | ||
177 | testcase_successes); | ||
178 | printk("---------------------------------\n"); | ||
179 | } | ||
180 | } | ||
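Distilled from the selftest above, the registration dance any NMI consumer follows: register a callback on a line (NMI_LOCAL here, with NMI_FLAG_FIRST to be polled first), return NMI_HANDLED only for events it owns and NMI_DONE otherwise, and unregister by the same name string. A minimal sketch reusing only the calls the file demonstrates; event_is_mine() is a hypothetical ownership test:

    static int my_nmi_handler(unsigned int type, struct pt_regs *regs)
    {
            if (!event_is_mine())    /* hypothetical: did we raise this? */
                    return NMI_DONE; /* not ours, let other handlers look */
            /* ...service the event... */
            return NMI_HANDLED;
    }

    static int my_nmi_init(void)
    {
            return register_nmi_handler(NMI_LOCAL, my_nmi_handler,
                                        NMI_FLAG_FIRST, "my_nmi");
    }

    static void my_nmi_exit(void)
    {
            unregister_nmi_handler(NMI_LOCAL, "my_nmi");
    }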
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 80dc793b3f63..1c4d769e21ea 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -45,6 +45,15 @@ int iommu_detected __read_mostly = 0; | |||
45 | */ | 45 | */ |
46 | int iommu_pass_through __read_mostly; | 46 | int iommu_pass_through __read_mostly; |
47 | 47 | ||
48 | /* | ||
49 | * Group multi-function PCI devices into a single device-group for the | ||
50 | * iommu_device_group interface. This tells the iommu driver to pretend | ||
51 | * it cannot distinguish between functions of a device, exposing only one | ||
52 | * group for the device. Useful for disallowing use of individual PCI | ||
53 | * functions from userspace drivers. | ||
54 | */ | ||
55 | int iommu_group_mf __read_mostly; | ||
56 | |||
48 | extern struct iommu_table_entry __iommu_table[], __iommu_table_end[]; | 57 | extern struct iommu_table_entry __iommu_table[], __iommu_table_end[]; |
49 | 58 | ||
50 | /* Dummy device used for NULL arguments (normally ISA). */ | 59 | /* Dummy device used for NULL arguments (normally ISA). */ |
@@ -169,6 +178,8 @@ static __init int iommu_setup(char *p) | |||
169 | #endif | 178 | #endif |
170 | if (!strncmp(p, "pt", 2)) | 179 | if (!strncmp(p, "pt", 2)) |
171 | iommu_pass_through = 1; | 180 | iommu_pass_through = 1; |
181 | if (!strncmp(p, "group_mf", 8)) | ||
182 | iommu_group_mf = 1; | ||
172 | 183 | ||
173 | gart_parse_options(p); | 184 | gart_parse_options(p); |
174 | 185 | ||
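As the parsing above shows, the grouping behavior is strictly opt-in: booting with iommu=group_mf on the kernel command line sets iommu_group_mf, after which an IOMMU driver that honors the flag exposes all functions of a multi-function PCI device as a single device group. Without the option the flag stays 0 and per-function groups are reported as before.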
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 89a04c7b5bb6..50267386b766 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -1392,20 +1392,18 @@ long syscall_trace_enter(struct pt_regs *regs) | |||
1392 | if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) | 1392 | if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) |
1393 | trace_sys_enter(regs, regs->orig_ax); | 1393 | trace_sys_enter(regs, regs->orig_ax); |
1394 | 1394 | ||
1395 | if (unlikely(current->audit_context)) { | 1395 | if (IS_IA32) |
1396 | if (IS_IA32) | 1396 | audit_syscall_entry(AUDIT_ARCH_I386, |
1397 | audit_syscall_entry(AUDIT_ARCH_I386, | 1397 | regs->orig_ax, |
1398 | regs->orig_ax, | 1398 | regs->bx, regs->cx, |
1399 | regs->bx, regs->cx, | 1399 | regs->dx, regs->si); |
1400 | regs->dx, regs->si); | ||
1401 | #ifdef CONFIG_X86_64 | 1400 | #ifdef CONFIG_X86_64 |
1402 | else | 1401 | else |
1403 | audit_syscall_entry(AUDIT_ARCH_X86_64, | 1402 | audit_syscall_entry(AUDIT_ARCH_X86_64, |
1404 | regs->orig_ax, | 1403 | regs->orig_ax, |
1405 | regs->di, regs->si, | 1404 | regs->di, regs->si, |
1406 | regs->dx, regs->r10); | 1405 | regs->dx, regs->r10); |
1407 | #endif | 1406 | #endif |
1408 | } | ||
1409 | 1407 | ||
1410 | return ret ?: regs->orig_ax; | 1408 | return ret ?: regs->orig_ax; |
1411 | } | 1409 | } |
@@ -1414,8 +1412,7 @@ void syscall_trace_leave(struct pt_regs *regs) | |||
1414 | { | 1412 | { |
1415 | bool step; | 1413 | bool step; |
1416 | 1414 | ||
1417 | if (unlikely(current->audit_context)) | 1415 | audit_syscall_exit(regs); |
1418 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); | ||
1419 | 1416 | ||
1420 | if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) | 1417 | if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) |
1421 | trace_sys_exit(regs, regs->ax); | 1418 | trace_sys_exit(regs, regs->ax); |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d05444ac2aea..d7d5099fe874 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -749,12 +749,7 @@ void __init setup_arch(char **cmdline_p) | |||
749 | #endif | 749 | #endif |
750 | #ifdef CONFIG_EFI | 750 | #ifdef CONFIG_EFI |
751 | if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, | 751 | if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, |
752 | #ifdef CONFIG_X86_32 | 752 | EFI_LOADER_SIGNATURE, 4)) { |
753 | "EL32", | ||
754 | #else | ||
755 | "EL64", | ||
756 | #endif | ||
757 | 4)) { | ||
758 | efi_enabled = 1; | 753 | efi_enabled = 1; |
759 | efi_memblock_x86_reserve_range(); | 754 | efi_memblock_x86_reserve_range(); |
760 | } | 755 | } |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 54ddaeb221c1..46a01bdc27e2 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -682,7 +682,6 @@ static int | |||
682 | handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | 682 | handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, |
683 | struct pt_regs *regs) | 683 | struct pt_regs *regs) |
684 | { | 684 | { |
685 | sigset_t blocked; | ||
686 | int ret; | 685 | int ret; |
687 | 686 | ||
688 | /* Are we from a system call? */ | 687 | /* Are we from a system call? */ |
@@ -733,10 +732,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
733 | */ | 732 | */ |
734 | regs->flags &= ~X86_EFLAGS_TF; | 733 | regs->flags &= ~X86_EFLAGS_TF; |
735 | 734 | ||
736 | sigorsets(&blocked, ¤t->blocked, &ka->sa.sa_mask); | 735 | block_sigmask(ka, sig); |
737 | if (!(ka->sa.sa_flags & SA_NODEFER)) | ||
738 | sigaddset(&blocked, sig); | ||
739 | set_current_blocked(&blocked); | ||
740 | 736 | ||
741 | tracehook_signal_handler(sig, info, ka, regs, | 737 | tracehook_signal_handler(sig, info, ka, regs, |
742 | test_thread_flag(TIF_SINGLESTEP)); | 738 | test_thread_flag(TIF_SINGLESTEP)); |
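The open-coded sequence deleted above is exactly what the new block_sigmask() helper is meant to factor out; in sketch form, assuming the helper keeps the removed semantics:

    static void block_sigmask(struct k_sigaction *ka, int signr)
    {
            sigset_t blocked;

            /* block the handler's sa_mask for the handler's duration */
            sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask);
            /* and the delivered signal itself, unless SA_NODEFER is set */
            if (!(ka->sa.sa_flags & SA_NODEFER))
                    sigaddset(&blocked, signr);
            set_current_blocked(&blocked);
    }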
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 16204dc15484..66c74f481cab 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <asm/mmu_context.h> | 29 | #include <asm/mmu_context.h> |
30 | #include <asm/proto.h> | 30 | #include <asm/proto.h> |
31 | #include <asm/apic.h> | 31 | #include <asm/apic.h> |
32 | #include <asm/nmi.h> | ||
32 | /* | 33 | /* |
33 | * Some notes on x86 processor bugs affecting SMP operation: | 34 | * Some notes on x86 processor bugs affecting SMP operation: |
34 | * | 35 | * |
@@ -148,6 +149,60 @@ void native_send_call_func_ipi(const struct cpumask *mask) | |||
148 | free_cpumask_var(allbutself); | 149 | free_cpumask_var(allbutself); |
149 | } | 150 | } |
150 | 151 | ||
152 | static atomic_t stopping_cpu = ATOMIC_INIT(-1); | ||
153 | |||
154 | static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) | ||
155 | { | ||
156 | /* We are registered on stopping cpu too, avoid spurious NMI */ | ||
157 | if (raw_smp_processor_id() == atomic_read(&stopping_cpu)) | ||
158 | return NMI_HANDLED; | ||
159 | |||
160 | stop_this_cpu(NULL); | ||
161 | |||
162 | return NMI_HANDLED; | ||
163 | } | ||
164 | |||
165 | static void native_nmi_stop_other_cpus(int wait) | ||
166 | { | ||
167 | unsigned long flags; | ||
168 | unsigned long timeout; | ||
169 | |||
170 | if (reboot_force) | ||
171 | return; | ||
172 | |||
173 | /* | ||
174 | * Use our own vector here because smp_call_function | ||
175 | * does lots of things not suitable in a panic situation. | ||
176 | */ | ||
177 | if (num_online_cpus() > 1) { | ||
178 | /* did someone beat us here? */ | ||
179 | if (atomic_cmpxchg(&stopping_cpu, -1, safe_smp_processor_id()) != -1) | ||
180 | return; | ||
181 | |||
182 | if (register_nmi_handler(NMI_LOCAL, smp_stop_nmi_callback, | ||
183 | NMI_FLAG_FIRST, "smp_stop")) | ||
184 | /* Note: we ignore failures here */ | ||
185 | return; | ||
186 | |||
187 | /* sync above data before sending NMI */ | ||
188 | wmb(); | ||
189 | |||
190 | apic->send_IPI_allbutself(NMI_VECTOR); | ||
191 | |||
192 | /* | ||
193 | * Don't wait longer than a second if the caller | ||
194 | * didn't ask us to wait. | ||
195 | */ | ||
196 | timeout = USEC_PER_SEC; | ||
197 | while (num_online_cpus() > 1 && (wait || timeout--)) | ||
198 | udelay(1); | ||
199 | } | ||
200 | |||
201 | local_irq_save(flags); | ||
202 | disable_local_APIC(); | ||
203 | local_irq_restore(flags); | ||
204 | } | ||
205 | |||
151 | /* | 206 | /* |
152 | * this function calls the 'stop' function on all other CPUs in the system. | 207 | * this function calls the 'stop' function on all other CPUs in the system. |
153 | */ | 208 | */ |
@@ -160,7 +215,7 @@ asmlinkage void smp_reboot_interrupt(void) | |||
160 | irq_exit(); | 215 | irq_exit(); |
161 | } | 216 | } |
162 | 217 | ||
163 | static void native_stop_other_cpus(int wait) | 218 | static void native_irq_stop_other_cpus(int wait) |
164 | { | 219 | { |
165 | unsigned long flags; | 220 | unsigned long flags; |
166 | unsigned long timeout; | 221 | unsigned long timeout; |
@@ -194,6 +249,11 @@ static void native_stop_other_cpus(int wait) | |||
194 | local_irq_restore(flags); | 249 | local_irq_restore(flags); |
195 | } | 250 | } |
196 | 251 | ||
252 | static void native_smp_disable_nmi_ipi(void) | ||
253 | { | ||
254 | smp_ops.stop_other_cpus = native_irq_stop_other_cpus; | ||
255 | } | ||
256 | |||
197 | /* | 257 | /* |
198 | * Reschedule call back. | 258 | * Reschedule call back. |
199 | */ | 259 | */ |
@@ -225,12 +285,20 @@ void smp_call_function_single_interrupt(struct pt_regs *regs) | |||
225 | irq_exit(); | 285 | irq_exit(); |
226 | } | 286 | } |
227 | 287 | ||
288 | static int __init nonmi_ipi_setup(char *str) | ||
289 | { | ||
290 | native_smp_disable_nmi_ipi(); | ||
291 | return 1; | ||
292 | } | ||
293 | |||
294 | __setup("nonmi_ipi", nonmi_ipi_setup); | ||
295 | |||
228 | struct smp_ops smp_ops = { | 296 | struct smp_ops smp_ops = { |
229 | .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, | 297 | .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, |
230 | .smp_prepare_cpus = native_smp_prepare_cpus, | 298 | .smp_prepare_cpus = native_smp_prepare_cpus, |
231 | .smp_cpus_done = native_smp_cpus_done, | 299 | .smp_cpus_done = native_smp_cpus_done, |
232 | 300 | ||
233 | .stop_other_cpus = native_stop_other_cpus, | 301 | .stop_other_cpus = native_nmi_stop_other_cpus, |
234 | .smp_send_reschedule = native_smp_send_reschedule, | 302 | .smp_send_reschedule = native_smp_send_reschedule, |
235 | 303 | ||
236 | .cpu_up = native_cpu_up, | 304 | .cpu_up = native_cpu_up, |
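The arbitration in native_nmi_stop_other_cpus() deserves a restatement: atomic_cmpxchg(&stopping_cpu, -1, my_id) lets exactly one CPU win the right to stop the others, and the NMI callback compares against the recorded winner to swallow the broadcast NMI the stopper inevitably receives itself. The bare pattern, with hypothetical names and C11 atomics standing in for the kernel primitives:

    #include <stdatomic.h>

    static atomic_int stopping_cpu = -1; /* -1: nobody is stopping yet */

    /* Return 1 if this caller won the right to stop everyone else. */
    static int try_become_stopper(int my_cpu)
    {
            int expected = -1;

            /* first CPU to swap -1 -> my_cpu wins; later callers bail */
            return atomic_compare_exchange_strong(&stopping_cpu,
                                                  &expected, my_cpu);
    }

    /* NMI path: the winner ignores the NMI it broadcast to itself. */
    static int stop_nmi_callback(int this_cpu)
    {
            if (this_cpu == atomic_load(&stopping_cpu))
                    return 0; /* spurious: we sent this one ourselves */
            /* stop_this_cpu(); */
            return 1;
    }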
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index e38e21754eea..66d250c00d11 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -207,23 +207,29 @@ static void __cpuinit smp_callin(void) | |||
207 | * Need to setup vector mappings before we enable interrupts. | 207 | * Need to setup vector mappings before we enable interrupts. |
208 | */ | 208 | */ |
209 | setup_vector_irq(smp_processor_id()); | 209 | setup_vector_irq(smp_processor_id()); |
210 | |||
211 | /* | ||
212 | * Save our processor parameters. Note: this information | ||
213 | * is needed for clock calibration. | ||
214 | */ | ||
215 | smp_store_cpu_info(cpuid); | ||
216 | |||
210 | /* | 217 | /* |
211 | * Get our bogomips. | 218 | * Get our bogomips. |
219 | * Update loops_per_jiffy in cpu_data. Previous call to | ||
220 | * smp_store_cpu_info() stored a value that is close but not as | ||
221 | * accurate as the value just calculated. | ||
212 | * | 222 | * |
213 | * Need to enable IRQs because it can take longer and then | 223 | * Need to enable IRQs because it can take longer and then |
214 | * the NMI watchdog might kill us. | 224 | * the NMI watchdog might kill us. |
215 | */ | 225 | */ |
216 | local_irq_enable(); | 226 | local_irq_enable(); |
217 | calibrate_delay(); | 227 | calibrate_delay(); |
228 | cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy; | ||
218 | local_irq_disable(); | 229 | local_irq_disable(); |
219 | pr_debug("Stack at about %p\n", &cpuid); | 230 | pr_debug("Stack at about %p\n", &cpuid); |
220 | 231 | ||
221 | /* | 232 | /* |
222 | * Save our processor parameters | ||
223 | */ | ||
224 | smp_store_cpu_info(cpuid); | ||
225 | |||
226 | /* | ||
227 | * This must be done before setting cpu_online_mask | 233 | * This must be done before setting cpu_online_mask |
228 | * or calling notify_cpu_starting. | 234 | * or calling notify_cpu_starting. |
229 | */ | 235 | */ |
@@ -1143,6 +1149,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus) | |||
1143 | { | 1149 | { |
1144 | pr_debug("Boot done.\n"); | 1150 | pr_debug("Boot done.\n"); |
1145 | 1151 | ||
1152 | nmi_selftest(); | ||
1146 | impress_friends(); | 1153 | impress_friends(); |
1147 | #ifdef CONFIG_X86_IO_APIC | 1154 | #ifdef CONFIG_X86_IO_APIC |
1148 | setup_ioapic_dest(); | 1155 | setup_ioapic_dest(); |
diff --git a/arch/x86/kernel/syscall_32.c b/arch/x86/kernel/syscall_32.c new file mode 100644 index 000000000000..147fcd4941c4 --- /dev/null +++ b/arch/x86/kernel/syscall_32.c | |||
@@ -0,0 +1,25 @@ | |||
1 | /* System call table for i386. */ | ||
2 | |||
3 | #include <linux/linkage.h> | ||
4 | #include <linux/sys.h> | ||
5 | #include <linux/cache.h> | ||
6 | #include <asm/asm-offsets.h> | ||
7 | |||
8 | #define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ; | ||
9 | #include <asm/syscalls_32.h> | ||
10 | #undef __SYSCALL_I386 | ||
11 | |||
12 | #define __SYSCALL_I386(nr, sym, compat) [nr] = sym, | ||
13 | |||
14 | typedef asmlinkage void (*sys_call_ptr_t)(void); | ||
15 | |||
16 | extern asmlinkage void sys_ni_syscall(void); | ||
17 | |||
18 | const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { | ||
19 | /* | ||
20 | * Smells like a compiler bug -- it doesn't work | ||
21 | * when the & below is removed. | ||
22 | */ | ||
23 | [0 ... __NR_syscall_max] = &sys_ni_syscall, | ||
24 | #include <asm/syscalls_32.h> | ||
25 | }; | ||
diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c index de87d6008295..7ac7943be02c 100644 --- a/arch/x86/kernel/syscall_64.c +++ b/arch/x86/kernel/syscall_64.c | |||
@@ -5,15 +5,11 @@ | |||
5 | #include <linux/cache.h> | 5 | #include <linux/cache.h> |
6 | #include <asm/asm-offsets.h> | 6 | #include <asm/asm-offsets.h> |
7 | 7 | ||
8 | #define __NO_STUBS | 8 | #define __SYSCALL_64(nr, sym, compat) extern asmlinkage void sym(void) ; |
9 | #include <asm/syscalls_64.h> | ||
10 | #undef __SYSCALL_64 | ||
9 | 11 | ||
10 | #define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; | 12 | #define __SYSCALL_64(nr, sym, compat) [nr] = sym, |
11 | #undef _ASM_X86_UNISTD_64_H | ||
12 | #include <asm/unistd_64.h> | ||
13 | |||
14 | #undef __SYSCALL | ||
15 | #define __SYSCALL(nr, sym) [nr] = sym, | ||
16 | #undef _ASM_X86_UNISTD_64_H | ||
17 | 13 | ||
18 | typedef void (*sys_call_ptr_t)(void); | 14 | typedef void (*sys_call_ptr_t)(void); |
19 | 15 | ||
@@ -21,9 +17,9 @@ extern void sys_ni_syscall(void); | |||
21 | 17 | ||
22 | const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { | 18 | const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { |
23 | /* | 19 | /* |
24 | *Smells like a like a compiler bug -- it doesn't work | 20 | * Smells like a compiler bug -- it doesn't work |
25 | *when the & below is removed. | 21 | * when the & below is removed. |
26 | */ | 22 | */ |
27 | [0 ... __NR_syscall_max] = &sys_ni_syscall, | 23 | [0 ... __NR_syscall_max] = &sys_ni_syscall, |
28 | #include <asm/unistd_64.h> | 24 | #include <asm/syscalls_64.h> |
29 | }; | 25 | }; |
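Both syscall tables now lean on the same two-pass X-macro idiom: the generated header is included twice, first with __SYSCALL_64 (or __SYSCALL_I386) expanded to an extern declaration, then re-expanded to a designated initializer, on top of a GCC range initializer that pre-fills every slot with sys_ni_syscall. A tiny standalone demonstration of the trick, with a made-up OPS_LIST macro standing in for the generated header:

    #include <stdio.h>

    /* stand-in for the generated asm/syscalls_64.h */
    #define OPS_LIST(X) X(0, op_zero) X(1, op_one)

    #define DECLARE(nr, sym) void sym(void);
    OPS_LIST(DECLARE)              /* first pass: declarations */
    #undef DECLARE

    typedef void (*call_ptr_t)(void);

    void op_default(void) { puts("default"); }
    void op_zero(void)    { puts("zero"); }
    void op_one(void)     { puts("one"); }

    #define ENTRY(nr, sym) [nr] = sym,
    static const call_ptr_t table[2] = {
            [0 ... 1] = op_default, /* GCC range initializer, as above */
            OPS_LIST(ENTRY)         /* second pass: real table entries */
    };
    #undef ENTRY

    int main(void)
    {
            table[0]();             /* prints "zero" */
            table[1]();             /* prints "one" */
            return 0;
    }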
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S deleted file mode 100644 index 9a0e31293920..000000000000 --- a/arch/x86/kernel/syscall_table_32.S +++ /dev/null | |||
@@ -1,350 +0,0 @@ | |||
1 | ENTRY(sys_call_table) | ||
2 | .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ | ||
3 | .long sys_exit | ||
4 | .long ptregs_fork | ||
5 | .long sys_read | ||
6 | .long sys_write | ||
7 | .long sys_open /* 5 */ | ||
8 | .long sys_close | ||
9 | .long sys_waitpid | ||
10 | .long sys_creat | ||
11 | .long sys_link | ||
12 | .long sys_unlink /* 10 */ | ||
13 | .long ptregs_execve | ||
14 | .long sys_chdir | ||
15 | .long sys_time | ||
16 | .long sys_mknod | ||
17 | .long sys_chmod /* 15 */ | ||
18 | .long sys_lchown16 | ||
19 | .long sys_ni_syscall /* old break syscall holder */ | ||
20 | .long sys_stat | ||
21 | .long sys_lseek | ||
22 | .long sys_getpid /* 20 */ | ||
23 | .long sys_mount | ||
24 | .long sys_oldumount | ||
25 | .long sys_setuid16 | ||
26 | .long sys_getuid16 | ||
27 | .long sys_stime /* 25 */ | ||
28 | .long sys_ptrace | ||
29 | .long sys_alarm | ||
30 | .long sys_fstat | ||
31 | .long sys_pause | ||
32 | .long sys_utime /* 30 */ | ||
33 | .long sys_ni_syscall /* old stty syscall holder */ | ||
34 | .long sys_ni_syscall /* old gtty syscall holder */ | ||
35 | .long sys_access | ||
36 | .long sys_nice | ||
37 | .long sys_ni_syscall /* 35 - old ftime syscall holder */ | ||
38 | .long sys_sync | ||
39 | .long sys_kill | ||
40 | .long sys_rename | ||
41 | .long sys_mkdir | ||
42 | .long sys_rmdir /* 40 */ | ||
43 | .long sys_dup | ||
44 | .long sys_pipe | ||
45 | .long sys_times | ||
46 | .long sys_ni_syscall /* old prof syscall holder */ | ||
47 | .long sys_brk /* 45 */ | ||
48 | .long sys_setgid16 | ||
49 | .long sys_getgid16 | ||
50 | .long sys_signal | ||
51 | .long sys_geteuid16 | ||
52 | .long sys_getegid16 /* 50 */ | ||
53 | .long sys_acct | ||
54 | .long sys_umount /* recycled never used phys() */ | ||
55 | .long sys_ni_syscall /* old lock syscall holder */ | ||
56 | .long sys_ioctl | ||
57 | .long sys_fcntl /* 55 */ | ||
58 | .long sys_ni_syscall /* old mpx syscall holder */ | ||
59 | .long sys_setpgid | ||
60 | .long sys_ni_syscall /* old ulimit syscall holder */ | ||
61 | .long sys_olduname | ||
62 | .long sys_umask /* 60 */ | ||
63 | .long sys_chroot | ||
64 | .long sys_ustat | ||
65 | .long sys_dup2 | ||
66 | .long sys_getppid | ||
67 | .long sys_getpgrp /* 65 */ | ||
68 | .long sys_setsid | ||
69 | .long sys_sigaction | ||
70 | .long sys_sgetmask | ||
71 | .long sys_ssetmask | ||
72 | .long sys_setreuid16 /* 70 */ | ||
73 | .long sys_setregid16 | ||
74 | .long sys_sigsuspend | ||
75 | .long sys_sigpending | ||
76 | .long sys_sethostname | ||
77 | .long sys_setrlimit /* 75 */ | ||
78 | .long sys_old_getrlimit | ||
79 | .long sys_getrusage | ||
80 | .long sys_gettimeofday | ||
81 | .long sys_settimeofday | ||
82 | .long sys_getgroups16 /* 80 */ | ||
83 | .long sys_setgroups16 | ||
84 | .long sys_old_select | ||
85 | .long sys_symlink | ||
86 | .long sys_lstat | ||
87 | .long sys_readlink /* 85 */ | ||
88 | .long sys_uselib | ||
89 | .long sys_swapon | ||
90 | .long sys_reboot | ||
91 | .long sys_old_readdir | ||
92 | .long sys_old_mmap /* 90 */ | ||
93 | .long sys_munmap | ||
94 | .long sys_truncate | ||
95 | .long sys_ftruncate | ||
96 | .long sys_fchmod | ||
97 | .long sys_fchown16 /* 95 */ | ||
98 | .long sys_getpriority | ||
99 | .long sys_setpriority | ||
100 | .long sys_ni_syscall /* old profil syscall holder */ | ||
101 | .long sys_statfs | ||
102 | .long sys_fstatfs /* 100 */ | ||
103 | .long sys_ioperm | ||
104 | .long sys_socketcall | ||
105 | .long sys_syslog | ||
106 | .long sys_setitimer | ||
107 | .long sys_getitimer /* 105 */ | ||
108 | .long sys_newstat | ||
109 | .long sys_newlstat | ||
110 | .long sys_newfstat | ||
111 | .long sys_uname | ||
112 | .long ptregs_iopl /* 110 */ | ||
113 | .long sys_vhangup | ||
114 | .long sys_ni_syscall /* old "idle" system call */ | ||
115 | .long ptregs_vm86old | ||
116 | .long sys_wait4 | ||
117 | .long sys_swapoff /* 115 */ | ||
118 | .long sys_sysinfo | ||
119 | .long sys_ipc | ||
120 | .long sys_fsync | ||
121 | .long ptregs_sigreturn | ||
122 | .long ptregs_clone /* 120 */ | ||
123 | .long sys_setdomainname | ||
124 | .long sys_newuname | ||
125 | .long sys_modify_ldt | ||
126 | .long sys_adjtimex | ||
127 | .long sys_mprotect /* 125 */ | ||
128 | .long sys_sigprocmask | ||
129 | .long sys_ni_syscall /* old "create_module" */ | ||
130 | .long sys_init_module | ||
131 | .long sys_delete_module | ||
132 | .long sys_ni_syscall /* 130: old "get_kernel_syms" */ | ||
133 | .long sys_quotactl | ||
134 | .long sys_getpgid | ||
135 | .long sys_fchdir | ||
136 | .long sys_bdflush | ||
137 | .long sys_sysfs /* 135 */ | ||
138 | .long sys_personality | ||
139 | .long sys_ni_syscall /* reserved for afs_syscall */ | ||
140 | .long sys_setfsuid16 | ||
141 | .long sys_setfsgid16 | ||
142 | .long sys_llseek /* 140 */ | ||
143 | .long sys_getdents | ||
144 | .long sys_select | ||
145 | .long sys_flock | ||
146 | .long sys_msync | ||
147 | .long sys_readv /* 145 */ | ||
148 | .long sys_writev | ||
149 | .long sys_getsid | ||
150 | .long sys_fdatasync | ||
151 | .long sys_sysctl | ||
152 | .long sys_mlock /* 150 */ | ||
153 | .long sys_munlock | ||
154 | .long sys_mlockall | ||
155 | .long sys_munlockall | ||
156 | .long sys_sched_setparam | ||
157 | .long sys_sched_getparam /* 155 */ | ||
158 | .long sys_sched_setscheduler | ||
159 | .long sys_sched_getscheduler | ||
160 | .long sys_sched_yield | ||
161 | .long sys_sched_get_priority_max | ||
162 | .long sys_sched_get_priority_min /* 160 */ | ||
163 | .long sys_sched_rr_get_interval | ||
164 | .long sys_nanosleep | ||
165 | .long sys_mremap | ||
166 | .long sys_setresuid16 | ||
167 | .long sys_getresuid16 /* 165 */ | ||
168 | .long ptregs_vm86 | ||
169 | .long sys_ni_syscall /* Old sys_query_module */ | ||
170 | .long sys_poll | ||
171 | .long sys_ni_syscall /* Old nfsservctl */ | ||
172 | .long sys_setresgid16 /* 170 */ | ||
173 | .long sys_getresgid16 | ||
174 | .long sys_prctl | ||
175 | .long ptregs_rt_sigreturn | ||
176 | .long sys_rt_sigaction | ||
177 | .long sys_rt_sigprocmask /* 175 */ | ||
178 | .long sys_rt_sigpending | ||
179 | .long sys_rt_sigtimedwait | ||
180 | .long sys_rt_sigqueueinfo | ||
181 | .long sys_rt_sigsuspend | ||
182 | .long sys_pread64 /* 180 */ | ||
183 | .long sys_pwrite64 | ||
184 | .long sys_chown16 | ||
185 | .long sys_getcwd | ||
186 | .long sys_capget | ||
187 | .long sys_capset /* 185 */ | ||
188 | .long ptregs_sigaltstack | ||
189 | .long sys_sendfile | ||
190 | .long sys_ni_syscall /* reserved for streams1 */ | ||
191 | .long sys_ni_syscall /* reserved for streams2 */ | ||
192 | .long ptregs_vfork /* 190 */ | ||
193 | .long sys_getrlimit | ||
194 | .long sys_mmap_pgoff | ||
195 | .long sys_truncate64 | ||
196 | .long sys_ftruncate64 | ||
197 | .long sys_stat64 /* 195 */ | ||
198 | .long sys_lstat64 | ||
199 | .long sys_fstat64 | ||
200 | .long sys_lchown | ||
201 | .long sys_getuid | ||
202 | .long sys_getgid /* 200 */ | ||
203 | .long sys_geteuid | ||
204 | .long sys_getegid | ||
205 | .long sys_setreuid | ||
206 | .long sys_setregid | ||
207 | .long sys_getgroups /* 205 */ | ||
208 | .long sys_setgroups | ||
209 | .long sys_fchown | ||
210 | .long sys_setresuid | ||
211 | .long sys_getresuid | ||
212 | .long sys_setresgid /* 210 */ | ||
213 | .long sys_getresgid | ||
214 | .long sys_chown | ||
215 | .long sys_setuid | ||
216 | .long sys_setgid | ||
217 | .long sys_setfsuid /* 215 */ | ||
218 | .long sys_setfsgid | ||
219 | .long sys_pivot_root | ||
220 | .long sys_mincore | ||
221 | .long sys_madvise | ||
222 | .long sys_getdents64 /* 220 */ | ||
223 | .long sys_fcntl64 | ||
224 | .long sys_ni_syscall /* reserved for TUX */ | ||
225 | .long sys_ni_syscall | ||
226 | .long sys_gettid | ||
227 | .long sys_readahead /* 225 */ | ||
228 | .long sys_setxattr | ||
229 | .long sys_lsetxattr | ||
230 | .long sys_fsetxattr | ||
231 | .long sys_getxattr | ||
232 | .long sys_lgetxattr /* 230 */ | ||
233 | .long sys_fgetxattr | ||
234 | .long sys_listxattr | ||
235 | .long sys_llistxattr | ||
236 | .long sys_flistxattr | ||
237 | .long sys_removexattr /* 235 */ | ||
238 | .long sys_lremovexattr | ||
239 | .long sys_fremovexattr | ||
240 | .long sys_tkill | ||
241 | .long sys_sendfile64 | ||
242 | .long sys_futex /* 240 */ | ||
243 | .long sys_sched_setaffinity | ||
244 | .long sys_sched_getaffinity | ||
245 | .long sys_set_thread_area | ||
246 | .long sys_get_thread_area | ||
247 | .long sys_io_setup /* 245 */ | ||
248 | .long sys_io_destroy | ||
249 | .long sys_io_getevents | ||
250 | .long sys_io_submit | ||
251 | .long sys_io_cancel | ||
252 | .long sys_fadvise64 /* 250 */ | ||
253 | .long sys_ni_syscall | ||
254 | .long sys_exit_group | ||
255 | .long sys_lookup_dcookie | ||
256 | .long sys_epoll_create | ||
257 | .long sys_epoll_ctl /* 255 */ | ||
258 | .long sys_epoll_wait | ||
259 | .long sys_remap_file_pages | ||
260 | .long sys_set_tid_address | ||
261 | .long sys_timer_create | ||
262 | .long sys_timer_settime /* 260 */ | ||
263 | .long sys_timer_gettime | ||
264 | .long sys_timer_getoverrun | ||
265 | .long sys_timer_delete | ||
266 | .long sys_clock_settime | ||
267 | .long sys_clock_gettime /* 265 */ | ||
268 | .long sys_clock_getres | ||
269 | .long sys_clock_nanosleep | ||
270 | .long sys_statfs64 | ||
271 | .long sys_fstatfs64 | ||
272 | .long sys_tgkill /* 270 */ | ||
273 | .long sys_utimes | ||
274 | .long sys_fadvise64_64 | ||
275 | .long sys_ni_syscall /* sys_vserver */ | ||
276 | .long sys_mbind | ||
277 | .long sys_get_mempolicy | ||
278 | .long sys_set_mempolicy | ||
279 | .long sys_mq_open | ||
280 | .long sys_mq_unlink | ||
281 | .long sys_mq_timedsend | ||
282 | .long sys_mq_timedreceive /* 280 */ | ||
283 | .long sys_mq_notify | ||
284 | .long sys_mq_getsetattr | ||
285 | .long sys_kexec_load | ||
286 | .long sys_waitid | ||
287 | .long sys_ni_syscall /* 285 */ /* available */ | ||
288 | .long sys_add_key | ||
289 | .long sys_request_key | ||
290 | .long sys_keyctl | ||
291 | .long sys_ioprio_set | ||
292 | .long sys_ioprio_get /* 290 */ | ||
293 | .long sys_inotify_init | ||
294 | .long sys_inotify_add_watch | ||
295 | .long sys_inotify_rm_watch | ||
296 | .long sys_migrate_pages | ||
297 | .long sys_openat /* 295 */ | ||
298 | .long sys_mkdirat | ||
299 | .long sys_mknodat | ||
300 | .long sys_fchownat | ||
301 | .long sys_futimesat | ||
302 | .long sys_fstatat64 /* 300 */ | ||
303 | .long sys_unlinkat | ||
304 | .long sys_renameat | ||
305 | .long sys_linkat | ||
306 | .long sys_symlinkat | ||
307 | .long sys_readlinkat /* 305 */ | ||
308 | .long sys_fchmodat | ||
309 | .long sys_faccessat | ||
310 | .long sys_pselect6 | ||
311 | .long sys_ppoll | ||
312 | .long sys_unshare /* 310 */ | ||
313 | .long sys_set_robust_list | ||
314 | .long sys_get_robust_list | ||
315 | .long sys_splice | ||
316 | .long sys_sync_file_range | ||
317 | .long sys_tee /* 315 */ | ||
318 | .long sys_vmsplice | ||
319 | .long sys_move_pages | ||
320 | .long sys_getcpu | ||
321 | .long sys_epoll_pwait | ||
322 | .long sys_utimensat /* 320 */ | ||
323 | .long sys_signalfd | ||
324 | .long sys_timerfd_create | ||
325 | .long sys_eventfd | ||
326 | .long sys_fallocate | ||
327 | .long sys_timerfd_settime /* 325 */ | ||
328 | .long sys_timerfd_gettime | ||
329 | .long sys_signalfd4 | ||
330 | .long sys_eventfd2 | ||
331 | .long sys_epoll_create1 | ||
332 | .long sys_dup3 /* 330 */ | ||
333 | .long sys_pipe2 | ||
334 | .long sys_inotify_init1 | ||
335 | .long sys_preadv | ||
336 | .long sys_pwritev | ||
337 | .long sys_rt_tgsigqueueinfo /* 335 */ | ||
338 | .long sys_perf_event_open | ||
339 | .long sys_recvmmsg | ||
340 | .long sys_fanotify_init | ||
341 | .long sys_fanotify_mark | ||
342 | .long sys_prlimit64 /* 340 */ | ||
343 | .long sys_name_to_handle_at | ||
344 | .long sys_open_by_handle_at | ||
345 | .long sys_clock_adjtime | ||
346 | .long sys_syncfs | ||
347 | .long sys_sendmmsg /* 345 */ | ||
348 | .long sys_setns | ||
349 | .long sys_process_vm_readv | ||
350 | .long sys_process_vm_writev | ||
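The table above is the 32-bit syscall dispatch table: user space selects an entry by loading its index into %eax and executing int $0x80, and the kernel indirects through sys_call_table. A minimal user-space sketch exercising entry 20, sys_getpid (assumes an x86 host; build with gcc -m32):

#include <stdio.h>

int main(void)
{
	long ret;

	/* 20 is __NR_getpid in the 32-bit table above; the kernel uses
	 * %eax as the index into sys_call_table. */
	asm volatile("int $0x80" : "=a"(ret) : "a"(20L));
	printf("getpid() via int $0x80 -> %ld\n", ret);
	return 0;
}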
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index fa1191fb679d..482ec3af2067 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -311,9 +311,15 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) | |||
311 | == NOTIFY_STOP) | 311 | == NOTIFY_STOP) |
312 | return; | 312 | return; |
313 | 313 | ||
314 | /* | ||
315 | * Let others (NMI) know that the debug stack is in use | ||
316 | * as we may switch to the interrupt stack. | ||
317 | */ | ||
318 | debug_stack_usage_inc(); | ||
314 | preempt_conditional_sti(regs); | 319 | preempt_conditional_sti(regs); |
315 | do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); | 320 | do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); |
316 | preempt_conditional_cli(regs); | 321 | preempt_conditional_cli(regs); |
322 | debug_stack_usage_dec(); | ||
317 | } | 323 | } |
318 | 324 | ||
319 | #ifdef CONFIG_X86_64 | 325 | #ifdef CONFIG_X86_64 |
@@ -406,6 +412,12 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
406 | SIGTRAP) == NOTIFY_STOP) | 412 | SIGTRAP) == NOTIFY_STOP) |
407 | return; | 413 | return; |
408 | 414 | ||
415 | /* | ||
416 | * Let others (NMI) know that the debug stack is in use | ||
417 | * as we may switch to the interrupt stack. | ||
418 | */ | ||
419 | debug_stack_usage_inc(); | ||
420 | |||
409 | /* It's safe to allow irq's after DR6 has been saved */ | 421 | /* It's safe to allow irq's after DR6 has been saved */ |
410 | preempt_conditional_sti(regs); | 422 | preempt_conditional_sti(regs); |
411 | 423 | ||
@@ -413,6 +425,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
413 | handle_vm86_trap((struct kernel_vm86_regs *) regs, | 425 | handle_vm86_trap((struct kernel_vm86_regs *) regs, |
414 | error_code, 1); | 426 | error_code, 1); |
415 | preempt_conditional_cli(regs); | 427 | preempt_conditional_cli(regs); |
428 | debug_stack_usage_dec(); | ||
416 | return; | 429 | return; |
417 | } | 430 | } |
418 | 431 | ||
@@ -432,6 +445,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
432 | if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) | 445 | if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) |
433 | send_sigtrap(tsk, regs, error_code, si_code); | 446 | send_sigtrap(tsk, regs, error_code, si_code); |
434 | preempt_conditional_cli(regs); | 447 | preempt_conditional_cli(regs); |
448 | debug_stack_usage_dec(); | ||
435 | 449 | ||
436 | return; | 450 | return; |
437 | } | 451 | } |
@@ -718,4 +732,10 @@ void __init trap_init(void) | |||
718 | cpu_init(); | 732 | cpu_init(); |
719 | 733 | ||
720 | x86_init.irqs.trap_init(); | 734 | x86_init.irqs.trap_init(); |
735 | |||
736 | #ifdef CONFIG_X86_64 | ||
737 | memcpy(&nmi_idt_table, &idt_table, IDT_ENTRIES * 16); | ||
738 | set_nmi_gate(1, &debug); | ||
739 | set_nmi_gate(3, &int3); | ||
740 | #endif | ||
721 | } | 741 | } |
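The debug_stack_usage_inc()/dec() pairs added above bracket the window in which the debug stack may be live, so an NMI arriving mid-trap knows not to reuse it; trap_init() additionally clones the IDT into nmi_idt_table and points vectors 1 (#DB) and 3 (#BP) at NMI-safe gates. An illustrative stand-alone model of the counter half of that protocol (not the kernel's exact implementation, which keeps the counter per-CPU):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static _Atomic int debug_stack_usage;	/* per-CPU in the real kernel */

static void debug_stack_usage_inc(void) { atomic_fetch_add(&debug_stack_usage, 1); }
static void debug_stack_usage_dec(void) { atomic_fetch_sub(&debug_stack_usage, 1); }

/* An NMI that interrupts do_int3()/do_debug() consults the flag and
 * stays off the debug stack while it is nonzero. */
static bool debug_stack_is_free(void) { return atomic_load(&debug_stack_usage) == 0; }

int main(void)
{
	debug_stack_usage_inc();	/* entering do_int3() */
	printf("NMI may use debug stack: %d\n", debug_stack_is_free());	/* 0 */
	debug_stack_usage_dec();	/* leaving do_int3() */
	printf("NMI may use debug stack: %d\n", debug_stack_is_free());	/* 1 */
	return 0;
}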
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index f54694611172..a62c201c97ec 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -993,3 +993,23 @@ void __init tsc_init(void) | |||
993 | check_system_tsc_reliable(); | 993 | check_system_tsc_reliable(); |
994 | } | 994 | } |
995 | 995 | ||
996 | #ifdef CONFIG_SMP | ||
997 | /* | ||
998 | * If we have a constant TSC and are using the TSC for the delay loop, | ||
999 | * we can skip clock calibration if another cpu in the same socket has already | ||
1000 | * been calibrated. This assumes that CONSTANT_TSC applies to all | ||
1001 | * cpus in the socket - this should be a safe assumption. | ||
1002 | */ | ||
1003 | unsigned long __cpuinit calibrate_delay_is_known(void) | ||
1004 | { | ||
1005 | int i, cpu = smp_processor_id(); | ||
1006 | |||
1007 | if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC)) | ||
1008 | return 0; | ||
1009 | |||
1010 | for_each_online_cpu(i) | ||
1011 | if (cpu_data(i).phys_proc_id == cpu_data(cpu).phys_proc_id) | ||
1012 | return cpu_data(i).loops_per_jiffy; | ||
1013 | return 0; | ||
1014 | } | ||
1015 | #endif | ||
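calibrate_delay_is_known() above overrides a weak hook on the generic side: a secondary CPU that shares a socket with an already-calibrated CPU, and has a constant TSC, inherits that CPU's loops_per_jiffy instead of re-running the delay-loop measurement. A simplified sketch of how the consumer uses the hook (timer_based_calibration() is a stand-in helper, not a real kernel function):

#include <stdio.h>

unsigned long loops_per_jiffy;

/* Generic default, simplified: returns 0, i.e. "not known", so
 * calibration runs. The arch override above returns a sibling CPU's
 * loops_per_jiffy instead. */
unsigned long __attribute__((weak)) calibrate_delay_is_known(void)
{
	return 0;
}

/* Stand-in for the timer-based measurement loop. */
static unsigned long timer_based_calibration(void)
{
	return 4096;
}

void calibrate_delay(void)
{
	unsigned long lpj = calibrate_delay_is_known();

	if (!lpj)
		lpj = timer_based_calibration();
	loops_per_jiffy = lpj;
}

int main(void)
{
	calibrate_delay();
	printf("loops_per_jiffy=%lu\n", loops_per_jiffy);
	return 0;
}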
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 863f8753ab0a..b466cab5ba15 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -335,9 +335,11 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk | |||
335 | if (info->flags & VM86_SCREEN_BITMAP) | 335 | if (info->flags & VM86_SCREEN_BITMAP) |
336 | mark_screen_rdonly(tsk->mm); | 336 | mark_screen_rdonly(tsk->mm); |
337 | 337 | ||
338 | /*call audit_syscall_exit since we do not exit via the normal paths */ | 338 | /*call __audit_syscall_exit since we do not exit via the normal paths */ |
339 | #ifdef CONFIG_AUDITSYSCALL | ||
339 | if (unlikely(current->audit_context)) | 340 | if (unlikely(current->audit_context)) |
340 | audit_syscall_exit(AUDITSC_RESULT(0), 0); | 341 | __audit_syscall_exit(1, 0); |
342 | #endif | ||
341 | 343 | ||
342 | __asm__ __volatile__( | 344 | __asm__ __volatile__( |
343 | "movl %0,%%esp\n\t" | 345 | "movl %0,%%esp\n\t" |
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 91f83e21b989..947a06ccc673 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -115,4 +115,5 @@ struct x86_msi_ops x86_msi = { | |||
115 | .setup_msi_irqs = native_setup_msi_irqs, | 115 | .setup_msi_irqs = native_setup_msi_irqs, |
116 | .teardown_msi_irq = native_teardown_msi_irq, | 116 | .teardown_msi_irq = native_teardown_msi_irq, |
117 | .teardown_msi_irqs = default_teardown_msi_irqs, | 117 | .teardown_msi_irqs = default_teardown_msi_irqs, |
118 | .restore_msi_irqs = default_restore_msi_irqs, | ||
118 | }; | 119 | }; |
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index ff5790d8e990..1a7fe868f375 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -35,6 +35,7 @@ config KVM | |||
35 | select KVM_MMIO | 35 | select KVM_MMIO |
36 | select TASKSTATS | 36 | select TASKSTATS |
37 | select TASK_DELAY_ACCT | 37 | select TASK_DELAY_ACCT |
38 | select PERF_EVENTS | ||
38 | ---help--- | 39 | ---help--- |
39 | Support hosting fully virtualized guest machines using hardware | 40 | Support hosting fully virtualized guest machines using hardware |
40 | virtualization extensions. You will need a fairly recent | 41 | virtualization extensions. You will need a fairly recent |
@@ -52,6 +53,8 @@ config KVM | |||
52 | config KVM_INTEL | 53 | config KVM_INTEL |
53 | tristate "KVM for Intel processors support" | 54 | tristate "KVM for Intel processors support" |
54 | depends on KVM | 55 | depends on KVM |
56 | # for perf_guest_get_msrs(): | ||
57 | depends on CPU_SUP_INTEL | ||
55 | ---help--- | 58 | ---help--- |
56 | Provides support for KVM on Intel processors equipped with the VT | 59 | Provides support for KVM on Intel processors equipped with the VT |
57 | extensions. | 60 | extensions. |
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index f15501f431c8..4f579e8dcacf 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,7 +12,7 @@ kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o) | |||
12 | kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o) | 12 | kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o) |
13 | 13 | ||
14 | kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ | 14 | kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ |
15 | i8254.o timer.o | 15 | i8254.o timer.o cpuid.o pmu.o |
16 | kvm-intel-y += vmx.o | 16 | kvm-intel-y += vmx.o |
17 | kvm-amd-y += svm.o | 17 | kvm-amd-y += svm.o |
18 | 18 | ||
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
new file mode 100644
index 000000000000..89b02bfaaca5
--- /dev/null
+++ b/arch/x86/kvm/cpuid.c
@@ -0,0 +1,670 @@ | |||
1 | /* | ||
2 | * Kernel-based Virtual Machine driver for Linux | ||
3 | * cpuid support routines | ||
4 | * | ||
5 | * derived from arch/x86/kvm/x86.c | ||
6 | * | ||
7 | * Copyright 2011 Red Hat, Inc. and/or its affiliates. | ||
8 | * Copyright IBM Corporation, 2008 | ||
9 | * | ||
10 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
11 | * the COPYING file in the top-level directory. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include <linux/kvm_host.h> | ||
16 | #include <linux/module.h> | ||
17 | #include <linux/vmalloc.h> | ||
18 | #include <linux/uaccess.h> | ||
19 | #include <asm/user.h> | ||
20 | #include <asm/xsave.h> | ||
21 | #include "cpuid.h" | ||
22 | #include "lapic.h" | ||
23 | #include "mmu.h" | ||
24 | #include "trace.h" | ||
25 | |||
26 | void kvm_update_cpuid(struct kvm_vcpu *vcpu) | ||
27 | { | ||
28 | struct kvm_cpuid_entry2 *best; | ||
29 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
30 | |||
31 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | ||
32 | if (!best) | ||
33 | return; | ||
34 | |||
35 | /* Update OSXSAVE bit */ | ||
36 | if (cpu_has_xsave && best->function == 0x1) { | ||
37 | best->ecx &= ~(bit(X86_FEATURE_OSXSAVE)); | ||
38 | if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) | ||
39 | best->ecx |= bit(X86_FEATURE_OSXSAVE); | ||
40 | } | ||
41 | |||
42 | if (apic) { | ||
43 | if (best->ecx & bit(X86_FEATURE_TSC_DEADLINE_TIMER)) | ||
44 | apic->lapic_timer.timer_mode_mask = 3 << 17; | ||
45 | else | ||
46 | apic->lapic_timer.timer_mode_mask = 1 << 17; | ||
47 | } | ||
48 | |||
49 | kvm_pmu_cpuid_update(vcpu); | ||
50 | } | ||
51 | |||
52 | static int is_efer_nx(void) | ||
53 | { | ||
54 | unsigned long long efer = 0; | ||
55 | |||
56 | rdmsrl_safe(MSR_EFER, &efer); | ||
57 | return efer & EFER_NX; | ||
58 | } | ||
59 | |||
60 | static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) | ||
61 | { | ||
62 | int i; | ||
63 | struct kvm_cpuid_entry2 *e, *entry; | ||
64 | |||
65 | entry = NULL; | ||
66 | for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { | ||
67 | e = &vcpu->arch.cpuid_entries[i]; | ||
68 | if (e->function == 0x80000001) { | ||
69 | entry = e; | ||
70 | break; | ||
71 | } | ||
72 | } | ||
73 | if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) { | ||
74 | entry->edx &= ~(1 << 20); | ||
75 | printk(KERN_INFO "kvm: guest NX capability removed\n"); | ||
76 | } | ||
77 | } | ||
78 | |||
79 | /* when an old userspace process fills a new kernel module */ | ||
80 | int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | ||
81 | struct kvm_cpuid *cpuid, | ||
82 | struct kvm_cpuid_entry __user *entries) | ||
83 | { | ||
84 | int r, i; | ||
85 | struct kvm_cpuid_entry *cpuid_entries; | ||
86 | |||
87 | r = -E2BIG; | ||
88 | if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) | ||
89 | goto out; | ||
90 | r = -ENOMEM; | ||
91 | cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) * cpuid->nent); | ||
92 | if (!cpuid_entries) | ||
93 | goto out; | ||
94 | r = -EFAULT; | ||
95 | if (copy_from_user(cpuid_entries, entries, | ||
96 | cpuid->nent * sizeof(struct kvm_cpuid_entry))) | ||
97 | goto out_free; | ||
98 | for (i = 0; i < cpuid->nent; i++) { | ||
99 | vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function; | ||
100 | vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax; | ||
101 | vcpu->arch.cpuid_entries[i].ebx = cpuid_entries[i].ebx; | ||
102 | vcpu->arch.cpuid_entries[i].ecx = cpuid_entries[i].ecx; | ||
103 | vcpu->arch.cpuid_entries[i].edx = cpuid_entries[i].edx; | ||
104 | vcpu->arch.cpuid_entries[i].index = 0; | ||
105 | vcpu->arch.cpuid_entries[i].flags = 0; | ||
106 | vcpu->arch.cpuid_entries[i].padding[0] = 0; | ||
107 | vcpu->arch.cpuid_entries[i].padding[1] = 0; | ||
108 | vcpu->arch.cpuid_entries[i].padding[2] = 0; | ||
109 | } | ||
110 | vcpu->arch.cpuid_nent = cpuid->nent; | ||
111 | cpuid_fix_nx_cap(vcpu); | ||
112 | r = 0; | ||
113 | kvm_apic_set_version(vcpu); | ||
114 | kvm_x86_ops->cpuid_update(vcpu); | ||
115 | kvm_update_cpuid(vcpu); | ||
116 | |||
117 | out_free: | ||
118 | vfree(cpuid_entries); | ||
119 | out: | ||
120 | return r; | ||
121 | } | ||
122 | |||
123 | int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, | ||
124 | struct kvm_cpuid2 *cpuid, | ||
125 | struct kvm_cpuid_entry2 __user *entries) | ||
126 | { | ||
127 | int r; | ||
128 | |||
129 | r = -E2BIG; | ||
130 | if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) | ||
131 | goto out; | ||
132 | r = -EFAULT; | ||
133 | if (copy_from_user(&vcpu->arch.cpuid_entries, entries, | ||
134 | cpuid->nent * sizeof(struct kvm_cpuid_entry2))) | ||
135 | goto out; | ||
136 | vcpu->arch.cpuid_nent = cpuid->nent; | ||
137 | kvm_apic_set_version(vcpu); | ||
138 | kvm_x86_ops->cpuid_update(vcpu); | ||
139 | kvm_update_cpuid(vcpu); | ||
140 | return 0; | ||
141 | |||
142 | out: | ||
143 | return r; | ||
144 | } | ||
145 | |||
146 | int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, | ||
147 | struct kvm_cpuid2 *cpuid, | ||
148 | struct kvm_cpuid_entry2 __user *entries) | ||
149 | { | ||
150 | int r; | ||
151 | |||
152 | r = -E2BIG; | ||
153 | if (cpuid->nent < vcpu->arch.cpuid_nent) | ||
154 | goto out; | ||
155 | r = -EFAULT; | ||
156 | if (copy_to_user(entries, &vcpu->arch.cpuid_entries, | ||
157 | vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2))) | ||
158 | goto out; | ||
159 | return 0; | ||
160 | |||
161 | out: | ||
162 | cpuid->nent = vcpu->arch.cpuid_nent; | ||
163 | return r; | ||
164 | } | ||
165 | |||
166 | static void cpuid_mask(u32 *word, int wordnum) | ||
167 | { | ||
168 | *word &= boot_cpu_data.x86_capability[wordnum]; | ||
169 | } | ||
170 | |||
171 | static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, | ||
172 | u32 index) | ||
173 | { | ||
174 | entry->function = function; | ||
175 | entry->index = index; | ||
176 | cpuid_count(entry->function, entry->index, | ||
177 | &entry->eax, &entry->ebx, &entry->ecx, &entry->edx); | ||
178 | entry->flags = 0; | ||
179 | } | ||
180 | |||
181 | static bool supported_xcr0_bit(unsigned bit) | ||
182 | { | ||
183 | u64 mask = ((u64)1 << bit); | ||
184 | |||
185 | return mask & (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) & host_xcr0; | ||
186 | } | ||
187 | |||
188 | #define F(x) bit(X86_FEATURE_##x) | ||
189 | |||
190 | static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | ||
191 | u32 index, int *nent, int maxnent) | ||
192 | { | ||
193 | int r; | ||
194 | unsigned f_nx = is_efer_nx() ? F(NX) : 0; | ||
195 | #ifdef CONFIG_X86_64 | ||
196 | unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL) | ||
197 | ? F(GBPAGES) : 0; | ||
198 | unsigned f_lm = F(LM); | ||
199 | #else | ||
200 | unsigned f_gbpages = 0; | ||
201 | unsigned f_lm = 0; | ||
202 | #endif | ||
203 | unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; | ||
204 | |||
205 | /* cpuid 1.edx */ | ||
206 | const u32 kvm_supported_word0_x86_features = | ||
207 | F(FPU) | F(VME) | F(DE) | F(PSE) | | ||
208 | F(TSC) | F(MSR) | F(PAE) | F(MCE) | | ||
209 | F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) | | ||
210 | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | | ||
211 | F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLSH) | | ||
212 | 0 /* Reserved, DS, ACPI */ | F(MMX) | | ||
213 | F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) | | ||
214 | 0 /* HTT, TM, Reserved, PBE */; | ||
215 | /* cpuid 0x80000001.edx */ | ||
216 | const u32 kvm_supported_word1_x86_features = | ||
217 | F(FPU) | F(VME) | F(DE) | F(PSE) | | ||
218 | F(TSC) | F(MSR) | F(PAE) | F(MCE) | | ||
219 | F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) | | ||
220 | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | | ||
221 | F(PAT) | F(PSE36) | 0 /* Reserved */ | | ||
222 | f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | | ||
223 | F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp | | ||
224 | 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); | ||
225 | /* cpuid 1.ecx */ | ||
226 | const u32 kvm_supported_word4_x86_features = | ||
227 | F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ | | ||
228 | 0 /* DS-CPL, VMX, SMX, EST */ | | ||
229 | 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | | ||
230 | F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ | | ||
231 | 0 /* Reserved, DCA */ | F(XMM4_1) | | ||
232 | F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | | ||
233 | 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) | | ||
234 | F(F16C) | F(RDRAND); | ||
235 | /* cpuid 0x80000001.ecx */ | ||
236 | const u32 kvm_supported_word6_x86_features = | ||
237 | F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ | | ||
238 | F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | | ||
239 | F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) | | ||
240 | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); | ||
241 | |||
242 | /* cpuid 0xC0000001.edx */ | ||
243 | const u32 kvm_supported_word5_x86_features = | ||
244 | F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | | ||
245 | F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) | | ||
246 | F(PMM) | F(PMM_EN); | ||
247 | |||
248 | /* cpuid 7.0.ebx */ | ||
249 | const u32 kvm_supported_word9_x86_features = | ||
250 | F(FSGSBASE) | F(BMI1) | F(AVX2) | F(SMEP) | F(BMI2) | F(ERMS); | ||
251 | |||
252 | /* all calls to cpuid_count() should be made on the same cpu */ | ||
253 | get_cpu(); | ||
254 | |||
255 | r = -E2BIG; | ||
256 | |||
257 | if (*nent >= maxnent) | ||
258 | goto out; | ||
259 | |||
260 | do_cpuid_1_ent(entry, function, index); | ||
261 | ++*nent; | ||
262 | |||
263 | switch (function) { | ||
264 | case 0: | ||
265 | entry->eax = min(entry->eax, (u32)0xd); | ||
266 | break; | ||
267 | case 1: | ||
268 | entry->edx &= kvm_supported_word0_x86_features; | ||
269 | cpuid_mask(&entry->edx, 0); | ||
270 | entry->ecx &= kvm_supported_word4_x86_features; | ||
271 | cpuid_mask(&entry->ecx, 4); | ||
272 | /* we support x2apic emulation even if host does not support | ||
273 | * it since we emulate x2apic in software */ | ||
274 | entry->ecx |= F(X2APIC); | ||
275 | break; | ||
276 | /* function 2 entries are STATEFUL. That is, repeated cpuid commands | ||
277 | * may return different values. This forces us to get_cpu() before | ||
278 | * issuing the first command, and also to emulate this annoying behavior | ||
279 | * in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */ | ||
280 | case 2: { | ||
281 | int t, times = entry->eax & 0xff; | ||
282 | |||
283 | entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; | ||
284 | entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; | ||
285 | for (t = 1; t < times; ++t) { | ||
286 | if (*nent >= maxnent) | ||
287 | goto out; | ||
288 | |||
289 | do_cpuid_1_ent(&entry[t], function, 0); | ||
290 | entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; | ||
291 | ++*nent; | ||
292 | } | ||
293 | break; | ||
294 | } | ||
295 | /* function 4 has additional index. */ | ||
296 | case 4: { | ||
297 | int i, cache_type; | ||
298 | |||
299 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
300 | /* read more entries until cache_type is zero */ | ||
301 | for (i = 1; ; ++i) { | ||
302 | if (*nent >= maxnent) | ||
303 | goto out; | ||
304 | |||
305 | cache_type = entry[i - 1].eax & 0x1f; | ||
306 | if (!cache_type) | ||
307 | break; | ||
308 | do_cpuid_1_ent(&entry[i], function, i); | ||
309 | entry[i].flags |= | ||
310 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
311 | ++*nent; | ||
312 | } | ||
313 | break; | ||
314 | } | ||
315 | case 7: { | ||
316 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
317 | /* Mask ebx against host capability word 9 */ | ||
318 | if (index == 0) { | ||
319 | entry->ebx &= kvm_supported_word9_x86_features; | ||
320 | cpuid_mask(&entry->ebx, 9); | ||
321 | } else | ||
322 | entry->ebx = 0; | ||
323 | entry->eax = 0; | ||
324 | entry->ecx = 0; | ||
325 | entry->edx = 0; | ||
326 | break; | ||
327 | } | ||
328 | case 9: | ||
329 | break; | ||
330 | case 0xa: { /* Architectural Performance Monitoring */ | ||
331 | struct x86_pmu_capability cap; | ||
332 | union cpuid10_eax eax; | ||
333 | union cpuid10_edx edx; | ||
334 | |||
335 | perf_get_x86_pmu_capability(&cap); | ||
336 | |||
337 | /* | ||
338 | * Only support guest architectural pmu on a host | ||
339 | * with architectural pmu. | ||
340 | */ | ||
341 | if (!cap.version) | ||
342 | memset(&cap, 0, sizeof(cap)); | ||
343 | |||
344 | eax.split.version_id = min(cap.version, 2); | ||
345 | eax.split.num_counters = cap.num_counters_gp; | ||
346 | eax.split.bit_width = cap.bit_width_gp; | ||
347 | eax.split.mask_length = cap.events_mask_len; | ||
348 | |||
349 | edx.split.num_counters_fixed = cap.num_counters_fixed; | ||
350 | edx.split.bit_width_fixed = cap.bit_width_fixed; | ||
351 | edx.split.reserved = 0; | ||
352 | |||
353 | entry->eax = eax.full; | ||
354 | entry->ebx = cap.events_mask; | ||
355 | entry->ecx = 0; | ||
356 | entry->edx = edx.full; | ||
357 | break; | ||
358 | } | ||
359 | /* function 0xb has additional index. */ | ||
360 | case 0xb: { | ||
361 | int i, level_type; | ||
362 | |||
363 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
364 | /* read more entries until level_type is zero */ | ||
365 | for (i = 1; ; ++i) { | ||
366 | if (*nent >= maxnent) | ||
367 | goto out; | ||
368 | |||
369 | level_type = entry[i - 1].ecx & 0xff00; | ||
370 | if (!level_type) | ||
371 | break; | ||
372 | do_cpuid_1_ent(&entry[i], function, i); | ||
373 | entry[i].flags |= | ||
374 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
375 | ++*nent; | ||
376 | } | ||
377 | break; | ||
378 | } | ||
379 | case 0xd: { | ||
380 | int idx, i; | ||
381 | |||
382 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
383 | for (idx = 1, i = 1; idx < 64; ++idx) { | ||
384 | if (*nent >= maxnent) | ||
385 | goto out; | ||
386 | |||
387 | do_cpuid_1_ent(&entry[i], function, idx); | ||
388 | if (entry[i].eax == 0 || !supported_xcr0_bit(idx)) | ||
389 | continue; | ||
390 | entry[i].flags |= | ||
391 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
392 | ++*nent; | ||
393 | ++i; | ||
394 | } | ||
395 | break; | ||
396 | } | ||
397 | case KVM_CPUID_SIGNATURE: { | ||
398 | char signature[12] = "KVMKVMKVM\0\0"; | ||
399 | u32 *sigptr = (u32 *)signature; | ||
400 | entry->eax = 0; | ||
401 | entry->ebx = sigptr[0]; | ||
402 | entry->ecx = sigptr[1]; | ||
403 | entry->edx = sigptr[2]; | ||
404 | break; | ||
405 | } | ||
406 | case KVM_CPUID_FEATURES: | ||
407 | entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) | | ||
408 | (1 << KVM_FEATURE_NOP_IO_DELAY) | | ||
409 | (1 << KVM_FEATURE_CLOCKSOURCE2) | | ||
410 | (1 << KVM_FEATURE_ASYNC_PF) | | ||
411 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); | ||
412 | |||
413 | if (sched_info_on()) | ||
414 | entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); | ||
415 | |||
416 | entry->ebx = 0; | ||
417 | entry->ecx = 0; | ||
418 | entry->edx = 0; | ||
419 | break; | ||
420 | case 0x80000000: | ||
421 | entry->eax = min(entry->eax, 0x8000001a); | ||
422 | break; | ||
423 | case 0x80000001: | ||
424 | entry->edx &= kvm_supported_word1_x86_features; | ||
425 | cpuid_mask(&entry->edx, 1); | ||
426 | entry->ecx &= kvm_supported_word6_x86_features; | ||
427 | cpuid_mask(&entry->ecx, 6); | ||
428 | break; | ||
429 | case 0x80000008: { | ||
430 | unsigned g_phys_as = (entry->eax >> 16) & 0xff; | ||
431 | unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U); | ||
432 | unsigned phys_as = entry->eax & 0xff; | ||
433 | |||
434 | if (!g_phys_as) | ||
435 | g_phys_as = phys_as; | ||
436 | entry->eax = g_phys_as | (virt_as << 8); | ||
437 | entry->ebx = entry->edx = 0; | ||
438 | break; | ||
439 | } | ||
440 | case 0x80000019: | ||
441 | entry->ecx = entry->edx = 0; | ||
442 | break; | ||
443 | case 0x8000001a: | ||
444 | break; | ||
445 | case 0x8000001d: | ||
446 | break; | ||
447 | /* Add support for Centaur's CPUID instruction */ | ||
448 | case 0xC0000000: | ||
449 | /* Just support up to 0xC0000004 for now */ | ||
450 | entry->eax = min(entry->eax, 0xC0000004); | ||
451 | break; | ||
452 | case 0xC0000001: | ||
453 | entry->edx &= kvm_supported_word5_x86_features; | ||
454 | cpuid_mask(&entry->edx, 5); | ||
455 | break; | ||
456 | case 3: /* Processor serial number */ | ||
457 | case 5: /* MONITOR/MWAIT */ | ||
458 | case 6: /* Thermal management */ | ||
459 | case 0x80000007: /* Advanced power management */ | ||
460 | case 0xC0000002: | ||
461 | case 0xC0000003: | ||
462 | case 0xC0000004: | ||
463 | default: | ||
464 | entry->eax = entry->ebx = entry->ecx = entry->edx = 0; | ||
465 | break; | ||
466 | } | ||
467 | |||
468 | kvm_x86_ops->set_supported_cpuid(function, entry); | ||
469 | |||
470 | r = 0; | ||
471 | |||
472 | out: | ||
473 | put_cpu(); | ||
474 | |||
475 | return r; | ||
476 | } | ||
477 | |||
478 | #undef F | ||
479 | |||
480 | struct kvm_cpuid_param { | ||
481 | u32 func; | ||
482 | u32 idx; | ||
483 | bool has_leaf_count; | ||
484 | bool (*qualifier)(struct kvm_cpuid_param *param); | ||
485 | }; | ||
486 | |||
487 | static bool is_centaur_cpu(struct kvm_cpuid_param *param) | ||
488 | { | ||
489 | return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR; | ||
490 | } | ||
491 | |||
492 | int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, | ||
493 | struct kvm_cpuid_entry2 __user *entries) | ||
494 | { | ||
495 | struct kvm_cpuid_entry2 *cpuid_entries; | ||
496 | int limit, nent = 0, r = -E2BIG, i; | ||
497 | u32 func; | ||
498 | static struct kvm_cpuid_param param[] = { | ||
499 | { .func = 0, .has_leaf_count = true }, | ||
500 | { .func = 0x80000000, .has_leaf_count = true }, | ||
501 | { .func = 0xC0000000, .qualifier = is_centaur_cpu, .has_leaf_count = true }, | ||
502 | { .func = KVM_CPUID_SIGNATURE }, | ||
503 | { .func = KVM_CPUID_FEATURES }, | ||
504 | }; | ||
505 | |||
506 | if (cpuid->nent < 1) | ||
507 | goto out; | ||
508 | if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) | ||
509 | cpuid->nent = KVM_MAX_CPUID_ENTRIES; | ||
510 | r = -ENOMEM; | ||
511 | cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); | ||
512 | if (!cpuid_entries) | ||
513 | goto out; | ||
514 | |||
515 | r = 0; | ||
516 | for (i = 0; i < ARRAY_SIZE(param); i++) { | ||
517 | struct kvm_cpuid_param *ent = ¶m[i]; | ||
518 | |||
519 | if (ent->qualifier && !ent->qualifier(ent)) | ||
520 | continue; | ||
521 | |||
522 | r = do_cpuid_ent(&cpuid_entries[nent], ent->func, ent->idx, | ||
523 | &nent, cpuid->nent); | ||
524 | |||
525 | if (r) | ||
526 | goto out_free; | ||
527 | |||
528 | if (!ent->has_leaf_count) | ||
529 | continue; | ||
530 | |||
531 | limit = cpuid_entries[nent - 1].eax; | ||
532 | for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func) | ||
533 | r = do_cpuid_ent(&cpuid_entries[nent], func, ent->idx, | ||
534 | &nent, cpuid->nent); | ||
535 | |||
536 | if (r) | ||
537 | goto out_free; | ||
538 | } | ||
539 | |||
540 | r = -EFAULT; | ||
541 | if (copy_to_user(entries, cpuid_entries, | ||
542 | nent * sizeof(struct kvm_cpuid_entry2))) | ||
543 | goto out_free; | ||
544 | cpuid->nent = nent; | ||
545 | r = 0; | ||
546 | |||
547 | out_free: | ||
548 | vfree(cpuid_entries); | ||
549 | out: | ||
550 | return r; | ||
551 | } | ||
552 | |||
553 | static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) | ||
554 | { | ||
555 | struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i]; | ||
556 | int j, nent = vcpu->arch.cpuid_nent; | ||
557 | |||
558 | e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT; | ||
559 | /* when no next entry is found, the current entry[i] is reselected */ | ||
560 | for (j = i + 1; ; j = (j + 1) % nent) { | ||
561 | struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j]; | ||
562 | if (ej->function == e->function) { | ||
563 | ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; | ||
564 | return j; | ||
565 | } | ||
566 | } | ||
567 | return 0; /* silence gcc, even though control never reaches here */ | ||
568 | } | ||
569 | |||
570 | /* find an entry with matching function, matching index (if needed), and that | ||
571 | * should be read next (if it's stateful) */ | ||
572 | static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e, | ||
573 | u32 function, u32 index) | ||
574 | { | ||
575 | if (e->function != function) | ||
576 | return 0; | ||
577 | if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index) | ||
578 | return 0; | ||
579 | if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) && | ||
580 | !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT)) | ||
581 | return 0; | ||
582 | return 1; | ||
583 | } | ||
584 | |||
585 | struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, | ||
586 | u32 function, u32 index) | ||
587 | { | ||
588 | int i; | ||
589 | struct kvm_cpuid_entry2 *best = NULL; | ||
590 | |||
591 | for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { | ||
592 | struct kvm_cpuid_entry2 *e; | ||
593 | |||
594 | e = &vcpu->arch.cpuid_entries[i]; | ||
595 | if (is_matching_cpuid_entry(e, function, index)) { | ||
596 | if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) | ||
597 | move_to_next_stateful_cpuid_entry(vcpu, i); | ||
598 | best = e; | ||
599 | break; | ||
600 | } | ||
601 | } | ||
602 | return best; | ||
603 | } | ||
604 | EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry); | ||
605 | |||
606 | int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) | ||
607 | { | ||
608 | struct kvm_cpuid_entry2 *best; | ||
609 | |||
610 | best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0); | ||
611 | if (!best || best->eax < 0x80000008) | ||
612 | goto not_found; | ||
613 | best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); | ||
614 | if (best) | ||
615 | return best->eax & 0xff; | ||
616 | not_found: | ||
617 | return 36; | ||
618 | } | ||
619 | |||
620 | /* | ||
621 | * If no match is found, check whether we exceed the vCPU's limit | ||
622 | * and return the content of the highest valid _standard_ leaf instead. | ||
623 | * This is to satisfy the CPUID specification. | ||
624 | */ | ||
625 | static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu, | ||
626 | u32 function, u32 index) | ||
627 | { | ||
628 | struct kvm_cpuid_entry2 *maxlevel; | ||
629 | |||
630 | maxlevel = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0); | ||
631 | if (!maxlevel || maxlevel->eax >= function) | ||
632 | return NULL; | ||
633 | if (function & 0x80000000) { | ||
634 | maxlevel = kvm_find_cpuid_entry(vcpu, 0, 0); | ||
635 | if (!maxlevel) | ||
636 | return NULL; | ||
637 | } | ||
638 | return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index); | ||
639 | } | ||
640 | |||
641 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) | ||
642 | { | ||
643 | u32 function, index; | ||
644 | struct kvm_cpuid_entry2 *best; | ||
645 | |||
646 | function = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
647 | index = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
648 | kvm_register_write(vcpu, VCPU_REGS_RAX, 0); | ||
649 | kvm_register_write(vcpu, VCPU_REGS_RBX, 0); | ||
650 | kvm_register_write(vcpu, VCPU_REGS_RCX, 0); | ||
651 | kvm_register_write(vcpu, VCPU_REGS_RDX, 0); | ||
652 | best = kvm_find_cpuid_entry(vcpu, function, index); | ||
653 | |||
654 | if (!best) | ||
655 | best = check_cpuid_limit(vcpu, function, index); | ||
656 | |||
657 | if (best) { | ||
658 | kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax); | ||
659 | kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx); | ||
660 | kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx); | ||
661 | kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx); | ||
662 | } | ||
663 | kvm_x86_ops->skip_emulated_instruction(vcpu); | ||
664 | trace_kvm_cpuid(function, | ||
665 | kvm_register_read(vcpu, VCPU_REGS_RAX), | ||
666 | kvm_register_read(vcpu, VCPU_REGS_RBX), | ||
667 | kvm_register_read(vcpu, VCPU_REGS_RCX), | ||
668 | kvm_register_read(vcpu, VCPU_REGS_RDX)); | ||
669 | } | ||
670 | EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); | ||
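Leaf 2 is the awkward case the comments above call out: real hardware can return different values on successive CPUID invocations, so kvm_find_cpuid_entry() must rotate the READ_NEXT flag through all stored entries with the same function. A self-contained model of that rotation, mirroring move_to_next_stateful_cpuid_entry() (like the kernel loop, it assumes at least one matching entry exists):

#include <stdio.h>

#define READ_NEXT 1

struct ent { unsigned func; int flags; };

/* Clear READ_NEXT on the current entry, then walk forward (wrapping)
 * to the next entry with the same function and mark it. */
static int next_stateful(struct ent *e, int nent, int i)
{
	int j;

	e[i].flags &= ~READ_NEXT;
	for (j = (i + 1) % nent; ; j = (j + 1) % nent) {
		if (e[j].func == e[i].func) {
			e[j].flags |= READ_NEXT;
			return j;
		}
	}
}

int main(void)
{
	struct ent e[] = { { 2, READ_NEXT }, { 2, 0 }, { 2, 0 } };
	int i = 0, n;

	for (n = 0; n < 5; n++) {
		i = next_stateful(e, 3, i);
		printf("%d ", i);	/* prints: 1 2 0 1 2 */
	}
	printf("\n");
	return 0;
}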
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
new file mode 100644
index 000000000000..5b97e1797a6d
--- /dev/null
+++ b/arch/x86/kvm/cpuid.h
@@ -0,0 +1,46 @@ | |||
1 | #ifndef ARCH_X86_KVM_CPUID_H | ||
2 | #define ARCH_X86_KVM_CPUID_H | ||
3 | |||
4 | #include "x86.h" | ||
5 | |||
6 | void kvm_update_cpuid(struct kvm_vcpu *vcpu); | ||
7 | struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, | ||
8 | u32 function, u32 index); | ||
9 | int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, | ||
10 | struct kvm_cpuid_entry2 __user *entries); | ||
11 | int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | ||
12 | struct kvm_cpuid *cpuid, | ||
13 | struct kvm_cpuid_entry __user *entries); | ||
14 | int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, | ||
15 | struct kvm_cpuid2 *cpuid, | ||
16 | struct kvm_cpuid_entry2 __user *entries); | ||
17 | int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, | ||
18 | struct kvm_cpuid2 *cpuid, | ||
19 | struct kvm_cpuid_entry2 __user *entries); | ||
20 | |||
21 | |||
22 | static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu) | ||
23 | { | ||
24 | struct kvm_cpuid_entry2 *best; | ||
25 | |||
26 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | ||
27 | return best && (best->ecx & bit(X86_FEATURE_XSAVE)); | ||
28 | } | ||
29 | |||
30 | static inline bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu) | ||
31 | { | ||
32 | struct kvm_cpuid_entry2 *best; | ||
33 | |||
34 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
35 | return best && (best->ebx & bit(X86_FEATURE_SMEP)); | ||
36 | } | ||
37 | |||
38 | static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu) | ||
39 | { | ||
40 | struct kvm_cpuid_entry2 *best; | ||
41 | |||
42 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
43 | return best && (best->ebx & bit(X86_FEATURE_FSGSBASE)); | ||
44 | } | ||
45 | |||
46 | #endif | ||
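Each guest_cpuid_has_*() helper above reduces to a single bit test against a cached CPUID word. A stand-alone sketch of that pattern, with bit() and the XSAVE bit position (CPUID.1:ECX bit 26) spelled out locally:

#include <stdio.h>

#define bit(n)			(1u << (n))
#define X86_FEATURE_XSAVE_BIT	26	/* CPUID.1:ECX.XSAVE */

struct cpuid_entry { unsigned eax, ebx, ecx, edx; };

/* Same shape as guest_cpuid_has_xsave(): a NULL-safe bit test on the
 * cached leaf. */
static int guest_has_xsave(const struct cpuid_entry *leaf1)
{
	return leaf1 && (leaf1->ecx & bit(X86_FEATURE_XSAVE_BIT));
}

int main(void)
{
	struct cpuid_entry leaf1 = { .ecx = bit(X86_FEATURE_XSAVE_BIT) };

	printf("xsave: %d\n", guest_has_xsave(&leaf1));	/* xsave: 1 */
	printf("xsave: %d\n", guest_has_xsave(NULL));	/* xsave: 0 */
	return 0;
}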
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index f1e3be18a08f..05a562b85025 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -125,8 +125,9 @@ | |||
125 | #define Lock (1<<26) /* lock prefix is allowed for the instruction */ | 125 | #define Lock (1<<26) /* lock prefix is allowed for the instruction */ |
126 | #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */ | 126 | #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */ |
127 | #define No64 (1<<28) | 127 | #define No64 (1<<28) |
128 | #define PageTable (1 << 29) /* instruction used to write page table */ | ||
128 | /* Source 2 operand type */ | 129 | /* Source 2 operand type */ |
129 | #define Src2Shift (29) | 130 | #define Src2Shift (30) |
130 | #define Src2None (OpNone << Src2Shift) | 131 | #define Src2None (OpNone << Src2Shift) |
131 | #define Src2CL (OpCL << Src2Shift) | 132 | #define Src2CL (OpCL << Src2Shift) |
132 | #define Src2ImmByte (OpImmByte << Src2Shift) | 133 | #define Src2ImmByte (OpImmByte << Src2Shift) |
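The hunk above is why Src2Shift moves from 29 to 30: the decode-flag word packs single-bit attributes in the low bits and the multi-bit "source 2 operand type" field at the top, so claiming bit 29 for the new PageTable attribute pushes the Src2 field up by one. A toy layout showing the two kinds of field coexisting (the operand-type code 2 is illustrative, not the kernel's actual Op* value):

#include <stdio.h>

#define Lock		(1ULL << 26)
#define Priv		(1ULL << 27)
#define No64		(1ULL << 28)
#define PageTable	(1ULL << 29)	/* new single-bit attribute */
#define Src2Shift	30		/* multi-bit field moves up by one */
#define Src2CL		(2ULL << Src2Shift)	/* illustrative operand-type code */

int main(void)
{
	unsigned long long flags = Lock | PageTable | Src2CL;

	printf("lock=%d pagetable=%d src2=%llu\n",
	       !!(flags & Lock), !!(flags & PageTable),
	       flags >> Src2Shift);	/* lock=1 pagetable=1 src2=2 */
	return 0;
}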
@@ -1674,11 +1675,6 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt) | |||
1674 | return X86EMUL_CONTINUE; | 1675 | return X86EMUL_CONTINUE; |
1675 | } | 1676 | } |
1676 | 1677 | ||
1677 | static int em_grp1a(struct x86_emulate_ctxt *ctxt) | ||
1678 | { | ||
1679 | return emulate_pop(ctxt, &ctxt->dst.val, ctxt->dst.bytes); | ||
1680 | } | ||
1681 | |||
1682 | static int em_grp2(struct x86_emulate_ctxt *ctxt) | 1678 | static int em_grp2(struct x86_emulate_ctxt *ctxt) |
1683 | { | 1679 | { |
1684 | switch (ctxt->modrm_reg) { | 1680 | switch (ctxt->modrm_reg) { |
@@ -1788,7 +1784,7 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt) | |||
1788 | return rc; | 1784 | return rc; |
1789 | } | 1785 | } |
1790 | 1786 | ||
1791 | static int em_grp9(struct x86_emulate_ctxt *ctxt) | 1787 | static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt) |
1792 | { | 1788 | { |
1793 | u64 old = ctxt->dst.orig_val64; | 1789 | u64 old = ctxt->dst.orig_val64; |
1794 | 1790 | ||
@@ -1831,6 +1827,24 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) | |||
1831 | return rc; | 1827 | return rc; |
1832 | } | 1828 | } |
1833 | 1829 | ||
1830 | static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) | ||
1831 | { | ||
1832 | /* Save real source value, then compare EAX against destination. */ | ||
1833 | ctxt->src.orig_val = ctxt->src.val; | ||
1834 | ctxt->src.val = ctxt->regs[VCPU_REGS_RAX]; | ||
1835 | emulate_2op_SrcV(ctxt, "cmp"); | ||
1836 | |||
1837 | if (ctxt->eflags & EFLG_ZF) { | ||
1838 | /* Success: write back to memory. */ | ||
1839 | ctxt->dst.val = ctxt->src.orig_val; | ||
1840 | } else { | ||
1841 | /* Failure: write the value we saw to EAX. */ | ||
1842 | ctxt->dst.type = OP_REG; | ||
1843 | ctxt->dst.addr.reg = (unsigned long *)&ctxt->regs[VCPU_REGS_RAX]; | ||
1844 | } | ||
1845 | return X86EMUL_CONTINUE; | ||
1846 | } | ||
1847 | |||
1834 | static int em_lseg(struct x86_emulate_ctxt *ctxt) | 1848 | static int em_lseg(struct x86_emulate_ctxt *ctxt) |
1835 | { | 1849 | { |
1836 | int seg = ctxt->src2.val; | 1850 | int seg = ctxt->src2.val; |
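em_cmpxchg() above implements the architectural CMPXCHG rule: compare the accumulator with the destination; on a match, write the source to the destination and set ZF; on a mismatch, clear ZF and load the destination into the accumulator. A stand-alone model of those semantics:

#include <stdio.h>
#include <stdbool.h>

struct cpu { unsigned long rax; bool zf; };

static void cmpxchg_model(struct cpu *c, unsigned long *dst, unsigned long src)
{
	if (c->rax == *dst) {
		c->zf = true;
		*dst = src;		/* success: write back to memory */
	} else {
		c->zf = false;
		c->rax = *dst;		/* failure: old value goes to RAX */
	}
}

int main(void)
{
	struct cpu c = { .rax = 5 };
	unsigned long mem = 5;

	cmpxchg_model(&c, &mem, 9);
	printf("mem=%lu zf=%d\n", mem, (int)c.zf);	/* mem=9 zf=1 */
	return 0;
}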
@@ -2481,6 +2495,15 @@ static int em_das(struct x86_emulate_ctxt *ctxt) | |||
2481 | return X86EMUL_CONTINUE; | 2495 | return X86EMUL_CONTINUE; |
2482 | } | 2496 | } |
2483 | 2497 | ||
2498 | static int em_call(struct x86_emulate_ctxt *ctxt) | ||
2499 | { | ||
2500 | long rel = ctxt->src.val; | ||
2501 | |||
2502 | ctxt->src.val = (unsigned long)ctxt->_eip; | ||
2503 | jmp_rel(ctxt, rel); | ||
2504 | return em_push(ctxt); | ||
2505 | } | ||
2506 | |||
2484 | static int em_call_far(struct x86_emulate_ctxt *ctxt) | 2507 | static int em_call_far(struct x86_emulate_ctxt *ctxt) |
2485 | { | 2508 | { |
2486 | u16 sel, old_cs; | 2509 | u16 sel, old_cs; |
@@ -2622,12 +2645,75 @@ static int em_rdtsc(struct x86_emulate_ctxt *ctxt) | |||
2622 | return X86EMUL_CONTINUE; | 2645 | return X86EMUL_CONTINUE; |
2623 | } | 2646 | } |
2624 | 2647 | ||
2648 | static int em_rdpmc(struct x86_emulate_ctxt *ctxt) | ||
2649 | { | ||
2650 | u64 pmc; | ||
2651 | |||
2652 | if (ctxt->ops->read_pmc(ctxt, ctxt->regs[VCPU_REGS_RCX], &pmc)) | ||
2653 | return emulate_gp(ctxt, 0); | ||
2654 | ctxt->regs[VCPU_REGS_RAX] = (u32)pmc; | ||
2655 | ctxt->regs[VCPU_REGS_RDX] = pmc >> 32; | ||
2656 | return X86EMUL_CONTINUE; | ||
2657 | } | ||
2658 | |||
2625 | static int em_mov(struct x86_emulate_ctxt *ctxt) | 2659 | static int em_mov(struct x86_emulate_ctxt *ctxt) |
2626 | { | 2660 | { |
2627 | ctxt->dst.val = ctxt->src.val; | 2661 | ctxt->dst.val = ctxt->src.val; |
2628 | return X86EMUL_CONTINUE; | 2662 | return X86EMUL_CONTINUE; |
2629 | } | 2663 | } |
2630 | 2664 | ||
2665 | static int em_cr_write(struct x86_emulate_ctxt *ctxt) | ||
2666 | { | ||
2667 | if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val)) | ||
2668 | return emulate_gp(ctxt, 0); | ||
2669 | |||
2670 | /* Disable writeback. */ | ||
2671 | ctxt->dst.type = OP_NONE; | ||
2672 | return X86EMUL_CONTINUE; | ||
2673 | } | ||
2674 | |||
2675 | static int em_dr_write(struct x86_emulate_ctxt *ctxt) | ||
2676 | { | ||
2677 | unsigned long val; | ||
2678 | |||
2679 | if (ctxt->mode == X86EMUL_MODE_PROT64) | ||
2680 | val = ctxt->src.val & ~0ULL; | ||
2681 | else | ||
2682 | val = ctxt->src.val & ~0U; | ||
2683 | |||
2684 | /* #UD condition is already handled. */ | ||
2685 | if (ctxt->ops->set_dr(ctxt, ctxt->modrm_reg, val) < 0) | ||
2686 | return emulate_gp(ctxt, 0); | ||
2687 | |||
2688 | /* Disable writeback. */ | ||
2689 | ctxt->dst.type = OP_NONE; | ||
2690 | return X86EMUL_CONTINUE; | ||
2691 | } | ||
2692 | |||
2693 | static int em_wrmsr(struct x86_emulate_ctxt *ctxt) | ||
2694 | { | ||
2695 | u64 msr_data; | ||
2696 | |||
2697 | msr_data = (u32)ctxt->regs[VCPU_REGS_RAX] | ||
2698 | | ((u64)ctxt->regs[VCPU_REGS_RDX] << 32); | ||
2699 | if (ctxt->ops->set_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], msr_data)) | ||
2700 | return emulate_gp(ctxt, 0); | ||
2701 | |||
2702 | return X86EMUL_CONTINUE; | ||
2703 | } | ||
2704 | |||
2705 | static int em_rdmsr(struct x86_emulate_ctxt *ctxt) | ||
2706 | { | ||
2707 | u64 msr_data; | ||
2708 | |||
2709 | if (ctxt->ops->get_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], &msr_data)) | ||
2710 | return emulate_gp(ctxt, 0); | ||
2711 | |||
2712 | ctxt->regs[VCPU_REGS_RAX] = (u32)msr_data; | ||
2713 | ctxt->regs[VCPU_REGS_RDX] = msr_data >> 32; | ||
2714 | return X86EMUL_CONTINUE; | ||
2715 | } | ||
2716 | |||
2631 | static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt) | 2717 | static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt) |
2632 | { | 2718 | { |
2633 | if (ctxt->modrm_reg > VCPU_SREG_GS) | 2719 | if (ctxt->modrm_reg > VCPU_SREG_GS) |
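em_wrmsr()/em_rdmsr() above move a 64-bit MSR value through the EDX:EAX register pair, exactly as the hardware instructions do. A quick sketch of the split/merge arithmetic:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t eax = 0xdeadbeef, edx = 0x01234567;

	/* wrmsr direction: merge the halves */
	uint64_t msr = (uint64_t)eax | ((uint64_t)edx << 32);

	/* rdmsr direction: split them again */
	printf("msr=%#llx eax=%#x edx=%#x\n",
	       (unsigned long long)msr,
	       (uint32_t)msr, (uint32_t)(msr >> 32));
	return 0;
}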
@@ -2775,6 +2861,24 @@ static int em_jcxz(struct x86_emulate_ctxt *ctxt) | |||
2775 | return X86EMUL_CONTINUE; | 2861 | return X86EMUL_CONTINUE; |
2776 | } | 2862 | } |
2777 | 2863 | ||
2864 | static int em_in(struct x86_emulate_ctxt *ctxt) | ||
2865 | { | ||
2866 | if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val, | ||
2867 | &ctxt->dst.val)) | ||
2868 | return X86EMUL_IO_NEEDED; | ||
2869 | |||
2870 | return X86EMUL_CONTINUE; | ||
2871 | } | ||
2872 | |||
2873 | static int em_out(struct x86_emulate_ctxt *ctxt) | ||
2874 | { | ||
2875 | ctxt->ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val, | ||
2876 | &ctxt->src.val, 1); | ||
2877 | /* Disable writeback. */ | ||
2878 | ctxt->dst.type = OP_NONE; | ||
2879 | return X86EMUL_CONTINUE; | ||
2880 | } | ||
2881 | |||
2778 | static int em_cli(struct x86_emulate_ctxt *ctxt) | 2882 | static int em_cli(struct x86_emulate_ctxt *ctxt) |
2779 | { | 2883 | { |
2780 | if (emulator_bad_iopl(ctxt)) | 2884 | if (emulator_bad_iopl(ctxt)) |
@@ -2794,6 +2898,69 @@ static int em_sti(struct x86_emulate_ctxt *ctxt) | |||
2794 | return X86EMUL_CONTINUE; | 2898 | return X86EMUL_CONTINUE; |
2795 | } | 2899 | } |
2796 | 2900 | ||
2901 | static int em_bt(struct x86_emulate_ctxt *ctxt) | ||
2902 | { | ||
2903 | /* Disable writeback. */ | ||
2904 | ctxt->dst.type = OP_NONE; | ||
2905 | /* only subword offset */ | ||
2906 | ctxt->src.val &= (ctxt->dst.bytes << 3) - 1; | ||
2907 | |||
2908 | emulate_2op_SrcV_nobyte(ctxt, "bt"); | ||
2909 | return X86EMUL_CONTINUE; | ||
2910 | } | ||
2911 | |||
2912 | static int em_bts(struct x86_emulate_ctxt *ctxt) | ||
2913 | { | ||
2914 | emulate_2op_SrcV_nobyte(ctxt, "bts"); | ||
2915 | return X86EMUL_CONTINUE; | ||
2916 | } | ||
2917 | |||
2918 | static int em_btr(struct x86_emulate_ctxt *ctxt) | ||
2919 | { | ||
2920 | emulate_2op_SrcV_nobyte(ctxt, "btr"); | ||
2921 | return X86EMUL_CONTINUE; | ||
2922 | } | ||
2923 | |||
2924 | static int em_btc(struct x86_emulate_ctxt *ctxt) | ||
2925 | { | ||
2926 | emulate_2op_SrcV_nobyte(ctxt, "btc"); | ||
2927 | return X86EMUL_CONTINUE; | ||
2928 | } | ||
2929 | |||
2930 | static int em_bsf(struct x86_emulate_ctxt *ctxt) | ||
2931 | { | ||
2932 | u8 zf; | ||
2933 | |||
2934 | __asm__ ("bsf %2, %0; setz %1" | ||
2935 | : "=r"(ctxt->dst.val), "=q"(zf) | ||
2936 | : "r"(ctxt->src.val)); | ||
2937 | |||
2938 | ctxt->eflags &= ~X86_EFLAGS_ZF; | ||
2939 | if (zf) { | ||
2940 | ctxt->eflags |= X86_EFLAGS_ZF; | ||
2941 | /* Disable writeback. */ | ||
2942 | ctxt->dst.type = OP_NONE; | ||
2943 | } | ||
2944 | return X86EMUL_CONTINUE; | ||
2945 | } | ||
2946 | |||
2947 | static int em_bsr(struct x86_emulate_ctxt *ctxt) | ||
2948 | { | ||
2949 | u8 zf; | ||
2950 | |||
2951 | __asm__ ("bsr %2, %0; setz %1" | ||
2952 | : "=r"(ctxt->dst.val), "=q"(zf) | ||
2953 | : "r"(ctxt->src.val)); | ||
2954 | |||
2955 | ctxt->eflags &= ~X86_EFLAGS_ZF; | ||
2956 | if (zf) { | ||
2957 | ctxt->eflags |= X86_EFLAGS_ZF; | ||
2958 | /* Disable writeback. */ | ||
2959 | ctxt->dst.type = OP_NONE; | ||
2960 | } | ||
2961 | return X86EMUL_CONTINUE; | ||
2962 | } | ||
2963 | |||
2797 | static bool valid_cr(int nr) | 2964 | static bool valid_cr(int nr) |
2798 | { | 2965 | { |
2799 | switch (nr) { | 2966 | switch (nr) { |
@@ -2867,9 +3034,6 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) | |||
2867 | break; | 3034 | break; |
2868 | } | 3035 | } |
2869 | case 4: { | 3036 | case 4: { |
2870 | u64 cr4; | ||
2871 | |||
2872 | cr4 = ctxt->ops->get_cr(ctxt, 4); | ||
2873 | ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); | 3037 | ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); |
2874 | 3038 | ||
2875 | if ((efer & EFER_LMA) && !(new_val & X86_CR4_PAE)) | 3039 | if ((efer & EFER_LMA) && !(new_val & X86_CR4_PAE)) |
@@ -3003,6 +3167,8 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) | |||
3003 | #define D2bv(_f) D((_f) | ByteOp), D(_f) | 3167 | #define D2bv(_f) D((_f) | ByteOp), D(_f) |
3004 | #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p) | 3168 | #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p) |
3005 | #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e) | 3169 | #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e) |
3170 | #define I2bvIP(_f, _e, _i, _p) \ | ||
3171 | IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p) | ||
3006 | 3172 | ||
3007 | #define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e), \ | 3173 | #define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e), \ |
3008 | I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ | 3174 | I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ |
@@ -3033,17 +3199,17 @@ static struct opcode group7_rm7[] = { | |||
3033 | 3199 | ||
3034 | static struct opcode group1[] = { | 3200 | static struct opcode group1[] = { |
3035 | I(Lock, em_add), | 3201 | I(Lock, em_add), |
3036 | I(Lock, em_or), | 3202 | I(Lock | PageTable, em_or), |
3037 | I(Lock, em_adc), | 3203 | I(Lock, em_adc), |
3038 | I(Lock, em_sbb), | 3204 | I(Lock, em_sbb), |
3039 | I(Lock, em_and), | 3205 | I(Lock | PageTable, em_and), |
3040 | I(Lock, em_sub), | 3206 | I(Lock, em_sub), |
3041 | I(Lock, em_xor), | 3207 | I(Lock, em_xor), |
3042 | I(0, em_cmp), | 3208 | I(0, em_cmp), |
3043 | }; | 3209 | }; |
3044 | 3210 | ||
3045 | static struct opcode group1A[] = { | 3211 | static struct opcode group1A[] = { |
3046 | D(DstMem | SrcNone | ModRM | Mov | Stack), N, N, N, N, N, N, N, | 3212 | I(DstMem | SrcNone | ModRM | Mov | Stack, em_pop), N, N, N, N, N, N, N, |
3047 | }; | 3213 | }; |
3048 | 3214 | ||
3049 | static struct opcode group3[] = { | 3215 | static struct opcode group3[] = { |
@@ -3058,16 +3224,19 @@ static struct opcode group3[] = { | |||
3058 | }; | 3224 | }; |
3059 | 3225 | ||
3060 | static struct opcode group4[] = { | 3226 | static struct opcode group4[] = { |
3061 | D(ByteOp | DstMem | SrcNone | ModRM | Lock), D(ByteOp | DstMem | SrcNone | ModRM | Lock), | 3227 | I(ByteOp | DstMem | SrcNone | ModRM | Lock, em_grp45), |
3228 | I(ByteOp | DstMem | SrcNone | ModRM | Lock, em_grp45), | ||
3062 | N, N, N, N, N, N, | 3229 | N, N, N, N, N, N, |
3063 | }; | 3230 | }; |
3064 | 3231 | ||
3065 | static struct opcode group5[] = { | 3232 | static struct opcode group5[] = { |
3066 | D(DstMem | SrcNone | ModRM | Lock), D(DstMem | SrcNone | ModRM | Lock), | 3233 | I(DstMem | SrcNone | ModRM | Lock, em_grp45), |
3067 | D(SrcMem | ModRM | Stack), | 3234 | I(DstMem | SrcNone | ModRM | Lock, em_grp45), |
3235 | I(SrcMem | ModRM | Stack, em_grp45), | ||
3068 | I(SrcMemFAddr | ModRM | ImplicitOps | Stack, em_call_far), | 3236 | I(SrcMemFAddr | ModRM | ImplicitOps | Stack, em_call_far), |
3069 | D(SrcMem | ModRM | Stack), D(SrcMemFAddr | ModRM | ImplicitOps), | 3237 | I(SrcMem | ModRM | Stack, em_grp45), |
3070 | D(SrcMem | ModRM | Stack), N, | 3238 | I(SrcMemFAddr | ModRM | ImplicitOps, em_grp45), |
3239 | I(SrcMem | ModRM | Stack, em_grp45), N, | ||
3071 | }; | 3240 | }; |
3072 | 3241 | ||
3073 | static struct opcode group6[] = { | 3242 | static struct opcode group6[] = { |
@@ -3096,18 +3265,21 @@ static struct group_dual group7 = { { | |||
3096 | 3265 | ||
3097 | static struct opcode group8[] = { | 3266 | static struct opcode group8[] = { |
3098 | N, N, N, N, | 3267 | N, N, N, N, |
3099 | D(DstMem | SrcImmByte | ModRM), D(DstMem | SrcImmByte | ModRM | Lock), | 3268 | I(DstMem | SrcImmByte | ModRM, em_bt), |
3100 | D(DstMem | SrcImmByte | ModRM | Lock), D(DstMem | SrcImmByte | ModRM | Lock), | 3269 | I(DstMem | SrcImmByte | ModRM | Lock | PageTable, em_bts), |
3270 | I(DstMem | SrcImmByte | ModRM | Lock, em_btr), | ||
3271 | I(DstMem | SrcImmByte | ModRM | Lock | PageTable, em_btc), | ||
3101 | }; | 3272 | }; |
3102 | 3273 | ||
3103 | static struct group_dual group9 = { { | 3274 | static struct group_dual group9 = { { |
3104 | N, D(DstMem64 | ModRM | Lock), N, N, N, N, N, N, | 3275 | N, I(DstMem64 | ModRM | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N, |
3105 | }, { | 3276 | }, { |
3106 | N, N, N, N, N, N, N, N, | 3277 | N, N, N, N, N, N, N, N, |
3107 | } }; | 3278 | } }; |
3108 | 3279 | ||
3109 | static struct opcode group11[] = { | 3280 | static struct opcode group11[] = { |
3110 | I(DstMem | SrcImm | ModRM | Mov, em_mov), X7(D(Undefined)), | 3281 | I(DstMem | SrcImm | ModRM | Mov | PageTable, em_mov), |
3282 | X7(D(Undefined)), | ||
3111 | }; | 3283 | }; |
3112 | 3284 | ||
3113 | static struct gprefix pfx_0f_6f_0f_7f = { | 3285 | static struct gprefix pfx_0f_6f_0f_7f = { |
@@ -3120,7 +3292,7 @@ static struct opcode opcode_table[256] = { | |||
3120 | I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg), | 3292 | I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg), |
3121 | I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg), | 3293 | I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg), |
3122 | /* 0x08 - 0x0F */ | 3294 | /* 0x08 - 0x0F */ |
3123 | I6ALU(Lock, em_or), | 3295 | I6ALU(Lock | PageTable, em_or), |
3124 | I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg), | 3296 | I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg), |
3125 | N, | 3297 | N, |
3126 | /* 0x10 - 0x17 */ | 3298 | /* 0x10 - 0x17 */ |
@@ -3132,7 +3304,7 @@ static struct opcode opcode_table[256] = { | |||
3132 | I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg), | 3304 | I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg), |
3133 | I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg), | 3305 | I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg), |
3134 | /* 0x20 - 0x27 */ | 3306 | /* 0x20 - 0x27 */ |
3135 | I6ALU(Lock, em_and), N, N, | 3307 | I6ALU(Lock | PageTable, em_and), N, N, |
3136 | /* 0x28 - 0x2F */ | 3308 | /* 0x28 - 0x2F */ |
3137 | I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), | 3309 | I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), |
3138 | /* 0x30 - 0x37 */ | 3310 | /* 0x30 - 0x37 */ |
@@ -3155,8 +3327,8 @@ static struct opcode opcode_table[256] = { | |||
3155 | I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op), | 3327 | I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op), |
3156 | I(SrcImmByte | Mov | Stack, em_push), | 3328 | I(SrcImmByte | Mov | Stack, em_push), |
3157 | I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op), | 3329 | I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op), |
3158 | D2bvIP(DstDI | SrcDX | Mov | String, ins, check_perm_in), /* insb, insw/insd */ | 3330 | I2bvIP(DstDI | SrcDX | Mov | String, em_in, ins, check_perm_in), /* insb, insw/insd */ |
3159 | D2bvIP(SrcSI | DstDX | String, outs, check_perm_out), /* outsb, outsw/outsd */ | 3331 | I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */ |
3160 | /* 0x70 - 0x7F */ | 3332 | /* 0x70 - 0x7F */ |
3161 | X16(D(SrcImmByte)), | 3333 | X16(D(SrcImmByte)), |
3162 | /* 0x80 - 0x87 */ | 3334 | /* 0x80 - 0x87 */ |
@@ -3165,11 +3337,11 @@ static struct opcode opcode_table[256] = { | |||
3165 | G(ByteOp | DstMem | SrcImm | ModRM | No64 | Group, group1), | 3337 | G(ByteOp | DstMem | SrcImm | ModRM | No64 | Group, group1), |
3166 | G(DstMem | SrcImmByte | ModRM | Group, group1), | 3338 | G(DstMem | SrcImmByte | ModRM | Group, group1), |
3167 | I2bv(DstMem | SrcReg | ModRM, em_test), | 3339 | I2bv(DstMem | SrcReg | ModRM, em_test), |
3168 | I2bv(DstMem | SrcReg | ModRM | Lock, em_xchg), | 3340 | I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg), |
3169 | /* 0x88 - 0x8F */ | 3341 | /* 0x88 - 0x8F */ |
3170 | I2bv(DstMem | SrcReg | ModRM | Mov, em_mov), | 3342 | I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov), |
3171 | I2bv(DstReg | SrcMem | ModRM | Mov, em_mov), | 3343 | I2bv(DstReg | SrcMem | ModRM | Mov, em_mov), |
3172 | I(DstMem | SrcNone | ModRM | Mov, em_mov_rm_sreg), | 3344 | I(DstMem | SrcNone | ModRM | Mov | PageTable, em_mov_rm_sreg), |
3173 | D(ModRM | SrcMem | NoAccess | DstReg), | 3345 | D(ModRM | SrcMem | NoAccess | DstReg), |
3174 | I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm), | 3346 | I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm), |
3175 | G(0, group1A), | 3347 | G(0, group1A), |
@@ -3182,7 +3354,7 @@ static struct opcode opcode_table[256] = { | |||
3182 | II(ImplicitOps | Stack, em_popf, popf), N, N, | 3354 | II(ImplicitOps | Stack, em_popf, popf), N, N, |
3183 | /* 0xA0 - 0xA7 */ | 3355 | /* 0xA0 - 0xA7 */ |
3184 | I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), | 3356 | I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), |
3185 | I2bv(DstMem | SrcAcc | Mov | MemAbs, em_mov), | 3357 | I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), |
3186 | I2bv(SrcSI | DstDI | Mov | String, em_mov), | 3358 | I2bv(SrcSI | DstDI | Mov | String, em_mov), |
3187 | I2bv(SrcSI | DstDI | String, em_cmp), | 3359 | I2bv(SrcSI | DstDI | String, em_cmp), |
3188 | /* 0xA8 - 0xAF */ | 3360 | /* 0xA8 - 0xAF */ |
@@ -3213,13 +3385,13 @@ static struct opcode opcode_table[256] = { | |||
3213 | /* 0xE0 - 0xE7 */ | 3385 | /* 0xE0 - 0xE7 */ |
3214 | X3(I(SrcImmByte, em_loop)), | 3386 | X3(I(SrcImmByte, em_loop)), |
3215 | I(SrcImmByte, em_jcxz), | 3387 | I(SrcImmByte, em_jcxz), |
3216 | D2bvIP(SrcImmUByte | DstAcc, in, check_perm_in), | 3388 | I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in), |
3217 | D2bvIP(SrcAcc | DstImmUByte, out, check_perm_out), | 3389 | I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out), |
3218 | /* 0xE8 - 0xEF */ | 3390 | /* 0xE8 - 0xEF */ |
3219 | D(SrcImm | Stack), D(SrcImm | ImplicitOps), | 3391 | I(SrcImm | Stack, em_call), D(SrcImm | ImplicitOps), |
3220 | I(SrcImmFAddr | No64, em_jmp_far), D(SrcImmByte | ImplicitOps), | 3392 | I(SrcImmFAddr | No64, em_jmp_far), D(SrcImmByte | ImplicitOps), |
3221 | D2bvIP(SrcDX | DstAcc, in, check_perm_in), | 3393 | I2bvIP(SrcDX | DstAcc, em_in, in, check_perm_in), |
3222 | D2bvIP(SrcAcc | DstDX, out, check_perm_out), | 3394 | I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out), |
3223 | /* 0xF0 - 0xF7 */ | 3395 | /* 0xF0 - 0xF7 */ |
3224 | N, DI(ImplicitOps, icebp), N, N, | 3396 | N, DI(ImplicitOps, icebp), N, N, |
3225 | DI(ImplicitOps | Priv, hlt), D(ImplicitOps), | 3397 | DI(ImplicitOps | Priv, hlt), D(ImplicitOps), |
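The port-I/O opcodes move from D2bvIP decode stubs plus open-coded switch cases (removed further down) to dedicated em_in/em_out callbacks via the new I2bvIP macro. Based on the removed 0xec-0xef handling, the helpers plausibly look like this; returning X86EMUL_IO_NEEDED when userspace must complete the I/O is an assumption:

    static int em_in(struct x86_emulate_ctxt *ctxt)
    {
            if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val,
                                 &ctxt->dst.val))
                    return X86EMUL_IO_NEEDED; /* exit to complete the I/O */

            return X86EMUL_CONTINUE;
    }

    static int em_out(struct x86_emulate_ctxt *ctxt)
    {
            ctxt->ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val,
                                        &ctxt->src.val, 1);
            /* The port write is the only side effect; disable writeback. */
            ctxt->dst.type = OP_NONE;
            return X86EMUL_CONTINUE;
    }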
@@ -3242,15 +3414,15 @@ static struct opcode twobyte_table[256] = { | |||
3242 | /* 0x20 - 0x2F */ | 3414 | /* 0x20 - 0x2F */ |
3243 | DIP(ModRM | DstMem | Priv | Op3264, cr_read, check_cr_read), | 3415 | DIP(ModRM | DstMem | Priv | Op3264, cr_read, check_cr_read), |
3244 | DIP(ModRM | DstMem | Priv | Op3264, dr_read, check_dr_read), | 3416 | DIP(ModRM | DstMem | Priv | Op3264, dr_read, check_dr_read), |
3245 | DIP(ModRM | SrcMem | Priv | Op3264, cr_write, check_cr_write), | 3417 | IIP(ModRM | SrcMem | Priv | Op3264, em_cr_write, cr_write, check_cr_write), |
3246 | DIP(ModRM | SrcMem | Priv | Op3264, dr_write, check_dr_write), | 3418 | IIP(ModRM | SrcMem | Priv | Op3264, em_dr_write, dr_write, check_dr_write), |
3247 | N, N, N, N, | 3419 | N, N, N, N, |
3248 | N, N, N, N, N, N, N, N, | 3420 | N, N, N, N, N, N, N, N, |
3249 | /* 0x30 - 0x3F */ | 3421 | /* 0x30 - 0x3F */ |
3250 | DI(ImplicitOps | Priv, wrmsr), | 3422 | II(ImplicitOps | Priv, em_wrmsr, wrmsr), |
3251 | IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc), | 3423 | IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc), |
3252 | DI(ImplicitOps | Priv, rdmsr), | 3424 | II(ImplicitOps | Priv, em_rdmsr, rdmsr), |
3253 | DIP(ImplicitOps | Priv, rdpmc, check_rdpmc), | 3425 | IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc), |
3254 | I(ImplicitOps | VendorSpecific, em_sysenter), | 3426 | I(ImplicitOps | VendorSpecific, em_sysenter), |
3255 | I(ImplicitOps | Priv | VendorSpecific, em_sysexit), | 3427 | I(ImplicitOps | Priv | VendorSpecific, em_sysexit), |
3256 | N, N, | 3428 | N, N, |
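wrmsr, rdmsr, and rdpmc likewise gain em_* callbacks; the removed 0x30/0x32 switch cases further down show exactly what the MSR pair must do. A sketch, assuming emulate_gp() returns X86EMUL_PROPAGATE_FAULT as it does elsewhere in this file:

    static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
    {
            u64 msr_data;

            /* EDX:EAX -> MSR[ECX] */
            msr_data = (u32)ctxt->regs[VCPU_REGS_RAX]
                        | ((u64)ctxt->regs[VCPU_REGS_RDX] << 32);
            if (ctxt->ops->set_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], msr_data))
                    return emulate_gp(ctxt, 0);

            return X86EMUL_CONTINUE;
    }

    static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
    {
            u64 msr_data;

            if (ctxt->ops->get_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], &msr_data))
                    return emulate_gp(ctxt, 0);

            /* MSR[ECX] -> EDX:EAX */
            ctxt->regs[VCPU_REGS_RAX] = (u32)msr_data;
            ctxt->regs[VCPU_REGS_RDX] = msr_data >> 32;
            return X86EMUL_CONTINUE;
    }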
@@ -3275,26 +3447,28 @@ static struct opcode twobyte_table[256] = { | |||
3275 | X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), | 3447 | X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), |
3276 | /* 0xA0 - 0xA7 */ | 3448 | /* 0xA0 - 0xA7 */ |
3277 | I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg), | 3449 | I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg), |
3278 | DI(ImplicitOps, cpuid), D(DstMem | SrcReg | ModRM | BitOp), | 3450 | DI(ImplicitOps, cpuid), I(DstMem | SrcReg | ModRM | BitOp, em_bt), |
3279 | D(DstMem | SrcReg | Src2ImmByte | ModRM), | 3451 | D(DstMem | SrcReg | Src2ImmByte | ModRM), |
3280 | D(DstMem | SrcReg | Src2CL | ModRM), N, N, | 3452 | D(DstMem | SrcReg | Src2CL | ModRM), N, N, |
3281 | /* 0xA8 - 0xAF */ | 3453 | /* 0xA8 - 0xAF */ |
3282 | I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg), | 3454 | I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg), |
3283 | DI(ImplicitOps, rsm), D(DstMem | SrcReg | ModRM | BitOp | Lock), | 3455 | DI(ImplicitOps, rsm), |
3456 | I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), | ||
3284 | D(DstMem | SrcReg | Src2ImmByte | ModRM), | 3457 | D(DstMem | SrcReg | Src2ImmByte | ModRM), |
3285 | D(DstMem | SrcReg | Src2CL | ModRM), | 3458 | D(DstMem | SrcReg | Src2CL | ModRM), |
3286 | D(ModRM), I(DstReg | SrcMem | ModRM, em_imul), | 3459 | D(ModRM), I(DstReg | SrcMem | ModRM, em_imul), |
3287 | /* 0xB0 - 0xB7 */ | 3460 | /* 0xB0 - 0xB7 */ |
3288 | D2bv(DstMem | SrcReg | ModRM | Lock), | 3461 | I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg), |
3289 | I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg), | 3462 | I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg), |
3290 | D(DstMem | SrcReg | ModRM | BitOp | Lock), | 3463 | I(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr), |
3291 | I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg), | 3464 | I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg), |
3292 | I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg), | 3465 | I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg), |
3293 | D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), | 3466 | D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), |
3294 | /* 0xB8 - 0xBF */ | 3467 | /* 0xB8 - 0xBF */ |
3295 | N, N, | 3468 | N, N, |
3296 | G(BitOp, group8), D(DstMem | SrcReg | ModRM | BitOp | Lock), | 3469 | G(BitOp, group8), |
3297 | D(DstReg | SrcMem | ModRM), D(DstReg | SrcMem | ModRM), | 3470 | I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), |
3471 | I(DstReg | SrcMem | ModRM, em_bsf), I(DstReg | SrcMem | ModRM, em_bsr), | ||
3298 | D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), | 3472 | D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), |
3299 | /* 0xC0 - 0xCF */ | 3473 | /* 0xC0 - 0xCF */ |
3300 | D2bv(DstMem | SrcReg | ModRM | Lock), | 3474 | D2bv(DstMem | SrcReg | ModRM | Lock), |
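The bit-test family (bt/bts/btr/btc) and bsf/bsr also move out of the giant switch (removed below) into em_* callbacks built on the existing emulate_2op_SrcV_nobyte() helper. A sketch of two of them, following the removed open-coded cases:

    static int em_bt(struct x86_emulate_ctxt *ctxt)
    {
            /* Disable writeback: bt only updates flags. */
            ctxt->dst.type = OP_NONE;
            /* only subword offset */
            ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;

            emulate_2op_SrcV_nobyte(ctxt, "bt");
            return X86EMUL_CONTINUE;
    }

    static int em_bts(struct x86_emulate_ctxt *ctxt)
    {
            emulate_2op_SrcV_nobyte(ctxt, "bts");
            return X86EMUL_CONTINUE;
    }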
@@ -3320,6 +3494,7 @@ static struct opcode twobyte_table[256] = { | |||
3320 | #undef D2bv | 3494 | #undef D2bv |
3321 | #undef D2bvIP | 3495 | #undef D2bvIP |
3322 | #undef I2bv | 3496 | #undef I2bv |
3497 | #undef I2bvIP | ||
3323 | #undef I6ALU | 3498 | #undef I6ALU |
3324 | 3499 | ||
3325 | static unsigned imm_size(struct x86_emulate_ctxt *ctxt) | 3500 | static unsigned imm_size(struct x86_emulate_ctxt *ctxt) |
@@ -3697,6 +3872,11 @@ done: | |||
3697 | return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK; | 3872 | return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK; |
3698 | } | 3873 | } |
3699 | 3874 | ||
3875 | bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt) | ||
3876 | { | ||
3877 | return ctxt->d & PageTable; | ||
3878 | } | ||
3879 | |||
3700 | static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) | 3880 | static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) |
3701 | { | 3881 | { |
3702 | /* The second termination condition only applies for REPE | 3882 | /* The second termination condition only applies for REPE |
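x86_page_table_writing_insn() is the hook that lets the MMU ask, after decode, whether the instruction that faulted is one that commonly writes page tables. A hypothetical caller (the function name here is illustrative, not from this patch):

    /* hypothetical: prefer unprotect-and-reexecute over emulation when the
     * guest is just updating its own page tables */
    static bool should_reexecute(struct kvm_vcpu *vcpu, gpa_t gpa)
    {
            if (!x86_page_table_writing_insn(&vcpu->arch.emulate_ctxt))
                    return false;

            /* Zap the shadow pages so the write can proceed natively. */
            return kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
    }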
@@ -3720,7 +3900,6 @@ static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) | |||
3720 | int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | 3900 | int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) |
3721 | { | 3901 | { |
3722 | struct x86_emulate_ops *ops = ctxt->ops; | 3902 | struct x86_emulate_ops *ops = ctxt->ops; |
3723 | u64 msr_data; | ||
3724 | int rc = X86EMUL_CONTINUE; | 3903 | int rc = X86EMUL_CONTINUE; |
3725 | int saved_dst_type = ctxt->dst.type; | 3904 | int saved_dst_type = ctxt->dst.type; |
3726 | 3905 | ||
@@ -3854,15 +4033,6 @@ special_insn: | |||
3854 | goto cannot_emulate; | 4033 | goto cannot_emulate; |
3855 | ctxt->dst.val = (s32) ctxt->src.val; | 4034 | ctxt->dst.val = (s32) ctxt->src.val; |
3856 | break; | 4035 | break; |
3857 | case 0x6c: /* insb */ | ||
3858 | case 0x6d: /* insw/insd */ | ||
3859 | ctxt->src.val = ctxt->regs[VCPU_REGS_RDX]; | ||
3860 | goto do_io_in; | ||
3861 | case 0x6e: /* outsb */ | ||
3862 | case 0x6f: /* outsw/outsd */ | ||
3863 | ctxt->dst.val = ctxt->regs[VCPU_REGS_RDX]; | ||
3864 | goto do_io_out; | ||
3865 | break; | ||
3866 | case 0x70 ... 0x7f: /* jcc (short) */ | 4036 | case 0x70 ... 0x7f: /* jcc (short) */ |
3867 | if (test_cc(ctxt->b, ctxt->eflags)) | 4037 | if (test_cc(ctxt->b, ctxt->eflags)) |
3868 | jmp_rel(ctxt, ctxt->src.val); | 4038 | jmp_rel(ctxt, ctxt->src.val); |
@@ -3870,9 +4040,6 @@ special_insn: | |||
3870 | case 0x8d: /* lea r16/r32, m */ | 4040 | case 0x8d: /* lea r16/r32, m */ |
3871 | ctxt->dst.val = ctxt->src.addr.mem.ea; | 4041 | ctxt->dst.val = ctxt->src.addr.mem.ea; |
3872 | break; | 4042 | break; |
3873 | case 0x8f: /* pop (sole member of Grp1a) */ | ||
3874 | rc = em_grp1a(ctxt); | ||
3875 | break; | ||
3876 | case 0x90 ... 0x97: /* nop / xchg reg, rax */ | 4043 | case 0x90 ... 0x97: /* nop / xchg reg, rax */ |
3877 | if (ctxt->dst.addr.reg == &ctxt->regs[VCPU_REGS_RAX]) | 4044 | if (ctxt->dst.addr.reg == &ctxt->regs[VCPU_REGS_RAX]) |
3878 | break; | 4045 | break; |
@@ -3905,38 +4072,11 @@ special_insn: | |||
3905 | ctxt->src.val = ctxt->regs[VCPU_REGS_RCX]; | 4072 | ctxt->src.val = ctxt->regs[VCPU_REGS_RCX]; |
3906 | rc = em_grp2(ctxt); | 4073 | rc = em_grp2(ctxt); |
3907 | break; | 4074 | break; |
3908 | case 0xe4: /* inb */ | ||
3909 | case 0xe5: /* in */ | ||
3910 | goto do_io_in; | ||
3911 | case 0xe6: /* outb */ | ||
3912 | case 0xe7: /* out */ | ||
3913 | goto do_io_out; | ||
3914 | case 0xe8: /* call (near) */ { | ||
3915 | long int rel = ctxt->src.val; | ||
3916 | ctxt->src.val = (unsigned long) ctxt->_eip; | ||
3917 | jmp_rel(ctxt, rel); | ||
3918 | rc = em_push(ctxt); | ||
3919 | break; | ||
3920 | } | ||
3921 | case 0xe9: /* jmp rel */ | 4075 | case 0xe9: /* jmp rel */ |
3922 | case 0xeb: /* jmp rel short */ | 4076 | case 0xeb: /* jmp rel short */ |
3923 | jmp_rel(ctxt, ctxt->src.val); | 4077 | jmp_rel(ctxt, ctxt->src.val); |
3924 | ctxt->dst.type = OP_NONE; /* Disable writeback. */ | 4078 | ctxt->dst.type = OP_NONE; /* Disable writeback. */ |
3925 | break; | 4079 | break; |
3926 | case 0xec: /* in al,dx */ | ||
3927 | case 0xed: /* in (e/r)ax,dx */ | ||
3928 | do_io_in: | ||
3929 | if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val, | ||
3930 | &ctxt->dst.val)) | ||
3931 | goto done; /* IO is needed */ | ||
3932 | break; | ||
3933 | case 0xee: /* out dx,al */ | ||
3934 | case 0xef: /* out dx,(e/r)ax */ | ||
3935 | do_io_out: | ||
3936 | ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val, | ||
3937 | &ctxt->src.val, 1); | ||
3938 | ctxt->dst.type = OP_NONE; /* Disable writeback. */ | ||
3939 | break; | ||
3940 | case 0xf4: /* hlt */ | 4080 | case 0xf4: /* hlt */ |
3941 | ctxt->ops->halt(ctxt); | 4081 | ctxt->ops->halt(ctxt); |
3942 | break; | 4082 | break; |
@@ -3956,12 +4096,6 @@ special_insn: | |||
3956 | case 0xfd: /* std */ | 4096 | case 0xfd: /* std */ |
3957 | ctxt->eflags |= EFLG_DF; | 4097 | ctxt->eflags |= EFLG_DF; |
3958 | break; | 4098 | break; |
3959 | case 0xfe: /* Grp4 */ | ||
3960 | rc = em_grp45(ctxt); | ||
3961 | break; | ||
3962 | case 0xff: /* Grp5 */ | ||
3963 | rc = em_grp45(ctxt); | ||
3964 | break; | ||
3965 | default: | 4099 | default: |
3966 | goto cannot_emulate; | 4100 | goto cannot_emulate; |
3967 | } | 4101 | } |
@@ -4036,49 +4170,6 @@ twobyte_insn: | |||
4036 | case 0x21: /* mov from dr to reg */ | 4170 | case 0x21: /* mov from dr to reg */ |
4037 | ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val); | 4171 | ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val); |
4038 | break; | 4172 | break; |
4039 | case 0x22: /* mov reg, cr */ | ||
4040 | if (ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val)) { | ||
4041 | emulate_gp(ctxt, 0); | ||
4042 | rc = X86EMUL_PROPAGATE_FAULT; | ||
4043 | goto done; | ||
4044 | } | ||
4045 | ctxt->dst.type = OP_NONE; | ||
4046 | break; | ||
4047 | case 0x23: /* mov from reg to dr */ | ||
4048 | if (ops->set_dr(ctxt, ctxt->modrm_reg, ctxt->src.val & | ||
4049 | ((ctxt->mode == X86EMUL_MODE_PROT64) ? | ||
4050 | ~0ULL : ~0U)) < 0) { | ||
4051 | /* #UD condition is already handled by the code above */ | ||
4052 | emulate_gp(ctxt, 0); | ||
4053 | rc = X86EMUL_PROPAGATE_FAULT; | ||
4054 | goto done; | ||
4055 | } | ||
4056 | |||
4057 | ctxt->dst.type = OP_NONE; /* no writeback */ | ||
4058 | break; | ||
4059 | case 0x30: | ||
4060 | /* wrmsr */ | ||
4061 | msr_data = (u32)ctxt->regs[VCPU_REGS_RAX] | ||
4062 | | ((u64)ctxt->regs[VCPU_REGS_RDX] << 32); | ||
4063 | if (ops->set_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], msr_data)) { | ||
4064 | emulate_gp(ctxt, 0); | ||
4065 | rc = X86EMUL_PROPAGATE_FAULT; | ||
4066 | goto done; | ||
4067 | } | ||
4068 | rc = X86EMUL_CONTINUE; | ||
4069 | break; | ||
4070 | case 0x32: | ||
4071 | /* rdmsr */ | ||
4072 | if (ops->get_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], &msr_data)) { | ||
4073 | emulate_gp(ctxt, 0); | ||
4074 | rc = X86EMUL_PROPAGATE_FAULT; | ||
4075 | goto done; | ||
4076 | } else { | ||
4077 | ctxt->regs[VCPU_REGS_RAX] = (u32)msr_data; | ||
4078 | ctxt->regs[VCPU_REGS_RDX] = msr_data >> 32; | ||
4079 | } | ||
4080 | rc = X86EMUL_CONTINUE; | ||
4081 | break; | ||
4082 | case 0x40 ... 0x4f: /* cmov */ | 4173 | case 0x40 ... 0x4f: /* cmov */ |
4083 | ctxt->dst.val = ctxt->dst.orig_val = ctxt->src.val; | 4174 | ctxt->dst.val = ctxt->dst.orig_val = ctxt->src.val; |
4084 | if (!test_cc(ctxt->b, ctxt->eflags)) | 4175 | if (!test_cc(ctxt->b, ctxt->eflags)) |
@@ -4091,93 +4182,21 @@ twobyte_insn: | |||
4091 | case 0x90 ... 0x9f: /* setcc r/m8 */ | 4182 | case 0x90 ... 0x9f: /* setcc r/m8 */ |
4092 | ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags); | 4183 | ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags); |
4093 | break; | 4184 | break; |
4094 | case 0xa3: | ||
4095 | bt: /* bt */ | ||
4096 | ctxt->dst.type = OP_NONE; | ||
4097 | /* only subword offset */ | ||
4098 | ctxt->src.val &= (ctxt->dst.bytes << 3) - 1; | ||
4099 | emulate_2op_SrcV_nobyte(ctxt, "bt"); | ||
4100 | break; | ||
4101 | case 0xa4: /* shld imm8, r, r/m */ | 4185 | case 0xa4: /* shld imm8, r, r/m */ |
4102 | case 0xa5: /* shld cl, r, r/m */ | 4186 | case 0xa5: /* shld cl, r, r/m */ |
4103 | emulate_2op_cl(ctxt, "shld"); | 4187 | emulate_2op_cl(ctxt, "shld"); |
4104 | break; | 4188 | break; |
4105 | case 0xab: | ||
4106 | bts: /* bts */ | ||
4107 | emulate_2op_SrcV_nobyte(ctxt, "bts"); | ||
4108 | break; | ||
4109 | case 0xac: /* shrd imm8, r, r/m */ | 4189 | case 0xac: /* shrd imm8, r, r/m */ |
4110 | case 0xad: /* shrd cl, r, r/m */ | 4190 | case 0xad: /* shrd cl, r, r/m */ |
4111 | emulate_2op_cl(ctxt, "shrd"); | 4191 | emulate_2op_cl(ctxt, "shrd"); |
4112 | break; | 4192 | break; |
4113 | case 0xae: /* clflush */ | 4193 | case 0xae: /* clflush */ |
4114 | break; | 4194 | break; |
4115 | case 0xb0 ... 0xb1: /* cmpxchg */ | ||
4116 | /* | ||
4117 | * Save real source value, then compare EAX against | ||
4118 | * destination. | ||
4119 | */ | ||
4120 | ctxt->src.orig_val = ctxt->src.val; | ||
4121 | ctxt->src.val = ctxt->regs[VCPU_REGS_RAX]; | ||
4122 | emulate_2op_SrcV(ctxt, "cmp"); | ||
4123 | if (ctxt->eflags & EFLG_ZF) { | ||
4124 | /* Success: write back to memory. */ | ||
4125 | ctxt->dst.val = ctxt->src.orig_val; | ||
4126 | } else { | ||
4127 | /* Failure: write the value we saw to EAX. */ | ||
4128 | ctxt->dst.type = OP_REG; | ||
4129 | ctxt->dst.addr.reg = (unsigned long *)&ctxt->regs[VCPU_REGS_RAX]; | ||
4130 | } | ||
4131 | break; | ||
4132 | case 0xb3: | ||
4133 | btr: /* btr */ | ||
4134 | emulate_2op_SrcV_nobyte(ctxt, "btr"); | ||
4135 | break; | ||
4136 | case 0xb6 ... 0xb7: /* movzx */ | 4195 | case 0xb6 ... 0xb7: /* movzx */ |
4137 | ctxt->dst.bytes = ctxt->op_bytes; | 4196 | ctxt->dst.bytes = ctxt->op_bytes; |
4138 | ctxt->dst.val = (ctxt->d & ByteOp) ? (u8) ctxt->src.val | 4197 | ctxt->dst.val = (ctxt->d & ByteOp) ? (u8) ctxt->src.val |
4139 | : (u16) ctxt->src.val; | 4198 | : (u16) ctxt->src.val; |
4140 | break; | 4199 | break; |
4141 | case 0xba: /* Grp8 */ | ||
4142 | switch (ctxt->modrm_reg & 3) { | ||
4143 | case 0: | ||
4144 | goto bt; | ||
4145 | case 1: | ||
4146 | goto bts; | ||
4147 | case 2: | ||
4148 | goto btr; | ||
4149 | case 3: | ||
4150 | goto btc; | ||
4151 | } | ||
4152 | break; | ||
4153 | case 0xbb: | ||
4154 | btc: /* btc */ | ||
4155 | emulate_2op_SrcV_nobyte(ctxt, "btc"); | ||
4156 | break; | ||
4157 | case 0xbc: { /* bsf */ | ||
4158 | u8 zf; | ||
4159 | __asm__ ("bsf %2, %0; setz %1" | ||
4160 | : "=r"(ctxt->dst.val), "=q"(zf) | ||
4161 | : "r"(ctxt->src.val)); | ||
4162 | ctxt->eflags &= ~X86_EFLAGS_ZF; | ||
4163 | if (zf) { | ||
4164 | ctxt->eflags |= X86_EFLAGS_ZF; | ||
4165 | ctxt->dst.type = OP_NONE; /* Disable writeback. */ | ||
4166 | } | ||
4167 | break; | ||
4168 | } | ||
4169 | case 0xbd: { /* bsr */ | ||
4170 | u8 zf; | ||
4171 | __asm__ ("bsr %2, %0; setz %1" | ||
4172 | : "=r"(ctxt->dst.val), "=q"(zf) | ||
4173 | : "r"(ctxt->src.val)); | ||
4174 | ctxt->eflags &= ~X86_EFLAGS_ZF; | ||
4175 | if (zf) { | ||
4176 | ctxt->eflags |= X86_EFLAGS_ZF; | ||
4177 | ctxt->dst.type = OP_NONE; /* Disable writeback. */ | ||
4178 | } | ||
4179 | break; | ||
4180 | } | ||
4181 | case 0xbe ... 0xbf: /* movsx */ | 4200 | case 0xbe ... 0xbf: /* movsx */ |
4182 | ctxt->dst.bytes = ctxt->op_bytes; | 4201 | ctxt->dst.bytes = ctxt->op_bytes; |
4183 | ctxt->dst.val = (ctxt->d & ByteOp) ? (s8) ctxt->src.val : | 4202 | ctxt->dst.val = (ctxt->d & ByteOp) ? (s8) ctxt->src.val : |
@@ -4194,9 +4213,6 @@ twobyte_insn: | |||
4194 | ctxt->dst.val = (ctxt->op_bytes == 4) ? (u32) ctxt->src.val : | 4213 | ctxt->dst.val = (ctxt->op_bytes == 4) ? (u32) ctxt->src.val : |
4195 | (u64) ctxt->src.val; | 4214 | (u64) ctxt->src.val; |
4196 | break; | 4215 | break; |
4197 | case 0xc7: /* Grp9 (cmpxchg8b) */ | ||
4198 | rc = em_grp9(ctxt); | ||
4199 | break; | ||
4200 | default: | 4216 | default: |
4201 | goto cannot_emulate; | 4217 | goto cannot_emulate; |
4202 | } | 4218 | } |
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 405f2620392f..d68f99df690c 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
@@ -344,7 +344,7 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) | |||
344 | struct kvm_timer *pt = &ps->pit_timer; | 344 | struct kvm_timer *pt = &ps->pit_timer; |
345 | s64 interval; | 345 | s64 interval; |
346 | 346 | ||
347 | if (!irqchip_in_kernel(kvm)) | 347 | if (!irqchip_in_kernel(kvm) || ps->flags & KVM_PIT_FLAGS_HPET_LEGACY) |
348 | return; | 348 | return; |
349 | 349 | ||
350 | interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); | 350 | interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); |
@@ -397,15 +397,11 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val) | |||
397 | case 1: | 397 | case 1: |
398 | /* FIXME: enhance mode 4 precision */ | 398 | /* FIXME: enhance mode 4 precision */ |
399 | case 4: | 399 | case 4: |
400 | if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)) { | 400 | create_pit_timer(kvm, val, 0); |
401 | create_pit_timer(kvm, val, 0); | ||
402 | } | ||
403 | break; | 401 | break; |
404 | case 2: | 402 | case 2: |
405 | case 3: | 403 | case 3: |
406 | if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)){ | 404 | create_pit_timer(kvm, val, 1); |
407 | create_pit_timer(kvm, val, 1); | ||
408 | } | ||
409 | break; | 405 | break; |
410 | default: | 406 | default: |
411 | destroy_pit_timer(kvm->arch.vpit); | 407 | destroy_pit_timer(kvm->arch.vpit); |
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index cac4746d7ffb..b6a73537e1ef 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
@@ -262,9 +262,10 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
262 | 262 | ||
263 | void kvm_pic_reset(struct kvm_kpic_state *s) | 263 | void kvm_pic_reset(struct kvm_kpic_state *s) |
264 | { | 264 | { |
265 | int irq; | 265 | int irq, i; |
266 | struct kvm_vcpu *vcpu0 = s->pics_state->kvm->bsp_vcpu; | 266 | struct kvm_vcpu *vcpu; |
267 | u8 irr = s->irr, isr = s->imr; | 267 | u8 irr = s->irr, isr = s->imr; |
268 | bool found = false; | ||
268 | 269 | ||
269 | s->last_irr = 0; | 270 | s->last_irr = 0; |
270 | s->irr = 0; | 271 | s->irr = 0; |
@@ -281,12 +282,19 @@ void kvm_pic_reset(struct kvm_kpic_state *s) | |||
281 | s->special_fully_nested_mode = 0; | 282 | s->special_fully_nested_mode = 0; |
282 | s->init4 = 0; | 283 | s->init4 = 0; |
283 | 284 | ||
284 | for (irq = 0; irq < PIC_NUM_PINS/2; irq++) { | 285 | kvm_for_each_vcpu(i, vcpu, s->pics_state->kvm) |
285 | if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0)) | 286 | if (kvm_apic_accept_pic_intr(vcpu)) { |
286 | if (irr & (1 << irq) || isr & (1 << irq)) { | 287 | found = true; |
287 | pic_clear_isr(s, irq); | 288 | break; |
288 | } | 289 | } |
289 | } | 290 | |
291 | |||
292 | if (!found) | ||
293 | return; | ||
294 | |||
295 | for (irq = 0; irq < PIC_NUM_PINS/2; irq++) | ||
296 | if (irr & (1 << irq) || isr & (1 << irq)) | ||
297 | pic_clear_isr(s, irq); | ||
290 | } | 298 | } |
291 | 299 | ||
292 | static void pic_ioport_write(void *opaque, u32 addr, u32 val) | 300 | static void pic_ioport_write(void *opaque, u32 addr, u32 val) |
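kvm_pic_reset() no longer assumes the BSP is the interrupt sink; it scans all vcpus for one whose LAPIC will accept PIC interrupts before clearing ISR bits. The new loop amounts to this predicate (helper name illustrative):

    /* does any vcpu currently route PIC interrupts through its LAPIC? */
    static bool any_vcpu_accepts_pic_intr(struct kvm *kvm)
    {
            struct kvm_vcpu *vcpu;
            int i;

            kvm_for_each_vcpu(i, vcpu, kvm)
                    if (kvm_apic_accept_pic_intr(vcpu))
                            return true;
            return false;
    }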
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 54abb40199d6..cfdc6e0ef002 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include "irq.h" | 38 | #include "irq.h" |
39 | #include "trace.h" | 39 | #include "trace.h" |
40 | #include "x86.h" | 40 | #include "x86.h" |
41 | #include "cpuid.h" | ||
41 | 42 | ||
42 | #ifndef CONFIG_X86_64 | 43 | #ifndef CONFIG_X86_64 |
43 | #define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) | 44 | #define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) |
@@ -1120,7 +1121,7 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu) | |||
1120 | return 0; | 1121 | return 0; |
1121 | } | 1122 | } |
1122 | 1123 | ||
1123 | static int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) | 1124 | int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) |
1124 | { | 1125 | { |
1125 | u32 reg = apic_get_reg(apic, lvt_type); | 1126 | u32 reg = apic_get_reg(apic, lvt_type); |
1126 | int vector, mode, trig_mode; | 1127 | int vector, mode, trig_mode; |
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 138e8cc6fea6..6f4ce2575d09 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
@@ -34,6 +34,7 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu); | |||
34 | int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); | 34 | int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); |
35 | int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); | 35 | int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); |
36 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); | 36 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); |
37 | int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); | ||
37 | 38 | ||
38 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); | 39 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); |
39 | void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); | 40 | void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index f1b36cf3e3d0..224b02c3cda9 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -59,15 +59,6 @@ enum { | |||
59 | AUDIT_POST_SYNC | 59 | AUDIT_POST_SYNC |
60 | }; | 60 | }; |
61 | 61 | ||
62 | char *audit_point_name[] = { | ||
63 | "pre page fault", | ||
64 | "post page fault", | ||
65 | "pre pte write", | ||
66 | "post pte write", | ||
67 | "pre sync", | ||
68 | "post sync" | ||
69 | }; | ||
70 | |||
71 | #undef MMU_DEBUG | 62 | #undef MMU_DEBUG |
72 | 63 | ||
73 | #ifdef MMU_DEBUG | 64 | #ifdef MMU_DEBUG |
@@ -83,13 +74,10 @@ char *audit_point_name[] = { | |||
83 | #endif | 74 | #endif |
84 | 75 | ||
85 | #ifdef MMU_DEBUG | 76 | #ifdef MMU_DEBUG |
86 | static int dbg = 0; | 77 | static bool dbg = 0; |
87 | module_param(dbg, bool, 0644); | 78 | module_param(dbg, bool, 0644); |
88 | #endif | 79 | #endif |
89 | 80 | ||
90 | static int oos_shadow = 1; | ||
91 | module_param(oos_shadow, bool, 0644); | ||
92 | |||
93 | #ifndef MMU_DEBUG | 81 | #ifndef MMU_DEBUG |
94 | #define ASSERT(x) do { } while (0) | 82 | #define ASSERT(x) do { } while (0) |
95 | #else | 83 | #else |
@@ -593,6 +581,11 @@ static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, | |||
593 | return 0; | 581 | return 0; |
594 | } | 582 | } |
595 | 583 | ||
584 | static int mmu_memory_cache_free_objects(struct kvm_mmu_memory_cache *cache) | ||
585 | { | ||
586 | return cache->nobjs; | ||
587 | } | ||
588 | |||
596 | static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc, | 589 | static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc, |
597 | struct kmem_cache *cache) | 590 | struct kmem_cache *cache) |
598 | { | 591 | { |
@@ -953,21 +946,35 @@ static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn) | |||
953 | } | 946 | } |
954 | } | 947 | } |
955 | 948 | ||
949 | static unsigned long *__gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level, | ||
950 | struct kvm_memory_slot *slot) | ||
951 | { | ||
952 | struct kvm_lpage_info *linfo; | ||
953 | |||
954 | if (likely(level == PT_PAGE_TABLE_LEVEL)) | ||
955 | return &slot->rmap[gfn - slot->base_gfn]; | ||
956 | |||
957 | linfo = lpage_info_slot(gfn, slot, level); | ||
958 | return &linfo->rmap_pde; | ||
959 | } | ||
960 | |||
956 | /* | 961 | /* |
957 | * Take gfn and return the reverse mapping to it. | 962 | * Take gfn and return the reverse mapping to it. |
958 | */ | 963 | */ |
959 | static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level) | 964 | static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level) |
960 | { | 965 | { |
961 | struct kvm_memory_slot *slot; | 966 | struct kvm_memory_slot *slot; |
962 | struct kvm_lpage_info *linfo; | ||
963 | 967 | ||
964 | slot = gfn_to_memslot(kvm, gfn); | 968 | slot = gfn_to_memslot(kvm, gfn); |
965 | if (likely(level == PT_PAGE_TABLE_LEVEL)) | 969 | return __gfn_to_rmap(kvm, gfn, level, slot); |
966 | return &slot->rmap[gfn - slot->base_gfn]; | 970 | } |
967 | 971 | ||
968 | linfo = lpage_info_slot(gfn, slot, level); | 972 | static bool rmap_can_add(struct kvm_vcpu *vcpu) |
973 | { | ||
974 | struct kvm_mmu_memory_cache *cache; | ||
969 | 975 | ||
970 | return &linfo->rmap_pde; | 976 | cache = &vcpu->arch.mmu_pte_list_desc_cache; |
977 | return mmu_memory_cache_free_objects(cache); | ||
971 | } | 978 | } |
972 | 979 | ||
973 | static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) | 980 | static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) |
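rmap_can_add() exists so the pte-prefetch path can check, while already holding mmu_lock, whether the pre-topped-up pte_list_desc cache still has objects; prefetch is optional, so it is simply skipped rather than allocating under the lock. A sketch of the intended guard (wrapper name illustrative):

    static void try_prefetch_spte(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
    {
            /* rmap_add() may consume a pte_list_desc from the cache; skip
             * the optional prefetch instead of allocating under mmu_lock */
            if (!rmap_can_add(vcpu))
                    return;

            rmap_add(vcpu, spte, gfn);
    }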
@@ -1004,17 +1011,16 @@ static void drop_spte(struct kvm *kvm, u64 *sptep) | |||
1004 | rmap_remove(kvm, sptep); | 1011 | rmap_remove(kvm, sptep); |
1005 | } | 1012 | } |
1006 | 1013 | ||
1007 | static int rmap_write_protect(struct kvm *kvm, u64 gfn) | 1014 | int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn, |
1015 | struct kvm_memory_slot *slot) | ||
1008 | { | 1016 | { |
1009 | unsigned long *rmapp; | 1017 | unsigned long *rmapp; |
1010 | u64 *spte; | 1018 | u64 *spte; |
1011 | int i, write_protected = 0; | 1019 | int i, write_protected = 0; |
1012 | 1020 | ||
1013 | rmapp = gfn_to_rmap(kvm, gfn, PT_PAGE_TABLE_LEVEL); | 1021 | rmapp = __gfn_to_rmap(kvm, gfn, PT_PAGE_TABLE_LEVEL, slot); |
1014 | |||
1015 | spte = rmap_next(kvm, rmapp, NULL); | 1022 | spte = rmap_next(kvm, rmapp, NULL); |
1016 | while (spte) { | 1023 | while (spte) { |
1017 | BUG_ON(!spte); | ||
1018 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | 1024 | BUG_ON(!(*spte & PT_PRESENT_MASK)); |
1019 | rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); | 1025 | rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); |
1020 | if (is_writable_pte(*spte)) { | 1026 | if (is_writable_pte(*spte)) { |
@@ -1027,12 +1033,11 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
1027 | /* check for huge page mappings */ | 1033 | /* check for huge page mappings */ |
1028 | for (i = PT_DIRECTORY_LEVEL; | 1034 | for (i = PT_DIRECTORY_LEVEL; |
1029 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | 1035 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { |
1030 | rmapp = gfn_to_rmap(kvm, gfn, i); | 1036 | rmapp = __gfn_to_rmap(kvm, gfn, i, slot); |
1031 | spte = rmap_next(kvm, rmapp, NULL); | 1037 | spte = rmap_next(kvm, rmapp, NULL); |
1032 | while (spte) { | 1038 | while (spte) { |
1033 | BUG_ON(!spte); | ||
1034 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | 1039 | BUG_ON(!(*spte & PT_PRESENT_MASK)); |
1035 | BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)); | 1040 | BUG_ON(!is_large_pte(*spte)); |
1036 | pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn); | 1041 | pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn); |
1037 | if (is_writable_pte(*spte)) { | 1042 | if (is_writable_pte(*spte)) { |
1038 | drop_spte(kvm, spte); | 1043 | drop_spte(kvm, spte); |
@@ -1047,6 +1052,14 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
1047 | return write_protected; | 1052 | return write_protected; |
1048 | } | 1053 | } |
1049 | 1054 | ||
1055 | static int rmap_write_protect(struct kvm *kvm, u64 gfn) | ||
1056 | { | ||
1057 | struct kvm_memory_slot *slot; | ||
1058 | |||
1059 | slot = gfn_to_memslot(kvm, gfn); | ||
1060 | return kvm_mmu_rmap_write_protect(kvm, gfn, slot); | ||
1061 | } | ||
1062 | |||
1050 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, | 1063 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, |
1051 | unsigned long data) | 1064 | unsigned long data) |
1052 | { | 1065 | { |
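Factoring out __gfn_to_rmap() with an explicit memslot lets callers that already hold a slot avoid a gfn_to_memslot() lookup per gfn; the newly non-static kvm_mmu_rmap_write_protect() benefits the same way. A hypothetical bulk user, e.g. write-protecting a whole slot when dirty logging starts:

    static void slot_write_protect(struct kvm *kvm, struct kvm_memory_slot *slot)
    {
            gfn_t gfn;

            /* one memslot lookup for the whole range instead of one per gfn */
            for (gfn = slot->base_gfn; gfn < slot->base_gfn + slot->npages; ++gfn)
                    kvm_mmu_rmap_write_protect(kvm, gfn, slot);
    }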
@@ -1103,15 +1116,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | |||
1103 | int (*handler)(struct kvm *kvm, unsigned long *rmapp, | 1116 | int (*handler)(struct kvm *kvm, unsigned long *rmapp, |
1104 | unsigned long data)) | 1117 | unsigned long data)) |
1105 | { | 1118 | { |
1106 | int i, j; | 1119 | int j; |
1107 | int ret; | 1120 | int ret; |
1108 | int retval = 0; | 1121 | int retval = 0; |
1109 | struct kvm_memslots *slots; | 1122 | struct kvm_memslots *slots; |
1123 | struct kvm_memory_slot *memslot; | ||
1110 | 1124 | ||
1111 | slots = kvm_memslots(kvm); | 1125 | slots = kvm_memslots(kvm); |
1112 | 1126 | ||
1113 | for (i = 0; i < slots->nmemslots; i++) { | 1127 | kvm_for_each_memslot(memslot, slots) { |
1114 | struct kvm_memory_slot *memslot = &slots->memslots[i]; | ||
1115 | unsigned long start = memslot->userspace_addr; | 1128 | unsigned long start = memslot->userspace_addr; |
1116 | unsigned long end; | 1129 | unsigned long end; |
1117 | 1130 | ||
@@ -1324,7 +1337,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, | |||
1324 | PAGE_SIZE); | 1337 | PAGE_SIZE); |
1325 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); | 1338 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); |
1326 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); | 1339 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); |
1327 | bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); | 1340 | bitmap_zero(sp->slot_bitmap, KVM_MEM_SLOTS_NUM); |
1328 | sp->parent_ptes = 0; | 1341 | sp->parent_ptes = 0; |
1329 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); | 1342 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); |
1330 | kvm_mod_used_mmu_pages(vcpu->kvm, +1); | 1343 | kvm_mod_used_mmu_pages(vcpu->kvm, +1); |
@@ -1511,6 +1524,13 @@ static int kvm_sync_page_transient(struct kvm_vcpu *vcpu, | |||
1511 | return ret; | 1524 | return ret; |
1512 | } | 1525 | } |
1513 | 1526 | ||
1527 | #ifdef CONFIG_KVM_MMU_AUDIT | ||
1528 | #include "mmu_audit.c" | ||
1529 | #else | ||
1530 | static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { } | ||
1531 | static void mmu_audit_disable(void) { } | ||
1532 | #endif | ||
1533 | |||
1514 | static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 1534 | static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
1515 | struct list_head *invalid_list) | 1535 | struct list_head *invalid_list) |
1516 | { | 1536 | { |
@@ -1640,6 +1660,18 @@ static void init_shadow_page_table(struct kvm_mmu_page *sp) | |||
1640 | sp->spt[i] = 0ull; | 1660 | sp->spt[i] = 0ull; |
1641 | } | 1661 | } |
1642 | 1662 | ||
1663 | static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp) | ||
1664 | { | ||
1665 | sp->write_flooding_count = 0; | ||
1666 | } | ||
1667 | |||
1668 | static void clear_sp_write_flooding_count(u64 *spte) | ||
1669 | { | ||
1670 | struct kvm_mmu_page *sp = page_header(__pa(spte)); | ||
1671 | |||
1672 | __clear_sp_write_flooding_count(sp); | ||
1673 | } | ||
1674 | |||
1643 | static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | 1675 | static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, |
1644 | gfn_t gfn, | 1676 | gfn_t gfn, |
1645 | gva_t gaddr, | 1677 | gva_t gaddr, |
@@ -1683,6 +1715,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
1683 | } else if (sp->unsync) | 1715 | } else if (sp->unsync) |
1684 | kvm_mmu_mark_parents_unsync(sp); | 1716 | kvm_mmu_mark_parents_unsync(sp); |
1685 | 1717 | ||
1718 | __clear_sp_write_flooding_count(sp); | ||
1686 | trace_kvm_mmu_get_page(sp, false); | 1719 | trace_kvm_mmu_get_page(sp, false); |
1687 | return sp; | 1720 | return sp; |
1688 | } | 1721 | } |
@@ -1796,7 +1829,7 @@ static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1796 | } | 1829 | } |
1797 | } | 1830 | } |
1798 | 1831 | ||
1799 | static void mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp, | 1832 | static bool mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp, |
1800 | u64 *spte) | 1833 | u64 *spte) |
1801 | { | 1834 | { |
1802 | u64 pte; | 1835 | u64 pte; |
@@ -1804,17 +1837,21 @@ static void mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp, | |||
1804 | 1837 | ||
1805 | pte = *spte; | 1838 | pte = *spte; |
1806 | if (is_shadow_present_pte(pte)) { | 1839 | if (is_shadow_present_pte(pte)) { |
1807 | if (is_last_spte(pte, sp->role.level)) | 1840 | if (is_last_spte(pte, sp->role.level)) { |
1808 | drop_spte(kvm, spte); | 1841 | drop_spte(kvm, spte); |
1809 | else { | 1842 | if (is_large_pte(pte)) |
1843 | --kvm->stat.lpages; | ||
1844 | } else { | ||
1810 | child = page_header(pte & PT64_BASE_ADDR_MASK); | 1845 | child = page_header(pte & PT64_BASE_ADDR_MASK); |
1811 | drop_parent_pte(child, spte); | 1846 | drop_parent_pte(child, spte); |
1812 | } | 1847 | } |
1813 | } else if (is_mmio_spte(pte)) | 1848 | return true; |
1849 | } | ||
1850 | |||
1851 | if (is_mmio_spte(pte)) | ||
1814 | mmu_spte_clear_no_track(spte); | 1852 | mmu_spte_clear_no_track(spte); |
1815 | 1853 | ||
1816 | if (is_large_pte(pte)) | 1854 | return false; |
1817 | --kvm->stat.lpages; | ||
1818 | } | 1855 | } |
1819 | 1856 | ||
1820 | static void kvm_mmu_page_unlink_children(struct kvm *kvm, | 1857 | static void kvm_mmu_page_unlink_children(struct kvm *kvm, |
@@ -1831,15 +1868,6 @@ static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte) | |||
1831 | mmu_page_remove_parent_pte(sp, parent_pte); | 1868 | mmu_page_remove_parent_pte(sp, parent_pte); |
1832 | } | 1869 | } |
1833 | 1870 | ||
1834 | static void kvm_mmu_reset_last_pte_updated(struct kvm *kvm) | ||
1835 | { | ||
1836 | int i; | ||
1837 | struct kvm_vcpu *vcpu; | ||
1838 | |||
1839 | kvm_for_each_vcpu(i, vcpu, kvm) | ||
1840 | vcpu->arch.last_pte_updated = NULL; | ||
1841 | } | ||
1842 | |||
1843 | static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) | 1871 | static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) |
1844 | { | 1872 | { |
1845 | u64 *parent_pte; | 1873 | u64 *parent_pte; |
@@ -1899,7 +1927,6 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, | |||
1899 | } | 1927 | } |
1900 | 1928 | ||
1901 | sp->role.invalid = 1; | 1929 | sp->role.invalid = 1; |
1902 | kvm_mmu_reset_last_pte_updated(kvm); | ||
1903 | return ret; | 1930 | return ret; |
1904 | } | 1931 | } |
1905 | 1932 | ||
@@ -1985,7 +2012,7 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages) | |||
1985 | kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages; | 2012 | kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages; |
1986 | } | 2013 | } |
1987 | 2014 | ||
1988 | static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) | 2015 | int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) |
1989 | { | 2016 | { |
1990 | struct kvm_mmu_page *sp; | 2017 | struct kvm_mmu_page *sp; |
1991 | struct hlist_node *node; | 2018 | struct hlist_node *node; |
@@ -1994,7 +2021,7 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) | |||
1994 | 2021 | ||
1995 | pgprintk("%s: looking for gfn %llx\n", __func__, gfn); | 2022 | pgprintk("%s: looking for gfn %llx\n", __func__, gfn); |
1996 | r = 0; | 2023 | r = 0; |
1997 | 2024 | spin_lock(&kvm->mmu_lock); | |
1998 | for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { | 2025 | for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { |
1999 | pgprintk("%s: gfn %llx role %x\n", __func__, gfn, | 2026 | pgprintk("%s: gfn %llx role %x\n", __func__, gfn, |
2000 | sp->role.word); | 2027 | sp->role.word); |
@@ -2002,22 +2029,11 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) | |||
2002 | kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); | 2029 | kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); |
2003 | } | 2030 | } |
2004 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | 2031 | kvm_mmu_commit_zap_page(kvm, &invalid_list); |
2005 | return r; | 2032 | spin_unlock(&kvm->mmu_lock); |
2006 | } | ||
2007 | |||
2008 | static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) | ||
2009 | { | ||
2010 | struct kvm_mmu_page *sp; | ||
2011 | struct hlist_node *node; | ||
2012 | LIST_HEAD(invalid_list); | ||
2013 | 2033 | ||
2014 | for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { | 2034 | return r; |
2015 | pgprintk("%s: zap %llx %x\n", | ||
2016 | __func__, gfn, sp->role.word); | ||
2017 | kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); | ||
2018 | } | ||
2019 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | ||
2020 | } | 2035 | } |
2036 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page); | ||
2021 | 2037 | ||
2022 | static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) | 2038 | static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) |
2023 | { | 2039 | { |
@@ -2169,8 +2185,6 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, | |||
2169 | return 1; | 2185 | return 1; |
2170 | 2186 | ||
2171 | if (!need_unsync && !s->unsync) { | 2187 | if (!need_unsync && !s->unsync) { |
2172 | if (!oos_shadow) | ||
2173 | return 1; | ||
2174 | need_unsync = true; | 2188 | need_unsync = true; |
2175 | } | 2189 | } |
2176 | } | 2190 | } |
@@ -2191,11 +2205,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2191 | if (set_mmio_spte(sptep, gfn, pfn, pte_access)) | 2205 | if (set_mmio_spte(sptep, gfn, pfn, pte_access)) |
2192 | return 0; | 2206 | return 0; |
2193 | 2207 | ||
2194 | /* | ||
2195 | * We don't set the accessed bit, since we sometimes want to see | ||
2196 | * whether the guest actually used the pte (in order to detect | ||
2197 | * demand paging). | ||
2198 | */ | ||
2199 | spte = PT_PRESENT_MASK; | 2208 | spte = PT_PRESENT_MASK; |
2200 | if (!speculative) | 2209 | if (!speculative) |
2201 | spte |= shadow_accessed_mask; | 2210 | spte |= shadow_accessed_mask; |
@@ -2346,10 +2355,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2346 | } | 2355 | } |
2347 | } | 2356 | } |
2348 | kvm_release_pfn_clean(pfn); | 2357 | kvm_release_pfn_clean(pfn); |
2349 | if (speculative) { | ||
2350 | vcpu->arch.last_pte_updated = sptep; | ||
2351 | vcpu->arch.last_pte_gfn = gfn; | ||
2352 | } | ||
2353 | } | 2358 | } |
2354 | 2359 | ||
2355 | static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) | 2360 | static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) |
@@ -2840,12 +2845,12 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) | |||
2840 | return; | 2845 | return; |
2841 | 2846 | ||
2842 | vcpu_clear_mmio_info(vcpu, ~0ul); | 2847 | vcpu_clear_mmio_info(vcpu, ~0ul); |
2843 | trace_kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); | 2848 | kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); |
2844 | if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { | 2849 | if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { |
2845 | hpa_t root = vcpu->arch.mmu.root_hpa; | 2850 | hpa_t root = vcpu->arch.mmu.root_hpa; |
2846 | sp = page_header(root); | 2851 | sp = page_header(root); |
2847 | mmu_sync_children(vcpu, sp); | 2852 | mmu_sync_children(vcpu, sp); |
2848 | trace_kvm_mmu_audit(vcpu, AUDIT_POST_SYNC); | 2853 | kvm_mmu_audit(vcpu, AUDIT_POST_SYNC); |
2849 | return; | 2854 | return; |
2850 | } | 2855 | } |
2851 | for (i = 0; i < 4; ++i) { | 2856 | for (i = 0; i < 4; ++i) { |
@@ -2857,7 +2862,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) | |||
2857 | mmu_sync_children(vcpu, sp); | 2862 | mmu_sync_children(vcpu, sp); |
2858 | } | 2863 | } |
2859 | } | 2864 | } |
2860 | trace_kvm_mmu_audit(vcpu, AUDIT_POST_SYNC); | 2865 | kvm_mmu_audit(vcpu, AUDIT_POST_SYNC); |
2861 | } | 2866 | } |
2862 | 2867 | ||
2863 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) | 2868 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) |
@@ -3510,28 +3515,119 @@ static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page, | |||
3510 | kvm_mmu_flush_tlb(vcpu); | 3515 | kvm_mmu_flush_tlb(vcpu); |
3511 | } | 3516 | } |
3512 | 3517 | ||
3513 | static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu) | 3518 | static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, |
3519 | const u8 *new, int *bytes) | ||
3514 | { | 3520 | { |
3515 | u64 *spte = vcpu->arch.last_pte_updated; | 3521 | u64 gentry; |
3522 | int r; | ||
3523 | |||
3524 | /* | ||
3525 | * Assume that the pte write is on a page table of the same type | ||
3526 | * as the current vcpu paging mode, since we only update sptes | ||
3527 | * when they have the same mode. | ||
3528 | */ | ||
3529 | if (is_pae(vcpu) && *bytes == 4) { | ||
3530 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ | ||
3531 | *gpa &= ~(gpa_t)7; | ||
3532 | *bytes = 8; | ||
3533 | r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, min(*bytes, 8)); | ||
3534 | if (r) | ||
3535 | gentry = 0; | ||
3536 | new = (const u8 *)&gentry; | ||
3537 | } | ||
3516 | 3538 | ||
3517 | return !!(spte && (*spte & shadow_accessed_mask)); | 3539 | switch (*bytes) { |
3540 | case 4: | ||
3541 | gentry = *(const u32 *)new; | ||
3542 | break; | ||
3543 | case 8: | ||
3544 | gentry = *(const u64 *)new; | ||
3545 | break; | ||
3546 | default: | ||
3547 | gentry = 0; | ||
3548 | break; | ||
3549 | } | ||
3550 | |||
3551 | return gentry; | ||
3518 | } | 3552 | } |
3519 | 3553 | ||
3520 | static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn) | 3554 | /* |
3555 | * If we're seeing too many writes to a page, it may no longer be a page table, | ||
3556 | * or we may be forking, in which case it is better to unmap the page. | ||
3557 | */ | ||
3558 | static bool detect_write_flooding(struct kvm_mmu_page *sp, u64 *spte) | ||
3521 | { | 3559 | { |
3522 | u64 *spte = vcpu->arch.last_pte_updated; | 3560 | /* |
3561 | * Skip write-flooding detection for an sp at level 1, because it | ||
3562 | * can become unsync, and then the guest page is not write-protected. | ||
3563 | */ | ||
3564 | if (sp->role.level == 1) | ||
3565 | return false; | ||
3523 | 3566 | ||
3524 | if (spte | 3567 | return ++sp->write_flooding_count >= 3; |
3525 | && vcpu->arch.last_pte_gfn == gfn | 3568 | } |
3526 | && shadow_accessed_mask | 3569 | |
3527 | && !(*spte & shadow_accessed_mask) | 3570 | /* |
3528 | && is_shadow_present_pte(*spte)) | 3571 | * Misaligned accesses are too much trouble to fix up; also, they usually |
3529 | set_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte); | 3572 | * indicate a page is not used as a page table. |
3573 | */ | ||
3574 | static bool detect_write_misaligned(struct kvm_mmu_page *sp, gpa_t gpa, | ||
3575 | int bytes) | ||
3576 | { | ||
3577 | unsigned offset, pte_size, misaligned; | ||
3578 | |||
3579 | pgprintk("misaligned: gpa %llx bytes %d role %x\n", | ||
3580 | gpa, bytes, sp->role.word); | ||
3581 | |||
3582 | offset = offset_in_page(gpa); | ||
3583 | pte_size = sp->role.cr4_pae ? 8 : 4; | ||
3584 | |||
3585 | /* | ||
3586 | * Sometimes the OS only writes the last byte to update status | ||
3587 | * bits; for example, Linux uses an andb instruction in clear_bit(). | ||
3588 | */ | ||
3589 | if (!(offset & (pte_size - 1)) && bytes == 1) | ||
3590 | return false; | ||
3591 | |||
3592 | misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); | ||
3593 | misaligned |= bytes < 4; | ||
3594 | |||
3595 | return misaligned; | ||
3596 | } | ||
3597 | |||
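A worked example of the alignment test above, for 64-bit ptes (pte_size = 8): the XOR of the first and last byte offsets has a bit at or above log2(pte_size) exactly when the write crosses a pte boundary:

    /* offset = 4, bytes = 4: (4 ^ 7)  & ~7 = 3  & ~7 = 0 -> one pte, aligned
     * offset = 4, bytes = 8: (4 ^ 11) & ~7 = 15 & ~7 = 8 -> crosses ptes
     * the "bytes < 4" term additionally treats sub-word writes as
     * misaligned, except the bytes == 1 status-bit case handled earlier */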
3598 | static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte) | ||
3599 | { | ||
3600 | unsigned page_offset, quadrant; | ||
3601 | u64 *spte; | ||
3602 | int level; | ||
3603 | |||
3604 | page_offset = offset_in_page(gpa); | ||
3605 | level = sp->role.level; | ||
3606 | *nspte = 1; | ||
3607 | if (!sp->role.cr4_pae) { | ||
3608 | page_offset <<= 1; /* 32->64 */ | ||
3609 | /* | ||
3610 | * A 32-bit pde maps 4MB while the shadow pdes map | ||
3611 | * only 2MB. So we need to double the offset again | ||
3612 | * and zap two pdes instead of one. | ||
3613 | */ | ||
3614 | if (level == PT32_ROOT_LEVEL) { | ||
3615 | page_offset &= ~7; /* kill rounding error */ | ||
3616 | page_offset <<= 1; | ||
3617 | *nspte = 2; | ||
3618 | } | ||
3619 | quadrant = page_offset >> PAGE_SHIFT; | ||
3620 | page_offset &= ~PAGE_MASK; | ||
3621 | if (quadrant != sp->role.quadrant) | ||
3622 | return NULL; | ||
3623 | } | ||
3624 | |||
3625 | spte = &sp->spt[page_offset / sizeof(*spte)]; | ||
3626 | return spte; | ||
3530 | } | 3627 | } |
3531 | 3628 | ||
3532 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 3629 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
3533 | const u8 *new, int bytes, | 3630 | const u8 *new, int bytes) |
3534 | bool guest_initiated) | ||
3535 | { | 3631 | { |
3536 | gfn_t gfn = gpa >> PAGE_SHIFT; | 3632 | gfn_t gfn = gpa >> PAGE_SHIFT; |
3537 | union kvm_mmu_page_role mask = { .word = 0 }; | 3633 | union kvm_mmu_page_role mask = { .word = 0 }; |
@@ -3539,8 +3635,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3539 | struct hlist_node *node; | 3635 | struct hlist_node *node; |
3540 | LIST_HEAD(invalid_list); | 3636 | LIST_HEAD(invalid_list); |
3541 | u64 entry, gentry, *spte; | 3637 | u64 entry, gentry, *spte; |
3542 | unsigned pte_size, page_offset, misaligned, quadrant, offset; | 3638 | int npte; |
3543 | int level, npte, invlpg_counter, r, flooded = 0; | ||
3544 | bool remote_flush, local_flush, zap_page; | 3639 | bool remote_flush, local_flush, zap_page; |
3545 | 3640 | ||
3546 | /* | 3641 | /* |
@@ -3551,112 +3646,45 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3551 | return; | 3646 | return; |
3552 | 3647 | ||
3553 | zap_page = remote_flush = local_flush = false; | 3648 | zap_page = remote_flush = local_flush = false; |
3554 | offset = offset_in_page(gpa); | ||
3555 | 3649 | ||
3556 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); | 3650 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); |
3557 | 3651 | ||
3558 | invlpg_counter = atomic_read(&vcpu->kvm->arch.invlpg_counter); | 3652 | gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, new, &bytes); |
3559 | 3653 | ||
3560 | /* | 3654 | /* |
3561 | * Assume that the pte write on a page table of the same type | 3655 | * No need to care whether the memory allocation succeeded, |
3562 | * as the current vcpu paging mode since we update the sptes only | 3656 | * since pte prefetch is skipped if the cache does not have |
3563 | * when they have the same mode. | 3657 | * enough objects. |
3564 | */ | 3658 | */ |
3565 | if ((is_pae(vcpu) && bytes == 4) || !new) { | 3659 | mmu_topup_memory_caches(vcpu); |
3566 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ | ||
3567 | if (is_pae(vcpu)) { | ||
3568 | gpa &= ~(gpa_t)7; | ||
3569 | bytes = 8; | ||
3570 | } | ||
3571 | r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8)); | ||
3572 | if (r) | ||
3573 | gentry = 0; | ||
3574 | new = (const u8 *)&gentry; | ||
3575 | } | ||
3576 | |||
3577 | switch (bytes) { | ||
3578 | case 4: | ||
3579 | gentry = *(const u32 *)new; | ||
3580 | break; | ||
3581 | case 8: | ||
3582 | gentry = *(const u64 *)new; | ||
3583 | break; | ||
3584 | default: | ||
3585 | gentry = 0; | ||
3586 | break; | ||
3587 | } | ||
3588 | 3660 | ||
3589 | spin_lock(&vcpu->kvm->mmu_lock); | 3661 | spin_lock(&vcpu->kvm->mmu_lock); |
3590 | if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) | ||
3591 | gentry = 0; | ||
3592 | kvm_mmu_free_some_pages(vcpu); | ||
3593 | ++vcpu->kvm->stat.mmu_pte_write; | 3662 | ++vcpu->kvm->stat.mmu_pte_write; |
3594 | trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); | 3663 | kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); |
3595 | if (guest_initiated) { | ||
3596 | kvm_mmu_access_page(vcpu, gfn); | ||
3597 | if (gfn == vcpu->arch.last_pt_write_gfn | ||
3598 | && !last_updated_pte_accessed(vcpu)) { | ||
3599 | ++vcpu->arch.last_pt_write_count; | ||
3600 | if (vcpu->arch.last_pt_write_count >= 3) | ||
3601 | flooded = 1; | ||
3602 | } else { | ||
3603 | vcpu->arch.last_pt_write_gfn = gfn; | ||
3604 | vcpu->arch.last_pt_write_count = 1; | ||
3605 | vcpu->arch.last_pte_updated = NULL; | ||
3606 | } | ||
3607 | } | ||
3608 | 3664 | ||
3609 | mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; | 3665 | mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; |
3610 | for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) { | 3666 | for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) { |
3611 | pte_size = sp->role.cr4_pae ? 8 : 4; | 3667 | spte = get_written_sptes(sp, gpa, &npte); |
3612 | misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); | 3668 | |
3613 | misaligned |= bytes < 4; | 3669 | if (detect_write_misaligned(sp, gpa, bytes) || |
3614 | if (misaligned || flooded) { | 3670 | detect_write_flooding(sp, spte)) { |
3615 | /* | ||
3616 | * Misaligned accesses are too much trouble to fix | ||
3617 | * up; also, they usually indicate a page is not used | ||
3618 | * as a page table. | ||
3619 | * | ||
3620 | * If we're seeing too many writes to a page, | ||
3621 | * it may no longer be a page table, or we may be | ||
3622 | * forking, in which case it is better to unmap the | ||
3623 | * page. | ||
3624 | */ | ||
3625 | pgprintk("misaligned: gpa %llx bytes %d role %x\n", | ||
3626 | gpa, bytes, sp->role.word); | ||
3627 | zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, | 3671 | zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, |
3628 | &invalid_list); | 3672 | &invalid_list); |
3629 | ++vcpu->kvm->stat.mmu_flooded; | 3673 | ++vcpu->kvm->stat.mmu_flooded; |
3630 | continue; | 3674 | continue; |
3631 | } | 3675 | } |
3632 | page_offset = offset; | 3676 | |
3633 | level = sp->role.level; | 3677 | spte = get_written_sptes(sp, gpa, &npte); |
3634 | npte = 1; | 3678 | if (!spte) |
3635 | if (!sp->role.cr4_pae) { | 3679 | continue; |
3636 | page_offset <<= 1; /* 32->64 */ | 3680 | |
3637 | /* | ||
3638 | * A 32-bit pde maps 4MB while the shadow pdes map | ||
3639 | * only 2MB. So we need to double the offset again | ||
3640 | * and zap two pdes instead of one. | ||
3641 | */ | ||
3642 | if (level == PT32_ROOT_LEVEL) { | ||
3643 | page_offset &= ~7; /* kill rounding error */ | ||
3644 | page_offset <<= 1; | ||
3645 | npte = 2; | ||
3646 | } | ||
3647 | quadrant = page_offset >> PAGE_SHIFT; | ||
3648 | page_offset &= ~PAGE_MASK; | ||
3649 | if (quadrant != sp->role.quadrant) | ||
3650 | continue; | ||
3651 | } | ||
3652 | local_flush = true; | 3681 | local_flush = true; |
3653 | spte = &sp->spt[page_offset / sizeof(*spte)]; | ||
3654 | while (npte--) { | 3682 | while (npte--) { |
3655 | entry = *spte; | 3683 | entry = *spte; |
3656 | mmu_page_zap_pte(vcpu->kvm, sp, spte); | 3684 | mmu_page_zap_pte(vcpu->kvm, sp, spte); |
3657 | if (gentry && | 3685 | if (gentry && |
3658 | !((sp->role.word ^ vcpu->arch.mmu.base_role.word) | 3686 | !((sp->role.word ^ vcpu->arch.mmu.base_role.word) |
3659 | & mask.word)) | 3687 | & mask.word) && rmap_can_add(vcpu)) |
3660 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); | 3688 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); |
3661 | if (!remote_flush && need_remote_flush(entry, *spte)) | 3689 | if (!remote_flush && need_remote_flush(entry, *spte)) |
3662 | remote_flush = true; | 3690 | remote_flush = true; |
@@ -3665,7 +3693,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3665 | } | 3693 | } |
3666 | mmu_pte_write_flush_tlb(vcpu, zap_page, remote_flush, local_flush); | 3694 | mmu_pte_write_flush_tlb(vcpu, zap_page, remote_flush, local_flush); |
3667 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | 3695 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); |
3668 | trace_kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE); | 3696 | kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE); |
3669 | spin_unlock(&vcpu->kvm->mmu_lock); | 3697 | spin_unlock(&vcpu->kvm->mmu_lock); |
3670 | } | 3698 | } |
3671 | 3699 | ||
@@ -3679,9 +3707,8 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) | |||
3679 | 3707 | ||
3680 | gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); | 3708 | gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); |
3681 | 3709 | ||
3682 | spin_lock(&vcpu->kvm->mmu_lock); | ||
3683 | r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); | 3710 | r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); |
3684 | spin_unlock(&vcpu->kvm->mmu_lock); | 3711 | |
3685 | return r; | 3712 | return r; |
3686 | } | 3713 | } |
3687 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); | 3714 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); |
@@ -3702,10 +3729,18 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) | |||
3702 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | 3729 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); |
3703 | } | 3730 | } |
3704 | 3731 | ||
3732 | static bool is_mmio_page_fault(struct kvm_vcpu *vcpu, gva_t addr) | ||
3733 | { | ||
3734 | if (vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu)) | ||
3735 | return vcpu_match_mmio_gpa(vcpu, addr); | ||
3736 | |||
3737 | return vcpu_match_mmio_gva(vcpu, addr); | ||
3738 | } | ||
3739 | |||
3705 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code, | 3740 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code, |
3706 | void *insn, int insn_len) | 3741 | void *insn, int insn_len) |
3707 | { | 3742 | { |
3708 | int r; | 3743 | int r, emulation_type = EMULTYPE_RETRY; |
3709 | enum emulation_result er; | 3744 | enum emulation_result er; |
3710 | 3745 | ||
3711 | r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false); | 3746 | r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false); |
@@ -3717,11 +3752,10 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code, | |||
3717 | goto out; | 3752 | goto out; |
3718 | } | 3753 | } |
3719 | 3754 | ||
3720 | r = mmu_topup_memory_caches(vcpu); | 3755 | if (is_mmio_page_fault(vcpu, cr2)) |
3721 | if (r) | 3756 | emulation_type = 0; |
3722 | goto out; | ||
3723 | 3757 | ||
3724 | er = x86_emulate_instruction(vcpu, cr2, 0, insn, insn_len); | 3758 | er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len); |
3725 | 3759 | ||
3726 | switch (er) { | 3760 | switch (er) { |
3727 | case EMULATE_DONE: | 3761 | case EMULATE_DONE: |
@@ -3792,7 +3826,11 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu) | |||
3792 | int kvm_mmu_create(struct kvm_vcpu *vcpu) | 3826 | int kvm_mmu_create(struct kvm_vcpu *vcpu) |
3793 | { | 3827 | { |
3794 | ASSERT(vcpu); | 3828 | ASSERT(vcpu); |
3795 | ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); | 3829 | |
3830 | vcpu->arch.walk_mmu = &vcpu->arch.mmu; | ||
3831 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; | ||
3832 | vcpu->arch.mmu.translate_gpa = translate_gpa; | ||
3833 | vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa; | ||
3796 | 3834 | ||
3797 | return alloc_mmu_pages(vcpu); | 3835 | return alloc_mmu_pages(vcpu); |
3798 | } | 3836 | } |
@@ -3852,14 +3890,14 @@ restart: | |||
3852 | spin_unlock(&kvm->mmu_lock); | 3890 | spin_unlock(&kvm->mmu_lock); |
3853 | } | 3891 | } |
3854 | 3892 | ||
3855 | static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm, | 3893 | static void kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm, |
3856 | struct list_head *invalid_list) | 3894 | struct list_head *invalid_list) |
3857 | { | 3895 | { |
3858 | struct kvm_mmu_page *page; | 3896 | struct kvm_mmu_page *page; |
3859 | 3897 | ||
3860 | page = container_of(kvm->arch.active_mmu_pages.prev, | 3898 | page = container_of(kvm->arch.active_mmu_pages.prev, |
3861 | struct kvm_mmu_page, link); | 3899 | struct kvm_mmu_page, link); |
3862 | return kvm_mmu_prepare_zap_page(kvm, page, invalid_list); | 3900 | kvm_mmu_prepare_zap_page(kvm, page, invalid_list); |
3863 | } | 3901 | } |
3864 | 3902 | ||
3865 | static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) | 3903 | static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) |
@@ -3874,15 +3912,15 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) | |||
3874 | raw_spin_lock(&kvm_lock); | 3912 | raw_spin_lock(&kvm_lock); |
3875 | 3913 | ||
3876 | list_for_each_entry(kvm, &vm_list, vm_list) { | 3914 | list_for_each_entry(kvm, &vm_list, vm_list) { |
3877 | int idx, freed_pages; | 3915 | int idx; |
3878 | LIST_HEAD(invalid_list); | 3916 | LIST_HEAD(invalid_list); |
3879 | 3917 | ||
3880 | idx = srcu_read_lock(&kvm->srcu); | 3918 | idx = srcu_read_lock(&kvm->srcu); |
3881 | spin_lock(&kvm->mmu_lock); | 3919 | spin_lock(&kvm->mmu_lock); |
3882 | if (!kvm_freed && nr_to_scan > 0 && | 3920 | if (!kvm_freed && nr_to_scan > 0 && |
3883 | kvm->arch.n_used_mmu_pages > 0) { | 3921 | kvm->arch.n_used_mmu_pages > 0) { |
3884 | freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm, | 3922 | kvm_mmu_remove_some_alloc_mmu_pages(kvm, |
3885 | &invalid_list); | 3923 | &invalid_list); |
3886 | kvm_freed = kvm; | 3924 | kvm_freed = kvm; |
3887 | } | 3925 | } |
3888 | nr_to_scan--; | 3926 | nr_to_scan--; |
@@ -3944,15 +3982,15 @@ nomem: | |||
3944 | */ | 3982 | */ |
3945 | unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) | 3983 | unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) |
3946 | { | 3984 | { |
3947 | int i; | ||
3948 | unsigned int nr_mmu_pages; | 3985 | unsigned int nr_mmu_pages; |
3949 | unsigned int nr_pages = 0; | 3986 | unsigned int nr_pages = 0; |
3950 | struct kvm_memslots *slots; | 3987 | struct kvm_memslots *slots; |
3988 | struct kvm_memory_slot *memslot; | ||
3951 | 3989 | ||
3952 | slots = kvm_memslots(kvm); | 3990 | slots = kvm_memslots(kvm); |
3953 | 3991 | ||
3954 | for (i = 0; i < slots->nmemslots; i++) | 3992 | kvm_for_each_memslot(memslot, slots) |
3955 | nr_pages += slots->memslots[i].npages; | 3993 | nr_pages += memslot->npages; |
3956 | 3994 | ||
3957 | nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000; | 3995 | nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000; |
3958 | nr_mmu_pages = max(nr_mmu_pages, | 3996 | nr_mmu_pages = max(nr_mmu_pages, |
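The open-coded loop over slots->memslots[i] gives way to the new kvm_for_each_memslot() iterator. A self-contained usage sketch, assuming the 3.3-era macro signature shown in the hunk:

	static unsigned long count_guest_pages(struct kvm *kvm)
	{
		struct kvm_memslots *slots = kvm_memslots(kvm);
		struct kvm_memory_slot *memslot;
		unsigned long nr_pages = 0;

		/* Visits every populated slot; no index bookkeeping. */
		kvm_for_each_memslot(memslot, slots)
			nr_pages += memslot->npages;

		return nr_pages;
	}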
@@ -3961,127 +3999,6 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) | |||
3961 | return nr_mmu_pages; | 3999 | return nr_mmu_pages; |
3962 | } | 4000 | } |
3963 | 4001 | ||
3964 | static void *pv_mmu_peek_buffer(struct kvm_pv_mmu_op_buffer *buffer, | ||
3965 | unsigned len) | ||
3966 | { | ||
3967 | if (len > buffer->len) | ||
3968 | return NULL; | ||
3969 | return buffer->ptr; | ||
3970 | } | ||
3971 | |||
3972 | static void *pv_mmu_read_buffer(struct kvm_pv_mmu_op_buffer *buffer, | ||
3973 | unsigned len) | ||
3974 | { | ||
3975 | void *ret; | ||
3976 | |||
3977 | ret = pv_mmu_peek_buffer(buffer, len); | ||
3978 | if (!ret) | ||
3979 | return ret; | ||
3980 | buffer->ptr += len; | ||
3981 | buffer->len -= len; | ||
3982 | buffer->processed += len; | ||
3983 | return ret; | ||
3984 | } | ||
3985 | |||
3986 | static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu, | ||
3987 | gpa_t addr, gpa_t value) | ||
3988 | { | ||
3989 | int bytes = 8; | ||
3990 | int r; | ||
3991 | |||
3992 | if (!is_long_mode(vcpu) && !is_pae(vcpu)) | ||
3993 | bytes = 4; | ||
3994 | |||
3995 | r = mmu_topup_memory_caches(vcpu); | ||
3996 | if (r) | ||
3997 | return r; | ||
3998 | |||
3999 | if (!emulator_write_phys(vcpu, addr, &value, bytes)) | ||
4000 | return -EFAULT; | ||
4001 | |||
4002 | return 1; | ||
4003 | } | ||
4004 | |||
4005 | static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu) | ||
4006 | { | ||
4007 | (void)kvm_set_cr3(vcpu, kvm_read_cr3(vcpu)); | ||
4008 | return 1; | ||
4009 | } | ||
4010 | |||
4011 | static int kvm_pv_mmu_release_pt(struct kvm_vcpu *vcpu, gpa_t addr) | ||
4012 | { | ||
4013 | spin_lock(&vcpu->kvm->mmu_lock); | ||
4014 | mmu_unshadow(vcpu->kvm, addr >> PAGE_SHIFT); | ||
4015 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
4016 | return 1; | ||
4017 | } | ||
4018 | |||
4019 | static int kvm_pv_mmu_op_one(struct kvm_vcpu *vcpu, | ||
4020 | struct kvm_pv_mmu_op_buffer *buffer) | ||
4021 | { | ||
4022 | struct kvm_mmu_op_header *header; | ||
4023 | |||
4024 | header = pv_mmu_peek_buffer(buffer, sizeof *header); | ||
4025 | if (!header) | ||
4026 | return 0; | ||
4027 | switch (header->op) { | ||
4028 | case KVM_MMU_OP_WRITE_PTE: { | ||
4029 | struct kvm_mmu_op_write_pte *wpte; | ||
4030 | |||
4031 | wpte = pv_mmu_read_buffer(buffer, sizeof *wpte); | ||
4032 | if (!wpte) | ||
4033 | return 0; | ||
4034 | return kvm_pv_mmu_write(vcpu, wpte->pte_phys, | ||
4035 | wpte->pte_val); | ||
4036 | } | ||
4037 | case KVM_MMU_OP_FLUSH_TLB: { | ||
4038 | struct kvm_mmu_op_flush_tlb *ftlb; | ||
4039 | |||
4040 | ftlb = pv_mmu_read_buffer(buffer, sizeof *ftlb); | ||
4041 | if (!ftlb) | ||
4042 | return 0; | ||
4043 | return kvm_pv_mmu_flush_tlb(vcpu); | ||
4044 | } | ||
4045 | case KVM_MMU_OP_RELEASE_PT: { | ||
4046 | struct kvm_mmu_op_release_pt *rpt; | ||
4047 | |||
4048 | rpt = pv_mmu_read_buffer(buffer, sizeof *rpt); | ||
4049 | if (!rpt) | ||
4050 | return 0; | ||
4051 | return kvm_pv_mmu_release_pt(vcpu, rpt->pt_phys); | ||
4052 | } | ||
4053 | default: return 0; | ||
4054 | } | ||
4055 | } | ||
4056 | |||
4057 | int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes, | ||
4058 | gpa_t addr, unsigned long *ret) | ||
4059 | { | ||
4060 | int r; | ||
4061 | struct kvm_pv_mmu_op_buffer *buffer = &vcpu->arch.mmu_op_buffer; | ||
4062 | |||
4063 | buffer->ptr = buffer->buf; | ||
4064 | buffer->len = min_t(unsigned long, bytes, sizeof buffer->buf); | ||
4065 | buffer->processed = 0; | ||
4066 | |||
4067 | r = kvm_read_guest(vcpu->kvm, addr, buffer->buf, buffer->len); | ||
4068 | if (r) | ||
4069 | goto out; | ||
4070 | |||
4071 | while (buffer->len) { | ||
4072 | r = kvm_pv_mmu_op_one(vcpu, buffer); | ||
4073 | if (r < 0) | ||
4074 | goto out; | ||
4075 | if (r == 0) | ||
4076 | break; | ||
4077 | } | ||
4078 | |||
4079 | r = 1; | ||
4080 | out: | ||
4081 | *ret = buffer->processed; | ||
4082 | return r; | ||
4083 | } | ||
4084 | |||
4085 | int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]) | 4002 | int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]) |
4086 | { | 4003 | { |
4087 | struct kvm_shadow_walk_iterator iterator; | 4004 | struct kvm_shadow_walk_iterator iterator; |
@@ -4110,12 +4027,6 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu) | |||
4110 | mmu_free_memory_caches(vcpu); | 4027 | mmu_free_memory_caches(vcpu); |
4111 | } | 4028 | } |
4112 | 4029 | ||
4113 | #ifdef CONFIG_KVM_MMU_AUDIT | ||
4114 | #include "mmu_audit.c" | ||
4115 | #else | ||
4116 | static void mmu_audit_disable(void) { } | ||
4117 | #endif | ||
4118 | |||
4119 | void kvm_mmu_module_exit(void) | 4030 | void kvm_mmu_module_exit(void) |
4120 | { | 4031 | { |
4121 | mmu_destroy_caches(); | 4032 | mmu_destroy_caches(); |
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index 746ec259d024..fe15dcc07a6b 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c | |||
@@ -19,6 +19,15 @@ | |||
19 | 19 | ||
20 | #include <linux/ratelimit.h> | 20 | #include <linux/ratelimit.h> |
21 | 21 | ||
22 | char const *audit_point_name[] = { | ||
23 | "pre page fault", | ||
24 | "post page fault", | ||
25 | "pre pte write", | ||
26 | "post pte write", | ||
27 | "pre sync", | ||
28 | "post sync" | ||
29 | }; | ||
30 | |||
22 | #define audit_printk(kvm, fmt, args...) \ | 31 | #define audit_printk(kvm, fmt, args...) \ |
23 | printk(KERN_ERR "audit: (%s) error: " \ | 32 | printk(KERN_ERR "audit: (%s) error: " \ |
24 | fmt, audit_point_name[kvm->arch.audit_point], ##args) | 33 | fmt, audit_point_name[kvm->arch.audit_point], ##args) |
@@ -224,7 +233,10 @@ static void audit_vcpu_spte(struct kvm_vcpu *vcpu) | |||
224 | mmu_spte_walk(vcpu, audit_spte); | 233 | mmu_spte_walk(vcpu, audit_spte); |
225 | } | 234 | } |
226 | 235 | ||
227 | static void kvm_mmu_audit(void *ignore, struct kvm_vcpu *vcpu, int point) | 236 | static bool mmu_audit; |
237 | static struct jump_label_key mmu_audit_key; | ||
238 | |||
239 | static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) | ||
228 | { | 240 | { |
229 | static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10); | 241 | static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10); |
230 | 242 | ||
@@ -236,18 +248,18 @@ static void kvm_mmu_audit(void *ignore, struct kvm_vcpu *vcpu, int point) | |||
236 | audit_vcpu_spte(vcpu); | 248 | audit_vcpu_spte(vcpu); |
237 | } | 249 | } |
238 | 250 | ||
239 | static bool mmu_audit; | 251 | static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) |
252 | { | ||
253 | if (static_branch((&mmu_audit_key))) | ||
254 | __kvm_mmu_audit(vcpu, point); | ||
255 | } | ||
240 | 256 | ||
241 | static void mmu_audit_enable(void) | 257 | static void mmu_audit_enable(void) |
242 | { | 258 | { |
243 | int ret; | ||
244 | |||
245 | if (mmu_audit) | 259 | if (mmu_audit) |
246 | return; | 260 | return; |
247 | 261 | ||
248 | ret = register_trace_kvm_mmu_audit(kvm_mmu_audit, NULL); | 262 | jump_label_inc(&mmu_audit_key); |
249 | WARN_ON(ret); | ||
250 | |||
251 | mmu_audit = true; | 263 | mmu_audit = true; |
252 | } | 264 | } |
253 | 265 | ||
@@ -256,8 +268,7 @@ static void mmu_audit_disable(void) | |||
256 | if (!mmu_audit) | 268 | if (!mmu_audit) |
257 | return; | 269 | return; |
258 | 270 | ||
259 | unregister_trace_kvm_mmu_audit(kvm_mmu_audit, NULL); | 271 | jump_label_dec(&mmu_audit_key); |
260 | tracepoint_synchronize_unregister(); | ||
261 | mmu_audit = false; | 272 | mmu_audit = false; |
262 | } | 273 | } |
263 | 274 | ||
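The audit hook moves from a registered tracepoint to a jump label, so the disabled case costs a patched-out branch rather than a tracepoint lookup. A minimal sketch of the pattern, using the 3.3-era API names from the diff (jump_label_key, static_branch, jump_label_inc/dec); later kernels renamed these:

	static struct jump_label_key hook_key;

	static inline void maybe_audit(struct kvm_vcpu *vcpu, int point)
	{
		if (static_branch(&hook_key))	/* compiles to a NOP when off */
			__kvm_mmu_audit(vcpu, point);
	}

	static void hook_enable(void)  { jump_label_inc(&hook_key); }
	static void hook_disable(void) { jump_label_dec(&hook_key); }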
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index eed67f34146d..89fb0e81322a 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h | |||
@@ -243,25 +243,6 @@ TRACE_EVENT( | |||
243 | TP_printk("addr:%llx gfn %llx access %x", __entry->addr, __entry->gfn, | 243 | TP_printk("addr:%llx gfn %llx access %x", __entry->addr, __entry->gfn, |
244 | __entry->access) | 244 | __entry->access) |
245 | ); | 245 | ); |
246 | |||
247 | TRACE_EVENT( | ||
248 | kvm_mmu_audit, | ||
249 | TP_PROTO(struct kvm_vcpu *vcpu, int audit_point), | ||
250 | TP_ARGS(vcpu, audit_point), | ||
251 | |||
252 | TP_STRUCT__entry( | ||
253 | __field(struct kvm_vcpu *, vcpu) | ||
254 | __field(int, audit_point) | ||
255 | ), | ||
256 | |||
257 | TP_fast_assign( | ||
258 | __entry->vcpu = vcpu; | ||
259 | __entry->audit_point = audit_point; | ||
260 | ), | ||
261 | |||
262 | TP_printk("vcpu:%d %s", __entry->vcpu->cpu, | ||
263 | audit_point_name[__entry->audit_point]) | ||
264 | ); | ||
265 | #endif /* _TRACE_KVMMMU_H */ | 246 | #endif /* _TRACE_KVMMMU_H */ |
266 | 247 | ||
267 | #undef TRACE_INCLUDE_PATH | 248 | #undef TRACE_INCLUDE_PATH |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 92994100638b..15610285ebb6 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -497,6 +497,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
497 | shadow_walk_next(&it)) { | 497 | shadow_walk_next(&it)) { |
498 | gfn_t table_gfn; | 498 | gfn_t table_gfn; |
499 | 499 | ||
500 | clear_sp_write_flooding_count(it.sptep); | ||
500 | drop_large_spte(vcpu, it.sptep); | 501 | drop_large_spte(vcpu, it.sptep); |
501 | 502 | ||
502 | sp = NULL; | 503 | sp = NULL; |
@@ -522,6 +523,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
522 | shadow_walk_next(&it)) { | 523 | shadow_walk_next(&it)) { |
523 | gfn_t direct_gfn; | 524 | gfn_t direct_gfn; |
524 | 525 | ||
526 | clear_sp_write_flooding_count(it.sptep); | ||
525 | validate_direct_spte(vcpu, it.sptep, direct_access); | 527 | validate_direct_spte(vcpu, it.sptep, direct_access); |
526 | 528 | ||
527 | drop_large_spte(vcpu, it.sptep); | 529 | drop_large_spte(vcpu, it.sptep); |
@@ -536,6 +538,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
536 | link_shadow_page(it.sptep, sp); | 538 | link_shadow_page(it.sptep, sp); |
537 | } | 539 | } |
538 | 540 | ||
541 | clear_sp_write_flooding_count(it.sptep); | ||
539 | mmu_set_spte(vcpu, it.sptep, access, gw->pte_access, | 542 | mmu_set_spte(vcpu, it.sptep, access, gw->pte_access, |
540 | user_fault, write_fault, emulate, it.level, | 543 | user_fault, write_fault, emulate, it.level, |
541 | gw->gfn, pfn, prefault, map_writable); | 544 | gw->gfn, pfn, prefault, map_writable); |
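The clear_sp_write_flooding_count() calls added in fetch() replace the old per-vcpu last_pt_write_count fork detector (removed later in this series) with a per-shadow-page counter. A hedged sketch of the assumed helper:

	static void clear_sp_write_flooding_count(u64 *spte)
	{
		struct kvm_mmu_page *sp = page_header(__pa(spte));

		/* A page actively used for translation is not being
		 * flooded by writes; reset its counter. */
		sp->write_flooding_count = 0;
	}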
@@ -599,11 +602,9 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
599 | */ | 602 | */ |
600 | if (!r) { | 603 | if (!r) { |
601 | pgprintk("%s: guest page fault\n", __func__); | 604 | pgprintk("%s: guest page fault\n", __func__); |
602 | if (!prefault) { | 605 | if (!prefault) |
603 | inject_page_fault(vcpu, &walker.fault); | 606 | inject_page_fault(vcpu, &walker.fault); |
604 | /* reset fork detector */ | 607 | |
605 | vcpu->arch.last_pt_write_count = 0; | ||
606 | } | ||
607 | return 0; | 608 | return 0; |
608 | } | 609 | } |
609 | 610 | ||
@@ -631,7 +632,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
631 | if (mmu_notifier_retry(vcpu, mmu_seq)) | 632 | if (mmu_notifier_retry(vcpu, mmu_seq)) |
632 | goto out_unlock; | 633 | goto out_unlock; |
633 | 634 | ||
634 | trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); | 635 | kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); |
635 | kvm_mmu_free_some_pages(vcpu); | 636 | kvm_mmu_free_some_pages(vcpu); |
636 | if (!force_pt_level) | 637 | if (!force_pt_level) |
637 | transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); | 638 | transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); |
@@ -641,11 +642,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
641 | pgprintk("%s: shadow pte %p %llx emulate %d\n", __func__, | 642 | pgprintk("%s: shadow pte %p %llx emulate %d\n", __func__, |
642 | sptep, *sptep, emulate); | 643 | sptep, *sptep, emulate); |
643 | 644 | ||
644 | if (!emulate) | ||
645 | vcpu->arch.last_pt_write_count = 0; /* reset fork detector */ | ||
646 | |||
647 | ++vcpu->stat.pf_fixed; | 645 | ++vcpu->stat.pf_fixed; |
648 | trace_kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); | 646 | kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); |
649 | spin_unlock(&vcpu->kvm->mmu_lock); | 647 | spin_unlock(&vcpu->kvm->mmu_lock); |
650 | 648 | ||
651 | return emulate; | 649 | return emulate; |
@@ -656,65 +654,66 @@ out_unlock: | |||
656 | return 0; | 654 | return 0; |
657 | } | 655 | } |
658 | 656 | ||
657 | static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp) | ||
658 | { | ||
659 | int offset = 0; | ||
660 | |||
661 | WARN_ON(sp->role.level != 1); | ||
662 | |||
663 | if (PTTYPE == 32) | ||
664 | offset = sp->role.quadrant << PT64_LEVEL_BITS; | ||
665 | |||
666 | return gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t); | ||
667 | } | ||
668 | |||
659 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | 669 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) |
660 | { | 670 | { |
661 | struct kvm_shadow_walk_iterator iterator; | 671 | struct kvm_shadow_walk_iterator iterator; |
662 | struct kvm_mmu_page *sp; | 672 | struct kvm_mmu_page *sp; |
663 | gpa_t pte_gpa = -1; | ||
664 | int level; | 673 | int level; |
665 | u64 *sptep; | 674 | u64 *sptep; |
666 | int need_flush = 0; | ||
667 | 675 | ||
668 | vcpu_clear_mmio_info(vcpu, gva); | 676 | vcpu_clear_mmio_info(vcpu, gva); |
669 | 677 | ||
670 | spin_lock(&vcpu->kvm->mmu_lock); | 678 | /* |
679 | * No need to check the return value here; if the topup | ||
680 | * fails, rmap_can_add() lets us skip the pte prefetch later. | ||
681 | */ | ||
682 | mmu_topup_memory_caches(vcpu); | ||
671 | 683 | ||
684 | spin_lock(&vcpu->kvm->mmu_lock); | ||
672 | for_each_shadow_entry(vcpu, gva, iterator) { | 685 | for_each_shadow_entry(vcpu, gva, iterator) { |
673 | level = iterator.level; | 686 | level = iterator.level; |
674 | sptep = iterator.sptep; | 687 | sptep = iterator.sptep; |
675 | 688 | ||
676 | sp = page_header(__pa(sptep)); | 689 | sp = page_header(__pa(sptep)); |
677 | if (is_last_spte(*sptep, level)) { | 690 | if (is_last_spte(*sptep, level)) { |
678 | int offset, shift; | 691 | pt_element_t gpte; |
692 | gpa_t pte_gpa; | ||
679 | 693 | ||
680 | if (!sp->unsync) | 694 | if (!sp->unsync) |
681 | break; | 695 | break; |
682 | 696 | ||
683 | shift = PAGE_SHIFT - | 697 | pte_gpa = FNAME(get_level1_sp_gpa)(sp); |
684 | (PT_LEVEL_BITS - PT64_LEVEL_BITS) * level; | ||
685 | offset = sp->role.quadrant << shift; | ||
686 | |||
687 | pte_gpa = (sp->gfn << PAGE_SHIFT) + offset; | ||
688 | pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); | 698 | pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); |
689 | 699 | ||
690 | if (is_shadow_present_pte(*sptep)) { | 700 | if (mmu_page_zap_pte(vcpu->kvm, sp, sptep)) |
691 | if (is_large_pte(*sptep)) | 701 | kvm_flush_remote_tlbs(vcpu->kvm); |
692 | --vcpu->kvm->stat.lpages; | ||
693 | drop_spte(vcpu->kvm, sptep); | ||
694 | need_flush = 1; | ||
695 | } else if (is_mmio_spte(*sptep)) | ||
696 | mmu_spte_clear_no_track(sptep); | ||
697 | 702 | ||
698 | break; | 703 | if (!rmap_can_add(vcpu)) |
704 | break; | ||
705 | |||
706 | if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte, | ||
707 | sizeof(pt_element_t))) | ||
708 | break; | ||
709 | |||
710 | FNAME(update_pte)(vcpu, sp, sptep, &gpte); | ||
699 | } | 711 | } |
700 | 712 | ||
701 | if (!is_shadow_present_pte(*sptep) || !sp->unsync_children) | 713 | if (!is_shadow_present_pte(*sptep) || !sp->unsync_children) |
702 | break; | 714 | break; |
703 | } | 715 | } |
704 | |||
705 | if (need_flush) | ||
706 | kvm_flush_remote_tlbs(vcpu->kvm); | ||
707 | |||
708 | atomic_inc(&vcpu->kvm->arch.invlpg_counter); | ||
709 | |||
710 | spin_unlock(&vcpu->kvm->mmu_lock); | 716 | spin_unlock(&vcpu->kvm->mmu_lock); |
711 | |||
712 | if (pte_gpa == -1) | ||
713 | return; | ||
714 | |||
715 | if (mmu_topup_memory_caches(vcpu)) | ||
716 | return; | ||
717 | kvm_mmu_pte_write(vcpu, pte_gpa, NULL, sizeof(pt_element_t), 0); | ||
718 | } | 717 | } |
719 | 718 | ||
720 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, | 719 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, |
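FNAME(get_level1_sp_gpa)() factors out the quadrant arithmetic previously duplicated in invlpg() and sync_page(). Worked example for PTTYPE == 32, assuming PT64_LEVEL_BITS == 9, PAGE_SHIFT == 12 and sizeof(pt_element_t) == 4: a guest page holds 1024 4-byte PTEs but a shadow page maps only 512 sptes, so role.quadrant selects which half the shadow page mirrors.

	/* quadrant = 1:
	 *   offset  = 1 << 9                 =  512 guest entries
	 *   pte_gpa = gfn_to_gpa(sp->gfn) + 512 * sizeof(pt_element_t)
	 *           = (sp->gfn << 12) + 2048 -- second half of the page
	 */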
@@ -769,19 +768,14 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, | |||
769 | */ | 768 | */ |
770 | static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 769 | static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) |
771 | { | 770 | { |
772 | int i, offset, nr_present; | 771 | int i, nr_present = 0; |
773 | bool host_writable; | 772 | bool host_writable; |
774 | gpa_t first_pte_gpa; | 773 | gpa_t first_pte_gpa; |
775 | 774 | ||
776 | offset = nr_present = 0; | ||
777 | |||
778 | /* direct kvm_mmu_page can not be unsync. */ | 775 | /* direct kvm_mmu_page can not be unsync. */ |
779 | BUG_ON(sp->role.direct); | 776 | BUG_ON(sp->role.direct); |
780 | 777 | ||
781 | if (PTTYPE == 32) | 778 | first_pte_gpa = FNAME(get_level1_sp_gpa)(sp); |
782 | offset = sp->role.quadrant << PT64_LEVEL_BITS; | ||
783 | |||
784 | first_pte_gpa = gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t); | ||
785 | 779 | ||
786 | for (i = 0; i < PT64_ENT_PER_PAGE; i++) { | 780 | for (i = 0; i < PT64_ENT_PER_PAGE; i++) { |
787 | unsigned pte_access; | 781 | unsigned pte_access; |
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c new file mode 100644 index 000000000000..7aad5446f393 --- /dev/null +++ b/arch/x86/kvm/pmu.c | |||
@@ -0,0 +1,533 @@ | |||
1 | /* | ||
2 | * Kernel-based Virtual Machine -- Performance Monitoring Unit support | ||
3 | * | ||
4 | * Copyright 2011 Red Hat, Inc. and/or its affiliates. | ||
5 | * | ||
6 | * Authors: | ||
7 | * Avi Kivity <avi@redhat.com> | ||
8 | * Gleb Natapov <gleb@redhat.com> | ||
9 | * | ||
10 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
11 | * the COPYING file in the top-level directory. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include <linux/types.h> | ||
16 | #include <linux/kvm_host.h> | ||
17 | #include <linux/perf_event.h> | ||
18 | #include "x86.h" | ||
19 | #include "cpuid.h" | ||
20 | #include "lapic.h" | ||
21 | |||
22 | static struct kvm_arch_event_perf_mapping { | ||
23 | u8 eventsel; | ||
24 | u8 unit_mask; | ||
25 | unsigned event_type; | ||
26 | bool inexact; | ||
27 | } arch_events[] = { | ||
28 | /* Index must match CPUID 0x0A.EBX bit vector */ | ||
29 | [0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES }, | ||
30 | [1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS }, | ||
31 | [2] = { 0x3c, 0x01, PERF_COUNT_HW_BUS_CYCLES }, | ||
32 | [3] = { 0x2e, 0x4f, PERF_COUNT_HW_CACHE_REFERENCES }, | ||
33 | [4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES }, | ||
34 | [5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, | ||
35 | [6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES }, | ||
36 | }; | ||
37 | |||
38 | /* mapping between fixed pmc index and arch_events array */ | ||
39 | int fixed_pmc_events[] = {1, 0, 2}; | ||
40 | |||
41 | static bool pmc_is_gp(struct kvm_pmc *pmc) | ||
42 | { | ||
43 | return pmc->type == KVM_PMC_GP; | ||
44 | } | ||
45 | |||
46 | static inline u64 pmc_bitmask(struct kvm_pmc *pmc) | ||
47 | { | ||
48 | struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu; | ||
49 | |||
50 | return pmu->counter_bitmask[pmc->type]; | ||
51 | } | ||
52 | |||
53 | static inline bool pmc_enabled(struct kvm_pmc *pmc) | ||
54 | { | ||
55 | struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu; | ||
56 | return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl); | ||
57 | } | ||
58 | |||
59 | static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr, | ||
60 | u32 base) | ||
61 | { | ||
62 | if (msr >= base && msr < base + pmu->nr_arch_gp_counters) | ||
63 | return &pmu->gp_counters[msr - base]; | ||
64 | return NULL; | ||
65 | } | ||
66 | |||
67 | static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr) | ||
68 | { | ||
69 | int base = MSR_CORE_PERF_FIXED_CTR0; | ||
70 | if (msr >= base && msr < base + pmu->nr_arch_fixed_counters) | ||
71 | return &pmu->fixed_counters[msr - base]; | ||
72 | return NULL; | ||
73 | } | ||
74 | |||
75 | static inline struct kvm_pmc *get_fixed_pmc_idx(struct kvm_pmu *pmu, int idx) | ||
76 | { | ||
77 | return get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + idx); | ||
78 | } | ||
79 | |||
80 | static struct kvm_pmc *global_idx_to_pmc(struct kvm_pmu *pmu, int idx) | ||
81 | { | ||
82 | if (idx < X86_PMC_IDX_FIXED) | ||
83 | return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + idx, MSR_P6_EVNTSEL0); | ||
84 | else | ||
85 | return get_fixed_pmc_idx(pmu, idx - X86_PMC_IDX_FIXED); | ||
86 | } | ||
87 | |||
88 | void kvm_deliver_pmi(struct kvm_vcpu *vcpu) | ||
89 | { | ||
90 | if (vcpu->arch.apic) | ||
91 | kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC); | ||
92 | } | ||
93 | |||
94 | static void trigger_pmi(struct irq_work *irq_work) | ||
95 | { | ||
96 | struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu, | ||
97 | irq_work); | ||
98 | struct kvm_vcpu *vcpu = container_of(pmu, struct kvm_vcpu, | ||
99 | arch.pmu); | ||
100 | |||
101 | kvm_deliver_pmi(vcpu); | ||
102 | } | ||
103 | |||
104 | static void kvm_perf_overflow(struct perf_event *perf_event, | ||
105 | struct perf_sample_data *data, | ||
106 | struct pt_regs *regs) | ||
107 | { | ||
108 | struct kvm_pmc *pmc = perf_event->overflow_handler_context; | ||
109 | struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu; | ||
110 | __set_bit(pmc->idx, (unsigned long *)&pmu->global_status); | ||
111 | } | ||
112 | |||
113 | static void kvm_perf_overflow_intr(struct perf_event *perf_event, | ||
114 | struct perf_sample_data *data, struct pt_regs *regs) | ||
115 | { | ||
116 | struct kvm_pmc *pmc = perf_event->overflow_handler_context; | ||
117 | struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu; | ||
118 | if (!test_and_set_bit(pmc->idx, (unsigned long *)&pmu->reprogram_pmi)) { | ||
119 | kvm_perf_overflow(perf_event, data, regs); | ||
120 | kvm_make_request(KVM_REQ_PMU, pmc->vcpu); | ||
121 | /* | ||
122 | * Inject PMI. If the vcpu was in guest mode during the NMI, the | ||
123 | * PMI can be injected on guest mode re-entry. Otherwise we can't | ||
124 | * be sure the vcpu wasn't executing a hlt instruction at the | ||
125 | * time of vmexit and is not going to re-enter guest mode until | ||
126 | * woken up. So we should wake it, but this is impossible from | ||
127 | * NMI context. Do it from irq work instead. | ||
128 | */ | ||
129 | if (!kvm_is_in_guest()) | ||
130 | irq_work_queue(&pmc->vcpu->arch.pmu.irq_work); | ||
131 | else | ||
132 | kvm_make_request(KVM_REQ_PMI, pmc->vcpu); | ||
133 | } | ||
134 | } | ||
135 | |||
136 | static u64 read_pmc(struct kvm_pmc *pmc) | ||
137 | { | ||
138 | u64 counter, enabled, running; | ||
139 | |||
140 | counter = pmc->counter; | ||
141 | |||
142 | if (pmc->perf_event) | ||
143 | counter += perf_event_read_value(pmc->perf_event, | ||
144 | &enabled, &running); | ||
145 | |||
146 | /* FIXME: Scaling needed? */ | ||
147 | |||
148 | return counter & pmc_bitmask(pmc); | ||
149 | } | ||
150 | |||
151 | static void stop_counter(struct kvm_pmc *pmc) | ||
152 | { | ||
153 | if (pmc->perf_event) { | ||
154 | pmc->counter = read_pmc(pmc); | ||
155 | perf_event_release_kernel(pmc->perf_event); | ||
156 | pmc->perf_event = NULL; | ||
157 | } | ||
158 | } | ||
159 | |||
160 | static void reprogram_counter(struct kvm_pmc *pmc, u32 type, | ||
161 | unsigned config, bool exclude_user, bool exclude_kernel, | ||
162 | bool intr) | ||
163 | { | ||
164 | struct perf_event *event; | ||
165 | struct perf_event_attr attr = { | ||
166 | .type = type, | ||
167 | .size = sizeof(attr), | ||
168 | .pinned = true, | ||
169 | .exclude_idle = true, | ||
170 | .exclude_host = 1, | ||
171 | .exclude_user = exclude_user, | ||
172 | .exclude_kernel = exclude_kernel, | ||
173 | .config = config, | ||
174 | }; | ||
175 | |||
176 | attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc); | ||
177 | |||
178 | event = perf_event_create_kernel_counter(&attr, -1, current, | ||
179 | intr ? kvm_perf_overflow_intr : | ||
180 | kvm_perf_overflow, pmc); | ||
181 | if (IS_ERR(event)) { | ||
182 | printk_once("kvm: pmu event creation failed %ld\n", | ||
183 | PTR_ERR(event)); | ||
184 | return; | ||
185 | } | ||
186 | |||
187 | pmc->perf_event = event; | ||
188 | clear_bit(pmc->idx, (unsigned long*)&pmc->vcpu->arch.pmu.reprogram_pmi); | ||
189 | } | ||
190 | |||
191 | static unsigned find_arch_event(struct kvm_pmu *pmu, u8 event_select, | ||
192 | u8 unit_mask) | ||
193 | { | ||
194 | int i; | ||
195 | |||
196 | for (i = 0; i < ARRAY_SIZE(arch_events); i++) | ||
197 | if (arch_events[i].eventsel == event_select | ||
198 | && arch_events[i].unit_mask == unit_mask | ||
199 | && (pmu->available_event_types & (1 << i))) | ||
200 | break; | ||
201 | |||
202 | if (i == ARRAY_SIZE(arch_events)) | ||
203 | return PERF_COUNT_HW_MAX; | ||
204 | |||
205 | return arch_events[i].event_type; | ||
206 | } | ||
207 | |||
208 | static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) | ||
209 | { | ||
210 | unsigned config, type = PERF_TYPE_RAW; | ||
211 | u8 event_select, unit_mask; | ||
212 | |||
213 | pmc->eventsel = eventsel; | ||
214 | |||
215 | stop_counter(pmc); | ||
216 | |||
217 | if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_enabled(pmc)) | ||
218 | return; | ||
219 | |||
220 | event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT; | ||
221 | unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; | ||
222 | |||
223 | if (!(event_select & (ARCH_PERFMON_EVENTSEL_EDGE | | ||
224 | ARCH_PERFMON_EVENTSEL_INV | | ||
225 | ARCH_PERFMON_EVENTSEL_CMASK))) { | ||
226 | config = find_arch_event(&pmc->vcpu->arch.pmu, event_select, | ||
227 | unit_mask); | ||
228 | if (config != PERF_COUNT_HW_MAX) | ||
229 | type = PERF_TYPE_HARDWARE; | ||
230 | } | ||
231 | |||
232 | if (type == PERF_TYPE_RAW) | ||
233 | config = eventsel & X86_RAW_EVENT_MASK; | ||
234 | |||
235 | reprogram_counter(pmc, type, config, | ||
236 | !(eventsel & ARCH_PERFMON_EVENTSEL_USR), | ||
237 | !(eventsel & ARCH_PERFMON_EVENTSEL_OS), | ||
238 | eventsel & ARCH_PERFMON_EVENTSEL_INT); | ||
239 | } | ||
240 | |||
241 | static void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 en_pmi, int idx) | ||
242 | { | ||
243 | unsigned en = en_pmi & 0x3; | ||
244 | bool pmi = en_pmi & 0x8; | ||
245 | |||
246 | stop_counter(pmc); | ||
247 | |||
248 | if (!en || !pmc_enabled(pmc)) | ||
249 | return; | ||
250 | |||
251 | reprogram_counter(pmc, PERF_TYPE_HARDWARE, | ||
252 | arch_events[fixed_pmc_events[idx]].event_type, | ||
253 | !(en & 0x2), /* exclude user */ | ||
254 | !(en & 0x1), /* exclude kernel */ | ||
255 | pmi); | ||
256 | } | ||
257 | |||
258 | static inline u8 fixed_en_pmi(u64 ctrl, int idx) | ||
259 | { | ||
260 | return (ctrl >> (idx * 4)) & 0xf; | ||
261 | } | ||
262 | |||
263 | static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data) | ||
264 | { | ||
265 | int i; | ||
266 | |||
267 | for (i = 0; i < pmu->nr_arch_fixed_counters; i++) { | ||
268 | u8 en_pmi = fixed_en_pmi(data, i); | ||
269 | struct kvm_pmc *pmc = get_fixed_pmc_idx(pmu, i); | ||
270 | |||
271 | if (fixed_en_pmi(pmu->fixed_ctr_ctrl, i) == en_pmi) | ||
272 | continue; | ||
273 | |||
274 | reprogram_fixed_counter(pmc, en_pmi, i); | ||
275 | } | ||
276 | |||
277 | pmu->fixed_ctr_ctrl = data; | ||
278 | } | ||
279 | |||
280 | static void reprogram_idx(struct kvm_pmu *pmu, int idx) | ||
281 | { | ||
282 | struct kvm_pmc *pmc = global_idx_to_pmc(pmu, idx); | ||
283 | |||
284 | if (!pmc) | ||
285 | return; | ||
286 | |||
287 | if (pmc_is_gp(pmc)) | ||
288 | reprogram_gp_counter(pmc, pmc->eventsel); | ||
289 | else { | ||
290 | int fidx = idx - X86_PMC_IDX_FIXED; | ||
291 | reprogram_fixed_counter(pmc, | ||
292 | fixed_en_pmi(pmu->fixed_ctr_ctrl, fidx), fidx); | ||
293 | } | ||
294 | } | ||
295 | |||
296 | static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data) | ||
297 | { | ||
298 | int bit; | ||
299 | u64 diff = pmu->global_ctrl ^ data; | ||
300 | |||
301 | pmu->global_ctrl = data; | ||
302 | |||
303 | for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX) | ||
304 | reprogram_idx(pmu, bit); | ||
305 | } | ||
306 | |||
307 | bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr) | ||
308 | { | ||
309 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
310 | int ret; | ||
311 | |||
312 | switch (msr) { | ||
313 | case MSR_CORE_PERF_FIXED_CTR_CTRL: | ||
314 | case MSR_CORE_PERF_GLOBAL_STATUS: | ||
315 | case MSR_CORE_PERF_GLOBAL_CTRL: | ||
316 | case MSR_CORE_PERF_GLOBAL_OVF_CTRL: | ||
317 | ret = pmu->version > 1; | ||
318 | break; | ||
319 | default: | ||
320 | ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) | ||
321 | || get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) | ||
322 | || get_fixed_pmc(pmu, msr); | ||
323 | break; | ||
324 | } | ||
325 | return ret; | ||
326 | } | ||
327 | |||
328 | int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data) | ||
329 | { | ||
330 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
331 | struct kvm_pmc *pmc; | ||
332 | |||
333 | switch (index) { | ||
334 | case MSR_CORE_PERF_FIXED_CTR_CTRL: | ||
335 | *data = pmu->fixed_ctr_ctrl; | ||
336 | return 0; | ||
337 | case MSR_CORE_PERF_GLOBAL_STATUS: | ||
338 | *data = pmu->global_status; | ||
339 | return 0; | ||
340 | case MSR_CORE_PERF_GLOBAL_CTRL: | ||
341 | *data = pmu->global_ctrl; | ||
342 | return 0; | ||
343 | case MSR_CORE_PERF_GLOBAL_OVF_CTRL: | ||
344 | *data = pmu->global_ovf_ctrl; | ||
345 | return 0; | ||
346 | default: | ||
347 | if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) || | ||
348 | (pmc = get_fixed_pmc(pmu, index))) { | ||
349 | *data = read_pmc(pmc); | ||
350 | return 0; | ||
351 | } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) { | ||
352 | *data = pmc->eventsel; | ||
353 | return 0; | ||
354 | } | ||
355 | } | ||
356 | return 1; | ||
357 | } | ||
358 | |||
359 | int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) | ||
360 | { | ||
361 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
362 | struct kvm_pmc *pmc; | ||
363 | |||
364 | switch (index) { | ||
365 | case MSR_CORE_PERF_FIXED_CTR_CTRL: | ||
366 | if (pmu->fixed_ctr_ctrl == data) | ||
367 | return 0; | ||
368 | if (!(data & 0xfffffffffffff444)) { | ||
369 | reprogram_fixed_counters(pmu, data); | ||
370 | return 0; | ||
371 | } | ||
372 | break; | ||
373 | case MSR_CORE_PERF_GLOBAL_STATUS: | ||
374 | break; /* RO MSR */ | ||
375 | case MSR_CORE_PERF_GLOBAL_CTRL: | ||
376 | if (pmu->global_ctrl == data) | ||
377 | return 0; | ||
378 | if (!(data & pmu->global_ctrl_mask)) { | ||
379 | global_ctrl_changed(pmu, data); | ||
380 | return 0; | ||
381 | } | ||
382 | break; | ||
383 | case MSR_CORE_PERF_GLOBAL_OVF_CTRL: | ||
384 | if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) { | ||
385 | pmu->global_status &= ~data; | ||
386 | pmu->global_ovf_ctrl = data; | ||
387 | return 0; | ||
388 | } | ||
389 | break; | ||
390 | default: | ||
391 | if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) || | ||
392 | (pmc = get_fixed_pmc(pmu, index))) { | ||
393 | data = (s64)(s32)data; | ||
394 | pmc->counter += data - read_pmc(pmc); | ||
395 | return 0; | ||
396 | } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) { | ||
397 | if (data == pmc->eventsel) | ||
398 | return 0; | ||
399 | if (!(data & 0xffffffff00200000ull)) { | ||
400 | reprogram_gp_counter(pmc, data); | ||
401 | return 0; | ||
402 | } | ||
403 | } | ||
404 | } | ||
405 | return 1; | ||
406 | } | ||
407 | |||
408 | int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data) | ||
409 | { | ||
410 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
411 | bool fast_mode = pmc & (1u << 31); | ||
412 | bool fixed = pmc & (1u << 30); | ||
413 | struct kvm_pmc *counters; | ||
414 | u64 ctr; | ||
415 | |||
416 | pmc &= (3u << 30) - 1; | ||
417 | if (!fixed && pmc >= pmu->nr_arch_gp_counters) | ||
418 | return 1; | ||
419 | if (fixed && pmc >= pmu->nr_arch_fixed_counters) | ||
420 | return 1; | ||
421 | counters = fixed ? pmu->fixed_counters : pmu->gp_counters; | ||
422 | ctr = read_pmc(&counters[pmc]); | ||
423 | if (fast_mode) | ||
424 | ctr = (u32)ctr; | ||
425 | *data = ctr; | ||
426 | |||
427 | return 0; | ||
428 | } | ||
429 | |||
430 | void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu) | ||
431 | { | ||
432 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
433 | struct kvm_cpuid_entry2 *entry; | ||
434 | unsigned bitmap_len; | ||
435 | |||
436 | pmu->nr_arch_gp_counters = 0; | ||
437 | pmu->nr_arch_fixed_counters = 0; | ||
438 | pmu->counter_bitmask[KVM_PMC_GP] = 0; | ||
439 | pmu->counter_bitmask[KVM_PMC_FIXED] = 0; | ||
440 | pmu->version = 0; | ||
441 | |||
442 | entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); | ||
443 | if (!entry) | ||
444 | return; | ||
445 | |||
446 | pmu->version = entry->eax & 0xff; | ||
447 | if (!pmu->version) | ||
448 | return; | ||
449 | |||
450 | pmu->nr_arch_gp_counters = min((int)(entry->eax >> 8) & 0xff, | ||
451 | X86_PMC_MAX_GENERIC); | ||
452 | pmu->counter_bitmask[KVM_PMC_GP] = | ||
453 | ((u64)1 << ((entry->eax >> 16) & 0xff)) - 1; | ||
454 | bitmap_len = (entry->eax >> 24) & 0xff; | ||
455 | pmu->available_event_types = ~entry->ebx & ((1ull << bitmap_len) - 1); | ||
456 | |||
457 | if (pmu->version == 1) { | ||
458 | pmu->global_ctrl = (1 << pmu->nr_arch_gp_counters) - 1; | ||
459 | return; | ||
460 | } | ||
461 | |||
462 | pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f), | ||
463 | X86_PMC_MAX_FIXED); | ||
464 | pmu->counter_bitmask[KVM_PMC_FIXED] = | ||
465 | ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1; | ||
466 | pmu->global_ctrl_mask = ~(((1 << pmu->nr_arch_gp_counters) - 1) | ||
467 | | (((1ull << pmu->nr_arch_fixed_counters) - 1) | ||
468 | << X86_PMC_IDX_FIXED)); | ||
469 | } | ||
470 | |||
471 | void kvm_pmu_init(struct kvm_vcpu *vcpu) | ||
472 | { | ||
473 | int i; | ||
474 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
475 | |||
476 | memset(pmu, 0, sizeof(*pmu)); | ||
477 | for (i = 0; i < X86_PMC_MAX_GENERIC; i++) { | ||
478 | pmu->gp_counters[i].type = KVM_PMC_GP; | ||
479 | pmu->gp_counters[i].vcpu = vcpu; | ||
480 | pmu->gp_counters[i].idx = i; | ||
481 | } | ||
482 | for (i = 0; i < X86_PMC_MAX_FIXED; i++) { | ||
483 | pmu->fixed_counters[i].type = KVM_PMC_FIXED; | ||
484 | pmu->fixed_counters[i].vcpu = vcpu; | ||
485 | pmu->fixed_counters[i].idx = i + X86_PMC_IDX_FIXED; | ||
486 | } | ||
487 | init_irq_work(&pmu->irq_work, trigger_pmi); | ||
488 | kvm_pmu_cpuid_update(vcpu); | ||
489 | } | ||
490 | |||
491 | void kvm_pmu_reset(struct kvm_vcpu *vcpu) | ||
492 | { | ||
493 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
494 | int i; | ||
495 | |||
496 | irq_work_sync(&pmu->irq_work); | ||
497 | for (i = 0; i < X86_PMC_MAX_GENERIC; i++) { | ||
498 | struct kvm_pmc *pmc = &pmu->gp_counters[i]; | ||
499 | stop_counter(pmc); | ||
500 | pmc->counter = pmc->eventsel = 0; | ||
501 | } | ||
502 | |||
503 | for (i = 0; i < X86_PMC_MAX_FIXED; i++) | ||
504 | stop_counter(&pmu->fixed_counters[i]); | ||
505 | |||
506 | pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status = | ||
507 | pmu->global_ovf_ctrl = 0; | ||
508 | } | ||
509 | |||
510 | void kvm_pmu_destroy(struct kvm_vcpu *vcpu) | ||
511 | { | ||
512 | kvm_pmu_reset(vcpu); | ||
513 | } | ||
514 | |||
515 | void kvm_handle_pmu_event(struct kvm_vcpu *vcpu) | ||
516 | { | ||
517 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
518 | u64 bitmask; | ||
519 | int bit; | ||
520 | |||
521 | bitmask = pmu->reprogram_pmi; | ||
522 | |||
523 | for_each_set_bit(bit, (unsigned long *)&bitmask, X86_PMC_IDX_MAX) { | ||
524 | struct kvm_pmc *pmc = global_idx_to_pmc(pmu, bit); | ||
525 | |||
526 | if (unlikely(!pmc || !pmc->perf_event)) { | ||
527 | clear_bit(bit, (unsigned long *)&pmu->reprogram_pmi); | ||
528 | continue; | ||
529 | } | ||
530 | |||
531 | reprogram_idx(pmu, bit); | ||
532 | } | ||
533 | } | ||
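Worked example of the index decoding in kvm_pmu_read_pmc() above; the guest ECX value is illustrative:

	u32 pmc = (1u << 30) | 1;	/* guest ECX: fixed counter 1 */

	bool fast_mode = pmc & (1u << 31);	/* false                      */
	bool fixed     = pmc & (1u << 30);	/* true: use fixed_counters[] */
	pmc &= (3u << 30) - 1;			/* 0xbfffffff: index 1 remains */
	/* Note the mask clears bit 30 but appears to leave bit 31 set for
	 * fast-mode requests, which the range check would then reject. */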
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e32243eac2f4..5fa553babe56 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -1014,6 +1014,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
1014 | set_intercept(svm, INTERCEPT_NMI); | 1014 | set_intercept(svm, INTERCEPT_NMI); |
1015 | set_intercept(svm, INTERCEPT_SMI); | 1015 | set_intercept(svm, INTERCEPT_SMI); |
1016 | set_intercept(svm, INTERCEPT_SELECTIVE_CR0); | 1016 | set_intercept(svm, INTERCEPT_SELECTIVE_CR0); |
1017 | set_intercept(svm, INTERCEPT_RDPMC); | ||
1017 | set_intercept(svm, INTERCEPT_CPUID); | 1018 | set_intercept(svm, INTERCEPT_CPUID); |
1018 | set_intercept(svm, INTERCEPT_INVD); | 1019 | set_intercept(svm, INTERCEPT_INVD); |
1019 | set_intercept(svm, INTERCEPT_HLT); | 1020 | set_intercept(svm, INTERCEPT_HLT); |
@@ -2770,6 +2771,19 @@ static int emulate_on_interception(struct vcpu_svm *svm) | |||
2770 | return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; | 2771 | return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; |
2771 | } | 2772 | } |
2772 | 2773 | ||
2774 | static int rdpmc_interception(struct vcpu_svm *svm) | ||
2775 | { | ||
2776 | int err; | ||
2777 | |||
2778 | if (!static_cpu_has(X86_FEATURE_NRIPS)) | ||
2779 | return emulate_on_interception(svm); | ||
2780 | |||
2781 | err = kvm_rdpmc(&svm->vcpu); | ||
2782 | kvm_complete_insn_gp(&svm->vcpu, err); | ||
2783 | |||
2784 | return 1; | ||
2785 | } | ||
2786 | |||
2773 | bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val) | 2787 | bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val) |
2774 | { | 2788 | { |
2775 | unsigned long cr0 = svm->vcpu.arch.cr0; | 2789 | unsigned long cr0 = svm->vcpu.arch.cr0; |
@@ -3190,6 +3204,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
3190 | [SVM_EXIT_SMI] = nop_on_interception, | 3204 | [SVM_EXIT_SMI] = nop_on_interception, |
3191 | [SVM_EXIT_INIT] = nop_on_interception, | 3205 | [SVM_EXIT_INIT] = nop_on_interception, |
3192 | [SVM_EXIT_VINTR] = interrupt_window_interception, | 3206 | [SVM_EXIT_VINTR] = interrupt_window_interception, |
3207 | [SVM_EXIT_RDPMC] = rdpmc_interception, | ||
3193 | [SVM_EXIT_CPUID] = cpuid_interception, | 3208 | [SVM_EXIT_CPUID] = cpuid_interception, |
3194 | [SVM_EXIT_IRET] = iret_interception, | 3209 | [SVM_EXIT_IRET] = iret_interception, |
3195 | [SVM_EXIT_INVD] = emulate_on_interception, | 3210 | [SVM_EXIT_INVD] = emulate_on_interception, |
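rdpmc_interception() leans on kvm_complete_insn_gp() to finish the instruction. A hedged sketch of that helper's assumed semantics, inferred from its use here (skip the instruction on success, inject #GP on failure):

	void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
	{
		if (err)
			kvm_inject_gp(vcpu, 0);	/* raise #GP(0) in the guest */
		else
			kvm_x86_ops->skip_emulated_instruction(vcpu); /* advance RIP */
	}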
diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c index ae432ea1cd83..6b85cc647f34 100644 --- a/arch/x86/kvm/timer.c +++ b/arch/x86/kvm/timer.c | |||
@@ -18,9 +18,10 @@ | |||
18 | #include <linux/atomic.h> | 18 | #include <linux/atomic.h> |
19 | #include "kvm_timer.h" | 19 | #include "kvm_timer.h" |
20 | 20 | ||
21 | static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer) | 21 | enum hrtimer_restart kvm_timer_fn(struct hrtimer *data) |
22 | { | 22 | { |
23 | int restart_timer = 0; | 23 | struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); |
24 | struct kvm_vcpu *vcpu = ktimer->vcpu; | ||
24 | wait_queue_head_t *q = &vcpu->wq; | 25 | wait_queue_head_t *q = &vcpu->wq; |
25 | 26 | ||
26 | /* | 27 | /* |
@@ -40,26 +41,7 @@ static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer) | |||
40 | 41 | ||
41 | if (ktimer->t_ops->is_periodic(ktimer)) { | 42 | if (ktimer->t_ops->is_periodic(ktimer)) { |
42 | hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); | 43 | hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); |
43 | restart_timer = 1; | ||
44 | } | ||
45 | |||
46 | return restart_timer; | ||
47 | } | ||
48 | |||
49 | enum hrtimer_restart kvm_timer_fn(struct hrtimer *data) | ||
50 | { | ||
51 | int restart_timer; | ||
52 | struct kvm_vcpu *vcpu; | ||
53 | struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); | ||
54 | |||
55 | vcpu = ktimer->vcpu; | ||
56 | if (!vcpu) | ||
57 | return HRTIMER_NORESTART; | ||
58 | |||
59 | restart_timer = __kvm_timer_fn(vcpu, ktimer); | ||
60 | if (restart_timer) | ||
61 | return HRTIMER_RESTART; | 44 | return HRTIMER_RESTART; |
62 | else | 45 | } else |
63 | return HRTIMER_NORESTART; | 46 | return HRTIMER_NORESTART; |
64 | } | 47 | } |
65 | |||
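After the rewrite, kvm_timer_fn() has the standard periodic-hrtimer shape: push the expiry forward and ask for a restart. A generic sketch of the pattern; the payload and period check are hypothetical:

	static enum hrtimer_restart periodic_fn(struct hrtimer *t)
	{
		do_timer_work(t);			/* hypothetical payload */

		if (timer_is_periodic(t)) {		/* hypothetical check   */
			hrtimer_add_expires_ns(t, period_ns);
			return HRTIMER_RESTART;		/* re-arm               */
		}
		return HRTIMER_NORESTART;		/* one-shot: done       */
	}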
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 579a0b51696a..d29216c462b3 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -18,6 +18,7 @@ | |||
18 | 18 | ||
19 | #include "irq.h" | 19 | #include "irq.h" |
20 | #include "mmu.h" | 20 | #include "mmu.h" |
21 | #include "cpuid.h" | ||
21 | 22 | ||
22 | #include <linux/kvm_host.h> | 23 | #include <linux/kvm_host.h> |
23 | #include <linux/module.h> | 24 | #include <linux/module.h> |
@@ -50,29 +51,29 @@ | |||
50 | MODULE_AUTHOR("Qumranet"); | 51 | MODULE_AUTHOR("Qumranet"); |
51 | MODULE_LICENSE("GPL"); | 52 | MODULE_LICENSE("GPL"); |
52 | 53 | ||
53 | static int __read_mostly enable_vpid = 1; | 54 | static bool __read_mostly enable_vpid = 1; |
54 | module_param_named(vpid, enable_vpid, bool, 0444); | 55 | module_param_named(vpid, enable_vpid, bool, 0444); |
55 | 56 | ||
56 | static int __read_mostly flexpriority_enabled = 1; | 57 | static bool __read_mostly flexpriority_enabled = 1; |
57 | module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO); | 58 | module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO); |
58 | 59 | ||
59 | static int __read_mostly enable_ept = 1; | 60 | static bool __read_mostly enable_ept = 1; |
60 | module_param_named(ept, enable_ept, bool, S_IRUGO); | 61 | module_param_named(ept, enable_ept, bool, S_IRUGO); |
61 | 62 | ||
62 | static int __read_mostly enable_unrestricted_guest = 1; | 63 | static bool __read_mostly enable_unrestricted_guest = 1; |
63 | module_param_named(unrestricted_guest, | 64 | module_param_named(unrestricted_guest, |
64 | enable_unrestricted_guest, bool, S_IRUGO); | 65 | enable_unrestricted_guest, bool, S_IRUGO); |
65 | 66 | ||
66 | static int __read_mostly emulate_invalid_guest_state = 0; | 67 | static bool __read_mostly emulate_invalid_guest_state = 0; |
67 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); | 68 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); |
68 | 69 | ||
69 | static int __read_mostly vmm_exclusive = 1; | 70 | static bool __read_mostly vmm_exclusive = 1; |
70 | module_param(vmm_exclusive, bool, S_IRUGO); | 71 | module_param(vmm_exclusive, bool, S_IRUGO); |
71 | 72 | ||
72 | static int __read_mostly yield_on_hlt = 1; | 73 | static bool __read_mostly yield_on_hlt = 1; |
73 | module_param(yield_on_hlt, bool, S_IRUGO); | 74 | module_param(yield_on_hlt, bool, S_IRUGO); |
74 | 75 | ||
75 | static int __read_mostly fasteoi = 1; | 76 | static bool __read_mostly fasteoi = 1; |
76 | module_param(fasteoi, bool, S_IRUGO); | 77 | module_param(fasteoi, bool, S_IRUGO); |
77 | 78 | ||
78 | /* | 79 | /* |
@@ -80,7 +81,7 @@ module_param(fasteoi, bool, S_IRUGO); | |||
80 | * VMX and be a hypervisor for its own guests. If nested=0, guests may not | 81 | * VMX and be a hypervisor for its own guests. If nested=0, guests may not |
81 | * use VMX instructions. | 82 | * use VMX instructions. |
82 | */ | 83 | */ |
83 | static int __read_mostly nested = 0; | 84 | static bool __read_mostly nested = 0; |
84 | module_param(nested, bool, S_IRUGO); | 85 | module_param(nested, bool, S_IRUGO); |
85 | 86 | ||
86 | #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ | 87 | #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ |
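The module parameters switch from int to bool backing variables; around this release the kernel's param core began warning when a 'bool' parameter was backed by an int. The resulting convention, with an illustrative boot-time usage:

	static bool __read_mostly enable_feature = 1;
	module_param_named(feature, enable_feature, bool, S_IRUGO);

	/* e.g. on the kernel command line (name illustrative):
	 *   kvm-intel.feature=0
	 */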
@@ -1747,7 +1748,6 @@ static void setup_msrs(struct vcpu_vmx *vmx) | |||
1747 | int save_nmsrs, index; | 1748 | int save_nmsrs, index; |
1748 | unsigned long *msr_bitmap; | 1749 | unsigned long *msr_bitmap; |
1749 | 1750 | ||
1750 | vmx_load_host_state(vmx); | ||
1751 | save_nmsrs = 0; | 1751 | save_nmsrs = 0; |
1752 | #ifdef CONFIG_X86_64 | 1752 | #ifdef CONFIG_X86_64 |
1753 | if (is_long_mode(&vmx->vcpu)) { | 1753 | if (is_long_mode(&vmx->vcpu)) { |
@@ -1956,6 +1956,7 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
1956 | #endif | 1956 | #endif |
1957 | CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | | 1957 | CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | |
1958 | CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING | | 1958 | CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING | |
1959 | CPU_BASED_RDPMC_EXITING | | ||
1959 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; | 1960 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; |
1960 | /* | 1961 | /* |
1961 | * We can allow some features even when not supported by the | 1962 | * We can allow some features even when not supported by the |
@@ -2142,12 +2143,10 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
2142 | return 1; | 2143 | return 1; |
2143 | /* Otherwise falls through */ | 2144 | /* Otherwise falls through */ |
2144 | default: | 2145 | default: |
2145 | vmx_load_host_state(to_vmx(vcpu)); | ||
2146 | if (vmx_get_vmx_msr(vcpu, msr_index, pdata)) | 2146 | if (vmx_get_vmx_msr(vcpu, msr_index, pdata)) |
2147 | return 0; | 2147 | return 0; |
2148 | msr = find_msr_entry(to_vmx(vcpu), msr_index); | 2148 | msr = find_msr_entry(to_vmx(vcpu), msr_index); |
2149 | if (msr) { | 2149 | if (msr) { |
2150 | vmx_load_host_state(to_vmx(vcpu)); | ||
2151 | data = msr->data; | 2150 | data = msr->data; |
2152 | break; | 2151 | break; |
2153 | } | 2152 | } |
@@ -2171,7 +2170,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
2171 | 2170 | ||
2172 | switch (msr_index) { | 2171 | switch (msr_index) { |
2173 | case MSR_EFER: | 2172 | case MSR_EFER: |
2174 | vmx_load_host_state(vmx); | ||
2175 | ret = kvm_set_msr_common(vcpu, msr_index, data); | 2173 | ret = kvm_set_msr_common(vcpu, msr_index, data); |
2176 | break; | 2174 | break; |
2177 | #ifdef CONFIG_X86_64 | 2175 | #ifdef CONFIG_X86_64 |
@@ -2220,7 +2218,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
2220 | break; | 2218 | break; |
2221 | msr = find_msr_entry(vmx, msr_index); | 2219 | msr = find_msr_entry(vmx, msr_index); |
2222 | if (msr) { | 2220 | if (msr) { |
2223 | vmx_load_host_state(vmx); | ||
2224 | msr->data = data; | 2221 | msr->data = data; |
2225 | break; | 2222 | break; |
2226 | } | 2223 | } |
@@ -2414,7 +2411,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
2414 | CPU_BASED_USE_TSC_OFFSETING | | 2411 | CPU_BASED_USE_TSC_OFFSETING | |
2415 | CPU_BASED_MWAIT_EXITING | | 2412 | CPU_BASED_MWAIT_EXITING | |
2416 | CPU_BASED_MONITOR_EXITING | | 2413 | CPU_BASED_MONITOR_EXITING | |
2417 | CPU_BASED_INVLPG_EXITING; | 2414 | CPU_BASED_INVLPG_EXITING | |
2415 | CPU_BASED_RDPMC_EXITING; | ||
2418 | 2416 | ||
2419 | if (yield_on_hlt) | 2417 | if (yield_on_hlt) |
2420 | min |= CPU_BASED_HLT_EXITING; | 2418 | min |= CPU_BASED_HLT_EXITING; |
@@ -2716,11 +2714,13 @@ static gva_t rmode_tss_base(struct kvm *kvm) | |||
2716 | { | 2714 | { |
2717 | if (!kvm->arch.tss_addr) { | 2715 | if (!kvm->arch.tss_addr) { |
2718 | struct kvm_memslots *slots; | 2716 | struct kvm_memslots *slots; |
2717 | struct kvm_memory_slot *slot; | ||
2719 | gfn_t base_gfn; | 2718 | gfn_t base_gfn; |
2720 | 2719 | ||
2721 | slots = kvm_memslots(kvm); | 2720 | slots = kvm_memslots(kvm); |
2722 | base_gfn = slots->memslots[0].base_gfn + | 2721 | slot = id_to_memslot(slots, 0); |
2723 | kvm->memslots->memslots[0].npages - 3; | 2722 | base_gfn = slot->base_gfn + slot->npages - 3; |
2723 | |||
2724 | return base_gfn << PAGE_SHIFT; | 2724 | return base_gfn << PAGE_SHIFT; |
2725 | } | 2725 | } |
2726 | return kvm->arch.tss_addr; | 2726 | return kvm->arch.tss_addr; |
@@ -3945,12 +3945,15 @@ static bool nested_exit_on_intr(struct kvm_vcpu *vcpu) | |||
3945 | static void enable_irq_window(struct kvm_vcpu *vcpu) | 3945 | static void enable_irq_window(struct kvm_vcpu *vcpu) |
3946 | { | 3946 | { |
3947 | u32 cpu_based_vm_exec_control; | 3947 | u32 cpu_based_vm_exec_control; |
3948 | if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) | 3948 | if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { |
3949 | /* We can get here when nested_run_pending caused | 3949 | /* |
3950 | * vmx_interrupt_allowed() to return false. In this case, do | 3950 | * We get here if vmx_interrupt_allowed() said we can't |
3951 | * nothing - the interrupt will be injected later. | 3951 | * inject to L1 now because L2 must run. Ask L2 to exit |
3952 | * right after entry, so we can inject to L1 more promptly. | ||
3952 | */ | 3953 | */ |
3954 | kvm_make_request(KVM_REQ_IMMEDIATE_EXIT, vcpu); | ||
3953 | return; | 3955 | return; |
3956 | } | ||
3954 | 3957 | ||
3955 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | 3958 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); |
3956 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; | 3959 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; |
@@ -4077,11 +4080,12 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) | |||
4077 | static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) | 4080 | static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) |
4078 | { | 4081 | { |
4079 | if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { | 4082 | if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { |
4080 | struct vmcs12 *vmcs12; | 4083 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
4081 | if (to_vmx(vcpu)->nested.nested_run_pending) | 4084 | if (to_vmx(vcpu)->nested.nested_run_pending || |
4085 | (vmcs12->idt_vectoring_info_field & | ||
4086 | VECTORING_INFO_VALID_MASK)) | ||
4082 | return 0; | 4087 | return 0; |
4083 | nested_vmx_vmexit(vcpu); | 4088 | nested_vmx_vmexit(vcpu); |
4084 | vmcs12 = get_vmcs12(vcpu); | ||
4085 | vmcs12->vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT; | 4089 | vmcs12->vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT; |
4086 | vmcs12->vm_exit_intr_info = 0; | 4090 | vmcs12->vm_exit_intr_info = 0; |
4087 | /* fall through to normal code, but now in L1, not L2 */ | 4091 | /* fall through to normal code, but now in L1, not L2 */ |
@@ -4611,6 +4615,16 @@ static int handle_invlpg(struct kvm_vcpu *vcpu) | |||
4611 | return 1; | 4615 | return 1; |
4612 | } | 4616 | } |
4613 | 4617 | ||
4618 | static int handle_rdpmc(struct kvm_vcpu *vcpu) | ||
4619 | { | ||
4620 | int err; | ||
4621 | |||
4622 | err = kvm_rdpmc(vcpu); | ||
4623 | kvm_complete_insn_gp(vcpu, err); | ||
4624 | |||
4625 | return 1; | ||
4626 | } | ||
4627 | |||
4614 | static int handle_wbinvd(struct kvm_vcpu *vcpu) | 4628 | static int handle_wbinvd(struct kvm_vcpu *vcpu) |
4615 | { | 4629 | { |
4616 | skip_emulated_instruction(vcpu); | 4630 | skip_emulated_instruction(vcpu); |
@@ -5561,6 +5575,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | |||
5561 | [EXIT_REASON_HLT] = handle_halt, | 5575 | [EXIT_REASON_HLT] = handle_halt, |
5562 | [EXIT_REASON_INVD] = handle_invd, | 5576 | [EXIT_REASON_INVD] = handle_invd, |
5563 | [EXIT_REASON_INVLPG] = handle_invlpg, | 5577 | [EXIT_REASON_INVLPG] = handle_invlpg, |
5578 | [EXIT_REASON_RDPMC] = handle_rdpmc, | ||
5564 | [EXIT_REASON_VMCALL] = handle_vmcall, | 5579 | [EXIT_REASON_VMCALL] = handle_vmcall, |
5565 | [EXIT_REASON_VMCLEAR] = handle_vmclear, | 5580 | [EXIT_REASON_VMCLEAR] = handle_vmclear, |
5566 | [EXIT_REASON_VMLAUNCH] = handle_vmlaunch, | 5581 | [EXIT_REASON_VMLAUNCH] = handle_vmlaunch, |
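handle_rdpmc() slots into the reason-indexed dispatch table above. A sketch of how such a table is consumed, simplified from the usual vmx_handle_exit() shape with error handling elided:

	static int dispatch_exit(struct kvm_vcpu *vcpu, u32 exit_reason)
	{
		if (exit_reason < ARRAY_SIZE(kvm_vmx_exit_handlers) &&
		    kvm_vmx_exit_handlers[exit_reason])
			return kvm_vmx_exit_handlers[exit_reason](vcpu);

		return 0;	/* unknown exit: report to userspace */
	}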
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4c938da2ba00..14d6cadc4ba6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include "tss.h" | 26 | #include "tss.h" |
27 | #include "kvm_cache_regs.h" | 27 | #include "kvm_cache_regs.h" |
28 | #include "x86.h" | 28 | #include "x86.h" |
29 | #include "cpuid.h" | ||
29 | 30 | ||
30 | #include <linux/clocksource.h> | 31 | #include <linux/clocksource.h> |
31 | #include <linux/interrupt.h> | 32 | #include <linux/interrupt.h> |
@@ -82,15 +83,13 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); | |||
82 | #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU | 83 | #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU |
83 | 84 | ||
84 | static void update_cr8_intercept(struct kvm_vcpu *vcpu); | 85 | static void update_cr8_intercept(struct kvm_vcpu *vcpu); |
85 | static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, | ||
86 | struct kvm_cpuid_entry2 __user *entries); | ||
87 | static void process_nmi(struct kvm_vcpu *vcpu); | 86 | static void process_nmi(struct kvm_vcpu *vcpu); |
88 | 87 | ||
89 | struct kvm_x86_ops *kvm_x86_ops; | 88 | struct kvm_x86_ops *kvm_x86_ops; |
90 | EXPORT_SYMBOL_GPL(kvm_x86_ops); | 89 | EXPORT_SYMBOL_GPL(kvm_x86_ops); |
91 | 90 | ||
92 | int ignore_msrs = 0; | 91 | static bool ignore_msrs = 0; |
93 | module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); | 92 | module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); |
94 | 93 | ||
95 | bool kvm_has_tsc_control; | 94 | bool kvm_has_tsc_control; |
96 | EXPORT_SYMBOL_GPL(kvm_has_tsc_control); | 95 | EXPORT_SYMBOL_GPL(kvm_has_tsc_control); |
@@ -574,54 +573,6 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) | |||
574 | } | 573 | } |
575 | EXPORT_SYMBOL_GPL(kvm_set_xcr); | 574 | EXPORT_SYMBOL_GPL(kvm_set_xcr); |
576 | 575 | ||
577 | static bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu) | ||
578 | { | ||
579 | struct kvm_cpuid_entry2 *best; | ||
580 | |||
581 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | ||
582 | return best && (best->ecx & bit(X86_FEATURE_XSAVE)); | ||
583 | } | ||
584 | |||
585 | static bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu) | ||
586 | { | ||
587 | struct kvm_cpuid_entry2 *best; | ||
588 | |||
589 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
590 | return best && (best->ebx & bit(X86_FEATURE_SMEP)); | ||
591 | } | ||
592 | |||
593 | static bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu) | ||
594 | { | ||
595 | struct kvm_cpuid_entry2 *best; | ||
596 | |||
597 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
598 | return best && (best->ebx & bit(X86_FEATURE_FSGSBASE)); | ||
599 | } | ||
600 | |||
601 | static void update_cpuid(struct kvm_vcpu *vcpu) | ||
602 | { | ||
603 | struct kvm_cpuid_entry2 *best; | ||
604 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
605 | |||
606 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | ||
607 | if (!best) | ||
608 | return; | ||
609 | |||
610 | /* Update OSXSAVE bit */ | ||
611 | if (cpu_has_xsave && best->function == 0x1) { | ||
612 | best->ecx &= ~(bit(X86_FEATURE_OSXSAVE)); | ||
613 | if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) | ||
614 | best->ecx |= bit(X86_FEATURE_OSXSAVE); | ||
615 | } | ||
616 | |||
617 | if (apic) { | ||
618 | if (best->ecx & bit(X86_FEATURE_TSC_DEADLINE_TIMER)) | ||
619 | apic->lapic_timer.timer_mode_mask = 3 << 17; | ||
620 | else | ||
621 | apic->lapic_timer.timer_mode_mask = 1 << 17; | ||
622 | } | ||
623 | } | ||
624 | |||
625 | int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 576 | int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
626 | { | 577 | { |
627 | unsigned long old_cr4 = kvm_read_cr4(vcpu); | 578 | unsigned long old_cr4 = kvm_read_cr4(vcpu); |
@@ -655,7 +606,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
655 | kvm_mmu_reset_context(vcpu); | 606 | kvm_mmu_reset_context(vcpu); |
656 | 607 | ||
657 | if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE) | 608 | if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE) |
658 | update_cpuid(vcpu); | 609 | kvm_update_cpuid(vcpu); |
659 | 610 | ||
660 | return 0; | 611 | return 0; |
661 | } | 612 | } |
@@ -809,6 +760,21 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) | |||
809 | } | 760 | } |
810 | EXPORT_SYMBOL_GPL(kvm_get_dr); | 761 | EXPORT_SYMBOL_GPL(kvm_get_dr); |
811 | 762 | ||
763 | bool kvm_rdpmc(struct kvm_vcpu *vcpu) | ||
764 | { | ||
765 | u32 ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
766 | u64 data; | ||
767 | int err; | ||
768 | |||
769 | err = kvm_pmu_read_pmc(vcpu, ecx, &data); | ||
770 | if (err) | ||
771 | return err; | ||
772 | kvm_register_write(vcpu, VCPU_REGS_RAX, (u32)data); | ||
773 | kvm_register_write(vcpu, VCPU_REGS_RDX, data >> 32); | ||
774 | return err; | ||
775 | } | ||
776 | EXPORT_SYMBOL_GPL(kvm_rdpmc); | ||
777 | |||
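The new kvm_rdpmc() helper returns the 64-bit counter value split across RAX (low half) and RDX (high half), matching the hardware convention shared by RDPMC, RDMSR and RDTSC. A minimal standalone sketch of that split and its inverse (helper names are invented for the example):

/* Split a 64-bit value into the EAX/EDX halves and rebuild it. */
#include <stdint.h>

static void split_u64(uint64_t data, uint32_t *lo, uint32_t *hi)
{
	*lo = (uint32_t)data;		/* low 32 bits -> RAX */
	*hi = (uint32_t)(data >> 32);	/* high 32 bits -> RDX */
}

static uint64_t join_u64(uint32_t lo, uint32_t hi)
{
	return ((uint64_t)hi << 32) | lo;
}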
812 | /* | 778 | /* |
813 | * List of msr numbers which we expose to userspace through KVM_GET_MSRS | 779 | * List of msr numbers which we expose to userspace through KVM_GET_MSRS |
814 | * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. | 780 | * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. |
@@ -1358,12 +1324,11 @@ static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data) | |||
1358 | if (page_num >= blob_size) | 1324 | if (page_num >= blob_size) |
1359 | goto out; | 1325 | goto out; |
1360 | r = -ENOMEM; | 1326 | r = -ENOMEM; |
1361 | page = kzalloc(PAGE_SIZE, GFP_KERNEL); | 1327 | page = memdup_user(blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE); |
1362 | if (!page) | 1328 | if (IS_ERR(page)) { |
1329 | r = PTR_ERR(page); | ||
1363 | goto out; | 1330 | goto out; |
1364 | r = -EFAULT; | 1331 | } |
1365 | if (copy_from_user(page, blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE)) | ||
1366 | goto out_free; | ||
1367 | if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE)) | 1332 | if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE)) |
1368 | goto out_free; | 1333 | goto out_free; |
1369 | r = 0; | 1334 | r = 0; |
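This hunk is the first of several in the file converting an open-coded allocate-then-copy_from_user pair to memdup_user(), which does both steps in one call and reports failure through ERR_PTR(). A before/after sketch of the calling convention (buf, user_ptr and len are placeholders, not names from this patch):

/* Before: two calls, two separately handled failure modes. */
buf = kzalloc(len, GFP_KERNEL);
if (!buf)
	return -ENOMEM;
if (copy_from_user(buf, user_ptr, len)) {
	kfree(buf);
	return -EFAULT;
}

/* After: one call; IS_ERR()/PTR_ERR() carry -ENOMEM or -EFAULT. */
buf = memdup_user(user_ptr, len);
if (IS_ERR(buf))
	return PTR_ERR(buf);

The same conversion appears below for msr_io(), KVM_SET_XSAVE, KVM_SET_XCRS and the irqchip ioctls.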
@@ -1652,8 +1617,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1652 | * which we perfectly emulate ;-). Any other value should be at least | 1617 | * which we perfectly emulate ;-). Any other value should be at least |
1653 | * reported, some guests depend on them. | 1618 | * reported, some guests depend on them. |
1654 | */ | 1619 | */ |
1655 | case MSR_P6_EVNTSEL0: | ||
1656 | case MSR_P6_EVNTSEL1: | ||
1657 | case MSR_K7_EVNTSEL0: | 1620 | case MSR_K7_EVNTSEL0: |
1658 | case MSR_K7_EVNTSEL1: | 1621 | case MSR_K7_EVNTSEL1: |
1659 | case MSR_K7_EVNTSEL2: | 1622 | case MSR_K7_EVNTSEL2: |
@@ -1665,8 +1628,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1665 | /* at least RHEL 4 unconditionally writes to the perfctr registers, | 1628 | /* at least RHEL 4 unconditionally writes to the perfctr registers, |
1666 | * so we ignore writes to make it happy. | 1629 | * so we ignore writes to make it happy. |
1667 | */ | 1630 | */ |
1668 | case MSR_P6_PERFCTR0: | ||
1669 | case MSR_P6_PERFCTR1: | ||
1670 | case MSR_K7_PERFCTR0: | 1631 | case MSR_K7_PERFCTR0: |
1671 | case MSR_K7_PERFCTR1: | 1632 | case MSR_K7_PERFCTR1: |
1672 | case MSR_K7_PERFCTR2: | 1633 | case MSR_K7_PERFCTR2: |
@@ -1703,6 +1664,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1703 | default: | 1664 | default: |
1704 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) | 1665 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) |
1705 | return xen_hvm_config(vcpu, data); | 1666 | return xen_hvm_config(vcpu, data); |
1667 | if (kvm_pmu_msr(vcpu, msr)) | ||
1668 | return kvm_pmu_set_msr(vcpu, msr, data); | ||
1706 | if (!ignore_msrs) { | 1669 | if (!ignore_msrs) { |
1707 | pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", | 1670 | pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", |
1708 | msr, data); | 1671 | msr, data); |
@@ -1865,10 +1828,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1865 | case MSR_K8_SYSCFG: | 1828 | case MSR_K8_SYSCFG: |
1866 | case MSR_K7_HWCR: | 1829 | case MSR_K7_HWCR: |
1867 | case MSR_VM_HSAVE_PA: | 1830 | case MSR_VM_HSAVE_PA: |
1868 | case MSR_P6_PERFCTR0: | ||
1869 | case MSR_P6_PERFCTR1: | ||
1870 | case MSR_P6_EVNTSEL0: | ||
1871 | case MSR_P6_EVNTSEL1: | ||
1872 | case MSR_K7_EVNTSEL0: | 1831 | case MSR_K7_EVNTSEL0: |
1873 | case MSR_K7_PERFCTR0: | 1832 | case MSR_K7_PERFCTR0: |
1874 | case MSR_K8_INT_PENDING_MSG: | 1833 | case MSR_K8_INT_PENDING_MSG: |
@@ -1979,6 +1938,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1979 | data = 0xbe702111; | 1938 | data = 0xbe702111; |
1980 | break; | 1939 | break; |
1981 | default: | 1940 | default: |
1941 | if (kvm_pmu_msr(vcpu, msr)) | ||
1942 | return kvm_pmu_get_msr(vcpu, msr, pdata); | ||
1982 | if (!ignore_msrs) { | 1943 | if (!ignore_msrs) { |
1983 | pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); | 1944 | pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); |
1984 | return 1; | 1945 | return 1; |
@@ -2037,15 +1998,12 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs, | |||
2037 | if (msrs.nmsrs >= MAX_IO_MSRS) | 1998 | if (msrs.nmsrs >= MAX_IO_MSRS) |
2038 | goto out; | 1999 | goto out; |
2039 | 2000 | ||
2040 | r = -ENOMEM; | ||
2041 | size = sizeof(struct kvm_msr_entry) * msrs.nmsrs; | 2001 | size = sizeof(struct kvm_msr_entry) * msrs.nmsrs; |
2042 | entries = kmalloc(size, GFP_KERNEL); | 2002 | entries = memdup_user(user_msrs->entries, size); |
2043 | if (!entries) | 2003 | if (IS_ERR(entries)) { |
2004 | r = PTR_ERR(entries); | ||
2044 | goto out; | 2005 | goto out; |
2045 | 2006 | } | |
2046 | r = -EFAULT; | ||
2047 | if (copy_from_user(entries, user_msrs->entries, size)) | ||
2048 | goto out_free; | ||
2049 | 2007 | ||
2050 | r = n = __msr_io(vcpu, &msrs, entries, do_msr); | 2008 | r = n = __msr_io(vcpu, &msrs, entries, do_msr); |
2051 | if (r < 0) | 2009 | if (r < 0) |
@@ -2265,466 +2223,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | |||
2265 | vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu); | 2223 | vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu); |
2266 | } | 2224 | } |
2267 | 2225 | ||
2268 | static int is_efer_nx(void) | ||
2269 | { | ||
2270 | unsigned long long efer = 0; | ||
2271 | |||
2272 | rdmsrl_safe(MSR_EFER, &efer); | ||
2273 | return efer & EFER_NX; | ||
2274 | } | ||
2275 | |||
2276 | static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) | ||
2277 | { | ||
2278 | int i; | ||
2279 | struct kvm_cpuid_entry2 *e, *entry; | ||
2280 | |||
2281 | entry = NULL; | ||
2282 | for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { | ||
2283 | e = &vcpu->arch.cpuid_entries[i]; | ||
2284 | if (e->function == 0x80000001) { | ||
2285 | entry = e; | ||
2286 | break; | ||
2287 | } | ||
2288 | } | ||
2289 | if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) { | ||
2290 | entry->edx &= ~(1 << 20); | ||
2291 | printk(KERN_INFO "kvm: guest NX capability removed\n"); | ||
2292 | } | ||
2293 | } | ||
2294 | |||
2295 | /* when an old userspace process fills a new kernel module */ | ||
2296 | static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | ||
2297 | struct kvm_cpuid *cpuid, | ||
2298 | struct kvm_cpuid_entry __user *entries) | ||
2299 | { | ||
2300 | int r, i; | ||
2301 | struct kvm_cpuid_entry *cpuid_entries; | ||
2302 | |||
2303 | r = -E2BIG; | ||
2304 | if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) | ||
2305 | goto out; | ||
2306 | r = -ENOMEM; | ||
2307 | cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) * cpuid->nent); | ||
2308 | if (!cpuid_entries) | ||
2309 | goto out; | ||
2310 | r = -EFAULT; | ||
2311 | if (copy_from_user(cpuid_entries, entries, | ||
2312 | cpuid->nent * sizeof(struct kvm_cpuid_entry))) | ||
2313 | goto out_free; | ||
2314 | for (i = 0; i < cpuid->nent; i++) { | ||
2315 | vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function; | ||
2316 | vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax; | ||
2317 | vcpu->arch.cpuid_entries[i].ebx = cpuid_entries[i].ebx; | ||
2318 | vcpu->arch.cpuid_entries[i].ecx = cpuid_entries[i].ecx; | ||
2319 | vcpu->arch.cpuid_entries[i].edx = cpuid_entries[i].edx; | ||
2320 | vcpu->arch.cpuid_entries[i].index = 0; | ||
2321 | vcpu->arch.cpuid_entries[i].flags = 0; | ||
2322 | vcpu->arch.cpuid_entries[i].padding[0] = 0; | ||
2323 | vcpu->arch.cpuid_entries[i].padding[1] = 0; | ||
2324 | vcpu->arch.cpuid_entries[i].padding[2] = 0; | ||
2325 | } | ||
2326 | vcpu->arch.cpuid_nent = cpuid->nent; | ||
2327 | cpuid_fix_nx_cap(vcpu); | ||
2328 | r = 0; | ||
2329 | kvm_apic_set_version(vcpu); | ||
2330 | kvm_x86_ops->cpuid_update(vcpu); | ||
2331 | update_cpuid(vcpu); | ||
2332 | |||
2333 | out_free: | ||
2334 | vfree(cpuid_entries); | ||
2335 | out: | ||
2336 | return r; | ||
2337 | } | ||
2338 | |||
2339 | static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, | ||
2340 | struct kvm_cpuid2 *cpuid, | ||
2341 | struct kvm_cpuid_entry2 __user *entries) | ||
2342 | { | ||
2343 | int r; | ||
2344 | |||
2345 | r = -E2BIG; | ||
2346 | if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) | ||
2347 | goto out; | ||
2348 | r = -EFAULT; | ||
2349 | if (copy_from_user(&vcpu->arch.cpuid_entries, entries, | ||
2350 | cpuid->nent * sizeof(struct kvm_cpuid_entry2))) | ||
2351 | goto out; | ||
2352 | vcpu->arch.cpuid_nent = cpuid->nent; | ||
2353 | kvm_apic_set_version(vcpu); | ||
2354 | kvm_x86_ops->cpuid_update(vcpu); | ||
2355 | update_cpuid(vcpu); | ||
2356 | return 0; | ||
2357 | |||
2358 | out: | ||
2359 | return r; | ||
2360 | } | ||
2361 | |||
2362 | static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, | ||
2363 | struct kvm_cpuid2 *cpuid, | ||
2364 | struct kvm_cpuid_entry2 __user *entries) | ||
2365 | { | ||
2366 | int r; | ||
2367 | |||
2368 | r = -E2BIG; | ||
2369 | if (cpuid->nent < vcpu->arch.cpuid_nent) | ||
2370 | goto out; | ||
2371 | r = -EFAULT; | ||
2372 | if (copy_to_user(entries, &vcpu->arch.cpuid_entries, | ||
2373 | vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2))) | ||
2374 | goto out; | ||
2375 | return 0; | ||
2376 | |||
2377 | out: | ||
2378 | cpuid->nent = vcpu->arch.cpuid_nent; | ||
2379 | return r; | ||
2380 | } | ||
2381 | |||
2382 | static void cpuid_mask(u32 *word, int wordnum) | ||
2383 | { | ||
2384 | *word &= boot_cpu_data.x86_capability[wordnum]; | ||
2385 | } | ||
2386 | |||
2387 | static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, | ||
2388 | u32 index) | ||
2389 | { | ||
2390 | entry->function = function; | ||
2391 | entry->index = index; | ||
2392 | cpuid_count(entry->function, entry->index, | ||
2393 | &entry->eax, &entry->ebx, &entry->ecx, &entry->edx); | ||
2394 | entry->flags = 0; | ||
2395 | } | ||
2396 | |||
2397 | static bool supported_xcr0_bit(unsigned bit) | ||
2398 | { | ||
2399 | u64 mask = ((u64)1 << bit); | ||
2400 | |||
2401 | return mask & (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) & host_xcr0; | ||
2402 | } | ||
2403 | |||
2404 | #define F(x) bit(X86_FEATURE_##x) | ||
2405 | |||
2406 | static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | ||
2407 | u32 index, int *nent, int maxnent) | ||
2408 | { | ||
2409 | unsigned f_nx = is_efer_nx() ? F(NX) : 0; | ||
2410 | #ifdef CONFIG_X86_64 | ||
2411 | unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL) | ||
2412 | ? F(GBPAGES) : 0; | ||
2413 | unsigned f_lm = F(LM); | ||
2414 | #else | ||
2415 | unsigned f_gbpages = 0; | ||
2416 | unsigned f_lm = 0; | ||
2417 | #endif | ||
2418 | unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; | ||
2419 | |||
2420 | /* cpuid 1.edx */ | ||
2421 | const u32 kvm_supported_word0_x86_features = | ||
2422 | F(FPU) | F(VME) | F(DE) | F(PSE) | | ||
2423 | F(TSC) | F(MSR) | F(PAE) | F(MCE) | | ||
2424 | F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) | | ||
2425 | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | | ||
2426 | F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLSH) | | ||
2427 | 0 /* Reserved, DS, ACPI */ | F(MMX) | | ||
2428 | F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) | | ||
2429 | 0 /* HTT, TM, Reserved, PBE */; | ||
2430 | /* cpuid 0x80000001.edx */ | ||
2431 | const u32 kvm_supported_word1_x86_features = | ||
2432 | F(FPU) | F(VME) | F(DE) | F(PSE) | | ||
2433 | F(TSC) | F(MSR) | F(PAE) | F(MCE) | | ||
2434 | F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) | | ||
2435 | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | | ||
2436 | F(PAT) | F(PSE36) | 0 /* Reserved */ | | ||
2437 | f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | | ||
2438 | F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp | | ||
2439 | 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); | ||
2440 | /* cpuid 1.ecx */ | ||
2441 | const u32 kvm_supported_word4_x86_features = | ||
2442 | F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ | | ||
2443 | 0 /* DS-CPL, VMX, SMX, EST */ | | ||
2444 | 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | | ||
2445 | 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ | | ||
2446 | 0 /* Reserved, DCA */ | F(XMM4_1) | | ||
2447 | F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | | ||
2448 | 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) | | ||
2449 | F(F16C) | F(RDRAND); | ||
2450 | /* cpuid 0x80000001.ecx */ | ||
2451 | const u32 kvm_supported_word6_x86_features = | ||
2452 | F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ | | ||
2453 | F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | | ||
2454 | F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) | | ||
2455 | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); | ||
2456 | |||
2457 | /* cpuid 0xC0000001.edx */ | ||
2458 | const u32 kvm_supported_word5_x86_features = | ||
2459 | F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | | ||
2460 | F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) | | ||
2461 | F(PMM) | F(PMM_EN); | ||
2462 | |||
2463 | /* cpuid 7.0.ebx */ | ||
2464 | const u32 kvm_supported_word9_x86_features = | ||
2465 | F(SMEP) | F(FSGSBASE) | F(ERMS); | ||
2466 | |||
2467 | /* all calls to cpuid_count() should be made on the same cpu */ | ||
2468 | get_cpu(); | ||
2469 | do_cpuid_1_ent(entry, function, index); | ||
2470 | ++*nent; | ||
2471 | |||
2472 | switch (function) { | ||
2473 | case 0: | ||
2474 | entry->eax = min(entry->eax, (u32)0xd); | ||
2475 | break; | ||
2476 | case 1: | ||
2477 | entry->edx &= kvm_supported_word0_x86_features; | ||
2478 | cpuid_mask(&entry->edx, 0); | ||
2479 | entry->ecx &= kvm_supported_word4_x86_features; | ||
2480 | cpuid_mask(&entry->ecx, 4); | ||
2481 | /* we support x2apic emulation even if the host does not | ||
2482 | * support it, since we emulate x2apic in software */ | ||
2483 | entry->ecx |= F(X2APIC); | ||
2484 | break; | ||
2485 | /* function 2 entries are STATEFUL. That is, repeated cpuid commands | ||
2486 | * may return different values. This forces us to get_cpu() before | ||
2487 | * issuing the first command, and also to emulate this annoying behavior | ||
2488 | * in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */ | ||
2489 | case 2: { | ||
2490 | int t, times = entry->eax & 0xff; | ||
2491 | |||
2492 | entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; | ||
2493 | entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; | ||
2494 | for (t = 1; t < times && *nent < maxnent; ++t) { | ||
2495 | do_cpuid_1_ent(&entry[t], function, 0); | ||
2496 | entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; | ||
2497 | ++*nent; | ||
2498 | } | ||
2499 | break; | ||
2500 | } | ||
2501 | /* function 4 has additional index. */ | ||
2502 | case 4: { | ||
2503 | int i, cache_type; | ||
2504 | |||
2505 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
2506 | /* read more entries until cache_type is zero */ | ||
2507 | for (i = 1; *nent < maxnent; ++i) { | ||
2508 | cache_type = entry[i - 1].eax & 0x1f; | ||
2509 | if (!cache_type) | ||
2510 | break; | ||
2511 | do_cpuid_1_ent(&entry[i], function, i); | ||
2512 | entry[i].flags |= | ||
2513 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
2514 | ++*nent; | ||
2515 | } | ||
2516 | break; | ||
2517 | } | ||
2518 | case 7: { | ||
2519 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
2520 | /* Mask ebx against host capability word 9 */ | ||
2521 | if (index == 0) { | ||
2522 | entry->ebx &= kvm_supported_word9_x86_features; | ||
2523 | cpuid_mask(&entry->ebx, 9); | ||
2524 | } else | ||
2525 | entry->ebx = 0; | ||
2526 | entry->eax = 0; | ||
2527 | entry->ecx = 0; | ||
2528 | entry->edx = 0; | ||
2529 | break; | ||
2530 | } | ||
2531 | case 9: | ||
2532 | break; | ||
2533 | /* function 0xb has additional index. */ | ||
2534 | case 0xb: { | ||
2535 | int i, level_type; | ||
2536 | |||
2537 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
2538 | /* read more entries until level_type is zero */ | ||
2539 | for (i = 1; *nent < maxnent; ++i) { | ||
2540 | level_type = entry[i - 1].ecx & 0xff00; | ||
2541 | if (!level_type) | ||
2542 | break; | ||
2543 | do_cpuid_1_ent(&entry[i], function, i); | ||
2544 | entry[i].flags |= | ||
2545 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
2546 | ++*nent; | ||
2547 | } | ||
2548 | break; | ||
2549 | } | ||
2550 | case 0xd: { | ||
2551 | int idx, i; | ||
2552 | |||
2553 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
2554 | for (idx = 1, i = 1; *nent < maxnent && idx < 64; ++idx) { | ||
2555 | do_cpuid_1_ent(&entry[i], function, idx); | ||
2556 | if (entry[i].eax == 0 || !supported_xcr0_bit(idx)) | ||
2557 | continue; | ||
2558 | entry[i].flags |= | ||
2559 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
2560 | ++*nent; | ||
2561 | ++i; | ||
2562 | } | ||
2563 | break; | ||
2564 | } | ||
2565 | case KVM_CPUID_SIGNATURE: { | ||
2566 | char signature[12] = "KVMKVMKVM\0\0"; | ||
2567 | u32 *sigptr = (u32 *)signature; | ||
2568 | entry->eax = 0; | ||
2569 | entry->ebx = sigptr[0]; | ||
2570 | entry->ecx = sigptr[1]; | ||
2571 | entry->edx = sigptr[2]; | ||
2572 | break; | ||
2573 | } | ||
2574 | case KVM_CPUID_FEATURES: | ||
2575 | entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) | | ||
2576 | (1 << KVM_FEATURE_NOP_IO_DELAY) | | ||
2577 | (1 << KVM_FEATURE_CLOCKSOURCE2) | | ||
2578 | (1 << KVM_FEATURE_ASYNC_PF) | | ||
2579 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); | ||
2580 | |||
2581 | if (sched_info_on()) | ||
2582 | entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); | ||
2583 | |||
2584 | entry->ebx = 0; | ||
2585 | entry->ecx = 0; | ||
2586 | entry->edx = 0; | ||
2587 | break; | ||
2588 | case 0x80000000: | ||
2589 | entry->eax = min(entry->eax, 0x8000001a); | ||
2590 | break; | ||
2591 | case 0x80000001: | ||
2592 | entry->edx &= kvm_supported_word1_x86_features; | ||
2593 | cpuid_mask(&entry->edx, 1); | ||
2594 | entry->ecx &= kvm_supported_word6_x86_features; | ||
2595 | cpuid_mask(&entry->ecx, 6); | ||
2596 | break; | ||
2597 | case 0x80000008: { | ||
2598 | unsigned g_phys_as = (entry->eax >> 16) & 0xff; | ||
2599 | unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U); | ||
2600 | unsigned phys_as = entry->eax & 0xff; | ||
2601 | |||
2602 | if (!g_phys_as) | ||
2603 | g_phys_as = phys_as; | ||
2604 | entry->eax = g_phys_as | (virt_as << 8); | ||
2605 | entry->ebx = entry->edx = 0; | ||
2606 | break; | ||
2607 | } | ||
2608 | case 0x80000019: | ||
2609 | entry->ecx = entry->edx = 0; | ||
2610 | break; | ||
2611 | case 0x8000001a: | ||
2612 | break; | ||
2613 | case 0x8000001d: | ||
2614 | break; | ||
2615 | /* Add support for Centaur's CPUID instruction */ | ||
2616 | case 0xC0000000: | ||
2617 | /* Just support up to 0xC0000004 for now */ | ||
2618 | entry->eax = min(entry->eax, 0xC0000004); | ||
2619 | break; | ||
2620 | case 0xC0000001: | ||
2621 | entry->edx &= kvm_supported_word5_x86_features; | ||
2622 | cpuid_mask(&entry->edx, 5); | ||
2623 | break; | ||
2624 | case 3: /* Processor serial number */ | ||
2625 | case 5: /* MONITOR/MWAIT */ | ||
2626 | case 6: /* Thermal management */ | ||
2627 | case 0xA: /* Architectural Performance Monitoring */ | ||
2628 | case 0x80000007: /* Advanced power management */ | ||
2629 | case 0xC0000002: | ||
2630 | case 0xC0000003: | ||
2631 | case 0xC0000004: | ||
2632 | default: | ||
2633 | entry->eax = entry->ebx = entry->ecx = entry->edx = 0; | ||
2634 | break; | ||
2635 | } | ||
2636 | |||
2637 | kvm_x86_ops->set_supported_cpuid(function, entry); | ||
2638 | |||
2639 | put_cpu(); | ||
2640 | } | ||
2641 | |||
2642 | #undef F | ||
2643 | |||
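The feature tables above (moved out of this file by the commit) lean on a local F() macro that token-pastes a feature name onto X86_FEATURE_ to get its bit position, so each supported-features word reads like the CPUID manual. An illustrative standalone version of the same pattern (the MY_FEATURE_* constants are made up):

/* F() token-pasting pattern: feature names in, bitmask out. */
#define MY_FEATURE_FPU	0
#define MY_FEATURE_VME	1
#define MY_FEATURE_DE	2

#define F(x) (1U << MY_FEATURE_##x)

static const unsigned int supported_word0 = F(FPU) | F(VME) | F(DE); /* == 0x7 */

#undef F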
2644 | static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, | ||
2645 | struct kvm_cpuid_entry2 __user *entries) | ||
2646 | { | ||
2647 | struct kvm_cpuid_entry2 *cpuid_entries; | ||
2648 | int limit, nent = 0, r = -E2BIG; | ||
2649 | u32 func; | ||
2650 | |||
2651 | if (cpuid->nent < 1) | ||
2652 | goto out; | ||
2653 | if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) | ||
2654 | cpuid->nent = KVM_MAX_CPUID_ENTRIES; | ||
2655 | r = -ENOMEM; | ||
2656 | cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); | ||
2657 | if (!cpuid_entries) | ||
2658 | goto out; | ||
2659 | |||
2660 | do_cpuid_ent(&cpuid_entries[0], 0, 0, &nent, cpuid->nent); | ||
2661 | limit = cpuid_entries[0].eax; | ||
2662 | for (func = 1; func <= limit && nent < cpuid->nent; ++func) | ||
2663 | do_cpuid_ent(&cpuid_entries[nent], func, 0, | ||
2664 | &nent, cpuid->nent); | ||
2665 | r = -E2BIG; | ||
2666 | if (nent >= cpuid->nent) | ||
2667 | goto out_free; | ||
2668 | |||
2669 | do_cpuid_ent(&cpuid_entries[nent], 0x80000000, 0, &nent, cpuid->nent); | ||
2670 | limit = cpuid_entries[nent - 1].eax; | ||
2671 | for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func) | ||
2672 | do_cpuid_ent(&cpuid_entries[nent], func, 0, | ||
2673 | &nent, cpuid->nent); | ||
2674 | |||
2675 | |||
2676 | |||
2677 | r = -E2BIG; | ||
2678 | if (nent >= cpuid->nent) | ||
2679 | goto out_free; | ||
2680 | |||
2681 | /* Add support for Centaur's CPUID instruction. */ | ||
2682 | if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR) { | ||
2683 | do_cpuid_ent(&cpuid_entries[nent], 0xC0000000, 0, | ||
2684 | &nent, cpuid->nent); | ||
2685 | |||
2686 | r = -E2BIG; | ||
2687 | if (nent >= cpuid->nent) | ||
2688 | goto out_free; | ||
2689 | |||
2690 | limit = cpuid_entries[nent - 1].eax; | ||
2691 | for (func = 0xC0000001; | ||
2692 | func <= limit && nent < cpuid->nent; ++func) | ||
2693 | do_cpuid_ent(&cpuid_entries[nent], func, 0, | ||
2694 | &nent, cpuid->nent); | ||
2695 | |||
2696 | r = -E2BIG; | ||
2697 | if (nent >= cpuid->nent) | ||
2698 | goto out_free; | ||
2699 | } | ||
2700 | |||
2701 | do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent, | ||
2702 | cpuid->nent); | ||
2703 | |||
2704 | r = -E2BIG; | ||
2705 | if (nent >= cpuid->nent) | ||
2706 | goto out_free; | ||
2707 | |||
2708 | do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_FEATURES, 0, &nent, | ||
2709 | cpuid->nent); | ||
2710 | |||
2711 | r = -E2BIG; | ||
2712 | if (nent >= cpuid->nent) | ||
2713 | goto out_free; | ||
2714 | |||
2715 | r = -EFAULT; | ||
2716 | if (copy_to_user(entries, cpuid_entries, | ||
2717 | nent * sizeof(struct kvm_cpuid_entry2))) | ||
2718 | goto out_free; | ||
2719 | cpuid->nent = nent; | ||
2720 | r = 0; | ||
2721 | |||
2722 | out_free: | ||
2723 | vfree(cpuid_entries); | ||
2724 | out: | ||
2725 | return r; | ||
2726 | } | ||
2727 | |||
2728 | static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, | 2226 | static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, |
2729 | struct kvm_lapic_state *s) | 2227 | struct kvm_lapic_state *s) |
2730 | { | 2228 | { |
@@ -3042,13 +2540,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
3042 | r = -EINVAL; | 2540 | r = -EINVAL; |
3043 | if (!vcpu->arch.apic) | 2541 | if (!vcpu->arch.apic) |
3044 | goto out; | 2542 | goto out; |
3045 | u.lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); | 2543 | u.lapic = memdup_user(argp, sizeof(*u.lapic)); |
3046 | r = -ENOMEM; | 2544 | if (IS_ERR(u.lapic)) { |
3047 | if (!u.lapic) | 2545 | r = PTR_ERR(u.lapic); |
3048 | goto out; | ||
3049 | r = -EFAULT; | ||
3050 | if (copy_from_user(u.lapic, argp, sizeof(struct kvm_lapic_state))) | ||
3051 | goto out; | 2546 | goto out; |
2547 | } | ||
2548 | |||
3052 | r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic); | 2549 | r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic); |
3053 | if (r) | 2550 | if (r) |
3054 | goto out; | 2551 | goto out; |
@@ -3227,14 +2724,11 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
3227 | break; | 2724 | break; |
3228 | } | 2725 | } |
3229 | case KVM_SET_XSAVE: { | 2726 | case KVM_SET_XSAVE: { |
3230 | u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL); | 2727 | u.xsave = memdup_user(argp, sizeof(*u.xsave)); |
3231 | r = -ENOMEM; | 2728 | if (IS_ERR(u.xsave)) { |
3232 | if (!u.xsave) | 2729 | r = PTR_ERR(u.xsave); |
3233 | break; | 2730 | goto out; |
3234 | 2731 | } | |
3235 | r = -EFAULT; | ||
3236 | if (copy_from_user(u.xsave, argp, sizeof(struct kvm_xsave))) | ||
3237 | break; | ||
3238 | 2732 | ||
3239 | r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave); | 2733 | r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave); |
3240 | break; | 2734 | break; |
@@ -3255,15 +2749,11 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
3255 | break; | 2749 | break; |
3256 | } | 2750 | } |
3257 | case KVM_SET_XCRS: { | 2751 | case KVM_SET_XCRS: { |
3258 | u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL); | 2752 | u.xcrs = memdup_user(argp, sizeof(*u.xcrs)); |
3259 | r = -ENOMEM; | 2753 | if (IS_ERR(u.xcrs)) { |
3260 | if (!u.xcrs) | 2754 | r = PTR_ERR(u.xcrs); |
3261 | break; | 2755 | goto out; |
3262 | 2756 | } | |
3263 | r = -EFAULT; | ||
3264 | if (copy_from_user(u.xcrs, argp, | ||
3265 | sizeof(struct kvm_xcrs))) | ||
3266 | break; | ||
3267 | 2757 | ||
3268 | r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs); | 2758 | r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs); |
3269 | break; | 2759 | break; |
@@ -3460,16 +2950,59 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm, | |||
3460 | return 0; | 2950 | return 0; |
3461 | } | 2951 | } |
3462 | 2952 | ||
2953 | /** | ||
2954 | * write_protect_slot - write protect a slot for dirty logging | ||
2955 | * @kvm: the kvm instance | ||
2956 | * @memslot: the slot we protect | ||
2957 | * @dirty_bitmap: the bitmap indicating which pages are dirty | ||
2958 | * @nr_dirty_pages: the number of dirty pages | ||
2959 | * | ||
2960 | * We have two ways to find all sptes to protect: | ||
2961 | * 1. Use kvm_mmu_slot_remove_write_access() which walks all shadow pages and | ||
2962 | * checks ones that have a spte mapping a page in the slot. | ||
2963 | * 2. Use kvm_mmu_rmap_write_protect() for each gfn found in the bitmap. | ||
2964 | * | ||
2965 | * Generally speaking, if there are relatively few dirty pages compared to | ||
2966 | * the number of shadow pages, we should use the latter. | ||
2967 | * | ||
2968 | * Note that letting others write into a page marked dirty in the old bitmap | ||
2969 | * via a stale TLB entry is not a problem. That page will be write protected | ||
2970 | * again when we flush the TLB, and will then be reported dirty to user | ||
2971 | * space by copying the old bitmap. | ||
2972 | */ | ||
2973 | static void write_protect_slot(struct kvm *kvm, | ||
2974 | struct kvm_memory_slot *memslot, | ||
2975 | unsigned long *dirty_bitmap, | ||
2976 | unsigned long nr_dirty_pages) | ||
2977 | { | ||
2978 | /* Not many dirty pages compared to # of shadow pages. */ | ||
2979 | if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) { | ||
2980 | unsigned long gfn_offset; | ||
2981 | |||
2982 | for_each_set_bit(gfn_offset, dirty_bitmap, memslot->npages) { | ||
2983 | unsigned long gfn = memslot->base_gfn + gfn_offset; | ||
2984 | |||
2985 | spin_lock(&kvm->mmu_lock); | ||
2986 | kvm_mmu_rmap_write_protect(kvm, gfn, memslot); | ||
2987 | spin_unlock(&kvm->mmu_lock); | ||
2988 | } | ||
2989 | kvm_flush_remote_tlbs(kvm); | ||
2990 | } else { | ||
2991 | spin_lock(&kvm->mmu_lock); | ||
2992 | kvm_mmu_slot_remove_write_access(kvm, memslot->id); | ||
2993 | spin_unlock(&kvm->mmu_lock); | ||
2994 | } | ||
2995 | } | ||
2996 | |||
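The heuristic in write_protect_slot() compares the two costs directly: per-gfn rmap protection does work proportional to nr_dirty_pages, while the full slot walk is proportional to the number of shadow pages, so whichever count is smaller wins. A self-contained sketch of the same decision (the protect_* callbacks are stand-ins, not KVM functions):

#include <limits.h>
#define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)

static int bit_is_set(const unsigned long *bm, unsigned long n)
{
	return (bm[n / BITS_PER_LONG] >> (n % BITS_PER_LONG)) & 1;
}

static void protect_slot(const unsigned long *dirty, unsigned long npages,
			 unsigned long nr_dirty, unsigned long nr_shadow)
{
	unsigned long i;

	if (nr_dirty < nr_shadow) {
		for (i = 0; i < npages; i++)	/* protect only set bits */
			if (bit_is_set(dirty, i))
				; /* protect_one_gfn(base_gfn + i); */
	} else {
		; /* protect_whole_slot(); one pass over all shadow pages */
	}
}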
3463 | /* | 2997 | /* |
3464 | * Get (and clear) the dirty memory log for a memory slot. | 2998 | * Get (and clear) the dirty memory log for a memory slot. |
3465 | */ | 2999 | */ |
3466 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | 3000 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, |
3467 | struct kvm_dirty_log *log) | 3001 | struct kvm_dirty_log *log) |
3468 | { | 3002 | { |
3469 | int r, i; | 3003 | int r; |
3470 | struct kvm_memory_slot *memslot; | 3004 | struct kvm_memory_slot *memslot; |
3471 | unsigned long n; | 3005 | unsigned long n, nr_dirty_pages; |
3472 | unsigned long is_dirty = 0; | ||
3473 | 3006 | ||
3474 | mutex_lock(&kvm->slots_lock); | 3007 | mutex_lock(&kvm->slots_lock); |
3475 | 3008 | ||
@@ -3477,43 +3010,41 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
3477 | if (log->slot >= KVM_MEMORY_SLOTS) | 3010 | if (log->slot >= KVM_MEMORY_SLOTS) |
3478 | goto out; | 3011 | goto out; |
3479 | 3012 | ||
3480 | memslot = &kvm->memslots->memslots[log->slot]; | 3013 | memslot = id_to_memslot(kvm->memslots, log->slot); |
3481 | r = -ENOENT; | 3014 | r = -ENOENT; |
3482 | if (!memslot->dirty_bitmap) | 3015 | if (!memslot->dirty_bitmap) |
3483 | goto out; | 3016 | goto out; |
3484 | 3017 | ||
3485 | n = kvm_dirty_bitmap_bytes(memslot); | 3018 | n = kvm_dirty_bitmap_bytes(memslot); |
3486 | 3019 | nr_dirty_pages = memslot->nr_dirty_pages; | |
3487 | for (i = 0; !is_dirty && i < n/sizeof(long); i++) | ||
3488 | is_dirty = memslot->dirty_bitmap[i]; | ||
3489 | 3020 | ||
3490 | /* If nothing is dirty, don't bother messing with page tables. */ | 3021 | /* If nothing is dirty, don't bother messing with page tables. */ |
3491 | if (is_dirty) { | 3022 | if (nr_dirty_pages) { |
3492 | struct kvm_memslots *slots, *old_slots; | 3023 | struct kvm_memslots *slots, *old_slots; |
3493 | unsigned long *dirty_bitmap; | 3024 | unsigned long *dirty_bitmap, *dirty_bitmap_head; |
3494 | 3025 | ||
3495 | dirty_bitmap = memslot->dirty_bitmap_head; | 3026 | dirty_bitmap = memslot->dirty_bitmap; |
3496 | if (memslot->dirty_bitmap == dirty_bitmap) | 3027 | dirty_bitmap_head = memslot->dirty_bitmap_head; |
3497 | dirty_bitmap += n / sizeof(long); | 3028 | if (dirty_bitmap == dirty_bitmap_head) |
3498 | memset(dirty_bitmap, 0, n); | 3029 | dirty_bitmap_head += n / sizeof(long); |
3030 | memset(dirty_bitmap_head, 0, n); | ||
3499 | 3031 | ||
3500 | r = -ENOMEM; | 3032 | r = -ENOMEM; |
3501 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | 3033 | slots = kmemdup(kvm->memslots, sizeof(*kvm->memslots), GFP_KERNEL); |
3502 | if (!slots) | 3034 | if (!slots) |
3503 | goto out; | 3035 | goto out; |
3504 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); | 3036 | |
3505 | slots->memslots[log->slot].dirty_bitmap = dirty_bitmap; | 3037 | memslot = id_to_memslot(slots, log->slot); |
3506 | slots->generation++; | 3038 | memslot->nr_dirty_pages = 0; |
3039 | memslot->dirty_bitmap = dirty_bitmap_head; | ||
3040 | update_memslots(slots, NULL); | ||
3507 | 3041 | ||
3508 | old_slots = kvm->memslots; | 3042 | old_slots = kvm->memslots; |
3509 | rcu_assign_pointer(kvm->memslots, slots); | 3043 | rcu_assign_pointer(kvm->memslots, slots); |
3510 | synchronize_srcu_expedited(&kvm->srcu); | 3044 | synchronize_srcu_expedited(&kvm->srcu); |
3511 | dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap; | ||
3512 | kfree(old_slots); | 3045 | kfree(old_slots); |
3513 | 3046 | ||
3514 | spin_lock(&kvm->mmu_lock); | 3047 | write_protect_slot(kvm, memslot, dirty_bitmap, nr_dirty_pages); |
3515 | kvm_mmu_slot_remove_write_access(kvm, log->slot); | ||
3516 | spin_unlock(&kvm->mmu_lock); | ||
3517 | 3048 | ||
3518 | r = -EFAULT; | 3049 | r = -EFAULT; |
3519 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) | 3050 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) |
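The rewritten dirty-log path double-buffers the bitmap: the slot's allocation holds two halves, and each GET_DIRTY_LOG call publishes a freshly cleared half while the old half is copied to user space. Publication goes through rcu_assign_pointer() plus synchronize_srcu_expedited(), so no writer can still be marking bits in the half being copied out. A compressed sketch of the half-flip (field names follow the code above):

/* Flip to the other half of a two-half bitmap allocation. */
unsigned long *cur  = memslot->dirty_bitmap;	/* half now full of bits */
unsigned long *head = memslot->dirty_bitmap_head;

if (cur == head)			/* currently on the first half, */
	head += n / sizeof(long);	/* so the clean half is the second */
memset(head, 0, n);
/* publish 'head' as the new dirty_bitmap under RCU; 'cur' still holds
 * the dirty bits to copy to user space and to write protect */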
@@ -3658,14 +3189,14 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
3658 | } | 3189 | } |
3659 | case KVM_GET_IRQCHIP: { | 3190 | case KVM_GET_IRQCHIP: { |
3660 | /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ | 3191 | /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ |
3661 | struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL); | 3192 | struct kvm_irqchip *chip; |
3662 | 3193 | ||
3663 | r = -ENOMEM; | 3194 | chip = memdup_user(argp, sizeof(*chip)); |
3664 | if (!chip) | 3195 | if (IS_ERR(chip)) { |
3196 | r = PTR_ERR(chip); | ||
3665 | goto out; | 3197 | goto out; |
3666 | r = -EFAULT; | 3198 | } |
3667 | if (copy_from_user(chip, argp, sizeof *chip)) | 3199 | |
3668 | goto get_irqchip_out; | ||
3669 | r = -ENXIO; | 3200 | r = -ENXIO; |
3670 | if (!irqchip_in_kernel(kvm)) | 3201 | if (!irqchip_in_kernel(kvm)) |
3671 | goto get_irqchip_out; | 3202 | goto get_irqchip_out; |
@@ -3684,14 +3215,14 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
3684 | } | 3215 | } |
3685 | case KVM_SET_IRQCHIP: { | 3216 | case KVM_SET_IRQCHIP: { |
3686 | /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ | 3217 | /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ |
3687 | struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL); | 3218 | struct kvm_irqchip *chip; |
3688 | 3219 | ||
3689 | r = -ENOMEM; | 3220 | chip = memdup_user(argp, sizeof(*chip)); |
3690 | if (!chip) | 3221 | if (IS_ERR(chip)) { |
3222 | r = PTR_ERR(chip); | ||
3691 | goto out; | 3223 | goto out; |
3692 | r = -EFAULT; | 3224 | } |
3693 | if (copy_from_user(chip, argp, sizeof *chip)) | 3225 | |
3694 | goto set_irqchip_out; | ||
3695 | r = -ENXIO; | 3226 | r = -ENXIO; |
3696 | if (!irqchip_in_kernel(kvm)) | 3227 | if (!irqchip_in_kernel(kvm)) |
3697 | goto set_irqchip_out; | 3228 | goto set_irqchip_out; |
@@ -3898,12 +3429,7 @@ void kvm_get_segment(struct kvm_vcpu *vcpu, | |||
3898 | kvm_x86_ops->get_segment(vcpu, var, seg); | 3429 | kvm_x86_ops->get_segment(vcpu, var, seg); |
3899 | } | 3430 | } |
3900 | 3431 | ||
3901 | static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) | 3432 | gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) |
3902 | { | ||
3903 | return gpa; | ||
3904 | } | ||
3905 | |||
3906 | static gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) | ||
3907 | { | 3433 | { |
3908 | gpa_t t_gpa; | 3434 | gpa_t t_gpa; |
3909 | struct x86_exception exception; | 3435 | struct x86_exception exception; |
@@ -4087,7 +3613,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
4087 | ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); | 3613 | ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); |
4088 | if (ret < 0) | 3614 | if (ret < 0) |
4089 | return 0; | 3615 | return 0; |
4090 | kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1); | 3616 | kvm_mmu_pte_write(vcpu, gpa, val, bytes); |
4091 | return 1; | 3617 | return 1; |
4092 | } | 3618 | } |
4093 | 3619 | ||
@@ -4324,7 +3850,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, | |||
4324 | if (!exchanged) | 3850 | if (!exchanged) |
4325 | return X86EMUL_CMPXCHG_FAILED; | 3851 | return X86EMUL_CMPXCHG_FAILED; |
4326 | 3852 | ||
4327 | kvm_mmu_pte_write(vcpu, gpa, new, bytes, 1); | 3853 | kvm_mmu_pte_write(vcpu, gpa, new, bytes); |
4328 | 3854 | ||
4329 | return X86EMUL_CONTINUE; | 3855 | return X86EMUL_CONTINUE; |
4330 | 3856 | ||
@@ -4349,32 +3875,24 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) | |||
4349 | return r; | 3875 | return r; |
4350 | } | 3876 | } |
4351 | 3877 | ||
4352 | 3878 | static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size, | |
4353 | static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt, | 3879 | unsigned short port, void *val, |
4354 | int size, unsigned short port, void *val, | 3880 | unsigned int count, bool in) |
4355 | unsigned int count) | ||
4356 | { | 3881 | { |
4357 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | 3882 | trace_kvm_pio(!in, port, size, count); |
4358 | |||
4359 | if (vcpu->arch.pio.count) | ||
4360 | goto data_avail; | ||
4361 | |||
4362 | trace_kvm_pio(0, port, size, count); | ||
4363 | 3883 | ||
4364 | vcpu->arch.pio.port = port; | 3884 | vcpu->arch.pio.port = port; |
4365 | vcpu->arch.pio.in = 1; | 3885 | vcpu->arch.pio.in = in; |
4366 | vcpu->arch.pio.count = count; | 3886 | vcpu->arch.pio.count = count; |
4367 | vcpu->arch.pio.size = size; | 3887 | vcpu->arch.pio.size = size; |
4368 | 3888 | ||
4369 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { | 3889 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { |
4370 | data_avail: | ||
4371 | memcpy(val, vcpu->arch.pio_data, size * count); | ||
4372 | vcpu->arch.pio.count = 0; | 3890 | vcpu->arch.pio.count = 0; |
4373 | return 1; | 3891 | return 1; |
4374 | } | 3892 | } |
4375 | 3893 | ||
4376 | vcpu->run->exit_reason = KVM_EXIT_IO; | 3894 | vcpu->run->exit_reason = KVM_EXIT_IO; |
4377 | vcpu->run->io.direction = KVM_EXIT_IO_IN; | 3895 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; |
4378 | vcpu->run->io.size = size; | 3896 | vcpu->run->io.size = size; |
4379 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | 3897 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; |
4380 | vcpu->run->io.count = count; | 3898 | vcpu->run->io.count = count; |
@@ -4383,36 +3901,37 @@ static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt, | |||
4383 | return 0; | 3901 | return 0; |
4384 | } | 3902 | } |
4385 | 3903 | ||
4386 | static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt, | 3904 | static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt, |
4387 | int size, unsigned short port, | 3905 | int size, unsigned short port, void *val, |
4388 | const void *val, unsigned int count) | 3906 | unsigned int count) |
4389 | { | 3907 | { |
4390 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | 3908 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); |
3909 | int ret; | ||
4391 | 3910 | ||
4392 | trace_kvm_pio(1, port, size, count); | 3911 | if (vcpu->arch.pio.count) |
4393 | 3912 | goto data_avail; | |
4394 | vcpu->arch.pio.port = port; | ||
4395 | vcpu->arch.pio.in = 0; | ||
4396 | vcpu->arch.pio.count = count; | ||
4397 | vcpu->arch.pio.size = size; | ||
4398 | |||
4399 | memcpy(vcpu->arch.pio_data, val, size * count); | ||
4400 | 3913 | ||
4401 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { | 3914 | ret = emulator_pio_in_out(vcpu, size, port, val, count, true); |
3915 | if (ret) { | ||
3916 | data_avail: | ||
3917 | memcpy(val, vcpu->arch.pio_data, size * count); | ||
4402 | vcpu->arch.pio.count = 0; | 3918 | vcpu->arch.pio.count = 0; |
4403 | return 1; | 3919 | return 1; |
4404 | } | 3920 | } |
4405 | 3921 | ||
4406 | vcpu->run->exit_reason = KVM_EXIT_IO; | ||
4407 | vcpu->run->io.direction = KVM_EXIT_IO_OUT; | ||
4408 | vcpu->run->io.size = size; | ||
4409 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | ||
4410 | vcpu->run->io.count = count; | ||
4411 | vcpu->run->io.port = port; | ||
4412 | |||
4413 | return 0; | 3922 | return 0; |
4414 | } | 3923 | } |
4415 | 3924 | ||
3925 | static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt, | ||
3926 | int size, unsigned short port, | ||
3927 | const void *val, unsigned int count) | ||
3928 | { | ||
3929 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
3930 | |||
3931 | memcpy(vcpu->arch.pio_data, val, size * count); | ||
3932 | return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false); | ||
3933 | } | ||
3934 | |||
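The PIO rework folds the duplicated IN/OUT paths into one emulator_pio_in_out() helper; only the direction of the data copy through the shared pio_data page differs at the two call sites (OUT copies in before the call, IN copies out after, once data is available). A minimal standalone sketch of that shape (pio_buf and the return convention are simplifications, not the KVM API):

#include <stdbool.h>
#include <string.h>

static unsigned char pio_buf[4096];	/* stands in for vcpu->arch.pio_data */

static int pio_in_out(int size, unsigned int count, bool in)
{
	/* record port/size/count/direction; return 1 if handled in the
	 * kernel, 0 if we must exit to user space to complete the I/O */
	return 1;
}

static int pio_out(const void *val, int size, unsigned int count)
{
	memcpy(pio_buf, val, (size_t)size * count);	/* data goes in first */
	return pio_in_out(size, count, false);
}

static int pio_in(void *val, int size, unsigned int count)
{
	int ret = pio_in_out(size, count, true);

	if (ret)					/* data ready now */
		memcpy(val, pio_buf, (size_t)size * count);
	return ret;
}

The (void *)val cast in emulator_pio_out_emulated() is safe because the shared helper never writes through val on the OUT path.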
4416 | static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) | 3935 | static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) |
4417 | { | 3936 | { |
4418 | return kvm_x86_ops->get_segment_base(vcpu, seg); | 3937 | return kvm_x86_ops->get_segment_base(vcpu, seg); |
@@ -4627,6 +4146,12 @@ static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, | |||
4627 | return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data); | 4146 | return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data); |
4628 | } | 4147 | } |
4629 | 4148 | ||
4149 | static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt, | ||
4150 | u32 pmc, u64 *pdata) | ||
4151 | { | ||
4152 | return kvm_pmu_read_pmc(emul_to_vcpu(ctxt), pmc, pdata); | ||
4153 | } | ||
4154 | |||
4630 | static void emulator_halt(struct x86_emulate_ctxt *ctxt) | 4155 | static void emulator_halt(struct x86_emulate_ctxt *ctxt) |
4631 | { | 4156 | { |
4632 | emul_to_vcpu(ctxt)->arch.halt_request = 1; | 4157 | emul_to_vcpu(ctxt)->arch.halt_request = 1; |
@@ -4679,6 +4204,7 @@ static struct x86_emulate_ops emulate_ops = { | |||
4679 | .set_dr = emulator_set_dr, | 4204 | .set_dr = emulator_set_dr, |
4680 | .set_msr = emulator_set_msr, | 4205 | .set_msr = emulator_set_msr, |
4681 | .get_msr = emulator_get_msr, | 4206 | .get_msr = emulator_get_msr, |
4207 | .read_pmc = emulator_read_pmc, | ||
4682 | .halt = emulator_halt, | 4208 | .halt = emulator_halt, |
4683 | .wbinvd = emulator_wbinvd, | 4209 | .wbinvd = emulator_wbinvd, |
4684 | .fix_hypercall = emulator_fix_hypercall, | 4210 | .fix_hypercall = emulator_fix_hypercall, |
@@ -4836,6 +4362,50 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) | |||
4836 | return false; | 4362 | return false; |
4837 | } | 4363 | } |
4838 | 4364 | ||
4365 | static bool retry_instruction(struct x86_emulate_ctxt *ctxt, | ||
4366 | unsigned long cr2, int emulation_type) | ||
4367 | { | ||
4368 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
4369 | unsigned long last_retry_eip, last_retry_addr, gpa = cr2; | ||
4370 | |||
4371 | last_retry_eip = vcpu->arch.last_retry_eip; | ||
4372 | last_retry_addr = vcpu->arch.last_retry_addr; | ||
4373 | |||
4374 | /* | ||
4375 | * If the emulation was caused by #PF and the faulting instruction does | ||
4376 | * not write page tables, the VM-EXIT was caused by shadow-page write | ||
4377 | * protection; we can zap the shadow page and retry the instruction | ||
4378 | * directly. | ||
4379 | * | ||
4380 | * Note: if the guest uses a non-page-table-modifying instruction | ||
4381 | * on the PDE that points to the instruction, then we will unmap | ||
4382 | * the instruction and enter an infinite loop. So we cache the | ||
4383 | * last retried eip and the last fault address; if we see that eip | ||
4384 | * and address again, we can break out of the potential infinite | ||
4385 | * loop. | ||
4386 | */ | ||
4387 | vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0; | ||
4388 | |||
4389 | if (!(emulation_type & EMULTYPE_RETRY)) | ||
4390 | return false; | ||
4391 | |||
4392 | if (x86_page_table_writing_insn(ctxt)) | ||
4393 | return false; | ||
4394 | |||
4395 | if (ctxt->eip == last_retry_eip && last_retry_addr == cr2) | ||
4396 | return false; | ||
4397 | |||
4398 | vcpu->arch.last_retry_eip = ctxt->eip; | ||
4399 | vcpu->arch.last_retry_addr = cr2; | ||
4400 | |||
4401 | if (!vcpu->arch.mmu.direct_map) | ||
4402 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); | ||
4403 | |||
4404 | kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); | ||
4405 | |||
4406 | return true; | ||
4407 | } | ||
4408 | |||
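retry_instruction() keeps a single-entry cache of the last retried (eip, fault address) pair: hitting the same fault at the same rip twice in a row means unprotecting the page did not help, so emulation must proceed instead of looping. The guard reduces to a few lines (standalone sketch; the real code also checks EMULTYPE_RETRY and page-table-writing instructions):

/* One-entry cache that breaks zap-and-retry livelocks. */
static unsigned long last_eip, last_addr;

static bool should_retry(unsigned long eip, unsigned long addr)
{
	if (eip == last_eip && addr == last_addr)
		return false;	/* same fault again: give up and emulate */
	last_eip = eip;
	last_addr = addr;
	return true;
}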
4839 | int x86_emulate_instruction(struct kvm_vcpu *vcpu, | 4409 | int x86_emulate_instruction(struct kvm_vcpu *vcpu, |
4840 | unsigned long cr2, | 4410 | unsigned long cr2, |
4841 | int emulation_type, | 4411 | int emulation_type, |
@@ -4877,6 +4447,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
4877 | return EMULATE_DONE; | 4447 | return EMULATE_DONE; |
4878 | } | 4448 | } |
4879 | 4449 | ||
4450 | if (retry_instruction(ctxt, cr2, emulation_type)) | ||
4451 | return EMULATE_DONE; | ||
4452 | |||
4880 | /* this is needed for vmware backdoor interface to work since it | 4453 | /* this is needed for vmware backdoor interface to work since it |
4881 | changes registers values during IO operation */ | 4454 | changes registers values during IO operation */ |
4882 | if (vcpu->arch.emulate_regs_need_sync_from_vcpu) { | 4455 | if (vcpu->arch.emulate_regs_need_sync_from_vcpu) { |
@@ -5095,17 +4668,17 @@ static void kvm_timer_init(void) | |||
5095 | 4668 | ||
5096 | static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu); | 4669 | static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu); |
5097 | 4670 | ||
5098 | static int kvm_is_in_guest(void) | 4671 | int kvm_is_in_guest(void) |
5099 | { | 4672 | { |
5100 | return percpu_read(current_vcpu) != NULL; | 4673 | return __this_cpu_read(current_vcpu) != NULL; |
5101 | } | 4674 | } |
5102 | 4675 | ||
5103 | static int kvm_is_user_mode(void) | 4676 | static int kvm_is_user_mode(void) |
5104 | { | 4677 | { |
5105 | int user_mode = 3; | 4678 | int user_mode = 3; |
5106 | 4679 | ||
5107 | if (percpu_read(current_vcpu)) | 4680 | if (__this_cpu_read(current_vcpu)) |
5108 | user_mode = kvm_x86_ops->get_cpl(percpu_read(current_vcpu)); | 4681 | user_mode = kvm_x86_ops->get_cpl(__this_cpu_read(current_vcpu)); |
5109 | 4682 | ||
5110 | return user_mode != 0; | 4683 | return user_mode != 0; |
5111 | } | 4684 | } |
@@ -5114,8 +4687,8 @@ static unsigned long kvm_get_guest_ip(void) | |||
5114 | { | 4687 | { |
5115 | unsigned long ip = 0; | 4688 | unsigned long ip = 0; |
5116 | 4689 | ||
5117 | if (percpu_read(current_vcpu)) | 4690 | if (__this_cpu_read(current_vcpu)) |
5118 | ip = kvm_rip_read(percpu_read(current_vcpu)); | 4691 | ip = kvm_rip_read(__this_cpu_read(current_vcpu)); |
5119 | 4692 | ||
5120 | return ip; | 4693 | return ip; |
5121 | } | 4694 | } |
@@ -5128,13 +4701,13 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = { | |||
5128 | 4701 | ||
5129 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu) | 4702 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu) |
5130 | { | 4703 | { |
5131 | percpu_write(current_vcpu, vcpu); | 4704 | __this_cpu_write(current_vcpu, vcpu); |
5132 | } | 4705 | } |
5133 | EXPORT_SYMBOL_GPL(kvm_before_handle_nmi); | 4706 | EXPORT_SYMBOL_GPL(kvm_before_handle_nmi); |
5134 | 4707 | ||
5135 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu) | 4708 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu) |
5136 | { | 4709 | { |
5137 | percpu_write(current_vcpu, NULL); | 4710 | __this_cpu_write(current_vcpu, NULL); |
5138 | } | 4711 | } |
5139 | EXPORT_SYMBOL_GPL(kvm_after_handle_nmi); | 4712 | EXPORT_SYMBOL_GPL(kvm_after_handle_nmi); |
5140 | 4713 | ||
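These hunks move from the old x86-only percpu_read()/percpu_write() accessors to the generic __this_cpu_read()/__this_cpu_write() API. The double-underscore variants assume the caller already cannot migrate between CPUs, which holds here because the NMI hooks run with preemption disabled. A usage sketch (the demo_* names are invented):

#include <linux/percpu.h>
#include <linux/types.h>

struct kvm_vcpu;

static DEFINE_PER_CPU(struct kvm_vcpu *, demo_vcpu);

static void demo_enter(struct kvm_vcpu *vcpu)
{
	__this_cpu_write(demo_vcpu, vcpu);	/* preemption already off */
}

static bool demo_in_guest(void)
{
	return __this_cpu_read(demo_vcpu) != NULL;
}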
@@ -5233,15 +4806,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) | |||
5233 | } | 4806 | } |
5234 | EXPORT_SYMBOL_GPL(kvm_emulate_halt); | 4807 | EXPORT_SYMBOL_GPL(kvm_emulate_halt); |
5235 | 4808 | ||
5236 | static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0, | ||
5237 | unsigned long a1) | ||
5238 | { | ||
5239 | if (is_long_mode(vcpu)) | ||
5240 | return a0; | ||
5241 | else | ||
5242 | return a0 | ((gpa_t)a1 << 32); | ||
5243 | } | ||
5244 | |||
5245 | int kvm_hv_hypercall(struct kvm_vcpu *vcpu) | 4809 | int kvm_hv_hypercall(struct kvm_vcpu *vcpu) |
5246 | { | 4810 | { |
5247 | u64 param, ingpa, outgpa, ret; | 4811 | u64 param, ingpa, outgpa, ret; |
@@ -5337,9 +4901,6 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) | |||
5337 | case KVM_HC_VAPIC_POLL_IRQ: | 4901 | case KVM_HC_VAPIC_POLL_IRQ: |
5338 | ret = 0; | 4902 | ret = 0; |
5339 | break; | 4903 | break; |
5340 | case KVM_HC_MMU_OP: | ||
5341 | r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret); | ||
5342 | break; | ||
5343 | default: | 4904 | default: |
5344 | ret = -KVM_ENOSYS; | 4905 | ret = -KVM_ENOSYS; |
5345 | break; | 4906 | break; |
@@ -5369,125 +4930,6 @@ int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) | |||
5369 | return emulator_write_emulated(ctxt, rip, instruction, 3, NULL); | 4930 | return emulator_write_emulated(ctxt, rip, instruction, 3, NULL); |
5370 | } | 4931 | } |
5371 | 4932 | ||
5372 | static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) | ||
5373 | { | ||
5374 | struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i]; | ||
5375 | int j, nent = vcpu->arch.cpuid_nent; | ||
5376 | |||
5377 | e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT; | ||
5378 | /* when no next entry is found, the current entry[i] is reselected */ | ||
5379 | for (j = i + 1; ; j = (j + 1) % nent) { | ||
5380 | struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j]; | ||
5381 | if (ej->function == e->function) { | ||
5382 | ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; | ||
5383 | return j; | ||
5384 | } | ||
5385 | } | ||
5386 | return 0; /* silence gcc, even though control never reaches here */ | ||
5387 | } | ||
5388 | |||
5389 | /* find an entry with matching function, matching index (if needed), and that | ||
5390 | * should be read next (if it's stateful) */ | ||
5391 | static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e, | ||
5392 | u32 function, u32 index) | ||
5393 | { | ||
5394 | if (e->function != function) | ||
5395 | return 0; | ||
5396 | if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index) | ||
5397 | return 0; | ||
5398 | if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) && | ||
5399 | !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT)) | ||
5400 | return 0; | ||
5401 | return 1; | ||
5402 | } | ||
5403 | |||
5404 | struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, | ||
5405 | u32 function, u32 index) | ||
5406 | { | ||
5407 | int i; | ||
5408 | struct kvm_cpuid_entry2 *best = NULL; | ||
5409 | |||
5410 | for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { | ||
5411 | struct kvm_cpuid_entry2 *e; | ||
5412 | |||
5413 | e = &vcpu->arch.cpuid_entries[i]; | ||
5414 | if (is_matching_cpuid_entry(e, function, index)) { | ||
5415 | if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) | ||
5416 | move_to_next_stateful_cpuid_entry(vcpu, i); | ||
5417 | best = e; | ||
5418 | break; | ||
5419 | } | ||
5420 | } | ||
5421 | return best; | ||
5422 | } | ||
5423 | EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry); | ||
5424 | |||
5425 | int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) | ||
5426 | { | ||
5427 | struct kvm_cpuid_entry2 *best; | ||
5428 | |||
5429 | best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0); | ||
5430 | if (!best || best->eax < 0x80000008) | ||
5431 | goto not_found; | ||
5432 | best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); | ||
5433 | if (best) | ||
5434 | return best->eax & 0xff; | ||
5435 | not_found: | ||
5436 | return 36; | ||
5437 | } | ||
5438 | |||
5439 | /* | ||
5440 | * If no match is found, check whether we exceed the vCPU's limit | ||
5441 | * and return the content of the highest valid _standard_ leaf instead. | ||
5442 | * This is to satisfy the CPUID specification. | ||
5443 | */ | ||
5444 | static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu, | ||
5445 | u32 function, u32 index) | ||
5446 | { | ||
5447 | struct kvm_cpuid_entry2 *maxlevel; | ||
5448 | |||
5449 | maxlevel = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0); | ||
5450 | if (!maxlevel || maxlevel->eax >= function) | ||
5451 | return NULL; | ||
5452 | if (function & 0x80000000) { | ||
5453 | maxlevel = kvm_find_cpuid_entry(vcpu, 0, 0); | ||
5454 | if (!maxlevel) | ||
5455 | return NULL; | ||
5456 | } | ||
5457 | return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index); | ||
5458 | } | ||
5459 | |||
5460 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) | ||
5461 | { | ||
5462 | u32 function, index; | ||
5463 | struct kvm_cpuid_entry2 *best; | ||
5464 | |||
5465 | function = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
5466 | index = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
5467 | kvm_register_write(vcpu, VCPU_REGS_RAX, 0); | ||
5468 | kvm_register_write(vcpu, VCPU_REGS_RBX, 0); | ||
5469 | kvm_register_write(vcpu, VCPU_REGS_RCX, 0); | ||
5470 | kvm_register_write(vcpu, VCPU_REGS_RDX, 0); | ||
5471 | best = kvm_find_cpuid_entry(vcpu, function, index); | ||
5472 | |||
5473 | if (!best) | ||
5474 | best = check_cpuid_limit(vcpu, function, index); | ||
5475 | |||
5476 | if (best) { | ||
5477 | kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax); | ||
5478 | kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx); | ||
5479 | kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx); | ||
5480 | kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx); | ||
5481 | } | ||
5482 | kvm_x86_ops->skip_emulated_instruction(vcpu); | ||
5483 | trace_kvm_cpuid(function, | ||
5484 | kvm_register_read(vcpu, VCPU_REGS_RAX), | ||
5485 | kvm_register_read(vcpu, VCPU_REGS_RBX), | ||
5486 | kvm_register_read(vcpu, VCPU_REGS_RCX), | ||
5487 | kvm_register_read(vcpu, VCPU_REGS_RDX)); | ||
5488 | } | ||
5489 | EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); | ||
5490 | |||
5491 | /* | 4933 | /* |
5492 | * Check if userspace requested an interrupt window, and that the | 4934 | * Check if userspace requested an interrupt window, and that the |
5493 | * interrupt window is open. | 4935 | * interrupt window is open. |
@@ -5648,6 +5090,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5648 | int r; | 5090 | int r; |
5649 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && | 5091 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && |
5650 | vcpu->run->request_interrupt_window; | 5092 | vcpu->run->request_interrupt_window; |
5093 | bool req_immediate_exit = 0; | ||
5651 | 5094 | ||
5652 | if (vcpu->requests) { | 5095 | if (vcpu->requests) { |
5653 | if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) | 5096 | if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) |
@@ -5687,7 +5130,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5687 | record_steal_time(vcpu); | 5130 | record_steal_time(vcpu); |
5688 | if (kvm_check_request(KVM_REQ_NMI, vcpu)) | 5131 | if (kvm_check_request(KVM_REQ_NMI, vcpu)) |
5689 | process_nmi(vcpu); | 5132 | process_nmi(vcpu); |
5690 | 5133 | req_immediate_exit = | |
5134 | kvm_check_request(KVM_REQ_IMMEDIATE_EXIT, vcpu); | ||
5135 | if (kvm_check_request(KVM_REQ_PMU, vcpu)) | ||
5136 | kvm_handle_pmu_event(vcpu); | ||
5137 | if (kvm_check_request(KVM_REQ_PMI, vcpu)) | ||
5138 | kvm_deliver_pmi(vcpu); | ||
5691 | } | 5139 | } |
5692 | 5140 | ||
5693 | r = kvm_mmu_reload(vcpu); | 5141 | r = kvm_mmu_reload(vcpu); |
@@ -5738,6 +5186,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5738 | 5186 | ||
5739 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | 5187 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); |
5740 | 5188 | ||
5189 | if (req_immediate_exit) | ||
5190 | smp_send_reschedule(vcpu->cpu); | ||
5191 | |||
5741 | kvm_guest_enter(); | 5192 | kvm_guest_enter(); |
5742 | 5193 | ||
5743 | if (unlikely(vcpu->arch.switch_db_regs)) { | 5194 | if (unlikely(vcpu->arch.switch_db_regs)) { |
@@ -5943,10 +5394,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
5943 | if (r <= 0) | 5394 | if (r <= 0) |
5944 | goto out; | 5395 | goto out; |
5945 | 5396 | ||
5946 | if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) | ||
5947 | kvm_register_write(vcpu, VCPU_REGS_RAX, | ||
5948 | kvm_run->hypercall.ret); | ||
5949 | |||
5950 | r = __vcpu_run(vcpu); | 5397 | r = __vcpu_run(vcpu); |
5951 | 5398 | ||
5952 | out: | 5399 | out: |
@@ -6148,7 +5595,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
6148 | mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; | 5595 | mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; |
6149 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); | 5596 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); |
6150 | if (sregs->cr4 & X86_CR4_OSXSAVE) | 5597 | if (sregs->cr4 & X86_CR4_OSXSAVE) |
6151 | update_cpuid(vcpu); | 5598 | kvm_update_cpuid(vcpu); |
6152 | 5599 | ||
6153 | idx = srcu_read_lock(&vcpu->kvm->srcu); | 5600 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
6154 | if (!is_long_mode(vcpu) && is_pae(vcpu)) { | 5601 | if (!is_long_mode(vcpu) && is_pae(vcpu)) { |
@@ -6425,6 +5872,8 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) | |||
6425 | kvm_async_pf_hash_reset(vcpu); | 5872 | kvm_async_pf_hash_reset(vcpu); |
6426 | vcpu->arch.apf.halted = false; | 5873 | vcpu->arch.apf.halted = false; |
6427 | 5874 | ||
5875 | kvm_pmu_reset(vcpu); | ||
5876 | |||
6428 | return kvm_x86_ops->vcpu_reset(vcpu); | 5877 | return kvm_x86_ops->vcpu_reset(vcpu); |
6429 | } | 5878 | } |
6430 | 5879 | ||
@@ -6473,10 +5922,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
6473 | kvm = vcpu->kvm; | 5922 | kvm = vcpu->kvm; |
6474 | 5923 | ||
6475 | vcpu->arch.emulate_ctxt.ops = &emulate_ops; | 5924 | vcpu->arch.emulate_ctxt.ops = &emulate_ops; |
6476 | vcpu->arch.walk_mmu = &vcpu->arch.mmu; | ||
6477 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; | ||
6478 | vcpu->arch.mmu.translate_gpa = translate_gpa; | ||
6479 | vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa; | ||
6480 | if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu)) | 5925 | if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu)) |
6481 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 5926 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
6482 | else | 5927 | else |
@@ -6513,6 +5958,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
6513 | goto fail_free_mce_banks; | 5958 | goto fail_free_mce_banks; |
6514 | 5959 | ||
6515 | kvm_async_pf_hash_reset(vcpu); | 5960 | kvm_async_pf_hash_reset(vcpu); |
5961 | kvm_pmu_init(vcpu); | ||
6516 | 5962 | ||
6517 | return 0; | 5963 | return 0; |
6518 | fail_free_mce_banks: | 5964 | fail_free_mce_banks: |
@@ -6531,6 +5977,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) | |||
6531 | { | 5977 | { |
6532 | int idx; | 5978 | int idx; |
6533 | 5979 | ||
5980 | kvm_pmu_destroy(vcpu); | ||
6534 | kfree(vcpu->arch.mce_banks); | 5981 | kfree(vcpu->arch.mce_banks); |
6535 | kvm_free_lapic(vcpu); | 5982 | kvm_free_lapic(vcpu); |
6536 | idx = srcu_read_lock(&vcpu->kvm->srcu); | 5983 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index d36fe237c665..cb80c293cdd8 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -33,9 +33,6 @@ static inline bool kvm_exception_is_soft(unsigned int nr) | |||
33 | return (nr == BP_VECTOR) || (nr == OF_VECTOR); | 33 | return (nr == BP_VECTOR) || (nr == OF_VECTOR); |
34 | } | 34 | } |
35 | 35 | ||
36 | struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, | ||
37 | u32 function, u32 index); | ||
38 | |||
39 | static inline bool is_protmode(struct kvm_vcpu *vcpu) | 36 | static inline bool is_protmode(struct kvm_vcpu *vcpu) |
40 | { | 37 | { |
41 | return kvm_read_cr0_bits(vcpu, X86_CR0_PE); | 38 | return kvm_read_cr0_bits(vcpu, X86_CR0_PE); |
@@ -125,4 +122,6 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, | |||
125 | gva_t addr, void *val, unsigned int bytes, | 122 | gva_t addr, void *val, unsigned int bytes, |
126 | struct x86_exception *exception); | 123 | struct x86_exception *exception); |
127 | 124 | ||
125 | extern u64 host_xcr0; | ||
126 | |||
128 | #endif | 127 | #endif |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index cf4603ba866f..642d8805bc1b 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -856,18 +856,23 @@ static void __init lguest_init_IRQ(void) | |||
856 | } | 856 | } |
857 | 857 | ||
858 | /* | 858 | /* |
859 | * With CONFIG_SPARSE_IRQ, interrupt descriptors are allocated as-needed, so | 859 | * Interrupt descriptors are allocated as-needed, but low-numbered ones are |
860 | * rather than set them in lguest_init_IRQ we are called here every time an | 860 | * reserved by the generic x86 code. So we ignore irq_alloc_desc_at if it |
861 | * lguest device needs an interrupt. | 861 | * tells us the irq is already used: other errors (i.e. ENOMEM) we take |
862 | * | 862 | * seriously. |
863 | * FIXME: irq_alloc_desc_at() can fail due to lack of memory, we should | ||
864 | * pass that up! | ||
865 | */ | 863 | */ |
866 | void lguest_setup_irq(unsigned int irq) | 864 | int lguest_setup_irq(unsigned int irq) |
867 | { | 865 | { |
868 | irq_alloc_desc_at(irq, 0); | 866 | int err; |
867 | |||
868 | /* Returns -ve error or vector number. */ | ||
869 | err = irq_alloc_desc_at(irq, 0); | ||
870 | if (err < 0 && err != -EEXIST) | ||
871 | return err; | ||
872 | |||
869 | irq_set_chip_and_handler_name(irq, &lguest_irq_controller, | 873 | irq_set_chip_and_handler_name(irq, &lguest_irq_controller, |
870 | handle_level_irq, "level"); | 874 | handle_level_irq, "level"); |
875 | return 0; | ||
871 | } | 876 | } |
872 | 877 | ||
873 | /* | 878 | /* |
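lguest_setup_irq() now distinguishes a benign -EEXIST (the low descriptor was pre-reserved by the generic x86 code) from real failures such as -ENOMEM, which the old FIXME never propagated. A small sketch of that pattern, with a stub standing in for irq_alloc_desc_at() (the stub and its failure modes are assumptions for illustration only):

        #include <errno.h>
        #include <stdio.h>

        /* stand-in for irq_alloc_desc_at(): low descriptors are pre-reserved */
        static int fake_alloc_desc_at(unsigned int irq)
        {
                if (irq < 16)
                        return -EEXIST;
                return (int)irq;        /* success: allocated descriptor */
        }

        static int setup_irq(unsigned int irq)
        {
                int err = fake_alloc_desc_at(irq);

                /* already-reserved is fine; ENOMEM and friends must go up */
                if (err < 0 && err != -EEXIST)
                        return err;
                /* ... set chip and handler here ... */
                return 0;
        }

        int main(void)
        {
                printf("irq 5:  %d\n", setup_irq(5));    /* 0: -EEXIST tolerated */
                printf("irq 42: %d\n", setup_irq(42));   /* 0: fresh allocation */
                return 0;
        }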
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index a298914058f9..6cabf6570d64 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -3,6 +3,7 @@ | |||
3 | #include <linux/ioport.h> | 3 | #include <linux/ioport.h> |
4 | #include <linux/swap.h> | 4 | #include <linux/swap.h> |
5 | #include <linux/memblock.h> | 5 | #include <linux/memblock.h> |
6 | #include <linux/bootmem.h> /* for max_low_pfn */ | ||
6 | 7 | ||
7 | #include <asm/cacheflush.h> | 8 | #include <asm/cacheflush.h> |
8 | #include <asm/e820.h> | 9 | #include <asm/e820.h> |
@@ -15,6 +16,7 @@ | |||
15 | #include <asm/tlbflush.h> | 16 | #include <asm/tlbflush.h> |
16 | #include <asm/tlb.h> | 17 | #include <asm/tlb.h> |
17 | #include <asm/proto.h> | 18 | #include <asm/proto.h> |
19 | #include <asm/dma.h> /* for MAX_DMA_PFN */ | ||
18 | 20 | ||
19 | unsigned long __initdata pgt_buf_start; | 21 | unsigned long __initdata pgt_buf_start; |
20 | unsigned long __meminitdata pgt_buf_end; | 22 | unsigned long __meminitdata pgt_buf_end; |
@@ -392,3 +394,24 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
392 | free_init_pages("initrd memory", start, PAGE_ALIGN(end)); | 394 | free_init_pages("initrd memory", start, PAGE_ALIGN(end)); |
393 | } | 395 | } |
394 | #endif | 396 | #endif |
397 | |||
398 | void __init zone_sizes_init(void) | ||
399 | { | ||
400 | unsigned long max_zone_pfns[MAX_NR_ZONES]; | ||
401 | |||
402 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); | ||
403 | |||
404 | #ifdef CONFIG_ZONE_DMA | ||
405 | max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; | ||
406 | #endif | ||
407 | #ifdef CONFIG_ZONE_DMA32 | ||
408 | max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; | ||
409 | #endif | ||
410 | max_zone_pfns[ZONE_NORMAL] = max_low_pfn; | ||
411 | #ifdef CONFIG_HIGHMEM | ||
412 | max_zone_pfns[ZONE_HIGHMEM] = max_pfn; | ||
413 | #endif | ||
414 | |||
415 | free_area_init_nodes(max_zone_pfns); | ||
416 | } | ||
417 | |||
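zone_sizes_init() is now shared between 32-bit and 64-bit: each zone's upper PFN bound is collected into one array, where a zero entry means the zone is absent, and the array is handed to free_area_init_nodes(). A user-space sketch of the same computation, with illustrative stand-in values (the real MAX_DMA_PFN and MAX_DMA32_PFN come from <asm/dma.h>):

        #include <stdio.h>
        #include <string.h>

        enum { ZONE_DMA, ZONE_DMA32, ZONE_NORMAL, ZONE_HIGHMEM, MAX_NR_ZONES };

        int main(void)
        {
                unsigned long max_zone_pfns[MAX_NR_ZONES];
                /* assumed stand-ins, 4K pages: 16MB, 4GB, 8GB of RAM */
                unsigned long max_dma_pfn   = 16UL << (20 - 12);
                unsigned long max_dma32_pfn = 4UL  << (30 - 12);
                unsigned long max_low_pfn   = 8UL  << (30 - 12);

                memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
                max_zone_pfns[ZONE_DMA]    = max_dma_pfn;
                max_zone_pfns[ZONE_DMA32]  = max_dma32_pfn;
                max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
                /* ZONE_HIGHMEM stays 0 here, as on a 64-bit config */

                for (int i = 0; i < MAX_NR_ZONES; i++)
                        printf("zone %d ends at pfn %#lx\n", i, max_zone_pfns[i]);
                return 0;
        }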
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 0c1da394a634..8663f6c47ccb 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -668,22 +668,6 @@ void __init initmem_init(void) | |||
668 | } | 668 | } |
669 | #endif /* !CONFIG_NEED_MULTIPLE_NODES */ | 669 | #endif /* !CONFIG_NEED_MULTIPLE_NODES */ |
670 | 670 | ||
671 | static void __init zone_sizes_init(void) | ||
672 | { | ||
673 | unsigned long max_zone_pfns[MAX_NR_ZONES]; | ||
674 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); | ||
675 | #ifdef CONFIG_ZONE_DMA | ||
676 | max_zone_pfns[ZONE_DMA] = | ||
677 | virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; | ||
678 | #endif | ||
679 | max_zone_pfns[ZONE_NORMAL] = max_low_pfn; | ||
680 | #ifdef CONFIG_HIGHMEM | ||
681 | max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; | ||
682 | #endif | ||
683 | |||
684 | free_area_init_nodes(max_zone_pfns); | ||
685 | } | ||
686 | |||
687 | void __init setup_bootmem_allocator(void) | 671 | void __init setup_bootmem_allocator(void) |
688 | { | 672 | { |
689 | printk(KERN_INFO " mapped low ram: 0 - %08lx\n", | 673 | printk(KERN_INFO " mapped low ram: 0 - %08lx\n", |
@@ -754,6 +738,17 @@ void __init mem_init(void) | |||
754 | #ifdef CONFIG_FLATMEM | 738 | #ifdef CONFIG_FLATMEM |
755 | BUG_ON(!mem_map); | 739 | BUG_ON(!mem_map); |
756 | #endif | 740 | #endif |
741 | /* | ||
742 | * With CONFIG_DEBUG_PAGEALLOC initialization of highmem pages has to | ||
743 | * be done before free_all_bootmem(). Memblock use free low memory for | ||
744 | * temporary data (see find_range_array()) and for this purpose can use | ||
745 | * pages that was already passed to the buddy allocator, hence marked as | ||
746 | * not accessible in the page tables when compiled with | ||
747 | * CONFIG_DEBUG_PAGEALLOC. Otherwise order of initialization is not | ||
748 | * important here. | ||
749 | */ | ||
750 | set_highmem_pages_init(); | ||
751 | |||
757 | /* this will put all low memory onto the freelists */ | 752 | /* this will put all low memory onto the freelists */ |
758 | totalram_pages += free_all_bootmem(); | 753 | totalram_pages += free_all_bootmem(); |
759 | 754 | ||
@@ -765,8 +760,6 @@ void __init mem_init(void) | |||
765 | if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) | 760 | if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) |
766 | reservedpages++; | 761 | reservedpages++; |
767 | 762 | ||
768 | set_highmem_pages_init(); | ||
769 | |||
770 | codesize = (unsigned long) &_etext - (unsigned long) &_text; | 763 | codesize = (unsigned long) &_etext - (unsigned long) &_text; |
771 | datasize = (unsigned long) &_edata - (unsigned long) &_etext; | 764 | datasize = (unsigned long) &_edata - (unsigned long) &_etext; |
772 | initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; | 765 | initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index a8a56ce3a962..436a0309db33 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -614,15 +614,6 @@ void __init initmem_init(void) | |||
614 | 614 | ||
615 | void __init paging_init(void) | 615 | void __init paging_init(void) |
616 | { | 616 | { |
617 | unsigned long max_zone_pfns[MAX_NR_ZONES]; | ||
618 | |||
619 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); | ||
620 | #ifdef CONFIG_ZONE_DMA | ||
621 | max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; | ||
622 | #endif | ||
623 | max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; | ||
624 | max_zone_pfns[ZONE_NORMAL] = max_pfn; | ||
625 | |||
626 | sparse_memory_present_with_active_regions(MAX_NUMNODES); | 617 | sparse_memory_present_with_active_regions(MAX_NUMNODES); |
627 | sparse_init(); | 618 | sparse_init(); |
628 | 619 | ||
@@ -634,7 +625,7 @@ void __init paging_init(void) | |||
634 | */ | 625 | */ |
635 | node_clear_state(0, N_NORMAL_MEMORY); | 626 | node_clear_state(0, N_NORMAL_MEMORY); |
636 | 627 | ||
637 | free_area_init_nodes(max_zone_pfns); | 628 | zone_sizes_init(); |
638 | } | 629 | } |
639 | 630 | ||
640 | /* | 631 | /* |
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 4b5ba85eb5c9..845df6835f9f 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c | |||
@@ -75,9 +75,9 @@ static unsigned long mmap_rnd(void) | |||
75 | */ | 75 | */ |
76 | if (current->flags & PF_RANDOMIZE) { | 76 | if (current->flags & PF_RANDOMIZE) { |
77 | if (mmap_is_ia32()) | 77 | if (mmap_is_ia32()) |
78 | rnd = (long)get_random_int() % (1<<8); | 78 | rnd = get_random_int() % (1<<8); |
79 | else | 79 | else |
80 | rnd = (long)(get_random_int() % (1<<28)); | 80 | rnd = get_random_int() % (1<<28); |
81 | } | 81 | } |
82 | return rnd << PAGE_SHIFT; | 82 | return rnd << PAGE_SHIFT; |
83 | } | 83 | } |
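The casts dropped above were redundant: get_random_int() is already unsigned, and the modulo keeps 8 bits of entropy for ia32 tasks versus 28 bits for 64-bit before shifting the result into a page-aligned offset. A sketch of that arithmetic, with rand() standing in for get_random_int() and PAGE_SHIFT assumed to be 12:

        #include <stdio.h>
        #include <stdlib.h>
        #include <time.h>

        #define PAGE_SHIFT 12

        /* rand() stands in for get_random_int() */
        static unsigned long mmap_rnd(int is_ia32)
        {
                unsigned long rnd;

                if (is_ia32)
                        rnd = (unsigned long)rand() % (1 << 8);   /*  8 bits */
                else
                        rnd = (unsigned long)rand() % (1 << 28);  /* 28 bits */
                return rnd << PAGE_SHIFT;
        }

        int main(void)
        {
                srand((unsigned)time(NULL));
                printf("ia32 offset:   %#lx\n", mmap_rnd(1));  /* <= 0xff000 */
                printf("64-bit offset: %#lx\n", mmap_rnd(0));  /* <= 0xfffffff000 */
                return 0;
        }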
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index de54b9b278a7..dc0b727742f4 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c | |||
@@ -75,8 +75,8 @@ static LIST_HEAD(trace_list); /* struct remap_trace */ | |||
75 | 75 | ||
76 | /* module parameters */ | 76 | /* module parameters */ |
77 | static unsigned long filter_offset; | 77 | static unsigned long filter_offset; |
78 | static int nommiotrace; | 78 | static bool nommiotrace; |
79 | static int trace_pc; | 79 | static bool trace_pc; |
80 | 80 | ||
81 | module_param(filter_offset, ulong, 0); | 81 | module_param(filter_offset, ulong, 0); |
82 | module_param(nommiotrace, bool, 0); | 82 | module_param(nommiotrace, bool, 0); |
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 496f494593bf..19d3fa08b119 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c | |||
@@ -110,7 +110,7 @@ void __cpuinit numa_clear_node(int cpu) | |||
110 | * Allocate node_to_cpumask_map based on number of available nodes | 110 | * Allocate node_to_cpumask_map based on number of available nodes |
111 | * Requires node_possible_map to be valid. | 111 | * Requires node_possible_map to be valid. |
112 | * | 112 | * |
113 | * Note: node_to_cpumask() is not valid until after this is done. | 113 | * Note: cpumask_of_node() is not valid until after this is done. |
114 | * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.) | 114 | * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.) |
115 | */ | 115 | */ |
116 | void __init setup_node_to_cpumask_map(void) | 116 | void __init setup_node_to_cpumask_map(void) |
@@ -422,8 +422,9 @@ static int __init numa_alloc_distance(void) | |||
422 | * calls are ignored until the distance table is reset with | 422 | * calls are ignored until the distance table is reset with |
423 | * numa_reset_distance(). | 423 | * numa_reset_distance(). |
424 | * | 424 | * |
425 | * If @from or @to is higher than the highest known node at the time of | 425 | * If @from or @to is higher than the highest known node at the time of |
426 | * table creation or @distance doesn't make sense, the call is ignored. | 426 | * table creation, is lower than zero, or @distance doesn't make sense, |
427 | * the call is ignored. | ||
427 | * This is to allow simplification of specific NUMA config implementations. | 428 | * This is to allow simplification of specific NUMA config implementations. |
428 | */ | 429 | */ |
429 | void __init numa_set_distance(int from, int to, int distance) | 430 | void __init numa_set_distance(int from, int to, int distance) |
@@ -431,8 +432,9 @@ void __init numa_set_distance(int from, int to, int distance) | |||
431 | if (!numa_distance && numa_alloc_distance() < 0) | 432 | if (!numa_distance && numa_alloc_distance() < 0) |
432 | return; | 433 | return; |
433 | 434 | ||
434 | if (from >= numa_distance_cnt || to >= numa_distance_cnt) { | 435 | if (from >= numa_distance_cnt || to >= numa_distance_cnt || |
435 | printk_once(KERN_DEBUG "NUMA: Debug: distance out of bound, from=%d to=%d distance=%d\n", | 436 | from < 0 || to < 0) { |
437 | pr_warn_once("NUMA: Warning: node ids are out of bound, from=%d to=%d distance=%d\n", | ||
436 | from, to, distance); | 438 | from, to, distance); |
437 | return; | 439 | return; |
438 | } | 440 | } |
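The tightened check above rejects negative node ids as well as ids beyond the distance table, and upgrades the message from a debug printk to a warning. A reduced sketch of the bounds check, with the table and reporting simplified:

        #include <stdio.h>

        #define NODE_CNT 4

        static int node_distance[NODE_CNT][NODE_CNT];

        static void set_distance(int from, int to, int distance)
        {
                /* both ids must be within [0, NODE_CNT) */
                if (from >= NODE_CNT || to >= NODE_CNT || from < 0 || to < 0) {
                        fprintf(stderr, "node ids out of bounds: from=%d to=%d\n",
                                from, to);
                        return;
                }
                node_distance[from][to] = distance;
        }

        int main(void)
        {
                set_distance(0, 1, 20);    /* accepted */
                set_distance(-1, 2, 20);   /* rejected: negative id */
                set_distance(0, 9, 20);    /* rejected: beyond the table */
                printf("distance(0,1) = %d\n", node_distance[0][1]);
                return 0;
        }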
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index eda2acbb6e81..e1ebde315210 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -1334,12 +1334,6 @@ void kernel_map_pages(struct page *page, int numpages, int enable) | |||
1334 | } | 1334 | } |
1335 | 1335 | ||
1336 | /* | 1336 | /* |
1337 | * If page allocator is not up yet then do not call c_p_a(): | ||
1338 | */ | ||
1339 | if (!debug_pagealloc_enabled) | ||
1340 | return; | ||
1341 | |||
1342 | /* | ||
1343 | * The return value is ignored as the calls cannot fail. | 1337 | * The return value is ignored as the calls cannot fail. |
1344 | * Large pages for identity mappings are not used at boot time | 1338 | * Large pages for identity mappings are not used at boot time |
1345 | * and hence no memory allocations during large page split. | 1339 | * and hence no memory allocations during large page split. |
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index fd61b3fb7341..1c1c4f46a7c1 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c | |||
@@ -109,6 +109,8 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) | |||
109 | if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0) | 109 | if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0) |
110 | return; | 110 | return; |
111 | pxm = pa->proximity_domain_lo; | 111 | pxm = pa->proximity_domain_lo; |
112 | if (acpi_srat_revision >= 2) | ||
113 | pxm |= *((unsigned int*)pa->proximity_domain_hi) << 8; | ||
112 | node = setup_node(pxm); | 114 | node = setup_node(pxm); |
113 | if (node < 0) { | 115 | if (node < 0) { |
114 | printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm); | 116 | printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm); |
@@ -160,6 +162,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) | |||
160 | start = ma->base_address; | 162 | start = ma->base_address; |
161 | end = start + ma->length; | 163 | end = start + ma->length; |
162 | pxm = ma->proximity_domain; | 164 | pxm = ma->proximity_domain; |
165 | if (acpi_srat_revision <= 1) | ||
166 | pxm &= 0xff; | ||
163 | node = setup_node(pxm); | 167 | node = setup_node(pxm); |
164 | if (node < 0) { | 168 | if (node < 0) { |
165 | printk(KERN_ERR "SRAT: Too many proximity domains.\n"); | 169 | printk(KERN_ERR "SRAT: Too many proximity domains.\n"); |
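The SRAT hunks widen the proximity domain: revision 1 tables carry an 8-bit domain, revision 2 and later extend it with three more bytes, which the kernel code reads with a shifted 32-bit load. A sketch that composes the bytes explicitly instead; the packed struct below only loosely mirrors the ACPI layout and is an assumption for illustration:

        #include <stdio.h>

        /* loose stand-in for struct acpi_srat_cpu_affinity's domain fields */
        struct srat_cpu_affinity {
                unsigned char proximity_domain_lo;
                unsigned char proximity_domain_hi[3];
        };

        static unsigned int srat_pxm(const struct srat_cpu_affinity *pa, int revision)
        {
                unsigned int pxm = pa->proximity_domain_lo;

                if (revision >= 2) {
                        pxm |= (unsigned int)pa->proximity_domain_hi[0] << 8;
                        pxm |= (unsigned int)pa->proximity_domain_hi[1] << 16;
                        pxm |= (unsigned int)pa->proximity_domain_hi[2] << 24;
                }
                return pxm;
        }

        int main(void)
        {
                struct srat_cpu_affinity pa = { 0x34, { 0x12, 0x00, 0x00 } };

                printf("rev 1 pxm: %#x\n", srat_pxm(&pa, 1));  /* 0x34   */
                printf("rev 2 pxm: %#x\n", srat_pxm(&pa, 2));  /* 0x1234 */
                return 0;
        }

The memory-affinity path mirrors this: on revision 1 and older tables the domain is masked back to its low byte with `pxm &= 0xff`.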
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index 6b8759f7634e..e76e18c94a3c 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile | |||
@@ -15,11 +15,12 @@ obj-$(CONFIG_X86_VISWS) += visws.o | |||
15 | 15 | ||
16 | obj-$(CONFIG_X86_NUMAQ) += numaq_32.o | 16 | obj-$(CONFIG_X86_NUMAQ) += numaq_32.o |
17 | 17 | ||
18 | obj-$(CONFIG_X86_MRST) += mrst.o | 18 | obj-$(CONFIG_X86_INTEL_MID) += mrst.o |
19 | 19 | ||
20 | obj-y += common.o early.o | 20 | obj-y += common.o early.o |
21 | obj-y += amd_bus.o bus_numa.o | 21 | obj-y += bus_numa.o |
22 | 22 | ||
23 | obj-$(CONFIG_AMD_NB) += amd_bus.o | ||
23 | obj-$(CONFIG_PCI_CNB20LE_QUIRK) += broadcom_bus.o | 24 | obj-$(CONFIG_PCI_CNB20LE_QUIRK) += broadcom_bus.o |
24 | 25 | ||
25 | ifeq ($(CONFIG_PCI_DEBUG),y) | 26 | ifeq ($(CONFIG_PCI_DEBUG),y) |
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 404f21a3ff9e..a312e76063a7 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c | |||
@@ -12,7 +12,7 @@ struct pci_root_info { | |||
12 | char *name; | 12 | char *name; |
13 | unsigned int res_num; | 13 | unsigned int res_num; |
14 | struct resource *res; | 14 | struct resource *res; |
15 | struct pci_bus *bus; | 15 | struct list_head *resources; |
16 | int busnum; | 16 | int busnum; |
17 | }; | 17 | }; |
18 | 18 | ||
@@ -24,6 +24,12 @@ static int __init set_use_crs(const struct dmi_system_id *id) | |||
24 | return 0; | 24 | return 0; |
25 | } | 25 | } |
26 | 26 | ||
27 | static int __init set_nouse_crs(const struct dmi_system_id *id) | ||
28 | { | ||
29 | pci_use_crs = false; | ||
30 | return 0; | ||
31 | } | ||
32 | |||
27 | static const struct dmi_system_id pci_use_crs_table[] __initconst = { | 33 | static const struct dmi_system_id pci_use_crs_table[] __initconst = { |
28 | /* http://bugzilla.kernel.org/show_bug.cgi?id=14183 */ | 34 | /* http://bugzilla.kernel.org/show_bug.cgi?id=14183 */ |
29 | { | 35 | { |
@@ -54,6 +60,29 @@ static const struct dmi_system_id pci_use_crs_table[] __initconst = { | |||
54 | DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), | 60 | DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), |
55 | }, | 61 | }, |
56 | }, | 62 | }, |
63 | |||
64 | /* Now for the blacklist... */ | ||
65 | |||
66 | /* https://bugzilla.redhat.com/show_bug.cgi?id=769657 */ | ||
67 | { | ||
68 | .callback = set_nouse_crs, | ||
69 | .ident = "Dell Studio 1557", | ||
70 | .matches = { | ||
71 | DMI_MATCH(DMI_BOARD_VENDOR, "Dell Inc."), | ||
72 | DMI_MATCH(DMI_PRODUCT_NAME, "Studio 1557"), | ||
73 | DMI_MATCH(DMI_BIOS_VERSION, "A09"), | ||
74 | }, | ||
75 | }, | ||
76 | /* https://bugzilla.redhat.com/show_bug.cgi?id=769657 */ | ||
77 | { | ||
78 | .callback = set_nouse_crs, | ||
79 | .ident = "Thinkpad SL510", | ||
80 | .matches = { | ||
81 | DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), | ||
82 | DMI_MATCH(DMI_BOARD_NAME, "2847DFG"), | ||
83 | DMI_MATCH(DMI_BIOS_VERSION, "6JET85WW (1.43 )"), | ||
84 | }, | ||
85 | }, | ||
57 | {} | 86 | {} |
58 | }; | 87 | }; |
59 | 88 | ||
@@ -149,7 +178,7 @@ setup_resource(struct acpi_resource *acpi_res, void *data) | |||
149 | struct acpi_resource_address64 addr; | 178 | struct acpi_resource_address64 addr; |
150 | acpi_status status; | 179 | acpi_status status; |
151 | unsigned long flags; | 180 | unsigned long flags; |
152 | u64 start, end; | 181 | u64 start, orig_end, end; |
153 | 182 | ||
154 | status = resource_to_addr(acpi_res, &addr); | 183 | status = resource_to_addr(acpi_res, &addr); |
155 | if (!ACPI_SUCCESS(status)) | 184 | if (!ACPI_SUCCESS(status)) |
@@ -165,7 +194,21 @@ setup_resource(struct acpi_resource *acpi_res, void *data) | |||
165 | return AE_OK; | 194 | return AE_OK; |
166 | 195 | ||
167 | start = addr.minimum + addr.translation_offset; | 196 | start = addr.minimum + addr.translation_offset; |
168 | end = addr.maximum + addr.translation_offset; | 197 | orig_end = end = addr.maximum + addr.translation_offset; |
198 | |||
199 | /* Exclude non-addressable range or non-addressable portion of range */ | ||
200 | end = min(end, (u64)iomem_resource.end); | ||
201 | if (end <= start) { | ||
202 | dev_info(&info->bridge->dev, | ||
203 | "host bridge window [%#llx-%#llx] " | ||
204 | "(ignored, not CPU addressable)\n", start, orig_end); | ||
205 | return AE_OK; | ||
206 | } else if (orig_end != end) { | ||
207 | dev_info(&info->bridge->dev, | ||
208 | "host bridge window [%#llx-%#llx] " | ||
209 | "([%#llx-%#llx] ignored, not CPU addressable)\n", | ||
210 | start, orig_end, end + 1, orig_end); | ||
211 | } | ||
169 | 212 | ||
170 | res = &info->res[info->res_num]; | 213 | res = &info->res[info->res_num]; |
171 | res->name = info->name; | 214 | res->name = info->name; |
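The addressability clamp added to setup_resource() limits a window's end to the CPU-addressable maximum: a window that vanishes entirely is dropped, and a shrunken one is reported along with the trimmed portion. A sketch of that logic, where iomem_end is an assumed stand-in for iomem_resource.end:

        #include <inttypes.h>
        #include <stdio.h>

        static void clamp_window(uint64_t start, uint64_t end, uint64_t iomem_end)
        {
                uint64_t orig_end = end;

                /* end = min(end, iomem_resource.end) */
                if (end > iomem_end)
                        end = iomem_end;

                if (end <= start) {
                        printf("[%#" PRIx64 "-%#" PRIx64 "] ignored, not CPU addressable\n",
                               start, orig_end);
                        return;
                }
                if (orig_end != end)
                        printf("[%#" PRIx64 "-%#" PRIx64 "] trimmed, new end %#" PRIx64 "\n",
                               start, orig_end, end);
                else
                        printf("[%#" PRIx64 "-%#" PRIx64 "] kept\n", start, end);
        }

        int main(void)
        {
                uint64_t iomem_end = 0xfffffffffULL;  /* assumed 36-bit CPU limit */

                clamp_window(0xc0000000ULL,   0xdfffffffULL,   iomem_end); /* kept */
                clamp_window(0xf00000000ULL,  0x1f00000000ULL, iomem_end); /* trimmed */
                clamp_window(0x1000000000ULL, 0x2000000000ULL, iomem_end); /* ignored */
                return 0;
        }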
@@ -261,23 +304,20 @@ static void add_resources(struct pci_root_info *info) | |||
261 | "ignoring host bridge window %pR (conflicts with %s %pR)\n", | 304 | "ignoring host bridge window %pR (conflicts with %s %pR)\n", |
262 | res, conflict->name, conflict); | 305 | res, conflict->name, conflict); |
263 | else | 306 | else |
264 | pci_bus_add_resource(info->bus, res, 0); | 307 | pci_add_resource(info->resources, res); |
265 | } | 308 | } |
266 | } | 309 | } |
267 | 310 | ||
268 | static void | 311 | static void |
269 | get_current_resources(struct acpi_device *device, int busnum, | 312 | get_current_resources(struct acpi_device *device, int busnum, |
270 | int domain, struct pci_bus *bus) | 313 | int domain, struct list_head *resources) |
271 | { | 314 | { |
272 | struct pci_root_info info; | 315 | struct pci_root_info info; |
273 | size_t size; | 316 | size_t size; |
274 | 317 | ||
275 | if (pci_use_crs) | ||
276 | pci_bus_remove_resources(bus); | ||
277 | |||
278 | info.bridge = device; | 318 | info.bridge = device; |
279 | info.bus = bus; | ||
280 | info.res_num = 0; | 319 | info.res_num = 0; |
320 | info.resources = resources; | ||
281 | acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_resource, | 321 | acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_resource, |
282 | &info); | 322 | &info); |
283 | if (!info.res_num) | 323 | if (!info.res_num) |
@@ -286,7 +326,7 @@ get_current_resources(struct acpi_device *device, int busnum, | |||
286 | size = sizeof(*info.res) * info.res_num; | 326 | size = sizeof(*info.res) * info.res_num; |
287 | info.res = kmalloc(size, GFP_KERNEL); | 327 | info.res = kmalloc(size, GFP_KERNEL); |
288 | if (!info.res) | 328 | if (!info.res) |
289 | goto res_alloc_fail; | 329 | return; |
290 | 330 | ||
291 | info.name = kasprintf(GFP_KERNEL, "PCI Bus %04x:%02x", domain, busnum); | 331 | info.name = kasprintf(GFP_KERNEL, "PCI Bus %04x:%02x", domain, busnum); |
292 | if (!info.name) | 332 | if (!info.name) |
@@ -301,8 +341,6 @@ get_current_resources(struct acpi_device *device, int busnum, | |||
301 | 341 | ||
302 | name_alloc_fail: | 342 | name_alloc_fail: |
303 | kfree(info.res); | 343 | kfree(info.res); |
304 | res_alloc_fail: | ||
305 | return; | ||
306 | } | 344 | } |
307 | 345 | ||
308 | struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) | 346 | struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) |
@@ -310,6 +348,7 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) | |||
310 | struct acpi_device *device = root->device; | 348 | struct acpi_device *device = root->device; |
311 | int domain = root->segment; | 349 | int domain = root->segment; |
312 | int busnum = root->secondary.start; | 350 | int busnum = root->secondary.start; |
351 | LIST_HEAD(resources); | ||
313 | struct pci_bus *bus; | 352 | struct pci_bus *bus; |
314 | struct pci_sysdata *sd; | 353 | struct pci_sysdata *sd; |
315 | int node; | 354 | int node; |
@@ -364,11 +403,15 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) | |||
364 | memcpy(bus->sysdata, sd, sizeof(*sd)); | 403 | memcpy(bus->sysdata, sd, sizeof(*sd)); |
365 | kfree(sd); | 404 | kfree(sd); |
366 | } else { | 405 | } else { |
367 | bus = pci_create_bus(NULL, busnum, &pci_root_ops, sd); | 406 | get_current_resources(device, busnum, domain, &resources); |
368 | if (bus) { | 407 | if (list_empty(&resources)) |
369 | get_current_resources(device, busnum, domain, bus); | 408 | x86_pci_root_bus_resources(busnum, &resources); |
409 | bus = pci_create_root_bus(NULL, busnum, &pci_root_ops, sd, | ||
410 | &resources); | ||
411 | if (bus) | ||
370 | bus->subordinate = pci_scan_child_bus(bus); | 412 | bus->subordinate = pci_scan_child_bus(bus); |
371 | } | 413 | else |
414 | pci_free_resource_list(&resources); | ||
372 | } | 415 | } |
373 | 416 | ||
374 | /* After the PCI-E bus has been walked and all devices discovered, | 417 | /* After the PCI-E bus has been walked and all devices discovered, |
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 026e4931d162..0567df3890e1 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c | |||
@@ -30,34 +30,6 @@ static struct pci_hostbridge_probe pci_probes[] __initdata = { | |||
30 | { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1300 }, | 30 | { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1300 }, |
31 | }; | 31 | }; |
32 | 32 | ||
33 | static u64 __initdata fam10h_mmconf_start; | ||
34 | static u64 __initdata fam10h_mmconf_end; | ||
35 | static void __init get_pci_mmcfg_amd_fam10h_range(void) | ||
36 | { | ||
37 | u32 address; | ||
38 | u64 base, msr; | ||
39 | unsigned segn_busn_bits; | ||
40 | |||
41 | /* assume all cpus from fam10h have mmconf */ | ||
42 | if (boot_cpu_data.x86 < 0x10) | ||
43 | return; | ||
44 | |||
45 | address = MSR_FAM10H_MMIO_CONF_BASE; | ||
46 | rdmsrl(address, msr); | ||
47 | |||
48 | /* mmconfig is not enable */ | ||
49 | if (!(msr & FAM10H_MMIO_CONF_ENABLE)) | ||
50 | return; | ||
51 | |||
52 | base = msr & (FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT); | ||
53 | |||
54 | segn_busn_bits = (msr >> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) & | ||
55 | FAM10H_MMIO_CONF_BUSRANGE_MASK; | ||
56 | |||
57 | fam10h_mmconf_start = base; | ||
58 | fam10h_mmconf_end = base + (1ULL<<(segn_busn_bits + 20)) - 1; | ||
59 | } | ||
60 | |||
61 | #define RANGE_NUM 16 | 33 | #define RANGE_NUM 16 |
62 | 34 | ||
63 | /** | 35 | /** |
@@ -85,6 +57,9 @@ static int __init early_fill_mp_bus_info(void) | |||
85 | u64 val; | 57 | u64 val; |
86 | u32 address; | 58 | u32 address; |
87 | bool found; | 59 | bool found; |
60 | struct resource fam10h_mmconf_res, *fam10h_mmconf; | ||
61 | u64 fam10h_mmconf_start; | ||
62 | u64 fam10h_mmconf_end; | ||
88 | 63 | ||
89 | if (!early_pci_allowed()) | 64 | if (!early_pci_allowed()) |
90 | return -1; | 65 | return -1; |
@@ -211,12 +186,17 @@ static int __init early_fill_mp_bus_info(void) | |||
211 | subtract_range(range, RANGE_NUM, 0, end); | 186 | subtract_range(range, RANGE_NUM, 0, end); |
212 | 187 | ||
213 | /* get mmconfig */ | 188 | /* get mmconfig */ |
214 | get_pci_mmcfg_amd_fam10h_range(); | 189 | fam10h_mmconf = amd_get_mmconfig_range(&fam10h_mmconf_res); |
215 | /* need to take out mmconf range */ | 190 | /* need to take out mmconf range */ |
216 | if (fam10h_mmconf_end) { | 191 | if (fam10h_mmconf) { |
217 | printk(KERN_DEBUG "Fam 10h mmconf [%llx, %llx]\n", fam10h_mmconf_start, fam10h_mmconf_end); | 192 | printk(KERN_DEBUG "Fam 10h mmconf %pR\n", fam10h_mmconf); |
193 | fam10h_mmconf_start = fam10h_mmconf->start; | ||
194 | fam10h_mmconf_end = fam10h_mmconf->end; | ||
218 | subtract_range(range, RANGE_NUM, fam10h_mmconf_start, | 195 | subtract_range(range, RANGE_NUM, fam10h_mmconf_start, |
219 | fam10h_mmconf_end + 1); | 196 | fam10h_mmconf_end + 1); |
197 | } else { | ||
198 | fam10h_mmconf_start = 0; | ||
199 | fam10h_mmconf_end = 0; | ||
220 | } | 200 | } |
221 | 201 | ||
222 | /* mmio resource */ | 202 | /* mmio resource */ |
@@ -403,7 +383,6 @@ static void __init pci_enable_pci_io_ecs(void) | |||
403 | ++n; | 383 | ++n; |
404 | } | 384 | } |
405 | } | 385 | } |
406 | pr_info("Extended Config Space enabled on %u nodes\n", n); | ||
407 | #endif | 386 | #endif |
408 | } | 387 | } |
409 | 388 | ||
diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c index ab8269b0da29..f3a7c569a403 100644 --- a/arch/x86/pci/broadcom_bus.c +++ b/arch/x86/pci/broadcom_bus.c | |||
@@ -15,10 +15,11 @@ | |||
15 | #include <linux/pci.h> | 15 | #include <linux/pci.h> |
16 | #include <linux/init.h> | 16 | #include <linux/init.h> |
17 | #include <asm/pci_x86.h> | 17 | #include <asm/pci_x86.h> |
18 | #include <asm/pci-direct.h> | ||
18 | 19 | ||
19 | #include "bus_numa.h" | 20 | #include "bus_numa.h" |
20 | 21 | ||
21 | static void __devinit cnb20le_res(struct pci_dev *dev) | 22 | static void __init cnb20le_res(u8 bus, u8 slot, u8 func) |
22 | { | 23 | { |
23 | struct pci_root_info *info; | 24 | struct pci_root_info *info; |
24 | struct resource res; | 25 | struct resource res; |
@@ -26,21 +27,12 @@ static void __devinit cnb20le_res(struct pci_dev *dev) | |||
26 | u8 fbus, lbus; | 27 | u8 fbus, lbus; |
27 | int i; | 28 | int i; |
28 | 29 | ||
29 | #ifdef CONFIG_ACPI | ||
30 | /* | ||
31 | * We should get host bridge information from ACPI unless the BIOS | ||
32 | * doesn't support it. | ||
33 | */ | ||
34 | if (acpi_os_get_root_pointer()) | ||
35 | return; | ||
36 | #endif | ||
37 | |||
38 | info = &pci_root_info[pci_root_num]; | 30 | info = &pci_root_info[pci_root_num]; |
39 | pci_root_num++; | 31 | pci_root_num++; |
40 | 32 | ||
41 | /* read the PCI bus numbers */ | 33 | /* read the PCI bus numbers */ |
42 | pci_read_config_byte(dev, 0x44, &fbus); | 34 | fbus = read_pci_config_byte(bus, slot, func, 0x44); |
43 | pci_read_config_byte(dev, 0x45, &lbus); | 35 | lbus = read_pci_config_byte(bus, slot, func, 0x45); |
44 | info->bus_min = fbus; | 36 | info->bus_min = fbus; |
45 | info->bus_max = lbus; | 37 | info->bus_max = lbus; |
46 | 38 | ||
@@ -59,8 +51,8 @@ static void __devinit cnb20le_res(struct pci_dev *dev) | |||
59 | } | 51 | } |
60 | 52 | ||
61 | /* read the non-prefetchable memory window */ | 53 | /* read the non-prefetchable memory window */ |
62 | pci_read_config_word(dev, 0xc0, &word1); | 54 | word1 = read_pci_config_16(bus, slot, func, 0xc0); |
63 | pci_read_config_word(dev, 0xc2, &word2); | 55 | word2 = read_pci_config_16(bus, slot, func, 0xc2); |
64 | if (word1 != word2) { | 56 | if (word1 != word2) { |
65 | res.start = (word1 << 16) | 0x0000; | 57 | res.start = (word1 << 16) | 0x0000; |
66 | res.end = (word2 << 16) | 0xffff; | 58 | res.end = (word2 << 16) | 0xffff; |
@@ -69,8 +61,8 @@ static void __devinit cnb20le_res(struct pci_dev *dev) | |||
69 | } | 61 | } |
70 | 62 | ||
71 | /* read the prefetchable memory window */ | 63 | /* read the prefetchable memory window */ |
72 | pci_read_config_word(dev, 0xc4, &word1); | 64 | word1 = read_pci_config_16(bus, slot, func, 0xc4); |
73 | pci_read_config_word(dev, 0xc6, &word2); | 65 | word2 = read_pci_config_16(bus, slot, func, 0xc6); |
74 | if (word1 != word2) { | 66 | if (word1 != word2) { |
75 | res.start = (word1 << 16) | 0x0000; | 67 | res.start = (word1 << 16) | 0x0000; |
76 | res.end = (word2 << 16) | 0xffff; | 68 | res.end = (word2 << 16) | 0xffff; |
@@ -79,8 +71,8 @@ static void __devinit cnb20le_res(struct pci_dev *dev) | |||
79 | } | 71 | } |
80 | 72 | ||
81 | /* read the IO port window */ | 73 | /* read the IO port window */ |
82 | pci_read_config_word(dev, 0xd0, &word1); | 74 | word1 = read_pci_config_16(bus, slot, func, 0xd0); |
83 | pci_read_config_word(dev, 0xd2, &word2); | 75 | word2 = read_pci_config_16(bus, slot, func, 0xd2); |
84 | if (word1 != word2) { | 76 | if (word1 != word2) { |
85 | res.start = word1; | 77 | res.start = word1; |
86 | res.end = word2; | 78 | res.end = word2; |
@@ -92,13 +84,37 @@ static void __devinit cnb20le_res(struct pci_dev *dev) | |||
92 | res.start = fbus; | 84 | res.start = fbus; |
93 | res.end = lbus; | 85 | res.end = lbus; |
94 | res.flags = IORESOURCE_BUS; | 86 | res.flags = IORESOURCE_BUS; |
95 | dev_info(&dev->dev, "CNB20LE PCI Host Bridge (domain %04x %pR)\n", | 87 | printk(KERN_INFO "CNB20LE PCI Host Bridge (domain 0000 %pR)\n", &res); |
96 | pci_domain_nr(dev->bus), &res); | ||
97 | 88 | ||
98 | for (i = 0; i < info->res_num; i++) | 89 | for (i = 0; i < info->res_num; i++) |
99 | dev_info(&dev->dev, "host bridge window %pR\n", &info->res[i]); | 90 | printk(KERN_INFO "host bridge window %pR\n", &info->res[i]); |
100 | } | 91 | } |
101 | 92 | ||
102 | DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_LE, | 93 | static int __init broadcom_postcore_init(void) |
103 | cnb20le_res); | 94 | { |
95 | u8 bus = 0, slot = 0; | ||
96 | u32 id; | ||
97 | u16 vendor, device; | ||
98 | |||
99 | #ifdef CONFIG_ACPI | ||
100 | /* | ||
101 | * We should get host bridge information from ACPI unless the BIOS | ||
102 | * doesn't support it. | ||
103 | */ | ||
104 | if (acpi_os_get_root_pointer()) | ||
105 | return 0; | ||
106 | #endif | ||
107 | |||
108 | id = read_pci_config(bus, slot, 0, PCI_VENDOR_ID); | ||
109 | vendor = id & 0xffff; | ||
110 | device = (id >> 16) & 0xffff; | ||
111 | |||
112 | if (vendor == PCI_VENDOR_ID_SERVERWORKS && | ||
113 | device == PCI_DEVICE_ID_SERVERWORKS_LE) { | ||
114 | cnb20le_res(bus, slot, 0); | ||
115 | cnb20le_res(bus, slot, 1); | ||
116 | } | ||
117 | return 0; | ||
118 | } | ||
104 | 119 | ||
120 | postcore_initcall(broadcom_postcore_init); | ||
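With the fixup replaced by a postcore initcall, the CNB20LE is now detected by one raw config read at PCI_VENDOR_ID: a 32-bit dword whose low half is the vendor id and high half the device id. A sketch of that decode, faking the config read with a constant; the ServerWorks ids are taken from context and should be treated as assumptions:

        #include <stdint.h>
        #include <stdio.h>

        #define PCI_VENDOR_ID_SERVERWORKS    0x1166
        #define PCI_DEVICE_ID_SERVERWORKS_LE 0x0009   /* assumed device id */

        /* stand-in for read_pci_config(0, 0, 0, PCI_VENDOR_ID) */
        static uint32_t fake_read_pci_config(void)
        {
                return ((uint32_t)PCI_DEVICE_ID_SERVERWORKS_LE << 16) |
                       PCI_VENDOR_ID_SERVERWORKS;
        }

        int main(void)
        {
                uint32_t id = fake_read_pci_config();
                uint16_t vendor = id & 0xffff;           /* low half  */
                uint16_t device = (id >> 16) & 0xffff;   /* high half */

                if (vendor == PCI_VENDOR_ID_SERVERWORKS &&
                    device == PCI_DEVICE_ID_SERVERWORKS_LE)
                        printf("CNB20LE found: vendor %#x device %#x\n",
                               vendor, device);
                return 0;
        }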
diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c index 64a122883896..fd3f65510e9d 100644 --- a/arch/x86/pci/bus_numa.c +++ b/arch/x86/pci/bus_numa.c | |||
@@ -7,45 +7,50 @@ | |||
7 | int pci_root_num; | 7 | int pci_root_num; |
8 | struct pci_root_info pci_root_info[PCI_ROOT_NR]; | 8 | struct pci_root_info pci_root_info[PCI_ROOT_NR]; |
9 | 9 | ||
10 | void x86_pci_root_bus_res_quirks(struct pci_bus *b) | 10 | void x86_pci_root_bus_resources(int bus, struct list_head *resources) |
11 | { | 11 | { |
12 | int i; | 12 | int i; |
13 | int j; | 13 | int j; |
14 | struct pci_root_info *info; | 14 | struct pci_root_info *info; |
15 | 15 | ||
16 | /* don't go for it if _CRS is used already */ | ||
17 | if (b->resource[0] != &ioport_resource || | ||
18 | b->resource[1] != &iomem_resource) | ||
19 | return; | ||
20 | |||
21 | if (!pci_root_num) | 16 | if (!pci_root_num) |
22 | return; | 17 | goto default_resources; |
23 | 18 | ||
24 | for (i = 0; i < pci_root_num; i++) { | 19 | for (i = 0; i < pci_root_num; i++) { |
25 | if (pci_root_info[i].bus_min == b->number) | 20 | if (pci_root_info[i].bus_min == bus) |
26 | break; | 21 | break; |
27 | } | 22 | } |
28 | 23 | ||
29 | if (i == pci_root_num) | 24 | if (i == pci_root_num) |
30 | return; | 25 | goto default_resources; |
31 | 26 | ||
32 | printk(KERN_DEBUG "PCI: peer root bus %02x res updated from pci conf\n", | 27 | printk(KERN_DEBUG "PCI: root bus %02x: hardware-probed resources\n", |
33 | b->number); | 28 | bus); |
34 | 29 | ||
35 | pci_bus_remove_resources(b); | ||
36 | info = &pci_root_info[i]; | 30 | info = &pci_root_info[i]; |
37 | for (j = 0; j < info->res_num; j++) { | 31 | for (j = 0; j < info->res_num; j++) { |
38 | struct resource *res; | 32 | struct resource *res; |
39 | struct resource *root; | 33 | struct resource *root; |
40 | 34 | ||
41 | res = &info->res[j]; | 35 | res = &info->res[j]; |
42 | pci_bus_add_resource(b, res, 0); | 36 | pci_add_resource(resources, res); |
43 | if (res->flags & IORESOURCE_IO) | 37 | if (res->flags & IORESOURCE_IO) |
44 | root = &ioport_resource; | 38 | root = &ioport_resource; |
45 | else | 39 | else |
46 | root = &iomem_resource; | 40 | root = &iomem_resource; |
47 | insert_resource(root, res); | 41 | insert_resource(root, res); |
48 | } | 42 | } |
43 | return; | ||
44 | |||
45 | default_resources: | ||
46 | /* | ||
47 | * We don't have any host bridge aperture information from the | ||
48 | * "native host bridge drivers," e.g., amd_bus or broadcom_bus, | ||
49 | * so fall back to the defaults historically used by pci_create_bus(). | ||
50 | */ | ||
51 | printk(KERN_DEBUG "PCI: root bus %02x: using default resources\n", bus); | ||
52 | pci_add_resource(resources, &ioport_resource); | ||
53 | pci_add_resource(resources, &iomem_resource); | ||
49 | } | 54 | } |
50 | 55 | ||
51 | void __devinit update_res(struct pci_root_info *info, resource_size_t start, | 56 | void __devinit update_res(struct pci_root_info *info, resource_size_t start, |
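The rewritten helper fills the caller's resource list instead of patching an existing bus: probed windows are appended when a matching root entry exists, otherwise the full io/mem defaults that pci_create_bus() historically used are supplied. A sketch of that contract, with lists reduced to a fixed array and the probe data invented:

        #include <stdio.h>

        struct res { const char *name; };

        /* invented probe data; absent when no native driver filled it in */
        static const struct res probed[] = {
                { "io  0x0000-0x0cf7" },
                { "mem 0xc0000000-0xdfffffff" },
        };

        static const struct res io_default  = { "whole ioport space" };
        static const struct res mem_default = { "whole iomem space" };

        static int root_bus_resources(const struct res **out, int have_probe)
        {
                int n = 0;

                if (have_probe) {
                        for (unsigned int i = 0; i < sizeof(probed) / sizeof(probed[0]); i++)
                                out[n++] = &probed[i];
                } else {
                        /* historical pci_create_bus() defaults */
                        out[n++] = &io_default;
                        out[n++] = &mem_default;
                }
                return n;
        }

        int main(void)
        {
                const struct res *list[8];
                int n = root_bus_resources(list, 0);  /* no probe data: fall back */

                for (int i = 0; i < n; i++)
                        printf("bus 00: %s\n", list[i]->name);
                return 0;
        }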
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 7962ccb4d9b2..323481e06ef8 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c | |||
@@ -164,9 +164,6 @@ void __devinit pcibios_fixup_bus(struct pci_bus *b) | |||
164 | { | 164 | { |
165 | struct pci_dev *dev; | 165 | struct pci_dev *dev; |
166 | 166 | ||
167 | /* root bus? */ | ||
168 | if (!b->parent) | ||
169 | x86_pci_root_bus_res_quirks(b); | ||
170 | pci_read_bridge_bases(b); | 167 | pci_read_bridge_bases(b); |
171 | list_for_each_entry(dev, &b->devices, bus_list) | 168 | list_for_each_entry(dev, &b->devices, bus_list) |
172 | pcibios_fixup_device_resources(dev); | 169 | pcibios_fixup_device_resources(dev); |
@@ -433,6 +430,7 @@ void __init dmi_check_pciprobe(void) | |||
433 | 430 | ||
434 | struct pci_bus * __devinit pcibios_scan_root(int busnum) | 431 | struct pci_bus * __devinit pcibios_scan_root(int busnum) |
435 | { | 432 | { |
433 | LIST_HEAD(resources); | ||
436 | struct pci_bus *bus = NULL; | 434 | struct pci_bus *bus = NULL; |
437 | struct pci_sysdata *sd; | 435 | struct pci_sysdata *sd; |
438 | 436 | ||
@@ -456,9 +454,12 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum) | |||
456 | sd->node = get_mp_bus_to_node(busnum); | 454 | sd->node = get_mp_bus_to_node(busnum); |
457 | 455 | ||
458 | printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum); | 456 | printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum); |
459 | bus = pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd); | 457 | x86_pci_root_bus_resources(busnum, &resources); |
460 | if (!bus) | 458 | bus = pci_scan_root_bus(NULL, busnum, &pci_root_ops, sd, &resources); |
459 | if (!bus) { | ||
460 | pci_free_resource_list(&resources); | ||
461 | kfree(sd); | 461 | kfree(sd); |
462 | } | ||
462 | 463 | ||
463 | return bus; | 464 | return bus; |
464 | } | 465 | } |
@@ -639,6 +640,7 @@ int pci_ext_cfg_avail(struct pci_dev *dev) | |||
639 | 640 | ||
640 | struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node) | 641 | struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node) |
641 | { | 642 | { |
643 | LIST_HEAD(resources); | ||
642 | struct pci_bus *bus = NULL; | 644 | struct pci_bus *bus = NULL; |
643 | struct pci_sysdata *sd; | 645 | struct pci_sysdata *sd; |
644 | 646 | ||
@@ -653,9 +655,12 @@ struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops, | |||
653 | return NULL; | 655 | return NULL; |
654 | } | 656 | } |
655 | sd->node = node; | 657 | sd->node = node; |
656 | bus = pci_scan_bus(busno, ops, sd); | 658 | x86_pci_root_bus_resources(busno, &resources); |
657 | if (!bus) | 659 | bus = pci_scan_root_bus(NULL, busno, ops, sd, &resources); |
660 | if (!bus) { | ||
661 | pci_free_resource_list(&resources); | ||
658 | kfree(sd); | 662 | kfree(sd); |
663 | } | ||
659 | 664 | ||
660 | return bus; | 665 | return bus; |
661 | } | 666 | } |
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 794b092d01ae..91821a1a0c3a 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c | |||
@@ -254,26 +254,6 @@ void __init pcibios_resource_survey(void) | |||
254 | */ | 254 | */ |
255 | fs_initcall(pcibios_assign_resources); | 255 | fs_initcall(pcibios_assign_resources); |
256 | 256 | ||
257 | /* | ||
258 | * If we set up a device for bus mastering, we need to check the latency | ||
259 | * timer as certain crappy BIOSes forget to set it properly. | ||
260 | */ | ||
261 | unsigned int pcibios_max_latency = 255; | ||
262 | |||
263 | void pcibios_set_master(struct pci_dev *dev) | ||
264 | { | ||
265 | u8 lat; | ||
266 | pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat); | ||
267 | if (lat < 16) | ||
268 | lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency; | ||
269 | else if (lat > pcibios_max_latency) | ||
270 | lat = pcibios_max_latency; | ||
271 | else | ||
272 | return; | ||
273 | dev_printk(KERN_DEBUG, &dev->dev, "setting latency timer to %d\n", lat); | ||
274 | pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); | ||
275 | } | ||
276 | |||
277 | static const struct vm_operations_struct pci_mmap_ops = { | 257 | static const struct vm_operations_struct pci_mmap_ops = { |
278 | .access = generic_access_phys, | 258 | .access = generic_access_phys, |
279 | }; | 259 | }; |
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index 2c2aeabc2609..a1df191129d3 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c | |||
@@ -31,9 +31,6 @@ int __init pci_legacy_init(void) | |||
31 | 31 | ||
32 | printk("PCI: Probing PCI hardware\n"); | 32 | printk("PCI: Probing PCI hardware\n"); |
33 | pci_root_bus = pcibios_scan_root(0); | 33 | pci_root_bus = pcibios_scan_root(0); |
34 | if (pci_root_bus) | ||
35 | pci_bus_add_devices(pci_root_bus); | ||
36 | |||
37 | return 0; | 34 | return 0; |
38 | } | 35 | } |
39 | 36 | ||
diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c index 51abf02f9226..83e125b95ca6 100644 --- a/arch/x86/pci/numaq_32.c +++ b/arch/x86/pci/numaq_32.c | |||
@@ -153,8 +153,6 @@ int __init pci_numaq_init(void) | |||
153 | raw_pci_ops = &pci_direct_conf1_mq; | 153 | raw_pci_ops = &pci_direct_conf1_mq; |
154 | 154 | ||
155 | pci_root_bus = pcibios_scan_root(0); | 155 | pci_root_bus = pcibios_scan_root(0); |
156 | if (pci_root_bus) | ||
157 | pci_bus_add_devices(pci_root_bus); | ||
158 | if (num_online_nodes() > 1) | 156 | if (num_online_nodes() > 1) |
159 | for_each_online_node(quad) { | 157 | for_each_online_node(quad) { |
160 | if (quad == 0) | 158 | if (quad == 0) |
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index db0e9a51e611..da8fe0535ff4 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c | |||
@@ -44,7 +44,7 @@ static inline void set_bios_x(void) | |||
44 | pcibios_enabled = 1; | 44 | pcibios_enabled = 1; |
45 | set_memory_x(PAGE_OFFSET + BIOS_BEGIN, (BIOS_END - BIOS_BEGIN) >> PAGE_SHIFT); | 45 | set_memory_x(PAGE_OFFSET + BIOS_BEGIN, (BIOS_END - BIOS_BEGIN) >> PAGE_SHIFT); |
46 | if (__supported_pte_mask & _PAGE_NX) | 46 | if (__supported_pte_mask & _PAGE_NX) |
47 | printk(KERN_INFO "PCI : PCI BIOS aera is rw and x. Use pci=nobios if you want it NX.\n"); | 47 | printk(KERN_INFO "PCI : PCI BIOS area is rw and x. Use pci=nobios if you want it NX.\n"); |
48 | } | 48 | } |
49 | 49 | ||
50 | /* | 50 | /* |
diff --git a/arch/x86/platform/geode/alix.c b/arch/x86/platform/geode/alix.c index ca1973699d3d..dc5f1d32aced 100644 --- a/arch/x86/platform/geode/alix.c +++ b/arch/x86/platform/geode/alix.c | |||
@@ -27,7 +27,7 @@ | |||
27 | 27 | ||
28 | #include <asm/geode.h> | 28 | #include <asm/geode.h> |
29 | 29 | ||
30 | static int force = 0; | 30 | static bool force = 0; |
31 | module_param(force, bool, 0444); | 31 | module_param(force, bool, 0444); |
32 | /* FIXME: Award bios is not automatically detected as Alix platform */ | 32 | /* FIXME: Award bios is not automatically detected as Alix platform */ |
33 | MODULE_PARM_DESC(force, "Force detection as ALIX.2/ALIX.3 platform"); | 33 | MODULE_PARM_DESC(force, "Force detection as ALIX.2/ALIX.3 platform"); |
diff --git a/arch/x86/platform/iris/iris.c b/arch/x86/platform/iris/iris.c index 1ba7f5ed8c9b..5917eb56b313 100644 --- a/arch/x86/platform/iris/iris.c +++ b/arch/x86/platform/iris/iris.c | |||
@@ -42,7 +42,7 @@ MODULE_AUTHOR("Sébastien Hinderer <Sebastien.Hinderer@ens-lyon.org>"); | |||
42 | MODULE_DESCRIPTION("A power_off handler for Iris devices from EuroBraille"); | 42 | MODULE_DESCRIPTION("A power_off handler for Iris devices from EuroBraille"); |
43 | MODULE_SUPPORTED_DEVICE("Eurobraille/Iris"); | 43 | MODULE_SUPPORTED_DEVICE("Eurobraille/Iris"); |
44 | 44 | ||
45 | static int force; | 45 | static bool force; |
46 | 46 | ||
47 | module_param(force, bool, 0); | 47 | module_param(force, bool, 0); |
48 | MODULE_PARM_DESC(force, "Set to one to force poweroff handler installation."); | 48 | MODULE_PARM_DESC(force, "Set to one to force poweroff handler installation."); |
diff --git a/arch/x86/platform/mrst/Makefile b/arch/x86/platform/mrst/Makefile index 1ea38775a6d3..7baed5135e0f 100644 --- a/arch/x86/platform/mrst/Makefile +++ b/arch/x86/platform/mrst/Makefile | |||
@@ -1,4 +1,4 @@ | |||
1 | obj-$(CONFIG_X86_MRST) += mrst.o | 1 | obj-$(CONFIG_X86_INTEL_MID) += mrst.o |
2 | obj-$(CONFIG_X86_MRST) += vrtc.o | 2 | obj-$(CONFIG_X86_INTEL_MID) += vrtc.o |
3 | obj-$(CONFIG_EARLY_PRINTK_MRST) += early_printk_mrst.o | 3 | obj-$(CONFIG_EARLY_PRINTK_INTEL_MID) += early_printk_mrst.o |
4 | obj-$(CONFIG_X86_MRST) += pmu.o | 4 | obj-$(CONFIG_X86_MRST) += pmu.o |
diff --git a/arch/x86/platform/mrst/early_printk_mrst.c b/arch/x86/platform/mrst/early_printk_mrst.c index 25bfdbb5b130..3c6e328483c7 100644 --- a/arch/x86/platform/mrst/early_printk_mrst.c +++ b/arch/x86/platform/mrst/early_printk_mrst.c | |||
@@ -245,16 +245,24 @@ struct console early_mrst_console = { | |||
245 | * Following is the early console based on Medfield HSU (High | 245 | * Following is the early console based on Medfield HSU (High |
246 | * Speed UART) device. | 246 | * Speed UART) device. |
247 | */ | 247 | */ |
248 | #define HSU_PORT2_PADDR 0xffa28180 | 248 | #define HSU_PORT_BASE 0xffa28080 |
249 | 249 | ||
250 | static void __iomem *phsu; | 250 | static void __iomem *phsu; |
251 | 251 | ||
252 | void hsu_early_console_init(void) | 252 | void hsu_early_console_init(const char *s) |
253 | { | 253 | { |
254 | unsigned long paddr, port = 0; | ||
254 | u8 lcr; | 255 | u8 lcr; |
255 | 256 | ||
256 | phsu = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE, | 257 | /* |
257 | HSU_PORT2_PADDR); | 258 | * Select the early HSU console port if specified by user in the |
259 | * kernel command line. | ||
260 | */ | ||
261 | if (*s && !kstrtoul(s, 10, &port)) | ||
262 | port = clamp_val(port, 0, 2); | ||
263 | |||
264 | paddr = HSU_PORT_BASE + port * 0x80; | ||
265 | phsu = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE, paddr); | ||
258 | 266 | ||
259 | /* Disable FIFO */ | 267 | /* Disable FIFO */ |
260 | writeb(0x0, phsu + UART_FCR); | 268 | writeb(0x0, phsu + UART_FCR); |
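hsu_early_console_init() now takes an optional digit from the command line to select port 0-2, and the MMIO base steps by 0x80 per port. A sketch of the selection, with strtoul() standing in for kstrtoul() and the base address copied from the hunk above:

        #include <stdio.h>
        #include <stdlib.h>

        #define HSU_PORT_BASE 0xffa28080UL

        /* strtoul() stands in for kstrtoul() */
        static unsigned long hsu_port_paddr(const char *s)
        {
                unsigned long port = 0;
                char *end;

                if (*s) {
                        port = strtoul(s, &end, 10);
                        if (end == s)
                                port = 0;       /* not a number: keep default */
                        else if (port > 2)
                                port = 2;       /* clamp_val(port, 0, 2) */
                }
                return HSU_PORT_BASE + port * 0x80;
        }

        int main(void)
        {
                printf("default: %#lx\n", hsu_port_paddr(""));
                printf("port 1:  %#lx\n", hsu_port_paddr("1"));
                printf("port 9:  %#lx\n", hsu_port_paddr("9"));  /* clamped to 2 */
                return 0;
        }

Note that port 2 lands on 0xffa28180, the single hard-wired HSU_PORT2_PADDR the old code used.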
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c index ad4ec1cb097e..475e2cd0f3c3 100644 --- a/arch/x86/platform/mrst/mrst.c +++ b/arch/x86/platform/mrst/mrst.c | |||
@@ -848,8 +848,7 @@ static void __init sfi_handle_ipc_dev(struct sfi_device_table_entry *entry) | |||
848 | if (mrst_has_msic()) | 848 | if (mrst_has_msic()) |
849 | return; | 849 | return; |
850 | 850 | ||
851 | /* ID as IRQ is a hack that will go away */ | 851 | pdev = platform_device_alloc(entry->name, 0); |
852 | pdev = platform_device_alloc(entry->name, entry->irq); | ||
853 | if (pdev == NULL) { | 852 | if (pdev == NULL) { |
854 | pr_err("out of memory for SFI platform device '%s'.\n", | 853 | pr_err("out of memory for SFI platform device '%s'.\n", |
855 | entry->name); | 854 | entry->name); |
@@ -1030,6 +1029,7 @@ static int __init pb_keys_init(void) | |||
1030 | num = sizeof(gpio_button) / sizeof(struct gpio_keys_button); | 1029 | num = sizeof(gpio_button) / sizeof(struct gpio_keys_button); |
1031 | for (i = 0; i < num; i++) { | 1030 | for (i = 0; i < num; i++) { |
1032 | gb[i].gpio = get_gpio_by_name(gb[i].desc); | 1031 | gb[i].gpio = get_gpio_by_name(gb[i].desc); |
1032 | pr_debug("info[%2d]: name = %s, gpio = %d\n", i, gb[i].desc, gb[i].gpio); | ||
1033 | if (gb[i].gpio == -1) | 1033 | if (gb[i].gpio == -1) |
1034 | continue; | 1034 | continue; |
1035 | 1035 | ||
diff --git a/arch/x86/platform/uv/uv_sysfs.c b/arch/x86/platform/uv/uv_sysfs.c index 309c70fb7759..5d4ba301e776 100644 --- a/arch/x86/platform/uv/uv_sysfs.c +++ b/arch/x86/platform/uv/uv_sysfs.c | |||
@@ -19,7 +19,7 @@ | |||
19 | * Copyright (c) Russ Anderson | 19 | * Copyright (c) Russ Anderson |
20 | */ | 20 | */ |
21 | 21 | ||
22 | #include <linux/sysdev.h> | 22 | #include <linux/device.h> |
23 | #include <asm/uv/bios.h> | 23 | #include <asm/uv/bios.h> |
24 | #include <asm/uv/uv.h> | 24 | #include <asm/uv/uv.h> |
25 | 25 | ||
diff --git a/arch/x86/syscalls/Makefile b/arch/x86/syscalls/Makefile new file mode 100644 index 000000000000..564b2476fede --- /dev/null +++ b/arch/x86/syscalls/Makefile | |||
@@ -0,0 +1,43 @@ | |||
1 | out := $(obj)/../include/generated/asm | ||
2 | |||
3 | # Create output directory if not already present | ||
4 | _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') | ||
5 | |||
6 | syscall32 := $(srctree)/$(src)/syscall_32.tbl | ||
7 | syscall64 := $(srctree)/$(src)/syscall_64.tbl | ||
8 | |||
9 | syshdr := $(srctree)/$(src)/syscallhdr.sh | ||
10 | systbl := $(srctree)/$(src)/syscalltbl.sh | ||
11 | |||
12 | quiet_cmd_syshdr = SYSHDR $@ | ||
13 | cmd_syshdr = $(CONFIG_SHELL) '$(syshdr)' $< $@ \ | ||
14 | $(syshdr_abi_$(basetarget)) $(syshdr_pfx_$(basetarget)) | ||
15 | quiet_cmd_systbl = SYSTBL $@ | ||
16 | cmd_systbl = $(CONFIG_SHELL) '$(systbl)' $< $@ | ||
17 | |||
18 | syshdr_abi_unistd_32 := i386 | ||
19 | $(out)/unistd_32.h: $(syscall32) $(syshdr) | ||
20 | $(call if_changed,syshdr) | ||
21 | |||
22 | syshdr_abi_unistd_32_ia32 := i386 | ||
23 | syshdr_pfx_unistd_32_ia32 := ia32_ | ||
24 | $(out)/unistd_32_ia32.h: $(syscall32) $(syshdr) | ||
25 | $(call if_changed,syshdr) | ||
26 | |||
27 | syshdr_abi_unistd_64 := 64 | ||
28 | $(out)/unistd_64.h: $(syscall64) $(syshdr) | ||
29 | $(call if_changed,syshdr) | ||
30 | |||
31 | $(out)/syscalls_32.h: $(syscall32) $(systbl) | ||
32 | $(call if_changed,systbl) | ||
33 | $(out)/syscalls_64.h: $(syscall64) $(systbl) | ||
34 | $(call if_changed,systbl) | ||
35 | |||
36 | syshdr-y += unistd_32.h unistd_64.h | ||
37 | syshdr-y += syscalls_32.h | ||
38 | syshdr-$(CONFIG_X86_64) += unistd_32_ia32.h | ||
39 | syshdr-$(CONFIG_X86_64) += syscalls_64.h | ||
40 | |||
41 | targets += $(syshdr-y) | ||
42 | |||
43 | all: $(addprefix $(out)/,$(targets)) | ||
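The new Makefile drives syscallhdr.sh and syscalltbl.sh over the .tbl files that follow; each non-comment row is "<number> <abi> <name> <entry point> [<compat entry point>]". A hypothetical C parser for one such row, whose output only approximates what the generated unistd_*.h and syscalls_*.h headers contain:

        #include <stdio.h>

        int main(void)
        {
                /* one sample row in the new table format */
                const char *line = "5\ti386\topen\tsys_open\tcompat_sys_open";
                int nr;
                char abi[16], name[64], entry[64], compat[64];

                int n = sscanf(line, "%d %15s %63s %63s %63s",
                               &nr, abi, name, entry, compat);
                if (n >= 4) {
                        printf("#define __NR_%s %d\n", name, nr);   /* unistd_*.h   */
                        printf("__SYSCALL(%d, %s)\n", nr, entry);   /* syscalls_*.h */
                        if (n == 5)
                                printf("/* compat entry: %s */\n", compat);
                }
                return 0;
        }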
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl new file mode 100644 index 000000000000..ce98e287c066 --- /dev/null +++ b/arch/x86/syscalls/syscall_32.tbl | |||
@@ -0,0 +1,357 @@ | |||
1 | # | ||
2 | # 32-bit system call numbers and entry vectors | ||
3 | # | ||
4 | # The format is: | ||
5 | # <number> <abi> <name> <entry point> <compat entry point> | ||
6 | # | ||
7 | # The abi is always "i386" for this file. | ||
8 | # | ||
9 | 0 i386 restart_syscall sys_restart_syscall | ||
10 | 1 i386 exit sys_exit | ||
11 | 2 i386 fork ptregs_fork stub32_fork | ||
12 | 3 i386 read sys_read | ||
13 | 4 i386 write sys_write | ||
14 | 5 i386 open sys_open compat_sys_open | ||
15 | 6 i386 close sys_close | ||
16 | 7 i386 waitpid sys_waitpid sys32_waitpid | ||
17 | 8 i386 creat sys_creat | ||
18 | 9 i386 link sys_link | ||
19 | 10 i386 unlink sys_unlink | ||
20 | 11 i386 execve ptregs_execve stub32_execve | ||
21 | 12 i386 chdir sys_chdir | ||
22 | 13 i386 time sys_time compat_sys_time | ||
23 | 14 i386 mknod sys_mknod | ||
24 | 15 i386 chmod sys_chmod | ||
25 | 16 i386 lchown sys_lchown16 | ||
26 | 17 i386 break | ||
27 | 18 i386 oldstat sys_stat | ||
28 | 19 i386 lseek sys_lseek sys32_lseek | ||
29 | 20 i386 getpid sys_getpid | ||
30 | 21 i386 mount sys_mount compat_sys_mount | ||
31 | 22 i386 umount sys_oldumount | ||
32 | 23 i386 setuid sys_setuid16 | ||
33 | 24 i386 getuid sys_getuid16 | ||
34 | 25 i386 stime sys_stime compat_sys_stime | ||
35 | 26 i386 ptrace sys_ptrace compat_sys_ptrace | ||
36 | 27 i386 alarm sys_alarm | ||
37 | 28 i386 oldfstat sys_fstat | ||
38 | 29 i386 pause sys_pause | ||
39 | 30 i386 utime sys_utime compat_sys_utime | ||
40 | 31 i386 stty | ||
41 | 32 i386 gtty | ||
42 | 33 i386 access sys_access | ||
43 | 34 i386 nice sys_nice | ||
44 | 35 i386 ftime | ||
45 | 36 i386 sync sys_sync | ||
46 | 37 i386 kill sys_kill sys32_kill | ||
47 | 38 i386 rename sys_rename | ||
48 | 39 i386 mkdir sys_mkdir | ||
49 | 40 i386 rmdir sys_rmdir | ||
50 | 41 i386 dup sys_dup | ||
51 | 42 i386 pipe sys_pipe | ||
52 | 43 i386 times sys_times compat_sys_times | ||
53 | 44 i386 prof | ||
54 | 45 i386 brk sys_brk | ||
55 | 46 i386 setgid sys_setgid16 | ||
56 | 47 i386 getgid sys_getgid16 | ||
57 | 48 i386 signal sys_signal | ||
58 | 49 i386 geteuid sys_geteuid16 | ||
59 | 50 i386 getegid sys_getegid16 | ||
60 | 51 i386 acct sys_acct | ||
61 | 52 i386 umount2 sys_umount | ||
62 | 53 i386 lock | ||
63 | 54 i386 ioctl sys_ioctl compat_sys_ioctl | ||
64 | 55 i386 fcntl sys_fcntl compat_sys_fcntl64 | ||
65 | 56 i386 mpx | ||
66 | 57 i386 setpgid sys_setpgid | ||
67 | 58 i386 ulimit | ||
68 | 59 i386 oldolduname sys_olduname | ||
69 | 60 i386 umask sys_umask | ||
70 | 61 i386 chroot sys_chroot | ||
71 | 62 i386 ustat sys_ustat compat_sys_ustat | ||
72 | 63 i386 dup2 sys_dup2 | ||
73 | 64 i386 getppid sys_getppid | ||
74 | 65 i386 getpgrp sys_getpgrp | ||
75 | 66 i386 setsid sys_setsid | ||
76 | 67 i386 sigaction sys_sigaction sys32_sigaction | ||
77 | 68 i386 sgetmask sys_sgetmask | ||
78 | 69 i386 ssetmask sys_ssetmask | ||
79 | 70 i386 setreuid sys_setreuid16 | ||
80 | 71 i386 setregid sys_setregid16 | ||
81 | 72 i386 sigsuspend sys_sigsuspend sys32_sigsuspend | ||
82 | 73 i386 sigpending sys_sigpending compat_sys_sigpending | ||
83 | 74 i386 sethostname sys_sethostname | ||
84 | 75 i386 setrlimit sys_setrlimit compat_sys_setrlimit | ||
85 | 76 i386 getrlimit sys_old_getrlimit compat_sys_old_getrlimit | ||
86 | 77 i386 getrusage sys_getrusage compat_sys_getrusage | ||
87 | 78 i386 gettimeofday sys_gettimeofday compat_sys_gettimeofday | ||
88 | 79 i386 settimeofday sys_settimeofday compat_sys_settimeofday | ||
89 | 80 i386 getgroups sys_getgroups16 | ||
90 | 81 i386 setgroups sys_setgroups16 | ||
91 | 82 i386 select sys_old_select compat_sys_old_select | ||
92 | 83 i386 symlink sys_symlink | ||
93 | 84 i386 oldlstat sys_lstat | ||
94 | 85 i386 readlink sys_readlink | ||
95 | 86 i386 uselib sys_uselib | ||
96 | 87 i386 swapon sys_swapon | ||
97 | 88 i386 reboot sys_reboot | ||
98 | 89 i386 readdir sys_old_readdir compat_sys_old_readdir | ||
99 | 90 i386 mmap sys_old_mmap sys32_mmap | ||
100 | 91 i386 munmap sys_munmap | ||
101 | 92 i386 truncate sys_truncate | ||
102 | 93 i386 ftruncate sys_ftruncate | ||
103 | 94 i386 fchmod sys_fchmod | ||
104 | 95 i386 fchown sys_fchown16 | ||
105 | 96 i386 getpriority sys_getpriority | ||
106 | 97 i386 setpriority sys_setpriority | ||
107 | 98 i386 profil | ||
108 | 99 i386 statfs sys_statfs compat_sys_statfs | ||
109 | 100 i386 fstatfs sys_fstatfs compat_sys_fstatfs | ||
110 | 101 i386 ioperm sys_ioperm | ||
111 | 102 i386 socketcall sys_socketcall compat_sys_socketcall | ||
112 | 103 i386 syslog sys_syslog | ||
113 | 104 i386 setitimer sys_setitimer compat_sys_setitimer | ||
114 | 105 i386 getitimer sys_getitimer compat_sys_getitimer | ||
115 | 106 i386 stat sys_newstat compat_sys_newstat | ||
116 | 107 i386 lstat sys_newlstat compat_sys_newlstat | ||
117 | 108 i386 fstat sys_newfstat compat_sys_newfstat | ||
118 | 109 i386 olduname sys_uname | ||
119 | 110 i386 iopl ptregs_iopl stub32_iopl | ||
120 | 111 i386 vhangup sys_vhangup | ||
121 | 112 i386 idle | ||
122 | 113 i386 vm86old ptregs_vm86old sys32_vm86_warning | ||
123 | 114 i386 wait4 sys_wait4 compat_sys_wait4 | ||
124 | 115 i386 swapoff sys_swapoff | ||
125 | 116 i386 sysinfo sys_sysinfo compat_sys_sysinfo | ||
126 | 117 i386 ipc sys_ipc sys32_ipc | ||
127 | 118 i386 fsync sys_fsync | ||
128 | 119 i386 sigreturn ptregs_sigreturn stub32_sigreturn | ||
129 | 120 i386 clone ptregs_clone stub32_clone | ||
130 | 121 i386 setdomainname sys_setdomainname | ||
131 | 122 i386 uname sys_newuname | ||
132 | 123 i386 modify_ldt sys_modify_ldt | ||
133 | 124 i386 adjtimex sys_adjtimex compat_sys_adjtimex | ||
134 | 125 i386 mprotect sys_mprotect sys32_mprotect | ||
135 | 126 i386 sigprocmask sys_sigprocmask compat_sys_sigprocmask | ||
136 | 127 i386 create_module | ||
137 | 128 i386 init_module sys_init_module | ||
138 | 129 i386 delete_module sys_delete_module | ||
139 | 130 i386 get_kernel_syms | ||
140 | 131 i386 quotactl sys_quotactl sys32_quotactl | ||
141 | 132 i386 getpgid sys_getpgid | ||
142 | 133 i386 fchdir sys_fchdir | ||
143 | 134 i386 bdflush sys_bdflush | ||
144 | 135 i386 sysfs sys_sysfs | ||
145 | 136 i386 personality sys_personality | ||
146 | 137 i386 afs_syscall | ||
147 | 138 i386 setfsuid sys_setfsuid16 | ||
148 | 139 i386 setfsgid sys_setfsgid16 | ||
149 | 140 i386 _llseek sys_llseek | ||
150 | 141 i386 getdents sys_getdents compat_sys_getdents | ||
151 | 142 i386 _newselect sys_select compat_sys_select | ||
152 | 143 i386 flock sys_flock | ||
153 | 144 i386 msync sys_msync | ||
154 | 145 i386 readv sys_readv compat_sys_readv | ||
155 | 146 i386 writev sys_writev compat_sys_writev | ||
156 | 147 i386 getsid sys_getsid | ||
157 | 148 i386 fdatasync sys_fdatasync | ||
158 | 149 i386 _sysctl sys_sysctl compat_sys_sysctl | ||
159 | 150 i386 mlock sys_mlock | ||
160 | 151 i386 munlock sys_munlock | ||
161 | 152 i386 mlockall sys_mlockall | ||
162 | 153 i386 munlockall sys_munlockall | ||
163 | 154 i386 sched_setparam sys_sched_setparam | ||
164 | 155 i386 sched_getparam sys_sched_getparam | ||
165 | 156 i386 sched_setscheduler sys_sched_setscheduler | ||
166 | 157 i386 sched_getscheduler sys_sched_getscheduler | ||
167 | 158 i386 sched_yield sys_sched_yield | ||
168 | 159 i386 sched_get_priority_max sys_sched_get_priority_max | ||
169 | 160 i386 sched_get_priority_min sys_sched_get_priority_min | ||
170 | 161 i386 sched_rr_get_interval sys_sched_rr_get_interval sys32_sched_rr_get_interval | ||
171 | 162 i386 nanosleep sys_nanosleep compat_sys_nanosleep | ||
172 | 163 i386 mremap sys_mremap | ||
173 | 164 i386 setresuid sys_setresuid16 | ||
174 | 165 i386 getresuid sys_getresuid16 | ||
175 | 166 i386 vm86 ptregs_vm86 sys32_vm86_warning | ||
176 | 167 i386 query_module | ||
177 | 168 i386 poll sys_poll | ||
178 | 169 i386 nfsservctl | ||
179 | 170 i386 setresgid sys_setresgid16 | ||
180 | 171 i386 getresgid sys_getresgid16 | ||
181 | 172 i386 prctl sys_prctl | ||
182 | 173 i386 rt_sigreturn ptregs_rt_sigreturn stub32_rt_sigreturn | ||
183 | 174 i386 rt_sigaction sys_rt_sigaction sys32_rt_sigaction | ||
184 | 175 i386 rt_sigprocmask sys_rt_sigprocmask sys32_rt_sigprocmask | ||
185 | 176 i386 rt_sigpending sys_rt_sigpending sys32_rt_sigpending | ||
186 | 177 i386 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait | ||
187 | 178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo sys32_rt_sigqueueinfo | ||
188 | 179 i386 rt_sigsuspend sys_rt_sigsuspend | ||
189 | 180 i386 pread64 sys_pread64 sys32_pread | ||
190 | 181 i386 pwrite64 sys_pwrite64 sys32_pwrite | ||
191 | 182 i386 chown sys_chown16 | ||
192 | 183 i386 getcwd sys_getcwd | ||
193 | 184 i386 capget sys_capget | ||
194 | 185 i386 capset sys_capset | ||
195 | 186 i386 sigaltstack ptregs_sigaltstack stub32_sigaltstack | ||
196 | 187 i386 sendfile sys_sendfile sys32_sendfile | ||
197 | 188 i386 getpmsg | ||
198 | 189 i386 putpmsg | ||
199 | 190 i386 vfork ptregs_vfork stub32_vfork | ||
200 | 191 i386 ugetrlimit sys_getrlimit compat_sys_getrlimit | ||
201 | 192 i386 mmap2 sys_mmap_pgoff | ||
202 | 193 i386 truncate64 sys_truncate64 sys32_truncate64 | ||
203 | 194 i386 ftruncate64 sys_ftruncate64 sys32_ftruncate64 | ||
204 | 195 i386 stat64 sys_stat64 sys32_stat64 | ||
205 | 196 i386 lstat64 sys_lstat64 sys32_lstat64 | ||
206 | 197 i386 fstat64 sys_fstat64 sys32_fstat64 | ||
207 | 198 i386 lchown32 sys_lchown | ||
208 | 199 i386 getuid32 sys_getuid | ||
209 | 200 i386 getgid32 sys_getgid | ||
210 | 201 i386 geteuid32 sys_geteuid | ||
211 | 202 i386 getegid32 sys_getegid | ||
212 | 203 i386 setreuid32 sys_setreuid | ||
213 | 204 i386 setregid32 sys_setregid | ||
214 | 205 i386 getgroups32 sys_getgroups | ||
215 | 206 i386 setgroups32 sys_setgroups | ||
216 | 207 i386 fchown32 sys_fchown | ||
217 | 208 i386 setresuid32 sys_setresuid | ||
218 | 209 i386 getresuid32 sys_getresuid | ||
219 | 210 i386 setresgid32 sys_setresgid | ||
220 | 211 i386 getresgid32 sys_getresgid | ||
221 | 212 i386 chown32 sys_chown | ||
222 | 213 i386 setuid32 sys_setuid | ||
223 | 214 i386 setgid32 sys_setgid | ||
224 | 215 i386 setfsuid32 sys_setfsuid | ||
225 | 216 i386 setfsgid32 sys_setfsgid | ||
226 | 217 i386 pivot_root sys_pivot_root | ||
227 | 218 i386 mincore sys_mincore | ||
228 | 219 i386 madvise sys_madvise | ||
229 | 220 i386 getdents64 sys_getdents64 compat_sys_getdents64 | ||
230 | 221 i386 fcntl64 sys_fcntl64 compat_sys_fcntl64 | ||
231 | # 222 is unused | ||
232 | # 223 is unused | ||
233 | 224 i386 gettid sys_gettid | ||
234 | 225 i386 readahead sys_readahead sys32_readahead | ||
235 | 226 i386 setxattr sys_setxattr | ||
236 | 227 i386 lsetxattr sys_lsetxattr | ||
237 | 228 i386 fsetxattr sys_fsetxattr | ||
238 | 229 i386 getxattr sys_getxattr | ||
239 | 230 i386 lgetxattr sys_lgetxattr | ||
240 | 231 i386 fgetxattr sys_fgetxattr | ||
241 | 232 i386 listxattr sys_listxattr | ||
242 | 233 i386 llistxattr sys_llistxattr | ||
243 | 234 i386 flistxattr sys_flistxattr | ||
244 | 235 i386 removexattr sys_removexattr | ||
245 | 236 i386 lremovexattr sys_lremovexattr | ||
246 | 237 i386 fremovexattr sys_fremovexattr | ||
247 | 238 i386 tkill sys_tkill | ||
248 | 239 i386 sendfile64 sys_sendfile64 | ||
249 | 240 i386 futex sys_futex compat_sys_futex | ||
250 | 241 i386 sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity | ||
251 | 242 i386 sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity | ||
252 | 243 i386 set_thread_area sys_set_thread_area | ||
253 | 244 i386 get_thread_area sys_get_thread_area | ||
254 | 245 i386 io_setup sys_io_setup compat_sys_io_setup | ||
255 | 246 i386 io_destroy sys_io_destroy | ||
256 | 247 i386 io_getevents sys_io_getevents compat_sys_io_getevents | ||
257 | 248 i386 io_submit sys_io_submit compat_sys_io_submit | ||
258 | 249 i386 io_cancel sys_io_cancel | ||
259 | 250 i386 fadvise64 sys_fadvise64 sys32_fadvise64 | ||
260 | # 251 is available for reuse (was briefly sys_set_zone_reclaim) | ||
261 | 252 i386 exit_group sys_exit_group | ||
262 | 253 i386 lookup_dcookie sys_lookup_dcookie sys32_lookup_dcookie | ||
263 | 254 i386 epoll_create sys_epoll_create | ||
264 | 255 i386 epoll_ctl sys_epoll_ctl | ||
265 | 256 i386 epoll_wait sys_epoll_wait | ||
266 | 257 i386 remap_file_pages sys_remap_file_pages | ||
267 | 258 i386 set_tid_address sys_set_tid_address | ||
268 | 259 i386 timer_create sys_timer_create compat_sys_timer_create | ||
269 | 260 i386 timer_settime sys_timer_settime compat_sys_timer_settime | ||
270 | 261 i386 timer_gettime sys_timer_gettime compat_sys_timer_gettime | ||
271 | 262 i386 timer_getoverrun sys_timer_getoverrun | ||
272 | 263 i386 timer_delete sys_timer_delete | ||
273 | 264 i386 clock_settime sys_clock_settime compat_sys_clock_settime | ||
274 | 265 i386 clock_gettime sys_clock_gettime compat_sys_clock_gettime | ||
275 | 266 i386 clock_getres sys_clock_getres compat_sys_clock_getres | ||
276 | 267 i386 clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep | ||
277 | 268 i386 statfs64 sys_statfs64 compat_sys_statfs64 | ||
278 | 269 i386 fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 | ||
279 | 270 i386 tgkill sys_tgkill | ||
280 | 271 i386 utimes sys_utimes compat_sys_utimes | ||
281 | 272 i386 fadvise64_64 sys_fadvise64_64 sys32_fadvise64_64 | ||
282 | 273 i386 vserver | ||
283 | 274 i386 mbind sys_mbind | ||
284 | 275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy | ||
285 | 276 i386 set_mempolicy sys_set_mempolicy | ||
286 | 277 i386 mq_open sys_mq_open compat_sys_mq_open | ||
287 | 278 i386 mq_unlink sys_mq_unlink | ||
288 | 279 i386 mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend | ||
289 | 280 i386 mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive | ||
290 | 281 i386 mq_notify sys_mq_notify compat_sys_mq_notify | ||
291 | 282 i386 mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr | ||
292 | 283 i386 kexec_load sys_kexec_load compat_sys_kexec_load | ||
293 | 284 i386 waitid sys_waitid compat_sys_waitid | ||
294 | # 285 sys_setaltroot | ||
295 | 286 i386 add_key sys_add_key | ||
296 | 287 i386 request_key sys_request_key | ||
297 | 288 i386 keyctl sys_keyctl | ||
298 | 289 i386 ioprio_set sys_ioprio_set | ||
299 | 290 i386 ioprio_get sys_ioprio_get | ||
300 | 291 i386 inotify_init sys_inotify_init | ||
301 | 292 i386 inotify_add_watch sys_inotify_add_watch | ||
302 | 293 i386 inotify_rm_watch sys_inotify_rm_watch | ||
303 | 294 i386 migrate_pages sys_migrate_pages | ||
304 | 295 i386 openat sys_openat compat_sys_openat | ||
305 | 296 i386 mkdirat sys_mkdirat | ||
306 | 297 i386 mknodat sys_mknodat | ||
307 | 298 i386 fchownat sys_fchownat | ||
308 | 299 i386 futimesat sys_futimesat compat_sys_futimesat | ||
309 | 300 i386 fstatat64 sys_fstatat64 sys32_fstatat | ||
310 | 301 i386 unlinkat sys_unlinkat | ||
311 | 302 i386 renameat sys_renameat | ||
312 | 303 i386 linkat sys_linkat | ||
313 | 304 i386 symlinkat sys_symlinkat | ||
314 | 305 i386 readlinkat sys_readlinkat | ||
315 | 306 i386 fchmodat sys_fchmodat | ||
316 | 307 i386 faccessat sys_faccessat | ||
317 | 308 i386 pselect6 sys_pselect6 compat_sys_pselect6 | ||
318 | 309 i386 ppoll sys_ppoll compat_sys_ppoll | ||
319 | 310 i386 unshare sys_unshare | ||
320 | 311 i386 set_robust_list sys_set_robust_list compat_sys_set_robust_list | ||
321 | 312 i386 get_robust_list sys_get_robust_list compat_sys_get_robust_list | ||
322 | 313 i386 splice sys_splice | ||
323 | 314 i386 sync_file_range sys_sync_file_range sys32_sync_file_range | ||
324 | 315 i386 tee sys_tee | ||
325 | 316 i386 vmsplice sys_vmsplice compat_sys_vmsplice | ||
326 | 317 i386 move_pages sys_move_pages compat_sys_move_pages | ||
327 | 318 i386 getcpu sys_getcpu | ||
328 | 319 i386 epoll_pwait sys_epoll_pwait | ||
329 | 320 i386 utimensat sys_utimensat compat_sys_utimensat | ||
330 | 321 i386 signalfd sys_signalfd compat_sys_signalfd | ||
331 | 322 i386 timerfd_create sys_timerfd_create | ||
332 | 323 i386 eventfd sys_eventfd | ||
333 | 324 i386 fallocate sys_fallocate sys32_fallocate | ||
334 | 325 i386 timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime | ||
335 | 326 i386 timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime | ||
336 | 327 i386 signalfd4 sys_signalfd4 compat_sys_signalfd4 | ||
337 | 328 i386 eventfd2 sys_eventfd2 | ||
338 | 329 i386 epoll_create1 sys_epoll_create1 | ||
339 | 330 i386 dup3 sys_dup3 | ||
340 | 331 i386 pipe2 sys_pipe2 | ||
341 | 332 i386 inotify_init1 sys_inotify_init1 | ||
342 | 333 i386 preadv sys_preadv compat_sys_preadv | ||
343 | 334 i386 pwritev sys_pwritev compat_sys_pwritev | ||
344 | 335 i386 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo | ||
345 | 336 i386 perf_event_open sys_perf_event_open | ||
346 | 337 i386 recvmmsg sys_recvmmsg compat_sys_recvmmsg | ||
347 | 338 i386 fanotify_init sys_fanotify_init | ||
348 | 339 i386 fanotify_mark sys_fanotify_mark sys32_fanotify_mark | ||
349 | 340 i386 prlimit64 sys_prlimit64 | ||
350 | 341 i386 name_to_handle_at sys_name_to_handle_at | ||
351 | 342 i386 open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at | ||
352 | 343 i386 clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime | ||
353 | 344 i386 syncfs sys_syncfs | ||
354 | 345 i386 sendmmsg sys_sendmmsg compat_sys_sendmmsg | ||
355 | 346 i386 setns sys_setns | ||
356 | 347 i386 process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv | ||
357 | 348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev | ||
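A note on the rows above: the i386 table is five columns, <number> <abi> <name> <native entry point> <compat entry point>, where the optional fifth column names the entry a 64-bit kernel uses when a 32-bit process makes the call. Entries spelled ptregs_* and stub32_* are wrappers for calls that need the full register frame. As a sketch (illustrative, not part of the patch), two rows from the table expand through the __SYSCALL_I386 macro emitted by the generator script added later in this patch:

    __SYSCALL_I386(117, sys_ipc, sys32_ipc)    /* compat column present */
    __SYSCALL_I386(118, sys_fsync, sys_fsync)  /* no compat column: native entry reused */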
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl new file mode 100644 index 000000000000..b440a8f7eefa --- /dev/null +++ b/arch/x86/syscalls/syscall_64.tbl | |||
@@ -0,0 +1,320 @@ | |||
1 | # | ||
2 | # 64-bit system call numbers and entry vectors | ||
3 | # | ||
4 | # The format is: | ||
5 | # <number> <abi> <name> <entry point> | ||
6 | # | ||
7 | # The abi is always "64" for this file (for now). | ||
8 | # | ||
9 | 0 64 read sys_read | ||
10 | 1 64 write sys_write | ||
11 | 2 64 open sys_open | ||
12 | 3 64 close sys_close | ||
13 | 4 64 stat sys_newstat | ||
14 | 5 64 fstat sys_newfstat | ||
15 | 6 64 lstat sys_newlstat | ||
16 | 7 64 poll sys_poll | ||
17 | 8 64 lseek sys_lseek | ||
18 | 9 64 mmap sys_mmap | ||
19 | 10 64 mprotect sys_mprotect | ||
20 | 11 64 munmap sys_munmap | ||
21 | 12 64 brk sys_brk | ||
22 | 13 64 rt_sigaction sys_rt_sigaction | ||
23 | 14 64 rt_sigprocmask sys_rt_sigprocmask | ||
24 | 15 64 rt_sigreturn stub_rt_sigreturn | ||
25 | 16 64 ioctl sys_ioctl | ||
26 | 17 64 pread64 sys_pread64 | ||
27 | 18 64 pwrite64 sys_pwrite64 | ||
28 | 19 64 readv sys_readv | ||
29 | 20 64 writev sys_writev | ||
30 | 21 64 access sys_access | ||
31 | 22 64 pipe sys_pipe | ||
32 | 23 64 select sys_select | ||
33 | 24 64 sched_yield sys_sched_yield | ||
34 | 25 64 mremap sys_mremap | ||
35 | 26 64 msync sys_msync | ||
36 | 27 64 mincore sys_mincore | ||
37 | 28 64 madvise sys_madvise | ||
38 | 29 64 shmget sys_shmget | ||
39 | 30 64 shmat sys_shmat | ||
40 | 31 64 shmctl sys_shmctl | ||
41 | 32 64 dup sys_dup | ||
42 | 33 64 dup2 sys_dup2 | ||
43 | 34 64 pause sys_pause | ||
44 | 35 64 nanosleep sys_nanosleep | ||
45 | 36 64 getitimer sys_getitimer | ||
46 | 37 64 alarm sys_alarm | ||
47 | 38 64 setitimer sys_setitimer | ||
48 | 39 64 getpid sys_getpid | ||
49 | 40 64 sendfile sys_sendfile64 | ||
50 | 41 64 socket sys_socket | ||
51 | 42 64 connect sys_connect | ||
52 | 43 64 accept sys_accept | ||
53 | 44 64 sendto sys_sendto | ||
54 | 45 64 recvfrom sys_recvfrom | ||
55 | 46 64 sendmsg sys_sendmsg | ||
56 | 47 64 recvmsg sys_recvmsg | ||
57 | 48 64 shutdown sys_shutdown | ||
58 | 49 64 bind sys_bind | ||
59 | 50 64 listen sys_listen | ||
60 | 51 64 getsockname sys_getsockname | ||
61 | 52 64 getpeername sys_getpeername | ||
62 | 53 64 socketpair sys_socketpair | ||
63 | 54 64 setsockopt sys_setsockopt | ||
64 | 55 64 getsockopt sys_getsockopt | ||
65 | 56 64 clone stub_clone | ||
66 | 57 64 fork stub_fork | ||
67 | 58 64 vfork stub_vfork | ||
68 | 59 64 execve stub_execve | ||
69 | 60 64 exit sys_exit | ||
70 | 61 64 wait4 sys_wait4 | ||
71 | 62 64 kill sys_kill | ||
72 | 63 64 uname sys_newuname | ||
73 | 64 64 semget sys_semget | ||
74 | 65 64 semop sys_semop | ||
75 | 66 64 semctl sys_semctl | ||
76 | 67 64 shmdt sys_shmdt | ||
77 | 68 64 msgget sys_msgget | ||
78 | 69 64 msgsnd sys_msgsnd | ||
79 | 70 64 msgrcv sys_msgrcv | ||
80 | 71 64 msgctl sys_msgctl | ||
81 | 72 64 fcntl sys_fcntl | ||
82 | 73 64 flock sys_flock | ||
83 | 74 64 fsync sys_fsync | ||
84 | 75 64 fdatasync sys_fdatasync | ||
85 | 76 64 truncate sys_truncate | ||
86 | 77 64 ftruncate sys_ftruncate | ||
87 | 78 64 getdents sys_getdents | ||
88 | 79 64 getcwd sys_getcwd | ||
89 | 80 64 chdir sys_chdir | ||
90 | 81 64 fchdir sys_fchdir | ||
91 | 82 64 rename sys_rename | ||
92 | 83 64 mkdir sys_mkdir | ||
93 | 84 64 rmdir sys_rmdir | ||
94 | 85 64 creat sys_creat | ||
95 | 86 64 link sys_link | ||
96 | 87 64 unlink sys_unlink | ||
97 | 88 64 symlink sys_symlink | ||
98 | 89 64 readlink sys_readlink | ||
99 | 90 64 chmod sys_chmod | ||
100 | 91 64 fchmod sys_fchmod | ||
101 | 92 64 chown sys_chown | ||
102 | 93 64 fchown sys_fchown | ||
103 | 94 64 lchown sys_lchown | ||
104 | 95 64 umask sys_umask | ||
105 | 96 64 gettimeofday sys_gettimeofday | ||
106 | 97 64 getrlimit sys_getrlimit | ||
107 | 98 64 getrusage sys_getrusage | ||
108 | 99 64 sysinfo sys_sysinfo | ||
109 | 100 64 times sys_times | ||
110 | 101 64 ptrace sys_ptrace | ||
111 | 102 64 getuid sys_getuid | ||
112 | 103 64 syslog sys_syslog | ||
113 | 104 64 getgid sys_getgid | ||
114 | 105 64 setuid sys_setuid | ||
115 | 106 64 setgid sys_setgid | ||
116 | 107 64 geteuid sys_geteuid | ||
117 | 108 64 getegid sys_getegid | ||
118 | 109 64 setpgid sys_setpgid | ||
119 | 110 64 getppid sys_getppid | ||
120 | 111 64 getpgrp sys_getpgrp | ||
121 | 112 64 setsid sys_setsid | ||
122 | 113 64 setreuid sys_setreuid | ||
123 | 114 64 setregid sys_setregid | ||
124 | 115 64 getgroups sys_getgroups | ||
125 | 116 64 setgroups sys_setgroups | ||
126 | 117 64 setresuid sys_setresuid | ||
127 | 118 64 getresuid sys_getresuid | ||
128 | 119 64 setresgid sys_setresgid | ||
129 | 120 64 getresgid sys_getresgid | ||
130 | 121 64 getpgid sys_getpgid | ||
131 | 122 64 setfsuid sys_setfsuid | ||
132 | 123 64 setfsgid sys_setfsgid | ||
133 | 124 64 getsid sys_getsid | ||
134 | 125 64 capget sys_capget | ||
135 | 126 64 capset sys_capset | ||
136 | 127 64 rt_sigpending sys_rt_sigpending | ||
137 | 128 64 rt_sigtimedwait sys_rt_sigtimedwait | ||
138 | 129 64 rt_sigqueueinfo sys_rt_sigqueueinfo | ||
139 | 130 64 rt_sigsuspend sys_rt_sigsuspend | ||
140 | 131 64 sigaltstack stub_sigaltstack | ||
141 | 132 64 utime sys_utime | ||
142 | 133 64 mknod sys_mknod | ||
143 | 134 64 uselib | ||
144 | 135 64 personality sys_personality | ||
145 | 136 64 ustat sys_ustat | ||
146 | 137 64 statfs sys_statfs | ||
147 | 138 64 fstatfs sys_fstatfs | ||
148 | 139 64 sysfs sys_sysfs | ||
149 | 140 64 getpriority sys_getpriority | ||
150 | 141 64 setpriority sys_setpriority | ||
151 | 142 64 sched_setparam sys_sched_setparam | ||
152 | 143 64 sched_getparam sys_sched_getparam | ||
153 | 144 64 sched_setscheduler sys_sched_setscheduler | ||
154 | 145 64 sched_getscheduler sys_sched_getscheduler | ||
155 | 146 64 sched_get_priority_max sys_sched_get_priority_max | ||
156 | 147 64 sched_get_priority_min sys_sched_get_priority_min | ||
157 | 148 64 sched_rr_get_interval sys_sched_rr_get_interval | ||
158 | 149 64 mlock sys_mlock | ||
159 | 150 64 munlock sys_munlock | ||
160 | 151 64 mlockall sys_mlockall | ||
161 | 152 64 munlockall sys_munlockall | ||
162 | 153 64 vhangup sys_vhangup | ||
163 | 154 64 modify_ldt sys_modify_ldt | ||
164 | 155 64 pivot_root sys_pivot_root | ||
165 | 156 64 _sysctl sys_sysctl | ||
166 | 157 64 prctl sys_prctl | ||
167 | 158 64 arch_prctl sys_arch_prctl | ||
168 | 159 64 adjtimex sys_adjtimex | ||
169 | 160 64 setrlimit sys_setrlimit | ||
170 | 161 64 chroot sys_chroot | ||
171 | 162 64 sync sys_sync | ||
172 | 163 64 acct sys_acct | ||
173 | 164 64 settimeofday sys_settimeofday | ||
174 | 165 64 mount sys_mount | ||
175 | 166 64 umount2 sys_umount | ||
176 | 167 64 swapon sys_swapon | ||
177 | 168 64 swapoff sys_swapoff | ||
178 | 169 64 reboot sys_reboot | ||
179 | 170 64 sethostname sys_sethostname | ||
180 | 171 64 setdomainname sys_setdomainname | ||
181 | 172 64 iopl stub_iopl | ||
182 | 173 64 ioperm sys_ioperm | ||
183 | 174 64 create_module | ||
184 | 175 64 init_module sys_init_module | ||
185 | 176 64 delete_module sys_delete_module | ||
186 | 177 64 get_kernel_syms | ||
187 | 178 64 query_module | ||
188 | 179 64 quotactl sys_quotactl | ||
189 | 180 64 nfsservctl | ||
190 | 181 64 getpmsg | ||
191 | 182 64 putpmsg | ||
192 | 183 64 afs_syscall | ||
193 | 184 64 tuxcall | ||
194 | 185 64 security | ||
195 | 186 64 gettid sys_gettid | ||
196 | 187 64 readahead sys_readahead | ||
197 | 188 64 setxattr sys_setxattr | ||
198 | 189 64 lsetxattr sys_lsetxattr | ||
199 | 190 64 fsetxattr sys_fsetxattr | ||
200 | 191 64 getxattr sys_getxattr | ||
201 | 192 64 lgetxattr sys_lgetxattr | ||
202 | 193 64 fgetxattr sys_fgetxattr | ||
203 | 194 64 listxattr sys_listxattr | ||
204 | 195 64 llistxattr sys_llistxattr | ||
205 | 196 64 flistxattr sys_flistxattr | ||
206 | 197 64 removexattr sys_removexattr | ||
207 | 198 64 lremovexattr sys_lremovexattr | ||
208 | 199 64 fremovexattr sys_fremovexattr | ||
209 | 200 64 tkill sys_tkill | ||
210 | 201 64 time sys_time | ||
211 | 202 64 futex sys_futex | ||
212 | 203 64 sched_setaffinity sys_sched_setaffinity | ||
213 | 204 64 sched_getaffinity sys_sched_getaffinity | ||
214 | 205 64 set_thread_area | ||
215 | 206 64 io_setup sys_io_setup | ||
216 | 207 64 io_destroy sys_io_destroy | ||
217 | 208 64 io_getevents sys_io_getevents | ||
218 | 209 64 io_submit sys_io_submit | ||
219 | 210 64 io_cancel sys_io_cancel | ||
220 | 211 64 get_thread_area | ||
221 | 212 64 lookup_dcookie sys_lookup_dcookie | ||
222 | 213 64 epoll_create sys_epoll_create | ||
223 | 214 64 epoll_ctl_old | ||
224 | 215 64 epoll_wait_old | ||
225 | 216 64 remap_file_pages sys_remap_file_pages | ||
226 | 217 64 getdents64 sys_getdents64 | ||
227 | 218 64 set_tid_address sys_set_tid_address | ||
228 | 219 64 restart_syscall sys_restart_syscall | ||
229 | 220 64 semtimedop sys_semtimedop | ||
230 | 221 64 fadvise64 sys_fadvise64 | ||
231 | 222 64 timer_create sys_timer_create | ||
232 | 223 64 timer_settime sys_timer_settime | ||
233 | 224 64 timer_gettime sys_timer_gettime | ||
234 | 225 64 timer_getoverrun sys_timer_getoverrun | ||
235 | 226 64 timer_delete sys_timer_delete | ||
236 | 227 64 clock_settime sys_clock_settime | ||
237 | 228 64 clock_gettime sys_clock_gettime | ||
238 | 229 64 clock_getres sys_clock_getres | ||
239 | 230 64 clock_nanosleep sys_clock_nanosleep | ||
240 | 231 64 exit_group sys_exit_group | ||
241 | 232 64 epoll_wait sys_epoll_wait | ||
242 | 233 64 epoll_ctl sys_epoll_ctl | ||
243 | 234 64 tgkill sys_tgkill | ||
244 | 235 64 utimes sys_utimes | ||
245 | 236 64 vserver | ||
246 | 237 64 mbind sys_mbind | ||
247 | 238 64 set_mempolicy sys_set_mempolicy | ||
248 | 239 64 get_mempolicy sys_get_mempolicy | ||
249 | 240 64 mq_open sys_mq_open | ||
250 | 241 64 mq_unlink sys_mq_unlink | ||
251 | 242 64 mq_timedsend sys_mq_timedsend | ||
252 | 243 64 mq_timedreceive sys_mq_timedreceive | ||
253 | 244 64 mq_notify sys_mq_notify | ||
254 | 245 64 mq_getsetattr sys_mq_getsetattr | ||
255 | 246 64 kexec_load sys_kexec_load | ||
256 | 247 64 waitid sys_waitid | ||
257 | 248 64 add_key sys_add_key | ||
258 | 249 64 request_key sys_request_key | ||
259 | 250 64 keyctl sys_keyctl | ||
260 | 251 64 ioprio_set sys_ioprio_set | ||
261 | 252 64 ioprio_get sys_ioprio_get | ||
262 | 253 64 inotify_init sys_inotify_init | ||
263 | 254 64 inotify_add_watch sys_inotify_add_watch | ||
264 | 255 64 inotify_rm_watch sys_inotify_rm_watch | ||
265 | 256 64 migrate_pages sys_migrate_pages | ||
266 | 257 64 openat sys_openat | ||
267 | 258 64 mkdirat sys_mkdirat | ||
268 | 259 64 mknodat sys_mknodat | ||
269 | 260 64 fchownat sys_fchownat | ||
270 | 261 64 futimesat sys_futimesat | ||
271 | 262 64 newfstatat sys_newfstatat | ||
272 | 263 64 unlinkat sys_unlinkat | ||
273 | 264 64 renameat sys_renameat | ||
274 | 265 64 linkat sys_linkat | ||
275 | 266 64 symlinkat sys_symlinkat | ||
276 | 267 64 readlinkat sys_readlinkat | ||
277 | 268 64 fchmodat sys_fchmodat | ||
278 | 269 64 faccessat sys_faccessat | ||
279 | 270 64 pselect6 sys_pselect6 | ||
280 | 271 64 ppoll sys_ppoll | ||
281 | 272 64 unshare sys_unshare | ||
282 | 273 64 set_robust_list sys_set_robust_list | ||
283 | 274 64 get_robust_list sys_get_robust_list | ||
284 | 275 64 splice sys_splice | ||
285 | 276 64 tee sys_tee | ||
286 | 277 64 sync_file_range sys_sync_file_range | ||
287 | 278 64 vmsplice sys_vmsplice | ||
288 | 279 64 move_pages sys_move_pages | ||
289 | 280 64 utimensat sys_utimensat | ||
290 | 281 64 epoll_pwait sys_epoll_pwait | ||
291 | 282 64 signalfd sys_signalfd | ||
292 | 283 64 timerfd_create sys_timerfd_create | ||
293 | 284 64 eventfd sys_eventfd | ||
294 | 285 64 fallocate sys_fallocate | ||
295 | 286 64 timerfd_settime sys_timerfd_settime | ||
296 | 287 64 timerfd_gettime sys_timerfd_gettime | ||
297 | 288 64 accept4 sys_accept4 | ||
298 | 289 64 signalfd4 sys_signalfd4 | ||
299 | 290 64 eventfd2 sys_eventfd2 | ||
300 | 291 64 epoll_create1 sys_epoll_create1 | ||
301 | 292 64 dup3 sys_dup3 | ||
302 | 293 64 pipe2 sys_pipe2 | ||
303 | 294 64 inotify_init1 sys_inotify_init1 | ||
304 | 295 64 preadv sys_preadv | ||
305 | 296 64 pwritev sys_pwritev | ||
306 | 297 64 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo | ||
307 | 298 64 perf_event_open sys_perf_event_open | ||
308 | 299 64 recvmmsg sys_recvmmsg | ||
309 | 300 64 fanotify_init sys_fanotify_init | ||
310 | 301 64 fanotify_mark sys_fanotify_mark | ||
311 | 302 64 prlimit64 sys_prlimit64 | ||
312 | 303 64 name_to_handle_at sys_name_to_handle_at | ||
313 | 304 64 open_by_handle_at sys_open_by_handle_at | ||
314 | 305 64 clock_adjtime sys_clock_adjtime | ||
315 | 306 64 syncfs sys_syncfs | ||
316 | 307 64 sendmmsg sys_sendmmsg | ||
317 | 308 64 setns sys_setns | ||
318 | 309 64 getcpu sys_getcpu | ||
319 | 310 64 process_vm_readv sys_process_vm_readv | ||
320 | 311 64 process_vm_writev sys_process_vm_writev | ||
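Rows above that carry a name but no entry point (create_module, get_kernel_syms, query_module, nfsservctl, getpmsg, putpmsg, afs_syscall, tuxcall, security, vserver, plus the intentionally absent set_thread_area, get_thread_area, epoll_ctl_old and epoll_wait_old) only reserve the number. The generator script below emits nothing for them, and the C tables fill those slots with sys_ni_syscall, which returns -ENOSYS. Illustrative expansion of two adjacent rows:

    __SYSCALL_64(173, sys_ioperm, sys_ioperm)  /* row 173 has an entry point */
    /* row 174, create_module, emits no line; slot 174 stays sys_ni_syscall */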
diff --git a/arch/x86/syscalls/syscallhdr.sh b/arch/x86/syscalls/syscallhdr.sh new file mode 100644 index 000000000000..31fd5f1f38f7 --- /dev/null +++ b/arch/x86/syscalls/syscallhdr.sh | |||
@@ -0,0 +1,27 @@ | |||
1 | #!/bin/sh | ||
2 | |||
3 | in="$1" | ||
4 | out="$2" | ||
5 | my_abis=`echo "($3)" | tr ',' '|'` | ||
6 | prefix="$4" | ||
7 | offset="$5" | ||
8 | |||
9 | fileguard=_ASM_X86_`basename "$out" | sed \ | ||
10 | -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/' \ | ||
11 | -e 's/[^A-Z0-9_]/_/g' -e 's/__/_/g'` | ||
12 | grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | ( | ||
13 | echo "#ifndef ${fileguard}" | ||
14 | echo "#define ${fileguard} 1" | ||
15 | echo "" | ||
16 | |||
17 | while read nr abi name entry ; do | ||
18 | if [ -z "$offset" ]; then | ||
19 | echo "#define __NR_${prefix}${name} $nr" | ||
20 | else | ||
21 | echo "#define __NR_${prefix}${name} ($offset + $nr)" | ||
22 | fi | ||
23 | done | ||
24 | |||
25 | echo "" | ||
26 | echo "#endif /* ${fileguard} */" | ||
27 | ) > "$out" | ||
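Given the 64-bit table as input, the header generator above emits one __NR_ define per row, bracketed by an include guard derived from the output filename by the sed pipeline. A sketch of the output for an invocation along the lines of 'sh syscallhdr.sh syscall_64.tbl unistd_64.h 64' (the arguments here are illustrative):

    #ifndef _ASM_X86_UNISTD_64_H
    #define _ASM_X86_UNISTD_64_H 1

    #define __NR_read 0
    #define __NR_write 1
    /* ...one define per table row... */

    #endif /* _ASM_X86_UNISTD_64_H */

With the optional prefix and offset arguments set (as for compat numbering), each line takes the form #define __NR_${prefix}${name} ($offset + $nr) instead.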
diff --git a/arch/x86/syscalls/syscalltbl.sh b/arch/x86/syscalls/syscalltbl.sh new file mode 100644 index 000000000000..0e7f8ec071e7 --- /dev/null +++ b/arch/x86/syscalls/syscalltbl.sh | |||
@@ -0,0 +1,15 @@ | |||
1 | #!/bin/sh | ||
2 | |||
3 | in="$1" | ||
4 | out="$2" | ||
5 | |||
6 | grep '^[0-9]' "$in" | sort -n | ( | ||
7 | while read nr abi name entry compat; do | ||
8 | abi=`echo "$abi" | tr '[a-z]' '[A-Z]'` | ||
9 | if [ -n "$compat" ]; then | ||
10 | echo "__SYSCALL_${abi}($nr, $entry, $compat)" | ||
11 | elif [ -n "$entry" ]; then | ||
12 | echo "__SYSCALL_${abi}($nr, $entry, $entry)" | ||
13 | fi | ||
14 | done | ||
15 | ) > "$out" | ||
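The table generator carries no knowledge of what the macros mean; it just lowers each implemented row to a __SYSCALL_<ABI>(nr, entry, compat) line, duplicating the entry point when no compat column exists and skipping rows with no entry point at all. The consuming C files then define the macro differently on each inclusion. Sketch of the first lines produced from the 64-bit table:

    __SYSCALL_64(0, sys_read, sys_read)
    __SYSCALL_64(1, sys_write, sys_write)
    __SYSCALL_64(2, sys_open, sys_open)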
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index 1d97bd84b6fb..b2b54d2edf53 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig | |||
@@ -6,14 +6,6 @@ menu "UML-specific options" | |||
6 | 6 | ||
7 | menu "Host processor type and features" | 7 | menu "Host processor type and features" |
8 | 8 | ||
9 | config CMPXCHG_LOCAL | ||
10 | bool | ||
11 | default n | ||
12 | |||
13 | config CMPXCHG_DOUBLE | ||
14 | bool | ||
15 | default n | ||
16 | |||
17 | source "arch/x86/Kconfig.cpu" | 9 | source "arch/x86/Kconfig.cpu" |
18 | 10 | ||
19 | endmenu | 11 | endmenu |
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile index 8fb58400e415..5d065b2222d3 100644 --- a/arch/x86/um/Makefile +++ b/arch/x86/um/Makefile | |||
@@ -37,7 +37,8 @@ subarch-$(CONFIG_MODULES) += ../kernel/module.o | |||
37 | USER_OBJS := bugs_$(BITS).o ptrace_user.o fault.o | 37 | USER_OBJS := bugs_$(BITS).o ptrace_user.o fault.o |
38 | 38 | ||
39 | extra-y += user-offsets.s | 39 | extra-y += user-offsets.s |
40 | $(obj)/user-offsets.s: c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) | 40 | $(obj)/user-offsets.s: c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) \ |
41 | -Iarch/x86/include/generated | ||
41 | 42 | ||
42 | UNPROFILE_OBJS := stub_segv.o | 43 | UNPROFILE_OBJS := stub_segv.o |
43 | CFLAGS_stub_segv.o := $(CFLAGS_NO_HARDENING) | 44 | CFLAGS_stub_segv.o := $(CFLAGS_NO_HARDENING) |
diff --git a/arch/x86/um/shared/sysdep/ptrace.h b/arch/x86/um/shared/sysdep/ptrace.h index 711b1621747f..2bbe1ec2d96a 100644 --- a/arch/x86/um/shared/sysdep/ptrace.h +++ b/arch/x86/um/shared/sysdep/ptrace.h | |||
@@ -1,5 +1,15 @@ | |||
1 | #ifndef __SYSDEP_X86_PTRACE_H | ||
2 | #define __SYSDEP_X86_PTRACE_H | ||
3 | |||
1 | #ifdef __i386__ | 4 | #ifdef __i386__ |
2 | #include "ptrace_32.h" | 5 | #include "ptrace_32.h" |
3 | #else | 6 | #else |
4 | #include "ptrace_64.h" | 7 | #include "ptrace_64.h" |
5 | #endif | 8 | #endif |
9 | |||
10 | static inline long regs_return_value(struct uml_pt_regs *regs) | ||
11 | { | ||
12 | return UPT_SYSCALL_RET(regs); | ||
13 | } | ||
14 | |||
15 | #endif /* __SYSDEP_X86_PTRACE_H */ | ||
diff --git a/arch/x86/um/sys_call_table_32.S b/arch/x86/um/sys_call_table_32.S deleted file mode 100644 index a7ca80d2dceb..000000000000 --- a/arch/x86/um/sys_call_table_32.S +++ /dev/null | |||
@@ -1,26 +0,0 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | /* Steal i386 syscall table for our purposes, but with some slight changes.*/ | ||
3 | |||
4 | #define sys_iopl sys_ni_syscall | ||
5 | #define sys_ioperm sys_ni_syscall | ||
6 | |||
7 | #define sys_vm86old sys_ni_syscall | ||
8 | #define sys_vm86 sys_ni_syscall | ||
9 | |||
10 | #define old_mmap sys_old_mmap | ||
11 | |||
12 | #define ptregs_fork sys_fork | ||
13 | #define ptregs_execve sys_execve | ||
14 | #define ptregs_iopl sys_iopl | ||
15 | #define ptregs_vm86old sys_vm86old | ||
16 | #define ptregs_clone sys_clone | ||
17 | #define ptregs_vm86 sys_vm86 | ||
18 | #define ptregs_sigaltstack sys_sigaltstack | ||
19 | #define ptregs_vfork sys_vfork | ||
20 | |||
21 | .section .rodata,"a" | ||
22 | |||
23 | #include "../kernel/syscall_table_32.S" | ||
24 | |||
25 | ENTRY(syscall_table_size) | ||
26 | .long .-sys_call_table | ||
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c new file mode 100644 index 000000000000..416bd40c0eba --- /dev/null +++ b/arch/x86/um/sys_call_table_32.c | |||
@@ -0,0 +1,55 @@ | |||
1 | /* | ||
2 | * System call table for UML/i386, copied from arch/x86/kernel/syscall_*.c | ||
3 | * with some changes for UML. | ||
4 | */ | ||
5 | |||
6 | #include <linux/linkage.h> | ||
7 | #include <linux/sys.h> | ||
8 | #include <linux/cache.h> | ||
9 | #include <generated/user_constants.h> | ||
10 | |||
11 | #define __NO_STUBS | ||
12 | |||
13 | /* | ||
14 | * Below you can see, in terms of #define's, the differences between the i386 | ||
15 | * and the UML syscall table. | ||
16 | */ | ||
17 | |||
18 | /* Not going to be implemented by UML, since we have no hardware. */ | ||
19 | #define sys_iopl sys_ni_syscall | ||
20 | #define sys_ioperm sys_ni_syscall | ||
21 | |||
22 | #define sys_vm86old sys_ni_syscall | ||
23 | #define sys_vm86 sys_ni_syscall | ||
24 | |||
25 | #define old_mmap sys_old_mmap | ||
26 | |||
27 | #define ptregs_fork sys_fork | ||
28 | #define ptregs_execve sys_execve | ||
29 | #define ptregs_iopl sys_iopl | ||
30 | #define ptregs_vm86old sys_vm86old | ||
31 | #define ptregs_clone sys_clone | ||
32 | #define ptregs_vm86 sys_vm86 | ||
33 | #define ptregs_sigaltstack sys_sigaltstack | ||
34 | #define ptregs_vfork sys_vfork | ||
35 | |||
36 | #define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ; | ||
37 | #include <asm/syscalls_32.h> | ||
38 | |||
39 | #undef __SYSCALL_I386 | ||
40 | #define __SYSCALL_I386(nr, sym, compat) [ nr ] = sym, | ||
41 | |||
42 | typedef void (*sys_call_ptr_t)(void); | ||
43 | |||
44 | extern void sys_ni_syscall(void); | ||
45 | |||
46 | const sys_call_ptr_t sys_call_table[] __cacheline_aligned = { | ||
47 | /* | ||
48 | * Smells like a compiler bug -- it doesn't work | ||
49 | * when the & below is removed. | ||
50 | */ | ||
51 | [0 ... __NR_syscall_max] = &sys_ni_syscall, | ||
52 | #include <asm/syscalls_32.h> | ||
53 | }; | ||
54 | |||
55 | int syscall_table_size = sizeof(sys_call_table); | ||
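The initializer pattern in sys_call_table[] above leans on two GNU C extensions: designated array initializers and the [first ... last] range form. The range entry seeds every slot with sys_ni_syscall, and the per-slot initializers pulled in from asm/syscalls_32.h override it, since GCC lets later initializers override earlier ones. A minimal standalone sketch of the idiom (names invented for illustration):

    typedef void (*call_t)(void);

    static void nosys(void)   { /* default: not implemented */ }
    static void do_read(void) { /* a real handler */ }

    static const call_t table[] = {
        [0 ... 15] = &nosys,    /* seed every slot with the default... */
        [3]        = &do_read,  /* ...then override the implemented ones */
    };

The 64-bit UML table in the following diff is rebuilt around the same idiom via __SYSCALL_64 and asm/syscalls_64.h.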
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c index 99522f78b162..fe626c3ba01b 100644 --- a/arch/x86/um/sys_call_table_64.c +++ b/arch/x86/um/sys_call_table_64.c | |||
@@ -1,11 +1,12 @@ | |||
1 | /* | 1 | /* |
2 | * System call table for UML/x86-64, copied from arch/x86_64/kernel/syscall.c | 2 | * System call table for UML/x86-64, copied from arch/x86/kernel/syscall_*.c |
3 | * with some changes for UML. | 3 | * with some changes for UML. |
4 | */ | 4 | */ |
5 | 5 | ||
6 | #include <linux/linkage.h> | 6 | #include <linux/linkage.h> |
7 | #include <linux/sys.h> | 7 | #include <linux/sys.h> |
8 | #include <linux/cache.h> | 8 | #include <linux/cache.h> |
9 | #include <generated/user_constants.h> | ||
9 | 10 | ||
10 | #define __NO_STUBS | 11 | #define __NO_STUBS |
11 | 12 | ||
@@ -34,31 +35,23 @@ | |||
34 | #define stub_sigaltstack sys_sigaltstack | 35 | #define stub_sigaltstack sys_sigaltstack |
35 | #define stub_rt_sigreturn sys_rt_sigreturn | 36 | #define stub_rt_sigreturn sys_rt_sigreturn |
36 | 37 | ||
37 | #define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; | 38 | #define __SYSCALL_64(nr, sym, compat) extern asmlinkage void sym(void) ; |
38 | #undef _ASM_X86_UNISTD_64_H | 39 | #include <asm/syscalls_64.h> |
39 | #include "../../x86/include/asm/unistd_64.h" | ||
40 | 40 | ||
41 | #undef __SYSCALL | 41 | #undef __SYSCALL_64 |
42 | #define __SYSCALL(nr, sym) [ nr ] = sym, | 42 | #define __SYSCALL_64(nr, sym, compat) [ nr ] = sym, |
43 | #undef _ASM_X86_UNISTD_64_H | ||
44 | 43 | ||
45 | typedef void (*sys_call_ptr_t)(void); | 44 | typedef void (*sys_call_ptr_t)(void); |
46 | 45 | ||
47 | extern void sys_ni_syscall(void); | 46 | extern void sys_ni_syscall(void); |
48 | 47 | ||
49 | /* | 48 | const sys_call_ptr_t sys_call_table[] __cacheline_aligned = { |
50 | * We used to have a trick here which made sure that holes in the | 49 | /* |
51 | * x86_64 table were filled in with sys_ni_syscall, but a comment in | 50 | * Smells like a compiler bug -- it doesn't work |
52 | * unistd_64.h says that holes aren't allowed, so the trick was | 51 | * when the & below is removed. |
53 | * removed. | 52 | */ |
54 | * The trick looked like this | 53 | [0 ... __NR_syscall_max] = &sys_ni_syscall, |
55 | * [0 ... UM_NR_syscall_max] = &sys_ni_syscall | 54 | #include <asm/syscalls_64.h> |
56 | * before including unistd_64.h - the later initializations overwrote | ||
57 | * the sys_ni_syscall filler. | ||
58 | */ | ||
59 | |||
60 | sys_call_ptr_t sys_call_table[] __cacheline_aligned = { | ||
61 | #include <asm/unistd_64.h> | ||
62 | }; | 55 | }; |
63 | 56 | ||
64 | int syscall_table_size = sizeof(sys_call_table); | 57 | int syscall_table_size = sizeof(sys_call_table); |
diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c index ca49be8ddd0c..5edf4f4bbf53 100644 --- a/arch/x86/um/user-offsets.c +++ b/arch/x86/um/user-offsets.c | |||
@@ -8,6 +8,18 @@ | |||
8 | #include <asm/ptrace.h> | 8 | #include <asm/ptrace.h> |
9 | #include <asm/types.h> | 9 | #include <asm/types.h> |
10 | 10 | ||
11 | #ifdef __i386__ | ||
12 | #define __SYSCALL_I386(nr, sym, compat) [nr] = 1, | ||
13 | static char syscalls[] = { | ||
14 | #include <asm/syscalls_32.h> | ||
15 | }; | ||
16 | #else | ||
17 | #define __SYSCALL_64(nr, sym, compat) [nr] = 1, | ||
18 | static char syscalls[] = { | ||
19 | #include <asm/syscalls_64.h> | ||
20 | }; | ||
21 | #endif | ||
22 | |||
11 | #define DEFINE(sym, val) \ | 23 | #define DEFINE(sym, val) \ |
12 | asm volatile("\n->" #sym " %0 " #val : : "i" (val)) | 24 | asm volatile("\n->" #sym " %0 " #val : : "i" (val)) |
13 | 25 | ||
@@ -77,4 +89,7 @@ void foo(void) | |||
77 | DEFINE(UM_PROT_READ, PROT_READ); | 89 | DEFINE(UM_PROT_READ, PROT_READ); |
78 | DEFINE(UM_PROT_WRITE, PROT_WRITE); | 90 | DEFINE(UM_PROT_WRITE, PROT_WRITE); |
79 | DEFINE(UM_PROT_EXEC, PROT_EXEC); | 91 | DEFINE(UM_PROT_EXEC, PROT_EXEC); |
92 | |||
93 | DEFINE(__NR_syscall_max, sizeof(syscalls) - 1); | ||
94 | DEFINE(NR_syscalls, sizeof(syscalls)); | ||
80 | } | 95 | } |
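The syscalls[] array introduced above is a compile-time measuring trick: every generated __SYSCALL_*(nr, sym, compat) line expands to [nr] = 1, so the array's size is forced to the highest syscall number plus one, and sizeof() then yields NR_syscalls and __NR_syscall_max with no hand-maintained constant. Reduced sketch (311 matching the last row of the 64-bit table above):

    static char syscalls[] = {
        [0]   = 1,
        [311] = 1,   /* highest number present in the table */
    };
    /* sizeof(syscalls)     == 312  ->  NR_syscalls      */
    /* sizeof(syscalls) - 1 == 311  ->  __NR_syscall_max */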
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 26c731a106af..fdce49c7aff6 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig | |||
@@ -29,7 +29,8 @@ config XEN_PVHVM | |||
29 | 29 | ||
30 | config XEN_MAX_DOMAIN_MEMORY | 30 | config XEN_MAX_DOMAIN_MEMORY |
31 | int | 31 | int |
32 | default 128 | 32 | default 500 if X86_64 |
33 | default 64 if X86_32 | ||
33 | depends on XEN | 34 | depends on XEN |
34 | help | 35 | help |
35 | This only affects the sizing of some bss arrays, the unused | 36 | This only affects the sizing of some bss arrays, the unused |
@@ -48,3 +49,4 @@ config XEN_DEBUG_FS | |||
48 | help | 49 | help |
49 | Enable statistics output and various tuning options in debugfs. | 50 | Enable statistics output and various tuning options in debugfs. |
50 | Enabling this option may incur a significant performance overhead. | 51 | Enabling this option may incur a significant performance overhead. |
52 | |||
diff --git a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c index 7c0fedd98ea0..ef1db1900d86 100644 --- a/arch/x86/xen/debugfs.c +++ b/arch/x86/xen/debugfs.c | |||
@@ -109,7 +109,7 @@ static const struct file_operations u32_array_fops = { | |||
109 | .llseek = no_llseek, | 109 | .llseek = no_llseek, |
110 | }; | 110 | }; |
111 | 111 | ||
112 | struct dentry *xen_debugfs_create_u32_array(const char *name, mode_t mode, | 112 | struct dentry *xen_debugfs_create_u32_array(const char *name, umode_t mode, |
113 | struct dentry *parent, | 113 | struct dentry *parent, |
114 | u32 *array, unsigned elements) | 114 | u32 *array, unsigned elements) |
115 | { | 115 | { |
diff --git a/arch/x86/xen/debugfs.h b/arch/x86/xen/debugfs.h index e28132084832..78d25499be5b 100644 --- a/arch/x86/xen/debugfs.h +++ b/arch/x86/xen/debugfs.h | |||
@@ -3,7 +3,7 @@ | |||
3 | 3 | ||
4 | struct dentry * __init xen_init_debugfs(void); | 4 | struct dentry * __init xen_init_debugfs(void); |
5 | 5 | ||
6 | struct dentry *xen_debugfs_create_u32_array(const char *name, mode_t mode, | 6 | struct dentry *xen_debugfs_create_u32_array(const char *name, umode_t mode, |
7 | struct dentry *parent, | 7 | struct dentry *parent, |
8 | u32 *array, unsigned elements); | 8 | u32 *array, unsigned elements); |
9 | 9 | ||
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c index 5a40d24ba331..3a5f55d51907 100644 --- a/arch/x86/xen/grant-table.c +++ b/arch/x86/xen/grant-table.c | |||
@@ -54,6 +54,20 @@ static int map_pte_fn(pte_t *pte, struct page *pmd_page, | |||
54 | return 0; | 54 | return 0; |
55 | } | 55 | } |
56 | 56 | ||
57 | /* | ||
58 | * This function maps shared frames used to store grant status. It differs | ||
59 | * from map_pte_fn above in that the frames type here is uint64_t. | ||
60 | */ | ||
61 | static int map_pte_fn_status(pte_t *pte, struct page *pmd_page, | ||
62 | unsigned long addr, void *data) | ||
63 | { | ||
64 | uint64_t **frames = (uint64_t **)data; | ||
65 | |||
66 | set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL)); | ||
67 | (*frames)++; | ||
68 | return 0; | ||
69 | } | ||
70 | |||
57 | static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, | 71 | static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, |
58 | unsigned long addr, void *data) | 72 | unsigned long addr, void *data) |
59 | { | 73 | { |
@@ -64,10 +78,10 @@ static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, | |||
64 | 78 | ||
65 | int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, | 79 | int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, |
66 | unsigned long max_nr_gframes, | 80 | unsigned long max_nr_gframes, |
67 | struct grant_entry **__shared) | 81 | void **__shared) |
68 | { | 82 | { |
69 | int rc; | 83 | int rc; |
70 | struct grant_entry *shared = *__shared; | 84 | void *shared = *__shared; |
71 | 85 | ||
72 | if (shared == NULL) { | 86 | if (shared == NULL) { |
73 | struct vm_struct *area = | 87 | struct vm_struct *area = |
@@ -83,8 +97,30 @@ int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, | |||
83 | return rc; | 97 | return rc; |
84 | } | 98 | } |
85 | 99 | ||
86 | void arch_gnttab_unmap_shared(struct grant_entry *shared, | 100 | int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes, |
87 | unsigned long nr_gframes) | 101 | unsigned long max_nr_gframes, |
102 | grant_status_t **__shared) | ||
103 | { | ||
104 | int rc; | ||
105 | grant_status_t *shared = *__shared; | ||
106 | |||
107 | if (shared == NULL) { | ||
108 | /* No need to have alloc_vm_area return the PTEs; | ||
109 | * apply_to_page_range will walk them anyhow. */ | ||
110 | struct vm_struct *area = | ||
111 | alloc_vm_area(PAGE_SIZE * max_nr_gframes, NULL); | ||
112 | BUG_ON(area == NULL); | ||
113 | shared = area->addr; | ||
114 | *__shared = shared; | ||
115 | } | ||
116 | |||
117 | rc = apply_to_page_range(&init_mm, (unsigned long)shared, | ||
118 | PAGE_SIZE * nr_gframes, | ||
119 | map_pte_fn_status, &frames); | ||
120 | return rc; | ||
121 | } | ||
122 | |||
123 | void arch_gnttab_unmap(void *shared, unsigned long nr_gframes) | ||
88 | { | 124 | { |
89 | apply_to_page_range(&init_mm, (unsigned long)shared, | 125 | apply_to_page_range(&init_mm, (unsigned long)shared, |
90 | PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL); | 126 | PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL); |
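Both grant-table mapping paths hand their work to apply_to_page_range(), which walks the requested virtual range and invokes the callback once per PTE slot; the callbacks advance the frame pointer passed through the data argument, so consecutive pages map consecutive frames. The callback shape used here, matching the map_pte_fn* functions above (simplified; the real pte_fn_t typedef lives in include/linux/mm.h):

    int fn(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data);

The status path gets its own mapper because its frame list arrives as uint64_t rather than unsigned long, as the comment on map_pte_fn_status notes.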
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index f4bf8aa574f4..58a0e46c404d 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -1852,7 +1852,7 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, | |||
1852 | xen_write_cr3(__pa(initial_page_table)); | 1852 | xen_write_cr3(__pa(initial_page_table)); |
1853 | 1853 | ||
1854 | memblock_reserve(__pa(xen_start_info->pt_base), | 1854 | memblock_reserve(__pa(xen_start_info->pt_base), |
1855 | xen_start_info->nr_pt_frames * PAGE_SIZE)); | 1855 | xen_start_info->nr_pt_frames * PAGE_SIZE); |
1856 | 1856 | ||
1857 | return initial_page_table; | 1857 | return initial_page_table; |
1858 | } | 1858 | } |