diff options
Diffstat (limited to 'arch/x86')
131 files changed, 6431 insertions, 2310 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 72ace9515a07..178084b4377c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -49,6 +49,7 @@ config X86 | |||
49 | select HAVE_KERNEL_GZIP | 49 | select HAVE_KERNEL_GZIP |
50 | select HAVE_KERNEL_BZIP2 | 50 | select HAVE_KERNEL_BZIP2 |
51 | select HAVE_KERNEL_LZMA | 51 | select HAVE_KERNEL_LZMA |
52 | select HAVE_HW_BREAKPOINT | ||
52 | select HAVE_ARCH_KMEMCHECK | 53 | select HAVE_ARCH_KMEMCHECK |
53 | 54 | ||
54 | config OUTPUT_FORMAT | 55 | config OUTPUT_FORMAT |
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 2649840d888f..5e99762eb5c2 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu | |||
@@ -406,7 +406,7 @@ config X86_CMPXCHG64 | |||
406 | # generates cmov. | 406 | # generates cmov. |
407 | config X86_CMOV | 407 | config X86_CMOV |
408 | def_bool y | 408 | def_bool y |
409 | depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM) | 409 | depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX) |
410 | 410 | ||
411 | config X86_MINIMUM_CPU_FAMILY | 411 | config X86_MINIMUM_CPU_FAMILY |
412 | int | 412 | int |
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index d105f29bb6bb..731318e5ac1d 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug | |||
@@ -186,6 +186,15 @@ config X86_DS_SELFTEST | |||
186 | config HAVE_MMIOTRACE_SUPPORT | 186 | config HAVE_MMIOTRACE_SUPPORT |
187 | def_bool y | 187 | def_bool y |
188 | 188 | ||
189 | config X86_DECODER_SELFTEST | ||
190 | bool "x86 instruction decoder selftest" | ||
191 | depends on DEBUG_KERNEL | ||
192 | ---help--- | ||
193 | Perform x86 instruction decoder selftests at build time. | ||
194 | This option is useful for checking the sanity of x86 instruction | ||
195 | decoder code. | ||
196 | If unsure, say "N". | ||
197 | |||
189 | # | 198 | # |
190 | # IO delay types: | 199 | # IO delay types: |
191 | # | 200 | # |
@@ -287,4 +296,18 @@ config OPTIMIZE_INLINING | |||
287 | 296 | ||
288 | If unsure, say N. | 297 | If unsure, say N. |
289 | 298 | ||
299 | config DEBUG_STRICT_USER_COPY_CHECKS | ||
300 | bool "Strict copy size checks" | ||
301 | depends on DEBUG_KERNEL && !TRACE_BRANCH_PROFILING | ||
302 | ---help--- | ||
303 | Enabling this option turns a certain set of sanity checks for user | ||
304 | copy operations into compile time failures. | ||
305 | |||
306 | The copy_from_user() etc checks are there to help test if there | ||
307 | are sufficient security checks on the length argument of | ||
308 | the copy operation, by having gcc prove that the argument is | ||
309 | within bounds. | ||
310 | |||
311 | If unsure, or if you run an older (pre 4.4) gcc, say N. | ||
312 | |||
290 | endmenu | 313 | endmenu |
diff --git a/arch/x86/Makefile b/arch/x86/Makefile index d2d24c9ee64d..78b32be55e9e 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile | |||
@@ -155,6 +155,9 @@ all: bzImage | |||
155 | KBUILD_IMAGE := $(boot)/bzImage | 155 | KBUILD_IMAGE := $(boot)/bzImage |
156 | 156 | ||
157 | bzImage: vmlinux | 157 | bzImage: vmlinux |
158 | ifeq ($(CONFIG_X86_DECODER_SELFTEST),y) | ||
159 | $(Q)$(MAKE) $(build)=arch/x86/tools posttest | ||
160 | endif | ||
158 | $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE) | 161 | $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE) |
159 | $(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot | 162 | $(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot |
160 | $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@ | 163 | $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@ |
diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index 30e9a264f69d..cbf0776dbec1 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu | |||
@@ -41,7 +41,7 @@ cflags-$(CONFIG_X86_ELAN) += -march=i486 | |||
41 | 41 | ||
42 | # Geode GX1 support | 42 | # Geode GX1 support |
43 | cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx | 43 | cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx |
44 | 44 | cflags-$(CONFIG_MGEODE_LX) += $(call cc-option,-march=geode,-march=pentium-mmx) | |
45 | # add at the end to overwrite eventual tuning options from earlier | 45 | # add at the end to overwrite eventual tuning options from earlier |
46 | # cpu entries | 46 | # cpu entries |
47 | cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686)) | 47 | cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686)) |
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 4a8e80cdcfa5..9f828f87ca35 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild | |||
@@ -10,6 +10,7 @@ header-y += ptrace-abi.h | |||
10 | header-y += sigcontext32.h | 10 | header-y += sigcontext32.h |
11 | header-y += ucontext.h | 11 | header-y += ucontext.h |
12 | header-y += processor-flags.h | 12 | header-y += processor-flags.h |
13 | header-y += hw_breakpoint.h | ||
13 | 14 | ||
14 | unifdef-y += e820.h | 15 | unifdef-y += e820.h |
15 | unifdef-y += ist.h | 16 | unifdef-y += ist.h |
diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h index bb70e397aa84..7a15588e45d4 100644 --- a/arch/x86/include/asm/a.out-core.h +++ b/arch/x86/include/asm/a.out-core.h | |||
@@ -17,6 +17,7 @@ | |||
17 | 17 | ||
18 | #include <linux/user.h> | 18 | #include <linux/user.h> |
19 | #include <linux/elfcore.h> | 19 | #include <linux/elfcore.h> |
20 | #include <asm/debugreg.h> | ||
20 | 21 | ||
21 | /* | 22 | /* |
22 | * fill in the user structure for an a.out core dump | 23 | * fill in the user structure for an a.out core dump |
@@ -32,14 +33,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) | |||
32 | >> PAGE_SHIFT; | 33 | >> PAGE_SHIFT; |
33 | dump->u_dsize -= dump->u_tsize; | 34 | dump->u_dsize -= dump->u_tsize; |
34 | dump->u_ssize = 0; | 35 | dump->u_ssize = 0; |
35 | dump->u_debugreg[0] = current->thread.debugreg0; | 36 | aout_dump_debugregs(dump); |
36 | dump->u_debugreg[1] = current->thread.debugreg1; | ||
37 | dump->u_debugreg[2] = current->thread.debugreg2; | ||
38 | dump->u_debugreg[3] = current->thread.debugreg3; | ||
39 | dump->u_debugreg[4] = 0; | ||
40 | dump->u_debugreg[5] = 0; | ||
41 | dump->u_debugreg[6] = current->thread.debugreg6; | ||
42 | dump->u_debugreg[7] = current->thread.debugreg7; | ||
43 | 37 | ||
44 | if (dump->start_stack < TASK_SIZE) | 38 | if (dump->start_stack < TASK_SIZE) |
45 | dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack)) | 39 | dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack)) |
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index e2077d343c33..b97f786a48d5 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h | |||
@@ -1,17 +1,13 @@ | |||
1 | #ifdef __ASSEMBLY__ | 1 | #ifdef __ASSEMBLY__ |
2 | 2 | ||
3 | #ifdef CONFIG_X86_32 | 3 | #include <asm/asm.h> |
4 | # define X86_ALIGN .long | ||
5 | #else | ||
6 | # define X86_ALIGN .quad | ||
7 | #endif | ||
8 | 4 | ||
9 | #ifdef CONFIG_SMP | 5 | #ifdef CONFIG_SMP |
10 | .macro LOCK_PREFIX | 6 | .macro LOCK_PREFIX |
11 | 1: lock | 7 | 1: lock |
12 | .section .smp_locks,"a" | 8 | .section .smp_locks,"a" |
13 | .align 4 | 9 | _ASM_ALIGN |
14 | X86_ALIGN 1b | 10 | _ASM_PTR 1b |
15 | .previous | 11 | .previous |
16 | .endm | 12 | .endm |
17 | #else | 13 | #else |
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index c240efc74e00..69b74a7b877f 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h | |||
@@ -84,6 +84,7 @@ static inline void alternatives_smp_switch(int smp) {} | |||
84 | " .byte " __stringify(feature) "\n" /* feature bit */ \ | 84 | " .byte " __stringify(feature) "\n" /* feature bit */ \ |
85 | " .byte 662b-661b\n" /* sourcelen */ \ | 85 | " .byte 662b-661b\n" /* sourcelen */ \ |
86 | " .byte 664f-663f\n" /* replacementlen */ \ | 86 | " .byte 664f-663f\n" /* replacementlen */ \ |
87 | " .byte 0xff + (664f-663f) - (662b-661b)\n" /* rlen <= slen */ \ | ||
87 | ".previous\n" \ | 88 | ".previous\n" \ |
88 | ".section .altinstr_replacement, \"ax\"\n" \ | 89 | ".section .altinstr_replacement, \"ax\"\n" \ |
89 | "663:\n\t" newinstr "\n664:\n" /* replacement */ \ | 90 | "663:\n\t" newinstr "\n664:\n" /* replacement */ \ |
diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index 4b180897e6b5..5af2982133b5 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. | 2 | * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. |
3 | * Author: Joerg Roedel <joerg.roedel@amd.com> | 3 | * Author: Joerg Roedel <joerg.roedel@amd.com> |
4 | * Leo Duran <leo.duran@amd.com> | 4 | * Leo Duran <leo.duran@amd.com> |
5 | * | 5 | * |
@@ -23,19 +23,13 @@ | |||
23 | #include <linux/irqreturn.h> | 23 | #include <linux/irqreturn.h> |
24 | 24 | ||
25 | #ifdef CONFIG_AMD_IOMMU | 25 | #ifdef CONFIG_AMD_IOMMU |
26 | extern int amd_iommu_init(void); | 26 | |
27 | extern int amd_iommu_init_dma_ops(void); | ||
28 | extern int amd_iommu_init_passthrough(void); | ||
29 | extern void amd_iommu_detect(void); | 27 | extern void amd_iommu_detect(void); |
30 | extern irqreturn_t amd_iommu_int_handler(int irq, void *data); | 28 | |
31 | extern void amd_iommu_flush_all_domains(void); | ||
32 | extern void amd_iommu_flush_all_devices(void); | ||
33 | extern void amd_iommu_shutdown(void); | ||
34 | extern void amd_iommu_apply_erratum_63(u16 devid); | ||
35 | #else | 29 | #else |
36 | static inline int amd_iommu_init(void) { return -ENODEV; } | 30 | |
37 | static inline void amd_iommu_detect(void) { } | 31 | static inline void amd_iommu_detect(void) { } |
38 | static inline void amd_iommu_shutdown(void) { } | 32 | |
39 | #endif | 33 | #endif |
40 | 34 | ||
41 | #endif /* _ASM_X86_AMD_IOMMU_H */ | 35 | #endif /* _ASM_X86_AMD_IOMMU_H */ |
diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h new file mode 100644 index 000000000000..84786fb9a23b --- /dev/null +++ b/arch/x86/include/asm/amd_iommu_proto.h | |||
@@ -0,0 +1,38 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2009 Advanced Micro Devices, Inc. | ||
3 | * Author: Joerg Roedel <joerg.roedel@amd.com> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms of the GNU General Public License version 2 as published | ||
7 | * by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | */ | ||
18 | |||
19 | #ifndef _ASM_X86_AMD_IOMMU_PROTO_H | ||
20 | #define _ASM_X86_AMD_IOMMU_PROTO_H | ||
21 | |||
22 | struct amd_iommu; | ||
23 | |||
24 | extern int amd_iommu_init_dma_ops(void); | ||
25 | extern int amd_iommu_init_passthrough(void); | ||
26 | extern irqreturn_t amd_iommu_int_handler(int irq, void *data); | ||
27 | extern void amd_iommu_flush_all_domains(void); | ||
28 | extern void amd_iommu_flush_all_devices(void); | ||
29 | extern void amd_iommu_apply_erratum_63(u16 devid); | ||
30 | extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); | ||
31 | |||
32 | #ifndef CONFIG_AMD_IOMMU_STATS | ||
33 | |||
34 | static inline void amd_iommu_stats_init(void) { } | ||
35 | |||
36 | #endif /* !CONFIG_AMD_IOMMU_STATS */ | ||
37 | |||
38 | #endif /* _ASM_X86_AMD_IOMMU_PROTO_H */ | ||
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 2a2cc7a78a81..ba19ad4c47d0 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. | 2 | * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. |
3 | * Author: Joerg Roedel <joerg.roedel@amd.com> | 3 | * Author: Joerg Roedel <joerg.roedel@amd.com> |
4 | * Leo Duran <leo.duran@amd.com> | 4 | * Leo Duran <leo.duran@amd.com> |
5 | * | 5 | * |
@@ -25,6 +25,11 @@ | |||
25 | #include <linux/spinlock.h> | 25 | #include <linux/spinlock.h> |
26 | 26 | ||
27 | /* | 27 | /* |
28 | * Maximum number of IOMMUs supported | ||
29 | */ | ||
30 | #define MAX_IOMMUS 32 | ||
31 | |||
32 | /* | ||
28 | * some size calculation constants | 33 | * some size calculation constants |
29 | */ | 34 | */ |
30 | #define DEV_TABLE_ENTRY_SIZE 32 | 35 | #define DEV_TABLE_ENTRY_SIZE 32 |
@@ -206,6 +211,9 @@ extern bool amd_iommu_dump; | |||
206 | printk(KERN_INFO "AMD-Vi: " format, ## arg); \ | 211 | printk(KERN_INFO "AMD-Vi: " format, ## arg); \ |
207 | } while(0); | 212 | } while(0); |
208 | 213 | ||
214 | /* global flag if IOMMUs cache non-present entries */ | ||
215 | extern bool amd_iommu_np_cache; | ||
216 | |||
209 | /* | 217 | /* |
210 | * Make iterating over all IOMMUs easier | 218 | * Make iterating over all IOMMUs easier |
211 | */ | 219 | */ |
@@ -226,6 +234,8 @@ extern bool amd_iommu_dump; | |||
226 | * independent of their use. | 234 | * independent of their use. |
227 | */ | 235 | */ |
228 | struct protection_domain { | 236 | struct protection_domain { |
237 | struct list_head list; /* for list of all protection domains */ | ||
238 | struct list_head dev_list; /* List of all devices in this domain */ | ||
229 | spinlock_t lock; /* mostly used to lock the page table*/ | 239 | spinlock_t lock; /* mostly used to lock the page table*/ |
230 | u16 id; /* the domain id written to the device table */ | 240 | u16 id; /* the domain id written to the device table */ |
231 | int mode; /* paging mode (0-6 levels) */ | 241 | int mode; /* paging mode (0-6 levels) */ |
@@ -233,7 +243,20 @@ struct protection_domain { | |||
233 | unsigned long flags; /* flags to find out type of domain */ | 243 | unsigned long flags; /* flags to find out type of domain */ |
234 | bool updated; /* complete domain flush required */ | 244 | bool updated; /* complete domain flush required */ |
235 | unsigned dev_cnt; /* devices assigned to this domain */ | 245 | unsigned dev_cnt; /* devices assigned to this domain */ |
246 | unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ | ||
236 | void *priv; /* private data */ | 247 | void *priv; /* private data */ |
248 | |||
249 | }; | ||
250 | |||
251 | /* | ||
252 | * This struct contains device specific data for the IOMMU | ||
253 | */ | ||
254 | struct iommu_dev_data { | ||
255 | struct list_head list; /* For domain->dev_list */ | ||
256 | struct device *dev; /* Device this data belong to */ | ||
257 | struct device *alias; /* The Alias Device */ | ||
258 | struct protection_domain *domain; /* Domain the device is bound to */ | ||
259 | atomic_t bind; /* Domain attach reverent count */ | ||
237 | }; | 260 | }; |
238 | 261 | ||
239 | /* | 262 | /* |
@@ -291,6 +314,9 @@ struct dma_ops_domain { | |||
291 | struct amd_iommu { | 314 | struct amd_iommu { |
292 | struct list_head list; | 315 | struct list_head list; |
293 | 316 | ||
317 | /* Index within the IOMMU array */ | ||
318 | int index; | ||
319 | |||
294 | /* locks the accesses to the hardware */ | 320 | /* locks the accesses to the hardware */ |
295 | spinlock_t lock; | 321 | spinlock_t lock; |
296 | 322 | ||
@@ -357,6 +383,21 @@ struct amd_iommu { | |||
357 | extern struct list_head amd_iommu_list; | 383 | extern struct list_head amd_iommu_list; |
358 | 384 | ||
359 | /* | 385 | /* |
386 | * Array with pointers to each IOMMU struct | ||
387 | * The indices are referenced in the protection domains | ||
388 | */ | ||
389 | extern struct amd_iommu *amd_iommus[MAX_IOMMUS]; | ||
390 | |||
391 | /* Number of IOMMUs present in the system */ | ||
392 | extern int amd_iommus_present; | ||
393 | |||
394 | /* | ||
395 | * Declarations for the global list of all protection domains | ||
396 | */ | ||
397 | extern spinlock_t amd_iommu_pd_lock; | ||
398 | extern struct list_head amd_iommu_pd_list; | ||
399 | |||
400 | /* | ||
360 | * Structure defining one entry in the device table | 401 | * Structure defining one entry in the device table |
361 | */ | 402 | */ |
362 | struct dev_table_entry { | 403 | struct dev_table_entry { |
@@ -416,15 +457,9 @@ extern unsigned amd_iommu_aperture_order; | |||
416 | /* largest PCI device id we expect translation requests for */ | 457 | /* largest PCI device id we expect translation requests for */ |
417 | extern u16 amd_iommu_last_bdf; | 458 | extern u16 amd_iommu_last_bdf; |
418 | 459 | ||
419 | /* data structures for protection domain handling */ | ||
420 | extern struct protection_domain **amd_iommu_pd_table; | ||
421 | |||
422 | /* allocation bitmap for domain ids */ | 460 | /* allocation bitmap for domain ids */ |
423 | extern unsigned long *amd_iommu_pd_alloc_bitmap; | 461 | extern unsigned long *amd_iommu_pd_alloc_bitmap; |
424 | 462 | ||
425 | /* will be 1 if device isolation is enabled */ | ||
426 | extern bool amd_iommu_isolate; | ||
427 | |||
428 | /* | 463 | /* |
429 | * If true, the addresses will be flushed on unmap time, not when | 464 | * If true, the addresses will be flushed on unmap time, not when |
430 | * they are reused | 465 | * they are reused |
@@ -462,11 +497,6 @@ struct __iommu_counter { | |||
462 | #define ADD_STATS_COUNTER(name, x) | 497 | #define ADD_STATS_COUNTER(name, x) |
463 | #define SUB_STATS_COUNTER(name, x) | 498 | #define SUB_STATS_COUNTER(name, x) |
464 | 499 | ||
465 | static inline void amd_iommu_stats_init(void) { } | ||
466 | |||
467 | #endif /* CONFIG_AMD_IOMMU_STATS */ | 500 | #endif /* CONFIG_AMD_IOMMU_STATS */ |
468 | 501 | ||
469 | /* some function prototypes */ | ||
470 | extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); | ||
471 | |||
472 | #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */ | 502 | #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */ |
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 474d80d3e6cc..b4ac2cdcb64f 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h | |||
@@ -297,20 +297,20 @@ struct apic { | |||
297 | int disable_esr; | 297 | int disable_esr; |
298 | 298 | ||
299 | int dest_logical; | 299 | int dest_logical; |
300 | unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); | 300 | unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid); |
301 | unsigned long (*check_apicid_present)(int apicid); | 301 | unsigned long (*check_apicid_present)(int apicid); |
302 | 302 | ||
303 | void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); | 303 | void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); |
304 | void (*init_apic_ldr)(void); | 304 | void (*init_apic_ldr)(void); |
305 | 305 | ||
306 | physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map); | 306 | void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); |
307 | 307 | ||
308 | void (*setup_apic_routing)(void); | 308 | void (*setup_apic_routing)(void); |
309 | int (*multi_timer_check)(int apic, int irq); | 309 | int (*multi_timer_check)(int apic, int irq); |
310 | int (*apicid_to_node)(int logical_apicid); | 310 | int (*apicid_to_node)(int logical_apicid); |
311 | int (*cpu_to_logical_apicid)(int cpu); | 311 | int (*cpu_to_logical_apicid)(int cpu); |
312 | int (*cpu_present_to_apicid)(int mps_cpu); | 312 | int (*cpu_present_to_apicid)(int mps_cpu); |
313 | physid_mask_t (*apicid_to_cpu_present)(int phys_apicid); | 313 | void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap); |
314 | void (*setup_portio_remap)(void); | 314 | void (*setup_portio_remap)(void); |
315 | int (*check_phys_apicid_present)(int phys_apicid); | 315 | int (*check_phys_apicid_present)(int phys_apicid); |
316 | void (*enable_apic_mode)(void); | 316 | void (*enable_apic_mode)(void); |
@@ -488,6 +488,8 @@ static inline unsigned int read_apic_id(void) | |||
488 | 488 | ||
489 | extern void default_setup_apic_routing(void); | 489 | extern void default_setup_apic_routing(void); |
490 | 490 | ||
491 | extern struct apic apic_noop; | ||
492 | |||
491 | #ifdef CONFIG_X86_32 | 493 | #ifdef CONFIG_X86_32 |
492 | 494 | ||
493 | extern struct apic apic_default; | 495 | extern struct apic apic_default; |
@@ -532,9 +534,9 @@ default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | |||
532 | return (unsigned int)(mask1 & mask2 & mask3); | 534 | return (unsigned int)(mask1 & mask2 & mask3); |
533 | } | 535 | } |
534 | 536 | ||
535 | static inline unsigned long default_check_apicid_used(physid_mask_t bitmap, int apicid) | 537 | static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid) |
536 | { | 538 | { |
537 | return physid_isset(apicid, bitmap); | 539 | return physid_isset(apicid, *map); |
538 | } | 540 | } |
539 | 541 | ||
540 | static inline unsigned long default_check_apicid_present(int bit) | 542 | static inline unsigned long default_check_apicid_present(int bit) |
@@ -542,9 +544,9 @@ static inline unsigned long default_check_apicid_present(int bit) | |||
542 | return physid_isset(bit, phys_cpu_present_map); | 544 | return physid_isset(bit, phys_cpu_present_map); |
543 | } | 545 | } |
544 | 546 | ||
545 | static inline physid_mask_t default_ioapic_phys_id_map(physid_mask_t phys_map) | 547 | static inline void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) |
546 | { | 548 | { |
547 | return phys_map; | 549 | *retmap = *phys_map; |
548 | } | 550 | } |
549 | 551 | ||
550 | /* Mapping from cpu number to logical apicid */ | 552 | /* Mapping from cpu number to logical apicid */ |
@@ -583,11 +585,6 @@ extern int default_cpu_present_to_apicid(int mps_cpu); | |||
583 | extern int default_check_phys_apicid_present(int phys_apicid); | 585 | extern int default_check_phys_apicid_present(int phys_apicid); |
584 | #endif | 586 | #endif |
585 | 587 | ||
586 | static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid) | ||
587 | { | ||
588 | return physid_mask_of_physid(phys_apicid); | ||
589 | } | ||
590 | |||
591 | #endif /* CONFIG_X86_LOCAL_APIC */ | 588 | #endif /* CONFIG_X86_LOCAL_APIC */ |
592 | 589 | ||
593 | #ifdef CONFIG_X86_32 | 590 | #ifdef CONFIG_X86_32 |
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 3b62da926de9..7fe3b3060f08 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h | |||
@@ -11,6 +11,12 @@ | |||
11 | #define IO_APIC_DEFAULT_PHYS_BASE 0xfec00000 | 11 | #define IO_APIC_DEFAULT_PHYS_BASE 0xfec00000 |
12 | #define APIC_DEFAULT_PHYS_BASE 0xfee00000 | 12 | #define APIC_DEFAULT_PHYS_BASE 0xfee00000 |
13 | 13 | ||
14 | /* | ||
15 | * This is the IO-APIC register space as specified | ||
16 | * by Intel docs: | ||
17 | */ | ||
18 | #define IO_APIC_SLOT_SIZE 1024 | ||
19 | |||
14 | #define APIC_ID 0x20 | 20 | #define APIC_ID 0x20 |
15 | 21 | ||
16 | #define APIC_LVR 0x30 | 22 | #define APIC_LVR 0x30 |
diff --git a/arch/x86/include/asm/apicnum.h b/arch/x86/include/asm/apicnum.h deleted file mode 100644 index 82f613c607ce..000000000000 --- a/arch/x86/include/asm/apicnum.h +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | #ifndef _ASM_X86_APICNUM_H | ||
2 | #define _ASM_X86_APICNUM_H | ||
3 | |||
4 | /* define MAX_IO_APICS */ | ||
5 | #ifdef CONFIG_X86_32 | ||
6 | # define MAX_IO_APICS 64 | ||
7 | #else | ||
8 | # define MAX_IO_APICS 128 | ||
9 | # define MAX_LOCAL_APIC 32768 | ||
10 | #endif | ||
11 | |||
12 | #endif /* _ASM_X86_APICNUM_H */ | ||
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index d9cf1cd156d2..f654d1bb17fb 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h | |||
@@ -22,14 +22,14 @@ do { \ | |||
22 | ".popsection" \ | 22 | ".popsection" \ |
23 | : : "i" (__FILE__), "i" (__LINE__), \ | 23 | : : "i" (__FILE__), "i" (__LINE__), \ |
24 | "i" (sizeof(struct bug_entry))); \ | 24 | "i" (sizeof(struct bug_entry))); \ |
25 | for (;;) ; \ | 25 | unreachable(); \ |
26 | } while (0) | 26 | } while (0) |
27 | 27 | ||
28 | #else | 28 | #else |
29 | #define BUG() \ | 29 | #define BUG() \ |
30 | do { \ | 30 | do { \ |
31 | asm volatile("ud2"); \ | 31 | asm volatile("ud2"); \ |
32 | for (;;) ; \ | 32 | unreachable(); \ |
33 | } while (0) | 33 | } while (0) |
34 | #endif | 34 | #endif |
35 | 35 | ||
diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h index b03bedb62aa7..0918654305af 100644 --- a/arch/x86/include/asm/calgary.h +++ b/arch/x86/include/asm/calgary.h | |||
@@ -62,10 +62,8 @@ struct cal_chipset_ops { | |||
62 | extern int use_calgary; | 62 | extern int use_calgary; |
63 | 63 | ||
64 | #ifdef CONFIG_CALGARY_IOMMU | 64 | #ifdef CONFIG_CALGARY_IOMMU |
65 | extern int calgary_iommu_init(void); | ||
66 | extern void detect_calgary(void); | 65 | extern void detect_calgary(void); |
67 | #else | 66 | #else |
68 | static inline int calgary_iommu_init(void) { return 1; } | ||
69 | static inline void detect_calgary(void) { return; } | 67 | static inline void detect_calgary(void) { return; } |
70 | #endif | 68 | #endif |
71 | 69 | ||
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index ee1931be6593..ffb9bb6b6c37 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h | |||
@@ -8,14 +8,50 @@ | |||
8 | * you need to test for the feature in boot_cpu_data. | 8 | * you need to test for the feature in boot_cpu_data. |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #define xchg(ptr, v) \ | 11 | extern void __xchg_wrong_size(void); |
12 | ((__typeof__(*(ptr)))__xchg((unsigned long)(v), (ptr), sizeof(*(ptr)))) | 12 | |
13 | /* | ||
14 | * Note: no "lock" prefix even on SMP: xchg always implies lock anyway | ||
15 | * Note 2: xchg has side effect, so that attribute volatile is necessary, | ||
16 | * but generally the primitive is invalid, *ptr is output argument. --ANK | ||
17 | */ | ||
13 | 18 | ||
14 | struct __xchg_dummy { | 19 | struct __xchg_dummy { |
15 | unsigned long a[100]; | 20 | unsigned long a[100]; |
16 | }; | 21 | }; |
17 | #define __xg(x) ((struct __xchg_dummy *)(x)) | 22 | #define __xg(x) ((struct __xchg_dummy *)(x)) |
18 | 23 | ||
24 | #define __xchg(x, ptr, size) \ | ||
25 | ({ \ | ||
26 | __typeof(*(ptr)) __x = (x); \ | ||
27 | switch (size) { \ | ||
28 | case 1: \ | ||
29 | asm volatile("xchgb %b0,%1" \ | ||
30 | : "=q" (__x) \ | ||
31 | : "m" (*__xg(ptr)), "0" (__x) \ | ||
32 | : "memory"); \ | ||
33 | break; \ | ||
34 | case 2: \ | ||
35 | asm volatile("xchgw %w0,%1" \ | ||
36 | : "=r" (__x) \ | ||
37 | : "m" (*__xg(ptr)), "0" (__x) \ | ||
38 | : "memory"); \ | ||
39 | break; \ | ||
40 | case 4: \ | ||
41 | asm volatile("xchgl %0,%1" \ | ||
42 | : "=r" (__x) \ | ||
43 | : "m" (*__xg(ptr)), "0" (__x) \ | ||
44 | : "memory"); \ | ||
45 | break; \ | ||
46 | default: \ | ||
47 | __xchg_wrong_size(); \ | ||
48 | } \ | ||
49 | __x; \ | ||
50 | }) | ||
51 | |||
52 | #define xchg(ptr, v) \ | ||
53 | __xchg((v), (ptr), sizeof(*ptr)) | ||
54 | |||
19 | /* | 55 | /* |
20 | * The semantics of XCHGCMP8B are a bit strange, this is why | 56 | * The semantics of XCHGCMP8B are a bit strange, this is why |
21 | * there is a loop and the loading of %%eax and %%edx has to | 57 | * there is a loop and the loading of %%eax and %%edx has to |
@@ -71,57 +107,63 @@ static inline void __set_64bit_var(unsigned long long *ptr, | |||
71 | (unsigned int)((value) >> 32)) \ | 107 | (unsigned int)((value) >> 32)) \ |
72 | : __set_64bit(ptr, ll_low((value)), ll_high((value)))) | 108 | : __set_64bit(ptr, ll_low((value)), ll_high((value)))) |
73 | 109 | ||
74 | /* | 110 | extern void __cmpxchg_wrong_size(void); |
75 | * Note: no "lock" prefix even on SMP: xchg always implies lock anyway | ||
76 | * Note 2: xchg has side effect, so that attribute volatile is necessary, | ||
77 | * but generally the primitive is invalid, *ptr is output argument. --ANK | ||
78 | */ | ||
79 | static inline unsigned long __xchg(unsigned long x, volatile void *ptr, | ||
80 | int size) | ||
81 | { | ||
82 | switch (size) { | ||
83 | case 1: | ||
84 | asm volatile("xchgb %b0,%1" | ||
85 | : "=q" (x) | ||
86 | : "m" (*__xg(ptr)), "0" (x) | ||
87 | : "memory"); | ||
88 | break; | ||
89 | case 2: | ||
90 | asm volatile("xchgw %w0,%1" | ||
91 | : "=r" (x) | ||
92 | : "m" (*__xg(ptr)), "0" (x) | ||
93 | : "memory"); | ||
94 | break; | ||
95 | case 4: | ||
96 | asm volatile("xchgl %0,%1" | ||
97 | : "=r" (x) | ||
98 | : "m" (*__xg(ptr)), "0" (x) | ||
99 | : "memory"); | ||
100 | break; | ||
101 | } | ||
102 | return x; | ||
103 | } | ||
104 | 111 | ||
105 | /* | 112 | /* |
106 | * Atomic compare and exchange. Compare OLD with MEM, if identical, | 113 | * Atomic compare and exchange. Compare OLD with MEM, if identical, |
107 | * store NEW in MEM. Return the initial value in MEM. Success is | 114 | * store NEW in MEM. Return the initial value in MEM. Success is |
108 | * indicated by comparing RETURN with OLD. | 115 | * indicated by comparing RETURN with OLD. |
109 | */ | 116 | */ |
117 | #define __raw_cmpxchg(ptr, old, new, size, lock) \ | ||
118 | ({ \ | ||
119 | __typeof__(*(ptr)) __ret; \ | ||
120 | __typeof__(*(ptr)) __old = (old); \ | ||
121 | __typeof__(*(ptr)) __new = (new); \ | ||
122 | switch (size) { \ | ||
123 | case 1: \ | ||
124 | asm volatile(lock "cmpxchgb %b1,%2" \ | ||
125 | : "=a"(__ret) \ | ||
126 | : "q"(__new), "m"(*__xg(ptr)), "0"(__old) \ | ||
127 | : "memory"); \ | ||
128 | break; \ | ||
129 | case 2: \ | ||
130 | asm volatile(lock "cmpxchgw %w1,%2" \ | ||
131 | : "=a"(__ret) \ | ||
132 | : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ | ||
133 | : "memory"); \ | ||
134 | break; \ | ||
135 | case 4: \ | ||
136 | asm volatile(lock "cmpxchgl %1,%2" \ | ||
137 | : "=a"(__ret) \ | ||
138 | : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ | ||
139 | : "memory"); \ | ||
140 | break; \ | ||
141 | default: \ | ||
142 | __cmpxchg_wrong_size(); \ | ||
143 | } \ | ||
144 | __ret; \ | ||
145 | }) | ||
146 | |||
147 | #define __cmpxchg(ptr, old, new, size) \ | ||
148 | __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX) | ||
149 | |||
150 | #define __sync_cmpxchg(ptr, old, new, size) \ | ||
151 | __raw_cmpxchg((ptr), (old), (new), (size), "lock; ") | ||
152 | |||
153 | #define __cmpxchg_local(ptr, old, new, size) \ | ||
154 | __raw_cmpxchg((ptr), (old), (new), (size), "") | ||
110 | 155 | ||
111 | #ifdef CONFIG_X86_CMPXCHG | 156 | #ifdef CONFIG_X86_CMPXCHG |
112 | #define __HAVE_ARCH_CMPXCHG 1 | 157 | #define __HAVE_ARCH_CMPXCHG 1 |
113 | #define cmpxchg(ptr, o, n) \ | 158 | |
114 | ((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o), \ | 159 | #define cmpxchg(ptr, old, new) \ |
115 | (unsigned long)(n), \ | 160 | __cmpxchg((ptr), (old), (new), sizeof(*ptr)) |
116 | sizeof(*(ptr)))) | 161 | |
117 | #define sync_cmpxchg(ptr, o, n) \ | 162 | #define sync_cmpxchg(ptr, old, new) \ |
118 | ((__typeof__(*(ptr)))__sync_cmpxchg((ptr), (unsigned long)(o), \ | 163 | __sync_cmpxchg((ptr), (old), (new), sizeof(*ptr)) |
119 | (unsigned long)(n), \ | 164 | |
120 | sizeof(*(ptr)))) | 165 | #define cmpxchg_local(ptr, old, new) \ |
121 | #define cmpxchg_local(ptr, o, n) \ | 166 | __cmpxchg_local((ptr), (old), (new), sizeof(*ptr)) |
122 | ((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o), \ | ||
123 | (unsigned long)(n), \ | ||
124 | sizeof(*(ptr)))) | ||
125 | #endif | 167 | #endif |
126 | 168 | ||
127 | #ifdef CONFIG_X86_CMPXCHG64 | 169 | #ifdef CONFIG_X86_CMPXCHG64 |
@@ -133,94 +175,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, | |||
133 | (unsigned long long)(n))) | 175 | (unsigned long long)(n))) |
134 | #endif | 176 | #endif |
135 | 177 | ||
136 | static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, | ||
137 | unsigned long new, int size) | ||
138 | { | ||
139 | unsigned long prev; | ||
140 | switch (size) { | ||
141 | case 1: | ||
142 | asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2" | ||
143 | : "=a"(prev) | ||
144 | : "q"(new), "m"(*__xg(ptr)), "0"(old) | ||
145 | : "memory"); | ||
146 | return prev; | ||
147 | case 2: | ||
148 | asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2" | ||
149 | : "=a"(prev) | ||
150 | : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
151 | : "memory"); | ||
152 | return prev; | ||
153 | case 4: | ||
154 | asm volatile(LOCK_PREFIX "cmpxchgl %1,%2" | ||
155 | : "=a"(prev) | ||
156 | : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
157 | : "memory"); | ||
158 | return prev; | ||
159 | } | ||
160 | return old; | ||
161 | } | ||
162 | |||
163 | /* | ||
164 | * Always use locked operations when touching memory shared with a | ||
165 | * hypervisor, since the system may be SMP even if the guest kernel | ||
166 | * isn't. | ||
167 | */ | ||
168 | static inline unsigned long __sync_cmpxchg(volatile void *ptr, | ||
169 | unsigned long old, | ||
170 | unsigned long new, int size) | ||
171 | { | ||
172 | unsigned long prev; | ||
173 | switch (size) { | ||
174 | case 1: | ||
175 | asm volatile("lock; cmpxchgb %b1,%2" | ||
176 | : "=a"(prev) | ||
177 | : "q"(new), "m"(*__xg(ptr)), "0"(old) | ||
178 | : "memory"); | ||
179 | return prev; | ||
180 | case 2: | ||
181 | asm volatile("lock; cmpxchgw %w1,%2" | ||
182 | : "=a"(prev) | ||
183 | : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
184 | : "memory"); | ||
185 | return prev; | ||
186 | case 4: | ||
187 | asm volatile("lock; cmpxchgl %1,%2" | ||
188 | : "=a"(prev) | ||
189 | : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
190 | : "memory"); | ||
191 | return prev; | ||
192 | } | ||
193 | return old; | ||
194 | } | ||
195 | |||
196 | static inline unsigned long __cmpxchg_local(volatile void *ptr, | ||
197 | unsigned long old, | ||
198 | unsigned long new, int size) | ||
199 | { | ||
200 | unsigned long prev; | ||
201 | switch (size) { | ||
202 | case 1: | ||
203 | asm volatile("cmpxchgb %b1,%2" | ||
204 | : "=a"(prev) | ||
205 | : "q"(new), "m"(*__xg(ptr)), "0"(old) | ||
206 | : "memory"); | ||
207 | return prev; | ||
208 | case 2: | ||
209 | asm volatile("cmpxchgw %w1,%2" | ||
210 | : "=a"(prev) | ||
211 | : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
212 | : "memory"); | ||
213 | return prev; | ||
214 | case 4: | ||
215 | asm volatile("cmpxchgl %1,%2" | ||
216 | : "=a"(prev) | ||
217 | : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
218 | : "memory"); | ||
219 | return prev; | ||
220 | } | ||
221 | return old; | ||
222 | } | ||
223 | |||
224 | static inline unsigned long long __cmpxchg64(volatile void *ptr, | 178 | static inline unsigned long long __cmpxchg64(volatile void *ptr, |
225 | unsigned long long old, | 179 | unsigned long long old, |
226 | unsigned long long new) | 180 | unsigned long long new) |
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 52de72e0de8c..485ae415faec 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h | |||
@@ -3,9 +3,6 @@ | |||
3 | 3 | ||
4 | #include <asm/alternative.h> /* Provides LOCK_PREFIX */ | 4 | #include <asm/alternative.h> /* Provides LOCK_PREFIX */ |
5 | 5 | ||
6 | #define xchg(ptr, v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v), \ | ||
7 | (ptr), sizeof(*(ptr)))) | ||
8 | |||
9 | #define __xg(x) ((volatile long *)(x)) | 6 | #define __xg(x) ((volatile long *)(x)) |
10 | 7 | ||
11 | static inline void set_64bit(volatile unsigned long *ptr, unsigned long val) | 8 | static inline void set_64bit(volatile unsigned long *ptr, unsigned long val) |
@@ -15,167 +12,118 @@ static inline void set_64bit(volatile unsigned long *ptr, unsigned long val) | |||
15 | 12 | ||
16 | #define _set_64bit set_64bit | 13 | #define _set_64bit set_64bit |
17 | 14 | ||
15 | extern void __xchg_wrong_size(void); | ||
16 | extern void __cmpxchg_wrong_size(void); | ||
17 | |||
18 | /* | 18 | /* |
19 | * Note: no "lock" prefix even on SMP: xchg always implies lock anyway | 19 | * Note: no "lock" prefix even on SMP: xchg always implies lock anyway |
20 | * Note 2: xchg has side effect, so that attribute volatile is necessary, | 20 | * Note 2: xchg has side effect, so that attribute volatile is necessary, |
21 | * but generally the primitive is invalid, *ptr is output argument. --ANK | 21 | * but generally the primitive is invalid, *ptr is output argument. --ANK |
22 | */ | 22 | */ |
23 | static inline unsigned long __xchg(unsigned long x, volatile void *ptr, | 23 | #define __xchg(x, ptr, size) \ |
24 | int size) | 24 | ({ \ |
25 | { | 25 | __typeof(*(ptr)) __x = (x); \ |
26 | switch (size) { | 26 | switch (size) { \ |
27 | case 1: | 27 | case 1: \ |
28 | asm volatile("xchgb %b0,%1" | 28 | asm volatile("xchgb %b0,%1" \ |
29 | : "=q" (x) | 29 | : "=q" (__x) \ |
30 | : "m" (*__xg(ptr)), "0" (x) | 30 | : "m" (*__xg(ptr)), "0" (__x) \ |
31 | : "memory"); | 31 | : "memory"); \ |
32 | break; | 32 | break; \ |
33 | case 2: | 33 | case 2: \ |
34 | asm volatile("xchgw %w0,%1" | 34 | asm volatile("xchgw %w0,%1" \ |
35 | : "=r" (x) | 35 | : "=r" (__x) \ |
36 | : "m" (*__xg(ptr)), "0" (x) | 36 | : "m" (*__xg(ptr)), "0" (__x) \ |
37 | : "memory"); | 37 | : "memory"); \ |
38 | break; | 38 | break; \ |
39 | case 4: | 39 | case 4: \ |
40 | asm volatile("xchgl %k0,%1" | 40 | asm volatile("xchgl %k0,%1" \ |
41 | : "=r" (x) | 41 | : "=r" (__x) \ |
42 | : "m" (*__xg(ptr)), "0" (x) | 42 | : "m" (*__xg(ptr)), "0" (__x) \ |
43 | : "memory"); | 43 | : "memory"); \ |
44 | break; | 44 | break; \ |
45 | case 8: | 45 | case 8: \ |
46 | asm volatile("xchgq %0,%1" | 46 | asm volatile("xchgq %0,%1" \ |
47 | : "=r" (x) | 47 | : "=r" (__x) \ |
48 | : "m" (*__xg(ptr)), "0" (x) | 48 | : "m" (*__xg(ptr)), "0" (__x) \ |
49 | : "memory"); | 49 | : "memory"); \ |
50 | break; | 50 | break; \ |
51 | } | 51 | default: \ |
52 | return x; | 52 | __xchg_wrong_size(); \ |
53 | } | 53 | } \ |
54 | __x; \ | ||
55 | }) | ||
56 | |||
57 | #define xchg(ptr, v) \ | ||
58 | __xchg((v), (ptr), sizeof(*ptr)) | ||
59 | |||
60 | #define __HAVE_ARCH_CMPXCHG 1 | ||
54 | 61 | ||
55 | /* | 62 | /* |
56 | * Atomic compare and exchange. Compare OLD with MEM, if identical, | 63 | * Atomic compare and exchange. Compare OLD with MEM, if identical, |
57 | * store NEW in MEM. Return the initial value in MEM. Success is | 64 | * store NEW in MEM. Return the initial value in MEM. Success is |
58 | * indicated by comparing RETURN with OLD. | 65 | * indicated by comparing RETURN with OLD. |
59 | */ | 66 | */ |
67 | #define __raw_cmpxchg(ptr, old, new, size, lock) \ | ||
68 | ({ \ | ||
69 | __typeof__(*(ptr)) __ret; \ | ||
70 | __typeof__(*(ptr)) __old = (old); \ | ||
71 | __typeof__(*(ptr)) __new = (new); \ | ||
72 | switch (size) { \ | ||
73 | case 1: \ | ||
74 | asm volatile(lock "cmpxchgb %b1,%2" \ | ||
75 | : "=a"(__ret) \ | ||
76 | : "q"(__new), "m"(*__xg(ptr)), "0"(__old) \ | ||
77 | : "memory"); \ | ||
78 | break; \ | ||
79 | case 2: \ | ||
80 | asm volatile(lock "cmpxchgw %w1,%2" \ | ||
81 | : "=a"(__ret) \ | ||
82 | : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ | ||
83 | : "memory"); \ | ||
84 | break; \ | ||
85 | case 4: \ | ||
86 | asm volatile(lock "cmpxchgl %k1,%2" \ | ||
87 | : "=a"(__ret) \ | ||
88 | : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ | ||
89 | : "memory"); \ | ||
90 | break; \ | ||
91 | case 8: \ | ||
92 | asm volatile(lock "cmpxchgq %1,%2" \ | ||
93 | : "=a"(__ret) \ | ||
94 | : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ | ||
95 | : "memory"); \ | ||
96 | break; \ | ||
97 | default: \ | ||
98 | __cmpxchg_wrong_size(); \ | ||
99 | } \ | ||
100 | __ret; \ | ||
101 | }) | ||
60 | 102 | ||
61 | #define __HAVE_ARCH_CMPXCHG 1 | 103 | #define __cmpxchg(ptr, old, new, size) \ |
104 | __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX) | ||
62 | 105 | ||
63 | static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, | 106 | #define __sync_cmpxchg(ptr, old, new, size) \ |
64 | unsigned long new, int size) | 107 | __raw_cmpxchg((ptr), (old), (new), (size), "lock; ") |
65 | { | ||
66 | unsigned long prev; | ||
67 | switch (size) { | ||
68 | case 1: | ||
69 | asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2" | ||
70 | : "=a"(prev) | ||
71 | : "q"(new), "m"(*__xg(ptr)), "0"(old) | ||
72 | : "memory"); | ||
73 | return prev; | ||
74 | case 2: | ||
75 | asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2" | ||
76 | : "=a"(prev) | ||
77 | : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
78 | : "memory"); | ||
79 | return prev; | ||
80 | case 4: | ||
81 | asm volatile(LOCK_PREFIX "cmpxchgl %k1,%2" | ||
82 | : "=a"(prev) | ||
83 | : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
84 | : "memory"); | ||
85 | return prev; | ||
86 | case 8: | ||
87 | asm volatile(LOCK_PREFIX "cmpxchgq %1,%2" | ||
88 | : "=a"(prev) | ||
89 | : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
90 | : "memory"); | ||
91 | return prev; | ||
92 | } | ||
93 | return old; | ||
94 | } | ||
95 | 108 | ||
96 | /* | 109 | #define __cmpxchg_local(ptr, old, new, size) \ |
97 | * Always use locked operations when touching memory shared with a | 110 | __raw_cmpxchg((ptr), (old), (new), (size), "") |
98 | * hypervisor, since the system may be SMP even if the guest kernel | ||
99 | * isn't. | ||
100 | */ | ||
101 | static inline unsigned long __sync_cmpxchg(volatile void *ptr, | ||
102 | unsigned long old, | ||
103 | unsigned long new, int size) | ||
104 | { | ||
105 | unsigned long prev; | ||
106 | switch (size) { | ||
107 | case 1: | ||
108 | asm volatile("lock; cmpxchgb %b1,%2" | ||
109 | : "=a"(prev) | ||
110 | : "q"(new), "m"(*__xg(ptr)), "0"(old) | ||
111 | : "memory"); | ||
112 | return prev; | ||
113 | case 2: | ||
114 | asm volatile("lock; cmpxchgw %w1,%2" | ||
115 | : "=a"(prev) | ||
116 | : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
117 | : "memory"); | ||
118 | return prev; | ||
119 | case 4: | ||
120 | asm volatile("lock; cmpxchgl %1,%2" | ||
121 | : "=a"(prev) | ||
122 | : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
123 | : "memory"); | ||
124 | return prev; | ||
125 | } | ||
126 | return old; | ||
127 | } | ||
128 | 111 | ||
129 | static inline unsigned long __cmpxchg_local(volatile void *ptr, | 112 | #define cmpxchg(ptr, old, new) \ |
130 | unsigned long old, | 113 | __cmpxchg((ptr), (old), (new), sizeof(*ptr)) |
131 | unsigned long new, int size) | 114 | |
132 | { | 115 | #define sync_cmpxchg(ptr, old, new) \ |
133 | unsigned long prev; | 116 | __sync_cmpxchg((ptr), (old), (new), sizeof(*ptr)) |
134 | switch (size) { | 117 | |
135 | case 1: | 118 | #define cmpxchg_local(ptr, old, new) \ |
136 | asm volatile("cmpxchgb %b1,%2" | 119 | __cmpxchg_local((ptr), (old), (new), sizeof(*ptr)) |
137 | : "=a"(prev) | ||
138 | : "q"(new), "m"(*__xg(ptr)), "0"(old) | ||
139 | : "memory"); | ||
140 | return prev; | ||
141 | case 2: | ||
142 | asm volatile("cmpxchgw %w1,%2" | ||
143 | : "=a"(prev) | ||
144 | : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
145 | : "memory"); | ||
146 | return prev; | ||
147 | case 4: | ||
148 | asm volatile("cmpxchgl %k1,%2" | ||
149 | : "=a"(prev) | ||
150 | : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
151 | : "memory"); | ||
152 | return prev; | ||
153 | case 8: | ||
154 | asm volatile("cmpxchgq %1,%2" | ||
155 | : "=a"(prev) | ||
156 | : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
157 | : "memory"); | ||
158 | return prev; | ||
159 | } | ||
160 | return old; | ||
161 | } | ||
162 | 120 | ||
163 | #define cmpxchg(ptr, o, n) \ | ||
164 | ((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o), \ | ||
165 | (unsigned long)(n), sizeof(*(ptr)))) | ||
166 | #define cmpxchg64(ptr, o, n) \ | 121 | #define cmpxchg64(ptr, o, n) \ |
167 | ({ \ | 122 | ({ \ |
168 | BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ | 123 | BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ |
169 | cmpxchg((ptr), (o), (n)); \ | 124 | cmpxchg((ptr), (o), (n)); \ |
170 | }) | 125 | }) |
171 | #define cmpxchg_local(ptr, o, n) \ | 126 | |
172 | ((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o), \ | ||
173 | (unsigned long)(n), \ | ||
174 | sizeof(*(ptr)))) | ||
175 | #define sync_cmpxchg(ptr, o, n) \ | ||
176 | ((__typeof__(*(ptr)))__sync_cmpxchg((ptr), (unsigned long)(o), \ | ||
177 | (unsigned long)(n), \ | ||
178 | sizeof(*(ptr)))) | ||
179 | #define cmpxchg64_local(ptr, o, n) \ | 127 | #define cmpxchg64_local(ptr, o, n) \ |
180 | ({ \ | 128 | ({ \ |
181 | BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ | 129 | BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ |
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 3ea6f37be9e2..8240f76b531e 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h | |||
@@ -18,6 +18,7 @@ | |||
18 | #define DR_TRAP1 (0x2) /* db1 */ | 18 | #define DR_TRAP1 (0x2) /* db1 */ |
19 | #define DR_TRAP2 (0x4) /* db2 */ | 19 | #define DR_TRAP2 (0x4) /* db2 */ |
20 | #define DR_TRAP3 (0x8) /* db3 */ | 20 | #define DR_TRAP3 (0x8) /* db3 */ |
21 | #define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3) | ||
21 | 22 | ||
22 | #define DR_STEP (0x4000) /* single-step */ | 23 | #define DR_STEP (0x4000) /* single-step */ |
23 | #define DR_SWITCH (0x8000) /* task switch */ | 24 | #define DR_SWITCH (0x8000) /* task switch */ |
@@ -49,6 +50,8 @@ | |||
49 | 50 | ||
50 | #define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */ | 51 | #define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */ |
51 | #define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */ | 52 | #define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */ |
53 | #define DR_LOCAL_ENABLE (0x1) /* Local enable for reg 0 */ | ||
54 | #define DR_GLOBAL_ENABLE (0x2) /* Global enable for reg 0 */ | ||
52 | #define DR_ENABLE_SIZE 2 /* 2 enable bits per register */ | 55 | #define DR_ENABLE_SIZE 2 /* 2 enable bits per register */ |
53 | 56 | ||
54 | #define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */ | 57 | #define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */ |
@@ -67,4 +70,34 @@ | |||
67 | #define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ | 70 | #define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ |
68 | #define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ | 71 | #define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ |
69 | 72 | ||
73 | /* | ||
74 | * HW breakpoint additions | ||
75 | */ | ||
76 | #ifdef __KERNEL__ | ||
77 | |||
78 | DECLARE_PER_CPU(unsigned long, cpu_dr7); | ||
79 | |||
80 | static inline void hw_breakpoint_disable(void) | ||
81 | { | ||
82 | /* Zero the control register for HW Breakpoint */ | ||
83 | set_debugreg(0UL, 7); | ||
84 | |||
85 | /* Zero-out the individual HW breakpoint address registers */ | ||
86 | set_debugreg(0UL, 0); | ||
87 | set_debugreg(0UL, 1); | ||
88 | set_debugreg(0UL, 2); | ||
89 | set_debugreg(0UL, 3); | ||
90 | } | ||
91 | |||
92 | static inline int hw_breakpoint_active(void) | ||
93 | { | ||
94 | return __get_cpu_var(cpu_dr7) & DR_GLOBAL_ENABLE_MASK; | ||
95 | } | ||
96 | |||
97 | extern void aout_dump_debugregs(struct user *dump); | ||
98 | |||
99 | extern void hw_breakpoint_restore(void); | ||
100 | |||
101 | #endif /* __KERNEL__ */ | ||
102 | |||
70 | #endif /* _ASM_X86_DEBUGREG_H */ | 103 | #endif /* _ASM_X86_DEBUGREG_H */ |
diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h index cee34e9ca45b..029f230ab637 100644 --- a/arch/x86/include/asm/device.h +++ b/arch/x86/include/asm/device.h | |||
@@ -8,7 +8,7 @@ struct dev_archdata { | |||
8 | #ifdef CONFIG_X86_64 | 8 | #ifdef CONFIG_X86_64 |
9 | struct dma_map_ops *dma_ops; | 9 | struct dma_map_ops *dma_ops; |
10 | #endif | 10 | #endif |
11 | #ifdef CONFIG_DMAR | 11 | #if defined(CONFIG_DMAR) || defined(CONFIG_AMD_IOMMU) |
12 | void *iommu; /* hook for IOMMU specific extension */ | 12 | void *iommu; /* hook for IOMMU specific extension */ |
13 | #endif | 13 | #endif |
14 | }; | 14 | }; |
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 6a25d5d42836..0f6c02f3b7d4 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h | |||
@@ -20,7 +20,8 @@ | |||
20 | # define ISA_DMA_BIT_MASK DMA_BIT_MASK(32) | 20 | # define ISA_DMA_BIT_MASK DMA_BIT_MASK(32) |
21 | #endif | 21 | #endif |
22 | 22 | ||
23 | extern dma_addr_t bad_dma_address; | 23 | #define DMA_ERROR_CODE 0 |
24 | |||
24 | extern int iommu_merge; | 25 | extern int iommu_merge; |
25 | extern struct device x86_dma_fallback_dev; | 26 | extern struct device x86_dma_fallback_dev; |
26 | extern int panic_on_overflow; | 27 | extern int panic_on_overflow; |
@@ -48,7 +49,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) | |||
48 | if (ops->mapping_error) | 49 | if (ops->mapping_error) |
49 | return ops->mapping_error(dev, dma_addr); | 50 | return ops->mapping_error(dev, dma_addr); |
50 | 51 | ||
51 | return (dma_addr == bad_dma_address); | 52 | return (dma_addr == DMA_ERROR_CODE); |
52 | } | 53 | } |
53 | 54 | ||
54 | #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) | 55 | #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) |
diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h index 6cfdafa409d8..4ac5b0f33fc1 100644 --- a/arch/x86/include/asm/gart.h +++ b/arch/x86/include/asm/gart.h | |||
@@ -35,8 +35,7 @@ extern int gart_iommu_aperture_allowed; | |||
35 | extern int gart_iommu_aperture_disabled; | 35 | extern int gart_iommu_aperture_disabled; |
36 | 36 | ||
37 | extern void early_gart_iommu_check(void); | 37 | extern void early_gart_iommu_check(void); |
38 | extern void gart_iommu_init(void); | 38 | extern int gart_iommu_init(void); |
39 | extern void gart_iommu_shutdown(void); | ||
40 | extern void __init gart_parse_options(char *); | 39 | extern void __init gart_parse_options(char *); |
41 | extern void gart_iommu_hole_init(void); | 40 | extern void gart_iommu_hole_init(void); |
42 | 41 | ||
@@ -48,12 +47,6 @@ extern void gart_iommu_hole_init(void); | |||
48 | static inline void early_gart_iommu_check(void) | 47 | static inline void early_gart_iommu_check(void) |
49 | { | 48 | { |
50 | } | 49 | } |
51 | static inline void gart_iommu_init(void) | ||
52 | { | ||
53 | } | ||
54 | static inline void gart_iommu_shutdown(void) | ||
55 | { | ||
56 | } | ||
57 | static inline void gart_parse_options(char *options) | 50 | static inline void gart_parse_options(char *options) |
58 | { | 51 | { |
59 | } | 52 | } |
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 82e3e8f01043..108eb6fd1ae7 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h | |||
@@ -20,11 +20,11 @@ typedef struct { | |||
20 | unsigned int irq_call_count; | 20 | unsigned int irq_call_count; |
21 | unsigned int irq_tlb_count; | 21 | unsigned int irq_tlb_count; |
22 | #endif | 22 | #endif |
23 | #ifdef CONFIG_X86_MCE | 23 | #ifdef CONFIG_X86_THERMAL_VECTOR |
24 | unsigned int irq_thermal_count; | 24 | unsigned int irq_thermal_count; |
25 | # ifdef CONFIG_X86_MCE_THRESHOLD | 25 | #endif |
26 | #ifdef CONFIG_X86_MCE_THRESHOLD | ||
26 | unsigned int irq_threshold_count; | 27 | unsigned int irq_threshold_count; |
27 | # endif | ||
28 | #endif | 28 | #endif |
29 | } ____cacheline_aligned irq_cpustat_t; | 29 | } ____cacheline_aligned irq_cpustat_t; |
30 | 30 | ||
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h new file mode 100644 index 000000000000..0675a7c4c20e --- /dev/null +++ b/arch/x86/include/asm/hw_breakpoint.h | |||
@@ -0,0 +1,73 @@ | |||
1 | #ifndef _I386_HW_BREAKPOINT_H | ||
2 | #define _I386_HW_BREAKPOINT_H | ||
3 | |||
4 | #ifdef __KERNEL__ | ||
5 | #define __ARCH_HW_BREAKPOINT_H | ||
6 | |||
7 | /* | ||
8 | * The name should probably be something dealt in | ||
9 | * a higher level. While dealing with the user | ||
10 | * (display/resolving) | ||
11 | */ | ||
12 | struct arch_hw_breakpoint { | ||
13 | char *name; /* Contains name of the symbol to set bkpt */ | ||
14 | unsigned long address; | ||
15 | u8 len; | ||
16 | u8 type; | ||
17 | }; | ||
18 | |||
19 | #include <linux/kdebug.h> | ||
20 | #include <linux/percpu.h> | ||
21 | #include <linux/list.h> | ||
22 | |||
23 | /* Available HW breakpoint length encodings */ | ||
24 | #define X86_BREAKPOINT_LEN_1 0x40 | ||
25 | #define X86_BREAKPOINT_LEN_2 0x44 | ||
26 | #define X86_BREAKPOINT_LEN_4 0x4c | ||
27 | #define X86_BREAKPOINT_LEN_EXECUTE 0x40 | ||
28 | |||
29 | #ifdef CONFIG_X86_64 | ||
30 | #define X86_BREAKPOINT_LEN_8 0x48 | ||
31 | #endif | ||
32 | |||
33 | /* Available HW breakpoint type encodings */ | ||
34 | |||
35 | /* trigger on instruction execute */ | ||
36 | #define X86_BREAKPOINT_EXECUTE 0x80 | ||
37 | /* trigger on memory write */ | ||
38 | #define X86_BREAKPOINT_WRITE 0x81 | ||
39 | /* trigger on memory read or write */ | ||
40 | #define X86_BREAKPOINT_RW 0x83 | ||
41 | |||
42 | /* Total number of available HW breakpoint registers */ | ||
43 | #define HBP_NUM 4 | ||
44 | |||
45 | struct perf_event; | ||
46 | struct pmu; | ||
47 | |||
48 | extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len); | ||
49 | extern int arch_validate_hwbkpt_settings(struct perf_event *bp, | ||
50 | struct task_struct *tsk); | ||
51 | extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, | ||
52 | unsigned long val, void *data); | ||
53 | |||
54 | |||
55 | int arch_install_hw_breakpoint(struct perf_event *bp); | ||
56 | void arch_uninstall_hw_breakpoint(struct perf_event *bp); | ||
57 | void hw_breakpoint_pmu_read(struct perf_event *bp); | ||
58 | void hw_breakpoint_pmu_unthrottle(struct perf_event *bp); | ||
59 | |||
60 | extern void | ||
61 | arch_fill_perf_breakpoint(struct perf_event *bp); | ||
62 | |||
63 | unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type); | ||
64 | int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type); | ||
65 | |||
66 | extern int arch_bp_generic_fields(int x86_len, int x86_type, | ||
67 | int *gen_len, int *gen_type); | ||
68 | |||
69 | extern struct pmu perf_ops_bp; | ||
70 | |||
71 | #endif /* __KERNEL__ */ | ||
72 | #endif /* _I386_HW_BREAKPOINT_H */ | ||
73 | |||
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index ba180d93b08c..6e124269fd4b 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h | |||
@@ -79,14 +79,32 @@ static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr, | |||
79 | int ioapic, int ioapic_pin, | 79 | int ioapic, int ioapic_pin, |
80 | int trigger, int polarity) | 80 | int trigger, int polarity) |
81 | { | 81 | { |
82 | irq_attr->ioapic = ioapic; | 82 | irq_attr->ioapic = ioapic; |
83 | irq_attr->ioapic_pin = ioapic_pin; | 83 | irq_attr->ioapic_pin = ioapic_pin; |
84 | irq_attr->trigger = trigger; | 84 | irq_attr->trigger = trigger; |
85 | irq_attr->polarity = polarity; | 85 | irq_attr->polarity = polarity; |
86 | } | 86 | } |
87 | 87 | ||
88 | extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, | 88 | /* |
89 | struct io_apic_irq_attr *irq_attr); | 89 | * This is performance-critical, we want to do it O(1) |
90 | * | ||
91 | * Most irqs are mapped 1:1 with pins. | ||
92 | */ | ||
93 | struct irq_cfg { | ||
94 | struct irq_pin_list *irq_2_pin; | ||
95 | cpumask_var_t domain; | ||
96 | cpumask_var_t old_domain; | ||
97 | u8 vector; | ||
98 | u8 move_in_progress : 1; | ||
99 | }; | ||
100 | |||
101 | extern struct irq_cfg *irq_cfg(unsigned int); | ||
102 | extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *); | ||
103 | extern void send_cleanup_vector(struct irq_cfg *); | ||
104 | |||
105 | struct irq_desc; | ||
106 | extern unsigned int set_desc_affinity(struct irq_desc *, const struct cpumask *); | ||
107 | extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr); | ||
90 | extern void setup_ioapic_dest(void); | 108 | extern void setup_ioapic_dest(void); |
91 | 109 | ||
92 | extern void enable_IO_APIC(void); | 110 | extern void enable_IO_APIC(void); |
diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h new file mode 100644 index 000000000000..205b063e3e32 --- /dev/null +++ b/arch/x86/include/asm/inat.h | |||
@@ -0,0 +1,220 @@ | |||
1 | #ifndef _ASM_X86_INAT_H | ||
2 | #define _ASM_X86_INAT_H | ||
3 | /* | ||
4 | * x86 instruction attributes | ||
5 | * | ||
6 | * Written by Masami Hiramatsu <mhiramat@redhat.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software | ||
20 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
21 | * | ||
22 | */ | ||
23 | #include <asm/inat_types.h> | ||
24 | |||
25 | /* | ||
26 | * Internal bits. Don't use bitmasks directly, because these bits are | ||
27 | * unstable. You should use checking functions. | ||
28 | */ | ||
29 | |||
30 | #define INAT_OPCODE_TABLE_SIZE 256 | ||
31 | #define INAT_GROUP_TABLE_SIZE 8 | ||
32 | |||
33 | /* Legacy last prefixes */ | ||
34 | #define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */ | ||
35 | #define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */ | ||
36 | #define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */ | ||
37 | /* Other Legacy prefixes */ | ||
38 | #define INAT_PFX_LOCK 4 /* 0xF0 */ | ||
39 | #define INAT_PFX_CS 5 /* 0x2E */ | ||
40 | #define INAT_PFX_DS 6 /* 0x3E */ | ||
41 | #define INAT_PFX_ES 7 /* 0x26 */ | ||
42 | #define INAT_PFX_FS 8 /* 0x64 */ | ||
43 | #define INAT_PFX_GS 9 /* 0x65 */ | ||
44 | #define INAT_PFX_SS 10 /* 0x36 */ | ||
45 | #define INAT_PFX_ADDRSZ 11 /* 0x67 */ | ||
46 | /* x86-64 REX prefix */ | ||
47 | #define INAT_PFX_REX 12 /* 0x4X */ | ||
48 | /* AVX VEX prefixes */ | ||
49 | #define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ | ||
50 | #define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ | ||
51 | |||
52 | #define INAT_LSTPFX_MAX 3 | ||
53 | #define INAT_LGCPFX_MAX 11 | ||
54 | |||
55 | /* Immediate size */ | ||
56 | #define INAT_IMM_BYTE 1 | ||
57 | #define INAT_IMM_WORD 2 | ||
58 | #define INAT_IMM_DWORD 3 | ||
59 | #define INAT_IMM_QWORD 4 | ||
60 | #define INAT_IMM_PTR 5 | ||
61 | #define INAT_IMM_VWORD32 6 | ||
62 | #define INAT_IMM_VWORD 7 | ||
63 | |||
64 | /* Legacy prefix */ | ||
65 | #define INAT_PFX_OFFS 0 | ||
66 | #define INAT_PFX_BITS 4 | ||
67 | #define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1) | ||
68 | #define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS) | ||
69 | /* Escape opcodes */ | ||
70 | #define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS) | ||
71 | #define INAT_ESC_BITS 2 | ||
72 | #define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1) | ||
73 | #define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS) | ||
74 | /* Group opcodes (1-16) */ | ||
75 | #define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS) | ||
76 | #define INAT_GRP_BITS 5 | ||
77 | #define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1) | ||
78 | #define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS) | ||
79 | /* Immediates */ | ||
80 | #define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS) | ||
81 | #define INAT_IMM_BITS 3 | ||
82 | #define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS) | ||
83 | /* Flags */ | ||
84 | #define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS) | ||
85 | #define INAT_MODRM (1 << (INAT_FLAG_OFFS)) | ||
86 | #define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1)) | ||
87 | #define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2)) | ||
88 | #define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3)) | ||
89 | #define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) | ||
90 | #define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) | ||
91 | #define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) | ||
92 | /* Attribute making macros for attribute tables */ | ||
93 | #define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) | ||
94 | #define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) | ||
95 | #define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM) | ||
96 | #define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS) | ||
97 | |||
98 | /* Attribute search APIs */ | ||
99 | extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); | ||
100 | extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, | ||
101 | insn_byte_t last_pfx, | ||
102 | insn_attr_t esc_attr); | ||
103 | extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, | ||
104 | insn_byte_t last_pfx, | ||
105 | insn_attr_t esc_attr); | ||
106 | extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, | ||
107 | insn_byte_t vex_m, | ||
108 | insn_byte_t vex_pp); | ||
109 | |||
110 | /* Attribute checking functions */ | ||
111 | static inline int inat_is_legacy_prefix(insn_attr_t attr) | ||
112 | { | ||
113 | attr &= INAT_PFX_MASK; | ||
114 | return attr && attr <= INAT_LGCPFX_MAX; | ||
115 | } | ||
116 | |||
117 | static inline int inat_is_address_size_prefix(insn_attr_t attr) | ||
118 | { | ||
119 | return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ; | ||
120 | } | ||
121 | |||
122 | static inline int inat_is_operand_size_prefix(insn_attr_t attr) | ||
123 | { | ||
124 | return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ; | ||
125 | } | ||
126 | |||
127 | static inline int inat_is_rex_prefix(insn_attr_t attr) | ||
128 | { | ||
129 | return (attr & INAT_PFX_MASK) == INAT_PFX_REX; | ||
130 | } | ||
131 | |||
132 | static inline int inat_last_prefix_id(insn_attr_t attr) | ||
133 | { | ||
134 | if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX) | ||
135 | return 0; | ||
136 | else | ||
137 | return attr & INAT_PFX_MASK; | ||
138 | } | ||
139 | |||
140 | static inline int inat_is_vex_prefix(insn_attr_t attr) | ||
141 | { | ||
142 | attr &= INAT_PFX_MASK; | ||
143 | return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3; | ||
144 | } | ||
145 | |||
146 | static inline int inat_is_vex3_prefix(insn_attr_t attr) | ||
147 | { | ||
148 | return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3; | ||
149 | } | ||
150 | |||
151 | static inline int inat_is_escape(insn_attr_t attr) | ||
152 | { | ||
153 | return attr & INAT_ESC_MASK; | ||
154 | } | ||
155 | |||
156 | static inline int inat_escape_id(insn_attr_t attr) | ||
157 | { | ||
158 | return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS; | ||
159 | } | ||
160 | |||
161 | static inline int inat_is_group(insn_attr_t attr) | ||
162 | { | ||
163 | return attr & INAT_GRP_MASK; | ||
164 | } | ||
165 | |||
166 | static inline int inat_group_id(insn_attr_t attr) | ||
167 | { | ||
168 | return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS; | ||
169 | } | ||
170 | |||
171 | static inline int inat_group_common_attribute(insn_attr_t attr) | ||
172 | { | ||
173 | return attr & ~INAT_GRP_MASK; | ||
174 | } | ||
175 | |||
176 | static inline int inat_has_immediate(insn_attr_t attr) | ||
177 | { | ||
178 | return attr & INAT_IMM_MASK; | ||
179 | } | ||
180 | |||
181 | static inline int inat_immediate_size(insn_attr_t attr) | ||
182 | { | ||
183 | return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS; | ||
184 | } | ||
185 | |||
186 | static inline int inat_has_modrm(insn_attr_t attr) | ||
187 | { | ||
188 | return attr & INAT_MODRM; | ||
189 | } | ||
190 | |||
191 | static inline int inat_is_force64(insn_attr_t attr) | ||
192 | { | ||
193 | return attr & INAT_FORCE64; | ||
194 | } | ||
195 | |||
196 | static inline int inat_has_second_immediate(insn_attr_t attr) | ||
197 | { | ||
198 | return attr & INAT_SCNDIMM; | ||
199 | } | ||
200 | |||
201 | static inline int inat_has_moffset(insn_attr_t attr) | ||
202 | { | ||
203 | return attr & INAT_MOFFSET; | ||
204 | } | ||
205 | |||
206 | static inline int inat_has_variant(insn_attr_t attr) | ||
207 | { | ||
208 | return attr & INAT_VARIANT; | ||
209 | } | ||
210 | |||
211 | static inline int inat_accept_vex(insn_attr_t attr) | ||
212 | { | ||
213 | return attr & INAT_VEXOK; | ||
214 | } | ||
215 | |||
216 | static inline int inat_must_vex(insn_attr_t attr) | ||
217 | { | ||
218 | return attr & INAT_VEXONLY; | ||
219 | } | ||
220 | #endif | ||
diff --git a/arch/x86/include/asm/inat_types.h b/arch/x86/include/asm/inat_types.h new file mode 100644 index 000000000000..cb3c20ce39cf --- /dev/null +++ b/arch/x86/include/asm/inat_types.h | |||
@@ -0,0 +1,29 @@ | |||
1 | #ifndef _ASM_X86_INAT_TYPES_H | ||
2 | #define _ASM_X86_INAT_TYPES_H | ||
3 | /* | ||
4 | * x86 instruction attributes | ||
5 | * | ||
6 | * Written by Masami Hiramatsu <mhiramat@redhat.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software | ||
20 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | /* Instruction attributes */ | ||
25 | typedef unsigned int insn_attr_t; | ||
26 | typedef unsigned char insn_byte_t; | ||
27 | typedef signed int insn_value_t; | ||
28 | |||
29 | #endif | ||
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h new file mode 100644 index 000000000000..96c2e0ad04ca --- /dev/null +++ b/arch/x86/include/asm/insn.h | |||
@@ -0,0 +1,184 @@ | |||
1 | #ifndef _ASM_X86_INSN_H | ||
2 | #define _ASM_X86_INSN_H | ||
3 | /* | ||
4 | * x86 instruction analysis | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
19 | * | ||
20 | * Copyright (C) IBM Corporation, 2009 | ||
21 | */ | ||
22 | |||
23 | /* insn_attr_t is defined in inat.h */ | ||
24 | #include <asm/inat.h> | ||
25 | |||
26 | struct insn_field { | ||
27 | union { | ||
28 | insn_value_t value; | ||
29 | insn_byte_t bytes[4]; | ||
30 | }; | ||
31 | /* !0 if we've run insn_get_xxx() for this field */ | ||
32 | unsigned char got; | ||
33 | unsigned char nbytes; | ||
34 | }; | ||
35 | |||
36 | struct insn { | ||
37 | struct insn_field prefixes; /* | ||
38 | * Prefixes | ||
39 | * prefixes.bytes[3]: last prefix | ||
40 | */ | ||
41 | struct insn_field rex_prefix; /* REX prefix */ | ||
42 | struct insn_field vex_prefix; /* VEX prefix */ | ||
43 | struct insn_field opcode; /* | ||
44 | * opcode.bytes[0]: opcode1 | ||
45 | * opcode.bytes[1]: opcode2 | ||
46 | * opcode.bytes[2]: opcode3 | ||
47 | */ | ||
48 | struct insn_field modrm; | ||
49 | struct insn_field sib; | ||
50 | struct insn_field displacement; | ||
51 | union { | ||
52 | struct insn_field immediate; | ||
53 | struct insn_field moffset1; /* for 64bit MOV */ | ||
54 | struct insn_field immediate1; /* for 64bit imm or off16/32 */ | ||
55 | }; | ||
56 | union { | ||
57 | struct insn_field moffset2; /* for 64bit MOV */ | ||
58 | struct insn_field immediate2; /* for 64bit imm or seg16 */ | ||
59 | }; | ||
60 | |||
61 | insn_attr_t attr; | ||
62 | unsigned char opnd_bytes; | ||
63 | unsigned char addr_bytes; | ||
64 | unsigned char length; | ||
65 | unsigned char x86_64; | ||
66 | |||
67 | const insn_byte_t *kaddr; /* kernel address of insn to analyze */ | ||
68 | const insn_byte_t *next_byte; | ||
69 | }; | ||
70 | |||
71 | #define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) | ||
72 | #define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) | ||
73 | #define X86_MODRM_RM(modrm) ((modrm) & 0x07) | ||
74 | |||
75 | #define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6) | ||
76 | #define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3) | ||
77 | #define X86_SIB_BASE(sib) ((sib) & 0x07) | ||
78 | |||
79 | #define X86_REX_W(rex) ((rex) & 8) | ||
80 | #define X86_REX_R(rex) ((rex) & 4) | ||
81 | #define X86_REX_X(rex) ((rex) & 2) | ||
82 | #define X86_REX_B(rex) ((rex) & 1) | ||
83 | |||
84 | /* VEX bit flags */ | ||
85 | #define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */ | ||
86 | #define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */ | ||
87 | #define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */ | ||
88 | #define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ | ||
89 | #define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ | ||
90 | /* VEX bit fields */ | ||
91 | #define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ | ||
92 | #define X86_VEX2_M 1 /* VEX2.M always 1 */ | ||
93 | #define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ | ||
94 | #define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ | ||
95 | #define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ | ||
96 | |||
97 | /* The last prefix is needed for two-byte and three-byte opcodes */ | ||
98 | static inline insn_byte_t insn_last_prefix(struct insn *insn) | ||
99 | { | ||
100 | return insn->prefixes.bytes[3]; | ||
101 | } | ||
102 | |||
103 | extern void insn_init(struct insn *insn, const void *kaddr, int x86_64); | ||
104 | extern void insn_get_prefixes(struct insn *insn); | ||
105 | extern void insn_get_opcode(struct insn *insn); | ||
106 | extern void insn_get_modrm(struct insn *insn); | ||
107 | extern void insn_get_sib(struct insn *insn); | ||
108 | extern void insn_get_displacement(struct insn *insn); | ||
109 | extern void insn_get_immediate(struct insn *insn); | ||
110 | extern void insn_get_length(struct insn *insn); | ||
111 | |||
112 | /* Attribute will be determined after getting ModRM (for opcode groups) */ | ||
113 | static inline void insn_get_attribute(struct insn *insn) | ||
114 | { | ||
115 | insn_get_modrm(insn); | ||
116 | } | ||
117 | |||
118 | /* Instruction uses RIP-relative addressing */ | ||
119 | extern int insn_rip_relative(struct insn *insn); | ||
120 | |||
121 | /* Init insn for kernel text */ | ||
122 | static inline void kernel_insn_init(struct insn *insn, const void *kaddr) | ||
123 | { | ||
124 | #ifdef CONFIG_X86_64 | ||
125 | insn_init(insn, kaddr, 1); | ||
126 | #else /* CONFIG_X86_32 */ | ||
127 | insn_init(insn, kaddr, 0); | ||
128 | #endif | ||
129 | } | ||
130 | |||
131 | static inline int insn_is_avx(struct insn *insn) | ||
132 | { | ||
133 | if (!insn->prefixes.got) | ||
134 | insn_get_prefixes(insn); | ||
135 | return (insn->vex_prefix.value != 0); | ||
136 | } | ||
137 | |||
138 | static inline insn_byte_t insn_vex_m_bits(struct insn *insn) | ||
139 | { | ||
140 | if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ | ||
141 | return X86_VEX2_M; | ||
142 | else | ||
143 | return X86_VEX3_M(insn->vex_prefix.bytes[1]); | ||
144 | } | ||
145 | |||
146 | static inline insn_byte_t insn_vex_p_bits(struct insn *insn) | ||
147 | { | ||
148 | if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ | ||
149 | return X86_VEX_P(insn->vex_prefix.bytes[1]); | ||
150 | else | ||
151 | return X86_VEX_P(insn->vex_prefix.bytes[2]); | ||
152 | } | ||
153 | |||
154 | /* Offset of each field from kaddr */ | ||
155 | static inline int insn_offset_rex_prefix(struct insn *insn) | ||
156 | { | ||
157 | return insn->prefixes.nbytes; | ||
158 | } | ||
159 | static inline int insn_offset_vex_prefix(struct insn *insn) | ||
160 | { | ||
161 | return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes; | ||
162 | } | ||
163 | static inline int insn_offset_opcode(struct insn *insn) | ||
164 | { | ||
165 | return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes; | ||
166 | } | ||
167 | static inline int insn_offset_modrm(struct insn *insn) | ||
168 | { | ||
169 | return insn_offset_opcode(insn) + insn->opcode.nbytes; | ||
170 | } | ||
171 | static inline int insn_offset_sib(struct insn *insn) | ||
172 | { | ||
173 | return insn_offset_modrm(insn) + insn->modrm.nbytes; | ||
174 | } | ||
175 | static inline int insn_offset_displacement(struct insn *insn) | ||
176 | { | ||
177 | return insn_offset_sib(insn) + insn->sib.nbytes; | ||
178 | } | ||
179 | static inline int insn_offset_immediate(struct insn *insn) | ||
180 | { | ||
181 | return insn_offset_displacement(insn) + insn->displacement.nbytes; | ||
182 | } | ||
183 | |||
184 | #endif /* _ASM_X86_INSN_H */ | ||
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index fd6d21bbee6c..345c99cef152 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h | |||
@@ -1,8 +1,6 @@ | |||
1 | #ifndef _ASM_X86_IOMMU_H | 1 | #ifndef _ASM_X86_IOMMU_H |
2 | #define _ASM_X86_IOMMU_H | 2 | #define _ASM_X86_IOMMU_H |
3 | 3 | ||
4 | extern void pci_iommu_shutdown(void); | ||
5 | extern void no_iommu_init(void); | ||
6 | extern struct dma_map_ops nommu_dma_ops; | 4 | extern struct dma_map_ops nommu_dma_ops; |
7 | extern int force_iommu, no_iommu; | 5 | extern int force_iommu, no_iommu; |
8 | extern int iommu_detected; | 6 | extern int iommu_detected; |
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index ddda6cbed6f4..ffd700ff5dcb 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h | |||
@@ -34,6 +34,7 @@ static inline int irq_canonicalize(int irq) | |||
34 | #ifdef CONFIG_HOTPLUG_CPU | 34 | #ifdef CONFIG_HOTPLUG_CPU |
35 | #include <linux/cpumask.h> | 35 | #include <linux/cpumask.h> |
36 | extern void fixup_irqs(void); | 36 | extern void fixup_irqs(void); |
37 | extern void irq_force_complete_move(int); | ||
37 | #endif | 38 | #endif |
38 | 39 | ||
39 | extern void (*generic_interrupt_extension)(void); | 40 | extern void (*generic_interrupt_extension)(void); |
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index f1363b72364f..858baa061cfc 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
@@ -108,6 +108,8 @@ struct mce_log { | |||
108 | #define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) | 108 | #define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) |
109 | #define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) | 109 | #define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) |
110 | 110 | ||
111 | extern struct atomic_notifier_head x86_mce_decoder_chain; | ||
112 | |||
111 | #ifdef __KERNEL__ | 113 | #ifdef __KERNEL__ |
112 | 114 | ||
113 | #include <linux/percpu.h> | 115 | #include <linux/percpu.h> |
@@ -118,9 +120,11 @@ extern int mce_disabled; | |||
118 | extern int mce_p5_enabled; | 120 | extern int mce_p5_enabled; |
119 | 121 | ||
120 | #ifdef CONFIG_X86_MCE | 122 | #ifdef CONFIG_X86_MCE |
121 | void mcheck_init(struct cpuinfo_x86 *c); | 123 | int mcheck_init(void); |
124 | void mcheck_cpu_init(struct cpuinfo_x86 *c); | ||
122 | #else | 125 | #else |
123 | static inline void mcheck_init(struct cpuinfo_x86 *c) {} | 126 | static inline int mcheck_init(void) { return 0; } |
127 | static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {} | ||
124 | #endif | 128 | #endif |
125 | 129 | ||
126 | #ifdef CONFIG_X86_ANCIENT_MCE | 130 | #ifdef CONFIG_X86_ANCIENT_MCE |
@@ -214,5 +218,11 @@ void intel_init_thermal(struct cpuinfo_x86 *c); | |||
214 | 218 | ||
215 | void mce_log_therm_throt_event(__u64 status); | 219 | void mce_log_therm_throt_event(__u64 status); |
216 | 220 | ||
221 | #ifdef CONFIG_X86_THERMAL_VECTOR | ||
222 | extern void mcheck_intel_therm_init(void); | ||
223 | #else | ||
224 | static inline void mcheck_intel_therm_init(void) { } | ||
225 | #endif | ||
226 | |||
217 | #endif /* __KERNEL__ */ | 227 | #endif /* __KERNEL__ */ |
218 | #endif /* _ASM_X86_MCE_H */ | 228 | #endif /* _ASM_X86_MCE_H */ |
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 79c94500c0bb..61d90b1331c3 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h | |||
@@ -163,14 +163,16 @@ typedef struct physid_mask physid_mask_t; | |||
163 | #define physids_shift_left(d, s, n) \ | 163 | #define physids_shift_left(d, s, n) \ |
164 | bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS) | 164 | bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS) |
165 | 165 | ||
166 | #define physids_coerce(map) ((map).mask[0]) | 166 | static inline unsigned long physids_coerce(physid_mask_t *map) |
167 | { | ||
168 | return map->mask[0]; | ||
169 | } | ||
167 | 170 | ||
168 | #define physids_promote(physids) \ | 171 | static inline void physids_promote(unsigned long physids, physid_mask_t *map) |
169 | ({ \ | 172 | { |
170 | physid_mask_t __physid_mask = PHYSID_MASK_NONE; \ | 173 | physids_clear(*map); |
171 | __physid_mask.mask[0] = physids; \ | 174 | map->mask[0] = physids; |
172 | __physid_mask; \ | 175 | } |
173 | }) | ||
174 | 176 | ||
175 | /* Note: will create very large stack frames if physid_mask_t is big */ | 177 | /* Note: will create very large stack frames if physid_mask_t is big */ |
176 | #define physid_mask_of_physid(physid) \ | 178 | #define physid_mask_of_physid(physid) \ |
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 7e2b6ba962ff..5bef931f8b14 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h | |||
@@ -247,8 +247,8 @@ do { \ | |||
247 | #ifdef CONFIG_SMP | 247 | #ifdef CONFIG_SMP |
248 | int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); | 248 | int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); |
249 | int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); | 249 | int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); |
250 | void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs); | 250 | void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs); |
251 | void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs); | 251 | void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs); |
252 | int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); | 252 | int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); |
253 | int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); | 253 | int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); |
254 | int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]); | 254 | int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]); |
@@ -264,12 +264,12 @@ static inline int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) | |||
264 | wrmsr(msr_no, l, h); | 264 | wrmsr(msr_no, l, h); |
265 | return 0; | 265 | return 0; |
266 | } | 266 | } |
267 | static inline void rdmsr_on_cpus(const cpumask_t *m, u32 msr_no, | 267 | static inline void rdmsr_on_cpus(const struct cpumask *m, u32 msr_no, |
268 | struct msr *msrs) | 268 | struct msr *msrs) |
269 | { | 269 | { |
270 | rdmsr_on_cpu(0, msr_no, &(msrs[0].l), &(msrs[0].h)); | 270 | rdmsr_on_cpu(0, msr_no, &(msrs[0].l), &(msrs[0].h)); |
271 | } | 271 | } |
272 | static inline void wrmsr_on_cpus(const cpumask_t *m, u32 msr_no, | 272 | static inline void wrmsr_on_cpus(const struct cpumask *m, u32 msr_no, |
273 | struct msr *msrs) | 273 | struct msr *msrs) |
274 | { | 274 | { |
275 | wrmsr_on_cpu(0, msr_no, msrs[0].l, msrs[0].h); | 275 | wrmsr_on_cpu(0, msr_no, msrs[0].l, msrs[0].h); |
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index ad7ce3fd5065..8d9f8548a870 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h | |||
@@ -28,9 +28,20 @@ | |||
28 | */ | 28 | */ |
29 | #define ARCH_PERFMON_EVENT_MASK 0xffff | 29 | #define ARCH_PERFMON_EVENT_MASK 0xffff |
30 | 30 | ||
31 | /* | ||
32 | * filter mask to validate fixed counter events. | ||
33 | * the following filters disqualify for fixed counters: | ||
34 | * - inv | ||
35 | * - edge | ||
36 | * - cnt-mask | ||
37 | * The other filters are supported by fixed counters. | ||
38 | * The any-thread option is supported starting with v3. | ||
39 | */ | ||
40 | #define ARCH_PERFMON_EVENT_FILTER_MASK 0xff840000 | ||
41 | |||
31 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c | 42 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c |
32 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) | 43 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) |
33 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 | 44 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 |
34 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ | 45 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ |
35 | (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) | 46 | (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) |
36 | 47 | ||
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c9786480f0fe..6f8ec1c37e0a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -30,6 +30,7 @@ struct mm_struct; | |||
30 | #include <linux/math64.h> | 30 | #include <linux/math64.h> |
31 | #include <linux/init.h> | 31 | #include <linux/init.h> |
32 | 32 | ||
33 | #define HBP_NUM 4 | ||
33 | /* | 34 | /* |
34 | * Default implementation of macro that returns current | 35 | * Default implementation of macro that returns current |
35 | * instruction pointer ("program counter"). | 36 | * instruction pointer ("program counter"). |
@@ -422,6 +423,8 @@ extern unsigned int xstate_size; | |||
422 | extern void free_thread_xstate(struct task_struct *); | 423 | extern void free_thread_xstate(struct task_struct *); |
423 | extern struct kmem_cache *task_xstate_cachep; | 424 | extern struct kmem_cache *task_xstate_cachep; |
424 | 425 | ||
426 | struct perf_event; | ||
427 | |||
425 | struct thread_struct { | 428 | struct thread_struct { |
426 | /* Cached TLS descriptors: */ | 429 | /* Cached TLS descriptors: */ |
427 | struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; | 430 | struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; |
@@ -443,13 +446,10 @@ struct thread_struct { | |||
443 | unsigned long fs; | 446 | unsigned long fs; |
444 | #endif | 447 | #endif |
445 | unsigned long gs; | 448 | unsigned long gs; |
446 | /* Hardware debugging registers: */ | 449 | /* Save middle states of ptrace breakpoints */ |
447 | unsigned long debugreg0; | 450 | struct perf_event *ptrace_bps[HBP_NUM]; |
448 | unsigned long debugreg1; | 451 | /* Debug status used for traps, single steps, etc... */ |
449 | unsigned long debugreg2; | 452 | unsigned long debugreg6; |
450 | unsigned long debugreg3; | ||
451 | unsigned long debugreg6; | ||
452 | unsigned long debugreg7; | ||
453 | /* Fault info: */ | 453 | /* Fault info: */ |
454 | unsigned long cr2; | 454 | unsigned long cr2; |
455 | unsigned long trap_no; | 455 | unsigned long trap_no; |
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 0f0d908349aa..3d11fd0f44c5 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h | |||
@@ -7,6 +7,7 @@ | |||
7 | 7 | ||
8 | #ifdef __KERNEL__ | 8 | #ifdef __KERNEL__ |
9 | #include <asm/segment.h> | 9 | #include <asm/segment.h> |
10 | #include <asm/page_types.h> | ||
10 | #endif | 11 | #endif |
11 | 12 | ||
12 | #ifndef __ASSEMBLY__ | 13 | #ifndef __ASSEMBLY__ |
@@ -216,6 +217,67 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs) | |||
216 | return regs->sp; | 217 | return regs->sp; |
217 | } | 218 | } |
218 | 219 | ||
220 | /* Query offset/name of register from its name/offset */ | ||
221 | extern int regs_query_register_offset(const char *name); | ||
222 | extern const char *regs_query_register_name(unsigned int offset); | ||
223 | #define MAX_REG_OFFSET (offsetof(struct pt_regs, ss)) | ||
224 | |||
225 | /** | ||
226 | * regs_get_register() - get register value from its offset | ||
227 | * @regs: pt_regs from which register value is gotten. | ||
228 | * @offset: offset number of the register. | ||
229 | * | ||
230 | * regs_get_register returns the value of a register. The @offset is the | ||
231 | * offset of the register in struct pt_regs address which specified by @regs. | ||
232 | * If @offset is bigger than MAX_REG_OFFSET, this returns 0. | ||
233 | */ | ||
234 | static inline unsigned long regs_get_register(struct pt_regs *regs, | ||
235 | unsigned int offset) | ||
236 | { | ||
237 | if (unlikely(offset > MAX_REG_OFFSET)) | ||
238 | return 0; | ||
239 | return *(unsigned long *)((unsigned long)regs + offset); | ||
240 | } | ||
241 | |||
242 | /** | ||
243 | * regs_within_kernel_stack() - check the address in the stack | ||
244 | * @regs: pt_regs which contains kernel stack pointer. | ||
245 | * @addr: address which is checked. | ||
246 | * | ||
247 | * regs_within_kernel_stack() checks @addr is within the kernel stack page(s). | ||
248 | * If @addr is within the kernel stack, it returns true. If not, returns false. | ||
249 | */ | ||
250 | static inline int regs_within_kernel_stack(struct pt_regs *regs, | ||
251 | unsigned long addr) | ||
252 | { | ||
253 | return ((addr & ~(THREAD_SIZE - 1)) == | ||
254 | (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))); | ||
255 | } | ||
256 | |||
257 | /** | ||
258 | * regs_get_kernel_stack_nth() - get Nth entry of the stack | ||
259 | * @regs: pt_regs which contains kernel stack pointer. | ||
260 | * @n: stack entry number. | ||
261 | * | ||
262 | * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which | ||
263 | * is specified by @regs. If the @n th entry is NOT in the kernel stack, | ||
264 | * this returns 0. | ||
265 | */ | ||
266 | static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, | ||
267 | unsigned int n) | ||
268 | { | ||
269 | unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); | ||
270 | addr += n; | ||
271 | if (regs_within_kernel_stack(regs, (unsigned long)addr)) | ||
272 | return *addr; | ||
273 | else | ||
274 | return 0; | ||
275 | } | ||
276 | |||
277 | /* Get Nth argument at function call */ | ||
278 | extern unsigned long regs_get_argument_nth(struct pt_regs *regs, | ||
279 | unsigned int n); | ||
280 | |||
219 | /* | 281 | /* |
220 | * These are defined as per linux/ptrace.h, which see. | 282 | * These are defined as per linux/ptrace.h, which see. |
221 | */ | 283 | */ |
diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h index ae907e617181..3d3e8353ee5c 100644 --- a/arch/x86/include/asm/string_32.h +++ b/arch/x86/include/asm/string_32.h | |||
@@ -177,10 +177,15 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len) | |||
177 | */ | 177 | */ |
178 | 178 | ||
179 | #ifndef CONFIG_KMEMCHECK | 179 | #ifndef CONFIG_KMEMCHECK |
180 | |||
181 | #if (__GNUC__ >= 4) | ||
182 | #define memcpy(t, f, n) __builtin_memcpy(t, f, n) | ||
183 | #else | ||
180 | #define memcpy(t, f, n) \ | 184 | #define memcpy(t, f, n) \ |
181 | (__builtin_constant_p((n)) \ | 185 | (__builtin_constant_p((n)) \ |
182 | ? __constant_memcpy((t), (f), (n)) \ | 186 | ? __constant_memcpy((t), (f), (n)) \ |
183 | : __memcpy((t), (f), (n))) | 187 | : __memcpy((t), (f), (n))) |
188 | #endif | ||
184 | #else | 189 | #else |
185 | /* | 190 | /* |
186 | * kmemcheck becomes very happy if we use the REP instructions unconditionally, | 191 | * kmemcheck becomes very happy if we use the REP instructions unconditionally, |
@@ -316,11 +321,15 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern, | |||
316 | : __memset_generic((s), (c), (count))) | 321 | : __memset_generic((s), (c), (count))) |
317 | 322 | ||
318 | #define __HAVE_ARCH_MEMSET | 323 | #define __HAVE_ARCH_MEMSET |
324 | #if (__GNUC__ >= 4) | ||
325 | #define memset(s, c, count) __builtin_memset(s, c, count) | ||
326 | #else | ||
319 | #define memset(s, c, count) \ | 327 | #define memset(s, c, count) \ |
320 | (__builtin_constant_p(c) \ | 328 | (__builtin_constant_p(c) \ |
321 | ? __constant_c_x_memset((s), (0x01010101UL * (unsigned char)(c)), \ | 329 | ? __constant_c_x_memset((s), (0x01010101UL * (unsigned char)(c)), \ |
322 | (count)) \ | 330 | (count)) \ |
323 | : __memset((s), (c), (count))) | 331 | : __memset((s), (c), (count))) |
332 | #endif | ||
324 | 333 | ||
325 | /* | 334 | /* |
326 | * find the first occurrence of byte 'c', or 1 past the area if none | 335 | * find the first occurrence of byte 'c', or 1 past the area if none |
diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h index b9e4e20174fb..87ffcb12a1b8 100644 --- a/arch/x86/include/asm/swiotlb.h +++ b/arch/x86/include/asm/swiotlb.h | |||
@@ -3,17 +3,14 @@ | |||
3 | 3 | ||
4 | #include <linux/swiotlb.h> | 4 | #include <linux/swiotlb.h> |
5 | 5 | ||
6 | /* SWIOTLB interface */ | ||
7 | |||
8 | extern int swiotlb_force; | ||
9 | |||
10 | #ifdef CONFIG_SWIOTLB | 6 | #ifdef CONFIG_SWIOTLB |
11 | extern int swiotlb; | 7 | extern int swiotlb; |
12 | extern void pci_swiotlb_init(void); | 8 | extern int pci_swiotlb_init(void); |
13 | #else | 9 | #else |
14 | #define swiotlb 0 | 10 | #define swiotlb 0 |
15 | static inline void pci_swiotlb_init(void) | 11 | static inline int pci_swiotlb_init(void) |
16 | { | 12 | { |
13 | return 0; | ||
17 | } | 14 | } |
18 | #endif | 15 | #endif |
19 | 16 | ||
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index f08f97374892..022a84386de8 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h | |||
@@ -128,8 +128,6 @@ do { \ | |||
128 | "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ | 128 | "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ |
129 | "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ | 129 | "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ |
130 | "call __switch_to\n\t" \ | 130 | "call __switch_to\n\t" \ |
131 | ".globl thread_return\n" \ | ||
132 | "thread_return:\n\t" \ | ||
133 | "movq "__percpu_arg([current_task])",%%rsi\n\t" \ | 131 | "movq "__percpu_arg([current_task])",%%rsi\n\t" \ |
134 | __switch_canary \ | 132 | __switch_canary \ |
135 | "movq %P[thread_info](%%rsi),%%r8\n\t" \ | 133 | "movq %P[thread_info](%%rsi),%%r8\n\t" \ |
@@ -157,19 +155,22 @@ extern void native_load_gs_index(unsigned); | |||
157 | * Load a segment. Fall back on loading the zero | 155 | * Load a segment. Fall back on loading the zero |
158 | * segment if something goes wrong.. | 156 | * segment if something goes wrong.. |
159 | */ | 157 | */ |
160 | #define loadsegment(seg, value) \ | 158 | #define loadsegment(seg, value) \ |
161 | asm volatile("\n" \ | 159 | do { \ |
162 | "1:\t" \ | 160 | unsigned short __val = (value); \ |
163 | "movl %k0,%%" #seg "\n" \ | 161 | \ |
164 | "2:\n" \ | 162 | asm volatile(" \n" \ |
165 | ".section .fixup,\"ax\"\n" \ | 163 | "1: movl %k0,%%" #seg " \n" \ |
166 | "3:\t" \ | 164 | \ |
167 | "movl %k1, %%" #seg "\n\t" \ | 165 | ".section .fixup,\"ax\" \n" \ |
168 | "jmp 2b\n" \ | 166 | "2: xorl %k0,%k0 \n" \ |
169 | ".previous\n" \ | 167 | " jmp 1b \n" \ |
170 | _ASM_EXTABLE(1b,3b) \ | 168 | ".previous \n" \ |
171 | : :"r" (value), "r" (0) : "memory") | 169 | \ |
172 | 170 | _ASM_EXTABLE(1b, 2b) \ | |
171 | \ | ||
172 | : "+r" (__val) : : "memory"); \ | ||
173 | } while (0) | ||
173 | 174 | ||
174 | /* | 175 | /* |
175 | * Save a segment register away | 176 | * Save a segment register away |
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index d2c6c930b491..abd3e0ea762a 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h | |||
@@ -570,7 +570,6 @@ extern struct movsl_mask { | |||
570 | #ifdef CONFIG_X86_32 | 570 | #ifdef CONFIG_X86_32 |
571 | # include "uaccess_32.h" | 571 | # include "uaccess_32.h" |
572 | #else | 572 | #else |
573 | # define ARCH_HAS_SEARCH_EXTABLE | ||
574 | # include "uaccess_64.h" | 573 | # include "uaccess_64.h" |
575 | #endif | 574 | #endif |
576 | 575 | ||
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 632fb44b4cb5..0c9825e97f36 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h | |||
@@ -187,9 +187,34 @@ __copy_from_user_inatomic_nocache(void *to, const void __user *from, | |||
187 | 187 | ||
188 | unsigned long __must_check copy_to_user(void __user *to, | 188 | unsigned long __must_check copy_to_user(void __user *to, |
189 | const void *from, unsigned long n); | 189 | const void *from, unsigned long n); |
190 | unsigned long __must_check copy_from_user(void *to, | 190 | unsigned long __must_check _copy_from_user(void *to, |
191 | const void __user *from, | 191 | const void __user *from, |
192 | unsigned long n); | 192 | unsigned long n); |
193 | |||
194 | |||
195 | extern void copy_from_user_overflow(void) | ||
196 | #ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS | ||
197 | __compiletime_error("copy_from_user() buffer size is not provably correct") | ||
198 | #else | ||
199 | __compiletime_warning("copy_from_user() buffer size is not provably correct") | ||
200 | #endif | ||
201 | ; | ||
202 | |||
203 | static inline unsigned long __must_check copy_from_user(void *to, | ||
204 | const void __user *from, | ||
205 | unsigned long n) | ||
206 | { | ||
207 | int sz = __compiletime_object_size(to); | ||
208 | int ret = -EFAULT; | ||
209 | |||
210 | if (likely(sz == -1 || sz >= n)) | ||
211 | ret = _copy_from_user(to, from, n); | ||
212 | else | ||
213 | copy_from_user_overflow(); | ||
214 | |||
215 | return ret; | ||
216 | } | ||
217 | |||
193 | long __must_check strncpy_from_user(char *dst, const char __user *src, | 218 | long __must_check strncpy_from_user(char *dst, const char __user *src, |
194 | long count); | 219 | long count); |
195 | long __must_check __strncpy_from_user(char *dst, | 220 | long __must_check __strncpy_from_user(char *dst, |
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index db24b215fc50..46324c6a4f6e 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h | |||
@@ -19,12 +19,37 @@ __must_check unsigned long | |||
19 | copy_user_generic(void *to, const void *from, unsigned len); | 19 | copy_user_generic(void *to, const void *from, unsigned len); |
20 | 20 | ||
21 | __must_check unsigned long | 21 | __must_check unsigned long |
22 | copy_to_user(void __user *to, const void *from, unsigned len); | 22 | _copy_to_user(void __user *to, const void *from, unsigned len); |
23 | __must_check unsigned long | 23 | __must_check unsigned long |
24 | copy_from_user(void *to, const void __user *from, unsigned len); | 24 | _copy_from_user(void *to, const void __user *from, unsigned len); |
25 | __must_check unsigned long | 25 | __must_check unsigned long |
26 | copy_in_user(void __user *to, const void __user *from, unsigned len); | 26 | copy_in_user(void __user *to, const void __user *from, unsigned len); |
27 | 27 | ||
28 | static inline unsigned long __must_check copy_from_user(void *to, | ||
29 | const void __user *from, | ||
30 | unsigned long n) | ||
31 | { | ||
32 | int sz = __compiletime_object_size(to); | ||
33 | int ret = -EFAULT; | ||
34 | |||
35 | might_fault(); | ||
36 | if (likely(sz == -1 || sz >= n)) | ||
37 | ret = _copy_from_user(to, from, n); | ||
38 | #ifdef CONFIG_DEBUG_VM | ||
39 | else | ||
40 | WARN(1, "Buffer overflow detected!\n"); | ||
41 | #endif | ||
42 | return ret; | ||
43 | } | ||
44 | |||
45 | static __always_inline __must_check | ||
46 | int copy_to_user(void __user *dst, const void *src, unsigned size) | ||
47 | { | ||
48 | might_fault(); | ||
49 | |||
50 | return _copy_to_user(dst, src, size); | ||
51 | } | ||
52 | |||
28 | static __always_inline __must_check | 53 | static __always_inline __must_check |
29 | int __copy_from_user(void *dst, const void __user *src, unsigned size) | 54 | int __copy_from_user(void *dst, const void __user *src, unsigned size) |
30 | { | 55 | { |
@@ -176,8 +201,11 @@ __must_check long strlen_user(const char __user *str); | |||
176 | __must_check unsigned long clear_user(void __user *mem, unsigned long len); | 201 | __must_check unsigned long clear_user(void __user *mem, unsigned long len); |
177 | __must_check unsigned long __clear_user(void __user *mem, unsigned long len); | 202 | __must_check unsigned long __clear_user(void __user *mem, unsigned long len); |
178 | 203 | ||
179 | __must_check long __copy_from_user_inatomic(void *dst, const void __user *src, | 204 | static __must_check __always_inline int |
180 | unsigned size); | 205 | __copy_from_user_inatomic(void *dst, const void __user *src, unsigned size) |
206 | { | ||
207 | return copy_user_generic(dst, (__force const void *)src, size); | ||
208 | } | ||
181 | 209 | ||
182 | static __must_check __always_inline int | 210 | static __must_check __always_inline int |
183 | __copy_to_user_inatomic(void __user *dst, const void *src, unsigned size) | 211 | __copy_to_user_inatomic(void __user *dst, const void *src, unsigned size) |
diff --git a/arch/x86/include/asm/uv/uv_irq.h b/arch/x86/include/asm/uv/uv_irq.h index 9613c8c0b647..d6b17c760622 100644 --- a/arch/x86/include/asm/uv/uv_irq.h +++ b/arch/x86/include/asm/uv/uv_irq.h | |||
@@ -25,12 +25,14 @@ struct uv_IO_APIC_route_entry { | |||
25 | dest : 32; | 25 | dest : 32; |
26 | }; | 26 | }; |
27 | 27 | ||
28 | extern struct irq_chip uv_irq_chip; | 28 | enum { |
29 | 29 | UV_AFFINITY_ALL, | |
30 | extern int arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long); | 30 | UV_AFFINITY_NODE, |
31 | extern void arch_disable_uv_irq(int, unsigned long); | 31 | UV_AFFINITY_CPU |
32 | }; | ||
32 | 33 | ||
33 | extern int uv_setup_irq(char *, int, int, unsigned long); | 34 | extern int uv_irq_2_mmr_info(int, unsigned long *, int *); |
34 | extern void uv_teardown_irq(unsigned int, int, unsigned long); | 35 | extern int uv_setup_irq(char *, int, int, unsigned long, int); |
36 | extern void uv_teardown_irq(unsigned int); | ||
35 | 37 | ||
36 | #endif /* _ASM_X86_UV_UV_IRQ_H */ | 38 | #endif /* _ASM_X86_UV_UV_IRQ_H */ |
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 2c756fd4ab0e..d8e71459f025 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h | |||
@@ -91,6 +91,14 @@ struct x86_init_timers { | |||
91 | }; | 91 | }; |
92 | 92 | ||
93 | /** | 93 | /** |
94 | * struct x86_init_iommu - platform specific iommu setup | ||
95 | * @iommu_init: platform specific iommu setup | ||
96 | */ | ||
97 | struct x86_init_iommu { | ||
98 | int (*iommu_init)(void); | ||
99 | }; | ||
100 | |||
101 | /** | ||
94 | * struct x86_init_ops - functions for platform specific setup | 102 | * struct x86_init_ops - functions for platform specific setup |
95 | * | 103 | * |
96 | */ | 104 | */ |
@@ -101,6 +109,7 @@ struct x86_init_ops { | |||
101 | struct x86_init_oem oem; | 109 | struct x86_init_oem oem; |
102 | struct x86_init_paging paging; | 110 | struct x86_init_paging paging; |
103 | struct x86_init_timers timers; | 111 | struct x86_init_timers timers; |
112 | struct x86_init_iommu iommu; | ||
104 | }; | 113 | }; |
105 | 114 | ||
106 | /** | 115 | /** |
@@ -121,6 +130,7 @@ struct x86_platform_ops { | |||
121 | unsigned long (*calibrate_tsc)(void); | 130 | unsigned long (*calibrate_tsc)(void); |
122 | unsigned long (*get_wallclock)(void); | 131 | unsigned long (*get_wallclock)(void); |
123 | int (*set_wallclock)(unsigned long nowtime); | 132 | int (*set_wallclock)(unsigned long nowtime); |
133 | void (*iommu_shutdown)(void); | ||
124 | }; | 134 | }; |
125 | 135 | ||
126 | extern struct x86_init_ops x86_init; | 136 | extern struct x86_init_ops x86_init; |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index d8e5d0cdd678..4f2e66e29ecc 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -40,7 +40,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o | |||
40 | obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o | 40 | obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o |
41 | obj-y += bootflag.o e820.o | 41 | obj-y += bootflag.o e820.o |
42 | obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o | 42 | obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o |
43 | obj-y += alternative.o i8253.o pci-nommu.o | 43 | obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o |
44 | obj-y += tsc.o io_delay.o rtc.o | 44 | obj-y += tsc.o io_delay.o rtc.o |
45 | 45 | ||
46 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o | 46 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o |
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0285521e0a99..32fb09102a13 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. | 2 | * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. |
3 | * Author: Joerg Roedel <joerg.roedel@amd.com> | 3 | * Author: Joerg Roedel <joerg.roedel@amd.com> |
4 | * Leo Duran <leo.duran@amd.com> | 4 | * Leo Duran <leo.duran@amd.com> |
5 | * | 5 | * |
@@ -28,6 +28,7 @@ | |||
28 | #include <asm/proto.h> | 28 | #include <asm/proto.h> |
29 | #include <asm/iommu.h> | 29 | #include <asm/iommu.h> |
30 | #include <asm/gart.h> | 30 | #include <asm/gart.h> |
31 | #include <asm/amd_iommu_proto.h> | ||
31 | #include <asm/amd_iommu_types.h> | 32 | #include <asm/amd_iommu_types.h> |
32 | #include <asm/amd_iommu.h> | 33 | #include <asm/amd_iommu.h> |
33 | 34 | ||
@@ -56,20 +57,115 @@ struct iommu_cmd { | |||
56 | u32 data[4]; | 57 | u32 data[4]; |
57 | }; | 58 | }; |
58 | 59 | ||
59 | static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | ||
60 | struct unity_map_entry *e); | ||
61 | static struct dma_ops_domain *find_protection_domain(u16 devid); | ||
62 | static u64 *alloc_pte(struct protection_domain *domain, | ||
63 | unsigned long address, int end_lvl, | ||
64 | u64 **pte_page, gfp_t gfp); | ||
65 | static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, | ||
66 | unsigned long start_page, | ||
67 | unsigned int pages); | ||
68 | static void reset_iommu_command_buffer(struct amd_iommu *iommu); | 60 | static void reset_iommu_command_buffer(struct amd_iommu *iommu); |
69 | static u64 *fetch_pte(struct protection_domain *domain, | ||
70 | unsigned long address, int map_size); | ||
71 | static void update_domain(struct protection_domain *domain); | 61 | static void update_domain(struct protection_domain *domain); |
72 | 62 | ||
63 | /**************************************************************************** | ||
64 | * | ||
65 | * Helper functions | ||
66 | * | ||
67 | ****************************************************************************/ | ||
68 | |||
69 | static inline u16 get_device_id(struct device *dev) | ||
70 | { | ||
71 | struct pci_dev *pdev = to_pci_dev(dev); | ||
72 | |||
73 | return calc_devid(pdev->bus->number, pdev->devfn); | ||
74 | } | ||
75 | |||
76 | static struct iommu_dev_data *get_dev_data(struct device *dev) | ||
77 | { | ||
78 | return dev->archdata.iommu; | ||
79 | } | ||
80 | |||
81 | /* | ||
82 | * In this function the list of preallocated protection domains is traversed to | ||
83 | * find the domain for a specific device | ||
84 | */ | ||
85 | static struct dma_ops_domain *find_protection_domain(u16 devid) | ||
86 | { | ||
87 | struct dma_ops_domain *entry, *ret = NULL; | ||
88 | unsigned long flags; | ||
89 | u16 alias = amd_iommu_alias_table[devid]; | ||
90 | |||
91 | if (list_empty(&iommu_pd_list)) | ||
92 | return NULL; | ||
93 | |||
94 | spin_lock_irqsave(&iommu_pd_list_lock, flags); | ||
95 | |||
96 | list_for_each_entry(entry, &iommu_pd_list, list) { | ||
97 | if (entry->target_dev == devid || | ||
98 | entry->target_dev == alias) { | ||
99 | ret = entry; | ||
100 | break; | ||
101 | } | ||
102 | } | ||
103 | |||
104 | spin_unlock_irqrestore(&iommu_pd_list_lock, flags); | ||
105 | |||
106 | return ret; | ||
107 | } | ||
108 | |||
109 | /* | ||
110 | * This function checks if the driver got a valid device from the caller to | ||
111 | * avoid dereferencing invalid pointers. | ||
112 | */ | ||
113 | static bool check_device(struct device *dev) | ||
114 | { | ||
115 | u16 devid; | ||
116 | |||
117 | if (!dev || !dev->dma_mask) | ||
118 | return false; | ||
119 | |||
120 | /* No device or no PCI device */ | ||
121 | if (!dev || dev->bus != &pci_bus_type) | ||
122 | return false; | ||
123 | |||
124 | devid = get_device_id(dev); | ||
125 | |||
126 | /* Out of our scope? */ | ||
127 | if (devid > amd_iommu_last_bdf) | ||
128 | return false; | ||
129 | |||
130 | if (amd_iommu_rlookup_table[devid] == NULL) | ||
131 | return false; | ||
132 | |||
133 | return true; | ||
134 | } | ||
135 | |||
136 | static int iommu_init_device(struct device *dev) | ||
137 | { | ||
138 | struct iommu_dev_data *dev_data; | ||
139 | struct pci_dev *pdev; | ||
140 | u16 devid, alias; | ||
141 | |||
142 | if (dev->archdata.iommu) | ||
143 | return 0; | ||
144 | |||
145 | dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL); | ||
146 | if (!dev_data) | ||
147 | return -ENOMEM; | ||
148 | |||
149 | dev_data->dev = dev; | ||
150 | |||
151 | devid = get_device_id(dev); | ||
152 | alias = amd_iommu_alias_table[devid]; | ||
153 | pdev = pci_get_bus_and_slot(PCI_BUS(alias), alias & 0xff); | ||
154 | if (pdev) | ||
155 | dev_data->alias = &pdev->dev; | ||
156 | |||
157 | atomic_set(&dev_data->bind, 0); | ||
158 | |||
159 | dev->archdata.iommu = dev_data; | ||
160 | |||
161 | |||
162 | return 0; | ||
163 | } | ||
164 | |||
165 | static void iommu_uninit_device(struct device *dev) | ||
166 | { | ||
167 | kfree(dev->archdata.iommu); | ||
168 | } | ||
73 | #ifdef CONFIG_AMD_IOMMU_STATS | 169 | #ifdef CONFIG_AMD_IOMMU_STATS |
74 | 170 | ||
75 | /* | 171 | /* |
@@ -90,7 +186,6 @@ DECLARE_STATS_COUNTER(alloced_io_mem); | |||
90 | DECLARE_STATS_COUNTER(total_map_requests); | 186 | DECLARE_STATS_COUNTER(total_map_requests); |
91 | 187 | ||
92 | static struct dentry *stats_dir; | 188 | static struct dentry *stats_dir; |
93 | static struct dentry *de_isolate; | ||
94 | static struct dentry *de_fflush; | 189 | static struct dentry *de_fflush; |
95 | 190 | ||
96 | static void amd_iommu_stats_add(struct __iommu_counter *cnt) | 191 | static void amd_iommu_stats_add(struct __iommu_counter *cnt) |
@@ -108,9 +203,6 @@ static void amd_iommu_stats_init(void) | |||
108 | if (stats_dir == NULL) | 203 | if (stats_dir == NULL) |
109 | return; | 204 | return; |
110 | 205 | ||
111 | de_isolate = debugfs_create_bool("isolation", 0444, stats_dir, | ||
112 | (u32 *)&amd_iommu_isolate); | ||
113 | |||
114 | de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir, | 206 | de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir, |
115 | (u32 *)&amd_iommu_unmap_flush); | 207 | (u32 *)&amd_iommu_unmap_flush); |
116 | 208 | ||
@@ -130,12 +222,6 @@ static void amd_iommu_stats_init(void) | |||
130 | 222 | ||
131 | #endif | 223 | #endif |
132 | 224 | ||
133 | /* returns !0 if the IOMMU is caching non-present entries in its TLB */ | ||
134 | static int iommu_has_npcache(struct amd_iommu *iommu) | ||
135 | { | ||
136 | return iommu->cap & (1UL << IOMMU_CAP_NPCACHE); | ||
137 | } | ||
138 | |||
139 | /**************************************************************************** | 225 | /**************************************************************************** |
140 | * | 226 | * |
141 | * Interrupt handling functions | 227 | * Interrupt handling functions |
@@ -199,6 +285,7 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt) | |||
199 | break; | 285 | break; |
200 | case EVENT_TYPE_ILL_CMD: | 286 | case EVENT_TYPE_ILL_CMD: |
201 | printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); | 287 | printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); |
288 | iommu->reset_in_progress = true; | ||
202 | reset_iommu_command_buffer(iommu); | 289 | reset_iommu_command_buffer(iommu); |
203 | dump_command(address); | 290 | dump_command(address); |
204 | break; | 291 | break; |
@@ -321,11 +408,8 @@ static void __iommu_wait_for_completion(struct amd_iommu *iommu) | |||
321 | status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; | 408 | status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; |
322 | writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); | 409 | writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); |
323 | 410 | ||
324 | if (unlikely(i == EXIT_LOOP_COUNT)) { | 411 | if (unlikely(i == EXIT_LOOP_COUNT)) |
325 | spin_unlock(&iommu->lock); | 412 | iommu->reset_in_progress = true; |
326 | reset_iommu_command_buffer(iommu); | ||
327 | spin_lock(&iommu->lock); | ||
328 | } | ||
329 | } | 413 | } |
330 | 414 | ||
331 | /* | 415 | /* |
@@ -372,26 +456,46 @@ static int iommu_completion_wait(struct amd_iommu *iommu) | |||
372 | out: | 456 | out: |
373 | spin_unlock_irqrestore(&iommu->lock, flags); | 457 | spin_unlock_irqrestore(&iommu->lock, flags); |
374 | 458 | ||
459 | if (iommu->reset_in_progress) | ||
460 | reset_iommu_command_buffer(iommu); | ||
461 | |||
375 | return 0; | 462 | return 0; |
376 | } | 463 | } |
377 | 464 | ||
465 | static void iommu_flush_complete(struct protection_domain *domain) | ||
466 | { | ||
467 | int i; | ||
468 | |||
469 | for (i = 0; i < amd_iommus_present; ++i) { | ||
470 | if (!domain->dev_iommu[i]) | ||
471 | continue; | ||
472 | |||
473 | /* | ||
474 | * Devices of this domain are behind this IOMMU | ||
475 | * We need to wait for completion of all commands. | ||
476 | */ | ||
477 | iommu_completion_wait(amd_iommus[i]); | ||
478 | } | ||
479 | } | ||
480 | |||
378 | /* | 481 | /* |
379 | * Command send function for invalidating a device table entry | 482 | * Command send function for invalidating a device table entry |
380 | */ | 483 | */ |
381 | static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) | 484 | static int iommu_flush_device(struct device *dev) |
382 | { | 485 | { |
486 | struct amd_iommu *iommu; | ||
383 | struct iommu_cmd cmd; | 487 | struct iommu_cmd cmd; |
384 | int ret; | 488 | u16 devid; |
385 | 489 | ||
386 | BUG_ON(iommu == NULL); | 490 | devid = get_device_id(dev); |
491 | iommu = amd_iommu_rlookup_table[devid]; | ||
387 | 492 | ||
493 | /* Build command */ | ||
388 | memset(&cmd, 0, sizeof(cmd)); | 494 | memset(&cmd, 0, sizeof(cmd)); |
389 | CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); | 495 | CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); |
390 | cmd.data[0] = devid; | 496 | cmd.data[0] = devid; |
391 | 497 | ||
392 | ret = iommu_queue_command(iommu, &cmd); | 498 | return iommu_queue_command(iommu, &cmd); |
393 | |||
394 | return ret; | ||
395 | } | 499 | } |
396 | 500 | ||
397 | static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, | 501 | static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, |
@@ -430,11 +534,11 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, | |||
430 | * It invalidates a single PTE if the range to flush is within a single | 534 | * It invalidates a single PTE if the range to flush is within a single |
431 | * page. Otherwise it flushes the whole TLB of the IOMMU. | 535 | * page. Otherwise it flushes the whole TLB of the IOMMU. |
432 | */ | 536 | */ |
433 | static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, | 537 | static void __iommu_flush_pages(struct protection_domain *domain, |
434 | u64 address, size_t size) | 538 | u64 address, size_t size, int pde) |
435 | { | 539 | { |
436 | int s = 0; | 540 | int s = 0, i; |
437 | unsigned pages = iommu_num_pages(address, size, PAGE_SIZE); | 541 | unsigned long pages = iommu_num_pages(address, size, PAGE_SIZE); |
438 | 542 | ||
439 | address &= PAGE_MASK; | 543 | address &= PAGE_MASK; |
440 | 544 | ||
@@ -447,142 +551,212 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, | |||
447 | s = 1; | 551 | s = 1; |
448 | } | 552 | } |
449 | 553 | ||
450 | iommu_queue_inv_iommu_pages(iommu, address, domid, 0, s); | ||
451 | 554 | ||
452 | return 0; | 555 | for (i = 0; i < amd_iommus_present; ++i) { |
556 | if (!domain->dev_iommu[i]) | ||
557 | continue; | ||
558 | |||
559 | /* | ||
560 | * Devices of this domain are behind this IOMMU | ||
561 | * We need a TLB flush | ||
562 | */ | ||
563 | iommu_queue_inv_iommu_pages(amd_iommus[i], address, | ||
564 | domain->id, pde, s); | ||
565 | } | ||
566 | |||
567 | return; | ||
453 | } | 568 | } |
454 | 569 | ||
455 | /* Flush the whole IO/TLB for a given protection domain */ | 570 | static void iommu_flush_pages(struct protection_domain *domain, |
456 | static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid) | 571 | u64 address, size_t size) |
457 | { | 572 | { |
458 | u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; | 573 | __iommu_flush_pages(domain, address, size, 0); |
459 | 574 | } | |
460 | INC_STATS_COUNTER(domain_flush_single); | ||
461 | 575 | ||
462 | iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); | 576 | /* Flush the whole IO/TLB for a given protection domain */ |
577 | static void iommu_flush_tlb(struct protection_domain *domain) | ||
578 | { | ||
579 | __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0); | ||
463 | } | 580 | } |
464 | 581 | ||
465 | /* Flush the whole IO/TLB for a given protection domain - including PDE */ | 582 | /* Flush the whole IO/TLB for a given protection domain - including PDE */ |
466 | static void iommu_flush_tlb_pde(struct amd_iommu *iommu, u16 domid) | 583 | static void iommu_flush_tlb_pde(struct protection_domain *domain) |
467 | { | 584 | { |
468 | u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; | 585 | __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); |
469 | |||
470 | INC_STATS_COUNTER(domain_flush_single); | ||
471 | |||
472 | iommu_queue_inv_iommu_pages(iommu, address, domid, 1, 1); | ||
473 | } | 586 | } |
474 | 587 | ||
588 | |||
475 | /* | 589 | /* |
476 | * This function flushes one domain on one IOMMU | 590 | * This function flushes the DTEs for all devices in domain |
477 | */ | 591 | */ |
478 | static void flush_domain_on_iommu(struct amd_iommu *iommu, u16 domid) | 592 | static void iommu_flush_domain_devices(struct protection_domain *domain) |
479 | { | 593 | { |
480 | struct iommu_cmd cmd; | 594 | struct iommu_dev_data *dev_data; |
481 | unsigned long flags; | 595 | unsigned long flags; |
482 | 596 | ||
483 | __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, | 597 | spin_lock_irqsave(&domain->lock, flags); |
484 | domid, 1, 1); | ||
485 | 598 | ||
486 | spin_lock_irqsave(&iommu->lock, flags); | 599 | list_for_each_entry(dev_data, &domain->dev_list, list) |
487 | __iommu_queue_command(iommu, &cmd); | 600 | iommu_flush_device(dev_data->dev); |
488 | __iommu_completion_wait(iommu); | 601 | |
489 | __iommu_wait_for_completion(iommu); | 602 | spin_unlock_irqrestore(&domain->lock, flags); |
490 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
491 | } | 603 | } |
492 | 604 | ||
493 | static void flush_all_domains_on_iommu(struct amd_iommu *iommu) | 605 | static void iommu_flush_all_domain_devices(void) |
494 | { | 606 | { |
495 | int i; | 607 | struct protection_domain *domain; |
608 | unsigned long flags; | ||
496 | 609 | ||
497 | for (i = 1; i < MAX_DOMAIN_ID; ++i) { | 610 | spin_lock_irqsave(&amd_iommu_pd_lock, flags); |
498 | if (!test_bit(i, amd_iommu_pd_alloc_bitmap)) | 611 | |
499 | continue; | 612 | list_for_each_entry(domain, &amd_iommu_pd_list, list) { |
500 | flush_domain_on_iommu(iommu, i); | 613 | iommu_flush_domain_devices(domain); |
614 | iommu_flush_complete(domain); | ||
501 | } | 615 | } |
502 | 616 | ||
617 | spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); | ||
618 | } | ||
619 | |||
620 | void amd_iommu_flush_all_devices(void) | ||
621 | { | ||
622 | iommu_flush_all_domain_devices(); | ||
503 | } | 623 | } |
504 | 624 | ||
505 | /* | 625 | /* |
506 | * This function is used to flush the IO/TLB for a given protection domain | 626 | * This function uses heavy locking and may disable irqs for some time. But |
507 | * on every IOMMU in the system | 627 | * this is no issue because it is only called during resume. |
508 | */ | 628 | */ |
509 | static void iommu_flush_domain(u16 domid) | 629 | void amd_iommu_flush_all_domains(void) |
510 | { | 630 | { |
511 | struct amd_iommu *iommu; | 631 | struct protection_domain *domain; |
632 | unsigned long flags; | ||
512 | 633 | ||
513 | INC_STATS_COUNTER(domain_flush_all); | 634 | spin_lock_irqsave(&amd_iommu_pd_lock, flags); |
514 | 635 | ||
515 | for_each_iommu(iommu) | 636 | list_for_each_entry(domain, &amd_iommu_pd_list, list) { |
516 | flush_domain_on_iommu(iommu, domid); | 637 | spin_lock(&domain->lock); |
638 | iommu_flush_tlb_pde(domain); | ||
639 | iommu_flush_complete(domain); | ||
640 | spin_unlock(&domain->lock); | ||
641 | } | ||
642 | |||
643 | spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); | ||
517 | } | 644 | } |
518 | 645 | ||
519 | void amd_iommu_flush_all_domains(void) | 646 | static void reset_iommu_command_buffer(struct amd_iommu *iommu) |
520 | { | 647 | { |
521 | struct amd_iommu *iommu; | 648 | pr_err("AMD-Vi: Resetting IOMMU command buffer\n"); |
522 | 649 | ||
523 | for_each_iommu(iommu) | 650 | if (iommu->reset_in_progress) |
524 | flush_all_domains_on_iommu(iommu); | 651 | panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n"); |
652 | |||
653 | amd_iommu_reset_cmd_buffer(iommu); | ||
654 | amd_iommu_flush_all_devices(); | ||
655 | amd_iommu_flush_all_domains(); | ||
656 | |||
657 | iommu->reset_in_progress = false; | ||
525 | } | 658 | } |
526 | 659 | ||
527 | static void flush_all_devices_for_iommu(struct amd_iommu *iommu) | 660 | /**************************************************************************** |
661 | * | ||
662 | * The functions below are used the create the page table mappings for | ||
663 | * unity mapped regions. | ||
664 | * | ||
665 | ****************************************************************************/ | ||
666 | |||
667 | /* | ||
668 | * This function is used to add another level to an IO page table. Adding | ||
669 | * another level increases the size of the address space by 9 bits to a size up | ||
670 | * to 64 bits. | ||
671 | */ | ||
672 | static bool increase_address_space(struct protection_domain *domain, | ||
673 | gfp_t gfp) | ||
528 | { | 674 | { |
529 | int i; | 675 | u64 *pte; |
530 | 676 | ||
531 | for (i = 0; i <= amd_iommu_last_bdf; ++i) { | 677 | if (domain->mode == PAGE_MODE_6_LEVEL) |
532 | if (iommu != amd_iommu_rlookup_table[i]) | 678 | /* address space already 64 bit large */ |
533 | continue; | 679 | return false; |
534 | 680 | ||
535 | iommu_queue_inv_dev_entry(iommu, i); | 681 | pte = (void *)get_zeroed_page(gfp); |
536 | iommu_completion_wait(iommu); | 682 | if (!pte) |
537 | } | 683 | return false; |
684 | |||
685 | *pte = PM_LEVEL_PDE(domain->mode, | ||
686 | virt_to_phys(domain->pt_root)); | ||
687 | domain->pt_root = pte; | ||
688 | domain->mode += 1; | ||
689 | domain->updated = true; | ||
690 | |||
691 | return true; | ||
538 | } | 692 | } |
539 | 693 | ||
540 | static void flush_devices_by_domain(struct protection_domain *domain) | 694 | static u64 *alloc_pte(struct protection_domain *domain, |
695 | unsigned long address, | ||
696 | int end_lvl, | ||
697 | u64 **pte_page, | ||
698 | gfp_t gfp) | ||
541 | { | 699 | { |
542 | struct amd_iommu *iommu; | 700 | u64 *pte, *page; |
543 | int i; | 701 | int level; |
544 | 702 | ||
545 | for (i = 0; i <= amd_iommu_last_bdf; ++i) { | 703 | while (address > PM_LEVEL_SIZE(domain->mode)) |
546 | if ((domain == NULL && amd_iommu_pd_table[i] == NULL) || | 704 | increase_address_space(domain, gfp); |
547 | (amd_iommu_pd_table[i] != domain)) | ||
548 | continue; | ||
549 | 705 | ||
550 | iommu = amd_iommu_rlookup_table[i]; | 706 | level = domain->mode - 1; |
551 | if (!iommu) | 707 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; |
552 | continue; | ||
553 | 708 | ||
554 | iommu_queue_inv_dev_entry(iommu, i); | 709 | while (level > end_lvl) { |
555 | iommu_completion_wait(iommu); | 710 | if (!IOMMU_PTE_PRESENT(*pte)) { |
711 | page = (u64 *)get_zeroed_page(gfp); | ||
712 | if (!page) | ||
713 | return NULL; | ||
714 | *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); | ||
715 | } | ||
716 | |||
717 | level -= 1; | ||
718 | |||
719 | pte = IOMMU_PTE_PAGE(*pte); | ||
720 | |||
721 | if (pte_page && level == end_lvl) | ||
722 | *pte_page = pte; | ||
723 | |||
724 | pte = &pte[PM_LEVEL_INDEX(level, address)]; | ||
556 | } | 725 | } |
726 | |||
727 | return pte; | ||
557 | } | 728 | } |
558 | 729 | ||
559 | static void reset_iommu_command_buffer(struct amd_iommu *iommu) | 730 | /* |
731 | * This function checks if there is a PTE for a given dma address. If | ||
732 | * there is one, it returns the pointer to it. | ||
733 | */ | ||
734 | static u64 *fetch_pte(struct protection_domain *domain, | ||
735 | unsigned long address, int map_size) | ||
560 | { | 736 | { |
561 | pr_err("AMD-Vi: Resetting IOMMU command buffer\n"); | 737 | int level; |
738 | u64 *pte; | ||
562 | 739 | ||
563 | if (iommu->reset_in_progress) | 740 | level = domain->mode - 1; |
564 | panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n"); | 741 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; |
565 | 742 | ||
566 | iommu->reset_in_progress = true; | 743 | while (level > map_size) { |
744 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
745 | return NULL; | ||
567 | 746 | ||
568 | amd_iommu_reset_cmd_buffer(iommu); | 747 | level -= 1; |
569 | flush_all_devices_for_iommu(iommu); | ||
570 | flush_all_domains_on_iommu(iommu); | ||
571 | 748 | ||
572 | iommu->reset_in_progress = false; | 749 | pte = IOMMU_PTE_PAGE(*pte); |
573 | } | 750 | pte = &pte[PM_LEVEL_INDEX(level, address)]; |
574 | 751 | ||
575 | void amd_iommu_flush_all_devices(void) | 752 | if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) { |
576 | { | 753 | pte = NULL; |
577 | flush_devices_by_domain(NULL); | 754 | break; |
578 | } | 755 | } |
756 | } | ||
579 | 757 | ||
580 | /**************************************************************************** | 758 | return pte; |
581 | * | 759 | } |
582 | * The functions below are used the create the page table mappings for | ||
583 | * unity mapped regions. | ||
584 | * | ||
585 | ****************************************************************************/ | ||
586 | 760 | ||
587 | /* | 761 | /* |
588 | * Generic mapping functions. It maps a physical address into a DMA | 762 | * Generic mapping functions. It maps a physical address into a DMA |
@@ -654,28 +828,6 @@ static int iommu_for_unity_map(struct amd_iommu *iommu, | |||
654 | } | 828 | } |
655 | 829 | ||
656 | /* | 830 | /* |
657 | * Init the unity mappings for a specific IOMMU in the system | ||
658 | * | ||
659 | * Basically iterates over all unity mapping entries and applies them to | ||
660 | * the default domain DMA of that IOMMU if necessary. | ||
661 | */ | ||
662 | static int iommu_init_unity_mappings(struct amd_iommu *iommu) | ||
663 | { | ||
664 | struct unity_map_entry *entry; | ||
665 | int ret; | ||
666 | |||
667 | list_for_each_entry(entry, &amd_iommu_unity_map, list) { | ||
668 | if (!iommu_for_unity_map(iommu, entry)) | ||
669 | continue; | ||
670 | ret = dma_ops_unity_map(iommu->default_dom, entry); | ||
671 | if (ret) | ||
672 | return ret; | ||
673 | } | ||
674 | |||
675 | return 0; | ||
676 | } | ||
677 | |||
678 | /* | ||
679 | * This function actually applies the mapping to the page table of the | 831 | * This function actually applies the mapping to the page table of the |
680 | * dma_ops domain. | 832 | * dma_ops domain. |
681 | */ | 833 | */ |
@@ -704,6 +856,28 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | |||
704 | } | 856 | } |
705 | 857 | ||
706 | /* | 858 | /* |
859 | * Init the unity mappings for a specific IOMMU in the system | ||
860 | * | ||
861 | * Basically iterates over all unity mapping entries and applies them to | ||
862 | * the default domain DMA of that IOMMU if necessary. | ||
863 | */ | ||
864 | static int iommu_init_unity_mappings(struct amd_iommu *iommu) | ||
865 | { | ||
866 | struct unity_map_entry *entry; | ||
867 | int ret; | ||
868 | |||
869 | list_for_each_entry(entry, &amd_iommu_unity_map, list) { | ||
870 | if (!iommu_for_unity_map(iommu, entry)) | ||
871 | continue; | ||
872 | ret = dma_ops_unity_map(iommu->default_dom, entry); | ||
873 | if (ret) | ||
874 | return ret; | ||
875 | } | ||
876 | |||
877 | return 0; | ||
878 | } | ||
879 | |||
880 | /* | ||
707 | * Inits the unity mappings required for a specific device | 881 | * Inits the unity mappings required for a specific device |
708 | */ | 882 | */ |
709 | static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, | 883 | static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, |
@@ -740,34 +914,23 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, | |||
740 | */ | 914 | */ |
741 | 915 | ||
742 | /* | 916 | /* |
743 | * This function checks if there is a PTE for a given dma address. If | 917 | * Used to reserve address ranges in the aperture (e.g. for exclusion |
744 | * there is one, it returns the pointer to it. | 918 | * ranges. |
745 | */ | 919 | */ |
746 | static u64 *fetch_pte(struct protection_domain *domain, | 920 | static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, |
747 | unsigned long address, int map_size) | 921 | unsigned long start_page, |
922 | unsigned int pages) | ||
748 | { | 923 | { |
749 | int level; | 924 | unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT; |
750 | u64 *pte; | ||
751 | |||
752 | level = domain->mode - 1; | ||
753 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; | ||
754 | |||
755 | while (level > map_size) { | ||
756 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
757 | return NULL; | ||
758 | |||
759 | level -= 1; | ||
760 | 925 | ||
761 | pte = IOMMU_PTE_PAGE(*pte); | 926 | if (start_page + pages > last_page) |
762 | pte = &pte[PM_LEVEL_INDEX(level, address)]; | 927 | pages = last_page - start_page; |
763 | 928 | ||
764 | if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) { | 929 | for (i = start_page; i < start_page + pages; ++i) { |
765 | pte = NULL; | 930 | int index = i / APERTURE_RANGE_PAGES; |
766 | break; | 931 | int page = i % APERTURE_RANGE_PAGES; |
767 | } | 932 | __set_bit(page, dom->aperture[index]->bitmap); |
768 | } | 933 | } |
769 | |||
770 | return pte; | ||
771 | } | 934 | } |
772 | 935 | ||
773 | /* | 936 | /* |
@@ -775,11 +938,11 @@ static u64 *fetch_pte(struct protection_domain *domain, | |||
775 | * aperture in case of dma_ops domain allocation or address allocation | 938 | * aperture in case of dma_ops domain allocation or address allocation |
776 | * failure. | 939 | * failure. |
777 | */ | 940 | */ |
778 | static int alloc_new_range(struct amd_iommu *iommu, | 941 | static int alloc_new_range(struct dma_ops_domain *dma_dom, |
779 | struct dma_ops_domain *dma_dom, | ||
780 | bool populate, gfp_t gfp) | 942 | bool populate, gfp_t gfp) |
781 | { | 943 | { |
782 | int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; | 944 | int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; |
945 | struct amd_iommu *iommu; | ||
783 | int i; | 946 | int i; |
784 | 947 | ||
785 | #ifdef CONFIG_IOMMU_STRESS | 948 | #ifdef CONFIG_IOMMU_STRESS |
@@ -819,14 +982,17 @@ static int alloc_new_range(struct amd_iommu *iommu, | |||
819 | dma_dom->aperture_size += APERTURE_RANGE_SIZE; | 982 | dma_dom->aperture_size += APERTURE_RANGE_SIZE; |
820 | 983 | ||
821 | /* Intialize the exclusion range if necessary */ | 984 | /* Intialize the exclusion range if necessary */ |
822 | if (iommu->exclusion_start && | 985 | for_each_iommu(iommu) { |
823 | iommu->exclusion_start >= dma_dom->aperture[index]->offset && | 986 | if (iommu->exclusion_start && |
824 | iommu->exclusion_start < dma_dom->aperture_size) { | 987 | iommu->exclusion_start >= dma_dom->aperture[index]->offset |
825 | unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; | 988 | && iommu->exclusion_start < dma_dom->aperture_size) { |
826 | int pages = iommu_num_pages(iommu->exclusion_start, | 989 | unsigned long startpage; |
827 | iommu->exclusion_length, | 990 | int pages = iommu_num_pages(iommu->exclusion_start, |
828 | PAGE_SIZE); | 991 | iommu->exclusion_length, |
829 | dma_ops_reserve_addresses(dma_dom, startpage, pages); | 992 | PAGE_SIZE); |
993 | startpage = iommu->exclusion_start >> PAGE_SHIFT; | ||
994 | dma_ops_reserve_addresses(dma_dom, startpage, pages); | ||
995 | } | ||
830 | } | 996 | } |
831 | 997 | ||
832 | /* | 998 | /* |
@@ -928,7 +1094,7 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev, | |||
928 | } | 1094 | } |
929 | 1095 | ||
930 | if (unlikely(address == -1)) | 1096 | if (unlikely(address == -1)) |
931 | address = bad_dma_address; | 1097 | address = DMA_ERROR_CODE; |
932 | 1098 | ||
933 | WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); | 1099 | WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); |
934 | 1100 | ||
@@ -973,6 +1139,31 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, | |||
973 | * | 1139 | * |
974 | ****************************************************************************/ | 1140 | ****************************************************************************/ |
975 | 1141 | ||
1142 | /* | ||
1143 | * This function adds a protection domain to the global protection domain list | ||
1144 | */ | ||
1145 | static void add_domain_to_list(struct protection_domain *domain) | ||
1146 | { | ||
1147 | unsigned long flags; | ||
1148 | |||
1149 | spin_lock_irqsave(&amd_iommu_pd_lock, flags); | ||
1150 | list_add(&domain->list, &amd_iommu_pd_list); | ||
1151 | spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); | ||
1152 | } | ||
1153 | |||
1154 | /* | ||
1155 | * This function removes a protection domain to the global | ||
1156 | * protection domain list | ||
1157 | */ | ||
1158 | static void del_domain_from_list(struct protection_domain *domain) | ||
1159 | { | ||
1160 | unsigned long flags; | ||
1161 | |||
1162 | spin_lock_irqsave(&amd_iommu_pd_lock, flags); | ||
1163 | list_del(&domain->list); | ||
1164 | spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); | ||
1165 | } | ||
1166 | |||
976 | static u16 domain_id_alloc(void) | 1167 | static u16 domain_id_alloc(void) |
977 | { | 1168 | { |
978 | unsigned long flags; | 1169 | unsigned long flags; |
@@ -1000,26 +1191,6 @@ static void domain_id_free(int id) | |||
1000 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | 1191 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); |
1001 | } | 1192 | } |
1002 | 1193 | ||
1003 | /* | ||
1004 | * Used to reserve address ranges in the aperture (e.g. for exclusion | ||
1005 | * ranges. | ||
1006 | */ | ||
1007 | static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, | ||
1008 | unsigned long start_page, | ||
1009 | unsigned int pages) | ||
1010 | { | ||
1011 | unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT; | ||
1012 | |||
1013 | if (start_page + pages > last_page) | ||
1014 | pages = last_page - start_page; | ||
1015 | |||
1016 | for (i = start_page; i < start_page + pages; ++i) { | ||
1017 | int index = i / APERTURE_RANGE_PAGES; | ||
1018 | int page = i % APERTURE_RANGE_PAGES; | ||
1019 | __set_bit(page, dom->aperture[index]->bitmap); | ||
1020 | } | ||
1021 | } | ||
1022 | |||
1023 | static void free_pagetable(struct protection_domain *domain) | 1194 | static void free_pagetable(struct protection_domain *domain) |
1024 | { | 1195 | { |
1025 | int i, j; | 1196 | int i, j; |
@@ -1061,6 +1232,8 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) | |||
1061 | if (!dom) | 1232 | if (!dom) |
1062 | return; | 1233 | return; |
1063 | 1234 | ||
1235 | del_domain_from_list(&dom->domain); | ||
1236 | |||
1064 | free_pagetable(&dom->domain); | 1237 | free_pagetable(&dom->domain); |
1065 | 1238 | ||
1066 | for (i = 0; i < APERTURE_MAX_RANGES; ++i) { | 1239 | for (i = 0; i < APERTURE_MAX_RANGES; ++i) { |
@@ -1078,7 +1251,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) | |||
1078 | * It also intializes the page table and the address allocator data | 1251 | * It also intializes the page table and the address allocator data |
1079 | * structures required for the dma_ops interface | 1252 | * structures required for the dma_ops interface |
1080 | */ | 1253 | */ |
1081 | static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) | 1254 | static struct dma_ops_domain *dma_ops_domain_alloc(void) |
1082 | { | 1255 | { |
1083 | struct dma_ops_domain *dma_dom; | 1256 | struct dma_ops_domain *dma_dom; |
1084 | 1257 | ||
@@ -1091,6 +1264,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) | |||
1091 | dma_dom->domain.id = domain_id_alloc(); | 1264 | dma_dom->domain.id = domain_id_alloc(); |
1092 | if (dma_dom->domain.id == 0) | 1265 | if (dma_dom->domain.id == 0) |
1093 | goto free_dma_dom; | 1266 | goto free_dma_dom; |
1267 | INIT_LIST_HEAD(&dma_dom->domain.dev_list); | ||
1094 | dma_dom->domain.mode = PAGE_MODE_2_LEVEL; | 1268 | dma_dom->domain.mode = PAGE_MODE_2_LEVEL; |
1095 | dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); | 1269 | dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); |
1096 | dma_dom->domain.flags = PD_DMA_OPS_MASK; | 1270 | dma_dom->domain.flags = PD_DMA_OPS_MASK; |
@@ -1101,7 +1275,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) | |||
1101 | dma_dom->need_flush = false; | 1275 | dma_dom->need_flush = false; |
1102 | dma_dom->target_dev = 0xffff; | 1276 | dma_dom->target_dev = 0xffff; |
1103 | 1277 | ||
1104 | if (alloc_new_range(iommu, dma_dom, true, GFP_KERNEL)) | 1278 | add_domain_to_list(&dma_dom->domain); |
1279 | |||
1280 | if (alloc_new_range(dma_dom, true, GFP_KERNEL)) | ||
1105 | goto free_dma_dom; | 1281 | goto free_dma_dom; |
1106 | 1282 | ||
1107 | /* | 1283 | /* |
@@ -1129,22 +1305,6 @@ static bool dma_ops_domain(struct protection_domain *domain) | |||
1129 | return domain->flags & PD_DMA_OPS_MASK; | 1305 | return domain->flags & PD_DMA_OPS_MASK; |
1130 | } | 1306 | } |
1131 | 1307 | ||
1132 | /* | ||
1133 | * Find out the protection domain structure for a given PCI device. This | ||
1134 | * will give us the pointer to the page table root for example. | ||
1135 | */ | ||
1136 | static struct protection_domain *domain_for_device(u16 devid) | ||
1137 | { | ||
1138 | struct protection_domain *dom; | ||
1139 | unsigned long flags; | ||
1140 | |||
1141 | read_lock_irqsave(&amd_iommu_devtable_lock, flags); | ||
1142 | dom = amd_iommu_pd_table[devid]; | ||
1143 | read_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | ||
1144 | |||
1145 | return dom; | ||
1146 | } | ||
1147 | |||
1148 | static void set_dte_entry(u16 devid, struct protection_domain *domain) | 1308 | static void set_dte_entry(u16 devid, struct protection_domain *domain) |
1149 | { | 1309 | { |
1150 | u64 pte_root = virt_to_phys(domain->pt_root); | 1310 | u64 pte_root = virt_to_phys(domain->pt_root); |
@@ -1156,42 +1316,123 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain) | |||
1156 | amd_iommu_dev_table[devid].data[2] = domain->id; | 1316 | amd_iommu_dev_table[devid].data[2] = domain->id; |
1157 | amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); | 1317 | amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); |
1158 | amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); | 1318 | amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); |
1319 | } | ||
1320 | |||
1321 | static void clear_dte_entry(u16 devid) | ||
1322 | { | ||
1323 | /* remove entry from the device table seen by the hardware */ | ||
1324 | amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; | ||
1325 | amd_iommu_dev_table[devid].data[1] = 0; | ||
1326 | amd_iommu_dev_table[devid].data[2] = 0; | ||
1159 | 1327 | ||
1160 | amd_iommu_pd_table[devid] = domain; | 1328 | amd_iommu_apply_erratum_63(devid); |
1329 | } | ||
1330 | |||
1331 | static void do_attach(struct device *dev, struct protection_domain *domain) | ||
1332 | { | ||
1333 | struct iommu_dev_data *dev_data; | ||
1334 | struct amd_iommu *iommu; | ||
1335 | u16 devid; | ||
1336 | |||
1337 | devid = get_device_id(dev); | ||
1338 | iommu = amd_iommu_rlookup_table[devid]; | ||
1339 | dev_data = get_dev_data(dev); | ||
1340 | |||
1341 | /* Update data structures */ | ||
1342 | dev_data->domain = domain; | ||
1343 | list_add(&dev_data->list, &domain->dev_list); | ||
1344 | set_dte_entry(devid, domain); | ||
1345 | |||
1346 | /* Do reference counting */ | ||
1347 | domain->dev_iommu[iommu->index] += 1; | ||
1348 | domain->dev_cnt += 1; | ||
1349 | |||
1350 | /* Flush the DTE entry */ | ||
1351 | iommu_flush_device(dev); | ||
1352 | } | ||
1353 | |||
1354 | static void do_detach(struct device *dev) | ||
1355 | { | ||
1356 | struct iommu_dev_data *dev_data; | ||
1357 | struct amd_iommu *iommu; | ||
1358 | u16 devid; | ||
1359 | |||
1360 | devid = get_device_id(dev); | ||
1361 | iommu = amd_iommu_rlookup_table[devid]; | ||
1362 | dev_data = get_dev_data(dev); | ||
1363 | |||
1364 | /* decrease reference counters */ | ||
1365 | dev_data->domain->dev_iommu[iommu->index] -= 1; | ||
1366 | dev_data->domain->dev_cnt -= 1; | ||
1367 | |||
1368 | /* Update data structures */ | ||
1369 | dev_data->domain = NULL; | ||
1370 | list_del(&dev_data->list); | ||
1371 | clear_dte_entry(devid); | ||
1372 | |||
1373 | /* Flush the DTE entry */ | ||
1374 | iommu_flush_device(dev); | ||
1161 | } | 1375 | } |
1162 | 1376 | ||
1163 | /* | 1377 | /* |
1164 | * If a device is not yet associated with a domain, this function does | 1378 | * If a device is not yet associated with a domain, this function does |
1165 | * assigns it visible for the hardware | 1379 | * assigns it visible for the hardware |
1166 | */ | 1380 | */ |
1167 | static void __attach_device(struct amd_iommu *iommu, | 1381 | static int __attach_device(struct device *dev, |
1168 | struct protection_domain *domain, | 1382 | struct protection_domain *domain) |
1169 | u16 devid) | ||
1170 | { | 1383 | { |
1384 | struct iommu_dev_data *dev_data, *alias_data; | ||
1385 | |||
1386 | dev_data = get_dev_data(dev); | ||
1387 | alias_data = get_dev_data(dev_data->alias); | ||
1388 | |||
1389 | if (!alias_data) | ||
1390 | return -EINVAL; | ||
1391 | |||
1171 | /* lock domain */ | 1392 | /* lock domain */ |
1172 | spin_lock(&domain->lock); | 1393 | spin_lock(&domain->lock); |
1173 | 1394 | ||
1174 | /* update DTE entry */ | 1395 | /* Some sanity checks */ |
1175 | set_dte_entry(devid, domain); | 1396 | if (alias_data->domain != NULL && |
1397 | alias_data->domain != domain) | ||
1398 | return -EBUSY; | ||
1176 | 1399 | ||
1177 | domain->dev_cnt += 1; | 1400 | if (dev_data->domain != NULL && |
1401 | dev_data->domain != domain) | ||
1402 | return -EBUSY; | ||
1403 | |||
1404 | /* Do real assignment */ | ||
1405 | if (dev_data->alias != dev) { | ||
1406 | alias_data = get_dev_data(dev_data->alias); | ||
1407 | if (alias_data->domain == NULL) | ||
1408 | do_attach(dev_data->alias, domain); | ||
1409 | |||
1410 | atomic_inc(&alias_data->bind); | ||
1411 | } | ||
1412 | |||
1413 | if (dev_data->domain == NULL) | ||
1414 | do_attach(dev, domain); | ||
1415 | |||
1416 | atomic_inc(&dev_data->bind); | ||
1178 | 1417 | ||
1179 | /* ready */ | 1418 | /* ready */ |
1180 | spin_unlock(&domain->lock); | 1419 | spin_unlock(&domain->lock); |
1420 | |||
1421 | return 0; | ||
1181 | } | 1422 | } |
1182 | 1423 | ||
1183 | /* | 1424 | /* |
1184 | * If a device is not yet associated with a domain, this function does | 1425 | * If a device is not yet associated with a domain, this function does |
1185 | * assigns it visible for the hardware | 1426 | * assigns it visible for the hardware |
1186 | */ | 1427 | */ |
1187 | static void attach_device(struct amd_iommu *iommu, | 1428 | static int attach_device(struct device *dev, |
1188 | struct protection_domain *domain, | 1429 | struct protection_domain *domain) |
1189 | u16 devid) | ||
1190 | { | 1430 | { |
1191 | unsigned long flags; | 1431 | unsigned long flags; |
1432 | int ret; | ||
1192 | 1433 | ||
1193 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | 1434 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); |
1194 | __attach_device(iommu, domain, devid); | 1435 | ret = __attach_device(dev, domain); |
1195 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | 1436 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); |
1196 | 1437 | ||
1197 | /* | 1438 | /* |
@@ -1199,98 +1440,125 @@ static void attach_device(struct amd_iommu *iommu, | |||
1199 | * left the caches in the IOMMU dirty. So we have to flush | 1440 | * left the caches in the IOMMU dirty. So we have to flush |
1200 | * here to evict all dirty stuff. | 1441 | * here to evict all dirty stuff. |
1201 | */ | 1442 | */ |
1202 | iommu_queue_inv_dev_entry(iommu, devid); | 1443 | iommu_flush_tlb_pde(domain); |
1203 | iommu_flush_tlb_pde(iommu, domain->id); | 1444 | |
1445 | return ret; | ||
1204 | } | 1446 | } |
1205 | 1447 | ||
1206 | /* | 1448 | /* |
1207 | * Removes a device from a protection domain (unlocked) | 1449 | * Removes a device from a protection domain (unlocked) |
1208 | */ | 1450 | */ |
1209 | static void __detach_device(struct protection_domain *domain, u16 devid) | 1451 | static void __detach_device(struct device *dev) |
1210 | { | 1452 | { |
1453 | struct iommu_dev_data *dev_data = get_dev_data(dev); | ||
1454 | struct iommu_dev_data *alias_data; | ||
1455 | unsigned long flags; | ||
1211 | 1456 | ||
1212 | /* lock domain */ | 1457 | BUG_ON(!dev_data->domain); |
1213 | spin_lock(&domain->lock); | ||
1214 | |||
1215 | /* remove domain from the lookup table */ | ||
1216 | amd_iommu_pd_table[devid] = NULL; | ||
1217 | 1458 | ||
1218 | /* remove entry from the device table seen by the hardware */ | 1459 | spin_lock_irqsave(&dev_data->domain->lock, flags); |
1219 | amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; | ||
1220 | amd_iommu_dev_table[devid].data[1] = 0; | ||
1221 | amd_iommu_dev_table[devid].data[2] = 0; | ||
1222 | 1460 | ||
1223 | amd_iommu_apply_erratum_63(devid); | 1461 | if (dev_data->alias != dev) { |
1462 | alias_data = get_dev_data(dev_data->alias); | ||
1463 | if (atomic_dec_and_test(&alias_data->bind)) | ||
1464 | do_detach(dev_data->alias); | ||
1465 | } | ||
1224 | 1466 | ||
1225 | /* decrease reference counter */ | 1467 | if (atomic_dec_and_test(&dev_data->bind)) |
1226 | domain->dev_cnt -= 1; | 1468 | do_detach(dev); |
1227 | 1469 | ||
1228 | /* ready */ | 1470 | spin_unlock_irqrestore(&dev_data->domain->lock, flags); |
1229 | spin_unlock(&domain->lock); | ||
1230 | 1471 | ||
1231 | /* | 1472 | /* |
1232 | * If we run in passthrough mode the device must be assigned to the | 1473 | * If we run in passthrough mode the device must be assigned to the |
1233 | * passthrough domain if it is detached from any other domain | 1474 | * passthrough domain if it is detached from any other domain |
1234 | */ | 1475 | */ |
1235 | if (iommu_pass_through) { | 1476 | if (iommu_pass_through && dev_data->domain == NULL) |
1236 | struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; | 1477 | __attach_device(dev, pt_domain); |
1237 | __attach_device(iommu, pt_domain, devid); | ||
1238 | } | ||
1239 | } | 1478 | } |
1240 | 1479 | ||
1241 | /* | 1480 | /* |
1242 | * Removes a device from a protection domain (with devtable_lock held) | 1481 | * Removes a device from a protection domain (with devtable_lock held) |
1243 | */ | 1482 | */ |
1244 | static void detach_device(struct protection_domain *domain, u16 devid) | 1483 | static void detach_device(struct device *dev) |
1245 | { | 1484 | { |
1246 | unsigned long flags; | 1485 | unsigned long flags; |
1247 | 1486 | ||
1248 | /* lock device table */ | 1487 | /* lock device table */ |
1249 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | 1488 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); |
1250 | __detach_device(domain, devid); | 1489 | __detach_device(dev); |
1251 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | 1490 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); |
1252 | } | 1491 | } |
1253 | 1492 | ||
1493 | /* | ||
1494 | * Find out the protection domain structure for a given PCI device. This | ||
1495 | * will give us the pointer to the page table root for example. | ||
1496 | */ | ||
1497 | static struct protection_domain *domain_for_device(struct device *dev) | ||
1498 | { | ||
1499 | struct protection_domain *dom; | ||
1500 | struct iommu_dev_data *dev_data, *alias_data; | ||
1501 | unsigned long flags; | ||
1502 | u16 devid, alias; | ||
1503 | |||
1504 | devid = get_device_id(dev); | ||
1505 | alias = amd_iommu_alias_table[devid]; | ||
1506 | dev_data = get_dev_data(dev); | ||
1507 | alias_data = get_dev_data(dev_data->alias); | ||
1508 | if (!alias_data) | ||
1509 | return NULL; | ||
1510 | |||
1511 | read_lock_irqsave(&amd_iommu_devtable_lock, flags); | ||
1512 | dom = dev_data->domain; | ||
1513 | if (dom == NULL && | ||
1514 | alias_data->domain != NULL) { | ||
1515 | __attach_device(dev, alias_data->domain); | ||
1516 | dom = alias_data->domain; | ||
1517 | } | ||
1518 | |||
1519 | read_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | ||
1520 | |||
1521 | return dom; | ||
1522 | } | ||
1523 | |||
1254 | static int device_change_notifier(struct notifier_block *nb, | 1524 | static int device_change_notifier(struct notifier_block *nb, |
1255 | unsigned long action, void *data) | 1525 | unsigned long action, void *data) |
1256 | { | 1526 | { |
1257 | struct device *dev = data; | 1527 | struct device *dev = data; |
1258 | struct pci_dev *pdev = to_pci_dev(dev); | 1528 | u16 devid; |
1259 | u16 devid = calc_devid(pdev->bus->number, pdev->devfn); | ||
1260 | struct protection_domain *domain; | 1529 | struct protection_domain *domain; |
1261 | struct dma_ops_domain *dma_domain; | 1530 | struct dma_ops_domain *dma_domain; |
1262 | struct amd_iommu *iommu; | 1531 | struct amd_iommu *iommu; |
1263 | unsigned long flags; | 1532 | unsigned long flags; |
1264 | 1533 | ||
1265 | if (devid > amd_iommu_last_bdf) | 1534 | if (!check_device(dev)) |
1266 | goto out; | 1535 | return 0; |
1267 | |||
1268 | devid = amd_iommu_alias_table[devid]; | ||
1269 | |||
1270 | iommu = amd_iommu_rlookup_table[devid]; | ||
1271 | if (iommu == NULL) | ||
1272 | goto out; | ||
1273 | |||
1274 | domain = domain_for_device(devid); | ||
1275 | 1536 | ||
1276 | if (domain && !dma_ops_domain(domain)) | 1537 | devid = get_device_id(dev); |
1277 | WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound " | 1538 | iommu = amd_iommu_rlookup_table[devid]; |
1278 | "to a non-dma-ops domain\n", dev_name(dev)); | ||
1279 | 1539 | ||
1280 | switch (action) { | 1540 | switch (action) { |
1281 | case BUS_NOTIFY_UNBOUND_DRIVER: | 1541 | case BUS_NOTIFY_UNBOUND_DRIVER: |
1542 | |||
1543 | domain = domain_for_device(dev); | ||
1544 | |||
1282 | if (!domain) | 1545 | if (!domain) |
1283 | goto out; | 1546 | goto out; |
1284 | if (iommu_pass_through) | 1547 | if (iommu_pass_through) |
1285 | break; | 1548 | break; |
1286 | detach_device(domain, devid); | 1549 | detach_device(dev); |
1287 | break; | 1550 | break; |
1288 | case BUS_NOTIFY_ADD_DEVICE: | 1551 | case BUS_NOTIFY_ADD_DEVICE: |
1552 | |||
1553 | iommu_init_device(dev); | ||
1554 | |||
1555 | domain = domain_for_device(dev); | ||
1556 | |||
1289 | /* allocate a protection domain if a device is added */ | 1557 | /* allocate a protection domain if a device is added */ |
1290 | dma_domain = find_protection_domain(devid); | 1558 | dma_domain = find_protection_domain(devid); |
1291 | if (dma_domain) | 1559 | if (dma_domain) |
1292 | goto out; | 1560 | goto out; |
1293 | dma_domain = dma_ops_domain_alloc(iommu); | 1561 | dma_domain = dma_ops_domain_alloc(); |
1294 | if (!dma_domain) | 1562 | if (!dma_domain) |
1295 | goto out; | 1563 | goto out; |
1296 | dma_domain->target_dev = devid; | 1564 | dma_domain->target_dev = devid; |
@@ -1300,11 +1568,15 @@ static int device_change_notifier(struct notifier_block *nb, | |||
1300 | spin_unlock_irqrestore(&iommu_pd_list_lock, flags); | 1568 | spin_unlock_irqrestore(&iommu_pd_list_lock, flags); |
1301 | 1569 | ||
1302 | break; | 1570 | break; |
1571 | case BUS_NOTIFY_DEL_DEVICE: | ||
1572 | |||
1573 | iommu_uninit_device(dev); | ||
1574 | |||
1303 | default: | 1575 | default: |
1304 | goto out; | 1576 | goto out; |
1305 | } | 1577 | } |
1306 | 1578 | ||
1307 | iommu_queue_inv_dev_entry(iommu, devid); | 1579 | iommu_flush_device(dev); |
1308 | iommu_completion_wait(iommu); | 1580 | iommu_completion_wait(iommu); |
1309 | 1581 | ||
1310 | out: | 1582 | out: |
@@ -1322,106 +1594,46 @@ static struct notifier_block device_nb = { | |||
1322 | *****************************************************************************/ | 1594 | *****************************************************************************/ |
1323 | 1595 | ||
1324 | /* | 1596 | /* |
1325 | * This function checks if the driver got a valid device from the caller to | ||
1326 | * avoid dereferencing invalid pointers. | ||
1327 | */ | ||
1328 | static bool check_device(struct device *dev) | ||
1329 | { | ||
1330 | if (!dev || !dev->dma_mask) | ||
1331 | return false; | ||
1332 | |||
1333 | return true; | ||
1334 | } | ||
1335 | |||
1336 | /* | ||
1337 | * In this function the list of preallocated protection domains is traversed to | ||
1338 | * find the domain for a specific device | ||
1339 | */ | ||
1340 | static struct dma_ops_domain *find_protection_domain(u16 devid) | ||
1341 | { | ||
1342 | struct dma_ops_domain *entry, *ret = NULL; | ||
1343 | unsigned long flags; | ||
1344 | |||
1345 | if (list_empty(&iommu_pd_list)) | ||
1346 | return NULL; | ||
1347 | |||
1348 | spin_lock_irqsave(&iommu_pd_list_lock, flags); | ||
1349 | |||
1350 | list_for_each_entry(entry, &iommu_pd_list, list) { | ||
1351 | if (entry->target_dev == devid) { | ||
1352 | ret = entry; | ||
1353 | break; | ||
1354 | } | ||
1355 | } | ||
1356 | |||
1357 | spin_unlock_irqrestore(&iommu_pd_list_lock, flags); | ||
1358 | |||
1359 | return ret; | ||
1360 | } | ||
1361 | |||
1362 | /* | ||
1363 | * In the dma_ops path we only have the struct device. This function | 1597 | * In the dma_ops path we only have the struct device. This function |
1364 | * finds the corresponding IOMMU, the protection domain and the | 1598 | * finds the corresponding IOMMU, the protection domain and the |
1365 | * requestor id for a given device. | 1599 | * requestor id for a given device. |
1366 | * If the device is not yet associated with a domain this is also done | 1600 | * If the device is not yet associated with a domain this is also done |
1367 | * in this function. | 1601 | * in this function. |
1368 | */ | 1602 | */ |
1369 | static int get_device_resources(struct device *dev, | 1603 | static struct protection_domain *get_domain(struct device *dev) |
1370 | struct amd_iommu **iommu, | ||
1371 | struct protection_domain **domain, | ||
1372 | u16 *bdf) | ||
1373 | { | 1604 | { |
1605 | struct protection_domain *domain; | ||
1374 | struct dma_ops_domain *dma_dom; | 1606 | struct dma_ops_domain *dma_dom; |
1375 | struct pci_dev *pcidev; | 1607 | u16 devid = get_device_id(dev); |
1376 | u16 _bdf; | ||
1377 | |||
1378 | *iommu = NULL; | ||
1379 | *domain = NULL; | ||
1380 | *bdf = 0xffff; | ||
1381 | |||
1382 | if (dev->bus != &pci_bus_type) | ||
1383 | return 0; | ||
1384 | 1608 | ||
1385 | pcidev = to_pci_dev(dev); | 1609 | if (!check_device(dev)) |
1386 | _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); | 1610 | return ERR_PTR(-EINVAL); |
1387 | 1611 | ||
1388 | /* device not translated by any IOMMU in the system? */ | 1612 | domain = domain_for_device(dev); |
1389 | if (_bdf > amd_iommu_last_bdf) | 1613 | if (domain != NULL && !dma_ops_domain(domain)) |
1390 | return 0; | 1614 | return ERR_PTR(-EBUSY); |
1391 | 1615 | ||
1392 | *bdf = amd_iommu_alias_table[_bdf]; | 1616 | if (domain != NULL) |
1617 | return domain; | ||
1393 | 1618 | ||
1394 | *iommu = amd_iommu_rlookup_table[*bdf]; | 1619 | /* Device not bount yet - bind it */ |
1395 | if (*iommu == NULL) | 1620 | dma_dom = find_protection_domain(devid); |
1396 | return 0; | 1621 | if (!dma_dom) |
1397 | *domain = domain_for_device(*bdf); | 1622 | dma_dom = amd_iommu_rlookup_table[devid]->default_dom; |
1398 | if (*domain == NULL) { | 1623 | attach_device(dev, &dma_dom->domain); |
1399 | dma_dom = find_protection_domain(*bdf); | 1624 | DUMP_printk("Using protection domain %d for device %s\n", |
1400 | if (!dma_dom) | 1625 | dma_dom->domain.id, dev_name(dev)); |
1401 | dma_dom = (*iommu)->default_dom; | ||
1402 | *domain = &dma_dom->domain; | ||
1403 | attach_device(*iommu, *domain, *bdf); | ||
1404 | DUMP_printk("Using protection domain %d for device %s\n", | ||
1405 | (*domain)->id, dev_name(dev)); | ||
1406 | } | ||
1407 | |||
1408 | if (domain_for_device(_bdf) == NULL) | ||
1409 | attach_device(*iommu, *domain, _bdf); | ||
1410 | 1626 | ||
1411 | return 1; | 1627 | return &dma_dom->domain; |
1412 | } | 1628 | } |
1413 | 1629 | ||
1414 | static void update_device_table(struct protection_domain *domain) | 1630 | static void update_device_table(struct protection_domain *domain) |
1415 | { | 1631 | { |
1416 | unsigned long flags; | 1632 | struct iommu_dev_data *dev_data; |
1417 | int i; | ||
1418 | 1633 | ||
1419 | for (i = 0; i <= amd_iommu_last_bdf; ++i) { | 1634 | list_for_each_entry(dev_data, &domain->dev_list, list) { |
1420 | if (amd_iommu_pd_table[i] != domain) | 1635 | u16 devid = get_device_id(dev_data->dev); |
1421 | continue; | 1636 | set_dte_entry(devid, domain); |
1422 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | ||
1423 | set_dte_entry(i, domain); | ||
1424 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | ||
1425 | } | 1637 | } |
1426 | } | 1638 | } |
1427 | 1639 | ||
@@ -1431,76 +1643,13 @@ static void update_domain(struct protection_domain *domain) | |||
1431 | return; | 1643 | return; |
1432 | 1644 | ||
1433 | update_device_table(domain); | 1645 | update_device_table(domain); |
1434 | flush_devices_by_domain(domain); | 1646 | iommu_flush_domain_devices(domain); |
1435 | iommu_flush_domain(domain->id); | 1647 | iommu_flush_tlb_pde(domain); |
1436 | 1648 | ||
1437 | domain->updated = false; | 1649 | domain->updated = false; |
1438 | } | 1650 | } |
1439 | 1651 | ||
1440 | /* | 1652 | /* |
1441 | * This function is used to add another level to an IO page table. Adding | ||
1442 | * another level increases the size of the address space by 9 bits to a size up | ||
1443 | * to 64 bits. | ||
1444 | */ | ||
1445 | static bool increase_address_space(struct protection_domain *domain, | ||
1446 | gfp_t gfp) | ||
1447 | { | ||
1448 | u64 *pte; | ||
1449 | |||
1450 | if (domain->mode == PAGE_MODE_6_LEVEL) | ||
1451 | /* address space already 64 bit large */ | ||
1452 | return false; | ||
1453 | |||
1454 | pte = (void *)get_zeroed_page(gfp); | ||
1455 | if (!pte) | ||
1456 | return false; | ||
1457 | |||
1458 | *pte = PM_LEVEL_PDE(domain->mode, | ||
1459 | virt_to_phys(domain->pt_root)); | ||
1460 | domain->pt_root = pte; | ||
1461 | domain->mode += 1; | ||
1462 | domain->updated = true; | ||
1463 | |||
1464 | return true; | ||
1465 | } | ||
1466 | |||
1467 | static u64 *alloc_pte(struct protection_domain *domain, | ||
1468 | unsigned long address, | ||
1469 | int end_lvl, | ||
1470 | u64 **pte_page, | ||
1471 | gfp_t gfp) | ||
1472 | { | ||
1473 | u64 *pte, *page; | ||
1474 | int level; | ||
1475 | |||
1476 | while (address > PM_LEVEL_SIZE(domain->mode)) | ||
1477 | increase_address_space(domain, gfp); | ||
1478 | |||
1479 | level = domain->mode - 1; | ||
1480 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; | ||
1481 | |||
1482 | while (level > end_lvl) { | ||
1483 | if (!IOMMU_PTE_PRESENT(*pte)) { | ||
1484 | page = (u64 *)get_zeroed_page(gfp); | ||
1485 | if (!page) | ||
1486 | return NULL; | ||
1487 | *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); | ||
1488 | } | ||
1489 | |||
1490 | level -= 1; | ||
1491 | |||
1492 | pte = IOMMU_PTE_PAGE(*pte); | ||
1493 | |||
1494 | if (pte_page && level == end_lvl) | ||
1495 | *pte_page = pte; | ||
1496 | |||
1497 | pte = &pte[PM_LEVEL_INDEX(level, address)]; | ||
1498 | } | ||
1499 | |||
1500 | return pte; | ||
1501 | } | ||
1502 | |||
1503 | /* | ||
1504 | * This function fetches the PTE for a given address in the aperture | 1653 | * This function fetches the PTE for a given address in the aperture |
1505 | */ | 1654 | */ |
1506 | static u64* dma_ops_get_pte(struct dma_ops_domain *dom, | 1655 | static u64* dma_ops_get_pte(struct dma_ops_domain *dom, |
@@ -1530,8 +1679,7 @@ static u64* dma_ops_get_pte(struct dma_ops_domain *dom, | |||
1530 | * This is the generic map function. It maps one 4kb page at paddr to | 1679 | * This is the generic map function. It maps one 4kb page at paddr to |
1531 | * the given address in the DMA address space for the domain. | 1680 | * the given address in the DMA address space for the domain. |
1532 | */ | 1681 | */ |
1533 | static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, | 1682 | static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom, |
1534 | struct dma_ops_domain *dom, | ||
1535 | unsigned long address, | 1683 | unsigned long address, |
1536 | phys_addr_t paddr, | 1684 | phys_addr_t paddr, |
1537 | int direction) | 1685 | int direction) |
@@ -1544,7 +1692,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, | |||
1544 | 1692 | ||
1545 | pte = dma_ops_get_pte(dom, address); | 1693 | pte = dma_ops_get_pte(dom, address); |
1546 | if (!pte) | 1694 | if (!pte) |
1547 | return bad_dma_address; | 1695 | return DMA_ERROR_CODE; |
1548 | 1696 | ||
1549 | __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; | 1697 | __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; |
1550 | 1698 | ||
@@ -1565,8 +1713,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, | |||
1565 | /* | 1713 | /* |
1566 | * The generic unmapping function for on page in the DMA address space. | 1714 | * The generic unmapping function for on page in the DMA address space. |
1567 | */ | 1715 | */ |
1568 | static void dma_ops_domain_unmap(struct amd_iommu *iommu, | 1716 | static void dma_ops_domain_unmap(struct dma_ops_domain *dom, |
1569 | struct dma_ops_domain *dom, | ||
1570 | unsigned long address) | 1717 | unsigned long address) |
1571 | { | 1718 | { |
1572 | struct aperture_range *aperture; | 1719 | struct aperture_range *aperture; |
@@ -1597,7 +1744,6 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, | |||
1597 | * Must be called with the domain lock held. | 1744 | * Must be called with the domain lock held. |
1598 | */ | 1745 | */ |
1599 | static dma_addr_t __map_single(struct device *dev, | 1746 | static dma_addr_t __map_single(struct device *dev, |
1600 | struct amd_iommu *iommu, | ||
1601 | struct dma_ops_domain *dma_dom, | 1747 | struct dma_ops_domain *dma_dom, |
1602 | phys_addr_t paddr, | 1748 | phys_addr_t paddr, |
1603 | size_t size, | 1749 | size_t size, |
@@ -1625,7 +1771,7 @@ static dma_addr_t __map_single(struct device *dev, | |||
1625 | retry: | 1771 | retry: |
1626 | address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, | 1772 | address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, |
1627 | dma_mask); | 1773 | dma_mask); |
1628 | if (unlikely(address == bad_dma_address)) { | 1774 | if (unlikely(address == DMA_ERROR_CODE)) { |
1629 | /* | 1775 | /* |
1630 | * setting next_address here will let the address | 1776 | * setting next_address here will let the address |
1631 | * allocator only scan the new allocated range in the | 1777 | * allocator only scan the new allocated range in the |
@@ -1633,7 +1779,7 @@ retry: | |||
1633 | */ | 1779 | */ |
1634 | dma_dom->next_address = dma_dom->aperture_size; | 1780 | dma_dom->next_address = dma_dom->aperture_size; |
1635 | 1781 | ||
1636 | if (alloc_new_range(iommu, dma_dom, false, GFP_ATOMIC)) | 1782 | if (alloc_new_range(dma_dom, false, GFP_ATOMIC)) |
1637 | goto out; | 1783 | goto out; |
1638 | 1784 | ||
1639 | /* | 1785 | /* |
@@ -1645,8 +1791,8 @@ retry: | |||
1645 | 1791 | ||
1646 | start = address; | 1792 | start = address; |
1647 | for (i = 0; i < pages; ++i) { | 1793 | for (i = 0; i < pages; ++i) { |
1648 | ret = dma_ops_domain_map(iommu, dma_dom, start, paddr, dir); | 1794 | ret = dma_ops_domain_map(dma_dom, start, paddr, dir); |
1649 | if (ret == bad_dma_address) | 1795 | if (ret == DMA_ERROR_CODE) |
1650 | goto out_unmap; | 1796 | goto out_unmap; |
1651 | 1797 | ||
1652 | paddr += PAGE_SIZE; | 1798 | paddr += PAGE_SIZE; |
@@ -1657,10 +1803,10 @@ retry: | |||
1657 | ADD_STATS_COUNTER(alloced_io_mem, size); | 1803 | ADD_STATS_COUNTER(alloced_io_mem, size); |
1658 | 1804 | ||
1659 | if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { | 1805 | if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { |
1660 | iommu_flush_tlb(iommu, dma_dom->domain.id); | 1806 | iommu_flush_tlb(&dma_dom->domain); |
1661 | dma_dom->need_flush = false; | 1807 | dma_dom->need_flush = false; |
1662 | } else if (unlikely(iommu_has_npcache(iommu))) | 1808 | } else if (unlikely(amd_iommu_np_cache)) |
1663 | iommu_flush_pages(iommu, dma_dom->domain.id, address, size); | 1809 | iommu_flush_pages(&dma_dom->domain, address, size); |
1664 | 1810 | ||
1665 | out: | 1811 | out: |
1666 | return address; | 1812 | return address; |
@@ -1669,20 +1815,19 @@ out_unmap: | |||
1669 | 1815 | ||
1670 | for (--i; i >= 0; --i) { | 1816 | for (--i; i >= 0; --i) { |
1671 | start -= PAGE_SIZE; | 1817 | start -= PAGE_SIZE; |
1672 | dma_ops_domain_unmap(iommu, dma_dom, start); | 1818 | dma_ops_domain_unmap(dma_dom, start); |
1673 | } | 1819 | } |
1674 | 1820 | ||
1675 | dma_ops_free_addresses(dma_dom, address, pages); | 1821 | dma_ops_free_addresses(dma_dom, address, pages); |
1676 | 1822 | ||
1677 | return bad_dma_address; | 1823 | return DMA_ERROR_CODE; |
1678 | } | 1824 | } |
1679 | 1825 | ||
1680 | /* | 1826 | /* |
1681 | * Does the reverse of the __map_single function. Must be called with | 1827 | * Does the reverse of the __map_single function. Must be called with |
1682 | * the domain lock held too | 1828 | * the domain lock held too |
1683 | */ | 1829 | */ |
1684 | static void __unmap_single(struct amd_iommu *iommu, | 1830 | static void __unmap_single(struct dma_ops_domain *dma_dom, |
1685 | struct dma_ops_domain *dma_dom, | ||
1686 | dma_addr_t dma_addr, | 1831 | dma_addr_t dma_addr, |
1687 | size_t size, | 1832 | size_t size, |
1688 | int dir) | 1833 | int dir) |
@@ -1690,7 +1835,7 @@ static void __unmap_single(struct amd_iommu *iommu, | |||
1690 | dma_addr_t i, start; | 1835 | dma_addr_t i, start; |
1691 | unsigned int pages; | 1836 | unsigned int pages; |
1692 | 1837 | ||
1693 | if ((dma_addr == bad_dma_address) || | 1838 | if ((dma_addr == DMA_ERROR_CODE) || |
1694 | (dma_addr + size > dma_dom->aperture_size)) | 1839 | (dma_addr + size > dma_dom->aperture_size)) |
1695 | return; | 1840 | return; |
1696 | 1841 | ||
@@ -1699,7 +1844,7 @@ static void __unmap_single(struct amd_iommu *iommu, | |||
1699 | start = dma_addr; | 1844 | start = dma_addr; |
1700 | 1845 | ||
1701 | for (i = 0; i < pages; ++i) { | 1846 | for (i = 0; i < pages; ++i) { |
1702 | dma_ops_domain_unmap(iommu, dma_dom, start); | 1847 | dma_ops_domain_unmap(dma_dom, start); |
1703 | start += PAGE_SIZE; | 1848 | start += PAGE_SIZE; |
1704 | } | 1849 | } |
1705 | 1850 | ||
@@ -1708,7 +1853,7 @@ static void __unmap_single(struct amd_iommu *iommu, | |||
1708 | dma_ops_free_addresses(dma_dom, dma_addr, pages); | 1853 | dma_ops_free_addresses(dma_dom, dma_addr, pages); |
1709 | 1854 | ||
1710 | if (amd_iommu_unmap_flush || dma_dom->need_flush) { | 1855 | if (amd_iommu_unmap_flush || dma_dom->need_flush) { |
1711 | iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size); | 1856 | iommu_flush_pages(&dma_dom->domain, dma_addr, size); |
1712 | dma_dom->need_flush = false; | 1857 | dma_dom->need_flush = false; |
1713 | } | 1858 | } |
1714 | } | 1859 | } |
@@ -1722,36 +1867,29 @@ static dma_addr_t map_page(struct device *dev, struct page *page, | |||
1722 | struct dma_attrs *attrs) | 1867 | struct dma_attrs *attrs) |
1723 | { | 1868 | { |
1724 | unsigned long flags; | 1869 | unsigned long flags; |
1725 | struct amd_iommu *iommu; | ||
1726 | struct protection_domain *domain; | 1870 | struct protection_domain *domain; |
1727 | u16 devid; | ||
1728 | dma_addr_t addr; | 1871 | dma_addr_t addr; |
1729 | u64 dma_mask; | 1872 | u64 dma_mask; |
1730 | phys_addr_t paddr = page_to_phys(page) + offset; | 1873 | phys_addr_t paddr = page_to_phys(page) + offset; |
1731 | 1874 | ||
1732 | INC_STATS_COUNTER(cnt_map_single); | 1875 | INC_STATS_COUNTER(cnt_map_single); |
1733 | 1876 | ||
1734 | if (!check_device(dev)) | 1877 | domain = get_domain(dev); |
1735 | return bad_dma_address; | 1878 | if (PTR_ERR(domain) == -EINVAL) |
1736 | |||
1737 | dma_mask = *dev->dma_mask; | ||
1738 | |||
1739 | get_device_resources(dev, &iommu, &domain, &devid); | ||
1740 | |||
1741 | if (iommu == NULL || domain == NULL) | ||
1742 | /* device not handled by any AMD IOMMU */ | ||
1743 | return (dma_addr_t)paddr; | 1879 | return (dma_addr_t)paddr; |
1880 | else if (IS_ERR(domain)) | ||
1881 | return DMA_ERROR_CODE; | ||
1744 | 1882 | ||
1745 | if (!dma_ops_domain(domain)) | 1883 | dma_mask = *dev->dma_mask; |
1746 | return bad_dma_address; | ||
1747 | 1884 | ||
1748 | spin_lock_irqsave(&domain->lock, flags); | 1885 | spin_lock_irqsave(&domain->lock, flags); |
1749 | addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, | 1886 | |
1887 | addr = __map_single(dev, domain->priv, paddr, size, dir, false, | ||
1750 | dma_mask); | 1888 | dma_mask); |
1751 | if (addr == bad_dma_address) | 1889 | if (addr == DMA_ERROR_CODE) |
1752 | goto out; | 1890 | goto out; |
1753 | 1891 | ||
1754 | iommu_completion_wait(iommu); | 1892 | iommu_flush_complete(domain); |
1755 | 1893 | ||
1756 | out: | 1894 | out: |
1757 | spin_unlock_irqrestore(&domain->lock, flags); | 1895 | spin_unlock_irqrestore(&domain->lock, flags); |
@@ -1766,25 +1904,19 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, | |||
1766 | enum dma_data_direction dir, struct dma_attrs *attrs) | 1904 | enum dma_data_direction dir, struct dma_attrs *attrs) |
1767 | { | 1905 | { |
1768 | unsigned long flags; | 1906 | unsigned long flags; |
1769 | struct amd_iommu *iommu; | ||
1770 | struct protection_domain *domain; | 1907 | struct protection_domain *domain; |
1771 | u16 devid; | ||
1772 | 1908 | ||
1773 | INC_STATS_COUNTER(cnt_unmap_single); | 1909 | INC_STATS_COUNTER(cnt_unmap_single); |
1774 | 1910 | ||
1775 | if (!check_device(dev) || | 1911 | domain = get_domain(dev); |
1776 | !get_device_resources(dev, &iommu, &domain, &devid)) | 1912 | if (IS_ERR(domain)) |
1777 | /* device not handled by any AMD IOMMU */ | ||
1778 | return; | ||
1779 | |||
1780 | if (!dma_ops_domain(domain)) | ||
1781 | return; | 1913 | return; |
1782 | 1914 | ||
1783 | spin_lock_irqsave(&domain->lock, flags); | 1915 | spin_lock_irqsave(&domain->lock, flags); |
1784 | 1916 | ||
1785 | __unmap_single(iommu, domain->priv, dma_addr, size, dir); | 1917 | __unmap_single(domain->priv, dma_addr, size, dir); |
1786 | 1918 | ||
1787 | iommu_completion_wait(iommu); | 1919 | iommu_flush_complete(domain); |
1788 | 1920 | ||
1789 | spin_unlock_irqrestore(&domain->lock, flags); | 1921 | spin_unlock_irqrestore(&domain->lock, flags); |
1790 | } | 1922 | } |
@@ -1816,9 +1948,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, | |||
1816 | struct dma_attrs *attrs) | 1948 | struct dma_attrs *attrs) |
1817 | { | 1949 | { |
1818 | unsigned long flags; | 1950 | unsigned long flags; |
1819 | struct amd_iommu *iommu; | ||
1820 | struct protection_domain *domain; | 1951 | struct protection_domain *domain; |
1821 | u16 devid; | ||
1822 | int i; | 1952 | int i; |
1823 | struct scatterlist *s; | 1953 | struct scatterlist *s; |
1824 | phys_addr_t paddr; | 1954 | phys_addr_t paddr; |
@@ -1827,25 +1957,20 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, | |||
1827 | 1957 | ||
1828 | INC_STATS_COUNTER(cnt_map_sg); | 1958 | INC_STATS_COUNTER(cnt_map_sg); |
1829 | 1959 | ||
1830 | if (!check_device(dev)) | 1960 | domain = get_domain(dev); |
1961 | if (PTR_ERR(domain) == -EINVAL) | ||
1962 | return map_sg_no_iommu(dev, sglist, nelems, dir); | ||
1963 | else if (IS_ERR(domain)) | ||
1831 | return 0; | 1964 | return 0; |
1832 | 1965 | ||
1833 | dma_mask = *dev->dma_mask; | 1966 | dma_mask = *dev->dma_mask; |
1834 | 1967 | ||
1835 | get_device_resources(dev, &iommu, &domain, &devid); | ||
1836 | |||
1837 | if (!iommu || !domain) | ||
1838 | return map_sg_no_iommu(dev, sglist, nelems, dir); | ||
1839 | |||
1840 | if (!dma_ops_domain(domain)) | ||
1841 | return 0; | ||
1842 | |||
1843 | spin_lock_irqsave(&domain->lock, flags); | 1968 | spin_lock_irqsave(&domain->lock, flags); |
1844 | 1969 | ||
1845 | for_each_sg(sglist, s, nelems, i) { | 1970 | for_each_sg(sglist, s, nelems, i) { |
1846 | paddr = sg_phys(s); | 1971 | paddr = sg_phys(s); |
1847 | 1972 | ||
1848 | s->dma_address = __map_single(dev, iommu, domain->priv, | 1973 | s->dma_address = __map_single(dev, domain->priv, |
1849 | paddr, s->length, dir, false, | 1974 | paddr, s->length, dir, false, |
1850 | dma_mask); | 1975 | dma_mask); |
1851 | 1976 | ||
@@ -1856,7 +1981,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, | |||
1856 | goto unmap; | 1981 | goto unmap; |
1857 | } | 1982 | } |
1858 | 1983 | ||
1859 | iommu_completion_wait(iommu); | 1984 | iommu_flush_complete(domain); |
1860 | 1985 | ||
1861 | out: | 1986 | out: |
1862 | spin_unlock_irqrestore(&domain->lock, flags); | 1987 | spin_unlock_irqrestore(&domain->lock, flags); |
@@ -1865,7 +1990,7 @@ out: | |||
1865 | unmap: | 1990 | unmap: |
1866 | for_each_sg(sglist, s, mapped_elems, i) { | 1991 | for_each_sg(sglist, s, mapped_elems, i) { |
1867 | if (s->dma_address) | 1992 | if (s->dma_address) |
1868 | __unmap_single(iommu, domain->priv, s->dma_address, | 1993 | __unmap_single(domain->priv, s->dma_address, |
1869 | s->dma_length, dir); | 1994 | s->dma_length, dir); |
1870 | s->dma_address = s->dma_length = 0; | 1995 | s->dma_address = s->dma_length = 0; |
1871 | } | 1996 | } |
@@ -1884,30 +2009,25 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, | |||
1884 | struct dma_attrs *attrs) | 2009 | struct dma_attrs *attrs) |
1885 | { | 2010 | { |
1886 | unsigned long flags; | 2011 | unsigned long flags; |
1887 | struct amd_iommu *iommu; | ||
1888 | struct protection_domain *domain; | 2012 | struct protection_domain *domain; |
1889 | struct scatterlist *s; | 2013 | struct scatterlist *s; |
1890 | u16 devid; | ||
1891 | int i; | 2014 | int i; |
1892 | 2015 | ||
1893 | INC_STATS_COUNTER(cnt_unmap_sg); | 2016 | INC_STATS_COUNTER(cnt_unmap_sg); |
1894 | 2017 | ||
1895 | if (!check_device(dev) || | 2018 | domain = get_domain(dev); |
1896 | !get_device_resources(dev, &iommu, &domain, &devid)) | 2019 | if (IS_ERR(domain)) |
1897 | return; | ||
1898 | |||
1899 | if (!dma_ops_domain(domain)) | ||
1900 | return; | 2020 | return; |
1901 | 2021 | ||
1902 | spin_lock_irqsave(&domain->lock, flags); | 2022 | spin_lock_irqsave(&domain->lock, flags); |
1903 | 2023 | ||
1904 | for_each_sg(sglist, s, nelems, i) { | 2024 | for_each_sg(sglist, s, nelems, i) { |
1905 | __unmap_single(iommu, domain->priv, s->dma_address, | 2025 | __unmap_single(domain->priv, s->dma_address, |
1906 | s->dma_length, dir); | 2026 | s->dma_length, dir); |
1907 | s->dma_address = s->dma_length = 0; | 2027 | s->dma_address = s->dma_length = 0; |
1908 | } | 2028 | } |
1909 | 2029 | ||
1910 | iommu_completion_wait(iommu); | 2030 | iommu_flush_complete(domain); |
1911 | 2031 | ||
1912 | spin_unlock_irqrestore(&domain->lock, flags); | 2032 | spin_unlock_irqrestore(&domain->lock, flags); |
1913 | } | 2033 | } |
@@ -1920,49 +2040,44 @@ static void *alloc_coherent(struct device *dev, size_t size, | |||
1920 | { | 2040 | { |
1921 | unsigned long flags; | 2041 | unsigned long flags; |
1922 | void *virt_addr; | 2042 | void *virt_addr; |
1923 | struct amd_iommu *iommu; | ||
1924 | struct protection_domain *domain; | 2043 | struct protection_domain *domain; |
1925 | u16 devid; | ||
1926 | phys_addr_t paddr; | 2044 | phys_addr_t paddr; |
1927 | u64 dma_mask = dev->coherent_dma_mask; | 2045 | u64 dma_mask = dev->coherent_dma_mask; |
1928 | 2046 | ||
1929 | INC_STATS_COUNTER(cnt_alloc_coherent); | 2047 | INC_STATS_COUNTER(cnt_alloc_coherent); |
1930 | 2048 | ||
1931 | if (!check_device(dev)) | 2049 | domain = get_domain(dev); |
2050 | if (PTR_ERR(domain) == -EINVAL) { | ||
2051 | virt_addr = (void *)__get_free_pages(flag, get_order(size)); | ||
2052 | *dma_addr = __pa(virt_addr); | ||
2053 | return virt_addr; | ||
2054 | } else if (IS_ERR(domain)) | ||
1932 | return NULL; | 2055 | return NULL; |
1933 | 2056 | ||
1934 | if (!get_device_resources(dev, &iommu, &domain, &devid)) | 2057 | dma_mask = dev->coherent_dma_mask; |
1935 | flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); | 2058 | flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); |
2059 | flag |= __GFP_ZERO; | ||
1936 | 2060 | ||
1937 | flag |= __GFP_ZERO; | ||
1938 | virt_addr = (void *)__get_free_pages(flag, get_order(size)); | 2061 | virt_addr = (void *)__get_free_pages(flag, get_order(size)); |
1939 | if (!virt_addr) | 2062 | if (!virt_addr) |
1940 | return NULL; | 2063 | return NULL; |
1941 | 2064 | ||
1942 | paddr = virt_to_phys(virt_addr); | 2065 | paddr = virt_to_phys(virt_addr); |
1943 | 2066 | ||
1944 | if (!iommu || !domain) { | ||
1945 | *dma_addr = (dma_addr_t)paddr; | ||
1946 | return virt_addr; | ||
1947 | } | ||
1948 | |||
1949 | if (!dma_ops_domain(domain)) | ||
1950 | goto out_free; | ||
1951 | |||
1952 | if (!dma_mask) | 2067 | if (!dma_mask) |
1953 | dma_mask = *dev->dma_mask; | 2068 | dma_mask = *dev->dma_mask; |
1954 | 2069 | ||
1955 | spin_lock_irqsave(&domain->lock, flags); | 2070 | spin_lock_irqsave(&domain->lock, flags); |
1956 | 2071 | ||
1957 | *dma_addr = __map_single(dev, iommu, domain->priv, paddr, | 2072 | *dma_addr = __map_single(dev, domain->priv, paddr, |
1958 | size, DMA_BIDIRECTIONAL, true, dma_mask); | 2073 | size, DMA_BIDIRECTIONAL, true, dma_mask); |
1959 | 2074 | ||
1960 | if (*dma_addr == bad_dma_address) { | 2075 | if (*dma_addr == DMA_ERROR_CODE) { |
1961 | spin_unlock_irqrestore(&domain->lock, flags); | 2076 | spin_unlock_irqrestore(&domain->lock, flags); |
1962 | goto out_free; | 2077 | goto out_free; |
1963 | } | 2078 | } |
1964 | 2079 | ||
1965 | iommu_completion_wait(iommu); | 2080 | iommu_flush_complete(domain); |
1966 | 2081 | ||
1967 | spin_unlock_irqrestore(&domain->lock, flags); | 2082 | spin_unlock_irqrestore(&domain->lock, flags); |
1968 | 2083 | ||
@@ -1982,28 +2097,19 @@ static void free_coherent(struct device *dev, size_t size, | |||
1982 | void *virt_addr, dma_addr_t dma_addr) | 2097 | void *virt_addr, dma_addr_t dma_addr) |
1983 | { | 2098 | { |
1984 | unsigned long flags; | 2099 | unsigned long flags; |
1985 | struct amd_iommu *iommu; | ||
1986 | struct protection_domain *domain; | 2100 | struct protection_domain *domain; |
1987 | u16 devid; | ||
1988 | 2101 | ||
1989 | INC_STATS_COUNTER(cnt_free_coherent); | 2102 | INC_STATS_COUNTER(cnt_free_coherent); |
1990 | 2103 | ||
1991 | if (!check_device(dev)) | 2104 | domain = get_domain(dev); |
1992 | return; | 2105 | if (IS_ERR(domain)) |
1993 | |||
1994 | get_device_resources(dev, &iommu, &domain, &devid); | ||
1995 | |||
1996 | if (!iommu || !domain) | ||
1997 | goto free_mem; | ||
1998 | |||
1999 | if (!dma_ops_domain(domain)) | ||
2000 | goto free_mem; | 2106 | goto free_mem; |
2001 | 2107 | ||
2002 | spin_lock_irqsave(&domain->lock, flags); | 2108 | spin_lock_irqsave(&domain->lock, flags); |
2003 | 2109 | ||
2004 | __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); | 2110 | __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); |
2005 | 2111 | ||
2006 | iommu_completion_wait(iommu); | 2112 | iommu_flush_complete(domain); |
2007 | 2113 | ||
2008 | spin_unlock_irqrestore(&domain->lock, flags); | 2114 | spin_unlock_irqrestore(&domain->lock, flags); |
2009 | 2115 | ||
@@ -2017,22 +2123,7 @@ free_mem: | |||
2017 | */ | 2123 | */ |
2018 | static int amd_iommu_dma_supported(struct device *dev, u64 mask) | 2124 | static int amd_iommu_dma_supported(struct device *dev, u64 mask) |
2019 | { | 2125 | { |
2020 | u16 bdf; | 2126 | return check_device(dev); |
2021 | struct pci_dev *pcidev; | ||
2022 | |||
2023 | /* No device or no PCI device */ | ||
2024 | if (!dev || dev->bus != &pci_bus_type) | ||
2025 | return 0; | ||
2026 | |||
2027 | pcidev = to_pci_dev(dev); | ||
2028 | |||
2029 | bdf = calc_devid(pcidev->bus->number, pcidev->devfn); | ||
2030 | |||
2031 | /* Out of our scope? */ | ||
2032 | if (bdf > amd_iommu_last_bdf) | ||
2033 | return 0; | ||
2034 | |||
2035 | return 1; | ||
2036 | } | 2127 | } |
2037 | 2128 | ||
2038 | /* | 2129 | /* |
@@ -2046,25 +2137,30 @@ static void prealloc_protection_domains(void) | |||
2046 | { | 2137 | { |
2047 | struct pci_dev *dev = NULL; | 2138 | struct pci_dev *dev = NULL; |
2048 | struct dma_ops_domain *dma_dom; | 2139 | struct dma_ops_domain *dma_dom; |
2049 | struct amd_iommu *iommu; | ||
2050 | u16 devid; | 2140 | u16 devid; |
2051 | 2141 | ||
2052 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | 2142 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { |
2053 | devid = calc_devid(dev->bus->number, dev->devfn); | 2143 | |
2054 | if (devid > amd_iommu_last_bdf) | 2144 | /* Do we handle this device? */ |
2055 | continue; | 2145 | if (!check_device(&dev->dev)) |
2056 | devid = amd_iommu_alias_table[devid]; | ||
2057 | if (domain_for_device(devid)) | ||
2058 | continue; | 2146 | continue; |
2059 | iommu = amd_iommu_rlookup_table[devid]; | 2147 | |
2060 | if (!iommu) | 2148 | iommu_init_device(&dev->dev); |
2149 | |||
2150 | /* Is there already any domain for it? */ | ||
2151 | if (domain_for_device(&dev->dev)) | ||
2061 | continue; | 2152 | continue; |
2062 | dma_dom = dma_ops_domain_alloc(iommu); | 2153 | |
2154 | devid = get_device_id(&dev->dev); | ||
2155 | |||
2156 | dma_dom = dma_ops_domain_alloc(); | ||
2063 | if (!dma_dom) | 2157 | if (!dma_dom) |
2064 | continue; | 2158 | continue; |
2065 | init_unity_mappings_for_device(dma_dom, devid); | 2159 | init_unity_mappings_for_device(dma_dom, devid); |
2066 | dma_dom->target_dev = devid; | 2160 | dma_dom->target_dev = devid; |
2067 | 2161 | ||
2162 | attach_device(&dev->dev, &dma_dom->domain); | ||
2163 | |||
2068 | list_add_tail(&dma_dom->list, &iommu_pd_list); | 2164 | list_add_tail(&dma_dom->list, &iommu_pd_list); |
2069 | } | 2165 | } |
2070 | } | 2166 | } |
@@ -2093,7 +2189,7 @@ int __init amd_iommu_init_dma_ops(void) | |||
2093 | * protection domain will be assigned to the default one. | 2189 | * protection domain will be assigned to the default one. |
2094 | */ | 2190 | */ |
2095 | for_each_iommu(iommu) { | 2191 | for_each_iommu(iommu) { |
2096 | iommu->default_dom = dma_ops_domain_alloc(iommu); | 2192 | iommu->default_dom = dma_ops_domain_alloc(); |
2097 | if (iommu->default_dom == NULL) | 2193 | if (iommu->default_dom == NULL) |
2098 | return -ENOMEM; | 2194 | return -ENOMEM; |
2099 | iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; | 2195 | iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; |
@@ -2103,15 +2199,12 @@ int __init amd_iommu_init_dma_ops(void) | |||
2103 | } | 2199 | } |
2104 | 2200 | ||
2105 | /* | 2201 | /* |
2106 | * If device isolation is enabled, pre-allocate the protection | 2202 | * Pre-allocate the protection domains for each device. |
2107 | * domains for each device. | ||
2108 | */ | 2203 | */ |
2109 | if (amd_iommu_isolate) | 2204 | prealloc_protection_domains(); |
2110 | prealloc_protection_domains(); | ||
2111 | 2205 | ||
2112 | iommu_detected = 1; | 2206 | iommu_detected = 1; |
2113 | force_iommu = 1; | 2207 | swiotlb = 0; |
2114 | bad_dma_address = 0; | ||
2115 | #ifdef CONFIG_GART_IOMMU | 2208 | #ifdef CONFIG_GART_IOMMU |
2116 | gart_iommu_aperture_disabled = 1; | 2209 | gart_iommu_aperture_disabled = 1; |
2117 | gart_iommu_aperture = 0; | 2210 | gart_iommu_aperture = 0; |
@@ -2150,14 +2243,17 @@ free_domains: | |||
2150 | 2243 | ||
2151 | static void cleanup_domain(struct protection_domain *domain) | 2244 | static void cleanup_domain(struct protection_domain *domain) |
2152 | { | 2245 | { |
2246 | struct iommu_dev_data *dev_data, *next; | ||
2153 | unsigned long flags; | 2247 | unsigned long flags; |
2154 | u16 devid; | ||
2155 | 2248 | ||
2156 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | 2249 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); |
2157 | 2250 | ||
2158 | for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) | 2251 | list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) { |
2159 | if (amd_iommu_pd_table[devid] == domain) | 2252 | struct device *dev = dev_data->dev; |
2160 | __detach_device(domain, devid); | 2253 | |
2254 | do_detach(dev); | ||
2255 | atomic_set(&dev_data->bind, 0); | ||
2256 | } | ||
2161 | 2257 | ||
2162 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | 2258 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); |
2163 | } | 2259 | } |
@@ -2167,6 +2263,8 @@ static void protection_domain_free(struct protection_domain *domain) | |||
2167 | if (!domain) | 2263 | if (!domain) |
2168 | return; | 2264 | return; |
2169 | 2265 | ||
2266 | del_domain_from_list(domain); | ||
2267 | |||
2170 | if (domain->id) | 2268 | if (domain->id) |
2171 | domain_id_free(domain->id); | 2269 | domain_id_free(domain->id); |
2172 | 2270 | ||
@@ -2185,6 +2283,9 @@ static struct protection_domain *protection_domain_alloc(void) | |||
2185 | domain->id = domain_id_alloc(); | 2283 | domain->id = domain_id_alloc(); |
2186 | if (!domain->id) | 2284 | if (!domain->id) |
2187 | goto out_err; | 2285 | goto out_err; |
2286 | INIT_LIST_HEAD(&domain->dev_list); | ||
2287 | |||
2288 | add_domain_to_list(domain); | ||
2188 | 2289 | ||
2189 | return domain; | 2290 | return domain; |
2190 | 2291 | ||
@@ -2241,26 +2342,23 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom) | |||
2241 | static void amd_iommu_detach_device(struct iommu_domain *dom, | 2342 | static void amd_iommu_detach_device(struct iommu_domain *dom, |
2242 | struct device *dev) | 2343 | struct device *dev) |
2243 | { | 2344 | { |
2244 | struct protection_domain *domain = dom->priv; | 2345 | struct iommu_dev_data *dev_data = dev->archdata.iommu; |
2245 | struct amd_iommu *iommu; | 2346 | struct amd_iommu *iommu; |
2246 | struct pci_dev *pdev; | ||
2247 | u16 devid; | 2347 | u16 devid; |
2248 | 2348 | ||
2249 | if (dev->bus != &pci_bus_type) | 2349 | if (!check_device(dev)) |
2250 | return; | 2350 | return; |
2251 | 2351 | ||
2252 | pdev = to_pci_dev(dev); | 2352 | devid = get_device_id(dev); |
2253 | |||
2254 | devid = calc_devid(pdev->bus->number, pdev->devfn); | ||
2255 | 2353 | ||
2256 | if (devid > 0) | 2354 | if (dev_data->domain != NULL) |
2257 | detach_device(domain, devid); | 2355 | detach_device(dev); |
2258 | 2356 | ||
2259 | iommu = amd_iommu_rlookup_table[devid]; | 2357 | iommu = amd_iommu_rlookup_table[devid]; |
2260 | if (!iommu) | 2358 | if (!iommu) |
2261 | return; | 2359 | return; |
2262 | 2360 | ||
2263 | iommu_queue_inv_dev_entry(iommu, devid); | 2361 | iommu_flush_device(dev); |
2264 | iommu_completion_wait(iommu); | 2362 | iommu_completion_wait(iommu); |
2265 | } | 2363 | } |
2266 | 2364 | ||
@@ -2268,35 +2366,30 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, | |||
2268 | struct device *dev) | 2366 | struct device *dev) |
2269 | { | 2367 | { |
2270 | struct protection_domain *domain = dom->priv; | 2368 | struct protection_domain *domain = dom->priv; |
2271 | struct protection_domain *old_domain; | 2369 | struct iommu_dev_data *dev_data; |
2272 | struct amd_iommu *iommu; | 2370 | struct amd_iommu *iommu; |
2273 | struct pci_dev *pdev; | 2371 | int ret; |
2274 | u16 devid; | 2372 | u16 devid; |
2275 | 2373 | ||
2276 | if (dev->bus != &pci_bus_type) | 2374 | if (!check_device(dev)) |
2277 | return -EINVAL; | 2375 | return -EINVAL; |
2278 | 2376 | ||
2279 | pdev = to_pci_dev(dev); | 2377 | dev_data = dev->archdata.iommu; |
2280 | 2378 | ||
2281 | devid = calc_devid(pdev->bus->number, pdev->devfn); | 2379 | devid = get_device_id(dev); |
2282 | |||
2283 | if (devid >= amd_iommu_last_bdf || | ||
2284 | devid != amd_iommu_alias_table[devid]) | ||
2285 | return -EINVAL; | ||
2286 | 2380 | ||
2287 | iommu = amd_iommu_rlookup_table[devid]; | 2381 | iommu = amd_iommu_rlookup_table[devid]; |
2288 | if (!iommu) | 2382 | if (!iommu) |
2289 | return -EINVAL; | 2383 | return -EINVAL; |
2290 | 2384 | ||
2291 | old_domain = domain_for_device(devid); | 2385 | if (dev_data->domain) |
2292 | if (old_domain) | 2386 | detach_device(dev); |
2293 | detach_device(old_domain, devid); | ||
2294 | 2387 | ||
2295 | attach_device(iommu, domain, devid); | 2388 | ret = attach_device(dev, domain); |
2296 | 2389 | ||
2297 | iommu_completion_wait(iommu); | 2390 | iommu_completion_wait(iommu); |
2298 | 2391 | ||
2299 | return 0; | 2392 | return ret; |
2300 | } | 2393 | } |
2301 | 2394 | ||
2302 | static int amd_iommu_map_range(struct iommu_domain *dom, | 2395 | static int amd_iommu_map_range(struct iommu_domain *dom, |
@@ -2342,7 +2435,7 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom, | |||
2342 | iova += PAGE_SIZE; | 2435 | iova += PAGE_SIZE; |
2343 | } | 2436 | } |
2344 | 2437 | ||
2345 | iommu_flush_domain(domain->id); | 2438 | iommu_flush_tlb_pde(domain); |
2346 | } | 2439 | } |
2347 | 2440 | ||
2348 | static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, | 2441 | static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, |
@@ -2393,8 +2486,9 @@ static struct iommu_ops amd_iommu_ops = { | |||
2393 | 2486 | ||
2394 | int __init amd_iommu_init_passthrough(void) | 2487 | int __init amd_iommu_init_passthrough(void) |
2395 | { | 2488 | { |
2489 | struct amd_iommu *iommu; | ||
2396 | struct pci_dev *dev = NULL; | 2490 | struct pci_dev *dev = NULL; |
2397 | u16 devid, devid2; | 2491 | u16 devid; |
2398 | 2492 | ||
2399 | /* allocate passthroug domain */ | 2493 | /* allocate passthroug domain */ |
2400 | pt_domain = protection_domain_alloc(); | 2494 | pt_domain = protection_domain_alloc(); |
@@ -2404,20 +2498,17 @@ int __init amd_iommu_init_passthrough(void) | |||
2404 | pt_domain->mode |= PAGE_MODE_NONE; | 2498 | pt_domain->mode |= PAGE_MODE_NONE; |
2405 | 2499 | ||
2406 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | 2500 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { |
2407 | struct amd_iommu *iommu; | ||
2408 | 2501 | ||
2409 | devid = calc_devid(dev->bus->number, dev->devfn); | 2502 | if (!check_device(&dev->dev)) |
2410 | if (devid > amd_iommu_last_bdf) | ||
2411 | continue; | 2503 | continue; |
2412 | 2504 | ||
2413 | devid2 = amd_iommu_alias_table[devid]; | 2505 | devid = get_device_id(&dev->dev); |
2414 | 2506 | ||
2415 | iommu = amd_iommu_rlookup_table[devid2]; | 2507 | iommu = amd_iommu_rlookup_table[devid]; |
2416 | if (!iommu) | 2508 | if (!iommu) |
2417 | continue; | 2509 | continue; |
2418 | 2510 | ||
2419 | __attach_device(iommu, pt_domain, devid); | 2511 | attach_device(&dev->dev, pt_domain); |
2420 | __attach_device(iommu, pt_domain, devid2); | ||
2421 | } | 2512 | } |
2422 | 2513 | ||
2423 | pr_info("AMD-Vi: Initialized for Passthrough Mode\n"); | 2514 | pr_info("AMD-Vi: Initialized for Passthrough Mode\n"); |
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index c20001e4f556..7ffc39965233 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. | 2 | * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. |
3 | * Author: Joerg Roedel <joerg.roedel@amd.com> | 3 | * Author: Joerg Roedel <joerg.roedel@amd.com> |
4 | * Leo Duran <leo.duran@amd.com> | 4 | * Leo Duran <leo.duran@amd.com> |
5 | * | 5 | * |
@@ -25,10 +25,12 @@ | |||
25 | #include <linux/interrupt.h> | 25 | #include <linux/interrupt.h> |
26 | #include <linux/msi.h> | 26 | #include <linux/msi.h> |
27 | #include <asm/pci-direct.h> | 27 | #include <asm/pci-direct.h> |
28 | #include <asm/amd_iommu_proto.h> | ||
28 | #include <asm/amd_iommu_types.h> | 29 | #include <asm/amd_iommu_types.h> |
29 | #include <asm/amd_iommu.h> | 30 | #include <asm/amd_iommu.h> |
30 | #include <asm/iommu.h> | 31 | #include <asm/iommu.h> |
31 | #include <asm/gart.h> | 32 | #include <asm/gart.h> |
33 | #include <asm/x86_init.h> | ||
32 | 34 | ||
33 | /* | 35 | /* |
34 | * definitions for the ACPI scanning code | 36 | * definitions for the ACPI scanning code |
@@ -123,18 +125,24 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have | |||
123 | to handle */ | 125 | to handle */ |
124 | LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings | 126 | LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings |
125 | we find in ACPI */ | 127 | we find in ACPI */ |
126 | #ifdef CONFIG_IOMMU_STRESS | ||
127 | bool amd_iommu_isolate = false; | ||
128 | #else | ||
129 | bool amd_iommu_isolate = true; /* if true, device isolation is | ||
130 | enabled */ | ||
131 | #endif | ||
132 | |||
133 | bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ | 128 | bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ |
134 | 129 | ||
135 | LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the | 130 | LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the |
136 | system */ | 131 | system */ |
137 | 132 | ||
133 | /* Array to assign indices to IOMMUs*/ | ||
134 | struct amd_iommu *amd_iommus[MAX_IOMMUS]; | ||
135 | int amd_iommus_present; | ||
136 | |||
137 | /* IOMMUs have a non-present cache? */ | ||
138 | bool amd_iommu_np_cache __read_mostly; | ||
139 | |||
140 | /* | ||
141 | * List of protection domains - used during resume | ||
142 | */ | ||
143 | LIST_HEAD(amd_iommu_pd_list); | ||
144 | spinlock_t amd_iommu_pd_lock; | ||
145 | |||
138 | /* | 146 | /* |
139 | * Pointer to the device table which is shared by all AMD IOMMUs | 147 | * Pointer to the device table which is shared by all AMD IOMMUs |
140 | * it is indexed by the PCI device id or the HT unit id and contains | 148 | * it is indexed by the PCI device id or the HT unit id and contains |
@@ -157,12 +165,6 @@ u16 *amd_iommu_alias_table; | |||
157 | struct amd_iommu **amd_iommu_rlookup_table; | 165 | struct amd_iommu **amd_iommu_rlookup_table; |
158 | 166 | ||
159 | /* | 167 | /* |
160 | * The pd table (protection domain table) is used to find the protection domain | ||
161 | * data structure a device belongs to. Indexed with the PCI device id too. | ||
162 | */ | ||
163 | struct protection_domain **amd_iommu_pd_table; | ||
164 | |||
165 | /* | ||
166 | * AMD IOMMU allows up to 2^16 differend protection domains. This is a bitmap | 168 | * AMD IOMMU allows up to 2^16 differend protection domains. This is a bitmap |
167 | * to know which ones are already in use. | 169 | * to know which ones are already in use. |
168 | */ | 170 | */ |
@@ -838,7 +840,18 @@ static void __init free_iommu_all(void) | |||
838 | static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) | 840 | static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) |
839 | { | 841 | { |
840 | spin_lock_init(&iommu->lock); | 842 | spin_lock_init(&iommu->lock); |
843 | |||
844 | /* Add IOMMU to internal data structures */ | ||
841 | list_add_tail(&iommu->list, &amd_iommu_list); | 845 | list_add_tail(&iommu->list, &amd_iommu_list); |
846 | iommu->index = amd_iommus_present++; | ||
847 | |||
848 | if (unlikely(iommu->index >= MAX_IOMMUS)) { | ||
849 | WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n"); | ||
850 | return -ENOSYS; | ||
851 | } | ||
852 | |||
853 | /* Index is fine - add IOMMU to the array */ | ||
854 | amd_iommus[iommu->index] = iommu; | ||
842 | 855 | ||
843 | /* | 856 | /* |
844 | * Copy data from ACPI table entry to the iommu struct | 857 | * Copy data from ACPI table entry to the iommu struct |
@@ -868,6 +881,9 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) | |||
868 | init_iommu_from_acpi(iommu, h); | 881 | init_iommu_from_acpi(iommu, h); |
869 | init_iommu_devices(iommu); | 882 | init_iommu_devices(iommu); |
870 | 883 | ||
884 | if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) | ||
885 | amd_iommu_np_cache = true; | ||
886 | |||
871 | return pci_enable_device(iommu->dev); | 887 | return pci_enable_device(iommu->dev); |
872 | } | 888 | } |
873 | 889 | ||
@@ -925,7 +941,7 @@ static int __init init_iommu_all(struct acpi_table_header *table) | |||
925 | * | 941 | * |
926 | ****************************************************************************/ | 942 | ****************************************************************************/ |
927 | 943 | ||
928 | static int __init iommu_setup_msi(struct amd_iommu *iommu) | 944 | static int iommu_setup_msi(struct amd_iommu *iommu) |
929 | { | 945 | { |
930 | int r; | 946 | int r; |
931 | 947 | ||
@@ -1176,19 +1192,10 @@ static struct sys_device device_amd_iommu = { | |||
1176 | * functions. Finally it prints some information about AMD IOMMUs and | 1192 | * functions. Finally it prints some information about AMD IOMMUs and |
1177 | * the driver state and enables the hardware. | 1193 | * the driver state and enables the hardware. |
1178 | */ | 1194 | */ |
1179 | int __init amd_iommu_init(void) | 1195 | static int __init amd_iommu_init(void) |
1180 | { | 1196 | { |
1181 | int i, ret = 0; | 1197 | int i, ret = 0; |
1182 | 1198 | ||
1183 | |||
1184 | if (no_iommu) { | ||
1185 | printk(KERN_INFO "AMD-Vi disabled by kernel command line\n"); | ||
1186 | return 0; | ||
1187 | } | ||
1188 | |||
1189 | if (!amd_iommu_detected) | ||
1190 | return -ENODEV; | ||
1191 | |||
1192 | /* | 1199 | /* |
1193 | * First parse ACPI tables to find the largest Bus/Dev/Func | 1200 | * First parse ACPI tables to find the largest Bus/Dev/Func |
1194 | * we need to handle. Upon this information the shared data | 1201 | * we need to handle. Upon this information the shared data |
@@ -1225,15 +1232,6 @@ int __init amd_iommu_init(void) | |||
1225 | if (amd_iommu_rlookup_table == NULL) | 1232 | if (amd_iommu_rlookup_table == NULL) |
1226 | goto free; | 1233 | goto free; |
1227 | 1234 | ||
1228 | /* | ||
1229 | * Protection Domain table - maps devices to protection domains | ||
1230 | * This table has the same size as the rlookup_table | ||
1231 | */ | ||
1232 | amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, | ||
1233 | get_order(rlookup_table_size)); | ||
1234 | if (amd_iommu_pd_table == NULL) | ||
1235 | goto free; | ||
1236 | |||
1237 | amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( | 1235 | amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( |
1238 | GFP_KERNEL | __GFP_ZERO, | 1236 | GFP_KERNEL | __GFP_ZERO, |
1239 | get_order(MAX_DOMAIN_ID/8)); | 1237 | get_order(MAX_DOMAIN_ID/8)); |
@@ -1255,6 +1253,8 @@ int __init amd_iommu_init(void) | |||
1255 | */ | 1253 | */ |
1256 | amd_iommu_pd_alloc_bitmap[0] = 1; | 1254 | amd_iommu_pd_alloc_bitmap[0] = 1; |
1257 | 1255 | ||
1256 | spin_lock_init(&amd_iommu_pd_lock); | ||
1257 | |||
1258 | /* | 1258 | /* |
1259 | * now the data structures are allocated and basically initialized | 1259 | * now the data structures are allocated and basically initialized |
1260 | * start the real acpi table scan | 1260 | * start the real acpi table scan |
@@ -1286,17 +1286,12 @@ int __init amd_iommu_init(void) | |||
1286 | if (iommu_pass_through) | 1286 | if (iommu_pass_through) |
1287 | goto out; | 1287 | goto out; |
1288 | 1288 | ||
1289 | printk(KERN_INFO "AMD-Vi: device isolation "); | ||
1290 | if (amd_iommu_isolate) | ||
1291 | printk("enabled\n"); | ||
1292 | else | ||
1293 | printk("disabled\n"); | ||
1294 | |||
1295 | if (amd_iommu_unmap_flush) | 1289 | if (amd_iommu_unmap_flush) |
1296 | printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n"); | 1290 | printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n"); |
1297 | else | 1291 | else |
1298 | printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n"); | 1292 | printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n"); |
1299 | 1293 | ||
1294 | x86_platform.iommu_shutdown = disable_iommus; | ||
1300 | out: | 1295 | out: |
1301 | return ret; | 1296 | return ret; |
1302 | 1297 | ||
@@ -1304,9 +1299,6 @@ free: | |||
1304 | free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, | 1299 | free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, |
1305 | get_order(MAX_DOMAIN_ID/8)); | 1300 | get_order(MAX_DOMAIN_ID/8)); |
1306 | 1301 | ||
1307 | free_pages((unsigned long)amd_iommu_pd_table, | ||
1308 | get_order(rlookup_table_size)); | ||
1309 | |||
1310 | free_pages((unsigned long)amd_iommu_rlookup_table, | 1302 | free_pages((unsigned long)amd_iommu_rlookup_table, |
1311 | get_order(rlookup_table_size)); | 1303 | get_order(rlookup_table_size)); |
1312 | 1304 | ||
@@ -1323,11 +1315,6 @@ free: | |||
1323 | goto out; | 1315 | goto out; |
1324 | } | 1316 | } |
1325 | 1317 | ||
1326 | void amd_iommu_shutdown(void) | ||
1327 | { | ||
1328 | disable_iommus(); | ||
1329 | } | ||
1330 | |||
1331 | /**************************************************************************** | 1318 | /**************************************************************************** |
1332 | * | 1319 | * |
1333 | * Early detect code. This code runs at IOMMU detection time in the DMA | 1320 | * Early detect code. This code runs at IOMMU detection time in the DMA |
@@ -1342,16 +1329,13 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table) | |||
1342 | 1329 | ||
1343 | void __init amd_iommu_detect(void) | 1330 | void __init amd_iommu_detect(void) |
1344 | { | 1331 | { |
1345 | if (swiotlb || no_iommu || (iommu_detected && !gart_iommu_aperture)) | 1332 | if (no_iommu || (iommu_detected && !gart_iommu_aperture)) |
1346 | return; | 1333 | return; |
1347 | 1334 | ||
1348 | if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { | 1335 | if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { |
1349 | iommu_detected = 1; | 1336 | iommu_detected = 1; |
1350 | amd_iommu_detected = 1; | 1337 | amd_iommu_detected = 1; |
1351 | #ifdef CONFIG_GART_IOMMU | 1338 | x86_init.iommu.iommu_init = amd_iommu_init; |
1352 | gart_iommu_aperture_disabled = 1; | ||
1353 | gart_iommu_aperture = 0; | ||
1354 | #endif | ||
1355 | } | 1339 | } |
1356 | } | 1340 | } |
1357 | 1341 | ||
@@ -1372,10 +1356,6 @@ static int __init parse_amd_iommu_dump(char *str) | |||
1372 | static int __init parse_amd_iommu_options(char *str) | 1356 | static int __init parse_amd_iommu_options(char *str) |
1373 | { | 1357 | { |
1374 | for (; *str; ++str) { | 1358 | for (; *str; ++str) { |
1375 | if (strncmp(str, "isolate", 7) == 0) | ||
1376 | amd_iommu_isolate = true; | ||
1377 | if (strncmp(str, "share", 5) == 0) | ||
1378 | amd_iommu_isolate = false; | ||
1379 | if (strncmp(str, "fullflush", 9) == 0) | 1359 | if (strncmp(str, "fullflush", 9) == 0) |
1380 | amd_iommu_unmap_flush = true; | 1360 | amd_iommu_unmap_flush = true; |
1381 | } | 1361 | } |
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 128111d8ffe0..e0dfb6856aa2 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <asm/pci-direct.h> | 28 | #include <asm/pci-direct.h> |
29 | #include <asm/dma.h> | 29 | #include <asm/dma.h> |
30 | #include <asm/k8.h> | 30 | #include <asm/k8.h> |
31 | #include <asm/x86_init.h> | ||
31 | 32 | ||
32 | int gart_iommu_aperture; | 33 | int gart_iommu_aperture; |
33 | int gart_iommu_aperture_disabled __initdata; | 34 | int gart_iommu_aperture_disabled __initdata; |
@@ -400,6 +401,7 @@ void __init gart_iommu_hole_init(void) | |||
400 | 401 | ||
401 | iommu_detected = 1; | 402 | iommu_detected = 1; |
402 | gart_iommu_aperture = 1; | 403 | gart_iommu_aperture = 1; |
404 | x86_init.iommu.iommu_init = gart_iommu_init; | ||
403 | 405 | ||
404 | aper_order = (read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL) >> 1) & 7; | 406 | aper_order = (read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL) >> 1) & 7; |
405 | aper_size = (32 * 1024 * 1024) << aper_order; | 407 | aper_size = (32 * 1024 * 1024) << aper_order; |
@@ -456,7 +458,7 @@ out: | |||
456 | 458 | ||
457 | if (aper_alloc) { | 459 | if (aper_alloc) { |
458 | /* Got the aperture from the AGP bridge */ | 460 | /* Got the aperture from the AGP bridge */ |
459 | } else if (swiotlb && !valid_agp) { | 461 | } else if (!valid_agp) { |
460 | /* Do nothing */ | 462 | /* Do nothing */ |
461 | } else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) || | 463 | } else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) || |
462 | force_iommu || | 464 | force_iommu || |
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index da7b7b9f8bd8..565c1bfc507d 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile | |||
@@ -2,7 +2,7 @@ | |||
2 | # Makefile for local APIC drivers and for the IO-APIC code | 2 | # Makefile for local APIC drivers and for the IO-APIC code |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_X86_LOCAL_APIC) += apic.o probe_$(BITS).o ipi.o nmi.o | 5 | obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o nmi.o |
6 | obj-$(CONFIG_X86_IO_APIC) += io_apic.o | 6 | obj-$(CONFIG_X86_IO_APIC) += io_apic.o |
7 | obj-$(CONFIG_SMP) += ipi.o | 7 | obj-$(CONFIG_SMP) += ipi.o |
8 | 8 | ||
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 894aa97f0717..ad8c75b9e453 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -241,28 +241,13 @@ static int modern_apic(void) | |||
241 | } | 241 | } |
242 | 242 | ||
243 | /* | 243 | /* |
244 | * bare function to substitute write operation | 244 | * right after this call apic become NOOP driven |
245 | * and it's _that_ fast :) | 245 | * so apic->write/read doesn't do anything |
246 | */ | ||
247 | static void native_apic_write_dummy(u32 reg, u32 v) | ||
248 | { | ||
249 | WARN_ON_ONCE((cpu_has_apic || !disable_apic)); | ||
250 | } | ||
251 | |||
252 | static u32 native_apic_read_dummy(u32 reg) | ||
253 | { | ||
254 | WARN_ON_ONCE((cpu_has_apic && !disable_apic)); | ||
255 | return 0; | ||
256 | } | ||
257 | |||
258 | /* | ||
259 | * right after this call apic->write/read doesn't do anything | ||
260 | * note that there is no restore operation it works one way | ||
261 | */ | 246 | */ |
262 | void apic_disable(void) | 247 | void apic_disable(void) |
263 | { | 248 | { |
264 | apic->read = native_apic_read_dummy; | 249 | pr_info("APIC: switched to apic NOOP\n"); |
265 | apic->write = native_apic_write_dummy; | 250 | apic = &apic_noop; |
266 | } | 251 | } |
267 | 252 | ||
268 | void native_apic_wait_icr_idle(void) | 253 | void native_apic_wait_icr_idle(void) |
@@ -459,7 +444,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, | |||
459 | v = apic_read(APIC_LVTT); | 444 | v = apic_read(APIC_LVTT); |
460 | v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); | 445 | v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); |
461 | apic_write(APIC_LVTT, v); | 446 | apic_write(APIC_LVTT, v); |
462 | apic_write(APIC_TMICT, 0xffffffff); | 447 | apic_write(APIC_TMICT, 0); |
463 | break; | 448 | break; |
464 | case CLOCK_EVT_MODE_RESUME: | 449 | case CLOCK_EVT_MODE_RESUME: |
465 | /* Nothing to do here */ | 450 | /* Nothing to do here */ |
@@ -1392,14 +1377,11 @@ void __init enable_IR_x2apic(void) | |||
1392 | unsigned long flags; | 1377 | unsigned long flags; |
1393 | struct IO_APIC_route_entry **ioapic_entries = NULL; | 1378 | struct IO_APIC_route_entry **ioapic_entries = NULL; |
1394 | int ret, x2apic_enabled = 0; | 1379 | int ret, x2apic_enabled = 0; |
1395 | int dmar_table_init_ret = 0; | 1380 | int dmar_table_init_ret; |
1396 | 1381 | ||
1397 | #ifdef CONFIG_INTR_REMAP | ||
1398 | dmar_table_init_ret = dmar_table_init(); | 1382 | dmar_table_init_ret = dmar_table_init(); |
1399 | if (dmar_table_init_ret) | 1383 | if (dmar_table_init_ret && !x2apic_supported()) |
1400 | pr_debug("dmar_table_init() failed with %d:\n", | 1384 | return; |
1401 | dmar_table_init_ret); | ||
1402 | #endif | ||
1403 | 1385 | ||
1404 | ioapic_entries = alloc_ioapic_entries(); | 1386 | ioapic_entries = alloc_ioapic_entries(); |
1405 | if (!ioapic_entries) { | 1387 | if (!ioapic_entries) { |
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c new file mode 100644 index 000000000000..d9acc3bee0f4 --- /dev/null +++ b/arch/x86/kernel/apic/apic_noop.c | |||
@@ -0,0 +1,200 @@ | |||
1 | /* | ||
2 | * NOOP APIC driver. | ||
3 | * | ||
4 | * Does almost nothing and should be substituted by a real apic driver via | ||
5 | * probe routine. | ||
6 | * | ||
7 | * Though in case if apic is disabled (for some reason) we try | ||
8 | * to not uglify the caller's code and allow to call (some) apic routines | ||
9 | * like self-ipi, etc... | ||
10 | */ | ||
11 | |||
12 | #include <linux/threads.h> | ||
13 | #include <linux/cpumask.h> | ||
14 | #include <linux/module.h> | ||
15 | #include <linux/string.h> | ||
16 | #include <linux/kernel.h> | ||
17 | #include <linux/ctype.h> | ||
18 | #include <linux/init.h> | ||
19 | #include <linux/errno.h> | ||
20 | #include <asm/fixmap.h> | ||
21 | #include <asm/mpspec.h> | ||
22 | #include <asm/apicdef.h> | ||
23 | #include <asm/apic.h> | ||
24 | #include <asm/setup.h> | ||
25 | |||
26 | #include <linux/smp.h> | ||
27 | #include <asm/ipi.h> | ||
28 | |||
29 | #include <linux/interrupt.h> | ||
30 | #include <asm/acpi.h> | ||
31 | #include <asm/e820.h> | ||
32 | |||
33 | static void noop_init_apic_ldr(void) { } | ||
34 | static void noop_send_IPI_mask(const struct cpumask *cpumask, int vector) { } | ||
35 | static void noop_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { } | ||
36 | static void noop_send_IPI_allbutself(int vector) { } | ||
37 | static void noop_send_IPI_all(int vector) { } | ||
38 | static void noop_send_IPI_self(int vector) { } | ||
39 | static void noop_apic_wait_icr_idle(void) { } | ||
40 | static void noop_apic_icr_write(u32 low, u32 id) { } | ||
41 | |||
42 | static int noop_wakeup_secondary_cpu(int apicid, unsigned long start_eip) | ||
43 | { | ||
44 | return -1; | ||
45 | } | ||
46 | |||
47 | static u32 noop_safe_apic_wait_icr_idle(void) | ||
48 | { | ||
49 | return 0; | ||
50 | } | ||
51 | |||
52 | static u64 noop_apic_icr_read(void) | ||
53 | { | ||
54 | return 0; | ||
55 | } | ||
56 | |||
57 | static int noop_cpu_to_logical_apicid(int cpu) | ||
58 | { | ||
59 | return 0; | ||
60 | } | ||
61 | |||
62 | static int noop_phys_pkg_id(int cpuid_apic, int index_msb) | ||
63 | { | ||
64 | return 0; | ||
65 | } | ||
66 | |||
67 | static unsigned int noop_get_apic_id(unsigned long x) | ||
68 | { | ||
69 | return 0; | ||
70 | } | ||
71 | |||
72 | static int noop_probe(void) | ||
73 | { | ||
74 | /* | ||
75 | * NOOP apic should not ever be | ||
76 | * enabled via probe routine | ||
77 | */ | ||
78 | return 0; | ||
79 | } | ||
80 | |||
81 | static int noop_apic_id_registered(void) | ||
82 | { | ||
83 | /* | ||
84 | * if we would be really "pedantic" | ||
85 | * we should pass read_apic_id() here | ||
86 | * but since NOOP suppose APIC ID = 0 | ||
87 | * lets save a few cycles | ||
88 | */ | ||
89 | return physid_isset(0, phys_cpu_present_map); | ||
90 | } | ||
91 | |||
92 | static const struct cpumask *noop_target_cpus(void) | ||
93 | { | ||
94 | /* only BSP here */ | ||
95 | return cpumask_of(0); | ||
96 | } | ||
97 | |||
98 | static unsigned long noop_check_apicid_used(physid_mask_t *map, int apicid) | ||
99 | { | ||
100 | return physid_isset(apicid, *map); | ||
101 | } | ||
102 | |||
103 | static unsigned long noop_check_apicid_present(int bit) | ||
104 | { | ||
105 | return physid_isset(bit, phys_cpu_present_map); | ||
106 | } | ||
107 | |||
108 | static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask) | ||
109 | { | ||
110 | if (cpu != 0) | ||
111 | pr_warning("APIC: Vector allocated for non-BSP cpu\n"); | ||
112 | cpumask_clear(retmask); | ||
113 | cpumask_set_cpu(cpu, retmask); | ||
114 | } | ||
115 | |||
116 | int noop_apicid_to_node(int logical_apicid) | ||
117 | { | ||
118 | /* we're always on node 0 */ | ||
119 | return 0; | ||
120 | } | ||
121 | |||
122 | static u32 noop_apic_read(u32 reg) | ||
123 | { | ||
124 | WARN_ON_ONCE((cpu_has_apic && !disable_apic)); | ||
125 | return 0; | ||
126 | } | ||
127 | |||
128 | static void noop_apic_write(u32 reg, u32 v) | ||
129 | { | ||
130 | WARN_ON_ONCE((cpu_has_apic || !disable_apic)); | ||
131 | } | ||
132 | |||
133 | struct apic apic_noop = { | ||
134 | .name = "noop", | ||
135 | .probe = noop_probe, | ||
136 | .acpi_madt_oem_check = NULL, | ||
137 | |||
138 | .apic_id_registered = noop_apic_id_registered, | ||
139 | |||
140 | .irq_delivery_mode = dest_LowestPrio, | ||
141 | /* logical delivery broadcast to all CPUs: */ | ||
142 | .irq_dest_mode = 1, | ||
143 | |||
144 | .target_cpus = noop_target_cpus, | ||
145 | .disable_esr = 0, | ||
146 | .dest_logical = APIC_DEST_LOGICAL, | ||
147 | .check_apicid_used = noop_check_apicid_used, | ||
148 | .check_apicid_present = noop_check_apicid_present, | ||
149 | |||
150 | .vector_allocation_domain = noop_vector_allocation_domain, | ||
151 | .init_apic_ldr = noop_init_apic_ldr, | ||
152 | |||
153 | .ioapic_phys_id_map = default_ioapic_phys_id_map, | ||
154 | .setup_apic_routing = NULL, | ||
155 | .multi_timer_check = NULL, | ||
156 | .apicid_to_node = noop_apicid_to_node, | ||
157 | |||
158 | .cpu_to_logical_apicid = noop_cpu_to_logical_apicid, | ||
159 | .cpu_present_to_apicid = default_cpu_present_to_apicid, | ||
160 | .apicid_to_cpu_present = physid_set_mask_of_physid, | ||
161 | |||
162 | .setup_portio_remap = NULL, | ||
163 | .check_phys_apicid_present = default_check_phys_apicid_present, | ||
164 | .enable_apic_mode = NULL, | ||
165 | |||
166 | .phys_pkg_id = noop_phys_pkg_id, | ||
167 | |||
168 | .mps_oem_check = NULL, | ||
169 | |||
170 | .get_apic_id = noop_get_apic_id, | ||
171 | .set_apic_id = NULL, | ||
172 | .apic_id_mask = 0x0F << 24, | ||
173 | |||
174 | .cpu_mask_to_apicid = default_cpu_mask_to_apicid, | ||
175 | .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, | ||
176 | |||
177 | .send_IPI_mask = noop_send_IPI_mask, | ||
178 | .send_IPI_mask_allbutself = noop_send_IPI_mask_allbutself, | ||
179 | .send_IPI_allbutself = noop_send_IPI_allbutself, | ||
180 | .send_IPI_all = noop_send_IPI_all, | ||
181 | .send_IPI_self = noop_send_IPI_self, | ||
182 | |||
183 | .wakeup_secondary_cpu = noop_wakeup_secondary_cpu, | ||
184 | |||
185 | /* should be safe */ | ||
186 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, | ||
187 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, | ||
188 | |||
189 | .wait_for_init_deassert = NULL, | ||
190 | |||
191 | .smp_callin_clear_local_apic = NULL, | ||
192 | .inquire_remote_apic = NULL, | ||
193 | |||
194 | .read = noop_apic_read, | ||
195 | .write = noop_apic_write, | ||
196 | .icr_read = noop_apic_icr_read, | ||
197 | .icr_write = noop_apic_icr_write, | ||
198 | .wait_icr_idle = noop_apic_wait_icr_idle, | ||
199 | .safe_wait_icr_idle = noop_safe_apic_wait_icr_idle, | ||
200 | }; | ||
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 77a06413b6b2..38dcecfa5818 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c | |||
@@ -35,7 +35,7 @@ static const struct cpumask *bigsmp_target_cpus(void) | |||
35 | #endif | 35 | #endif |
36 | } | 36 | } |
37 | 37 | ||
38 | static unsigned long bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid) | 38 | static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid) |
39 | { | 39 | { |
40 | return 0; | 40 | return 0; |
41 | } | 41 | } |
@@ -93,11 +93,6 @@ static int bigsmp_cpu_present_to_apicid(int mps_cpu) | |||
93 | return BAD_APICID; | 93 | return BAD_APICID; |
94 | } | 94 | } |
95 | 95 | ||
96 | static physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid) | ||
97 | { | ||
98 | return physid_mask_of_physid(phys_apicid); | ||
99 | } | ||
100 | |||
101 | /* Mapping from cpu number to logical apicid */ | 96 | /* Mapping from cpu number to logical apicid */ |
102 | static inline int bigsmp_cpu_to_logical_apicid(int cpu) | 97 | static inline int bigsmp_cpu_to_logical_apicid(int cpu) |
103 | { | 98 | { |
@@ -106,10 +101,10 @@ static inline int bigsmp_cpu_to_logical_apicid(int cpu) | |||
106 | return cpu_physical_id(cpu); | 101 | return cpu_physical_id(cpu); |
107 | } | 102 | } |
108 | 103 | ||
109 | static physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map) | 104 | static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) |
110 | { | 105 | { |
111 | /* For clustered we don't have a good way to do this yet - hack */ | 106 | /* For clustered we don't have a good way to do this yet - hack */ |
112 | return physids_promote(0xFFL); | 107 | physids_promote(0xFFL, retmap); |
113 | } | 108 | } |
114 | 109 | ||
115 | static int bigsmp_check_phys_apicid_present(int phys_apicid) | 110 | static int bigsmp_check_phys_apicid_present(int phys_apicid) |
@@ -230,7 +225,7 @@ struct apic apic_bigsmp = { | |||
230 | .apicid_to_node = bigsmp_apicid_to_node, | 225 | .apicid_to_node = bigsmp_apicid_to_node, |
231 | .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid, | 226 | .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid, |
232 | .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, | 227 | .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, |
233 | .apicid_to_cpu_present = bigsmp_apicid_to_cpu_present, | 228 | .apicid_to_cpu_present = physid_set_mask_of_physid, |
234 | .setup_portio_remap = NULL, | 229 | .setup_portio_remap = NULL, |
235 | .check_phys_apicid_present = bigsmp_check_phys_apicid_present, | 230 | .check_phys_apicid_present = bigsmp_check_phys_apicid_present, |
236 | .enable_apic_mode = NULL, | 231 | .enable_apic_mode = NULL, |
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 89174f847b49..e85f8fb7f8e7 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c | |||
@@ -466,11 +466,11 @@ static const struct cpumask *es7000_target_cpus(void) | |||
466 | return cpumask_of(smp_processor_id()); | 466 | return cpumask_of(smp_processor_id()); |
467 | } | 467 | } |
468 | 468 | ||
469 | static unsigned long | 469 | static unsigned long es7000_check_apicid_used(physid_mask_t *map, int apicid) |
470 | es7000_check_apicid_used(physid_mask_t bitmap, int apicid) | ||
471 | { | 470 | { |
472 | return 0; | 471 | return 0; |
473 | } | 472 | } |
473 | |||
474 | static unsigned long es7000_check_apicid_present(int bit) | 474 | static unsigned long es7000_check_apicid_present(int bit) |
475 | { | 475 | { |
476 | return physid_isset(bit, phys_cpu_present_map); | 476 | return physid_isset(bit, phys_cpu_present_map); |
@@ -539,14 +539,10 @@ static int es7000_cpu_present_to_apicid(int mps_cpu) | |||
539 | 539 | ||
540 | static int cpu_id; | 540 | static int cpu_id; |
541 | 541 | ||
542 | static physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid) | 542 | static void es7000_apicid_to_cpu_present(int phys_apicid, physid_mask_t *retmap) |
543 | { | 543 | { |
544 | physid_mask_t mask; | 544 | physid_set_mask_of_physid(cpu_id, retmap); |
545 | |||
546 | mask = physid_mask_of_physid(cpu_id); | ||
547 | ++cpu_id; | 545 | ++cpu_id; |
548 | |||
549 | return mask; | ||
550 | } | 546 | } |
551 | 547 | ||
552 | /* Mapping from cpu number to logical apicid */ | 548 | /* Mapping from cpu number to logical apicid */ |
@@ -561,10 +557,10 @@ static int es7000_cpu_to_logical_apicid(int cpu) | |||
561 | #endif | 557 | #endif |
562 | } | 558 | } |
563 | 559 | ||
564 | static physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map) | 560 | static void es7000_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) |
565 | { | 561 | { |
566 | /* For clustered we don't have a good way to do this yet - hack */ | 562 | /* For clustered we don't have a good way to do this yet - hack */ |
567 | return physids_promote(0xff); | 563 | physids_promote(0xFFL, retmap); |
568 | } | 564 | } |
569 | 565 | ||
570 | static int es7000_check_phys_apicid_present(int cpu_physical_apicid) | 566 | static int es7000_check_phys_apicid_present(int cpu_physical_apicid) |
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index dc69f28489f5..c0b4468683f9 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -60,8 +60,6 @@ | |||
60 | #include <asm/irq_remapping.h> | 60 | #include <asm/irq_remapping.h> |
61 | #include <asm/hpet.h> | 61 | #include <asm/hpet.h> |
62 | #include <asm/hw_irq.h> | 62 | #include <asm/hw_irq.h> |
63 | #include <asm/uv/uv_hub.h> | ||
64 | #include <asm/uv/uv_irq.h> | ||
65 | 63 | ||
66 | #include <asm/apic.h> | 64 | #include <asm/apic.h> |
67 | 65 | ||
@@ -140,20 +138,6 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node) | |||
140 | return pin; | 138 | return pin; |
141 | } | 139 | } |
142 | 140 | ||
143 | /* | ||
144 | * This is performance-critical, we want to do it O(1) | ||
145 | * | ||
146 | * Most irqs are mapped 1:1 with pins. | ||
147 | */ | ||
148 | struct irq_cfg { | ||
149 | struct irq_pin_list *irq_2_pin; | ||
150 | cpumask_var_t domain; | ||
151 | cpumask_var_t old_domain; | ||
152 | unsigned move_cleanup_count; | ||
153 | u8 vector; | ||
154 | u8 move_in_progress : 1; | ||
155 | }; | ||
156 | |||
157 | /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ | 141 | /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ |
158 | #ifdef CONFIG_SPARSE_IRQ | 142 | #ifdef CONFIG_SPARSE_IRQ |
159 | static struct irq_cfg irq_cfgx[] = { | 143 | static struct irq_cfg irq_cfgx[] = { |
@@ -209,7 +193,7 @@ int __init arch_early_irq_init(void) | |||
209 | } | 193 | } |
210 | 194 | ||
211 | #ifdef CONFIG_SPARSE_IRQ | 195 | #ifdef CONFIG_SPARSE_IRQ |
212 | static struct irq_cfg *irq_cfg(unsigned int irq) | 196 | struct irq_cfg *irq_cfg(unsigned int irq) |
213 | { | 197 | { |
214 | struct irq_cfg *cfg = NULL; | 198 | struct irq_cfg *cfg = NULL; |
215 | struct irq_desc *desc; | 199 | struct irq_desc *desc; |
@@ -361,7 +345,7 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) | |||
361 | /* end for move_irq_desc */ | 345 | /* end for move_irq_desc */ |
362 | 346 | ||
363 | #else | 347 | #else |
364 | static struct irq_cfg *irq_cfg(unsigned int irq) | 348 | struct irq_cfg *irq_cfg(unsigned int irq) |
365 | { | 349 | { |
366 | return irq < nr_irqs ? irq_cfgx + irq : NULL; | 350 | return irq < nr_irqs ? irq_cfgx + irq : NULL; |
367 | } | 351 | } |
@@ -555,23 +539,41 @@ static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, | |||
555 | add_pin_to_irq_node(cfg, node, newapic, newpin); | 539 | add_pin_to_irq_node(cfg, node, newapic, newpin); |
556 | } | 540 | } |
557 | 541 | ||
542 | static void __io_apic_modify_irq(struct irq_pin_list *entry, | ||
543 | int mask_and, int mask_or, | ||
544 | void (*final)(struct irq_pin_list *entry)) | ||
545 | { | ||
546 | unsigned int reg, pin; | ||
547 | |||
548 | pin = entry->pin; | ||
549 | reg = io_apic_read(entry->apic, 0x10 + pin * 2); | ||
550 | reg &= mask_and; | ||
551 | reg |= mask_or; | ||
552 | io_apic_modify(entry->apic, 0x10 + pin * 2, reg); | ||
553 | if (final) | ||
554 | final(entry); | ||
555 | } | ||
556 | |||
558 | static void io_apic_modify_irq(struct irq_cfg *cfg, | 557 | static void io_apic_modify_irq(struct irq_cfg *cfg, |
559 | int mask_and, int mask_or, | 558 | int mask_and, int mask_or, |
560 | void (*final)(struct irq_pin_list *entry)) | 559 | void (*final)(struct irq_pin_list *entry)) |
561 | { | 560 | { |
562 | int pin; | ||
563 | struct irq_pin_list *entry; | 561 | struct irq_pin_list *entry; |
564 | 562 | ||
565 | for_each_irq_pin(entry, cfg->irq_2_pin) { | 563 | for_each_irq_pin(entry, cfg->irq_2_pin) |
566 | unsigned int reg; | 564 | __io_apic_modify_irq(entry, mask_and, mask_or, final); |
567 | pin = entry->pin; | 565 | } |
568 | reg = io_apic_read(entry->apic, 0x10 + pin * 2); | 566 | |
569 | reg &= mask_and; | 567 | static void __mask_and_edge_IO_APIC_irq(struct irq_pin_list *entry) |
570 | reg |= mask_or; | 568 | { |
571 | io_apic_modify(entry->apic, 0x10 + pin * 2, reg); | 569 | __io_apic_modify_irq(entry, ~IO_APIC_REDIR_LEVEL_TRIGGER, |
572 | if (final) | 570 | IO_APIC_REDIR_MASKED, NULL); |
573 | final(entry); | 571 | } |
574 | } | 572 | |
573 | static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry) | ||
574 | { | ||
575 | __io_apic_modify_irq(entry, ~IO_APIC_REDIR_MASKED, | ||
576 | IO_APIC_REDIR_LEVEL_TRIGGER, NULL); | ||
575 | } | 577 | } |
576 | 578 | ||
577 | static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) | 579 | static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) |
@@ -595,18 +597,6 @@ static void __mask_IO_APIC_irq(struct irq_cfg *cfg) | |||
595 | io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); | 597 | io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); |
596 | } | 598 | } |
597 | 599 | ||
598 | static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) | ||
599 | { | ||
600 | io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER, | ||
601 | IO_APIC_REDIR_MASKED, NULL); | ||
602 | } | ||
603 | |||
604 | static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg) | ||
605 | { | ||
606 | io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, | ||
607 | IO_APIC_REDIR_LEVEL_TRIGGER, NULL); | ||
608 | } | ||
609 | |||
610 | static void mask_IO_APIC_irq_desc(struct irq_desc *desc) | 600 | static void mask_IO_APIC_irq_desc(struct irq_desc *desc) |
611 | { | 601 | { |
612 | struct irq_cfg *cfg = desc->chip_data; | 602 | struct irq_cfg *cfg = desc->chip_data; |
@@ -1177,7 +1167,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) | |||
1177 | int cpu, err; | 1167 | int cpu, err; |
1178 | cpumask_var_t tmp_mask; | 1168 | cpumask_var_t tmp_mask; |
1179 | 1169 | ||
1180 | if ((cfg->move_in_progress) || cfg->move_cleanup_count) | 1170 | if (cfg->move_in_progress) |
1181 | return -EBUSY; | 1171 | return -EBUSY; |
1182 | 1172 | ||
1183 | if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) | 1173 | if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) |
@@ -1237,8 +1227,7 @@ next: | |||
1237 | return err; | 1227 | return err; |
1238 | } | 1228 | } |
1239 | 1229 | ||
1240 | static int | 1230 | int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) |
1241 | assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) | ||
1242 | { | 1231 | { |
1243 | int err; | 1232 | int err; |
1244 | unsigned long flags; | 1233 | unsigned long flags; |
@@ -1599,9 +1588,6 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
1599 | struct irq_desc *desc; | 1588 | struct irq_desc *desc; |
1600 | unsigned int irq; | 1589 | unsigned int irq; |
1601 | 1590 | ||
1602 | if (apic_verbosity == APIC_QUIET) | ||
1603 | return; | ||
1604 | |||
1605 | printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); | 1591 | printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); |
1606 | for (i = 0; i < nr_ioapics; i++) | 1592 | for (i = 0; i < nr_ioapics; i++) |
1607 | printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", | 1593 | printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", |
@@ -1708,9 +1694,6 @@ __apicdebuginit(void) print_APIC_field(int base) | |||
1708 | { | 1694 | { |
1709 | int i; | 1695 | int i; |
1710 | 1696 | ||
1711 | if (apic_verbosity == APIC_QUIET) | ||
1712 | return; | ||
1713 | |||
1714 | printk(KERN_DEBUG); | 1697 | printk(KERN_DEBUG); |
1715 | 1698 | ||
1716 | for (i = 0; i < 8; i++) | 1699 | for (i = 0; i < 8; i++) |
@@ -1724,9 +1707,6 @@ __apicdebuginit(void) print_local_APIC(void *dummy) | |||
1724 | unsigned int i, v, ver, maxlvt; | 1707 | unsigned int i, v, ver, maxlvt; |
1725 | u64 icr; | 1708 | u64 icr; |
1726 | 1709 | ||
1727 | if (apic_verbosity == APIC_QUIET) | ||
1728 | return; | ||
1729 | |||
1730 | printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", | 1710 | printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", |
1731 | smp_processor_id(), hard_smp_processor_id()); | 1711 | smp_processor_id(), hard_smp_processor_id()); |
1732 | v = apic_read(APIC_ID); | 1712 | v = apic_read(APIC_ID); |
@@ -1824,13 +1804,19 @@ __apicdebuginit(void) print_local_APIC(void *dummy) | |||
1824 | printk("\n"); | 1804 | printk("\n"); |
1825 | } | 1805 | } |
1826 | 1806 | ||
1827 | __apicdebuginit(void) print_all_local_APICs(void) | 1807 | __apicdebuginit(void) print_local_APICs(int maxcpu) |
1828 | { | 1808 | { |
1829 | int cpu; | 1809 | int cpu; |
1830 | 1810 | ||
1811 | if (!maxcpu) | ||
1812 | return; | ||
1813 | |||
1831 | preempt_disable(); | 1814 | preempt_disable(); |
1832 | for_each_online_cpu(cpu) | 1815 | for_each_online_cpu(cpu) { |
1816 | if (cpu >= maxcpu) | ||
1817 | break; | ||
1833 | smp_call_function_single(cpu, print_local_APIC, NULL, 1); | 1818 | smp_call_function_single(cpu, print_local_APIC, NULL, 1); |
1819 | } | ||
1834 | preempt_enable(); | 1820 | preempt_enable(); |
1835 | } | 1821 | } |
1836 | 1822 | ||
@@ -1839,7 +1825,7 @@ __apicdebuginit(void) print_PIC(void) | |||
1839 | unsigned int v; | 1825 | unsigned int v; |
1840 | unsigned long flags; | 1826 | unsigned long flags; |
1841 | 1827 | ||
1842 | if (apic_verbosity == APIC_QUIET || !nr_legacy_irqs) | 1828 | if (!nr_legacy_irqs) |
1843 | return; | 1829 | return; |
1844 | 1830 | ||
1845 | printk(KERN_DEBUG "\nprinting PIC contents\n"); | 1831 | printk(KERN_DEBUG "\nprinting PIC contents\n"); |
@@ -1866,21 +1852,41 @@ __apicdebuginit(void) print_PIC(void) | |||
1866 | printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); | 1852 | printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); |
1867 | } | 1853 | } |
1868 | 1854 | ||
1869 | __apicdebuginit(int) print_all_ICs(void) | 1855 | static int __initdata show_lapic = 1; |
1856 | static __init int setup_show_lapic(char *arg) | ||
1870 | { | 1857 | { |
1858 | int num = -1; | ||
1859 | |||
1860 | if (strcmp(arg, "all") == 0) { | ||
1861 | show_lapic = CONFIG_NR_CPUS; | ||
1862 | } else { | ||
1863 | get_option(&arg, &num); | ||
1864 | if (num >= 0) | ||
1865 | show_lapic = num; | ||
1866 | } | ||
1867 | |||
1868 | return 1; | ||
1869 | } | ||
1870 | __setup("show_lapic=", setup_show_lapic); | ||
1871 | |||
1872 | __apicdebuginit(int) print_ICs(void) | ||
1873 | { | ||
1874 | if (apic_verbosity == APIC_QUIET) | ||
1875 | return 0; | ||
1876 | |||
1871 | print_PIC(); | 1877 | print_PIC(); |
1872 | 1878 | ||
1873 | /* don't print out if apic is not there */ | 1879 | /* don't print out if apic is not there */ |
1874 | if (!cpu_has_apic && !apic_from_smp_config()) | 1880 | if (!cpu_has_apic && !apic_from_smp_config()) |
1875 | return 0; | 1881 | return 0; |
1876 | 1882 | ||
1877 | print_all_local_APICs(); | 1883 | print_local_APICs(show_lapic); |
1878 | print_IO_APIC(); | 1884 | print_IO_APIC(); |
1879 | 1885 | ||
1880 | return 0; | 1886 | return 0; |
1881 | } | 1887 | } |
1882 | 1888 | ||
1883 | fs_initcall(print_all_ICs); | 1889 | fs_initcall(print_ICs); |
1884 | 1890 | ||
1885 | 1891 | ||
1886 | /* Where if anywhere is the i8259 connect in external int mode */ | 1892 | /* Where if anywhere is the i8259 connect in external int mode */ |
@@ -2031,7 +2037,7 @@ void __init setup_ioapic_ids_from_mpc(void) | |||
2031 | * This is broken; anything with a real cpu count has to | 2037 | * This is broken; anything with a real cpu count has to |
2032 | * circumvent this idiocy regardless. | 2038 | * circumvent this idiocy regardless. |
2033 | */ | 2039 | */ |
2034 | phys_id_present_map = apic->ioapic_phys_id_map(phys_cpu_present_map); | 2040 | apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map); |
2035 | 2041 | ||
2036 | /* | 2042 | /* |
2037 | * Set the IOAPIC ID to the value stored in the MPC table. | 2043 | * Set the IOAPIC ID to the value stored in the MPC table. |
@@ -2058,7 +2064,7 @@ void __init setup_ioapic_ids_from_mpc(void) | |||
2058 | * system must have a unique ID or we get lots of nice | 2064 | * system must have a unique ID or we get lots of nice |
2059 | * 'stuck on smp_invalidate_needed IPI wait' messages. | 2065 | * 'stuck on smp_invalidate_needed IPI wait' messages. |
2060 | */ | 2066 | */ |
2061 | if (apic->check_apicid_used(phys_id_present_map, | 2067 | if (apic->check_apicid_used(&phys_id_present_map, |
2062 | mp_ioapics[apic_id].apicid)) { | 2068 | mp_ioapics[apic_id].apicid)) { |
2063 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", | 2069 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", |
2064 | apic_id, mp_ioapics[apic_id].apicid); | 2070 | apic_id, mp_ioapics[apic_id].apicid); |
@@ -2073,7 +2079,7 @@ void __init setup_ioapic_ids_from_mpc(void) | |||
2073 | mp_ioapics[apic_id].apicid = i; | 2079 | mp_ioapics[apic_id].apicid = i; |
2074 | } else { | 2080 | } else { |
2075 | physid_mask_t tmp; | 2081 | physid_mask_t tmp; |
2076 | tmp = apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid); | 2082 | apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid, &tmp); |
2077 | apic_printk(APIC_VERBOSE, "Setting %d in the " | 2083 | apic_printk(APIC_VERBOSE, "Setting %d in the " |
2078 | "phys_id_present_map\n", | 2084 | "phys_id_present_map\n", |
2079 | mp_ioapics[apic_id].apicid); | 2085 | mp_ioapics[apic_id].apicid); |
@@ -2228,20 +2234,16 @@ static int ioapic_retrigger_irq(unsigned int irq) | |||
2228 | */ | 2234 | */ |
2229 | 2235 | ||
2230 | #ifdef CONFIG_SMP | 2236 | #ifdef CONFIG_SMP |
2231 | static void send_cleanup_vector(struct irq_cfg *cfg) | 2237 | void send_cleanup_vector(struct irq_cfg *cfg) |
2232 | { | 2238 | { |
2233 | cpumask_var_t cleanup_mask; | 2239 | cpumask_var_t cleanup_mask; |
2234 | 2240 | ||
2235 | if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { | 2241 | if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { |
2236 | unsigned int i; | 2242 | unsigned int i; |
2237 | cfg->move_cleanup_count = 0; | ||
2238 | for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) | ||
2239 | cfg->move_cleanup_count++; | ||
2240 | for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) | 2243 | for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) |
2241 | apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); | 2244 | apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); |
2242 | } else { | 2245 | } else { |
2243 | cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); | 2246 | cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); |
2244 | cfg->move_cleanup_count = cpumask_weight(cleanup_mask); | ||
2245 | apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); | 2247 | apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); |
2246 | free_cpumask_var(cleanup_mask); | 2248 | free_cpumask_var(cleanup_mask); |
2247 | } | 2249 | } |
@@ -2272,15 +2274,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq | |||
2272 | } | 2274 | } |
2273 | } | 2275 | } |
2274 | 2276 | ||
2275 | static int | ||
2276 | assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); | ||
2277 | |||
2278 | /* | 2277 | /* |
2279 | * Either sets desc->affinity to a valid value, and returns | 2278 | * Either sets desc->affinity to a valid value, and returns |
2280 | * ->cpu_mask_to_apicid of that, or returns BAD_APICID and | 2279 | * ->cpu_mask_to_apicid of that, or returns BAD_APICID and |
2281 | * leaves desc->affinity untouched. | 2280 | * leaves desc->affinity untouched. |
2282 | */ | 2281 | */ |
2283 | static unsigned int | 2282 | unsigned int |
2284 | set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) | 2283 | set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) |
2285 | { | 2284 | { |
2286 | struct irq_cfg *cfg; | 2285 | struct irq_cfg *cfg; |
@@ -2433,8 +2432,6 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) | |||
2433 | 2432 | ||
2434 | cfg = irq_cfg(irq); | 2433 | cfg = irq_cfg(irq); |
2435 | spin_lock(&desc->lock); | 2434 | spin_lock(&desc->lock); |
2436 | if (!cfg->move_cleanup_count) | ||
2437 | goto unlock; | ||
2438 | 2435 | ||
2439 | if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) | 2436 | if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) |
2440 | goto unlock; | 2437 | goto unlock; |
@@ -2452,7 +2449,6 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) | |||
2452 | goto unlock; | 2449 | goto unlock; |
2453 | } | 2450 | } |
2454 | __get_cpu_var(vector_irq)[vector] = -1; | 2451 | __get_cpu_var(vector_irq)[vector] = -1; |
2455 | cfg->move_cleanup_count--; | ||
2456 | unlock: | 2452 | unlock: |
2457 | spin_unlock(&desc->lock); | 2453 | spin_unlock(&desc->lock); |
2458 | } | 2454 | } |
@@ -2460,21 +2456,33 @@ unlock: | |||
2460 | irq_exit(); | 2456 | irq_exit(); |
2461 | } | 2457 | } |
2462 | 2458 | ||
2463 | static void irq_complete_move(struct irq_desc **descp) | 2459 | static void __irq_complete_move(struct irq_desc **descp, unsigned vector) |
2464 | { | 2460 | { |
2465 | struct irq_desc *desc = *descp; | 2461 | struct irq_desc *desc = *descp; |
2466 | struct irq_cfg *cfg = desc->chip_data; | 2462 | struct irq_cfg *cfg = desc->chip_data; |
2467 | unsigned vector, me; | 2463 | unsigned me; |
2468 | 2464 | ||
2469 | if (likely(!cfg->move_in_progress)) | 2465 | if (likely(!cfg->move_in_progress)) |
2470 | return; | 2466 | return; |
2471 | 2467 | ||
2472 | vector = ~get_irq_regs()->orig_ax; | ||
2473 | me = smp_processor_id(); | 2468 | me = smp_processor_id(); |
2474 | 2469 | ||
2475 | if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) | 2470 | if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) |
2476 | send_cleanup_vector(cfg); | 2471 | send_cleanup_vector(cfg); |
2477 | } | 2472 | } |
2473 | |||
2474 | static void irq_complete_move(struct irq_desc **descp) | ||
2475 | { | ||
2476 | __irq_complete_move(descp, ~get_irq_regs()->orig_ax); | ||
2477 | } | ||
2478 | |||
2479 | void irq_force_complete_move(int irq) | ||
2480 | { | ||
2481 | struct irq_desc *desc = irq_to_desc(irq); | ||
2482 | struct irq_cfg *cfg = desc->chip_data; | ||
2483 | |||
2484 | __irq_complete_move(&desc, cfg->vector); | ||
2485 | } | ||
2478 | #else | 2486 | #else |
2479 | static inline void irq_complete_move(struct irq_desc **descp) {} | 2487 | static inline void irq_complete_move(struct irq_desc **descp) {} |
2480 | #endif | 2488 | #endif |
@@ -2490,6 +2498,59 @@ static void ack_apic_edge(unsigned int irq) | |||
2490 | 2498 | ||
2491 | atomic_t irq_mis_count; | 2499 | atomic_t irq_mis_count; |
2492 | 2500 | ||
2501 | /* | ||
2502 | * IO-APIC versions below 0x20 don't support EOI register. | ||
2503 | * For the record, here is the information about various versions: | ||
2504 | * 0Xh 82489DX | ||
2505 | * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant | ||
2506 | * 2Xh I/O(x)APIC which is PCI 2.2 Compliant | ||
2507 | * 30h-FFh Reserved | ||
2508 | * | ||
2509 | * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic | ||
2510 | * version as 0x2. This is an error with documentation and these ICH chips | ||
2511 | * use io-apic's of version 0x20. | ||
2512 | * | ||
2513 | * For IO-APIC's with EOI register, we use that to do an explicit EOI. | ||
2514 | * Otherwise, we simulate the EOI message manually by changing the trigger | ||
2515 | * mode to edge and then back to level, with RTE being masked during this. | ||
2516 | */ | ||
2517 | static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) | ||
2518 | { | ||
2519 | struct irq_pin_list *entry; | ||
2520 | |||
2521 | for_each_irq_pin(entry, cfg->irq_2_pin) { | ||
2522 | if (mp_ioapics[entry->apic].apicver >= 0x20) { | ||
2523 | /* | ||
2524 | * Intr-remapping uses pin number as the virtual vector | ||
2525 | * in the RTE. Actual vector is programmed in | ||
2526 | * intr-remapping table entry. Hence for the io-apic | ||
2527 | * EOI we use the pin number. | ||
2528 | */ | ||
2529 | if (irq_remapped(irq)) | ||
2530 | io_apic_eoi(entry->apic, entry->pin); | ||
2531 | else | ||
2532 | io_apic_eoi(entry->apic, cfg->vector); | ||
2533 | } else { | ||
2534 | __mask_and_edge_IO_APIC_irq(entry); | ||
2535 | __unmask_and_level_IO_APIC_irq(entry); | ||
2536 | } | ||
2537 | } | ||
2538 | } | ||
2539 | |||
2540 | static void eoi_ioapic_irq(struct irq_desc *desc) | ||
2541 | { | ||
2542 | struct irq_cfg *cfg; | ||
2543 | unsigned long flags; | ||
2544 | unsigned int irq; | ||
2545 | |||
2546 | irq = desc->irq; | ||
2547 | cfg = desc->chip_data; | ||
2548 | |||
2549 | spin_lock_irqsave(&ioapic_lock, flags); | ||
2550 | __eoi_ioapic_irq(irq, cfg); | ||
2551 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2552 | } | ||
2553 | |||
2493 | static void ack_apic_level(unsigned int irq) | 2554 | static void ack_apic_level(unsigned int irq) |
2494 | { | 2555 | { |
2495 | struct irq_desc *desc = irq_to_desc(irq); | 2556 | struct irq_desc *desc = irq_to_desc(irq); |
@@ -2525,6 +2586,19 @@ static void ack_apic_level(unsigned int irq) | |||
2525 | * level-triggered interrupt. We mask the source for the time of the | 2586 | * level-triggered interrupt. We mask the source for the time of the |
2526 | * operation to prevent an edge-triggered interrupt escaping meanwhile. | 2587 | * operation to prevent an edge-triggered interrupt escaping meanwhile. |
2527 | * The idea is from Manfred Spraul. --macro | 2588 | * The idea is from Manfred Spraul. --macro |
2589 | * | ||
2590 | * Also in the case when cpu goes offline, fixup_irqs() will forward | ||
2591 | * any unhandled interrupt on the offlined cpu to the new cpu | ||
2592 | * destination that is handling the corresponding interrupt. This | ||
2593 | * interrupt forwarding is done via IPI's. Hence, in this case also | ||
2594 | * level-triggered io-apic interrupt will be seen as an edge | ||
2595 | * interrupt in the IRR. And we can't rely on the cpu's EOI | ||
2596 | * to be broadcasted to the IO-APIC's which will clear the remoteIRR | ||
2597 | * corresponding to the level-triggered interrupt. Hence on IO-APIC's | ||
2598 | * supporting EOI register, we do an explicit EOI to clear the | ||
2599 | * remote IRR and on IO-APIC's which don't have an EOI register, | ||
2600 | * we use the above logic (mask+edge followed by unmask+level) from | ||
2601 | * Manfred Spraul to clear the remote IRR. | ||
2528 | */ | 2602 | */ |
2529 | cfg = desc->chip_data; | 2603 | cfg = desc->chip_data; |
2530 | i = cfg->vector; | 2604 | i = cfg->vector; |
@@ -2536,6 +2610,19 @@ static void ack_apic_level(unsigned int irq) | |||
2536 | */ | 2610 | */ |
2537 | ack_APIC_irq(); | 2611 | ack_APIC_irq(); |
2538 | 2612 | ||
2613 | /* | ||
2614 | * Tail end of clearing remote IRR bit (either by delivering the EOI | ||
2615 | * message via io-apic EOI register write or simulating it using | ||
2616 | * mask+edge followed by unnask+level logic) manually when the | ||
2617 | * level triggered interrupt is seen as the edge triggered interrupt | ||
2618 | * at the cpu. | ||
2619 | */ | ||
2620 | if (!(v & (1 << (i & 0x1f)))) { | ||
2621 | atomic_inc(&irq_mis_count); | ||
2622 | |||
2623 | eoi_ioapic_irq(desc); | ||
2624 | } | ||
2625 | |||
2539 | /* Now we can move and renable the irq */ | 2626 | /* Now we can move and renable the irq */ |
2540 | if (unlikely(do_unmask_irq)) { | 2627 | if (unlikely(do_unmask_irq)) { |
2541 | /* Only migrate the irq if the ack has been received. | 2628 | /* Only migrate the irq if the ack has been received. |
@@ -2569,41 +2656,9 @@ static void ack_apic_level(unsigned int irq) | |||
2569 | move_masked_irq(irq); | 2656 | move_masked_irq(irq); |
2570 | unmask_IO_APIC_irq_desc(desc); | 2657 | unmask_IO_APIC_irq_desc(desc); |
2571 | } | 2658 | } |
2572 | |||
2573 | /* Tail end of version 0x11 I/O APIC bug workaround */ | ||
2574 | if (!(v & (1 << (i & 0x1f)))) { | ||
2575 | atomic_inc(&irq_mis_count); | ||
2576 | spin_lock(&ioapic_lock); | ||
2577 | __mask_and_edge_IO_APIC_irq(cfg); | ||
2578 | __unmask_and_level_IO_APIC_irq(cfg); | ||
2579 | spin_unlock(&ioapic_lock); | ||
2580 | } | ||
2581 | } | 2659 | } |
2582 | 2660 | ||
2583 | #ifdef CONFIG_INTR_REMAP | 2661 | #ifdef CONFIG_INTR_REMAP |
2584 | static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) | ||
2585 | { | ||
2586 | struct irq_pin_list *entry; | ||
2587 | |||
2588 | for_each_irq_pin(entry, cfg->irq_2_pin) | ||
2589 | io_apic_eoi(entry->apic, entry->pin); | ||
2590 | } | ||
2591 | |||
2592 | static void | ||
2593 | eoi_ioapic_irq(struct irq_desc *desc) | ||
2594 | { | ||
2595 | struct irq_cfg *cfg; | ||
2596 | unsigned long flags; | ||
2597 | unsigned int irq; | ||
2598 | |||
2599 | irq = desc->irq; | ||
2600 | cfg = desc->chip_data; | ||
2601 | |||
2602 | spin_lock_irqsave(&ioapic_lock, flags); | ||
2603 | __eoi_ioapic_irq(irq, cfg); | ||
2604 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2605 | } | ||
2606 | |||
2607 | static void ir_ack_apic_edge(unsigned int irq) | 2662 | static void ir_ack_apic_edge(unsigned int irq) |
2608 | { | 2663 | { |
2609 | ack_APIC_irq(); | 2664 | ack_APIC_irq(); |
@@ -3157,6 +3212,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) | |||
3157 | continue; | 3212 | continue; |
3158 | 3213 | ||
3159 | desc_new = move_irq_desc(desc_new, node); | 3214 | desc_new = move_irq_desc(desc_new, node); |
3215 | cfg_new = desc_new->chip_data; | ||
3160 | 3216 | ||
3161 | if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) | 3217 | if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) |
3162 | irq = new; | 3218 | irq = new; |
@@ -3708,75 +3764,6 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) | |||
3708 | } | 3764 | } |
3709 | #endif /* CONFIG_HT_IRQ */ | 3765 | #endif /* CONFIG_HT_IRQ */ |
3710 | 3766 | ||
3711 | #ifdef CONFIG_X86_UV | ||
3712 | /* | ||
3713 | * Re-target the irq to the specified CPU and enable the specified MMR located | ||
3714 | * on the specified blade to allow the sending of MSIs to the specified CPU. | ||
3715 | */ | ||
3716 | int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | ||
3717 | unsigned long mmr_offset) | ||
3718 | { | ||
3719 | const struct cpumask *eligible_cpu = cpumask_of(cpu); | ||
3720 | struct irq_cfg *cfg; | ||
3721 | int mmr_pnode; | ||
3722 | unsigned long mmr_value; | ||
3723 | struct uv_IO_APIC_route_entry *entry; | ||
3724 | unsigned long flags; | ||
3725 | int err; | ||
3726 | |||
3727 | BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); | ||
3728 | |||
3729 | cfg = irq_cfg(irq); | ||
3730 | |||
3731 | err = assign_irq_vector(irq, cfg, eligible_cpu); | ||
3732 | if (err != 0) | ||
3733 | return err; | ||
3734 | |||
3735 | spin_lock_irqsave(&vector_lock, flags); | ||
3736 | set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, | ||
3737 | irq_name); | ||
3738 | spin_unlock_irqrestore(&vector_lock, flags); | ||
3739 | |||
3740 | mmr_value = 0; | ||
3741 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; | ||
3742 | entry->vector = cfg->vector; | ||
3743 | entry->delivery_mode = apic->irq_delivery_mode; | ||
3744 | entry->dest_mode = apic->irq_dest_mode; | ||
3745 | entry->polarity = 0; | ||
3746 | entry->trigger = 0; | ||
3747 | entry->mask = 0; | ||
3748 | entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); | ||
3749 | |||
3750 | mmr_pnode = uv_blade_to_pnode(mmr_blade); | ||
3751 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | ||
3752 | |||
3753 | if (cfg->move_in_progress) | ||
3754 | send_cleanup_vector(cfg); | ||
3755 | |||
3756 | return irq; | ||
3757 | } | ||
3758 | |||
3759 | /* | ||
3760 | * Disable the specified MMR located on the specified blade so that MSIs are | ||
3761 | * longer allowed to be sent. | ||
3762 | */ | ||
3763 | void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset) | ||
3764 | { | ||
3765 | unsigned long mmr_value; | ||
3766 | struct uv_IO_APIC_route_entry *entry; | ||
3767 | int mmr_pnode; | ||
3768 | |||
3769 | BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); | ||
3770 | |||
3771 | mmr_value = 0; | ||
3772 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; | ||
3773 | entry->mask = 1; | ||
3774 | |||
3775 | mmr_pnode = uv_blade_to_pnode(mmr_blade); | ||
3776 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | ||
3777 | } | ||
3778 | #endif /* CONFIG_X86_64 */ | ||
3779 | |||
3780 | int __init io_apic_get_redir_entries (int ioapic) | 3767 | int __init io_apic_get_redir_entries (int ioapic) |
3781 | { | 3768 | { |
3782 | union IO_APIC_reg_01 reg_01; | 3769 | union IO_APIC_reg_01 reg_01; |
@@ -3944,7 +3931,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) | |||
3944 | */ | 3931 | */ |
3945 | 3932 | ||
3946 | if (physids_empty(apic_id_map)) | 3933 | if (physids_empty(apic_id_map)) |
3947 | apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map); | 3934 | apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map); |
3948 | 3935 | ||
3949 | spin_lock_irqsave(&ioapic_lock, flags); | 3936 | spin_lock_irqsave(&ioapic_lock, flags); |
3950 | reg_00.raw = io_apic_read(ioapic, 0); | 3937 | reg_00.raw = io_apic_read(ioapic, 0); |
@@ -3960,10 +3947,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) | |||
3960 | * Every APIC in a system must have a unique ID or we get lots of nice | 3947 | * Every APIC in a system must have a unique ID or we get lots of nice |
3961 | * 'stuck on smp_invalidate_needed IPI wait' messages. | 3948 | * 'stuck on smp_invalidate_needed IPI wait' messages. |
3962 | */ | 3949 | */ |
3963 | if (apic->check_apicid_used(apic_id_map, apic_id)) { | 3950 | if (apic->check_apicid_used(&apic_id_map, apic_id)) { |
3964 | 3951 | ||
3965 | for (i = 0; i < get_physical_broadcast(); i++) { | 3952 | for (i = 0; i < get_physical_broadcast(); i++) { |
3966 | if (!apic->check_apicid_used(apic_id_map, i)) | 3953 | if (!apic->check_apicid_used(&apic_id_map, i)) |
3967 | break; | 3954 | break; |
3968 | } | 3955 | } |
3969 | 3956 | ||
@@ -3976,7 +3963,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) | |||
3976 | apic_id = i; | 3963 | apic_id = i; |
3977 | } | 3964 | } |
3978 | 3965 | ||
3979 | tmp = apic->apicid_to_cpu_present(apic_id); | 3966 | apic->apicid_to_cpu_present(apic_id, &tmp); |
3980 | physids_or(apic_id_map, apic_id_map, tmp); | 3967 | physids_or(apic_id_map, apic_id_map, tmp); |
3981 | 3968 | ||
3982 | if (reg_00.bits.ID != apic_id) { | 3969 | if (reg_00.bits.ID != apic_id) { |
@@ -4106,7 +4093,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics) | |||
4106 | for (i = 0; i < nr_ioapics; i++) { | 4093 | for (i = 0; i < nr_ioapics; i++) { |
4107 | res[i].name = mem; | 4094 | res[i].name = mem; |
4108 | res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; | 4095 | res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; |
4109 | sprintf(mem, "IOAPIC %u", i); | 4096 | snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i); |
4110 | mem += IOAPIC_RESOURCE_NAME_SIZE; | 4097 | mem += IOAPIC_RESOURCE_NAME_SIZE; |
4111 | } | 4098 | } |
4112 | 4099 | ||
@@ -4140,18 +4127,17 @@ void __init ioapic_init_mappings(void) | |||
4140 | #ifdef CONFIG_X86_32 | 4127 | #ifdef CONFIG_X86_32 |
4141 | fake_ioapic_page: | 4128 | fake_ioapic_page: |
4142 | #endif | 4129 | #endif |
4143 | ioapic_phys = (unsigned long) | 4130 | ioapic_phys = (unsigned long)alloc_bootmem_pages(PAGE_SIZE); |
4144 | alloc_bootmem_pages(PAGE_SIZE); | ||
4145 | ioapic_phys = __pa(ioapic_phys); | 4131 | ioapic_phys = __pa(ioapic_phys); |
4146 | } | 4132 | } |
4147 | set_fixmap_nocache(idx, ioapic_phys); | 4133 | set_fixmap_nocache(idx, ioapic_phys); |
4148 | apic_printk(APIC_VERBOSE, | 4134 | apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n", |
4149 | "mapped IOAPIC to %08lx (%08lx)\n", | 4135 | __fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK), |
4150 | __fix_to_virt(idx), ioapic_phys); | 4136 | ioapic_phys); |
4151 | idx++; | 4137 | idx++; |
4152 | 4138 | ||
4153 | ioapic_res->start = ioapic_phys; | 4139 | ioapic_res->start = ioapic_phys; |
4154 | ioapic_res->end = ioapic_phys + (4 * 1024) - 1; | 4140 | ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1; |
4155 | ioapic_res++; | 4141 | ioapic_res++; |
4156 | } | 4142 | } |
4157 | } | 4143 | } |
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index 7ff61d6a188a..6389432a9dbf 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c | |||
@@ -39,7 +39,8 @@ | |||
39 | int unknown_nmi_panic; | 39 | int unknown_nmi_panic; |
40 | int nmi_watchdog_enabled; | 40 | int nmi_watchdog_enabled; |
41 | 41 | ||
42 | static cpumask_t backtrace_mask __read_mostly; | 42 | /* For reliability, we're prepared to waste bits here. */ |
43 | static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; | ||
43 | 44 | ||
44 | /* nmi_active: | 45 | /* nmi_active: |
45 | * >0: the lapic NMI watchdog is active, but can be disabled | 46 | * >0: the lapic NMI watchdog is active, but can be disabled |
@@ -414,7 +415,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) | |||
414 | } | 415 | } |
415 | 416 | ||
416 | /* We can be called before check_nmi_watchdog, hence NULL check. */ | 417 | /* We can be called before check_nmi_watchdog, hence NULL check. */ |
417 | if (cpumask_test_cpu(cpu, &backtrace_mask)) { | 418 | if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { |
418 | static DEFINE_SPINLOCK(lock); /* Serialise the printks */ | 419 | static DEFINE_SPINLOCK(lock); /* Serialise the printks */ |
419 | 420 | ||
420 | spin_lock(&lock); | 421 | spin_lock(&lock); |
@@ -422,7 +423,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) | |||
422 | show_regs(regs); | 423 | show_regs(regs); |
423 | dump_stack(); | 424 | dump_stack(); |
424 | spin_unlock(&lock); | 425 | spin_unlock(&lock); |
425 | cpumask_clear_cpu(cpu, &backtrace_mask); | 426 | cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); |
426 | 427 | ||
427 | rc = 1; | 428 | rc = 1; |
428 | } | 429 | } |
@@ -558,14 +559,14 @@ void arch_trigger_all_cpu_backtrace(void) | |||
558 | { | 559 | { |
559 | int i; | 560 | int i; |
560 | 561 | ||
561 | cpumask_copy(&backtrace_mask, cpu_online_mask); | 562 | cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); |
562 | 563 | ||
563 | printk(KERN_INFO "sending NMI to all CPUs:\n"); | 564 | printk(KERN_INFO "sending NMI to all CPUs:\n"); |
564 | apic->send_IPI_all(NMI_VECTOR); | 565 | apic->send_IPI_all(NMI_VECTOR); |
565 | 566 | ||
566 | /* Wait for up to 10 seconds for all CPUs to do the backtrace */ | 567 | /* Wait for up to 10 seconds for all CPUs to do the backtrace */ |
567 | for (i = 0; i < 10 * 1000; i++) { | 568 | for (i = 0; i < 10 * 1000; i++) { |
568 | if (cpumask_empty(&backtrace_mask)) | 569 | if (cpumask_empty(to_cpumask(backtrace_mask))) |
569 | break; | 570 | break; |
570 | mdelay(1); | 571 | mdelay(1); |
571 | } | 572 | } |
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index efa00e2b8505..07cdbdcd7a92 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c | |||
@@ -334,10 +334,9 @@ static inline const struct cpumask *numaq_target_cpus(void) | |||
334 | return cpu_all_mask; | 334 | return cpu_all_mask; |
335 | } | 335 | } |
336 | 336 | ||
337 | static inline unsigned long | 337 | static unsigned long numaq_check_apicid_used(physid_mask_t *map, int apicid) |
338 | numaq_check_apicid_used(physid_mask_t bitmap, int apicid) | ||
339 | { | 338 | { |
340 | return physid_isset(apicid, bitmap); | 339 | return physid_isset(apicid, *map); |
341 | } | 340 | } |
342 | 341 | ||
343 | static inline unsigned long numaq_check_apicid_present(int bit) | 342 | static inline unsigned long numaq_check_apicid_present(int bit) |
@@ -371,10 +370,10 @@ static inline int numaq_multi_timer_check(int apic, int irq) | |||
371 | return apic != 0 && irq == 0; | 370 | return apic != 0 && irq == 0; |
372 | } | 371 | } |
373 | 372 | ||
374 | static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map) | 373 | static inline void numaq_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) |
375 | { | 374 | { |
376 | /* We don't have a good way to do this yet - hack */ | 375 | /* We don't have a good way to do this yet - hack */ |
377 | return physids_promote(0xFUL); | 376 | return physids_promote(0xFUL, retmap); |
378 | } | 377 | } |
379 | 378 | ||
380 | static inline int numaq_cpu_to_logical_apicid(int cpu) | 379 | static inline int numaq_cpu_to_logical_apicid(int cpu) |
@@ -402,12 +401,12 @@ static inline int numaq_apicid_to_node(int logical_apicid) | |||
402 | return logical_apicid >> 4; | 401 | return logical_apicid >> 4; |
403 | } | 402 | } |
404 | 403 | ||
405 | static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid) | 404 | static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap) |
406 | { | 405 | { |
407 | int node = numaq_apicid_to_node(logical_apicid); | 406 | int node = numaq_apicid_to_node(logical_apicid); |
408 | int cpu = __ffs(logical_apicid & 0xf); | 407 | int cpu = __ffs(logical_apicid & 0xf); |
409 | 408 | ||
410 | return physid_mask_of_physid(cpu + 4*node); | 409 | physid_set_mask_of_physid(cpu + 4*node, retmap); |
411 | } | 410 | } |
412 | 411 | ||
413 | /* Where the IO area was mapped on multiquad, always 0 otherwise */ | 412 | /* Where the IO area was mapped on multiquad, always 0 otherwise */ |
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 0c0182cc947d..1a6559f6768c 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c | |||
@@ -108,7 +108,7 @@ struct apic apic_default = { | |||
108 | .apicid_to_node = default_apicid_to_node, | 108 | .apicid_to_node = default_apicid_to_node, |
109 | .cpu_to_logical_apicid = default_cpu_to_logical_apicid, | 109 | .cpu_to_logical_apicid = default_cpu_to_logical_apicid, |
110 | .cpu_present_to_apicid = default_cpu_present_to_apicid, | 110 | .cpu_present_to_apicid = default_cpu_present_to_apicid, |
111 | .apicid_to_cpu_present = default_apicid_to_cpu_present, | 111 | .apicid_to_cpu_present = physid_set_mask_of_physid, |
112 | .setup_portio_remap = NULL, | 112 | .setup_portio_remap = NULL, |
113 | .check_phys_apicid_present = default_check_phys_apicid_present, | 113 | .check_phys_apicid_present = default_check_phys_apicid_present, |
114 | .enable_apic_mode = NULL, | 114 | .enable_apic_mode = NULL, |
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 645ecc4ff0be..9b419263d90d 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c | |||
@@ -183,7 +183,7 @@ static const struct cpumask *summit_target_cpus(void) | |||
183 | return cpumask_of(0); | 183 | return cpumask_of(0); |
184 | } | 184 | } |
185 | 185 | ||
186 | static unsigned long summit_check_apicid_used(physid_mask_t bitmap, int apicid) | 186 | static unsigned long summit_check_apicid_used(physid_mask_t *map, int apicid) |
187 | { | 187 | { |
188 | return 0; | 188 | return 0; |
189 | } | 189 | } |
@@ -261,15 +261,15 @@ static int summit_cpu_present_to_apicid(int mps_cpu) | |||
261 | return BAD_APICID; | 261 | return BAD_APICID; |
262 | } | 262 | } |
263 | 263 | ||
264 | static physid_mask_t summit_ioapic_phys_id_map(physid_mask_t phys_id_map) | 264 | static void summit_ioapic_phys_id_map(physid_mask_t *phys_id_map, physid_mask_t *retmap) |
265 | { | 265 | { |
266 | /* For clustered we don't have a good way to do this yet - hack */ | 266 | /* For clustered we don't have a good way to do this yet - hack */ |
267 | return physids_promote(0x0F); | 267 | physids_promote(0x0FL, retmap); |
268 | } | 268 | } |
269 | 269 | ||
270 | static physid_mask_t summit_apicid_to_cpu_present(int apicid) | 270 | static void summit_apicid_to_cpu_present(int apicid, physid_mask_t *retmap) |
271 | { | 271 | { |
272 | return physid_mask_of_physid(0); | 272 | physid_set_mask_of_physid(0, retmap); |
273 | } | 273 | } |
274 | 274 | ||
275 | static int summit_check_phys_apicid_present(int physical_apicid) | 275 | static int summit_check_phys_apicid_present(int physical_apicid) |
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 326c25477d3d..130c4b934877 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
@@ -409,6 +409,12 @@ static __init void map_mmioh_high(int max_pnode) | |||
409 | map_high("MMIOH", mmioh.s.base, shift, max_pnode, map_uc); | 409 | map_high("MMIOH", mmioh.s.base, shift, max_pnode, map_uc); |
410 | } | 410 | } |
411 | 411 | ||
412 | static __init void map_low_mmrs(void) | ||
413 | { | ||
414 | init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE); | ||
415 | init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE); | ||
416 | } | ||
417 | |||
412 | static __init void uv_rtc_init(void) | 418 | static __init void uv_rtc_init(void) |
413 | { | 419 | { |
414 | long status; | 420 | long status; |
@@ -550,6 +556,8 @@ void __init uv_system_init(void) | |||
550 | unsigned long mmr_base, present, paddr; | 556 | unsigned long mmr_base, present, paddr; |
551 | unsigned short pnode_mask; | 557 | unsigned short pnode_mask; |
552 | 558 | ||
559 | map_low_mmrs(); | ||
560 | |||
553 | m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); | 561 | m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); |
554 | m_val = m_n_config.s.m_skt; | 562 | m_val = m_n_config.s.m_skt; |
555 | n_val = m_n_config.s.n_skt; | 563 | n_val = m_n_config.s.n_skt; |
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 151ace69a5aa..b5b6b23bce53 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
@@ -204,7 +204,6 @@ | |||
204 | #include <linux/module.h> | 204 | #include <linux/module.h> |
205 | 205 | ||
206 | #include <linux/poll.h> | 206 | #include <linux/poll.h> |
207 | #include <linux/smp_lock.h> | ||
208 | #include <linux/types.h> | 207 | #include <linux/types.h> |
209 | #include <linux/stddef.h> | 208 | #include <linux/stddef.h> |
210 | #include <linux/timer.h> | 209 | #include <linux/timer.h> |
@@ -403,6 +402,7 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue); | |||
403 | static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); | 402 | static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); |
404 | static struct apm_user *user_list; | 403 | static struct apm_user *user_list; |
405 | static DEFINE_SPINLOCK(user_list_lock); | 404 | static DEFINE_SPINLOCK(user_list_lock); |
405 | static DEFINE_MUTEX(apm_mutex); | ||
406 | 406 | ||
407 | /* | 407 | /* |
408 | * Set up a segment that references the real mode segment 0x40 | 408 | * Set up a segment that references the real mode segment 0x40 |
@@ -1531,7 +1531,7 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg) | |||
1531 | return -EPERM; | 1531 | return -EPERM; |
1532 | switch (cmd) { | 1532 | switch (cmd) { |
1533 | case APM_IOC_STANDBY: | 1533 | case APM_IOC_STANDBY: |
1534 | lock_kernel(); | 1534 | mutex_lock(&apm_mutex); |
1535 | if (as->standbys_read > 0) { | 1535 | if (as->standbys_read > 0) { |
1536 | as->standbys_read--; | 1536 | as->standbys_read--; |
1537 | as->standbys_pending--; | 1537 | as->standbys_pending--; |
@@ -1540,10 +1540,10 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg) | |||
1540 | queue_event(APM_USER_STANDBY, as); | 1540 | queue_event(APM_USER_STANDBY, as); |
1541 | if (standbys_pending <= 0) | 1541 | if (standbys_pending <= 0) |
1542 | standby(); | 1542 | standby(); |
1543 | unlock_kernel(); | 1543 | mutex_unlock(&apm_mutex); |
1544 | break; | 1544 | break; |
1545 | case APM_IOC_SUSPEND: | 1545 | case APM_IOC_SUSPEND: |
1546 | lock_kernel(); | 1546 | mutex_lock(&apm_mutex); |
1547 | if (as->suspends_read > 0) { | 1547 | if (as->suspends_read > 0) { |
1548 | as->suspends_read--; | 1548 | as->suspends_read--; |
1549 | as->suspends_pending--; | 1549 | as->suspends_pending--; |
@@ -1552,13 +1552,14 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg) | |||
1552 | queue_event(APM_USER_SUSPEND, as); | 1552 | queue_event(APM_USER_SUSPEND, as); |
1553 | if (suspends_pending <= 0) { | 1553 | if (suspends_pending <= 0) { |
1554 | ret = suspend(1); | 1554 | ret = suspend(1); |
1555 | mutex_unlock(&apm_mutex); | ||
1555 | } else { | 1556 | } else { |
1556 | as->suspend_wait = 1; | 1557 | as->suspend_wait = 1; |
1558 | mutex_unlock(&apm_mutex); | ||
1557 | wait_event_interruptible(apm_suspend_waitqueue, | 1559 | wait_event_interruptible(apm_suspend_waitqueue, |
1558 | as->suspend_wait == 0); | 1560 | as->suspend_wait == 0); |
1559 | ret = as->suspend_result; | 1561 | ret = as->suspend_result; |
1560 | } | 1562 | } |
1561 | unlock_kernel(); | ||
1562 | return ret; | 1563 | return ret; |
1563 | default: | 1564 | default: |
1564 | return -ENOTTY; | 1565 | return -ENOTTY; |
@@ -1608,12 +1609,10 @@ static int do_open(struct inode *inode, struct file *filp) | |||
1608 | { | 1609 | { |
1609 | struct apm_user *as; | 1610 | struct apm_user *as; |
1610 | 1611 | ||
1611 | lock_kernel(); | ||
1612 | as = kmalloc(sizeof(*as), GFP_KERNEL); | 1612 | as = kmalloc(sizeof(*as), GFP_KERNEL); |
1613 | if (as == NULL) { | 1613 | if (as == NULL) { |
1614 | printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n", | 1614 | printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n", |
1615 | sizeof(*as)); | 1615 | sizeof(*as)); |
1616 | unlock_kernel(); | ||
1617 | return -ENOMEM; | 1616 | return -ENOMEM; |
1618 | } | 1617 | } |
1619 | as->magic = APM_BIOS_MAGIC; | 1618 | as->magic = APM_BIOS_MAGIC; |
@@ -1635,7 +1634,6 @@ static int do_open(struct inode *inode, struct file *filp) | |||
1635 | user_list = as; | 1634 | user_list = as; |
1636 | spin_unlock(&user_list_lock); | 1635 | spin_unlock(&user_list_lock); |
1637 | filp->private_data = as; | 1636 | filp->private_data = as; |
1638 | unlock_kernel(); | ||
1639 | return 0; | 1637 | return 0; |
1640 | } | 1638 | } |
1641 | 1639 | ||
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 68537e957a9b..1d2cb383410e 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -5,6 +5,7 @@ | |||
5 | # Don't trace early stages of a secondary CPU boot | 5 | # Don't trace early stages of a secondary CPU boot |
6 | ifdef CONFIG_FUNCTION_TRACER | 6 | ifdef CONFIG_FUNCTION_TRACER |
7 | CFLAGS_REMOVE_common.o = -pg | 7 | CFLAGS_REMOVE_common.o = -pg |
8 | CFLAGS_REMOVE_perf_event.o = -pg | ||
8 | endif | 9 | endif |
9 | 10 | ||
10 | # Make sure load_percpu_segment has no stackprotector | 11 | # Make sure load_percpu_segment has no stackprotector |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index c910a716a71c..7128b3799cec 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -535,7 +535,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
535 | } | 535 | } |
536 | } | 536 | } |
537 | 537 | ||
538 | display_cacheinfo(c); | 538 | cpu_detect_cache_sizes(c); |
539 | 539 | ||
540 | /* Multi core CPU? */ | 540 | /* Multi core CPU? */ |
541 | if (c->extended_cpuid_level >= 0x80000008) { | 541 | if (c->extended_cpuid_level >= 0x80000008) { |
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index c95e831bb095..e58d978e0758 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c | |||
@@ -294,7 +294,7 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c) | |||
294 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | 294 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); |
295 | } | 295 | } |
296 | 296 | ||
297 | display_cacheinfo(c); | 297 | cpu_detect_cache_sizes(c); |
298 | } | 298 | } |
299 | 299 | ||
300 | enum { | 300 | enum { |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cc25c2b4a567..a4ec8b647544 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -61,7 +61,7 @@ void __init setup_cpu_local_masks(void) | |||
61 | static void __cpuinit default_init(struct cpuinfo_x86 *c) | 61 | static void __cpuinit default_init(struct cpuinfo_x86 *c) |
62 | { | 62 | { |
63 | #ifdef CONFIG_X86_64 | 63 | #ifdef CONFIG_X86_64 |
64 | display_cacheinfo(c); | 64 | cpu_detect_cache_sizes(c); |
65 | #else | 65 | #else |
66 | /* Not much we can do here... */ | 66 | /* Not much we can do here... */ |
67 | /* Check if at least it has cpuid */ | 67 | /* Check if at least it has cpuid */ |
@@ -383,7 +383,7 @@ static void __cpuinit get_model_name(struct cpuinfo_x86 *c) | |||
383 | } | 383 | } |
384 | } | 384 | } |
385 | 385 | ||
386 | void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) | 386 | void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c) |
387 | { | 387 | { |
388 | unsigned int n, dummy, ebx, ecx, edx, l2size; | 388 | unsigned int n, dummy, ebx, ecx, edx, l2size; |
389 | 389 | ||
@@ -391,8 +391,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) | |||
391 | 391 | ||
392 | if (n >= 0x80000005) { | 392 | if (n >= 0x80000005) { |
393 | cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); | 393 | cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); |
394 | printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", | ||
395 | edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); | ||
396 | c->x86_cache_size = (ecx>>24) + (edx>>24); | 394 | c->x86_cache_size = (ecx>>24) + (edx>>24); |
397 | #ifdef CONFIG_X86_64 | 395 | #ifdef CONFIG_X86_64 |
398 | /* On K8 L1 TLB is inclusive, so don't count it */ | 396 | /* On K8 L1 TLB is inclusive, so don't count it */ |
@@ -422,9 +420,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) | |||
422 | #endif | 420 | #endif |
423 | 421 | ||
424 | c->x86_cache_size = l2size; | 422 | c->x86_cache_size = l2size; |
425 | |||
426 | printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", | ||
427 | l2size, ecx & 0xFF); | ||
428 | } | 423 | } |
429 | 424 | ||
430 | void __cpuinit detect_ht(struct cpuinfo_x86 *c) | 425 | void __cpuinit detect_ht(struct cpuinfo_x86 *c) |
@@ -659,24 +654,31 @@ void __init early_cpu_init(void) | |||
659 | const struct cpu_dev *const *cdev; | 654 | const struct cpu_dev *const *cdev; |
660 | int count = 0; | 655 | int count = 0; |
661 | 656 | ||
657 | #ifdef PROCESSOR_SELECT | ||
662 | printk(KERN_INFO "KERNEL supported cpus:\n"); | 658 | printk(KERN_INFO "KERNEL supported cpus:\n"); |
659 | #endif | ||
660 | |||
663 | for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { | 661 | for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { |
664 | const struct cpu_dev *cpudev = *cdev; | 662 | const struct cpu_dev *cpudev = *cdev; |
665 | unsigned int j; | ||
666 | 663 | ||
667 | if (count >= X86_VENDOR_NUM) | 664 | if (count >= X86_VENDOR_NUM) |
668 | break; | 665 | break; |
669 | cpu_devs[count] = cpudev; | 666 | cpu_devs[count] = cpudev; |
670 | count++; | 667 | count++; |
671 | 668 | ||
672 | for (j = 0; j < 2; j++) { | 669 | #ifdef PROCESSOR_SELECT |
673 | if (!cpudev->c_ident[j]) | 670 | { |
674 | continue; | 671 | unsigned int j; |
675 | printk(KERN_INFO " %s %s\n", cpudev->c_vendor, | 672 | |
676 | cpudev->c_ident[j]); | 673 | for (j = 0; j < 2; j++) { |
674 | if (!cpudev->c_ident[j]) | ||
675 | continue; | ||
676 | printk(KERN_INFO " %s %s\n", cpudev->c_vendor, | ||
677 | cpudev->c_ident[j]); | ||
678 | } | ||
677 | } | 679 | } |
680 | #endif | ||
678 | } | 681 | } |
679 | |||
680 | early_identify_cpu(&boot_cpu_data); | 682 | early_identify_cpu(&boot_cpu_data); |
681 | } | 683 | } |
682 | 684 | ||
@@ -837,10 +839,8 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
837 | boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; | 839 | boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; |
838 | } | 840 | } |
839 | 841 | ||
840 | #ifdef CONFIG_X86_MCE | ||
841 | /* Init Machine Check Exception if available. */ | 842 | /* Init Machine Check Exception if available. */ |
842 | mcheck_init(c); | 843 | mcheck_cpu_init(c); |
843 | #endif | ||
844 | 844 | ||
845 | select_idle_routine(c); | 845 | select_idle_routine(c); |
846 | 846 | ||
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 6de9a908e400..3624e8a0f71b 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h | |||
@@ -32,6 +32,6 @@ struct cpu_dev { | |||
32 | extern const struct cpu_dev *const __x86_cpu_dev_start[], | 32 | extern const struct cpu_dev *const __x86_cpu_dev_start[], |
33 | *const __x86_cpu_dev_end[]; | 33 | *const __x86_cpu_dev_end[]; |
34 | 34 | ||
35 | extern void display_cacheinfo(struct cpuinfo_x86 *c); | 35 | extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); |
36 | 36 | ||
37 | #endif | 37 | #endif |
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 19807b89f058..4fbd384fb645 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c | |||
@@ -373,7 +373,7 @@ static void __cpuinit init_nsc(struct cpuinfo_x86 *c) | |||
373 | /* Handle the GX (Formally known as the GX2) */ | 373 | /* Handle the GX (Formally known as the GX2) */ |
374 | 374 | ||
375 | if (c->x86 == 5 && c->x86_model == 5) | 375 | if (c->x86 == 5 && c->x86_model == 5) |
376 | display_cacheinfo(c); | 376 | cpu_detect_cache_sizes(c); |
377 | else | 377 | else |
378 | init_cyrix(c); | 378 | init_cyrix(c); |
379 | } | 379 | } |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 804c40e2bc3e..0df4c2b7107f 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -488,22 +488,6 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
488 | #endif | 488 | #endif |
489 | } | 489 | } |
490 | 490 | ||
491 | if (trace) | ||
492 | printk(KERN_INFO "CPU: Trace cache: %dK uops", trace); | ||
493 | else if (l1i) | ||
494 | printk(KERN_INFO "CPU: L1 I cache: %dK", l1i); | ||
495 | |||
496 | if (l1d) | ||
497 | printk(KERN_CONT ", L1 D cache: %dK\n", l1d); | ||
498 | else | ||
499 | printk(KERN_CONT "\n"); | ||
500 | |||
501 | if (l2) | ||
502 | printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); | ||
503 | |||
504 | if (l3) | ||
505 | printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); | ||
506 | |||
507 | c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); | 491 | c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); |
508 | 492 | ||
509 | return l2; | 493 | return l2; |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 721a77ca8115..0bcaa3875863 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -46,6 +46,9 @@ | |||
46 | 46 | ||
47 | #include "mce-internal.h" | 47 | #include "mce-internal.h" |
48 | 48 | ||
49 | #define CREATE_TRACE_POINTS | ||
50 | #include <trace/events/mce.h> | ||
51 | |||
49 | int mce_disabled __read_mostly; | 52 | int mce_disabled __read_mostly; |
50 | 53 | ||
51 | #define MISC_MCELOG_MINOR 227 | 54 | #define MISC_MCELOG_MINOR 227 |
@@ -85,18 +88,26 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait); | |||
85 | static DEFINE_PER_CPU(struct mce, mces_seen); | 88 | static DEFINE_PER_CPU(struct mce, mces_seen); |
86 | static int cpu_missing; | 89 | static int cpu_missing; |
87 | 90 | ||
88 | static void default_decode_mce(struct mce *m) | 91 | /* |
92 | * CPU/chipset specific EDAC code can register a notifier call here to print | ||
93 | * MCE errors in a human-readable form. | ||
94 | */ | ||
95 | ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); | ||
96 | EXPORT_SYMBOL_GPL(x86_mce_decoder_chain); | ||
97 | |||
98 | static int default_decode_mce(struct notifier_block *nb, unsigned long val, | ||
99 | void *data) | ||
89 | { | 100 | { |
90 | pr_emerg("No human readable MCE decoding support on this CPU type.\n"); | 101 | pr_emerg("No human readable MCE decoding support on this CPU type.\n"); |
91 | pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); | 102 | pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); |
103 | |||
104 | return NOTIFY_STOP; | ||
92 | } | 105 | } |
93 | 106 | ||
94 | /* | 107 | static struct notifier_block mce_dec_nb = { |
95 | * CPU/chipset specific EDAC code can register a callback here to print | 108 | .notifier_call = default_decode_mce, |
96 | * MCE errors in a human-readable form: | 109 | .priority = -1, |
97 | */ | 110 | }; |
98 | void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce; | ||
99 | EXPORT_SYMBOL(x86_mce_decode_callback); | ||
100 | 111 | ||
101 | /* MCA banks polled by the period polling timer for corrected events */ | 112 | /* MCA banks polled by the period polling timer for corrected events */ |
102 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { | 113 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { |
@@ -141,6 +152,9 @@ void mce_log(struct mce *mce) | |||
141 | { | 152 | { |
142 | unsigned next, entry; | 153 | unsigned next, entry; |
143 | 154 | ||
155 | /* Emit the trace record: */ | ||
156 | trace_mce_record(mce); | ||
157 | |||
144 | mce->finished = 0; | 158 | mce->finished = 0; |
145 | wmb(); | 159 | wmb(); |
146 | for (;;) { | 160 | for (;;) { |
@@ -204,9 +218,9 @@ static void print_mce(struct mce *m) | |||
204 | 218 | ||
205 | /* | 219 | /* |
206 | * Print out human-readable details about the MCE error, | 220 | * Print out human-readable details about the MCE error, |
207 | * (if the CPU has an implementation for that): | 221 | * (if the CPU has an implementation for that) |
208 | */ | 222 | */ |
209 | x86_mce_decode_callback(m); | 223 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); |
210 | } | 224 | } |
211 | 225 | ||
212 | static void print_mce_head(void) | 226 | static void print_mce_head(void) |
@@ -1122,7 +1136,7 @@ static int check_interval = 5 * 60; /* 5 minutes */ | |||
1122 | static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */ | 1136 | static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */ |
1123 | static DEFINE_PER_CPU(struct timer_list, mce_timer); | 1137 | static DEFINE_PER_CPU(struct timer_list, mce_timer); |
1124 | 1138 | ||
1125 | static void mcheck_timer(unsigned long data) | 1139 | static void mce_start_timer(unsigned long data) |
1126 | { | 1140 | { |
1127 | struct timer_list *t = &per_cpu(mce_timer, data); | 1141 | struct timer_list *t = &per_cpu(mce_timer, data); |
1128 | int *n; | 1142 | int *n; |
@@ -1187,7 +1201,7 @@ int mce_notify_irq(void) | |||
1187 | } | 1201 | } |
1188 | EXPORT_SYMBOL_GPL(mce_notify_irq); | 1202 | EXPORT_SYMBOL_GPL(mce_notify_irq); |
1189 | 1203 | ||
1190 | static int mce_banks_init(void) | 1204 | static int __cpuinit __mcheck_cpu_mce_banks_init(void) |
1191 | { | 1205 | { |
1192 | int i; | 1206 | int i; |
1193 | 1207 | ||
@@ -1206,7 +1220,7 @@ static int mce_banks_init(void) | |||
1206 | /* | 1220 | /* |
1207 | * Initialize Machine Checks for a CPU. | 1221 | * Initialize Machine Checks for a CPU. |
1208 | */ | 1222 | */ |
1209 | static int __cpuinit mce_cap_init(void) | 1223 | static int __cpuinit __mcheck_cpu_cap_init(void) |
1210 | { | 1224 | { |
1211 | unsigned b; | 1225 | unsigned b; |
1212 | u64 cap; | 1226 | u64 cap; |
@@ -1228,7 +1242,7 @@ static int __cpuinit mce_cap_init(void) | |||
1228 | WARN_ON(banks != 0 && b != banks); | 1242 | WARN_ON(banks != 0 && b != banks); |
1229 | banks = b; | 1243 | banks = b; |
1230 | if (!mce_banks) { | 1244 | if (!mce_banks) { |
1231 | int err = mce_banks_init(); | 1245 | int err = __mcheck_cpu_mce_banks_init(); |
1232 | 1246 | ||
1233 | if (err) | 1247 | if (err) |
1234 | return err; | 1248 | return err; |
@@ -1244,7 +1258,7 @@ static int __cpuinit mce_cap_init(void) | |||
1244 | return 0; | 1258 | return 0; |
1245 | } | 1259 | } |
1246 | 1260 | ||
1247 | static void mce_init(void) | 1261 | static void __mcheck_cpu_init_generic(void) |
1248 | { | 1262 | { |
1249 | mce_banks_t all_banks; | 1263 | mce_banks_t all_banks; |
1250 | u64 cap; | 1264 | u64 cap; |
@@ -1273,7 +1287,7 @@ static void mce_init(void) | |||
1273 | } | 1287 | } |
1274 | 1288 | ||
1275 | /* Add per CPU specific workarounds here */ | 1289 | /* Add per CPU specific workarounds here */ |
1276 | static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) | 1290 | static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) |
1277 | { | 1291 | { |
1278 | if (c->x86_vendor == X86_VENDOR_UNKNOWN) { | 1292 | if (c->x86_vendor == X86_VENDOR_UNKNOWN) { |
1279 | pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); | 1293 | pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); |
@@ -1341,7 +1355,7 @@ static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) | |||
1341 | return 0; | 1355 | return 0; |
1342 | } | 1356 | } |
1343 | 1357 | ||
1344 | static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) | 1358 | static void __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) |
1345 | { | 1359 | { |
1346 | if (c->x86 != 5) | 1360 | if (c->x86 != 5) |
1347 | return; | 1361 | return; |
@@ -1355,7 +1369,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) | |||
1355 | } | 1369 | } |
1356 | } | 1370 | } |
1357 | 1371 | ||
1358 | static void mce_cpu_features(struct cpuinfo_x86 *c) | 1372 | static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) |
1359 | { | 1373 | { |
1360 | switch (c->x86_vendor) { | 1374 | switch (c->x86_vendor) { |
1361 | case X86_VENDOR_INTEL: | 1375 | case X86_VENDOR_INTEL: |
@@ -1369,7 +1383,7 @@ static void mce_cpu_features(struct cpuinfo_x86 *c) | |||
1369 | } | 1383 | } |
1370 | } | 1384 | } |
1371 | 1385 | ||
1372 | static void mce_init_timer(void) | 1386 | static void __mcheck_cpu_init_timer(void) |
1373 | { | 1387 | { |
1374 | struct timer_list *t = &__get_cpu_var(mce_timer); | 1388 | struct timer_list *t = &__get_cpu_var(mce_timer); |
1375 | int *n = &__get_cpu_var(mce_next_interval); | 1389 | int *n = &__get_cpu_var(mce_next_interval); |
@@ -1380,7 +1394,7 @@ static void mce_init_timer(void) | |||
1380 | *n = check_interval * HZ; | 1394 | *n = check_interval * HZ; |
1381 | if (!*n) | 1395 | if (!*n) |
1382 | return; | 1396 | return; |
1383 | setup_timer(t, mcheck_timer, smp_processor_id()); | 1397 | setup_timer(t, mce_start_timer, smp_processor_id()); |
1384 | t->expires = round_jiffies(jiffies + *n); | 1398 | t->expires = round_jiffies(jiffies + *n); |
1385 | add_timer_on(t, smp_processor_id()); | 1399 | add_timer_on(t, smp_processor_id()); |
1386 | } | 1400 | } |
@@ -1400,27 +1414,28 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) = | |||
1400 | * Called for each booted CPU to set up machine checks. | 1414 | * Called for each booted CPU to set up machine checks. |
1401 | * Must be called with preempt off: | 1415 | * Must be called with preempt off: |
1402 | */ | 1416 | */ |
1403 | void __cpuinit mcheck_init(struct cpuinfo_x86 *c) | 1417 | void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c) |
1404 | { | 1418 | { |
1405 | if (mce_disabled) | 1419 | if (mce_disabled) |
1406 | return; | 1420 | return; |
1407 | 1421 | ||
1408 | mce_ancient_init(c); | 1422 | __mcheck_cpu_ancient_init(c); |
1409 | 1423 | ||
1410 | if (!mce_available(c)) | 1424 | if (!mce_available(c)) |
1411 | return; | 1425 | return; |
1412 | 1426 | ||
1413 | if (mce_cap_init() < 0 || mce_cpu_quirks(c) < 0) { | 1427 | if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) { |
1414 | mce_disabled = 1; | 1428 | mce_disabled = 1; |
1415 | return; | 1429 | return; |
1416 | } | 1430 | } |
1417 | 1431 | ||
1418 | machine_check_vector = do_machine_check; | 1432 | machine_check_vector = do_machine_check; |
1419 | 1433 | ||
1420 | mce_init(); | 1434 | __mcheck_cpu_init_generic(); |
1421 | mce_cpu_features(c); | 1435 | __mcheck_cpu_init_vendor(c); |
1422 | mce_init_timer(); | 1436 | __mcheck_cpu_init_timer(); |
1423 | INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); | 1437 | INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); |
1438 | |||
1424 | } | 1439 | } |
1425 | 1440 | ||
1426 | /* | 1441 | /* |
@@ -1640,6 +1655,15 @@ static int __init mcheck_enable(char *str) | |||
1640 | } | 1655 | } |
1641 | __setup("mce", mcheck_enable); | 1656 | __setup("mce", mcheck_enable); |
1642 | 1657 | ||
1658 | int __init mcheck_init(void) | ||
1659 | { | ||
1660 | atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb); | ||
1661 | |||
1662 | mcheck_intel_therm_init(); | ||
1663 | |||
1664 | return 0; | ||
1665 | } | ||
1666 | |||
1643 | /* | 1667 | /* |
1644 | * Sysfs support | 1668 | * Sysfs support |
1645 | */ | 1669 | */ |
@@ -1648,7 +1672,7 @@ __setup("mce", mcheck_enable); | |||
1648 | * Disable machine checks on suspend and shutdown. We can't really handle | 1672 | * Disable machine checks on suspend and shutdown. We can't really handle |
1649 | * them later. | 1673 | * them later. |
1650 | */ | 1674 | */ |
1651 | static int mce_disable(void) | 1675 | static int mce_disable_error_reporting(void) |
1652 | { | 1676 | { |
1653 | int i; | 1677 | int i; |
1654 | 1678 | ||
@@ -1663,12 +1687,12 @@ static int mce_disable(void) | |||
1663 | 1687 | ||
1664 | static int mce_suspend(struct sys_device *dev, pm_message_t state) | 1688 | static int mce_suspend(struct sys_device *dev, pm_message_t state) |
1665 | { | 1689 | { |
1666 | return mce_disable(); | 1690 | return mce_disable_error_reporting(); |
1667 | } | 1691 | } |
1668 | 1692 | ||
1669 | static int mce_shutdown(struct sys_device *dev) | 1693 | static int mce_shutdown(struct sys_device *dev) |
1670 | { | 1694 | { |
1671 | return mce_disable(); | 1695 | return mce_disable_error_reporting(); |
1672 | } | 1696 | } |
1673 | 1697 | ||
1674 | /* | 1698 | /* |
@@ -1678,8 +1702,8 @@ static int mce_shutdown(struct sys_device *dev) | |||
1678 | */ | 1702 | */ |
1679 | static int mce_resume(struct sys_device *dev) | 1703 | static int mce_resume(struct sys_device *dev) |
1680 | { | 1704 | { |
1681 | mce_init(); | 1705 | __mcheck_cpu_init_generic(); |
1682 | mce_cpu_features(¤t_cpu_data); | 1706 | __mcheck_cpu_init_vendor(¤t_cpu_data); |
1683 | 1707 | ||
1684 | return 0; | 1708 | return 0; |
1685 | } | 1709 | } |
@@ -1689,8 +1713,8 @@ static void mce_cpu_restart(void *data) | |||
1689 | del_timer_sync(&__get_cpu_var(mce_timer)); | 1713 | del_timer_sync(&__get_cpu_var(mce_timer)); |
1690 | if (!mce_available(¤t_cpu_data)) | 1714 | if (!mce_available(¤t_cpu_data)) |
1691 | return; | 1715 | return; |
1692 | mce_init(); | 1716 | __mcheck_cpu_init_generic(); |
1693 | mce_init_timer(); | 1717 | __mcheck_cpu_init_timer(); |
1694 | } | 1718 | } |
1695 | 1719 | ||
1696 | /* Reinit MCEs after user configuration changes */ | 1720 | /* Reinit MCEs after user configuration changes */ |
@@ -1716,7 +1740,7 @@ static void mce_enable_ce(void *all) | |||
1716 | cmci_reenable(); | 1740 | cmci_reenable(); |
1717 | cmci_recheck(); | 1741 | cmci_recheck(); |
1718 | if (all) | 1742 | if (all) |
1719 | mce_init_timer(); | 1743 | __mcheck_cpu_init_timer(); |
1720 | } | 1744 | } |
1721 | 1745 | ||
1722 | static struct sysdev_class mce_sysclass = { | 1746 | static struct sysdev_class mce_sysclass = { |
@@ -1929,13 +1953,14 @@ static __cpuinit void mce_remove_device(unsigned int cpu) | |||
1929 | } | 1953 | } |
1930 | 1954 | ||
1931 | /* Make sure there are no machine checks on offlined CPUs. */ | 1955 | /* Make sure there are no machine checks on offlined CPUs. */ |
1932 | static void mce_disable_cpu(void *h) | 1956 | static void __cpuinit mce_disable_cpu(void *h) |
1933 | { | 1957 | { |
1934 | unsigned long action = *(unsigned long *)h; | 1958 | unsigned long action = *(unsigned long *)h; |
1935 | int i; | 1959 | int i; |
1936 | 1960 | ||
1937 | if (!mce_available(¤t_cpu_data)) | 1961 | if (!mce_available(¤t_cpu_data)) |
1938 | return; | 1962 | return; |
1963 | |||
1939 | if (!(action & CPU_TASKS_FROZEN)) | 1964 | if (!(action & CPU_TASKS_FROZEN)) |
1940 | cmci_clear(); | 1965 | cmci_clear(); |
1941 | for (i = 0; i < banks; i++) { | 1966 | for (i = 0; i < banks; i++) { |
@@ -1946,7 +1971,7 @@ static void mce_disable_cpu(void *h) | |||
1946 | } | 1971 | } |
1947 | } | 1972 | } |
1948 | 1973 | ||
1949 | static void mce_reenable_cpu(void *h) | 1974 | static void __cpuinit mce_reenable_cpu(void *h) |
1950 | { | 1975 | { |
1951 | unsigned long action = *(unsigned long *)h; | 1976 | unsigned long action = *(unsigned long *)h; |
1952 | int i; | 1977 | int i; |
@@ -2025,7 +2050,7 @@ static __init void mce_init_banks(void) | |||
2025 | } | 2050 | } |
2026 | } | 2051 | } |
2027 | 2052 | ||
2028 | static __init int mce_init_device(void) | 2053 | static __init int mcheck_init_device(void) |
2029 | { | 2054 | { |
2030 | int err; | 2055 | int err; |
2031 | int i = 0; | 2056 | int i = 0; |
@@ -2053,7 +2078,7 @@ static __init int mce_init_device(void) | |||
2053 | return err; | 2078 | return err; |
2054 | } | 2079 | } |
2055 | 2080 | ||
2056 | device_initcall(mce_init_device); | 2081 | device_initcall(mcheck_init_device); |
2057 | 2082 | ||
2058 | /* | 2083 | /* |
2059 | * Old style boot options parsing. Only for compatibility. | 2084 | * Old style boot options parsing. Only for compatibility. |
@@ -2101,7 +2126,7 @@ static int fake_panic_set(void *data, u64 val) | |||
2101 | DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get, | 2126 | DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get, |
2102 | fake_panic_set, "%llu\n"); | 2127 | fake_panic_set, "%llu\n"); |
2103 | 2128 | ||
2104 | static int __init mce_debugfs_init(void) | 2129 | static int __init mcheck_debugfs_init(void) |
2105 | { | 2130 | { |
2106 | struct dentry *dmce, *ffake_panic; | 2131 | struct dentry *dmce, *ffake_panic; |
2107 | 2132 | ||
@@ -2115,5 +2140,5 @@ static int __init mce_debugfs_init(void) | |||
2115 | 2140 | ||
2116 | return 0; | 2141 | return 0; |
2117 | } | 2142 | } |
2118 | late_initcall(mce_debugfs_init); | 2143 | late_initcall(mcheck_debugfs_init); |
2119 | #endif | 2144 | #endif |
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index b3a1dba75330..4fef985fc221 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -49,6 +49,8 @@ static DEFINE_PER_CPU(struct thermal_state, thermal_state); | |||
49 | 49 | ||
50 | static atomic_t therm_throt_en = ATOMIC_INIT(0); | 50 | static atomic_t therm_throt_en = ATOMIC_INIT(0); |
51 | 51 | ||
52 | static u32 lvtthmr_init __read_mostly; | ||
53 | |||
52 | #ifdef CONFIG_SYSFS | 54 | #ifdef CONFIG_SYSFS |
53 | #define define_therm_throt_sysdev_one_ro(_name) \ | 55 | #define define_therm_throt_sysdev_one_ro(_name) \ |
54 | static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) | 56 | static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) |
@@ -254,6 +256,18 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) | |||
254 | ack_APIC_irq(); | 256 | ack_APIC_irq(); |
255 | } | 257 | } |
256 | 258 | ||
259 | void __init mcheck_intel_therm_init(void) | ||
260 | { | ||
261 | /* | ||
262 | * This function is only called on boot CPU. Save the init thermal | ||
263 | * LVT value on BSP and use that value to restore APs' thermal LVT | ||
264 | * entry BIOS programmed later | ||
265 | */ | ||
266 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ACPI) && | ||
267 | cpu_has(&boot_cpu_data, X86_FEATURE_ACC)) | ||
268 | lvtthmr_init = apic_read(APIC_LVTTHMR); | ||
269 | } | ||
270 | |||
257 | void intel_init_thermal(struct cpuinfo_x86 *c) | 271 | void intel_init_thermal(struct cpuinfo_x86 *c) |
258 | { | 272 | { |
259 | unsigned int cpu = smp_processor_id(); | 273 | unsigned int cpu = smp_processor_id(); |
@@ -270,7 +284,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c) | |||
270 | * since it might be delivered via SMI already: | 284 | * since it might be delivered via SMI already: |
271 | */ | 285 | */ |
272 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | 286 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); |
273 | h = apic_read(APIC_LVTTHMR); | 287 | |
288 | /* | ||
289 | * The initial value of thermal LVT entries on all APs always reads | ||
290 | * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI | ||
291 | * sequence to them and LVT registers are reset to 0s except for | ||
292 | * the mask bits which are set to 1s when APs receive INIT IPI. | ||
293 | * Always restore the value that BIOS has programmed on AP based on | ||
294 | * BSP's info we saved since BIOS is always setting the same value | ||
295 | * for all threads/cores | ||
296 | */ | ||
297 | apic_write(APIC_LVTTHMR, lvtthmr_init); | ||
298 | |||
299 | h = lvtthmr_init; | ||
300 | |||
274 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { | 301 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { |
275 | printk(KERN_DEBUG | 302 | printk(KERN_DEBUG |
276 | "CPU%d: Thermal monitoring handled by SMI\n", cpu); | 303 | "CPU%d: Thermal monitoring handled by SMI\n", cpu); |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index b5801c311846..c1bbed1021d9 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -77,6 +77,18 @@ struct cpu_hw_events { | |||
77 | struct debug_store *ds; | 77 | struct debug_store *ds; |
78 | }; | 78 | }; |
79 | 79 | ||
80 | struct event_constraint { | ||
81 | unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
82 | int code; | ||
83 | }; | ||
84 | |||
85 | #define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) } | ||
86 | #define EVENT_CONSTRAINT_END { .code = 0, .idxmsk[0] = 0 } | ||
87 | |||
88 | #define for_each_event_constraint(e, c) \ | ||
89 | for ((e) = (c); (e)->idxmsk[0]; (e)++) | ||
90 | |||
91 | |||
80 | /* | 92 | /* |
81 | * struct x86_pmu - generic x86 pmu | 93 | * struct x86_pmu - generic x86 pmu |
82 | */ | 94 | */ |
@@ -102,6 +114,8 @@ struct x86_pmu { | |||
102 | u64 intel_ctrl; | 114 | u64 intel_ctrl; |
103 | void (*enable_bts)(u64 config); | 115 | void (*enable_bts)(u64 config); |
104 | void (*disable_bts)(void); | 116 | void (*disable_bts)(void); |
117 | int (*get_event_idx)(struct cpu_hw_events *cpuc, | ||
118 | struct hw_perf_event *hwc); | ||
105 | }; | 119 | }; |
106 | 120 | ||
107 | static struct x86_pmu x86_pmu __read_mostly; | 121 | static struct x86_pmu x86_pmu __read_mostly; |
@@ -110,6 +124,8 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { | |||
110 | .enabled = 1, | 124 | .enabled = 1, |
111 | }; | 125 | }; |
112 | 126 | ||
127 | static const struct event_constraint *event_constraints; | ||
128 | |||
113 | /* | 129 | /* |
114 | * Not sure about some of these | 130 | * Not sure about some of these |
115 | */ | 131 | */ |
@@ -155,6 +171,16 @@ static u64 p6_pmu_raw_event(u64 hw_event) | |||
155 | return hw_event & P6_EVNTSEL_MASK; | 171 | return hw_event & P6_EVNTSEL_MASK; |
156 | } | 172 | } |
157 | 173 | ||
174 | static const struct event_constraint intel_p6_event_constraints[] = | ||
175 | { | ||
176 | EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ | ||
177 | EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ | ||
178 | EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */ | ||
179 | EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | ||
180 | EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | ||
181 | EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
182 | EVENT_CONSTRAINT_END | ||
183 | }; | ||
158 | 184 | ||
159 | /* | 185 | /* |
160 | * Intel PerfMon v3. Used on Core2 and later. | 186 | * Intel PerfMon v3. Used on Core2 and later. |
@@ -170,6 +196,35 @@ static const u64 intel_perfmon_event_map[] = | |||
170 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, | 196 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, |
171 | }; | 197 | }; |
172 | 198 | ||
199 | static const struct event_constraint intel_core_event_constraints[] = | ||
200 | { | ||
201 | EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ | ||
202 | EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ | ||
203 | EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | ||
204 | EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | ||
205 | EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
206 | EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ | ||
207 | EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ | ||
208 | EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ | ||
209 | EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ | ||
210 | EVENT_CONSTRAINT_END | ||
211 | }; | ||
212 | |||
213 | static const struct event_constraint intel_nehalem_event_constraints[] = | ||
214 | { | ||
215 | EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ | ||
216 | EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ | ||
217 | EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ | ||
218 | EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ | ||
219 | EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ | ||
220 | EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */ | ||
221 | EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ | ||
222 | EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */ | ||
223 | EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */ | ||
224 | EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */ | ||
225 | EVENT_CONSTRAINT_END | ||
226 | }; | ||
227 | |||
173 | static u64 intel_pmu_event_map(int hw_event) | 228 | static u64 intel_pmu_event_map(int hw_event) |
174 | { | 229 | { |
175 | return intel_perfmon_event_map[hw_event]; | 230 | return intel_perfmon_event_map[hw_event]; |
@@ -190,7 +245,7 @@ static u64 __read_mostly hw_cache_event_ids | |||
190 | [PERF_COUNT_HW_CACHE_OP_MAX] | 245 | [PERF_COUNT_HW_CACHE_OP_MAX] |
191 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; | 246 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; |
192 | 247 | ||
193 | static const u64 nehalem_hw_cache_event_ids | 248 | static __initconst u64 nehalem_hw_cache_event_ids |
194 | [PERF_COUNT_HW_CACHE_MAX] | 249 | [PERF_COUNT_HW_CACHE_MAX] |
195 | [PERF_COUNT_HW_CACHE_OP_MAX] | 250 | [PERF_COUNT_HW_CACHE_OP_MAX] |
196 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 251 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
@@ -281,7 +336,7 @@ static const u64 nehalem_hw_cache_event_ids | |||
281 | }, | 336 | }, |
282 | }; | 337 | }; |
283 | 338 | ||
284 | static const u64 core2_hw_cache_event_ids | 339 | static __initconst u64 core2_hw_cache_event_ids |
285 | [PERF_COUNT_HW_CACHE_MAX] | 340 | [PERF_COUNT_HW_CACHE_MAX] |
286 | [PERF_COUNT_HW_CACHE_OP_MAX] | 341 | [PERF_COUNT_HW_CACHE_OP_MAX] |
287 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 342 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
@@ -372,7 +427,7 @@ static const u64 core2_hw_cache_event_ids | |||
372 | }, | 427 | }, |
373 | }; | 428 | }; |
374 | 429 | ||
375 | static const u64 atom_hw_cache_event_ids | 430 | static __initconst u64 atom_hw_cache_event_ids |
376 | [PERF_COUNT_HW_CACHE_MAX] | 431 | [PERF_COUNT_HW_CACHE_MAX] |
377 | [PERF_COUNT_HW_CACHE_OP_MAX] | 432 | [PERF_COUNT_HW_CACHE_OP_MAX] |
378 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 433 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
@@ -469,7 +524,7 @@ static u64 intel_pmu_raw_event(u64 hw_event) | |||
469 | #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL | 524 | #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL |
470 | #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL | 525 | #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL |
471 | #define CORE_EVNTSEL_INV_MASK 0x00800000ULL | 526 | #define CORE_EVNTSEL_INV_MASK 0x00800000ULL |
472 | #define CORE_EVNTSEL_REG_MASK 0xFF000000ULL | 527 | #define CORE_EVNTSEL_REG_MASK 0xFF000000ULL |
473 | 528 | ||
474 | #define CORE_EVNTSEL_MASK \ | 529 | #define CORE_EVNTSEL_MASK \ |
475 | (CORE_EVNTSEL_EVENT_MASK | \ | 530 | (CORE_EVNTSEL_EVENT_MASK | \ |
@@ -481,7 +536,7 @@ static u64 intel_pmu_raw_event(u64 hw_event) | |||
481 | return hw_event & CORE_EVNTSEL_MASK; | 536 | return hw_event & CORE_EVNTSEL_MASK; |
482 | } | 537 | } |
483 | 538 | ||
484 | static const u64 amd_hw_cache_event_ids | 539 | static __initconst u64 amd_hw_cache_event_ids |
485 | [PERF_COUNT_HW_CACHE_MAX] | 540 | [PERF_COUNT_HW_CACHE_MAX] |
486 | [PERF_COUNT_HW_CACHE_OP_MAX] | 541 | [PERF_COUNT_HW_CACHE_OP_MAX] |
487 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 542 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
@@ -932,6 +987,8 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
932 | */ | 987 | */ |
933 | hwc->config = ARCH_PERFMON_EVENTSEL_INT; | 988 | hwc->config = ARCH_PERFMON_EVENTSEL_INT; |
934 | 989 | ||
990 | hwc->idx = -1; | ||
991 | |||
935 | /* | 992 | /* |
936 | * Count user and OS events unless requested not to. | 993 | * Count user and OS events unless requested not to. |
937 | */ | 994 | */ |
@@ -1334,8 +1391,7 @@ static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx) | |||
1334 | x86_pmu_enable_event(hwc, idx); | 1391 | x86_pmu_enable_event(hwc, idx); |
1335 | } | 1392 | } |
1336 | 1393 | ||
1337 | static int | 1394 | static int fixed_mode_idx(struct hw_perf_event *hwc) |
1338 | fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) | ||
1339 | { | 1395 | { |
1340 | unsigned int hw_event; | 1396 | unsigned int hw_event; |
1341 | 1397 | ||
@@ -1349,6 +1405,12 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) | |||
1349 | if (!x86_pmu.num_events_fixed) | 1405 | if (!x86_pmu.num_events_fixed) |
1350 | return -1; | 1406 | return -1; |
1351 | 1407 | ||
1408 | /* | ||
1409 | * fixed counters do not take all possible filters | ||
1410 | */ | ||
1411 | if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK) | ||
1412 | return -1; | ||
1413 | |||
1352 | if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) | 1414 | if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) |
1353 | return X86_PMC_IDX_FIXED_INSTRUCTIONS; | 1415 | return X86_PMC_IDX_FIXED_INSTRUCTIONS; |
1354 | if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) | 1416 | if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) |
@@ -1360,22 +1422,57 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) | |||
1360 | } | 1422 | } |
1361 | 1423 | ||
1362 | /* | 1424 | /* |
1363 | * Find a PMC slot for the freshly enabled / scheduled in event: | 1425 | * generic counter allocator: get next free counter |
1364 | */ | 1426 | */ |
1365 | static int x86_pmu_enable(struct perf_event *event) | 1427 | static int |
1428 | gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) | ||
1429 | { | ||
1430 | int idx; | ||
1431 | |||
1432 | idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events); | ||
1433 | return idx == x86_pmu.num_events ? -1 : idx; | ||
1434 | } | ||
1435 | |||
1436 | /* | ||
1437 | * intel-specific counter allocator: check event constraints | ||
1438 | */ | ||
1439 | static int | ||
1440 | intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) | ||
1441 | { | ||
1442 | const struct event_constraint *event_constraint; | ||
1443 | int i, code; | ||
1444 | |||
1445 | if (!event_constraints) | ||
1446 | goto skip; | ||
1447 | |||
1448 | code = hwc->config & CORE_EVNTSEL_EVENT_MASK; | ||
1449 | |||
1450 | for_each_event_constraint(event_constraint, event_constraints) { | ||
1451 | if (code == event_constraint->code) { | ||
1452 | for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) { | ||
1453 | if (!test_and_set_bit(i, cpuc->used_mask)) | ||
1454 | return i; | ||
1455 | } | ||
1456 | return -1; | ||
1457 | } | ||
1458 | } | ||
1459 | skip: | ||
1460 | return gen_get_event_idx(cpuc, hwc); | ||
1461 | } | ||
1462 | |||
1463 | static int | ||
1464 | x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) | ||
1366 | { | 1465 | { |
1367 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1368 | struct hw_perf_event *hwc = &event->hw; | ||
1369 | int idx; | 1466 | int idx; |
1370 | 1467 | ||
1371 | idx = fixed_mode_idx(event, hwc); | 1468 | idx = fixed_mode_idx(hwc); |
1372 | if (idx == X86_PMC_IDX_FIXED_BTS) { | 1469 | if (idx == X86_PMC_IDX_FIXED_BTS) { |
1373 | /* BTS is already occupied. */ | 1470 | /* BTS is already occupied. */ |
1374 | if (test_and_set_bit(idx, cpuc->used_mask)) | 1471 | if (test_and_set_bit(idx, cpuc->used_mask)) |
1375 | return -EAGAIN; | 1472 | return -EAGAIN; |
1376 | 1473 | ||
1377 | hwc->config_base = 0; | 1474 | hwc->config_base = 0; |
1378 | hwc->event_base = 0; | 1475 | hwc->event_base = 0; |
1379 | hwc->idx = idx; | 1476 | hwc->idx = idx; |
1380 | } else if (idx >= 0) { | 1477 | } else if (idx >= 0) { |
1381 | /* | 1478 | /* |
@@ -1396,20 +1493,35 @@ static int x86_pmu_enable(struct perf_event *event) | |||
1396 | } else { | 1493 | } else { |
1397 | idx = hwc->idx; | 1494 | idx = hwc->idx; |
1398 | /* Try to get the previous generic event again */ | 1495 | /* Try to get the previous generic event again */ |
1399 | if (test_and_set_bit(idx, cpuc->used_mask)) { | 1496 | if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) { |
1400 | try_generic: | 1497 | try_generic: |
1401 | idx = find_first_zero_bit(cpuc->used_mask, | 1498 | idx = x86_pmu.get_event_idx(cpuc, hwc); |
1402 | x86_pmu.num_events); | 1499 | if (idx == -1) |
1403 | if (idx == x86_pmu.num_events) | ||
1404 | return -EAGAIN; | 1500 | return -EAGAIN; |
1405 | 1501 | ||
1406 | set_bit(idx, cpuc->used_mask); | 1502 | set_bit(idx, cpuc->used_mask); |
1407 | hwc->idx = idx; | 1503 | hwc->idx = idx; |
1408 | } | 1504 | } |
1409 | hwc->config_base = x86_pmu.eventsel; | 1505 | hwc->config_base = x86_pmu.eventsel; |
1410 | hwc->event_base = x86_pmu.perfctr; | 1506 | hwc->event_base = x86_pmu.perfctr; |
1411 | } | 1507 | } |
1412 | 1508 | ||
1509 | return idx; | ||
1510 | } | ||
1511 | |||
1512 | /* | ||
1513 | * Find a PMC slot for the freshly enabled / scheduled in event: | ||
1514 | */ | ||
1515 | static int x86_pmu_enable(struct perf_event *event) | ||
1516 | { | ||
1517 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1518 | struct hw_perf_event *hwc = &event->hw; | ||
1519 | int idx; | ||
1520 | |||
1521 | idx = x86_schedule_event(cpuc, hwc); | ||
1522 | if (idx < 0) | ||
1523 | return idx; | ||
1524 | |||
1413 | perf_events_lapic_init(); | 1525 | perf_events_lapic_init(); |
1414 | 1526 | ||
1415 | x86_pmu.disable(hwc, idx); | 1527 | x86_pmu.disable(hwc, idx); |
@@ -1852,7 +1964,7 @@ static __read_mostly struct notifier_block perf_event_nmi_notifier = { | |||
1852 | .priority = 1 | 1964 | .priority = 1 |
1853 | }; | 1965 | }; |
1854 | 1966 | ||
1855 | static struct x86_pmu p6_pmu = { | 1967 | static __initconst struct x86_pmu p6_pmu = { |
1856 | .name = "p6", | 1968 | .name = "p6", |
1857 | .handle_irq = p6_pmu_handle_irq, | 1969 | .handle_irq = p6_pmu_handle_irq, |
1858 | .disable_all = p6_pmu_disable_all, | 1970 | .disable_all = p6_pmu_disable_all, |
@@ -1877,9 +1989,10 @@ static struct x86_pmu p6_pmu = { | |||
1877 | */ | 1989 | */ |
1878 | .event_bits = 32, | 1990 | .event_bits = 32, |
1879 | .event_mask = (1ULL << 32) - 1, | 1991 | .event_mask = (1ULL << 32) - 1, |
1992 | .get_event_idx = intel_get_event_idx, | ||
1880 | }; | 1993 | }; |
1881 | 1994 | ||
1882 | static struct x86_pmu intel_pmu = { | 1995 | static __initconst struct x86_pmu intel_pmu = { |
1883 | .name = "Intel", | 1996 | .name = "Intel", |
1884 | .handle_irq = intel_pmu_handle_irq, | 1997 | .handle_irq = intel_pmu_handle_irq, |
1885 | .disable_all = intel_pmu_disable_all, | 1998 | .disable_all = intel_pmu_disable_all, |
@@ -1900,9 +2013,10 @@ static struct x86_pmu intel_pmu = { | |||
1900 | .max_period = (1ULL << 31) - 1, | 2013 | .max_period = (1ULL << 31) - 1, |
1901 | .enable_bts = intel_pmu_enable_bts, | 2014 | .enable_bts = intel_pmu_enable_bts, |
1902 | .disable_bts = intel_pmu_disable_bts, | 2015 | .disable_bts = intel_pmu_disable_bts, |
2016 | .get_event_idx = intel_get_event_idx, | ||
1903 | }; | 2017 | }; |
1904 | 2018 | ||
1905 | static struct x86_pmu amd_pmu = { | 2019 | static __initconst struct x86_pmu amd_pmu = { |
1906 | .name = "AMD", | 2020 | .name = "AMD", |
1907 | .handle_irq = amd_pmu_handle_irq, | 2021 | .handle_irq = amd_pmu_handle_irq, |
1908 | .disable_all = amd_pmu_disable_all, | 2022 | .disable_all = amd_pmu_disable_all, |
@@ -1920,9 +2034,10 @@ static struct x86_pmu amd_pmu = { | |||
1920 | .apic = 1, | 2034 | .apic = 1, |
1921 | /* use highest bit to detect overflow */ | 2035 | /* use highest bit to detect overflow */ |
1922 | .max_period = (1ULL << 47) - 1, | 2036 | .max_period = (1ULL << 47) - 1, |
2037 | .get_event_idx = gen_get_event_idx, | ||
1923 | }; | 2038 | }; |
1924 | 2039 | ||
1925 | static int p6_pmu_init(void) | 2040 | static __init int p6_pmu_init(void) |
1926 | { | 2041 | { |
1927 | switch (boot_cpu_data.x86_model) { | 2042 | switch (boot_cpu_data.x86_model) { |
1928 | case 1: | 2043 | case 1: |
@@ -1932,10 +2047,12 @@ static int p6_pmu_init(void) | |||
1932 | case 7: | 2047 | case 7: |
1933 | case 8: | 2048 | case 8: |
1934 | case 11: /* Pentium III */ | 2049 | case 11: /* Pentium III */ |
2050 | event_constraints = intel_p6_event_constraints; | ||
1935 | break; | 2051 | break; |
1936 | case 9: | 2052 | case 9: |
1937 | case 13: | 2053 | case 13: |
1938 | /* Pentium M */ | 2054 | /* Pentium M */ |
2055 | event_constraints = intel_p6_event_constraints; | ||
1939 | break; | 2056 | break; |
1940 | default: | 2057 | default: |
1941 | pr_cont("unsupported p6 CPU model %d ", | 2058 | pr_cont("unsupported p6 CPU model %d ", |
@@ -1954,7 +2071,7 @@ static int p6_pmu_init(void) | |||
1954 | return 0; | 2071 | return 0; |
1955 | } | 2072 | } |
1956 | 2073 | ||
1957 | static int intel_pmu_init(void) | 2074 | static __init int intel_pmu_init(void) |
1958 | { | 2075 | { |
1959 | union cpuid10_edx edx; | 2076 | union cpuid10_edx edx; |
1960 | union cpuid10_eax eax; | 2077 | union cpuid10_eax eax; |
@@ -2007,12 +2124,14 @@ static int intel_pmu_init(void) | |||
2007 | sizeof(hw_cache_event_ids)); | 2124 | sizeof(hw_cache_event_ids)); |
2008 | 2125 | ||
2009 | pr_cont("Core2 events, "); | 2126 | pr_cont("Core2 events, "); |
2127 | event_constraints = intel_core_event_constraints; | ||
2010 | break; | 2128 | break; |
2011 | default: | 2129 | default: |
2012 | case 26: | 2130 | case 26: |
2013 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, | 2131 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, |
2014 | sizeof(hw_cache_event_ids)); | 2132 | sizeof(hw_cache_event_ids)); |
2015 | 2133 | ||
2134 | event_constraints = intel_nehalem_event_constraints; | ||
2016 | pr_cont("Nehalem/Corei7 events, "); | 2135 | pr_cont("Nehalem/Corei7 events, "); |
2017 | break; | 2136 | break; |
2018 | case 28: | 2137 | case 28: |
@@ -2025,7 +2144,7 @@ static int intel_pmu_init(void) | |||
2025 | return 0; | 2144 | return 0; |
2026 | } | 2145 | } |
2027 | 2146 | ||
2028 | static int amd_pmu_init(void) | 2147 | static __init int amd_pmu_init(void) |
2029 | { | 2148 | { |
2030 | /* Performance-monitoring supported from K7 and later: */ | 2149 | /* Performance-monitoring supported from K7 and later: */ |
2031 | if (boot_cpu_data.x86 < 6) | 2150 | if (boot_cpu_data.x86 < 6) |
@@ -2105,11 +2224,47 @@ static const struct pmu pmu = { | |||
2105 | .unthrottle = x86_pmu_unthrottle, | 2224 | .unthrottle = x86_pmu_unthrottle, |
2106 | }; | 2225 | }; |
2107 | 2226 | ||
2227 | static int | ||
2228 | validate_event(struct cpu_hw_events *cpuc, struct perf_event *event) | ||
2229 | { | ||
2230 | struct hw_perf_event fake_event = event->hw; | ||
2231 | |||
2232 | if (event->pmu && event->pmu != &pmu) | ||
2233 | return 0; | ||
2234 | |||
2235 | return x86_schedule_event(cpuc, &fake_event) >= 0; | ||
2236 | } | ||
2237 | |||
2238 | static int validate_group(struct perf_event *event) | ||
2239 | { | ||
2240 | struct perf_event *sibling, *leader = event->group_leader; | ||
2241 | struct cpu_hw_events fake_pmu; | ||
2242 | |||
2243 | memset(&fake_pmu, 0, sizeof(fake_pmu)); | ||
2244 | |||
2245 | if (!validate_event(&fake_pmu, leader)) | ||
2246 | return -ENOSPC; | ||
2247 | |||
2248 | list_for_each_entry(sibling, &leader->sibling_list, group_entry) { | ||
2249 | if (!validate_event(&fake_pmu, sibling)) | ||
2250 | return -ENOSPC; | ||
2251 | } | ||
2252 | |||
2253 | if (!validate_event(&fake_pmu, event)) | ||
2254 | return -ENOSPC; | ||
2255 | |||
2256 | return 0; | ||
2257 | } | ||
2258 | |||
2108 | const struct pmu *hw_perf_event_init(struct perf_event *event) | 2259 | const struct pmu *hw_perf_event_init(struct perf_event *event) |
2109 | { | 2260 | { |
2110 | int err; | 2261 | int err; |
2111 | 2262 | ||
2112 | err = __hw_perf_event_init(event); | 2263 | err = __hw_perf_event_init(event); |
2264 | if (!err) { | ||
2265 | if (event->group_leader != event) | ||
2266 | err = validate_group(event); | ||
2267 | } | ||
2113 | if (err) { | 2268 | if (err) { |
2114 | if (event->destroy) | 2269 | if (event->destroy) |
2115 | event->destroy(event); | 2270 | event->destroy(event); |
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index fab786f60ed6..898df9719afb 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c | |||
@@ -712,7 +712,7 @@ static void probe_nmi_watchdog(void) | |||
712 | switch (boot_cpu_data.x86_vendor) { | 712 | switch (boot_cpu_data.x86_vendor) { |
713 | case X86_VENDOR_AMD: | 713 | case X86_VENDOR_AMD: |
714 | if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && | 714 | if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && |
715 | boot_cpu_data.x86 != 16) | 715 | boot_cpu_data.x86 != 16 && boot_cpu_data.x86 != 17) |
716 | return; | 716 | return; |
717 | wd_ops = &k7_wd_ops; | 717 | wd_ops = &k7_wd_ops; |
718 | break; | 718 | break; |
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index bb62b3e5caad..28000743bbb0 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c | |||
@@ -26,7 +26,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) | |||
26 | 26 | ||
27 | early_init_transmeta(c); | 27 | early_init_transmeta(c); |
28 | 28 | ||
29 | display_cacheinfo(c); | 29 | cpu_detect_cache_sizes(c); |
30 | 30 | ||
31 | /* Print CMS and CPU revision */ | 31 | /* Print CMS and CPU revision */ |
32 | max = cpuid_eax(0x80860000); | 32 | max = cpuid_eax(0x80860000); |
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 6a52d4b36a30..7ef24a796992 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c | |||
@@ -116,21 +116,16 @@ static int cpuid_open(struct inode *inode, struct file *file) | |||
116 | { | 116 | { |
117 | unsigned int cpu; | 117 | unsigned int cpu; |
118 | struct cpuinfo_x86 *c; | 118 | struct cpuinfo_x86 *c; |
119 | int ret = 0; | ||
120 | |||
121 | lock_kernel(); | ||
122 | 119 | ||
123 | cpu = iminor(file->f_path.dentry->d_inode); | 120 | cpu = iminor(file->f_path.dentry->d_inode); |
124 | if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { | 121 | if (cpu >= nr_cpu_ids || !cpu_online(cpu)) |
125 | ret = -ENXIO; /* No such CPU */ | 122 | return -ENXIO; /* No such CPU */ |
126 | goto out; | 123 | |
127 | } | ||
128 | c = &cpu_data(cpu); | 124 | c = &cpu_data(cpu); |
129 | if (c->cpuid_level < 0) | 125 | if (c->cpuid_level < 0) |
130 | ret = -EIO; /* CPUID not supported */ | 126 | return -EIO; /* CPUID not supported */ |
131 | out: | 127 | |
132 | unlock_kernel(); | 128 | return 0; |
133 | return ret; | ||
134 | } | 129 | } |
135 | 130 | ||
136 | /* | 131 | /* |
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 5e409dc298a4..a4849c10a77e 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c | |||
@@ -27,8 +27,7 @@ | |||
27 | #include <asm/cpu.h> | 27 | #include <asm/cpu.h> |
28 | #include <asm/reboot.h> | 28 | #include <asm/reboot.h> |
29 | #include <asm/virtext.h> | 29 | #include <asm/virtext.h> |
30 | #include <asm/iommu.h> | 30 | #include <asm/x86_init.h> |
31 | |||
32 | 31 | ||
33 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) | 32 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) |
34 | 33 | ||
@@ -106,7 +105,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs) | |||
106 | #endif | 105 | #endif |
107 | 106 | ||
108 | #ifdef CONFIG_X86_64 | 107 | #ifdef CONFIG_X86_64 |
109 | pci_iommu_shutdown(); | 108 | x86_platform.iommu_shutdown(); |
110 | #endif | 109 | #endif |
111 | 110 | ||
112 | crash_save_cpu(regs, safe_smp_processor_id()); | 111 | crash_save_cpu(regs, safe_smp_processor_id()); |
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 2d8a371d4339..b8ce165dde5d 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -268,11 +268,12 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) | |||
268 | 268 | ||
269 | show_registers(regs); | 269 | show_registers(regs); |
270 | #ifdef CONFIG_X86_32 | 270 | #ifdef CONFIG_X86_32 |
271 | sp = (unsigned long) (®s->sp); | 271 | if (user_mode_vm(regs)) { |
272 | savesegment(ss, ss); | ||
273 | if (user_mode(regs)) { | ||
274 | sp = regs->sp; | 272 | sp = regs->sp; |
275 | ss = regs->ss & 0xffff; | 273 | ss = regs->ss & 0xffff; |
274 | } else { | ||
275 | sp = kernel_stack_pointer(regs); | ||
276 | savesegment(ss, ss); | ||
276 | } | 277 | } |
277 | printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); | 278 | printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); |
278 | print_symbol("%s", regs->ip); | 279 | print_symbol("%s", regs->ip); |
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index f7dd2a7c3bf4..e0ed4c7abb62 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c | |||
@@ -10,9 +10,9 @@ | |||
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | #include <linux/ptrace.h> | 11 | #include <linux/ptrace.h> |
12 | #include <linux/kexec.h> | 12 | #include <linux/kexec.h> |
13 | #include <linux/sysfs.h> | ||
13 | #include <linux/bug.h> | 14 | #include <linux/bug.h> |
14 | #include <linux/nmi.h> | 15 | #include <linux/nmi.h> |
15 | #include <linux/sysfs.h> | ||
16 | 16 | ||
17 | #include <asm/stacktrace.h> | 17 | #include <asm/stacktrace.h> |
18 | 18 | ||
@@ -35,6 +35,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
35 | 35 | ||
36 | if (!stack) { | 36 | if (!stack) { |
37 | unsigned long dummy; | 37 | unsigned long dummy; |
38 | |||
38 | stack = &dummy; | 39 | stack = &dummy; |
39 | if (task && task != current) | 40 | if (task && task != current) |
40 | stack = (unsigned long *)task->thread.sp; | 41 | stack = (unsigned long *)task->thread.sp; |
@@ -57,8 +58,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
57 | 58 | ||
58 | context = (struct thread_info *) | 59 | context = (struct thread_info *) |
59 | ((unsigned long)stack & (~(THREAD_SIZE - 1))); | 60 | ((unsigned long)stack & (~(THREAD_SIZE - 1))); |
60 | bp = print_context_stack(context, stack, bp, ops, | 61 | bp = print_context_stack(context, stack, bp, ops, data, NULL, &graph); |
61 | data, NULL, &graph); | ||
62 | 62 | ||
63 | stack = (unsigned long *)context->previous_esp; | 63 | stack = (unsigned long *)context->previous_esp; |
64 | if (!stack) | 64 | if (!stack) |
@@ -72,7 +72,7 @@ EXPORT_SYMBOL(dump_trace); | |||
72 | 72 | ||
73 | void | 73 | void |
74 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 74 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
75 | unsigned long *sp, unsigned long bp, char *log_lvl) | 75 | unsigned long *sp, unsigned long bp, char *log_lvl) |
76 | { | 76 | { |
77 | unsigned long *stack; | 77 | unsigned long *stack; |
78 | int i; | 78 | int i; |
@@ -156,4 +156,3 @@ int is_valid_bugaddr(unsigned long ip) | |||
156 | 156 | ||
157 | return ud2 == 0x0b0f; | 157 | return ud2 == 0x0b0f; |
158 | } | 158 | } |
159 | |||
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index a071e6be177e..8e740934bd1f 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c | |||
@@ -10,26 +10,28 @@ | |||
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | #include <linux/ptrace.h> | 11 | #include <linux/ptrace.h> |
12 | #include <linux/kexec.h> | 12 | #include <linux/kexec.h> |
13 | #include <linux/sysfs.h> | ||
13 | #include <linux/bug.h> | 14 | #include <linux/bug.h> |
14 | #include <linux/nmi.h> | 15 | #include <linux/nmi.h> |
15 | #include <linux/sysfs.h> | ||
16 | 16 | ||
17 | #include <asm/stacktrace.h> | 17 | #include <asm/stacktrace.h> |
18 | 18 | ||
19 | #include "dumpstack.h" | 19 | #include "dumpstack.h" |
20 | 20 | ||
21 | #define N_EXCEPTION_STACKS_END \ | ||
22 | (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2) | ||
21 | 23 | ||
22 | static char x86_stack_ids[][8] = { | 24 | static char x86_stack_ids[][8] = { |
23 | [DEBUG_STACK - 1] = "#DB", | 25 | [ DEBUG_STACK-1 ] = "#DB", |
24 | [NMI_STACK - 1] = "NMI", | 26 | [ NMI_STACK-1 ] = "NMI", |
25 | [DOUBLEFAULT_STACK - 1] = "#DF", | 27 | [ DOUBLEFAULT_STACK-1 ] = "#DF", |
26 | [STACKFAULT_STACK - 1] = "#SS", | 28 | [ STACKFAULT_STACK-1 ] = "#SS", |
27 | [MCE_STACK - 1] = "#MC", | 29 | [ MCE_STACK-1 ] = "#MC", |
28 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | 30 | #if DEBUG_STKSZ > EXCEPTION_STKSZ |
29 | [N_EXCEPTION_STACKS ... | 31 | [ N_EXCEPTION_STACKS ... |
30 | N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" | 32 | N_EXCEPTION_STACKS_END ] = "#DB[?]" |
31 | #endif | 33 | #endif |
32 | }; | 34 | }; |
33 | 35 | ||
34 | int x86_is_stack_id(int id, char *name) | 36 | int x86_is_stack_id(int id, char *name) |
35 | { | 37 | { |
@@ -37,7 +39,7 @@ int x86_is_stack_id(int id, char *name) | |||
37 | } | 39 | } |
38 | 40 | ||
39 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | 41 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, |
40 | unsigned *usedp, char **idp) | 42 | unsigned *usedp, char **idp) |
41 | { | 43 | { |
42 | unsigned k; | 44 | unsigned k; |
43 | 45 | ||
@@ -202,21 +204,24 @@ EXPORT_SYMBOL(dump_trace); | |||
202 | 204 | ||
203 | void | 205 | void |
204 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 206 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
205 | unsigned long *sp, unsigned long bp, char *log_lvl) | 207 | unsigned long *sp, unsigned long bp, char *log_lvl) |
206 | { | 208 | { |
209 | unsigned long *irq_stack_end; | ||
210 | unsigned long *irq_stack; | ||
207 | unsigned long *stack; | 211 | unsigned long *stack; |
212 | int cpu; | ||
208 | int i; | 213 | int i; |
209 | const int cpu = smp_processor_id(); | 214 | |
210 | unsigned long *irq_stack_end = | 215 | preempt_disable(); |
211 | (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); | 216 | cpu = smp_processor_id(); |
212 | unsigned long *irq_stack = | 217 | |
213 | (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE); | 218 | irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); |
219 | irq_stack = (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE); | ||
214 | 220 | ||
215 | /* | 221 | /* |
216 | * debugging aid: "show_stack(NULL, NULL);" prints the | 222 | * Debugging aid: "show_stack(NULL, NULL);" prints the |
217 | * back trace for this cpu. | 223 | * back trace for this cpu: |
218 | */ | 224 | */ |
219 | |||
220 | if (sp == NULL) { | 225 | if (sp == NULL) { |
221 | if (task) | 226 | if (task) |
222 | sp = (unsigned long *)task->thread.sp; | 227 | sp = (unsigned long *)task->thread.sp; |
@@ -240,6 +245,8 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
240 | printk(" %016lx", *stack++); | 245 | printk(" %016lx", *stack++); |
241 | touch_nmi_watchdog(); | 246 | touch_nmi_watchdog(); |
242 | } | 247 | } |
248 | preempt_enable(); | ||
249 | |||
243 | printk("\n"); | 250 | printk("\n"); |
244 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | 251 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); |
245 | } | 252 | } |
@@ -303,4 +310,3 @@ int is_valid_bugaddr(unsigned long ip) | |||
303 | 310 | ||
304 | return ud2 == 0x0b0f; | 311 | return ud2 == 0x0b0f; |
305 | } | 312 | } |
306 | |||
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c097e7d607c6..50b9c220e121 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -334,6 +334,10 @@ ENTRY(ret_from_fork) | |||
334 | END(ret_from_fork) | 334 | END(ret_from_fork) |
335 | 335 | ||
336 | /* | 336 | /* |
337 | * Interrupt exit functions should be protected against kprobes | ||
338 | */ | ||
339 | .pushsection .kprobes.text, "ax" | ||
340 | /* | ||
337 | * Return to user mode is not as complex as all this looks, | 341 | * Return to user mode is not as complex as all this looks, |
338 | * but we want the default path for a system call return to | 342 | * but we want the default path for a system call return to |
339 | * go as quickly as possible which is why some of this is | 343 | * go as quickly as possible which is why some of this is |
@@ -383,6 +387,10 @@ need_resched: | |||
383 | END(resume_kernel) | 387 | END(resume_kernel) |
384 | #endif | 388 | #endif |
385 | CFI_ENDPROC | 389 | CFI_ENDPROC |
390 | /* | ||
391 | * End of kprobes section | ||
392 | */ | ||
393 | .popsection | ||
386 | 394 | ||
387 | /* SYSENTER_RETURN points to after the "sysenter" instruction in | 395 | /* SYSENTER_RETURN points to after the "sysenter" instruction in |
388 | the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ | 396 | the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ |
@@ -513,6 +521,10 @@ sysexit_audit: | |||
513 | PTGS_TO_GS_EX | 521 | PTGS_TO_GS_EX |
514 | ENDPROC(ia32_sysenter_target) | 522 | ENDPROC(ia32_sysenter_target) |
515 | 523 | ||
524 | /* | ||
525 | * syscall stub including irq exit should be protected against kprobes | ||
526 | */ | ||
527 | .pushsection .kprobes.text, "ax" | ||
516 | # system call handler stub | 528 | # system call handler stub |
517 | ENTRY(system_call) | 529 | ENTRY(system_call) |
518 | RING0_INT_FRAME # can't unwind into user space anyway | 530 | RING0_INT_FRAME # can't unwind into user space anyway |
@@ -705,6 +717,10 @@ syscall_badsys: | |||
705 | jmp resume_userspace | 717 | jmp resume_userspace |
706 | END(syscall_badsys) | 718 | END(syscall_badsys) |
707 | CFI_ENDPROC | 719 | CFI_ENDPROC |
720 | /* | ||
721 | * End of kprobes section | ||
722 | */ | ||
723 | .popsection | ||
708 | 724 | ||
709 | /* | 725 | /* |
710 | * System calls that need a pt_regs pointer. | 726 | * System calls that need a pt_regs pointer. |
@@ -814,6 +830,10 @@ common_interrupt: | |||
814 | ENDPROC(common_interrupt) | 830 | ENDPROC(common_interrupt) |
815 | CFI_ENDPROC | 831 | CFI_ENDPROC |
816 | 832 | ||
833 | /* | ||
834 | * Irq entries should be protected against kprobes | ||
835 | */ | ||
836 | .pushsection .kprobes.text, "ax" | ||
817 | #define BUILD_INTERRUPT3(name, nr, fn) \ | 837 | #define BUILD_INTERRUPT3(name, nr, fn) \ |
818 | ENTRY(name) \ | 838 | ENTRY(name) \ |
819 | RING0_INT_FRAME; \ | 839 | RING0_INT_FRAME; \ |
@@ -980,6 +1000,10 @@ ENTRY(spurious_interrupt_bug) | |||
980 | jmp error_code | 1000 | jmp error_code |
981 | CFI_ENDPROC | 1001 | CFI_ENDPROC |
982 | END(spurious_interrupt_bug) | 1002 | END(spurious_interrupt_bug) |
1003 | /* | ||
1004 | * End of kprobes section | ||
1005 | */ | ||
1006 | .popsection | ||
983 | 1007 | ||
984 | ENTRY(kernel_thread_helper) | 1008 | ENTRY(kernel_thread_helper) |
985 | pushl $0 # fake return address for unwinder | 1009 | pushl $0 # fake return address for unwinder |
@@ -1185,17 +1209,14 @@ END(ftrace_graph_caller) | |||
1185 | 1209 | ||
1186 | .globl return_to_handler | 1210 | .globl return_to_handler |
1187 | return_to_handler: | 1211 | return_to_handler: |
1188 | pushl $0 | ||
1189 | pushl %eax | 1212 | pushl %eax |
1190 | pushl %ecx | ||
1191 | pushl %edx | 1213 | pushl %edx |
1192 | movl %ebp, %eax | 1214 | movl %ebp, %eax |
1193 | call ftrace_return_to_handler | 1215 | call ftrace_return_to_handler |
1194 | movl %eax, 0xc(%esp) | 1216 | movl %eax, %ecx |
1195 | popl %edx | 1217 | popl %edx |
1196 | popl %ecx | ||
1197 | popl %eax | 1218 | popl %eax |
1198 | ret | 1219 | jmp *%ecx |
1199 | #endif | 1220 | #endif |
1200 | 1221 | ||
1201 | .section .rodata,"a" | 1222 | .section .rodata,"a" |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index b5c061f8f358..4deb8fc849dd 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -155,11 +155,11 @@ GLOBAL(return_to_handler) | |||
155 | 155 | ||
156 | call ftrace_return_to_handler | 156 | call ftrace_return_to_handler |
157 | 157 | ||
158 | movq %rax, 16(%rsp) | 158 | movq %rax, %rdi |
159 | movq 8(%rsp), %rdx | 159 | movq 8(%rsp), %rdx |
160 | movq (%rsp), %rax | 160 | movq (%rsp), %rax |
161 | addq $16, %rsp | 161 | addq $24, %rsp |
162 | retq | 162 | jmp *%rdi |
163 | #endif | 163 | #endif |
164 | 164 | ||
165 | 165 | ||
@@ -803,6 +803,10 @@ END(interrupt) | |||
803 | call \func | 803 | call \func |
804 | .endm | 804 | .endm |
805 | 805 | ||
806 | /* | ||
807 | * Interrupt entry/exit should be protected against kprobes | ||
808 | */ | ||
809 | .pushsection .kprobes.text, "ax" | ||
806 | /* | 810 | /* |
807 | * The interrupt stubs push (~vector+0x80) onto the stack and | 811 | * The interrupt stubs push (~vector+0x80) onto the stack and |
808 | * then jump to common_interrupt. | 812 | * then jump to common_interrupt. |
@@ -941,6 +945,10 @@ ENTRY(retint_kernel) | |||
941 | 945 | ||
942 | CFI_ENDPROC | 946 | CFI_ENDPROC |
943 | END(common_interrupt) | 947 | END(common_interrupt) |
948 | /* | ||
949 | * End of kprobes section | ||
950 | */ | ||
951 | .popsection | ||
944 | 952 | ||
945 | /* | 953 | /* |
946 | * APIC interrupts. | 954 | * APIC interrupts. |
@@ -1491,12 +1499,17 @@ error_kernelspace: | |||
1491 | leaq irq_return(%rip),%rcx | 1499 | leaq irq_return(%rip),%rcx |
1492 | cmpq %rcx,RIP+8(%rsp) | 1500 | cmpq %rcx,RIP+8(%rsp) |
1493 | je error_swapgs | 1501 | je error_swapgs |
1494 | movl %ecx,%ecx /* zero extend */ | 1502 | movl %ecx,%eax /* zero extend */ |
1495 | cmpq %rcx,RIP+8(%rsp) | 1503 | cmpq %rax,RIP+8(%rsp) |
1496 | je error_swapgs | 1504 | je bstep_iret |
1497 | cmpq $gs_change,RIP+8(%rsp) | 1505 | cmpq $gs_change,RIP+8(%rsp) |
1498 | je error_swapgs | 1506 | je error_swapgs |
1499 | jmp error_sti | 1507 | jmp error_sti |
1508 | |||
1509 | bstep_iret: | ||
1510 | /* Fix truncated RIP */ | ||
1511 | movq %rcx,RIP+8(%rsp) | ||
1512 | jmp error_swapgs | ||
1500 | END(error_entry) | 1513 | END(error_entry) |
1501 | 1514 | ||
1502 | 1515 | ||
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 9dbb527e1652..5a1b9758fd62 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -9,6 +9,8 @@ | |||
9 | * the dangers of modifying code on the run. | 9 | * the dangers of modifying code on the run. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
13 | |||
12 | #include <linux/spinlock.h> | 14 | #include <linux/spinlock.h> |
13 | #include <linux/hardirq.h> | 15 | #include <linux/hardirq.h> |
14 | #include <linux/uaccess.h> | 16 | #include <linux/uaccess.h> |
@@ -336,15 +338,15 @@ int __init ftrace_dyn_arch_init(void *data) | |||
336 | 338 | ||
337 | switch (faulted) { | 339 | switch (faulted) { |
338 | case 0: | 340 | case 0: |
339 | pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n"); | 341 | pr_info("converting mcount calls to 0f 1f 44 00 00\n"); |
340 | memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE); | 342 | memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE); |
341 | break; | 343 | break; |
342 | case 1: | 344 | case 1: |
343 | pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n"); | 345 | pr_info("converting mcount calls to 66 66 66 66 90\n"); |
344 | memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE); | 346 | memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE); |
345 | break; | 347 | break; |
346 | case 2: | 348 | case 2: |
347 | pr_info("ftrace: converting mcount calls to jmp . + 5\n"); | 349 | pr_info("converting mcount calls to jmp . + 5\n"); |
348 | memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE); | 350 | memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE); |
349 | break; | 351 | break; |
350 | } | 352 | } |
@@ -468,82 +470,10 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, | |||
468 | 470 | ||
469 | #ifdef CONFIG_FTRACE_SYSCALLS | 471 | #ifdef CONFIG_FTRACE_SYSCALLS |
470 | 472 | ||
471 | extern unsigned long __start_syscalls_metadata[]; | ||
472 | extern unsigned long __stop_syscalls_metadata[]; | ||
473 | extern unsigned long *sys_call_table; | 473 | extern unsigned long *sys_call_table; |
474 | 474 | ||
475 | static struct syscall_metadata **syscalls_metadata; | 475 | unsigned long __init arch_syscall_addr(int nr) |
476 | |||
477 | static struct syscall_metadata *find_syscall_meta(unsigned long *syscall) | ||
478 | { | ||
479 | struct syscall_metadata *start; | ||
480 | struct syscall_metadata *stop; | ||
481 | char str[KSYM_SYMBOL_LEN]; | ||
482 | |||
483 | |||
484 | start = (struct syscall_metadata *)__start_syscalls_metadata; | ||
485 | stop = (struct syscall_metadata *)__stop_syscalls_metadata; | ||
486 | kallsyms_lookup((unsigned long) syscall, NULL, NULL, NULL, str); | ||
487 | |||
488 | for ( ; start < stop; start++) { | ||
489 | if (start->name && !strcmp(start->name, str)) | ||
490 | return start; | ||
491 | } | ||
492 | return NULL; | ||
493 | } | ||
494 | |||
495 | struct syscall_metadata *syscall_nr_to_meta(int nr) | ||
496 | { | ||
497 | if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) | ||
498 | return NULL; | ||
499 | |||
500 | return syscalls_metadata[nr]; | ||
501 | } | ||
502 | |||
503 | int syscall_name_to_nr(char *name) | ||
504 | { | 476 | { |
505 | int i; | 477 | return (unsigned long)(&sys_call_table)[nr]; |
506 | |||
507 | if (!syscalls_metadata) | ||
508 | return -1; | ||
509 | |||
510 | for (i = 0; i < NR_syscalls; i++) { | ||
511 | if (syscalls_metadata[i]) { | ||
512 | if (!strcmp(syscalls_metadata[i]->name, name)) | ||
513 | return i; | ||
514 | } | ||
515 | } | ||
516 | return -1; | ||
517 | } | ||
518 | |||
519 | void set_syscall_enter_id(int num, int id) | ||
520 | { | ||
521 | syscalls_metadata[num]->enter_id = id; | ||
522 | } | ||
523 | |||
524 | void set_syscall_exit_id(int num, int id) | ||
525 | { | ||
526 | syscalls_metadata[num]->exit_id = id; | ||
527 | } | ||
528 | |||
529 | static int __init arch_init_ftrace_syscalls(void) | ||
530 | { | ||
531 | int i; | ||
532 | struct syscall_metadata *meta; | ||
533 | unsigned long **psys_syscall_table = &sys_call_table; | ||
534 | |||
535 | syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * | ||
536 | NR_syscalls, GFP_KERNEL); | ||
537 | if (!syscalls_metadata) { | ||
538 | WARN_ON(1); | ||
539 | return -ENOMEM; | ||
540 | } | ||
541 | |||
542 | for (i = 0; i < NR_syscalls; i++) { | ||
543 | meta = find_syscall_meta(psys_syscall_table[i]); | ||
544 | syscalls_metadata[i] = meta; | ||
545 | } | ||
546 | return 0; | ||
547 | } | 478 | } |
548 | arch_initcall(arch_init_ftrace_syscalls); | ||
549 | #endif | 479 | #endif |
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 780cd928fcd5..22db86a37643 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -212,8 +212,8 @@ ENTRY(secondary_startup_64) | |||
212 | */ | 212 | */ |
213 | lgdt early_gdt_descr(%rip) | 213 | lgdt early_gdt_descr(%rip) |
214 | 214 | ||
215 | /* set up data segments. actually 0 would do too */ | 215 | /* set up data segments */ |
216 | movl $__KERNEL_DS,%eax | 216 | xorl %eax,%eax |
217 | movl %eax,%ds | 217 | movl %eax,%ds |
218 | movl %eax,%ss | 218 | movl %eax,%ss |
219 | movl %eax,%es | 219 | movl %eax,%es |
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c new file mode 100644 index 000000000000..d42f65ac4927 --- /dev/null +++ b/arch/x86/kernel/hw_breakpoint.c | |||
@@ -0,0 +1,555 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License as published by | ||
4 | * the Free Software Foundation; either version 2 of the License, or | ||
5 | * (at your option) any later version. | ||
6 | * | ||
7 | * This program is distributed in the hope that it will be useful, | ||
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
10 | * GNU General Public License for more details. | ||
11 | * | ||
12 | * You should have received a copy of the GNU General Public License | ||
13 | * along with this program; if not, write to the Free Software | ||
14 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
15 | * | ||
16 | * Copyright (C) 2007 Alan Stern | ||
17 | * Copyright (C) 2009 IBM Corporation | ||
18 | * Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com> | ||
19 | * | ||
20 | * Authors: Alan Stern <stern@rowland.harvard.edu> | ||
21 | * K.Prasad <prasad@linux.vnet.ibm.com> | ||
22 | * Frederic Weisbecker <fweisbec@gmail.com> | ||
23 | */ | ||
24 | |||
25 | /* | ||
26 | * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, | ||
27 | * using the CPU's debug registers. | ||
28 | */ | ||
29 | |||
30 | #include <linux/perf_event.h> | ||
31 | #include <linux/hw_breakpoint.h> | ||
32 | #include <linux/irqflags.h> | ||
33 | #include <linux/notifier.h> | ||
34 | #include <linux/kallsyms.h> | ||
35 | #include <linux/kprobes.h> | ||
36 | #include <linux/percpu.h> | ||
37 | #include <linux/kdebug.h> | ||
38 | #include <linux/kernel.h> | ||
39 | #include <linux/module.h> | ||
40 | #include <linux/sched.h> | ||
41 | #include <linux/init.h> | ||
42 | #include <linux/smp.h> | ||
43 | |||
44 | #include <asm/hw_breakpoint.h> | ||
45 | #include <asm/processor.h> | ||
46 | #include <asm/debugreg.h> | ||
47 | |||
48 | /* Per cpu debug control register value */ | ||
49 | DEFINE_PER_CPU(unsigned long, cpu_dr7); | ||
50 | EXPORT_PER_CPU_SYMBOL(cpu_dr7); | ||
51 | |||
52 | /* Per cpu debug address registers values */ | ||
53 | static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]); | ||
54 | |||
55 | /* | ||
56 | * Stores the breakpoints currently in use on each breakpoint address | ||
57 | * register for each cpus | ||
58 | */ | ||
59 | static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]); | ||
60 | |||
61 | |||
62 | static inline unsigned long | ||
63 | __encode_dr7(int drnum, unsigned int len, unsigned int type) | ||
64 | { | ||
65 | unsigned long bp_info; | ||
66 | |||
67 | bp_info = (len | type) & 0xf; | ||
68 | bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE); | ||
69 | bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)); | ||
70 | |||
71 | return bp_info; | ||
72 | } | ||
73 | |||
74 | /* | ||
75 | * Encode the length, type, Exact, and Enable bits for a particular breakpoint | ||
76 | * as stored in debug register 7. | ||
77 | */ | ||
78 | unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) | ||
79 | { | ||
80 | return __encode_dr7(drnum, len, type) | DR_GLOBAL_SLOWDOWN; | ||
81 | } | ||
82 | |||
83 | /* | ||
84 | * Decode the length and type bits for a particular breakpoint as | ||
85 | * stored in debug register 7. Return the "enabled" status. | ||
86 | */ | ||
87 | int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type) | ||
88 | { | ||
89 | int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE); | ||
90 | |||
91 | *len = (bp_info & 0xc) | 0x40; | ||
92 | *type = (bp_info & 0x3) | 0x80; | ||
93 | |||
94 | return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3; | ||
95 | } | ||
96 | |||
97 | /* | ||
98 | * Install a perf counter breakpoint. | ||
99 | * | ||
100 | * We seek a free debug address register and use it for this | ||
101 | * breakpoint. Eventually we enable it in the debug control register. | ||
102 | * | ||
103 | * Atomic: we hold the counter->ctx->lock and we only handle variables | ||
104 | * and registers local to this cpu. | ||
105 | */ | ||
106 | int arch_install_hw_breakpoint(struct perf_event *bp) | ||
107 | { | ||
108 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | ||
109 | unsigned long *dr7; | ||
110 | int i; | ||
111 | |||
112 | for (i = 0; i < HBP_NUM; i++) { | ||
113 | struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); | ||
114 | |||
115 | if (!*slot) { | ||
116 | *slot = bp; | ||
117 | break; | ||
118 | } | ||
119 | } | ||
120 | |||
121 | if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) | ||
122 | return -EBUSY; | ||
123 | |||
124 | set_debugreg(info->address, i); | ||
125 | __get_cpu_var(cpu_debugreg[i]) = info->address; | ||
126 | |||
127 | dr7 = &__get_cpu_var(cpu_dr7); | ||
128 | *dr7 |= encode_dr7(i, info->len, info->type); | ||
129 | |||
130 | set_debugreg(*dr7, 7); | ||
131 | |||
132 | return 0; | ||
133 | } | ||
134 | |||
135 | /* | ||
136 | * Uninstall the breakpoint contained in the given counter. | ||
137 | * | ||
138 | * First we search the debug address register it uses and then we disable | ||
139 | * it. | ||
140 | * | ||
141 | * Atomic: we hold the counter->ctx->lock and we only handle variables | ||
142 | * and registers local to this cpu. | ||
143 | */ | ||
144 | void arch_uninstall_hw_breakpoint(struct perf_event *bp) | ||
145 | { | ||
146 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | ||
147 | unsigned long *dr7; | ||
148 | int i; | ||
149 | |||
150 | for (i = 0; i < HBP_NUM; i++) { | ||
151 | struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); | ||
152 | |||
153 | if (*slot == bp) { | ||
154 | *slot = NULL; | ||
155 | break; | ||
156 | } | ||
157 | } | ||
158 | |||
159 | if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) | ||
160 | return; | ||
161 | |||
162 | dr7 = &__get_cpu_var(cpu_dr7); | ||
163 | *dr7 &= ~__encode_dr7(i, info->len, info->type); | ||
164 | |||
165 | set_debugreg(*dr7, 7); | ||
166 | } | ||
167 | |||
168 | static int get_hbp_len(u8 hbp_len) | ||
169 | { | ||
170 | unsigned int len_in_bytes = 0; | ||
171 | |||
172 | switch (hbp_len) { | ||
173 | case X86_BREAKPOINT_LEN_1: | ||
174 | len_in_bytes = 1; | ||
175 | break; | ||
176 | case X86_BREAKPOINT_LEN_2: | ||
177 | len_in_bytes = 2; | ||
178 | break; | ||
179 | case X86_BREAKPOINT_LEN_4: | ||
180 | len_in_bytes = 4; | ||
181 | break; | ||
182 | #ifdef CONFIG_X86_64 | ||
183 | case X86_BREAKPOINT_LEN_8: | ||
184 | len_in_bytes = 8; | ||
185 | break; | ||
186 | #endif | ||
187 | } | ||
188 | return len_in_bytes; | ||
189 | } | ||
190 | |||
191 | /* | ||
192 | * Check for virtual address in user space. | ||
193 | */ | ||
194 | int arch_check_va_in_userspace(unsigned long va, u8 hbp_len) | ||
195 | { | ||
196 | unsigned int len; | ||
197 | |||
198 | len = get_hbp_len(hbp_len); | ||
199 | |||
200 | return (va <= TASK_SIZE - len); | ||
201 | } | ||
202 | |||
203 | /* | ||
204 | * Check for virtual address in kernel space. | ||
205 | */ | ||
206 | static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) | ||
207 | { | ||
208 | unsigned int len; | ||
209 | |||
210 | len = get_hbp_len(hbp_len); | ||
211 | |||
212 | return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); | ||
213 | } | ||
214 | |||
215 | /* | ||
216 | * Store a breakpoint's encoded address, length, and type. | ||
217 | */ | ||
218 | static int arch_store_info(struct perf_event *bp) | ||
219 | { | ||
220 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | ||
221 | /* | ||
222 | * For kernel-addresses, either the address or symbol name can be | ||
223 | * specified. | ||
224 | */ | ||
225 | if (info->name) | ||
226 | info->address = (unsigned long) | ||
227 | kallsyms_lookup_name(info->name); | ||
228 | if (info->address) | ||
229 | return 0; | ||
230 | |||
231 | return -EINVAL; | ||
232 | } | ||
233 | |||
234 | int arch_bp_generic_fields(int x86_len, int x86_type, | ||
235 | int *gen_len, int *gen_type) | ||
236 | { | ||
237 | /* Len */ | ||
238 | switch (x86_len) { | ||
239 | case X86_BREAKPOINT_LEN_1: | ||
240 | *gen_len = HW_BREAKPOINT_LEN_1; | ||
241 | break; | ||
242 | case X86_BREAKPOINT_LEN_2: | ||
243 | *gen_len = HW_BREAKPOINT_LEN_2; | ||
244 | break; | ||
245 | case X86_BREAKPOINT_LEN_4: | ||
246 | *gen_len = HW_BREAKPOINT_LEN_4; | ||
247 | break; | ||
248 | #ifdef CONFIG_X86_64 | ||
249 | case X86_BREAKPOINT_LEN_8: | ||
250 | *gen_len = HW_BREAKPOINT_LEN_8; | ||
251 | break; | ||
252 | #endif | ||
253 | default: | ||
254 | return -EINVAL; | ||
255 | } | ||
256 | |||
257 | /* Type */ | ||
258 | switch (x86_type) { | ||
259 | case X86_BREAKPOINT_EXECUTE: | ||
260 | *gen_type = HW_BREAKPOINT_X; | ||
261 | break; | ||
262 | case X86_BREAKPOINT_WRITE: | ||
263 | *gen_type = HW_BREAKPOINT_W; | ||
264 | break; | ||
265 | case X86_BREAKPOINT_RW: | ||
266 | *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; | ||
267 | break; | ||
268 | default: | ||
269 | return -EINVAL; | ||
270 | } | ||
271 | |||
272 | return 0; | ||
273 | } | ||
274 | |||
275 | |||
276 | static int arch_build_bp_info(struct perf_event *bp) | ||
277 | { | ||
278 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | ||
279 | |||
280 | info->address = bp->attr.bp_addr; | ||
281 | |||
282 | /* Len */ | ||
283 | switch (bp->attr.bp_len) { | ||
284 | case HW_BREAKPOINT_LEN_1: | ||
285 | info->len = X86_BREAKPOINT_LEN_1; | ||
286 | break; | ||
287 | case HW_BREAKPOINT_LEN_2: | ||
288 | info->len = X86_BREAKPOINT_LEN_2; | ||
289 | break; | ||
290 | case HW_BREAKPOINT_LEN_4: | ||
291 | info->len = X86_BREAKPOINT_LEN_4; | ||
292 | break; | ||
293 | #ifdef CONFIG_X86_64 | ||
294 | case HW_BREAKPOINT_LEN_8: | ||
295 | info->len = X86_BREAKPOINT_LEN_8; | ||
296 | break; | ||
297 | #endif | ||
298 | default: | ||
299 | return -EINVAL; | ||
300 | } | ||
301 | |||
302 | /* Type */ | ||
303 | switch (bp->attr.bp_type) { | ||
304 | case HW_BREAKPOINT_W: | ||
305 | info->type = X86_BREAKPOINT_WRITE; | ||
306 | break; | ||
307 | case HW_BREAKPOINT_W | HW_BREAKPOINT_R: | ||
308 | info->type = X86_BREAKPOINT_RW; | ||
309 | break; | ||
310 | case HW_BREAKPOINT_X: | ||
311 | info->type = X86_BREAKPOINT_EXECUTE; | ||
312 | break; | ||
313 | default: | ||
314 | return -EINVAL; | ||
315 | } | ||
316 | |||
317 | return 0; | ||
318 | } | ||
319 | /* | ||
320 | * Validate the arch-specific HW Breakpoint register settings | ||
321 | */ | ||
322 | int arch_validate_hwbkpt_settings(struct perf_event *bp, | ||
323 | struct task_struct *tsk) | ||
324 | { | ||
325 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | ||
326 | unsigned int align; | ||
327 | int ret; | ||
328 | |||
329 | |||
330 | ret = arch_build_bp_info(bp); | ||
331 | if (ret) | ||
332 | return ret; | ||
333 | |||
334 | ret = -EINVAL; | ||
335 | |||
336 | if (info->type == X86_BREAKPOINT_EXECUTE) | ||
337 | /* | ||
338 | * Ptrace-refactoring code | ||
339 | * For now, we'll allow instruction breakpoint only for user-space | ||
340 | * addresses | ||
341 | */ | ||
342 | if ((!arch_check_va_in_userspace(info->address, info->len)) && | ||
343 | info->len != X86_BREAKPOINT_EXECUTE) | ||
344 | return ret; | ||
345 | |||
346 | switch (info->len) { | ||
347 | case X86_BREAKPOINT_LEN_1: | ||
348 | align = 0; | ||
349 | break; | ||
350 | case X86_BREAKPOINT_LEN_2: | ||
351 | align = 1; | ||
352 | break; | ||
353 | case X86_BREAKPOINT_LEN_4: | ||
354 | align = 3; | ||
355 | break; | ||
356 | #ifdef CONFIG_X86_64 | ||
357 | case X86_BREAKPOINT_LEN_8: | ||
358 | align = 7; | ||
359 | break; | ||
360 | #endif | ||
361 | default: | ||
362 | return ret; | ||
363 | } | ||
364 | |||
365 | if (bp->callback) | ||
366 | ret = arch_store_info(bp); | ||
367 | |||
368 | if (ret < 0) | ||
369 | return ret; | ||
370 | /* | ||
371 | * Check that the low-order bits of the address are appropriate | ||
372 | * for the alignment implied by len. | ||
373 | */ | ||
374 | if (info->address & align) | ||
375 | return -EINVAL; | ||
376 | |||
377 | /* Check that the virtual address is in the proper range */ | ||
378 | if (tsk) { | ||
379 | if (!arch_check_va_in_userspace(info->address, info->len)) | ||
380 | return -EFAULT; | ||
381 | } else { | ||
382 | if (!arch_check_va_in_kernelspace(info->address, info->len)) | ||
383 | return -EFAULT; | ||
384 | } | ||
385 | |||
386 | return 0; | ||
387 | } | ||
388 | |||
389 | /* | ||
390 | * Dump the debug register contents to the user. | ||
391 | * We can't dump our per cpu values because it | ||
392 | * may contain cpu wide breakpoint, something that | ||
393 | * doesn't belong to the current task. | ||
394 | * | ||
395 | * TODO: include non-ptrace user breakpoints (perf) | ||
396 | */ | ||
397 | void aout_dump_debugregs(struct user *dump) | ||
398 | { | ||
399 | int i; | ||
400 | int dr7 = 0; | ||
401 | struct perf_event *bp; | ||
402 | struct arch_hw_breakpoint *info; | ||
403 | struct thread_struct *thread = ¤t->thread; | ||
404 | |||
405 | for (i = 0; i < HBP_NUM; i++) { | ||
406 | bp = thread->ptrace_bps[i]; | ||
407 | |||
408 | if (bp && !bp->attr.disabled) { | ||
409 | dump->u_debugreg[i] = bp->attr.bp_addr; | ||
410 | info = counter_arch_bp(bp); | ||
411 | dr7 |= encode_dr7(i, info->len, info->type); | ||
412 | } else { | ||
413 | dump->u_debugreg[i] = 0; | ||
414 | } | ||
415 | } | ||
416 | |||
417 | dump->u_debugreg[4] = 0; | ||
418 | dump->u_debugreg[5] = 0; | ||
419 | dump->u_debugreg[6] = current->thread.debugreg6; | ||
420 | |||
421 | dump->u_debugreg[7] = dr7; | ||
422 | } | ||
423 | EXPORT_SYMBOL_GPL(aout_dump_debugregs); | ||
424 | |||
425 | /* | ||
426 | * Release the user breakpoints used by ptrace | ||
427 | */ | ||
428 | void flush_ptrace_hw_breakpoint(struct task_struct *tsk) | ||
429 | { | ||
430 | int i; | ||
431 | struct thread_struct *t = &tsk->thread; | ||
432 | |||
433 | for (i = 0; i < HBP_NUM; i++) { | ||
434 | unregister_hw_breakpoint(t->ptrace_bps[i]); | ||
435 | t->ptrace_bps[i] = NULL; | ||
436 | } | ||
437 | } | ||
438 | |||
439 | void hw_breakpoint_restore(void) | ||
440 | { | ||
441 | set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0); | ||
442 | set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1); | ||
443 | set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2); | ||
444 | set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3); | ||
445 | set_debugreg(current->thread.debugreg6, 6); | ||
446 | set_debugreg(__get_cpu_var(cpu_dr7), 7); | ||
447 | } | ||
448 | EXPORT_SYMBOL_GPL(hw_breakpoint_restore); | ||
449 | |||
450 | /* | ||
451 | * Handle debug exception notifications. | ||
452 | * | ||
453 | * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below. | ||
454 | * | ||
455 | * NOTIFY_DONE returned if one of the following conditions is true. | ||
456 | * i) When the causative address is from user-space and the exception | ||
457 | * is a valid one, i.e. not triggered as a result of lazy debug register | ||
458 | * switching | ||
459 | * ii) When there are more bits than trap<n> set in DR6 register (such | ||
460 | * as BD, BS or BT) indicating that more than one debug condition is | ||
461 | * met and requires some more action in do_debug(). | ||
462 | * | ||
463 | * NOTIFY_STOP returned for all other cases | ||
464 | * | ||
465 | */ | ||
466 | static int __kprobes hw_breakpoint_handler(struct die_args *args) | ||
467 | { | ||
468 | int i, cpu, rc = NOTIFY_STOP; | ||
469 | struct perf_event *bp; | ||
470 | unsigned long dr7, dr6; | ||
471 | unsigned long *dr6_p; | ||
472 | |||
473 | /* The DR6 value is pointed by args->err */ | ||
474 | dr6_p = (unsigned long *)ERR_PTR(args->err); | ||
475 | dr6 = *dr6_p; | ||
476 | |||
477 | /* Do an early return if no trap bits are set in DR6 */ | ||
478 | if ((dr6 & DR_TRAP_BITS) == 0) | ||
479 | return NOTIFY_DONE; | ||
480 | |||
481 | get_debugreg(dr7, 7); | ||
482 | /* Disable breakpoints during exception handling */ | ||
483 | set_debugreg(0UL, 7); | ||
484 | /* | ||
485 | * Assert that local interrupts are disabled | ||
486 | * Reset the DRn bits in the virtualized register value. | ||
487 | * The ptrace trigger routine will add in whatever is needed. | ||
488 | */ | ||
489 | current->thread.debugreg6 &= ~DR_TRAP_BITS; | ||
490 | cpu = get_cpu(); | ||
491 | |||
492 | /* Handle all the breakpoints that were triggered */ | ||
493 | for (i = 0; i < HBP_NUM; ++i) { | ||
494 | if (likely(!(dr6 & (DR_TRAP0 << i)))) | ||
495 | continue; | ||
496 | |||
497 | /* | ||
498 | * The counter may be concurrently released but that can only | ||
499 | * occur from a call_rcu() path. We can then safely fetch | ||
500 | * the breakpoint, use its callback, touch its counter | ||
501 | * while we are in an rcu_read_lock() path. | ||
502 | */ | ||
503 | rcu_read_lock(); | ||
504 | |||
505 | bp = per_cpu(bp_per_reg[i], cpu); | ||
506 | if (bp) | ||
507 | rc = NOTIFY_DONE; | ||
508 | /* | ||
509 | * Reset the 'i'th TRAP bit in dr6 to denote completion of | ||
510 | * exception handling | ||
511 | */ | ||
512 | (*dr6_p) &= ~(DR_TRAP0 << i); | ||
513 | /* | ||
514 | * bp can be NULL due to lazy debug register switching | ||
515 | * or due to concurrent perf counter removing. | ||
516 | */ | ||
517 | if (!bp) { | ||
518 | rcu_read_unlock(); | ||
519 | break; | ||
520 | } | ||
521 | |||
522 | (bp->callback)(bp, args->regs); | ||
523 | |||
524 | rcu_read_unlock(); | ||
525 | } | ||
526 | if (dr6 & (~DR_TRAP_BITS)) | ||
527 | rc = NOTIFY_DONE; | ||
528 | |||
529 | set_debugreg(dr7, 7); | ||
530 | put_cpu(); | ||
531 | |||
532 | return rc; | ||
533 | } | ||
534 | |||
535 | /* | ||
536 | * Handle debug exception notifications. | ||
537 | */ | ||
538 | int __kprobes hw_breakpoint_exceptions_notify( | ||
539 | struct notifier_block *unused, unsigned long val, void *data) | ||
540 | { | ||
541 | if (val != DIE_DEBUG) | ||
542 | return NOTIFY_DONE; | ||
543 | |||
544 | return hw_breakpoint_handler(data); | ||
545 | } | ||
546 | |||
547 | void hw_breakpoint_pmu_read(struct perf_event *bp) | ||
548 | { | ||
549 | /* TODO */ | ||
550 | } | ||
551 | |||
552 | void hw_breakpoint_pmu_unthrottle(struct perf_event *bp) | ||
553 | { | ||
554 | /* TODO */ | ||
555 | } | ||
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 04bbd5278568..fee6cc2b2079 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -92,17 +92,17 @@ static int show_other_interrupts(struct seq_file *p, int prec) | |||
92 | seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); | 92 | seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); |
93 | seq_printf(p, " TLB shootdowns\n"); | 93 | seq_printf(p, " TLB shootdowns\n"); |
94 | #endif | 94 | #endif |
95 | #ifdef CONFIG_X86_MCE | 95 | #ifdef CONFIG_X86_THERMAL_VECTOR |
96 | seq_printf(p, "%*s: ", prec, "TRM"); | 96 | seq_printf(p, "%*s: ", prec, "TRM"); |
97 | for_each_online_cpu(j) | 97 | for_each_online_cpu(j) |
98 | seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); | 98 | seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); |
99 | seq_printf(p, " Thermal event interrupts\n"); | 99 | seq_printf(p, " Thermal event interrupts\n"); |
100 | # ifdef CONFIG_X86_MCE_THRESHOLD | 100 | #endif |
101 | #ifdef CONFIG_X86_MCE_THRESHOLD | ||
101 | seq_printf(p, "%*s: ", prec, "THR"); | 102 | seq_printf(p, "%*s: ", prec, "THR"); |
102 | for_each_online_cpu(j) | 103 | for_each_online_cpu(j) |
103 | seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); | 104 | seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); |
104 | seq_printf(p, " Threshold APIC interrupts\n"); | 105 | seq_printf(p, " Threshold APIC interrupts\n"); |
105 | # endif | ||
106 | #endif | 106 | #endif |
107 | #ifdef CONFIG_X86_MCE | 107 | #ifdef CONFIG_X86_MCE |
108 | seq_printf(p, "%*s: ", prec, "MCE"); | 108 | seq_printf(p, "%*s: ", prec, "MCE"); |
@@ -194,11 +194,11 @@ u64 arch_irq_stat_cpu(unsigned int cpu) | |||
194 | sum += irq_stats(cpu)->irq_call_count; | 194 | sum += irq_stats(cpu)->irq_call_count; |
195 | sum += irq_stats(cpu)->irq_tlb_count; | 195 | sum += irq_stats(cpu)->irq_tlb_count; |
196 | #endif | 196 | #endif |
197 | #ifdef CONFIG_X86_MCE | 197 | #ifdef CONFIG_X86_THERMAL_VECTOR |
198 | sum += irq_stats(cpu)->irq_thermal_count; | 198 | sum += irq_stats(cpu)->irq_thermal_count; |
199 | # ifdef CONFIG_X86_MCE_THRESHOLD | 199 | #endif |
200 | #ifdef CONFIG_X86_MCE_THRESHOLD | ||
200 | sum += irq_stats(cpu)->irq_threshold_count; | 201 | sum += irq_stats(cpu)->irq_threshold_count; |
201 | # endif | ||
202 | #endif | 202 | #endif |
203 | #ifdef CONFIG_X86_MCE | 203 | #ifdef CONFIG_X86_MCE |
204 | sum += per_cpu(mce_exception_count, cpu); | 204 | sum += per_cpu(mce_exception_count, cpu); |
@@ -274,3 +274,93 @@ void smp_generic_interrupt(struct pt_regs *regs) | |||
274 | } | 274 | } |
275 | 275 | ||
276 | EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); | 276 | EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); |
277 | |||
278 | #ifdef CONFIG_HOTPLUG_CPU | ||
279 | /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ | ||
280 | void fixup_irqs(void) | ||
281 | { | ||
282 | unsigned int irq, vector; | ||
283 | static int warned; | ||
284 | struct irq_desc *desc; | ||
285 | |||
286 | for_each_irq_desc(irq, desc) { | ||
287 | int break_affinity = 0; | ||
288 | int set_affinity = 1; | ||
289 | const struct cpumask *affinity; | ||
290 | |||
291 | if (!desc) | ||
292 | continue; | ||
293 | if (irq == 2) | ||
294 | continue; | ||
295 | |||
296 | /* interrupt's are disabled at this point */ | ||
297 | spin_lock(&desc->lock); | ||
298 | |||
299 | affinity = desc->affinity; | ||
300 | if (!irq_has_action(irq) || | ||
301 | cpumask_equal(affinity, cpu_online_mask)) { | ||
302 | spin_unlock(&desc->lock); | ||
303 | continue; | ||
304 | } | ||
305 | |||
306 | /* | ||
307 | * Complete the irq move. This cpu is going down and for | ||
308 | * non intr-remapping case, we can't wait till this interrupt | ||
309 | * arrives at this cpu before completing the irq move. | ||
310 | */ | ||
311 | irq_force_complete_move(irq); | ||
312 | |||
313 | if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { | ||
314 | break_affinity = 1; | ||
315 | affinity = cpu_all_mask; | ||
316 | } | ||
317 | |||
318 | if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->mask) | ||
319 | desc->chip->mask(irq); | ||
320 | |||
321 | if (desc->chip->set_affinity) | ||
322 | desc->chip->set_affinity(irq, affinity); | ||
323 | else if (!(warned++)) | ||
324 | set_affinity = 0; | ||
325 | |||
326 | if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->unmask) | ||
327 | desc->chip->unmask(irq); | ||
328 | |||
329 | spin_unlock(&desc->lock); | ||
330 | |||
331 | if (break_affinity && set_affinity) | ||
332 | printk("Broke affinity for irq %i\n", irq); | ||
333 | else if (!set_affinity) | ||
334 | printk("Cannot set affinity for irq %i\n", irq); | ||
335 | } | ||
336 | |||
337 | /* | ||
338 | * We can remove mdelay() and then send spuriuous interrupts to | ||
339 | * new cpu targets for all the irqs that were handled previously by | ||
340 | * this cpu. While it works, I have seen spurious interrupt messages | ||
341 | * (nothing wrong but still...). | ||
342 | * | ||
343 | * So for now, retain mdelay(1) and check the IRR and then send those | ||
344 | * interrupts to new targets as this cpu is already offlined... | ||
345 | */ | ||
346 | mdelay(1); | ||
347 | |||
348 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { | ||
349 | unsigned int irr; | ||
350 | |||
351 | if (__get_cpu_var(vector_irq)[vector] < 0) | ||
352 | continue; | ||
353 | |||
354 | irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); | ||
355 | if (irr & (1 << (vector % 32))) { | ||
356 | irq = __get_cpu_var(vector_irq)[vector]; | ||
357 | |||
358 | desc = irq_to_desc(irq); | ||
359 | spin_lock(&desc->lock); | ||
360 | if (desc->chip->retrigger) | ||
361 | desc->chip->retrigger(irq); | ||
362 | spin_unlock(&desc->lock); | ||
363 | } | ||
364 | } | ||
365 | } | ||
366 | #endif | ||
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 7d35d0fe2329..10709f29d166 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c | |||
@@ -211,48 +211,3 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) | |||
211 | 211 | ||
212 | return true; | 212 | return true; |
213 | } | 213 | } |
214 | |||
215 | #ifdef CONFIG_HOTPLUG_CPU | ||
216 | |||
217 | /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ | ||
218 | void fixup_irqs(void) | ||
219 | { | ||
220 | unsigned int irq; | ||
221 | struct irq_desc *desc; | ||
222 | |||
223 | for_each_irq_desc(irq, desc) { | ||
224 | const struct cpumask *affinity; | ||
225 | |||
226 | if (!desc) | ||
227 | continue; | ||
228 | if (irq == 2) | ||
229 | continue; | ||
230 | |||
231 | affinity = desc->affinity; | ||
232 | if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { | ||
233 | printk("Breaking affinity for irq %i\n", irq); | ||
234 | affinity = cpu_all_mask; | ||
235 | } | ||
236 | if (desc->chip->set_affinity) | ||
237 | desc->chip->set_affinity(irq, affinity); | ||
238 | else if (desc->action) | ||
239 | printk_once("Cannot set affinity for irq %i\n", irq); | ||
240 | } | ||
241 | |||
242 | #if 0 | ||
243 | barrier(); | ||
244 | /* Ingo Molnar says: "after the IO-APIC masks have been redirected | ||
245 | [note the nop - the interrupt-enable boundary on x86 is two | ||
246 | instructions from sti] - to flush out pending hardirqs and | ||
247 | IPIs. After this point nothing is supposed to reach this CPU." */ | ||
248 | __asm__ __volatile__("sti; nop; cli"); | ||
249 | barrier(); | ||
250 | #else | ||
251 | /* That doesn't seem sufficient. Give it 1ms. */ | ||
252 | local_irq_enable(); | ||
253 | mdelay(1); | ||
254 | local_irq_disable(); | ||
255 | #endif | ||
256 | } | ||
257 | #endif | ||
258 | |||
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 977d8b43a0dd..acf8fbf8fbda 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c | |||
@@ -62,64 +62,6 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) | |||
62 | return true; | 62 | return true; |
63 | } | 63 | } |
64 | 64 | ||
65 | #ifdef CONFIG_HOTPLUG_CPU | ||
66 | /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ | ||
67 | void fixup_irqs(void) | ||
68 | { | ||
69 | unsigned int irq; | ||
70 | static int warned; | ||
71 | struct irq_desc *desc; | ||
72 | |||
73 | for_each_irq_desc(irq, desc) { | ||
74 | int break_affinity = 0; | ||
75 | int set_affinity = 1; | ||
76 | const struct cpumask *affinity; | ||
77 | |||
78 | if (!desc) | ||
79 | continue; | ||
80 | if (irq == 2) | ||
81 | continue; | ||
82 | |||
83 | /* interrupt's are disabled at this point */ | ||
84 | spin_lock(&desc->lock); | ||
85 | |||
86 | affinity = desc->affinity; | ||
87 | if (!irq_has_action(irq) || | ||
88 | cpumask_equal(affinity, cpu_online_mask)) { | ||
89 | spin_unlock(&desc->lock); | ||
90 | continue; | ||
91 | } | ||
92 | |||
93 | if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { | ||
94 | break_affinity = 1; | ||
95 | affinity = cpu_all_mask; | ||
96 | } | ||
97 | |||
98 | if (desc->chip->mask) | ||
99 | desc->chip->mask(irq); | ||
100 | |||
101 | if (desc->chip->set_affinity) | ||
102 | desc->chip->set_affinity(irq, affinity); | ||
103 | else if (!(warned++)) | ||
104 | set_affinity = 0; | ||
105 | |||
106 | if (desc->chip->unmask) | ||
107 | desc->chip->unmask(irq); | ||
108 | |||
109 | spin_unlock(&desc->lock); | ||
110 | |||
111 | if (break_affinity && set_affinity) | ||
112 | printk("Broke affinity for irq %i\n", irq); | ||
113 | else if (!set_affinity) | ||
114 | printk("Cannot set affinity for irq %i\n", irq); | ||
115 | } | ||
116 | |||
117 | /* That doesn't seem sufficient. Give it 1ms. */ | ||
118 | local_irq_enable(); | ||
119 | mdelay(1); | ||
120 | local_irq_disable(); | ||
121 | } | ||
122 | #endif | ||
123 | 65 | ||
124 | extern void call_softirq(void); | 66 | extern void call_softirq(void); |
125 | 67 | ||
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 8d82a77a3f3b..20a5b3689463 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
@@ -43,6 +43,7 @@ | |||
43 | #include <linux/smp.h> | 43 | #include <linux/smp.h> |
44 | #include <linux/nmi.h> | 44 | #include <linux/nmi.h> |
45 | 45 | ||
46 | #include <asm/debugreg.h> | ||
46 | #include <asm/apicdef.h> | 47 | #include <asm/apicdef.h> |
47 | #include <asm/system.h> | 48 | #include <asm/system.h> |
48 | 49 | ||
@@ -88,7 +89,6 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) | |||
88 | gdb_regs[GDB_SS] = __KERNEL_DS; | 89 | gdb_regs[GDB_SS] = __KERNEL_DS; |
89 | gdb_regs[GDB_FS] = 0xFFFF; | 90 | gdb_regs[GDB_FS] = 0xFFFF; |
90 | gdb_regs[GDB_GS] = 0xFFFF; | 91 | gdb_regs[GDB_GS] = 0xFFFF; |
91 | gdb_regs[GDB_SP] = (int)®s->sp; | ||
92 | #else | 92 | #else |
93 | gdb_regs[GDB_R8] = regs->r8; | 93 | gdb_regs[GDB_R8] = regs->r8; |
94 | gdb_regs[GDB_R9] = regs->r9; | 94 | gdb_regs[GDB_R9] = regs->r9; |
@@ -101,8 +101,8 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) | |||
101 | gdb_regs32[GDB_PS] = regs->flags; | 101 | gdb_regs32[GDB_PS] = regs->flags; |
102 | gdb_regs32[GDB_CS] = regs->cs; | 102 | gdb_regs32[GDB_CS] = regs->cs; |
103 | gdb_regs32[GDB_SS] = regs->ss; | 103 | gdb_regs32[GDB_SS] = regs->ss; |
104 | gdb_regs[GDB_SP] = regs->sp; | ||
105 | #endif | 104 | #endif |
105 | gdb_regs[GDB_SP] = kernel_stack_pointer(regs); | ||
106 | } | 106 | } |
107 | 107 | ||
108 | /** | 108 | /** |
@@ -434,6 +434,11 @@ single_step_cont(struct pt_regs *regs, struct die_args *args) | |||
434 | "resuming...\n"); | 434 | "resuming...\n"); |
435 | kgdb_arch_handle_exception(args->trapnr, args->signr, | 435 | kgdb_arch_handle_exception(args->trapnr, args->signr, |
436 | args->err, "c", "", regs); | 436 | args->err, "c", "", regs); |
437 | /* | ||
438 | * Reset the BS bit in dr6 (pointed by args->err) to | ||
439 | * denote completion of processing | ||
440 | */ | ||
441 | (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; | ||
437 | 442 | ||
438 | return NOTIFY_STOP; | 443 | return NOTIFY_STOP; |
439 | } | 444 | } |
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 7b5169d2b000..1f3186ce213c 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -48,31 +48,22 @@ | |||
48 | #include <linux/preempt.h> | 48 | #include <linux/preempt.h> |
49 | #include <linux/module.h> | 49 | #include <linux/module.h> |
50 | #include <linux/kdebug.h> | 50 | #include <linux/kdebug.h> |
51 | #include <linux/kallsyms.h> | ||
51 | 52 | ||
52 | #include <asm/cacheflush.h> | 53 | #include <asm/cacheflush.h> |
53 | #include <asm/desc.h> | 54 | #include <asm/desc.h> |
54 | #include <asm/pgtable.h> | 55 | #include <asm/pgtable.h> |
55 | #include <asm/uaccess.h> | 56 | #include <asm/uaccess.h> |
56 | #include <asm/alternative.h> | 57 | #include <asm/alternative.h> |
58 | #include <asm/insn.h> | ||
59 | #include <asm/debugreg.h> | ||
57 | 60 | ||
58 | void jprobe_return_end(void); | 61 | void jprobe_return_end(void); |
59 | 62 | ||
60 | DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; | 63 | DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; |
61 | DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); | 64 | DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); |
62 | 65 | ||
63 | #ifdef CONFIG_X86_64 | 66 | #define stack_addr(regs) ((unsigned long *)kernel_stack_pointer(regs)) |
64 | #define stack_addr(regs) ((unsigned long *)regs->sp) | ||
65 | #else | ||
66 | /* | ||
67 | * "®s->sp" looks wrong, but it's correct for x86_32. x86_32 CPUs | ||
68 | * don't save the ss and esp registers if the CPU is already in kernel | ||
69 | * mode when it traps. So for kprobes, regs->sp and regs->ss are not | ||
70 | * the [nonexistent] saved stack pointer and ss register, but rather | ||
71 | * the top 8 bytes of the pre-int3 stack. So ®s->sp happens to | ||
72 | * point to the top of the pre-int3 stack. | ||
73 | */ | ||
74 | #define stack_addr(regs) ((unsigned long *)®s->sp) | ||
75 | #endif | ||
76 | 67 | ||
77 | #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ | 68 | #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ |
78 | (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ | 69 | (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ |
@@ -106,50 +97,6 @@ static const u32 twobyte_is_boostable[256 / 32] = { | |||
106 | /* ----------------------------------------------- */ | 97 | /* ----------------------------------------------- */ |
107 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | 98 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ |
108 | }; | 99 | }; |
109 | static const u32 onebyte_has_modrm[256 / 32] = { | ||
110 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
111 | /* ----------------------------------------------- */ | ||
112 | W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 00 */ | ||
113 | W(0x10, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 10 */ | ||
114 | W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 20 */ | ||
115 | W(0x30, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 30 */ | ||
116 | W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */ | ||
117 | W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */ | ||
118 | W(0x60, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0) | /* 60 */ | ||
119 | W(0x70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 70 */ | ||
120 | W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ | ||
121 | W(0x90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 90 */ | ||
122 | W(0xa0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* a0 */ | ||
123 | W(0xb0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* b0 */ | ||
124 | W(0xc0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* c0 */ | ||
125 | W(0xd0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ | ||
126 | W(0xe0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* e0 */ | ||
127 | W(0xf0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) /* f0 */ | ||
128 | /* ----------------------------------------------- */ | ||
129 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
130 | }; | ||
131 | static const u32 twobyte_has_modrm[256 / 32] = { | ||
132 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
133 | /* ----------------------------------------------- */ | ||
134 | W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1) | /* 0f */ | ||
135 | W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0) , /* 1f */ | ||
136 | W(0x20, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 2f */ | ||
137 | W(0x30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 3f */ | ||
138 | W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 4f */ | ||
139 | W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 5f */ | ||
140 | W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 6f */ | ||
141 | W(0x70, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1) , /* 7f */ | ||
142 | W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 8f */ | ||
143 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 9f */ | ||
144 | W(0xa0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) | /* af */ | ||
145 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* bf */ | ||
146 | W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* cf */ | ||
147 | W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* df */ | ||
148 | W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* ef */ | ||
149 | W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* ff */ | ||
150 | /* ----------------------------------------------- */ | ||
151 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
152 | }; | ||
153 | #undef W | 100 | #undef W |
154 | 101 | ||
155 | struct kretprobe_blackpoint kretprobe_blacklist[] = { | 102 | struct kretprobe_blackpoint kretprobe_blacklist[] = { |
@@ -244,6 +191,75 @@ retry: | |||
244 | } | 191 | } |
245 | } | 192 | } |
246 | 193 | ||
194 | /* Recover the probed instruction at addr for further analysis. */ | ||
195 | static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) | ||
196 | { | ||
197 | struct kprobe *kp; | ||
198 | kp = get_kprobe((void *)addr); | ||
199 | if (!kp) | ||
200 | return -EINVAL; | ||
201 | |||
202 | /* | ||
203 | * Basically, kp->ainsn.insn has an original instruction. | ||
204 | * However, RIP-relative instruction can not do single-stepping | ||
205 | * at different place, fix_riprel() tweaks the displacement of | ||
206 | * that instruction. In that case, we can't recover the instruction | ||
207 | * from the kp->ainsn.insn. | ||
208 | * | ||
209 | * On the other hand, kp->opcode has a copy of the first byte of | ||
210 | * the probed instruction, which is overwritten by int3. And | ||
211 | * the instruction at kp->addr is not modified by kprobes except | ||
212 | * for the first byte, we can recover the original instruction | ||
213 | * from it and kp->opcode. | ||
214 | */ | ||
215 | memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); | ||
216 | buf[0] = kp->opcode; | ||
217 | return 0; | ||
218 | } | ||
219 | |||
220 | /* Dummy buffers for kallsyms_lookup */ | ||
221 | static char __dummy_buf[KSYM_NAME_LEN]; | ||
222 | |||
223 | /* Check if paddr is at an instruction boundary */ | ||
224 | static int __kprobes can_probe(unsigned long paddr) | ||
225 | { | ||
226 | int ret; | ||
227 | unsigned long addr, offset = 0; | ||
228 | struct insn insn; | ||
229 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | ||
230 | |||
231 | if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf)) | ||
232 | return 0; | ||
233 | |||
234 | /* Decode instructions */ | ||
235 | addr = paddr - offset; | ||
236 | while (addr < paddr) { | ||
237 | kernel_insn_init(&insn, (void *)addr); | ||
238 | insn_get_opcode(&insn); | ||
239 | |||
240 | /* | ||
241 | * Check if the instruction has been modified by another | ||
242 | * kprobe, in which case we replace the breakpoint by the | ||
243 | * original instruction in our buffer. | ||
244 | */ | ||
245 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { | ||
246 | ret = recover_probed_instruction(buf, addr); | ||
247 | if (ret) | ||
248 | /* | ||
249 | * Another debugging subsystem might insert | ||
250 | * this breakpoint. In that case, we can't | ||
251 | * recover it. | ||
252 | */ | ||
253 | return 0; | ||
254 | kernel_insn_init(&insn, buf); | ||
255 | } | ||
256 | insn_get_length(&insn); | ||
257 | addr += insn.length; | ||
258 | } | ||
259 | |||
260 | return (addr == paddr); | ||
261 | } | ||
262 | |||
247 | /* | 263 | /* |
248 | * Returns non-zero if opcode modifies the interrupt flag. | 264 | * Returns non-zero if opcode modifies the interrupt flag. |
249 | */ | 265 | */ |
@@ -277,68 +293,30 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) | |||
277 | static void __kprobes fix_riprel(struct kprobe *p) | 293 | static void __kprobes fix_riprel(struct kprobe *p) |
278 | { | 294 | { |
279 | #ifdef CONFIG_X86_64 | 295 | #ifdef CONFIG_X86_64 |
280 | u8 *insn = p->ainsn.insn; | 296 | struct insn insn; |
281 | s64 disp; | 297 | kernel_insn_init(&insn, p->ainsn.insn); |
282 | int need_modrm; | ||
283 | |||
284 | /* Skip legacy instruction prefixes. */ | ||
285 | while (1) { | ||
286 | switch (*insn) { | ||
287 | case 0x66: | ||
288 | case 0x67: | ||
289 | case 0x2e: | ||
290 | case 0x3e: | ||
291 | case 0x26: | ||
292 | case 0x64: | ||
293 | case 0x65: | ||
294 | case 0x36: | ||
295 | case 0xf0: | ||
296 | case 0xf3: | ||
297 | case 0xf2: | ||
298 | ++insn; | ||
299 | continue; | ||
300 | } | ||
301 | break; | ||
302 | } | ||
303 | 298 | ||
304 | /* Skip REX instruction prefix. */ | 299 | if (insn_rip_relative(&insn)) { |
305 | if (is_REX_prefix(insn)) | 300 | s64 newdisp; |
306 | ++insn; | 301 | u8 *disp; |
307 | 302 | insn_get_displacement(&insn); | |
308 | if (*insn == 0x0f) { | 303 | /* |
309 | /* Two-byte opcode. */ | 304 | * The copied instruction uses the %rip-relative addressing |
310 | ++insn; | 305 | * mode. Adjust the displacement for the difference between |
311 | need_modrm = test_bit(*insn, | 306 | * the original location of this instruction and the location |
312 | (unsigned long *)twobyte_has_modrm); | 307 | * of the copy that will actually be run. The tricky bit here |
313 | } else | 308 | * is making sure that the sign extension happens correctly in |
314 | /* One-byte opcode. */ | 309 | * this calculation, since we need a signed 32-bit result to |
315 | need_modrm = test_bit(*insn, | 310 | * be sign-extended to 64 bits when it's added to the %rip |
316 | (unsigned long *)onebyte_has_modrm); | 311 | * value and yield the same 64-bit result that the sign- |
317 | 312 | * extension of the original signed 32-bit displacement would | |
318 | if (need_modrm) { | 313 | * have given. |
319 | u8 modrm = *++insn; | 314 | */ |
320 | if ((modrm & 0xc7) == 0x05) { | 315 | newdisp = (u8 *) p->addr + (s64) insn.displacement.value - |
321 | /* %rip+disp32 addressing mode */ | 316 | (u8 *) p->ainsn.insn; |
322 | /* Displacement follows ModRM byte. */ | 317 | BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ |
323 | ++insn; | 318 | disp = (u8 *) p->ainsn.insn + insn_offset_displacement(&insn); |
324 | /* | 319 | *(s32 *) disp = (s32) newdisp; |
325 | * The copied instruction uses the %rip-relative | ||
326 | * addressing mode. Adjust the displacement for the | ||
327 | * difference between the original location of this | ||
328 | * instruction and the location of the copy that will | ||
329 | * actually be run. The tricky bit here is making sure | ||
330 | * that the sign extension happens correctly in this | ||
331 | * calculation, since we need a signed 32-bit result to | ||
332 | * be sign-extended to 64 bits when it's added to the | ||
333 | * %rip value and yield the same 64-bit result that the | ||
334 | * sign-extension of the original signed 32-bit | ||
335 | * displacement would have given. | ||
336 | */ | ||
337 | disp = (u8 *) p->addr + *((s32 *) insn) - | ||
338 | (u8 *) p->ainsn.insn; | ||
339 | BUG_ON((s64) (s32) disp != disp); /* Sanity check. */ | ||
340 | *(s32 *)insn = (s32) disp; | ||
341 | } | ||
342 | } | 320 | } |
343 | #endif | 321 | #endif |
344 | } | 322 | } |
@@ -359,6 +337,8 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p) | |||
359 | 337 | ||
360 | int __kprobes arch_prepare_kprobe(struct kprobe *p) | 338 | int __kprobes arch_prepare_kprobe(struct kprobe *p) |
361 | { | 339 | { |
340 | if (!can_probe((unsigned long)p->addr)) | ||
341 | return -EILSEQ; | ||
362 | /* insn: must be on special executable page on x86. */ | 342 | /* insn: must be on special executable page on x86. */ |
363 | p->ainsn.insn = get_insn_slot(); | 343 | p->ainsn.insn = get_insn_slot(); |
364 | if (!p->ainsn.insn) | 344 | if (!p->ainsn.insn) |
@@ -472,17 +452,6 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, | |||
472 | { | 452 | { |
473 | switch (kcb->kprobe_status) { | 453 | switch (kcb->kprobe_status) { |
474 | case KPROBE_HIT_SSDONE: | 454 | case KPROBE_HIT_SSDONE: |
475 | #ifdef CONFIG_X86_64 | ||
476 | /* TODO: Provide re-entrancy from post_kprobes_handler() and | ||
477 | * avoid exception stack corruption while single-stepping on | ||
478 | * the instruction of the new probe. | ||
479 | */ | ||
480 | arch_disarm_kprobe(p); | ||
481 | regs->ip = (unsigned long)p->addr; | ||
482 | reset_current_kprobe(); | ||
483 | preempt_enable_no_resched(); | ||
484 | break; | ||
485 | #endif | ||
486 | case KPROBE_HIT_ACTIVE: | 455 | case KPROBE_HIT_ACTIVE: |
487 | save_previous_kprobe(kcb); | 456 | save_previous_kprobe(kcb); |
488 | set_current_kprobe(p, regs, kcb); | 457 | set_current_kprobe(p, regs, kcb); |
@@ -491,18 +460,16 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, | |||
491 | kcb->kprobe_status = KPROBE_REENTER; | 460 | kcb->kprobe_status = KPROBE_REENTER; |
492 | break; | 461 | break; |
493 | case KPROBE_HIT_SS: | 462 | case KPROBE_HIT_SS: |
494 | if (p == kprobe_running()) { | 463 | /* A probe has been hit in the codepath leading up to, or just |
495 | regs->flags &= ~X86_EFLAGS_TF; | 464 | * after, single-stepping of a probed instruction. This entire |
496 | regs->flags |= kcb->kprobe_saved_flags; | 465 | * codepath should strictly reside in .kprobes.text section. |
497 | return 0; | 466 | * Raise a BUG or we'll continue in an endless reentering loop |
498 | } else { | 467 | * and eventually a stack overflow. |
499 | /* A probe has been hit in the codepath leading up | 468 | */ |
500 | * to, or just after, single-stepping of a probed | 469 | printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n", |
501 | * instruction. This entire codepath should strictly | 470 | p->addr); |
502 | * reside in .kprobes.text section. Raise a warning | 471 | dump_kprobe(p); |
503 | * to highlight this peculiar case. | 472 | BUG(); |
504 | */ | ||
505 | } | ||
506 | default: | 473 | default: |
507 | /* impossible cases */ | 474 | /* impossible cases */ |
508 | WARN_ON(1); | 475 | WARN_ON(1); |
@@ -967,8 +934,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, | |||
967 | ret = NOTIFY_STOP; | 934 | ret = NOTIFY_STOP; |
968 | break; | 935 | break; |
969 | case DIE_DEBUG: | 936 | case DIE_DEBUG: |
970 | if (post_kprobe_handler(args->regs)) | 937 | if (post_kprobe_handler(args->regs)) { |
938 | /* | ||
939 | * Reset the BS bit in dr6 (pointed by args->err) to | ||
940 | * denote completion of processing | ||
941 | */ | ||
942 | (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; | ||
971 | ret = NOTIFY_STOP; | 943 | ret = NOTIFY_STOP; |
944 | } | ||
972 | break; | 945 | break; |
973 | case DIE_GPF: | 946 | case DIE_GPF: |
974 | /* | 947 | /* |
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index c1c429d00130..c843f8406da2 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <asm/desc.h> | 25 | #include <asm/desc.h> |
26 | #include <asm/system.h> | 26 | #include <asm/system.h> |
27 | #include <asm/cacheflush.h> | 27 | #include <asm/cacheflush.h> |
28 | #include <asm/debugreg.h> | ||
28 | 29 | ||
29 | static void set_idt(void *newidt, __u16 limit) | 30 | static void set_idt(void *newidt, __u16 limit) |
30 | { | 31 | { |
@@ -202,6 +203,7 @@ void machine_kexec(struct kimage *image) | |||
202 | 203 | ||
203 | /* Interrupts aren't acceptable while we reboot */ | 204 | /* Interrupts aren't acceptable while we reboot */ |
204 | local_irq_disable(); | 205 | local_irq_disable(); |
206 | hw_breakpoint_disable(); | ||
205 | 207 | ||
206 | if (image->preserve_context) { | 208 | if (image->preserve_context) { |
207 | #ifdef CONFIG_X86_IO_APIC | 209 | #ifdef CONFIG_X86_IO_APIC |
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 84c3bf209e98..4a8bb82248ae 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <asm/pgtable.h> | 18 | #include <asm/pgtable.h> |
19 | #include <asm/tlbflush.h> | 19 | #include <asm/tlbflush.h> |
20 | #include <asm/mmu_context.h> | 20 | #include <asm/mmu_context.h> |
21 | #include <asm/debugreg.h> | ||
21 | 22 | ||
22 | static int init_one_level2_page(struct kimage *image, pgd_t *pgd, | 23 | static int init_one_level2_page(struct kimage *image, pgd_t *pgd, |
23 | unsigned long addr) | 24 | unsigned long addr) |
@@ -282,6 +283,7 @@ void machine_kexec(struct kimage *image) | |||
282 | 283 | ||
283 | /* Interrupts aren't acceptable while we reboot */ | 284 | /* Interrupts aren't acceptable while we reboot */ |
284 | local_irq_disable(); | 285 | local_irq_disable(); |
286 | hw_breakpoint_disable(); | ||
285 | 287 | ||
286 | if (image->preserve_context) { | 288 | if (image->preserve_context) { |
287 | #ifdef CONFIG_X86_IO_APIC | 289 | #ifdef CONFIG_X86_IO_APIC |
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 378e9a8f1bf8..2bcad3926edb 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c | |||
@@ -73,7 +73,6 @@ | |||
73 | #include <linux/platform_device.h> | 73 | #include <linux/platform_device.h> |
74 | #include <linux/miscdevice.h> | 74 | #include <linux/miscdevice.h> |
75 | #include <linux/capability.h> | 75 | #include <linux/capability.h> |
76 | #include <linux/smp_lock.h> | ||
77 | #include <linux/kernel.h> | 76 | #include <linux/kernel.h> |
78 | #include <linux/module.h> | 77 | #include <linux/module.h> |
79 | #include <linux/mutex.h> | 78 | #include <linux/mutex.h> |
@@ -201,7 +200,6 @@ static int do_microcode_update(const void __user *buf, size_t size) | |||
201 | 200 | ||
202 | static int microcode_open(struct inode *unused1, struct file *unused2) | 201 | static int microcode_open(struct inode *unused1, struct file *unused2) |
203 | { | 202 | { |
204 | cycle_kernel_lock(); | ||
205 | return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; | 203 | return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; |
206 | } | 204 | } |
207 | 205 | ||
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 6a3cefc7dda1..553449951b84 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c | |||
@@ -174,21 +174,17 @@ static int msr_open(struct inode *inode, struct file *file) | |||
174 | { | 174 | { |
175 | unsigned int cpu = iminor(file->f_path.dentry->d_inode); | 175 | unsigned int cpu = iminor(file->f_path.dentry->d_inode); |
176 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 176 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
177 | int ret = 0; | ||
178 | 177 | ||
179 | lock_kernel(); | ||
180 | cpu = iminor(file->f_path.dentry->d_inode); | 178 | cpu = iminor(file->f_path.dentry->d_inode); |
181 | 179 | ||
182 | if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { | 180 | if (cpu >= nr_cpu_ids || !cpu_online(cpu)) |
183 | ret = -ENXIO; /* No such CPU */ | 181 | return -ENXIO; /* No such CPU */ |
184 | goto out; | 182 | |
185 | } | ||
186 | c = &cpu_data(cpu); | 183 | c = &cpu_data(cpu); |
187 | if (!cpu_has(c, X86_FEATURE_MSR)) | 184 | if (!cpu_has(c, X86_FEATURE_MSR)) |
188 | ret = -EIO; /* MSR not supported */ | 185 | return -EIO; /* MSR not supported */ |
189 | out: | 186 | |
190 | unlock_kernel(); | 187 | return 0; |
191 | return ret; | ||
192 | } | 188 | } |
193 | 189 | ||
194 | /* | 190 | /* |
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 971a3bec47a8..c563e4c8ff39 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c | |||
@@ -46,6 +46,7 @@ | |||
46 | #include <asm/dma.h> | 46 | #include <asm/dma.h> |
47 | #include <asm/rio.h> | 47 | #include <asm/rio.h> |
48 | #include <asm/bios_ebda.h> | 48 | #include <asm/bios_ebda.h> |
49 | #include <asm/x86_init.h> | ||
49 | 50 | ||
50 | #ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT | 51 | #ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT |
51 | int use_calgary __read_mostly = 1; | 52 | int use_calgary __read_mostly = 1; |
@@ -244,7 +245,7 @@ static unsigned long iommu_range_alloc(struct device *dev, | |||
244 | if (panic_on_overflow) | 245 | if (panic_on_overflow) |
245 | panic("Calgary: fix the allocator.\n"); | 246 | panic("Calgary: fix the allocator.\n"); |
246 | else | 247 | else |
247 | return bad_dma_address; | 248 | return DMA_ERROR_CODE; |
248 | } | 249 | } |
249 | } | 250 | } |
250 | 251 | ||
@@ -260,12 +261,15 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, | |||
260 | void *vaddr, unsigned int npages, int direction) | 261 | void *vaddr, unsigned int npages, int direction) |
261 | { | 262 | { |
262 | unsigned long entry; | 263 | unsigned long entry; |
263 | dma_addr_t ret = bad_dma_address; | 264 | dma_addr_t ret; |
264 | 265 | ||
265 | entry = iommu_range_alloc(dev, tbl, npages); | 266 | entry = iommu_range_alloc(dev, tbl, npages); |
266 | 267 | ||
267 | if (unlikely(entry == bad_dma_address)) | 268 | if (unlikely(entry == DMA_ERROR_CODE)) { |
268 | goto error; | 269 | printk(KERN_WARNING "Calgary: failed to allocate %u pages in " |
270 | "iommu %p\n", npages, tbl); | ||
271 | return DMA_ERROR_CODE; | ||
272 | } | ||
269 | 273 | ||
270 | /* set the return dma address */ | 274 | /* set the return dma address */ |
271 | ret = (entry << PAGE_SHIFT) | ((unsigned long)vaddr & ~PAGE_MASK); | 275 | ret = (entry << PAGE_SHIFT) | ((unsigned long)vaddr & ~PAGE_MASK); |
@@ -273,13 +277,7 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, | |||
273 | /* put the TCEs in the HW table */ | 277 | /* put the TCEs in the HW table */ |
274 | tce_build(tbl, entry, npages, (unsigned long)vaddr & PAGE_MASK, | 278 | tce_build(tbl, entry, npages, (unsigned long)vaddr & PAGE_MASK, |
275 | direction); | 279 | direction); |
276 | |||
277 | return ret; | 280 | return ret; |
278 | |||
279 | error: | ||
280 | printk(KERN_WARNING "Calgary: failed to allocate %u pages in " | ||
281 | "iommu %p\n", npages, tbl); | ||
282 | return bad_dma_address; | ||
283 | } | 281 | } |
284 | 282 | ||
285 | static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, | 283 | static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, |
@@ -290,8 +288,8 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, | |||
290 | unsigned long flags; | 288 | unsigned long flags; |
291 | 289 | ||
292 | /* were we called with bad_dma_address? */ | 290 | /* were we called with bad_dma_address? */ |
293 | badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE); | 291 | badend = DMA_ERROR_CODE + (EMERGENCY_PAGES * PAGE_SIZE); |
294 | if (unlikely((dma_addr >= bad_dma_address) && (dma_addr < badend))) { | 292 | if (unlikely((dma_addr >= DMA_ERROR_CODE) && (dma_addr < badend))) { |
295 | WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA " | 293 | WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA " |
296 | "address 0x%Lx\n", dma_addr); | 294 | "address 0x%Lx\n", dma_addr); |
297 | return; | 295 | return; |
@@ -318,13 +316,15 @@ static inline struct iommu_table *find_iommu_table(struct device *dev) | |||
318 | 316 | ||
319 | pdev = to_pci_dev(dev); | 317 | pdev = to_pci_dev(dev); |
320 | 318 | ||
319 | /* search up the device tree for an iommu */ | ||
321 | pbus = pdev->bus; | 320 | pbus = pdev->bus; |
322 | 321 | do { | |
323 | /* is the device behind a bridge? Look for the root bus */ | 322 | tbl = pci_iommu(pbus); |
324 | while (pbus->parent) | 323 | if (tbl && tbl->it_busno == pbus->number) |
324 | break; | ||
325 | tbl = NULL; | ||
325 | pbus = pbus->parent; | 326 | pbus = pbus->parent; |
326 | 327 | } while (pbus); | |
327 | tbl = pci_iommu(pbus); | ||
328 | 328 | ||
329 | BUG_ON(tbl && (tbl->it_busno != pbus->number)); | 329 | BUG_ON(tbl && (tbl->it_busno != pbus->number)); |
330 | 330 | ||
@@ -373,7 +373,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, | |||
373 | npages = iommu_num_pages(vaddr, s->length, PAGE_SIZE); | 373 | npages = iommu_num_pages(vaddr, s->length, PAGE_SIZE); |
374 | 374 | ||
375 | entry = iommu_range_alloc(dev, tbl, npages); | 375 | entry = iommu_range_alloc(dev, tbl, npages); |
376 | if (entry == bad_dma_address) { | 376 | if (entry == DMA_ERROR_CODE) { |
377 | /* makes sure unmap knows to stop */ | 377 | /* makes sure unmap knows to stop */ |
378 | s->dma_length = 0; | 378 | s->dma_length = 0; |
379 | goto error; | 379 | goto error; |
@@ -391,7 +391,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, | |||
391 | error: | 391 | error: |
392 | calgary_unmap_sg(dev, sg, nelems, dir, NULL); | 392 | calgary_unmap_sg(dev, sg, nelems, dir, NULL); |
393 | for_each_sg(sg, s, nelems, i) { | 393 | for_each_sg(sg, s, nelems, i) { |
394 | sg->dma_address = bad_dma_address; | 394 | sg->dma_address = DMA_ERROR_CODE; |
395 | sg->dma_length = 0; | 395 | sg->dma_length = 0; |
396 | } | 396 | } |
397 | return 0; | 397 | return 0; |
@@ -446,7 +446,7 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size, | |||
446 | 446 | ||
447 | /* set up tces to cover the allocated range */ | 447 | /* set up tces to cover the allocated range */ |
448 | mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); | 448 | mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); |
449 | if (mapping == bad_dma_address) | 449 | if (mapping == DMA_ERROR_CODE) |
450 | goto free; | 450 | goto free; |
451 | *dma_handle = mapping; | 451 | *dma_handle = mapping; |
452 | return ret; | 452 | return ret; |
@@ -727,7 +727,7 @@ static void __init calgary_reserve_regions(struct pci_dev *dev) | |||
727 | struct iommu_table *tbl = pci_iommu(dev->bus); | 727 | struct iommu_table *tbl = pci_iommu(dev->bus); |
728 | 728 | ||
729 | /* reserve EMERGENCY_PAGES from bad_dma_address and up */ | 729 | /* reserve EMERGENCY_PAGES from bad_dma_address and up */ |
730 | iommu_range_reserve(tbl, bad_dma_address, EMERGENCY_PAGES); | 730 | iommu_range_reserve(tbl, DMA_ERROR_CODE, EMERGENCY_PAGES); |
731 | 731 | ||
732 | /* avoid the BIOS/VGA first 640KB-1MB region */ | 732 | /* avoid the BIOS/VGA first 640KB-1MB region */ |
733 | /* for CalIOC2 - avoid the entire first MB */ | 733 | /* for CalIOC2 - avoid the entire first MB */ |
@@ -1344,6 +1344,23 @@ static void __init get_tce_space_from_tar(void) | |||
1344 | return; | 1344 | return; |
1345 | } | 1345 | } |
1346 | 1346 | ||
1347 | static int __init calgary_iommu_init(void) | ||
1348 | { | ||
1349 | int ret; | ||
1350 | |||
1351 | /* ok, we're trying to use Calgary - let's roll */ | ||
1352 | printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n"); | ||
1353 | |||
1354 | ret = calgary_init(); | ||
1355 | if (ret) { | ||
1356 | printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " | ||
1357 | "falling back to no_iommu\n", ret); | ||
1358 | return ret; | ||
1359 | } | ||
1360 | |||
1361 | return 0; | ||
1362 | } | ||
1363 | |||
1347 | void __init detect_calgary(void) | 1364 | void __init detect_calgary(void) |
1348 | { | 1365 | { |
1349 | int bus; | 1366 | int bus; |
@@ -1357,7 +1374,7 @@ void __init detect_calgary(void) | |||
1357 | * if the user specified iommu=off or iommu=soft or we found | 1374 | * if the user specified iommu=off or iommu=soft or we found |
1358 | * another HW IOMMU already, bail out. | 1375 | * another HW IOMMU already, bail out. |
1359 | */ | 1376 | */ |
1360 | if (swiotlb || no_iommu || iommu_detected) | 1377 | if (no_iommu || iommu_detected) |
1361 | return; | 1378 | return; |
1362 | 1379 | ||
1363 | if (!use_calgary) | 1380 | if (!use_calgary) |
@@ -1442,9 +1459,7 @@ void __init detect_calgary(void) | |||
1442 | printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n", | 1459 | printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n", |
1443 | specified_table_size); | 1460 | specified_table_size); |
1444 | 1461 | ||
1445 | /* swiotlb for devices that aren't behind the Calgary. */ | 1462 | x86_init.iommu.iommu_init = calgary_iommu_init; |
1446 | if (max_pfn > MAX_DMA32_PFN) | ||
1447 | swiotlb = 1; | ||
1448 | } | 1463 | } |
1449 | return; | 1464 | return; |
1450 | 1465 | ||
@@ -1457,35 +1472,6 @@ cleanup: | |||
1457 | } | 1472 | } |
1458 | } | 1473 | } |
1459 | 1474 | ||
1460 | int __init calgary_iommu_init(void) | ||
1461 | { | ||
1462 | int ret; | ||
1463 | |||
1464 | if (no_iommu || (swiotlb && !calgary_detected)) | ||
1465 | return -ENODEV; | ||
1466 | |||
1467 | if (!calgary_detected) | ||
1468 | return -ENODEV; | ||
1469 | |||
1470 | /* ok, we're trying to use Calgary - let's roll */ | ||
1471 | printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n"); | ||
1472 | |||
1473 | ret = calgary_init(); | ||
1474 | if (ret) { | ||
1475 | printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " | ||
1476 | "falling back to no_iommu\n", ret); | ||
1477 | return ret; | ||
1478 | } | ||
1479 | |||
1480 | force_iommu = 1; | ||
1481 | bad_dma_address = 0x0; | ||
1482 | /* dma_ops is set to swiotlb or nommu */ | ||
1483 | if (!dma_ops) | ||
1484 | dma_ops = &nommu_dma_ops; | ||
1485 | |||
1486 | return 0; | ||
1487 | } | ||
1488 | |||
1489 | static int __init calgary_parse_options(char *p) | 1475 | static int __init calgary_parse_options(char *p) |
1490 | { | 1476 | { |
1491 | unsigned int bridge; | 1477 | unsigned int bridge; |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index a6e804d16c35..afcc58b69c7c 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -11,10 +11,11 @@ | |||
11 | #include <asm/gart.h> | 11 | #include <asm/gart.h> |
12 | #include <asm/calgary.h> | 12 | #include <asm/calgary.h> |
13 | #include <asm/amd_iommu.h> | 13 | #include <asm/amd_iommu.h> |
14 | #include <asm/x86_init.h> | ||
14 | 15 | ||
15 | static int forbid_dac __read_mostly; | 16 | static int forbid_dac __read_mostly; |
16 | 17 | ||
17 | struct dma_map_ops *dma_ops; | 18 | struct dma_map_ops *dma_ops = &nommu_dma_ops; |
18 | EXPORT_SYMBOL(dma_ops); | 19 | EXPORT_SYMBOL(dma_ops); |
19 | 20 | ||
20 | static int iommu_sac_force __read_mostly; | 21 | static int iommu_sac_force __read_mostly; |
@@ -42,9 +43,6 @@ int iommu_detected __read_mostly = 0; | |||
42 | */ | 43 | */ |
43 | int iommu_pass_through __read_mostly; | 44 | int iommu_pass_through __read_mostly; |
44 | 45 | ||
45 | dma_addr_t bad_dma_address __read_mostly = 0; | ||
46 | EXPORT_SYMBOL(bad_dma_address); | ||
47 | |||
48 | /* Dummy device used for NULL arguments (normally ISA). */ | 46 | /* Dummy device used for NULL arguments (normally ISA). */ |
49 | struct device x86_dma_fallback_dev = { | 47 | struct device x86_dma_fallback_dev = { |
50 | .init_name = "fallback device", | 48 | .init_name = "fallback device", |
@@ -126,20 +124,17 @@ void __init pci_iommu_alloc(void) | |||
126 | /* free the range so iommu could get some range less than 4G */ | 124 | /* free the range so iommu could get some range less than 4G */ |
127 | dma32_free_bootmem(); | 125 | dma32_free_bootmem(); |
128 | #endif | 126 | #endif |
127 | if (pci_swiotlb_init()) | ||
128 | return; | ||
129 | 129 | ||
130 | /* | ||
131 | * The order of these functions is important for | ||
132 | * fall-back/fail-over reasons | ||
133 | */ | ||
134 | gart_iommu_hole_init(); | 130 | gart_iommu_hole_init(); |
135 | 131 | ||
136 | detect_calgary(); | 132 | detect_calgary(); |
137 | 133 | ||
138 | detect_intel_iommu(); | 134 | detect_intel_iommu(); |
139 | 135 | ||
136 | /* needs to be called after gart_iommu_hole_init */ | ||
140 | amd_iommu_detect(); | 137 | amd_iommu_detect(); |
141 | |||
142 | pci_swiotlb_init(); | ||
143 | } | 138 | } |
144 | 139 | ||
145 | void *dma_generic_alloc_coherent(struct device *dev, size_t size, | 140 | void *dma_generic_alloc_coherent(struct device *dev, size_t size, |
@@ -214,7 +209,7 @@ static __init int iommu_setup(char *p) | |||
214 | if (!strncmp(p, "allowdac", 8)) | 209 | if (!strncmp(p, "allowdac", 8)) |
215 | forbid_dac = 0; | 210 | forbid_dac = 0; |
216 | if (!strncmp(p, "nodac", 5)) | 211 | if (!strncmp(p, "nodac", 5)) |
217 | forbid_dac = -1; | 212 | forbid_dac = 1; |
218 | if (!strncmp(p, "usedac", 6)) { | 213 | if (!strncmp(p, "usedac", 6)) { |
219 | forbid_dac = -1; | 214 | forbid_dac = -1; |
220 | return 1; | 215 | return 1; |
@@ -289,25 +284,17 @@ static int __init pci_iommu_init(void) | |||
289 | #ifdef CONFIG_PCI | 284 | #ifdef CONFIG_PCI |
290 | dma_debug_add_bus(&pci_bus_type); | 285 | dma_debug_add_bus(&pci_bus_type); |
291 | #endif | 286 | #endif |
287 | x86_init.iommu.iommu_init(); | ||
292 | 288 | ||
293 | calgary_iommu_init(); | 289 | if (swiotlb) { |
294 | 290 | printk(KERN_INFO "PCI-DMA: " | |
295 | intel_iommu_init(); | 291 | "Using software bounce buffering for IO (SWIOTLB)\n"); |
292 | swiotlb_print_info(); | ||
293 | } else | ||
294 | swiotlb_free(); | ||
296 | 295 | ||
297 | amd_iommu_init(); | ||
298 | |||
299 | gart_iommu_init(); | ||
300 | |||
301 | no_iommu_init(); | ||
302 | return 0; | 296 | return 0; |
303 | } | 297 | } |
304 | |||
305 | void pci_iommu_shutdown(void) | ||
306 | { | ||
307 | gart_iommu_shutdown(); | ||
308 | |||
309 | amd_iommu_shutdown(); | ||
310 | } | ||
311 | /* Must execute after PCI subsystem */ | 298 | /* Must execute after PCI subsystem */ |
312 | rootfs_initcall(pci_iommu_init); | 299 | rootfs_initcall(pci_iommu_init); |
313 | 300 | ||
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index a7f1b64f86e0..e6a0d402f171 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include <asm/swiotlb.h> | 39 | #include <asm/swiotlb.h> |
40 | #include <asm/dma.h> | 40 | #include <asm/dma.h> |
41 | #include <asm/k8.h> | 41 | #include <asm/k8.h> |
42 | #include <asm/x86_init.h> | ||
42 | 43 | ||
43 | static unsigned long iommu_bus_base; /* GART remapping area (physical) */ | 44 | static unsigned long iommu_bus_base; /* GART remapping area (physical) */ |
44 | static unsigned long iommu_size; /* size of remapping area bytes */ | 45 | static unsigned long iommu_size; /* size of remapping area bytes */ |
@@ -46,6 +47,8 @@ static unsigned long iommu_pages; /* .. and in pages */ | |||
46 | 47 | ||
47 | static u32 *iommu_gatt_base; /* Remapping table */ | 48 | static u32 *iommu_gatt_base; /* Remapping table */ |
48 | 49 | ||
50 | static dma_addr_t bad_dma_addr; | ||
51 | |||
49 | /* | 52 | /* |
50 | * If this is disabled the IOMMU will use an optimized flushing strategy | 53 | * If this is disabled the IOMMU will use an optimized flushing strategy |
51 | * of only flushing when an mapping is reused. With it true the GART is | 54 | * of only flushing when an mapping is reused. With it true the GART is |
@@ -92,7 +95,7 @@ static unsigned long alloc_iommu(struct device *dev, int size, | |||
92 | 95 | ||
93 | base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), | 96 | base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), |
94 | PAGE_SIZE) >> PAGE_SHIFT; | 97 | PAGE_SIZE) >> PAGE_SHIFT; |
95 | boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1, | 98 | boundary_size = ALIGN((u64)dma_get_seg_boundary(dev) + 1, |
96 | PAGE_SIZE) >> PAGE_SHIFT; | 99 | PAGE_SIZE) >> PAGE_SHIFT; |
97 | 100 | ||
98 | spin_lock_irqsave(&iommu_bitmap_lock, flags); | 101 | spin_lock_irqsave(&iommu_bitmap_lock, flags); |
@@ -216,7 +219,7 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, | |||
216 | if (panic_on_overflow) | 219 | if (panic_on_overflow) |
217 | panic("dma_map_area overflow %lu bytes\n", size); | 220 | panic("dma_map_area overflow %lu bytes\n", size); |
218 | iommu_full(dev, size, dir); | 221 | iommu_full(dev, size, dir); |
219 | return bad_dma_address; | 222 | return bad_dma_addr; |
220 | } | 223 | } |
221 | 224 | ||
222 | for (i = 0; i < npages; i++) { | 225 | for (i = 0; i < npages; i++) { |
@@ -294,7 +297,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, | |||
294 | int i; | 297 | int i; |
295 | 298 | ||
296 | #ifdef CONFIG_IOMMU_DEBUG | 299 | #ifdef CONFIG_IOMMU_DEBUG |
297 | printk(KERN_DEBUG "dma_map_sg overflow\n"); | 300 | pr_debug("dma_map_sg overflow\n"); |
298 | #endif | 301 | #endif |
299 | 302 | ||
300 | for_each_sg(sg, s, nents, i) { | 303 | for_each_sg(sg, s, nents, i) { |
@@ -302,7 +305,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, | |||
302 | 305 | ||
303 | if (nonforced_iommu(dev, addr, s->length)) { | 306 | if (nonforced_iommu(dev, addr, s->length)) { |
304 | addr = dma_map_area(dev, addr, s->length, dir, 0); | 307 | addr = dma_map_area(dev, addr, s->length, dir, 0); |
305 | if (addr == bad_dma_address) { | 308 | if (addr == bad_dma_addr) { |
306 | if (i > 0) | 309 | if (i > 0) |
307 | gart_unmap_sg(dev, sg, i, dir, NULL); | 310 | gart_unmap_sg(dev, sg, i, dir, NULL); |
308 | nents = 0; | 311 | nents = 0; |
@@ -389,12 +392,14 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, | |||
389 | if (!dev) | 392 | if (!dev) |
390 | dev = &x86_dma_fallback_dev; | 393 | dev = &x86_dma_fallback_dev; |
391 | 394 | ||
392 | out = 0; | 395 | out = 0; |
393 | start = 0; | 396 | start = 0; |
394 | start_sg = sgmap = sg; | 397 | start_sg = sg; |
395 | seg_size = 0; | 398 | sgmap = sg; |
396 | max_seg_size = dma_get_max_seg_size(dev); | 399 | seg_size = 0; |
397 | ps = NULL; /* shut up gcc */ | 400 | max_seg_size = dma_get_max_seg_size(dev); |
401 | ps = NULL; /* shut up gcc */ | ||
402 | |||
398 | for_each_sg(sg, s, nents, i) { | 403 | for_each_sg(sg, s, nents, i) { |
399 | dma_addr_t addr = sg_phys(s); | 404 | dma_addr_t addr = sg_phys(s); |
400 | 405 | ||
@@ -417,11 +422,12 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, | |||
417 | sgmap, pages, need) < 0) | 422 | sgmap, pages, need) < 0) |
418 | goto error; | 423 | goto error; |
419 | out++; | 424 | out++; |
420 | seg_size = 0; | 425 | |
421 | sgmap = sg_next(sgmap); | 426 | seg_size = 0; |
422 | pages = 0; | 427 | sgmap = sg_next(sgmap); |
423 | start = i; | 428 | pages = 0; |
424 | start_sg = s; | 429 | start = i; |
430 | start_sg = s; | ||
425 | } | 431 | } |
426 | } | 432 | } |
427 | 433 | ||
@@ -455,7 +461,7 @@ error: | |||
455 | 461 | ||
456 | iommu_full(dev, pages << PAGE_SHIFT, dir); | 462 | iommu_full(dev, pages << PAGE_SHIFT, dir); |
457 | for_each_sg(sg, s, nents, i) | 463 | for_each_sg(sg, s, nents, i) |
458 | s->dma_address = bad_dma_address; | 464 | s->dma_address = bad_dma_addr; |
459 | return 0; | 465 | return 0; |
460 | } | 466 | } |
461 | 467 | ||
@@ -479,7 +485,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, | |||
479 | DMA_BIDIRECTIONAL, align_mask); | 485 | DMA_BIDIRECTIONAL, align_mask); |
480 | 486 | ||
481 | flush_gart(); | 487 | flush_gart(); |
482 | if (paddr != bad_dma_address) { | 488 | if (paddr != bad_dma_addr) { |
483 | *dma_addr = paddr; | 489 | *dma_addr = paddr; |
484 | return page_address(page); | 490 | return page_address(page); |
485 | } | 491 | } |
@@ -499,6 +505,11 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr, | |||
499 | free_pages((unsigned long)vaddr, get_order(size)); | 505 | free_pages((unsigned long)vaddr, get_order(size)); |
500 | } | 506 | } |
501 | 507 | ||
508 | static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr) | ||
509 | { | ||
510 | return (dma_addr == bad_dma_addr); | ||
511 | } | ||
512 | |||
502 | static int no_agp; | 513 | static int no_agp; |
503 | 514 | ||
504 | static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) | 515 | static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) |
@@ -515,7 +526,7 @@ static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) | |||
515 | iommu_size -= round_up(a, PMD_PAGE_SIZE) - a; | 526 | iommu_size -= round_up(a, PMD_PAGE_SIZE) - a; |
516 | 527 | ||
517 | if (iommu_size < 64*1024*1024) { | 528 | if (iommu_size < 64*1024*1024) { |
518 | printk(KERN_WARNING | 529 | pr_warning( |
519 | "PCI-DMA: Warning: Small IOMMU %luMB." | 530 | "PCI-DMA: Warning: Small IOMMU %luMB." |
520 | " Consider increasing the AGP aperture in BIOS\n", | 531 | " Consider increasing the AGP aperture in BIOS\n", |
521 | iommu_size >> 20); | 532 | iommu_size >> 20); |
@@ -570,28 +581,32 @@ void set_up_gart_resume(u32 aper_order, u32 aper_alloc) | |||
570 | aperture_alloc = aper_alloc; | 581 | aperture_alloc = aper_alloc; |
571 | } | 582 | } |
572 | 583 | ||
573 | static int gart_resume(struct sys_device *dev) | 584 | static void gart_fixup_northbridges(struct sys_device *dev) |
574 | { | 585 | { |
575 | printk(KERN_INFO "PCI-DMA: Resuming GART IOMMU\n"); | 586 | int i; |
576 | 587 | ||
577 | if (fix_up_north_bridges) { | 588 | if (!fix_up_north_bridges) |
578 | int i; | 589 | return; |
579 | 590 | ||
580 | printk(KERN_INFO "PCI-DMA: Restoring GART aperture settings\n"); | 591 | pr_info("PCI-DMA: Restoring GART aperture settings\n"); |
581 | 592 | ||
582 | for (i = 0; i < num_k8_northbridges; i++) { | 593 | for (i = 0; i < num_k8_northbridges; i++) { |
583 | struct pci_dev *dev = k8_northbridges[i]; | 594 | struct pci_dev *dev = k8_northbridges[i]; |
584 | 595 | ||
585 | /* | 596 | /* |
586 | * Don't enable translations just yet. That is the next | 597 | * Don't enable translations just yet. That is the next |
587 | * step. Restore the pre-suspend aperture settings. | 598 | * step. Restore the pre-suspend aperture settings. |
588 | */ | 599 | */ |
589 | pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, | 600 | pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, aperture_order << 1); |
590 | aperture_order << 1); | 601 | pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25); |
591 | pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, | ||
592 | aperture_alloc >> 25); | ||
593 | } | ||
594 | } | 602 | } |
603 | } | ||
604 | |||
605 | static int gart_resume(struct sys_device *dev) | ||
606 | { | ||
607 | pr_info("PCI-DMA: Resuming GART IOMMU\n"); | ||
608 | |||
609 | gart_fixup_northbridges(dev); | ||
595 | 610 | ||
596 | enable_gart_translations(); | 611 | enable_gart_translations(); |
597 | 612 | ||
@@ -604,15 +619,14 @@ static int gart_suspend(struct sys_device *dev, pm_message_t state) | |||
604 | } | 619 | } |
605 | 620 | ||
606 | static struct sysdev_class gart_sysdev_class = { | 621 | static struct sysdev_class gart_sysdev_class = { |
607 | .name = "gart", | 622 | .name = "gart", |
608 | .suspend = gart_suspend, | 623 | .suspend = gart_suspend, |
609 | .resume = gart_resume, | 624 | .resume = gart_resume, |
610 | 625 | ||
611 | }; | 626 | }; |
612 | 627 | ||
613 | static struct sys_device device_gart = { | 628 | static struct sys_device device_gart = { |
614 | .id = 0, | 629 | .cls = &gart_sysdev_class, |
615 | .cls = &gart_sysdev_class, | ||
616 | }; | 630 | }; |
617 | 631 | ||
618 | /* | 632 | /* |
@@ -627,7 +641,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
627 | void *gatt; | 641 | void *gatt; |
628 | int i, error; | 642 | int i, error; |
629 | 643 | ||
630 | printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); | 644 | pr_info("PCI-DMA: Disabling AGP.\n"); |
645 | |||
631 | aper_size = aper_base = info->aper_size = 0; | 646 | aper_size = aper_base = info->aper_size = 0; |
632 | dev = NULL; | 647 | dev = NULL; |
633 | for (i = 0; i < num_k8_northbridges; i++) { | 648 | for (i = 0; i < num_k8_northbridges; i++) { |
@@ -645,6 +660,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
645 | } | 660 | } |
646 | if (!aper_base) | 661 | if (!aper_base) |
647 | goto nommu; | 662 | goto nommu; |
663 | |||
648 | info->aper_base = aper_base; | 664 | info->aper_base = aper_base; |
649 | info->aper_size = aper_size >> 20; | 665 | info->aper_size = aper_size >> 20; |
650 | 666 | ||
@@ -667,14 +683,14 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
667 | 683 | ||
668 | flush_gart(); | 684 | flush_gart(); |
669 | 685 | ||
670 | printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", | 686 | pr_info("PCI-DMA: aperture base @ %x size %u KB\n", |
671 | aper_base, aper_size>>10); | 687 | aper_base, aper_size>>10); |
672 | 688 | ||
673 | return 0; | 689 | return 0; |
674 | 690 | ||
675 | nommu: | 691 | nommu: |
676 | /* Should not happen anymore */ | 692 | /* Should not happen anymore */ |
677 | printk(KERN_WARNING "PCI-DMA: More than 4GB of RAM and no IOMMU\n" | 693 | pr_warning("PCI-DMA: More than 4GB of RAM and no IOMMU\n" |
678 | "falling back to iommu=soft.\n"); | 694 | "falling back to iommu=soft.\n"); |
679 | return -1; | 695 | return -1; |
680 | } | 696 | } |
@@ -686,14 +702,15 @@ static struct dma_map_ops gart_dma_ops = { | |||
686 | .unmap_page = gart_unmap_page, | 702 | .unmap_page = gart_unmap_page, |
687 | .alloc_coherent = gart_alloc_coherent, | 703 | .alloc_coherent = gart_alloc_coherent, |
688 | .free_coherent = gart_free_coherent, | 704 | .free_coherent = gart_free_coherent, |
705 | .mapping_error = gart_mapping_error, | ||
689 | }; | 706 | }; |
690 | 707 | ||
691 | void gart_iommu_shutdown(void) | 708 | static void gart_iommu_shutdown(void) |
692 | { | 709 | { |
693 | struct pci_dev *dev; | 710 | struct pci_dev *dev; |
694 | int i; | 711 | int i; |
695 | 712 | ||
696 | if (no_agp && (dma_ops != &gart_dma_ops)) | 713 | if (no_agp) |
697 | return; | 714 | return; |
698 | 715 | ||
699 | for (i = 0; i < num_k8_northbridges; i++) { | 716 | for (i = 0; i < num_k8_northbridges; i++) { |
@@ -708,7 +725,7 @@ void gart_iommu_shutdown(void) | |||
708 | } | 725 | } |
709 | } | 726 | } |
710 | 727 | ||
711 | void __init gart_iommu_init(void) | 728 | int __init gart_iommu_init(void) |
712 | { | 729 | { |
713 | struct agp_kern_info info; | 730 | struct agp_kern_info info; |
714 | unsigned long iommu_start; | 731 | unsigned long iommu_start; |
@@ -718,7 +735,7 @@ void __init gart_iommu_init(void) | |||
718 | long i; | 735 | long i; |
719 | 736 | ||
720 | if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) | 737 | if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) |
721 | return; | 738 | return 0; |
722 | 739 | ||
723 | #ifndef CONFIG_AGP_AMD64 | 740 | #ifndef CONFIG_AGP_AMD64 |
724 | no_agp = 1; | 741 | no_agp = 1; |
@@ -730,35 +747,28 @@ void __init gart_iommu_init(void) | |||
730 | (agp_copy_info(agp_bridge, &info) < 0); | 747 | (agp_copy_info(agp_bridge, &info) < 0); |
731 | #endif | 748 | #endif |
732 | 749 | ||
733 | if (swiotlb) | ||
734 | return; | ||
735 | |||
736 | /* Did we detect a different HW IOMMU? */ | ||
737 | if (iommu_detected && !gart_iommu_aperture) | ||
738 | return; | ||
739 | |||
740 | if (no_iommu || | 750 | if (no_iommu || |
741 | (!force_iommu && max_pfn <= MAX_DMA32_PFN) || | 751 | (!force_iommu && max_pfn <= MAX_DMA32_PFN) || |
742 | !gart_iommu_aperture || | 752 | !gart_iommu_aperture || |
743 | (no_agp && init_k8_gatt(&info) < 0)) { | 753 | (no_agp && init_k8_gatt(&info) < 0)) { |
744 | if (max_pfn > MAX_DMA32_PFN) { | 754 | if (max_pfn > MAX_DMA32_PFN) { |
745 | printk(KERN_WARNING "More than 4GB of memory " | 755 | pr_warning("More than 4GB of memory but GART IOMMU not available.\n"); |
746 | "but GART IOMMU not available.\n"); | 756 | pr_warning("falling back to iommu=soft.\n"); |
747 | printk(KERN_WARNING "falling back to iommu=soft.\n"); | ||
748 | } | 757 | } |
749 | return; | 758 | return 0; |
750 | } | 759 | } |
751 | 760 | ||
752 | /* need to map that range */ | 761 | /* need to map that range */ |
753 | aper_size = info.aper_size << 20; | 762 | aper_size = info.aper_size << 20; |
754 | aper_base = info.aper_base; | 763 | aper_base = info.aper_base; |
755 | end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); | 764 | end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); |
765 | |||
756 | if (end_pfn > max_low_pfn_mapped) { | 766 | if (end_pfn > max_low_pfn_mapped) { |
757 | start_pfn = (aper_base>>PAGE_SHIFT); | 767 | start_pfn = (aper_base>>PAGE_SHIFT); |
758 | init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); | 768 | init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); |
759 | } | 769 | } |
760 | 770 | ||
761 | printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); | 771 | pr_info("PCI-DMA: using GART IOMMU.\n"); |
762 | iommu_size = check_iommu_size(info.aper_base, aper_size); | 772 | iommu_size = check_iommu_size(info.aper_base, aper_size); |
763 | iommu_pages = iommu_size >> PAGE_SHIFT; | 773 | iommu_pages = iommu_size >> PAGE_SHIFT; |
764 | 774 | ||
@@ -773,8 +783,7 @@ void __init gart_iommu_init(void) | |||
773 | 783 | ||
774 | ret = dma_debug_resize_entries(iommu_pages); | 784 | ret = dma_debug_resize_entries(iommu_pages); |
775 | if (ret) | 785 | if (ret) |
776 | printk(KERN_DEBUG | 786 | pr_debug("PCI-DMA: Cannot trace all the entries\n"); |
777 | "PCI-DMA: Cannot trace all the entries\n"); | ||
778 | } | 787 | } |
779 | #endif | 788 | #endif |
780 | 789 | ||
@@ -784,15 +793,14 @@ void __init gart_iommu_init(void) | |||
784 | */ | 793 | */ |
785 | iommu_area_reserve(iommu_gart_bitmap, 0, EMERGENCY_PAGES); | 794 | iommu_area_reserve(iommu_gart_bitmap, 0, EMERGENCY_PAGES); |
786 | 795 | ||
787 | agp_memory_reserved = iommu_size; | 796 | pr_info("PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n", |
788 | printk(KERN_INFO | ||
789 | "PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n", | ||
790 | iommu_size >> 20); | 797 | iommu_size >> 20); |
791 | 798 | ||
792 | iommu_start = aper_size - iommu_size; | 799 | agp_memory_reserved = iommu_size; |
793 | iommu_bus_base = info.aper_base + iommu_start; | 800 | iommu_start = aper_size - iommu_size; |
794 | bad_dma_address = iommu_bus_base; | 801 | iommu_bus_base = info.aper_base + iommu_start; |
795 | iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); | 802 | bad_dma_addr = iommu_bus_base; |
803 | iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); | ||
796 | 804 | ||
797 | /* | 805 | /* |
798 | * Unmap the IOMMU part of the GART. The alias of the page is | 806 | * Unmap the IOMMU part of the GART. The alias of the page is |
@@ -814,7 +822,7 @@ void __init gart_iommu_init(void) | |||
814 | * the pages as Not-Present: | 822 | * the pages as Not-Present: |
815 | */ | 823 | */ |
816 | wbinvd(); | 824 | wbinvd(); |
817 | 825 | ||
818 | /* | 826 | /* |
819 | * Now all caches are flushed and we can safely enable | 827 | * Now all caches are flushed and we can safely enable |
820 | * GART hardware. Doing it early leaves the possibility | 828 | * GART hardware. Doing it early leaves the possibility |
@@ -838,6 +846,10 @@ void __init gart_iommu_init(void) | |||
838 | 846 | ||
839 | flush_gart(); | 847 | flush_gart(); |
840 | dma_ops = &gart_dma_ops; | 848 | dma_ops = &gart_dma_ops; |
849 | x86_platform.iommu_shutdown = gart_iommu_shutdown; | ||
850 | swiotlb = 0; | ||
851 | |||
852 | return 0; | ||
841 | } | 853 | } |
842 | 854 | ||
843 | void __init gart_parse_options(char *p) | 855 | void __init gart_parse_options(char *p) |
@@ -856,7 +868,7 @@ void __init gart_parse_options(char *p) | |||
856 | #endif | 868 | #endif |
857 | if (isdigit(*p) && get_option(&p, &arg)) | 869 | if (isdigit(*p) && get_option(&p, &arg)) |
858 | iommu_size = arg; | 870 | iommu_size = arg; |
859 | if (!strncmp(p, "fullflush", 8)) | 871 | if (!strncmp(p, "fullflush", 9)) |
860 | iommu_fullflush = 1; | 872 | iommu_fullflush = 1; |
861 | if (!strncmp(p, "nofullflush", 11)) | 873 | if (!strncmp(p, "nofullflush", 11)) |
862 | iommu_fullflush = 0; | 874 | iommu_fullflush = 0; |
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index a3933d4330cd..22be12b60a8f 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c | |||
@@ -33,7 +33,7 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page, | |||
33 | dma_addr_t bus = page_to_phys(page) + offset; | 33 | dma_addr_t bus = page_to_phys(page) + offset; |
34 | WARN_ON(size == 0); | 34 | WARN_ON(size == 0); |
35 | if (!check_addr("map_single", dev, bus, size)) | 35 | if (!check_addr("map_single", dev, bus, size)) |
36 | return bad_dma_address; | 36 | return DMA_ERROR_CODE; |
37 | flush_write_buffers(); | 37 | flush_write_buffers(); |
38 | return bus; | 38 | return bus; |
39 | } | 39 | } |
@@ -103,12 +103,3 @@ struct dma_map_ops nommu_dma_ops = { | |||
103 | .sync_sg_for_device = nommu_sync_sg_for_device, | 103 | .sync_sg_for_device = nommu_sync_sg_for_device, |
104 | .is_phys = 1, | 104 | .is_phys = 1, |
105 | }; | 105 | }; |
106 | |||
107 | void __init no_iommu_init(void) | ||
108 | { | ||
109 | if (dma_ops) | ||
110 | return; | ||
111 | |||
112 | force_iommu = 0; /* no HW IOMMU */ | ||
113 | dma_ops = &nommu_dma_ops; | ||
114 | } | ||
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index aaa6b7839f1e..e3c0a66b9e77 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c | |||
@@ -42,18 +42,28 @@ static struct dma_map_ops swiotlb_dma_ops = { | |||
42 | .dma_supported = NULL, | 42 | .dma_supported = NULL, |
43 | }; | 43 | }; |
44 | 44 | ||
45 | void __init pci_swiotlb_init(void) | 45 | /* |
46 | * pci_swiotlb_init - initialize swiotlb if necessary | ||
47 | * | ||
48 | * This returns non-zero if we are forced to use swiotlb (by the boot | ||
49 | * option). | ||
50 | */ | ||
51 | int __init pci_swiotlb_init(void) | ||
46 | { | 52 | { |
53 | int use_swiotlb = swiotlb | swiotlb_force; | ||
54 | |||
47 | /* don't initialize swiotlb if iommu=off (no_iommu=1) */ | 55 | /* don't initialize swiotlb if iommu=off (no_iommu=1) */ |
48 | #ifdef CONFIG_X86_64 | 56 | #ifdef CONFIG_X86_64 |
49 | if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN)) | 57 | if (!no_iommu && max_pfn > MAX_DMA32_PFN) |
50 | swiotlb = 1; | 58 | swiotlb = 1; |
51 | #endif | 59 | #endif |
52 | if (swiotlb_force) | 60 | if (swiotlb_force) |
53 | swiotlb = 1; | 61 | swiotlb = 1; |
62 | |||
54 | if (swiotlb) { | 63 | if (swiotlb) { |
55 | printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n"); | 64 | swiotlb_init(0); |
56 | swiotlb_init(); | ||
57 | dma_ops = &swiotlb_dma_ops; | 65 | dma_ops = &swiotlb_dma_ops; |
58 | } | 66 | } |
67 | |||
68 | return use_swiotlb; | ||
59 | } | 69 | } |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 5284cd2b5776..744508e7cfdd 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/clockchips.h> | 10 | #include <linux/clockchips.h> |
11 | #include <linux/random.h> | 11 | #include <linux/random.h> |
12 | #include <trace/events/power.h> | 12 | #include <trace/events/power.h> |
13 | #include <linux/hw_breakpoint.h> | ||
13 | #include <asm/system.h> | 14 | #include <asm/system.h> |
14 | #include <asm/apic.h> | 15 | #include <asm/apic.h> |
15 | #include <asm/syscalls.h> | 16 | #include <asm/syscalls.h> |
@@ -17,6 +18,7 @@ | |||
17 | #include <asm/uaccess.h> | 18 | #include <asm/uaccess.h> |
18 | #include <asm/i387.h> | 19 | #include <asm/i387.h> |
19 | #include <asm/ds.h> | 20 | #include <asm/ds.h> |
21 | #include <asm/debugreg.h> | ||
20 | 22 | ||
21 | unsigned long idle_halt; | 23 | unsigned long idle_halt; |
22 | EXPORT_SYMBOL(idle_halt); | 24 | EXPORT_SYMBOL(idle_halt); |
@@ -103,14 +105,7 @@ void flush_thread(void) | |||
103 | } | 105 | } |
104 | #endif | 106 | #endif |
105 | 107 | ||
106 | clear_tsk_thread_flag(tsk, TIF_DEBUG); | 108 | flush_ptrace_hw_breakpoint(tsk); |
107 | |||
108 | tsk->thread.debugreg0 = 0; | ||
109 | tsk->thread.debugreg1 = 0; | ||
110 | tsk->thread.debugreg2 = 0; | ||
111 | tsk->thread.debugreg3 = 0; | ||
112 | tsk->thread.debugreg6 = 0; | ||
113 | tsk->thread.debugreg7 = 0; | ||
114 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); | 109 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); |
115 | /* | 110 | /* |
116 | * Forget coprocessor state.. | 111 | * Forget coprocessor state.. |
@@ -192,16 +187,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
192 | else if (next->debugctlmsr != prev->debugctlmsr) | 187 | else if (next->debugctlmsr != prev->debugctlmsr) |
193 | update_debugctlmsr(next->debugctlmsr); | 188 | update_debugctlmsr(next->debugctlmsr); |
194 | 189 | ||
195 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { | ||
196 | set_debugreg(next->debugreg0, 0); | ||
197 | set_debugreg(next->debugreg1, 1); | ||
198 | set_debugreg(next->debugreg2, 2); | ||
199 | set_debugreg(next->debugreg3, 3); | ||
200 | /* no 4 and 5 */ | ||
201 | set_debugreg(next->debugreg6, 6); | ||
202 | set_debugreg(next->debugreg7, 7); | ||
203 | } | ||
204 | |||
205 | if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ | 190 | if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ |
206 | test_tsk_thread_flag(next_p, TIF_NOTSC)) { | 191 | test_tsk_thread_flag(next_p, TIF_NOTSC)) { |
207 | /* prev and next are different */ | 192 | /* prev and next are different */ |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 4cf79567cdab..075580b35682 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -58,6 +58,7 @@ | |||
58 | #include <asm/idle.h> | 58 | #include <asm/idle.h> |
59 | #include <asm/syscalls.h> | 59 | #include <asm/syscalls.h> |
60 | #include <asm/ds.h> | 60 | #include <asm/ds.h> |
61 | #include <asm/debugreg.h> | ||
61 | 62 | ||
62 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | 63 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
63 | 64 | ||
@@ -134,7 +135,7 @@ void __show_regs(struct pt_regs *regs, int all) | |||
134 | ss = regs->ss & 0xffff; | 135 | ss = regs->ss & 0xffff; |
135 | gs = get_user_gs(regs); | 136 | gs = get_user_gs(regs); |
136 | } else { | 137 | } else { |
137 | sp = (unsigned long) (®s->sp); | 138 | sp = kernel_stack_pointer(regs); |
138 | savesegment(ss, ss); | 139 | savesegment(ss, ss); |
139 | savesegment(gs, gs); | 140 | savesegment(gs, gs); |
140 | } | 141 | } |
@@ -187,7 +188,7 @@ void __show_regs(struct pt_regs *regs, int all) | |||
187 | 188 | ||
188 | void show_regs(struct pt_regs *regs) | 189 | void show_regs(struct pt_regs *regs) |
189 | { | 190 | { |
190 | __show_regs(regs, 1); | 191 | show_registers(regs); |
191 | show_trace(NULL, regs, ®s->sp, regs->bp); | 192 | show_trace(NULL, regs, ®s->sp, regs->bp); |
192 | } | 193 | } |
193 | 194 | ||
@@ -259,7 +260,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
259 | 260 | ||
260 | task_user_gs(p) = get_user_gs(regs); | 261 | task_user_gs(p) = get_user_gs(regs); |
261 | 262 | ||
263 | p->thread.io_bitmap_ptr = NULL; | ||
262 | tsk = current; | 264 | tsk = current; |
265 | err = -ENOMEM; | ||
266 | |||
267 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); | ||
268 | |||
263 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { | 269 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { |
264 | p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, | 270 | p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, |
265 | IO_BITMAP_BYTES, GFP_KERNEL); | 271 | IO_BITMAP_BYTES, GFP_KERNEL); |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index eb62cbcaa490..a98fe88fab64 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -52,6 +52,7 @@ | |||
52 | #include <asm/idle.h> | 52 | #include <asm/idle.h> |
53 | #include <asm/syscalls.h> | 53 | #include <asm/syscalls.h> |
54 | #include <asm/ds.h> | 54 | #include <asm/ds.h> |
55 | #include <asm/debugreg.h> | ||
55 | 56 | ||
56 | asmlinkage extern void ret_from_fork(void); | 57 | asmlinkage extern void ret_from_fork(void); |
57 | 58 | ||
@@ -226,8 +227,7 @@ void __show_regs(struct pt_regs *regs, int all) | |||
226 | 227 | ||
227 | void show_regs(struct pt_regs *regs) | 228 | void show_regs(struct pt_regs *regs) |
228 | { | 229 | { |
229 | printk(KERN_INFO "CPU %d:", smp_processor_id()); | 230 | show_registers(regs); |
230 | __show_regs(regs, 1); | ||
231 | show_trace(NULL, regs, (void *)(regs + 1), regs->bp); | 231 | show_trace(NULL, regs, (void *)(regs + 1), regs->bp); |
232 | } | 232 | } |
233 | 233 | ||
@@ -297,12 +297,16 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
297 | 297 | ||
298 | p->thread.fs = me->thread.fs; | 298 | p->thread.fs = me->thread.fs; |
299 | p->thread.gs = me->thread.gs; | 299 | p->thread.gs = me->thread.gs; |
300 | p->thread.io_bitmap_ptr = NULL; | ||
300 | 301 | ||
301 | savesegment(gs, p->thread.gsindex); | 302 | savesegment(gs, p->thread.gsindex); |
302 | savesegment(fs, p->thread.fsindex); | 303 | savesegment(fs, p->thread.fsindex); |
303 | savesegment(es, p->thread.es); | 304 | savesegment(es, p->thread.es); |
304 | savesegment(ds, p->thread.ds); | 305 | savesegment(ds, p->thread.ds); |
305 | 306 | ||
307 | err = -ENOMEM; | ||
308 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); | ||
309 | |||
306 | if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { | 310 | if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { |
307 | p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); | 311 | p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); |
308 | if (!p->thread.io_bitmap_ptr) { | 312 | if (!p->thread.io_bitmap_ptr) { |
@@ -341,6 +345,7 @@ out: | |||
341 | kfree(p->thread.io_bitmap_ptr); | 345 | kfree(p->thread.io_bitmap_ptr); |
342 | p->thread.io_bitmap_max = 0; | 346 | p->thread.io_bitmap_max = 0; |
343 | } | 347 | } |
348 | |||
344 | return err; | 349 | return err; |
345 | } | 350 | } |
346 | 351 | ||
@@ -495,6 +500,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
495 | */ | 500 | */ |
496 | if (preload_fpu) | 501 | if (preload_fpu) |
497 | __math_state_restore(); | 502 | __math_state_restore(); |
503 | |||
498 | return prev_p; | 504 | return prev_p; |
499 | } | 505 | } |
500 | 506 | ||
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 7b058a2dc66a..04d182a7cfdb 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -22,6 +22,8 @@ | |||
22 | #include <linux/seccomp.h> | 22 | #include <linux/seccomp.h> |
23 | #include <linux/signal.h> | 23 | #include <linux/signal.h> |
24 | #include <linux/workqueue.h> | 24 | #include <linux/workqueue.h> |
25 | #include <linux/perf_event.h> | ||
26 | #include <linux/hw_breakpoint.h> | ||
25 | 27 | ||
26 | #include <asm/uaccess.h> | 28 | #include <asm/uaccess.h> |
27 | #include <asm/pgtable.h> | 29 | #include <asm/pgtable.h> |
@@ -34,6 +36,7 @@ | |||
34 | #include <asm/prctl.h> | 36 | #include <asm/prctl.h> |
35 | #include <asm/proto.h> | 37 | #include <asm/proto.h> |
36 | #include <asm/ds.h> | 38 | #include <asm/ds.h> |
39 | #include <asm/hw_breakpoint.h> | ||
37 | 40 | ||
38 | #include "tls.h" | 41 | #include "tls.h" |
39 | 42 | ||
@@ -49,6 +52,118 @@ enum x86_regset { | |||
49 | REGSET_IOPERM32, | 52 | REGSET_IOPERM32, |
50 | }; | 53 | }; |
51 | 54 | ||
55 | struct pt_regs_offset { | ||
56 | const char *name; | ||
57 | int offset; | ||
58 | }; | ||
59 | |||
60 | #define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} | ||
61 | #define REG_OFFSET_END {.name = NULL, .offset = 0} | ||
62 | |||
63 | static const struct pt_regs_offset regoffset_table[] = { | ||
64 | #ifdef CONFIG_X86_64 | ||
65 | REG_OFFSET_NAME(r15), | ||
66 | REG_OFFSET_NAME(r14), | ||
67 | REG_OFFSET_NAME(r13), | ||
68 | REG_OFFSET_NAME(r12), | ||
69 | REG_OFFSET_NAME(r11), | ||
70 | REG_OFFSET_NAME(r10), | ||
71 | REG_OFFSET_NAME(r9), | ||
72 | REG_OFFSET_NAME(r8), | ||
73 | #endif | ||
74 | REG_OFFSET_NAME(bx), | ||
75 | REG_OFFSET_NAME(cx), | ||
76 | REG_OFFSET_NAME(dx), | ||
77 | REG_OFFSET_NAME(si), | ||
78 | REG_OFFSET_NAME(di), | ||
79 | REG_OFFSET_NAME(bp), | ||
80 | REG_OFFSET_NAME(ax), | ||
81 | #ifdef CONFIG_X86_32 | ||
82 | REG_OFFSET_NAME(ds), | ||
83 | REG_OFFSET_NAME(es), | ||
84 | REG_OFFSET_NAME(fs), | ||
85 | REG_OFFSET_NAME(gs), | ||
86 | #endif | ||
87 | REG_OFFSET_NAME(orig_ax), | ||
88 | REG_OFFSET_NAME(ip), | ||
89 | REG_OFFSET_NAME(cs), | ||
90 | REG_OFFSET_NAME(flags), | ||
91 | REG_OFFSET_NAME(sp), | ||
92 | REG_OFFSET_NAME(ss), | ||
93 | REG_OFFSET_END, | ||
94 | }; | ||
95 | |||
96 | /** | ||
97 | * regs_query_register_offset() - query register offset from its name | ||
98 | * @name: the name of a register | ||
99 | * | ||
100 | * regs_query_register_offset() returns the offset of a register in struct | ||
101 | * pt_regs from its name. If the name is invalid, this returns -EINVAL; | ||
102 | */ | ||
103 | int regs_query_register_offset(const char *name) | ||
104 | { | ||
105 | const struct pt_regs_offset *roff; | ||
106 | for (roff = regoffset_table; roff->name != NULL; roff++) | ||
107 | if (!strcmp(roff->name, name)) | ||
108 | return roff->offset; | ||
109 | return -EINVAL; | ||
110 | } | ||
111 | |||
112 | /** | ||
113 | * regs_query_register_name() - query register name from its offset | ||
114 | * @offset: the offset of a register in struct pt_regs. | ||
115 | * | ||
116 | * regs_query_register_name() returns the name of a register from its | ||
117 | * offset in struct pt_regs. If the @offset is invalid, this returns NULL; | ||
118 | */ | ||
119 | const char *regs_query_register_name(unsigned int offset) | ||
120 | { | ||
121 | const struct pt_regs_offset *roff; | ||
122 | for (roff = regoffset_table; roff->name != NULL; roff++) | ||
123 | if (roff->offset == offset) | ||
124 | return roff->name; | ||
125 | return NULL; | ||
126 | } | ||
127 | |||
128 | static const int arg_offs_table[] = { | ||
129 | #ifdef CONFIG_X86_32 | ||
130 | [0] = offsetof(struct pt_regs, ax), | ||
131 | [1] = offsetof(struct pt_regs, dx), | ||
132 | [2] = offsetof(struct pt_regs, cx) | ||
133 | #else /* CONFIG_X86_64 */ | ||
134 | [0] = offsetof(struct pt_regs, di), | ||
135 | [1] = offsetof(struct pt_regs, si), | ||
136 | [2] = offsetof(struct pt_regs, dx), | ||
137 | [3] = offsetof(struct pt_regs, cx), | ||
138 | [4] = offsetof(struct pt_regs, r8), | ||
139 | [5] = offsetof(struct pt_regs, r9) | ||
140 | #endif | ||
141 | }; | ||
142 | |||
143 | /** | ||
144 | * regs_get_argument_nth() - get Nth argument at function call | ||
145 | * @regs: pt_regs which contains registers at function entry. | ||
146 | * @n: argument number. | ||
147 | * | ||
148 | * regs_get_argument_nth() returns @n th argument of a function call. | ||
149 | * Since usually the kernel stack will be changed right after function entry, | ||
150 | * you must use this at function entry. If the @n th entry is NOT in the | ||
151 | * kernel stack or pt_regs, this returns 0. | ||
152 | */ | ||
153 | unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n) | ||
154 | { | ||
155 | if (n < ARRAY_SIZE(arg_offs_table)) | ||
156 | return *(unsigned long *)((char *)regs + arg_offs_table[n]); | ||
157 | else { | ||
158 | /* | ||
159 | * The typical case: arg n is on the stack. | ||
160 | * (Note: stack[0] = return address, so skip it) | ||
161 | */ | ||
162 | n -= ARRAY_SIZE(arg_offs_table); | ||
163 | return regs_get_kernel_stack_nth(regs, 1 + n); | ||
164 | } | ||
165 | } | ||
166 | |||
52 | /* | 167 | /* |
53 | * does not yet catch signals sent when the child dies. | 168 | * does not yet catch signals sent when the child dies. |
54 | * in exit.c or in signal.c. | 169 | * in exit.c or in signal.c. |
@@ -137,11 +252,6 @@ static int set_segment_reg(struct task_struct *task, | |||
137 | return 0; | 252 | return 0; |
138 | } | 253 | } |
139 | 254 | ||
140 | static unsigned long debugreg_addr_limit(struct task_struct *task) | ||
141 | { | ||
142 | return TASK_SIZE - 3; | ||
143 | } | ||
144 | |||
145 | #else /* CONFIG_X86_64 */ | 255 | #else /* CONFIG_X86_64 */ |
146 | 256 | ||
147 | #define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) | 257 | #define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) |
@@ -266,15 +376,6 @@ static int set_segment_reg(struct task_struct *task, | |||
266 | return 0; | 376 | return 0; |
267 | } | 377 | } |
268 | 378 | ||
269 | static unsigned long debugreg_addr_limit(struct task_struct *task) | ||
270 | { | ||
271 | #ifdef CONFIG_IA32_EMULATION | ||
272 | if (test_tsk_thread_flag(task, TIF_IA32)) | ||
273 | return IA32_PAGE_OFFSET - 3; | ||
274 | #endif | ||
275 | return TASK_SIZE_MAX - 7; | ||
276 | } | ||
277 | |||
278 | #endif /* CONFIG_X86_32 */ | 379 | #endif /* CONFIG_X86_32 */ |
279 | 380 | ||
280 | static unsigned long get_flags(struct task_struct *task) | 381 | static unsigned long get_flags(struct task_struct *task) |
@@ -454,99 +555,239 @@ static int genregs_set(struct task_struct *target, | |||
454 | return ret; | 555 | return ret; |
455 | } | 556 | } |
456 | 557 | ||
558 | static void ptrace_triggered(struct perf_event *bp, void *data) | ||
559 | { | ||
560 | int i; | ||
561 | struct thread_struct *thread = &(current->thread); | ||
562 | |||
563 | /* | ||
564 | * Store in the virtual DR6 register the fact that the breakpoint | ||
565 | * was hit so the thread's debugger will see it. | ||
566 | */ | ||
567 | for (i = 0; i < HBP_NUM; i++) { | ||
568 | if (thread->ptrace_bps[i] == bp) | ||
569 | break; | ||
570 | } | ||
571 | |||
572 | thread->debugreg6 |= (DR_TRAP0 << i); | ||
573 | } | ||
574 | |||
457 | /* | 575 | /* |
458 | * This function is trivial and will be inlined by the compiler. | 576 | * Walk through every ptrace breakpoints for this thread and |
459 | * Having it separates the implementation details of debug | 577 | * build the dr7 value on top of their attributes. |
460 | * registers from the interface details of ptrace. | 578 | * |
461 | */ | 579 | */ |
462 | static unsigned long ptrace_get_debugreg(struct task_struct *child, int n) | 580 | static unsigned long ptrace_get_dr7(struct perf_event *bp[]) |
463 | { | 581 | { |
464 | switch (n) { | 582 | int i; |
465 | case 0: return child->thread.debugreg0; | 583 | int dr7 = 0; |
466 | case 1: return child->thread.debugreg1; | 584 | struct arch_hw_breakpoint *info; |
467 | case 2: return child->thread.debugreg2; | 585 | |
468 | case 3: return child->thread.debugreg3; | 586 | for (i = 0; i < HBP_NUM; i++) { |
469 | case 6: return child->thread.debugreg6; | 587 | if (bp[i] && !bp[i]->attr.disabled) { |
470 | case 7: return child->thread.debugreg7; | 588 | info = counter_arch_bp(bp[i]); |
589 | dr7 |= encode_dr7(i, info->len, info->type); | ||
590 | } | ||
471 | } | 591 | } |
472 | return 0; | 592 | |
593 | return dr7; | ||
473 | } | 594 | } |
474 | 595 | ||
475 | static int ptrace_set_debugreg(struct task_struct *child, | 596 | static struct perf_event * |
476 | int n, unsigned long data) | 597 | ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, |
598 | struct task_struct *tsk, int disabled) | ||
477 | { | 599 | { |
478 | int i; | 600 | int err; |
601 | int gen_len, gen_type; | ||
602 | DEFINE_BREAKPOINT_ATTR(attr); | ||
479 | 603 | ||
480 | if (unlikely(n == 4 || n == 5)) | 604 | /* |
481 | return -EIO; | 605 | * We shoud have at least an inactive breakpoint at this |
606 | * slot. It means the user is writing dr7 without having | ||
607 | * written the address register first | ||
608 | */ | ||
609 | if (!bp) | ||
610 | return ERR_PTR(-EINVAL); | ||
482 | 611 | ||
483 | if (n < 4 && unlikely(data >= debugreg_addr_limit(child))) | 612 | err = arch_bp_generic_fields(len, type, &gen_len, &gen_type); |
484 | return -EIO; | 613 | if (err) |
614 | return ERR_PTR(err); | ||
485 | 615 | ||
486 | switch (n) { | 616 | attr = bp->attr; |
487 | case 0: child->thread.debugreg0 = data; break; | 617 | attr.bp_len = gen_len; |
488 | case 1: child->thread.debugreg1 = data; break; | 618 | attr.bp_type = gen_type; |
489 | case 2: child->thread.debugreg2 = data; break; | 619 | attr.disabled = disabled; |
490 | case 3: child->thread.debugreg3 = data; break; | ||
491 | 620 | ||
492 | case 6: | 621 | return modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk); |
493 | if ((data & ~0xffffffffUL) != 0) | 622 | } |
494 | return -EIO; | 623 | |
495 | child->thread.debugreg6 = data; | 624 | /* |
496 | break; | 625 | * Handle ptrace writes to debug register 7. |
626 | */ | ||
627 | static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) | ||
628 | { | ||
629 | struct thread_struct *thread = &(tsk->thread); | ||
630 | unsigned long old_dr7; | ||
631 | int i, orig_ret = 0, rc = 0; | ||
632 | int enabled, second_pass = 0; | ||
633 | unsigned len, type; | ||
634 | struct perf_event *bp; | ||
635 | |||
636 | data &= ~DR_CONTROL_RESERVED; | ||
637 | old_dr7 = ptrace_get_dr7(thread->ptrace_bps); | ||
638 | restore: | ||
639 | /* | ||
640 | * Loop through all the hardware breakpoints, making the | ||
641 | * appropriate changes to each. | ||
642 | */ | ||
643 | for (i = 0; i < HBP_NUM; i++) { | ||
644 | enabled = decode_dr7(data, i, &len, &type); | ||
645 | bp = thread->ptrace_bps[i]; | ||
646 | |||
647 | if (!enabled) { | ||
648 | if (bp) { | ||
649 | /* | ||
650 | * Don't unregister the breakpoints right-away, | ||
651 | * unless all register_user_hw_breakpoint() | ||
652 | * requests have succeeded. This prevents | ||
653 | * any window of opportunity for debug | ||
654 | * register grabbing by other users. | ||
655 | */ | ||
656 | if (!second_pass) | ||
657 | continue; | ||
658 | |||
659 | thread->ptrace_bps[i] = NULL; | ||
660 | bp = ptrace_modify_breakpoint(bp, len, type, | ||
661 | tsk, 1); | ||
662 | if (IS_ERR(bp)) { | ||
663 | rc = PTR_ERR(bp); | ||
664 | thread->ptrace_bps[i] = NULL; | ||
665 | break; | ||
666 | } | ||
667 | thread->ptrace_bps[i] = bp; | ||
668 | } | ||
669 | continue; | ||
670 | } | ||
671 | |||
672 | bp = ptrace_modify_breakpoint(bp, len, type, tsk, 0); | ||
673 | |||
674 | /* Incorrect bp, or we have a bug in bp API */ | ||
675 | if (IS_ERR(bp)) { | ||
676 | rc = PTR_ERR(bp); | ||
677 | thread->ptrace_bps[i] = NULL; | ||
678 | break; | ||
679 | } | ||
680 | thread->ptrace_bps[i] = bp; | ||
681 | } | ||
682 | /* | ||
683 | * Make a second pass to free the remaining unused breakpoints | ||
684 | * or to restore the original breakpoints if an error occurred. | ||
685 | */ | ||
686 | if (!second_pass) { | ||
687 | second_pass = 1; | ||
688 | if (rc < 0) { | ||
689 | orig_ret = rc; | ||
690 | data = old_dr7; | ||
691 | } | ||
692 | goto restore; | ||
693 | } | ||
694 | return ((orig_ret < 0) ? orig_ret : rc); | ||
695 | } | ||
696 | |||
697 | /* | ||
698 | * Handle PTRACE_PEEKUSR calls for the debug register area. | ||
699 | */ | ||
700 | static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) | ||
701 | { | ||
702 | struct thread_struct *thread = &(tsk->thread); | ||
703 | unsigned long val = 0; | ||
497 | 704 | ||
498 | case 7: | 705 | if (n < HBP_NUM) { |
706 | struct perf_event *bp; | ||
707 | bp = thread->ptrace_bps[n]; | ||
708 | if (!bp) | ||
709 | return 0; | ||
710 | val = bp->hw.info.address; | ||
711 | } else if (n == 6) { | ||
712 | val = thread->debugreg6; | ||
713 | } else if (n == 7) { | ||
714 | val = ptrace_get_dr7(thread->ptrace_bps); | ||
715 | } | ||
716 | return val; | ||
717 | } | ||
718 | |||
719 | static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, | ||
720 | unsigned long addr) | ||
721 | { | ||
722 | struct perf_event *bp; | ||
723 | struct thread_struct *t = &tsk->thread; | ||
724 | DEFINE_BREAKPOINT_ATTR(attr); | ||
725 | |||
726 | if (!t->ptrace_bps[nr]) { | ||
499 | /* | 727 | /* |
500 | * Sanity-check data. Take one half-byte at once with | 728 | * Put stub len and type to register (reserve) an inactive but |
501 | * check = (val >> (16 + 4*i)) & 0xf. It contains the | 729 | * correct bp |
502 | * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits | ||
503 | * 2 and 3 are LENi. Given a list of invalid values, | ||
504 | * we do mask |= 1 << invalid_value, so that | ||
505 | * (mask >> check) & 1 is a correct test for invalid | ||
506 | * values. | ||
507 | * | ||
508 | * R/Wi contains the type of the breakpoint / | ||
509 | * watchpoint, LENi contains the length of the watched | ||
510 | * data in the watchpoint case. | ||
511 | * | ||
512 | * The invalid values are: | ||
513 | * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit] | ||
514 | * - R/Wi == 0x10 (break on I/O reads or writes), so | ||
515 | * mask |= 0x4444. | ||
516 | * - R/Wi == 0x00 && LENi != 0x00, so we have mask |= | ||
517 | * 0x1110. | ||
518 | * | ||
519 | * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54. | ||
520 | * | ||
521 | * See the Intel Manual "System Programming Guide", | ||
522 | * 15.2.4 | ||
523 | * | ||
524 | * Note that LENi == 0x10 is defined on x86_64 in long | ||
525 | * mode (i.e. even for 32-bit userspace software, but | ||
526 | * 64-bit kernel), so the x86_64 mask value is 0x5454. | ||
527 | * See the AMD manual no. 24593 (AMD64 System Programming) | ||
528 | */ | 730 | */ |
529 | #ifdef CONFIG_X86_32 | 731 | attr.bp_addr = addr; |
530 | #define DR7_MASK 0x5f54 | 732 | attr.bp_len = HW_BREAKPOINT_LEN_1; |
531 | #else | 733 | attr.bp_type = HW_BREAKPOINT_W; |
532 | #define DR7_MASK 0x5554 | 734 | attr.disabled = 1; |
533 | #endif | 735 | |
534 | data &= ~DR_CONTROL_RESERVED; | 736 | bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk); |
535 | for (i = 0; i < 4; i++) | 737 | } else { |
536 | if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1) | 738 | bp = t->ptrace_bps[nr]; |
537 | return -EIO; | 739 | t->ptrace_bps[nr] = NULL; |
538 | child->thread.debugreg7 = data; | 740 | |
539 | if (data) | 741 | attr = bp->attr; |
540 | set_tsk_thread_flag(child, TIF_DEBUG); | 742 | attr.bp_addr = addr; |
541 | else | 743 | bp = modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk); |
542 | clear_tsk_thread_flag(child, TIF_DEBUG); | ||
543 | break; | ||
544 | } | 744 | } |
745 | /* | ||
746 | * CHECKME: the previous code returned -EIO if the addr wasn't a | ||
747 | * valid task virtual addr. The new one will return -EINVAL in this | ||
748 | * case. | ||
749 | * -EINVAL may be what we want for in-kernel breakpoints users, but | ||
750 | * -EIO looks better for ptrace, since we refuse a register writing | ||
751 | * for the user. And anyway this is the previous behaviour. | ||
752 | */ | ||
753 | if (IS_ERR(bp)) | ||
754 | return PTR_ERR(bp); | ||
755 | |||
756 | t->ptrace_bps[nr] = bp; | ||
545 | 757 | ||
546 | return 0; | 758 | return 0; |
547 | } | 759 | } |
548 | 760 | ||
549 | /* | 761 | /* |
762 | * Handle PTRACE_POKEUSR calls for the debug register area. | ||
763 | */ | ||
764 | int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) | ||
765 | { | ||
766 | struct thread_struct *thread = &(tsk->thread); | ||
767 | int rc = 0; | ||
768 | |||
769 | /* There are no DR4 or DR5 registers */ | ||
770 | if (n == 4 || n == 5) | ||
771 | return -EIO; | ||
772 | |||
773 | if (n == 6) { | ||
774 | thread->debugreg6 = val; | ||
775 | goto ret_path; | ||
776 | } | ||
777 | if (n < HBP_NUM) { | ||
778 | rc = ptrace_set_breakpoint_addr(tsk, n, val); | ||
779 | if (rc) | ||
780 | return rc; | ||
781 | } | ||
782 | /* All that's left is DR7 */ | ||
783 | if (n == 7) | ||
784 | rc = ptrace_write_dr7(tsk, val); | ||
785 | |||
786 | ret_path: | ||
787 | return rc; | ||
788 | } | ||
789 | |||
790 | /* | ||
550 | * These access the current or another (stopped) task's io permission | 791 | * These access the current or another (stopped) task's io permission |
551 | * bitmap for debugging or core dump. | 792 | * bitmap for debugging or core dump. |
552 | */ | 793 | */ |
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index f93078746e00..2b97fc5b124e 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -23,7 +23,7 @@ | |||
23 | # include <linux/ctype.h> | 23 | # include <linux/ctype.h> |
24 | # include <linux/mc146818rtc.h> | 24 | # include <linux/mc146818rtc.h> |
25 | #else | 25 | #else |
26 | # include <asm/iommu.h> | 26 | # include <asm/x86_init.h> |
27 | #endif | 27 | #endif |
28 | 28 | ||
29 | /* | 29 | /* |
@@ -622,7 +622,7 @@ void native_machine_shutdown(void) | |||
622 | #endif | 622 | #endif |
623 | 623 | ||
624 | #ifdef CONFIG_X86_64 | 624 | #ifdef CONFIG_X86_64 |
625 | pci_iommu_shutdown(); | 625 | x86_platform.iommu_shutdown(); |
626 | #endif | 626 | #endif |
627 | } | 627 | } |
628 | 628 | ||
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 2a34f9c5be21..82e88cdda9bc 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -109,6 +109,7 @@ | |||
109 | #ifdef CONFIG_X86_64 | 109 | #ifdef CONFIG_X86_64 |
110 | #include <asm/numa_64.h> | 110 | #include <asm/numa_64.h> |
111 | #endif | 111 | #endif |
112 | #include <asm/mce.h> | ||
112 | 113 | ||
113 | /* | 114 | /* |
114 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. | 115 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. |
@@ -247,7 +248,7 @@ EXPORT_SYMBOL(edd); | |||
247 | * from boot_params into a safe place. | 248 | * from boot_params into a safe place. |
248 | * | 249 | * |
249 | */ | 250 | */ |
250 | static inline void copy_edd(void) | 251 | static inline void __init copy_edd(void) |
251 | { | 252 | { |
252 | memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer, | 253 | memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer, |
253 | sizeof(edd.mbr_signature)); | 254 | sizeof(edd.mbr_signature)); |
@@ -256,7 +257,7 @@ static inline void copy_edd(void) | |||
256 | edd.edd_info_nr = boot_params.eddbuf_entries; | 257 | edd.edd_info_nr = boot_params.eddbuf_entries; |
257 | } | 258 | } |
258 | #else | 259 | #else |
259 | static inline void copy_edd(void) | 260 | static inline void __init copy_edd(void) |
260 | { | 261 | { |
261 | } | 262 | } |
262 | #endif | 263 | #endif |
@@ -1031,6 +1032,8 @@ void __init setup_arch(char **cmdline_p) | |||
1031 | #endif | 1032 | #endif |
1032 | #endif | 1033 | #endif |
1033 | x86_init.oem.banner(); | 1034 | x86_init.oem.banner(); |
1035 | |||
1036 | mcheck_init(); | ||
1034 | } | 1037 | } |
1035 | 1038 | ||
1036 | #ifdef CONFIG_X86_32 | 1039 | #ifdef CONFIG_X86_32 |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 6a44a76055ad..fbf3b07c8567 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -799,15 +799,6 @@ static void do_signal(struct pt_regs *regs) | |||
799 | 799 | ||
800 | signr = get_signal_to_deliver(&info, &ka, regs, NULL); | 800 | signr = get_signal_to_deliver(&info, &ka, regs, NULL); |
801 | if (signr > 0) { | 801 | if (signr > 0) { |
802 | /* | ||
803 | * Re-enable any watchpoints before delivering the | ||
804 | * signal to user space. The processor register will | ||
805 | * have been cleared if the watchpoint triggered | ||
806 | * inside the kernel. | ||
807 | */ | ||
808 | if (current->thread.debugreg7) | ||
809 | set_debugreg(current->thread.debugreg7, 7); | ||
810 | |||
811 | /* Whee! Actually deliver the signal. */ | 802 | /* Whee! Actually deliver the signal. */ |
812 | if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { | 803 | if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { |
813 | /* | 804 | /* |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 565ebc65920e..324f2a44c221 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -1250,16 +1250,7 @@ static void __ref remove_cpu_from_maps(int cpu) | |||
1250 | void cpu_disable_common(void) | 1250 | void cpu_disable_common(void) |
1251 | { | 1251 | { |
1252 | int cpu = smp_processor_id(); | 1252 | int cpu = smp_processor_id(); |
1253 | /* | ||
1254 | * HACK: | ||
1255 | * Allow any queued timer interrupts to get serviced | ||
1256 | * This is only a temporary solution until we cleanup | ||
1257 | * fixup_irqs as we do for IA64. | ||
1258 | */ | ||
1259 | local_irq_enable(); | ||
1260 | mdelay(1); | ||
1261 | 1253 | ||
1262 | local_irq_disable(); | ||
1263 | remove_siblinginfo(cpu); | 1254 | remove_siblinginfo(cpu); |
1264 | 1255 | ||
1265 | /* It's now safe to remove this processor from the online map */ | 1256 | /* It's now safe to remove this processor from the online map */ |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 7e37dcee0cc3..33399176512a 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -529,77 +529,56 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) | |||
529 | dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | 529 | dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) |
530 | { | 530 | { |
531 | struct task_struct *tsk = current; | 531 | struct task_struct *tsk = current; |
532 | unsigned long condition; | 532 | unsigned long dr6; |
533 | int si_code; | 533 | int si_code; |
534 | 534 | ||
535 | get_debugreg(condition, 6); | 535 | get_debugreg(dr6, 6); |
536 | 536 | ||
537 | /* Catch kmemcheck conditions first of all! */ | 537 | /* Catch kmemcheck conditions first of all! */ |
538 | if (condition & DR_STEP && kmemcheck_trap(regs)) | 538 | if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) |
539 | return; | 539 | return; |
540 | 540 | ||
541 | /* DR6 may or may not be cleared by the CPU */ | ||
542 | set_debugreg(0, 6); | ||
541 | /* | 543 | /* |
542 | * The processor cleared BTF, so don't mark that we need it set. | 544 | * The processor cleared BTF, so don't mark that we need it set. |
543 | */ | 545 | */ |
544 | clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); | 546 | clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); |
545 | tsk->thread.debugctlmsr = 0; | 547 | tsk->thread.debugctlmsr = 0; |
546 | 548 | ||
547 | if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, | 549 | /* Store the virtualized DR6 value */ |
548 | SIGTRAP) == NOTIFY_STOP) | 550 | tsk->thread.debugreg6 = dr6; |
551 | |||
552 | if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code, | ||
553 | SIGTRAP) == NOTIFY_STOP) | ||
549 | return; | 554 | return; |
550 | 555 | ||
551 | /* It's safe to allow irq's after DR6 has been saved */ | 556 | /* It's safe to allow irq's after DR6 has been saved */ |
552 | preempt_conditional_sti(regs); | 557 | preempt_conditional_sti(regs); |
553 | 558 | ||
554 | /* Mask out spurious debug traps due to lazy DR7 setting */ | 559 | if (regs->flags & X86_VM_MASK) { |
555 | if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { | 560 | handle_vm86_trap((struct kernel_vm86_regs *) regs, |
556 | if (!tsk->thread.debugreg7) | 561 | error_code, 1); |
557 | goto clear_dr7; | 562 | return; |
558 | } | 563 | } |
559 | 564 | ||
560 | #ifdef CONFIG_X86_32 | ||
561 | if (regs->flags & X86_VM_MASK) | ||
562 | goto debug_vm86; | ||
563 | #endif | ||
564 | |||
565 | /* Save debug status register where ptrace can see it */ | ||
566 | tsk->thread.debugreg6 = condition; | ||
567 | |||
568 | /* | 565 | /* |
569 | * Single-stepping through TF: make sure we ignore any events in | 566 | * Single-stepping through system calls: ignore any exceptions in |
570 | * kernel space (but re-enable TF when returning to user mode). | 567 | * kernel space, but re-enable TF when returning to user mode. |
568 | * | ||
569 | * We already checked v86 mode above, so we can check for kernel mode | ||
570 | * by just checking the CPL of CS. | ||
571 | */ | 571 | */ |
572 | if (condition & DR_STEP) { | 572 | if ((dr6 & DR_STEP) && !user_mode(regs)) { |
573 | if (!user_mode(regs)) | 573 | tsk->thread.debugreg6 &= ~DR_STEP; |
574 | goto clear_TF_reenable; | 574 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); |
575 | regs->flags &= ~X86_EFLAGS_TF; | ||
575 | } | 576 | } |
576 | 577 | si_code = get_si_code(tsk->thread.debugreg6); | |
577 | si_code = get_si_code(condition); | 578 | if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS)) |
578 | /* Ok, finally something we can handle */ | 579 | send_sigtrap(tsk, regs, error_code, si_code); |
579 | send_sigtrap(tsk, regs, error_code, si_code); | ||
580 | |||
581 | /* | ||
582 | * Disable additional traps. They'll be re-enabled when | ||
583 | * the signal is delivered. | ||
584 | */ | ||
585 | clear_dr7: | ||
586 | set_debugreg(0, 7); | ||
587 | preempt_conditional_cli(regs); | 580 | preempt_conditional_cli(regs); |
588 | return; | ||
589 | 581 | ||
590 | #ifdef CONFIG_X86_32 | ||
591 | debug_vm86: | ||
592 | /* reenable preemption: handle_vm86_trap() might sleep */ | ||
593 | dec_preempt_count(); | ||
594 | handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); | ||
595 | conditional_cli(regs); | ||
596 | return; | ||
597 | #endif | ||
598 | |||
599 | clear_TF_reenable: | ||
600 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); | ||
601 | regs->flags &= ~X86_EFLAGS_TF; | ||
602 | preempt_conditional_cli(regs); | ||
603 | return; | 582 | return; |
604 | } | 583 | } |
605 | 584 | ||
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index f37930954d15..eed156851f5d 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c | |||
@@ -114,13 +114,12 @@ void __cpuinit check_tsc_sync_source(int cpu) | |||
114 | return; | 114 | return; |
115 | 115 | ||
116 | if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { | 116 | if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { |
117 | printk_once(KERN_INFO "Skipping synchronization checks as TSC is reliable.\n"); | 117 | if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING) |
118 | pr_info( | ||
119 | "Skipped synchronization checks as TSC is reliable.\n"); | ||
118 | return; | 120 | return; |
119 | } | 121 | } |
120 | 122 | ||
121 | pr_info("checking TSC synchronization [CPU#%d -> CPU#%d]:", | ||
122 | smp_processor_id(), cpu); | ||
123 | |||
124 | /* | 123 | /* |
125 | * Reset it - in case this is a second bootup: | 124 | * Reset it - in case this is a second bootup: |
126 | */ | 125 | */ |
@@ -142,12 +141,14 @@ void __cpuinit check_tsc_sync_source(int cpu) | |||
142 | cpu_relax(); | 141 | cpu_relax(); |
143 | 142 | ||
144 | if (nr_warps) { | 143 | if (nr_warps) { |
145 | printk("\n"); | 144 | pr_warning("TSC synchronization [CPU#%d -> CPU#%d]:\n", |
145 | smp_processor_id(), cpu); | ||
146 | pr_warning("Measured %Ld cycles TSC warp between CPUs, " | 146 | pr_warning("Measured %Ld cycles TSC warp between CPUs, " |
147 | "turning off TSC clock.\n", max_warp); | 147 | "turning off TSC clock.\n", max_warp); |
148 | mark_tsc_unstable("check_tsc_sync_source failed"); | 148 | mark_tsc_unstable("check_tsc_sync_source failed"); |
149 | } else { | 149 | } else { |
150 | printk(" passed.\n"); | 150 | pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n", |
151 | smp_processor_id(), cpu); | ||
151 | } | 152 | } |
152 | 153 | ||
153 | /* | 154 | /* |
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c index aeef529917e4..61d805df4c91 100644 --- a/arch/x86/kernel/uv_irq.c +++ b/arch/x86/kernel/uv_irq.c | |||
@@ -9,10 +9,25 @@ | |||
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/rbtree.h> | ||
12 | #include <linux/irq.h> | 13 | #include <linux/irq.h> |
13 | 14 | ||
14 | #include <asm/apic.h> | 15 | #include <asm/apic.h> |
15 | #include <asm/uv/uv_irq.h> | 16 | #include <asm/uv/uv_irq.h> |
17 | #include <asm/uv/uv_hub.h> | ||
18 | |||
19 | /* MMR offset and pnode of hub sourcing interrupts for a given irq */ | ||
20 | struct uv_irq_2_mmr_pnode{ | ||
21 | struct rb_node list; | ||
22 | unsigned long offset; | ||
23 | int pnode; | ||
24 | int irq; | ||
25 | }; | ||
26 | |||
27 | static spinlock_t uv_irq_lock; | ||
28 | static struct rb_root uv_irq_root; | ||
29 | |||
30 | static int uv_set_irq_affinity(unsigned int, const struct cpumask *); | ||
16 | 31 | ||
17 | static void uv_noop(unsigned int irq) | 32 | static void uv_noop(unsigned int irq) |
18 | { | 33 | { |
@@ -39,25 +54,214 @@ struct irq_chip uv_irq_chip = { | |||
39 | .unmask = uv_noop, | 54 | .unmask = uv_noop, |
40 | .eoi = uv_ack_apic, | 55 | .eoi = uv_ack_apic, |
41 | .end = uv_noop, | 56 | .end = uv_noop, |
57 | .set_affinity = uv_set_irq_affinity, | ||
42 | }; | 58 | }; |
43 | 59 | ||
44 | /* | 60 | /* |
61 | * Add offset and pnode information of the hub sourcing interrupts to the | ||
62 | * rb tree for a specific irq. | ||
63 | */ | ||
64 | static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade) | ||
65 | { | ||
66 | struct rb_node **link = &uv_irq_root.rb_node; | ||
67 | struct rb_node *parent = NULL; | ||
68 | struct uv_irq_2_mmr_pnode *n; | ||
69 | struct uv_irq_2_mmr_pnode *e; | ||
70 | unsigned long irqflags; | ||
71 | |||
72 | n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL, | ||
73 | uv_blade_to_memory_nid(blade)); | ||
74 | if (!n) | ||
75 | return -ENOMEM; | ||
76 | |||
77 | n->irq = irq; | ||
78 | n->offset = offset; | ||
79 | n->pnode = uv_blade_to_pnode(blade); | ||
80 | spin_lock_irqsave(&uv_irq_lock, irqflags); | ||
81 | /* Find the right place in the rbtree: */ | ||
82 | while (*link) { | ||
83 | parent = *link; | ||
84 | e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list); | ||
85 | |||
86 | if (unlikely(irq == e->irq)) { | ||
87 | /* irq entry exists */ | ||
88 | e->pnode = uv_blade_to_pnode(blade); | ||
89 | e->offset = offset; | ||
90 | spin_unlock_irqrestore(&uv_irq_lock, irqflags); | ||
91 | kfree(n); | ||
92 | return 0; | ||
93 | } | ||
94 | |||
95 | if (irq < e->irq) | ||
96 | link = &(*link)->rb_left; | ||
97 | else | ||
98 | link = &(*link)->rb_right; | ||
99 | } | ||
100 | |||
101 | /* Insert the node into the rbtree. */ | ||
102 | rb_link_node(&n->list, parent, link); | ||
103 | rb_insert_color(&n->list, &uv_irq_root); | ||
104 | |||
105 | spin_unlock_irqrestore(&uv_irq_lock, irqflags); | ||
106 | return 0; | ||
107 | } | ||
108 | |||
109 | /* Retrieve offset and pnode information from the rb tree for a specific irq */ | ||
110 | int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode) | ||
111 | { | ||
112 | struct uv_irq_2_mmr_pnode *e; | ||
113 | struct rb_node *n; | ||
114 | unsigned long irqflags; | ||
115 | |||
116 | spin_lock_irqsave(&uv_irq_lock, irqflags); | ||
117 | n = uv_irq_root.rb_node; | ||
118 | while (n) { | ||
119 | e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); | ||
120 | |||
121 | if (e->irq == irq) { | ||
122 | *offset = e->offset; | ||
123 | *pnode = e->pnode; | ||
124 | spin_unlock_irqrestore(&uv_irq_lock, irqflags); | ||
125 | return 0; | ||
126 | } | ||
127 | |||
128 | if (irq < e->irq) | ||
129 | n = n->rb_left; | ||
130 | else | ||
131 | n = n->rb_right; | ||
132 | } | ||
133 | spin_unlock_irqrestore(&uv_irq_lock, irqflags); | ||
134 | return -1; | ||
135 | } | ||
136 | |||
137 | /* | ||
138 | * Re-target the irq to the specified CPU and enable the specified MMR located | ||
139 | * on the specified blade to allow the sending of MSIs to the specified CPU. | ||
140 | */ | ||
141 | static int | ||
142 | arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | ||
143 | unsigned long mmr_offset, int restrict) | ||
144 | { | ||
145 | const struct cpumask *eligible_cpu = cpumask_of(cpu); | ||
146 | struct irq_desc *desc = irq_to_desc(irq); | ||
147 | struct irq_cfg *cfg; | ||
148 | int mmr_pnode; | ||
149 | unsigned long mmr_value; | ||
150 | struct uv_IO_APIC_route_entry *entry; | ||
151 | int err; | ||
152 | |||
153 | BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != | ||
154 | sizeof(unsigned long)); | ||
155 | |||
156 | cfg = irq_cfg(irq); | ||
157 | |||
158 | err = assign_irq_vector(irq, cfg, eligible_cpu); | ||
159 | if (err != 0) | ||
160 | return err; | ||
161 | |||
162 | if (restrict == UV_AFFINITY_CPU) | ||
163 | desc->status |= IRQ_NO_BALANCING; | ||
164 | else | ||
165 | desc->status |= IRQ_MOVE_PCNTXT; | ||
166 | |||
167 | set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, | ||
168 | irq_name); | ||
169 | |||
170 | mmr_value = 0; | ||
171 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; | ||
172 | entry->vector = cfg->vector; | ||
173 | entry->delivery_mode = apic->irq_delivery_mode; | ||
174 | entry->dest_mode = apic->irq_dest_mode; | ||
175 | entry->polarity = 0; | ||
176 | entry->trigger = 0; | ||
177 | entry->mask = 0; | ||
178 | entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); | ||
179 | |||
180 | mmr_pnode = uv_blade_to_pnode(mmr_blade); | ||
181 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | ||
182 | |||
183 | if (cfg->move_in_progress) | ||
184 | send_cleanup_vector(cfg); | ||
185 | |||
186 | return irq; | ||
187 | } | ||
188 | |||
189 | /* | ||
190 | * Disable the specified MMR located on the specified blade so that MSIs are | ||
191 | * longer allowed to be sent. | ||
192 | */ | ||
193 | static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset) | ||
194 | { | ||
195 | unsigned long mmr_value; | ||
196 | struct uv_IO_APIC_route_entry *entry; | ||
197 | |||
198 | BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != | ||
199 | sizeof(unsigned long)); | ||
200 | |||
201 | mmr_value = 0; | ||
202 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; | ||
203 | entry->mask = 1; | ||
204 | |||
205 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | ||
206 | } | ||
207 | |||
208 | static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask) | ||
209 | { | ||
210 | struct irq_desc *desc = irq_to_desc(irq); | ||
211 | struct irq_cfg *cfg = desc->chip_data; | ||
212 | unsigned int dest; | ||
213 | unsigned long mmr_value; | ||
214 | struct uv_IO_APIC_route_entry *entry; | ||
215 | unsigned long mmr_offset; | ||
216 | unsigned mmr_pnode; | ||
217 | |||
218 | dest = set_desc_affinity(desc, mask); | ||
219 | if (dest == BAD_APICID) | ||
220 | return -1; | ||
221 | |||
222 | mmr_value = 0; | ||
223 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; | ||
224 | |||
225 | entry->vector = cfg->vector; | ||
226 | entry->delivery_mode = apic->irq_delivery_mode; | ||
227 | entry->dest_mode = apic->irq_dest_mode; | ||
228 | entry->polarity = 0; | ||
229 | entry->trigger = 0; | ||
230 | entry->mask = 0; | ||
231 | entry->dest = dest; | ||
232 | |||
233 | /* Get previously stored MMR and pnode of hub sourcing interrupts */ | ||
234 | if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode)) | ||
235 | return -1; | ||
236 | |||
237 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | ||
238 | |||
239 | if (cfg->move_in_progress) | ||
240 | send_cleanup_vector(cfg); | ||
241 | |||
242 | return 0; | ||
243 | } | ||
244 | |||
245 | /* | ||
45 | * Set up a mapping of an available irq and vector, and enable the specified | 246 | * Set up a mapping of an available irq and vector, and enable the specified |
46 | * MMR that defines the MSI that is to be sent to the specified CPU when an | 247 | * MMR that defines the MSI that is to be sent to the specified CPU when an |
47 | * interrupt is raised. | 248 | * interrupt is raised. |
48 | */ | 249 | */ |
49 | int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, | 250 | int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, |
50 | unsigned long mmr_offset) | 251 | unsigned long mmr_offset, int restrict) |
51 | { | 252 | { |
52 | int irq; | 253 | int irq, ret; |
53 | int ret; | 254 | |
255 | irq = create_irq_nr(NR_IRQS_LEGACY, uv_blade_to_memory_nid(mmr_blade)); | ||
54 | 256 | ||
55 | irq = create_irq(); | ||
56 | if (irq <= 0) | 257 | if (irq <= 0) |
57 | return -EBUSY; | 258 | return -EBUSY; |
58 | 259 | ||
59 | ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset); | 260 | ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset, |
60 | if (ret != irq) | 261 | restrict); |
262 | if (ret == irq) | ||
263 | uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade); | ||
264 | else | ||
61 | destroy_irq(irq); | 265 | destroy_irq(irq); |
62 | 266 | ||
63 | return ret; | 267 | return ret; |
@@ -71,9 +275,28 @@ EXPORT_SYMBOL_GPL(uv_setup_irq); | |||
71 | * | 275 | * |
72 | * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq(). | 276 | * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq(). |
73 | */ | 277 | */ |
74 | void uv_teardown_irq(unsigned int irq, int mmr_blade, unsigned long mmr_offset) | 278 | void uv_teardown_irq(unsigned int irq) |
75 | { | 279 | { |
76 | arch_disable_uv_irq(mmr_blade, mmr_offset); | 280 | struct uv_irq_2_mmr_pnode *e; |
281 | struct rb_node *n; | ||
282 | unsigned long irqflags; | ||
283 | |||
284 | spin_lock_irqsave(&uv_irq_lock, irqflags); | ||
285 | n = uv_irq_root.rb_node; | ||
286 | while (n) { | ||
287 | e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); | ||
288 | if (e->irq == irq) { | ||
289 | arch_disable_uv_irq(e->pnode, e->offset); | ||
290 | rb_erase(n, &uv_irq_root); | ||
291 | kfree(e); | ||
292 | break; | ||
293 | } | ||
294 | if (irq < e->irq) | ||
295 | n = n->rb_left; | ||
296 | else | ||
297 | n = n->rb_right; | ||
298 | } | ||
299 | spin_unlock_irqrestore(&uv_irq_lock, irqflags); | ||
77 | destroy_irq(irq); | 300 | destroy_irq(irq); |
78 | } | 301 | } |
79 | EXPORT_SYMBOL_GPL(uv_teardown_irq); | 302 | EXPORT_SYMBOL_GPL(uv_teardown_irq); |
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index f068553a1b17..abda6f53e71e 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c | |||
@@ -183,7 +183,7 @@ static void __init MP_processor_info(struct mpc_cpu *m) | |||
183 | return; | 183 | return; |
184 | } | 184 | } |
185 | 185 | ||
186 | apic_cpus = apic->apicid_to_cpu_present(m->apicid); | 186 | apic->apicid_to_cpu_present(m->apicid, &apic_cpus); |
187 | physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus); | 187 | physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus); |
188 | /* | 188 | /* |
189 | * Validate version | 189 | * Validate version |
@@ -486,7 +486,7 @@ static void end_cobalt_irq(unsigned int irq) | |||
486 | } | 486 | } |
487 | 487 | ||
488 | static struct irq_chip cobalt_irq_type = { | 488 | static struct irq_chip cobalt_irq_type = { |
489 | .typename = "Cobalt-APIC", | 489 | .name = "Cobalt-APIC", |
490 | .startup = startup_cobalt_irq, | 490 | .startup = startup_cobalt_irq, |
491 | .shutdown = disable_cobalt_irq, | 491 | .shutdown = disable_cobalt_irq, |
492 | .enable = enable_cobalt_irq, | 492 | .enable = enable_cobalt_irq, |
@@ -523,7 +523,7 @@ static void end_piix4_master_irq(unsigned int irq) | |||
523 | } | 523 | } |
524 | 524 | ||
525 | static struct irq_chip piix4_master_irq_type = { | 525 | static struct irq_chip piix4_master_irq_type = { |
526 | .typename = "PIIX4-master", | 526 | .name = "PIIX4-master", |
527 | .startup = startup_piix4_master_irq, | 527 | .startup = startup_piix4_master_irq, |
528 | .ack = ack_cobalt_irq, | 528 | .ack = ack_cobalt_irq, |
529 | .end = end_piix4_master_irq, | 529 | .end = end_piix4_master_irq, |
@@ -531,7 +531,7 @@ static struct irq_chip piix4_master_irq_type = { | |||
531 | 531 | ||
532 | 532 | ||
533 | static struct irq_chip piix4_virtual_irq_type = { | 533 | static struct irq_chip piix4_virtual_irq_type = { |
534 | .typename = "PIIX4-virtual", | 534 | .name = "PIIX4-virtual", |
535 | .shutdown = disable_8259A_irq, | 535 | .shutdown = disable_8259A_irq, |
536 | .enable = enable_8259A_irq, | 536 | .enable = enable_8259A_irq, |
537 | .disable = disable_8259A_irq, | 537 | .disable = disable_8259A_irq, |
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 3909e3ba5ce3..a1029769b6f2 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c | |||
@@ -30,9 +30,8 @@ EXPORT_SYMBOL(__put_user_8); | |||
30 | 30 | ||
31 | EXPORT_SYMBOL(copy_user_generic); | 31 | EXPORT_SYMBOL(copy_user_generic); |
32 | EXPORT_SYMBOL(__copy_user_nocache); | 32 | EXPORT_SYMBOL(__copy_user_nocache); |
33 | EXPORT_SYMBOL(copy_from_user); | 33 | EXPORT_SYMBOL(_copy_from_user); |
34 | EXPORT_SYMBOL(copy_to_user); | 34 | EXPORT_SYMBOL(_copy_to_user); |
35 | EXPORT_SYMBOL(__copy_from_user_inatomic); | ||
36 | 35 | ||
37 | EXPORT_SYMBOL(copy_page); | 36 | EXPORT_SYMBOL(copy_page); |
38 | EXPORT_SYMBOL(clear_page); | 37 | EXPORT_SYMBOL(clear_page); |
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 4449a4a2c2ed..d11c5ff7c65e 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c | |||
@@ -14,10 +14,13 @@ | |||
14 | #include <asm/time.h> | 14 | #include <asm/time.h> |
15 | #include <asm/irq.h> | 15 | #include <asm/irq.h> |
16 | #include <asm/tsc.h> | 16 | #include <asm/tsc.h> |
17 | #include <asm/iommu.h> | ||
17 | 18 | ||
18 | void __cpuinit x86_init_noop(void) { } | 19 | void __cpuinit x86_init_noop(void) { } |
19 | void __init x86_init_uint_noop(unsigned int unused) { } | 20 | void __init x86_init_uint_noop(unsigned int unused) { } |
20 | void __init x86_init_pgd_noop(pgd_t *unused) { } | 21 | void __init x86_init_pgd_noop(pgd_t *unused) { } |
22 | int __init iommu_init_noop(void) { return 0; } | ||
23 | void iommu_shutdown_noop(void) { } | ||
21 | 24 | ||
22 | /* | 25 | /* |
23 | * The platform setup functions are preset with the default functions | 26 | * The platform setup functions are preset with the default functions |
@@ -62,6 +65,10 @@ struct x86_init_ops x86_init __initdata = { | |||
62 | .tsc_pre_init = x86_init_noop, | 65 | .tsc_pre_init = x86_init_noop, |
63 | .timer_init = hpet_time_init, | 66 | .timer_init = hpet_time_init, |
64 | }, | 67 | }, |
68 | |||
69 | .iommu = { | ||
70 | .iommu_init = iommu_init_noop, | ||
71 | }, | ||
65 | }; | 72 | }; |
66 | 73 | ||
67 | struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { | 74 | struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { |
@@ -72,4 +79,5 @@ struct x86_platform_ops x86_platform = { | |||
72 | .calibrate_tsc = native_calibrate_tsc, | 79 | .calibrate_tsc = native_calibrate_tsc, |
73 | .get_wallclock = mach_get_cmos_time, | 80 | .get_wallclock = mach_get_cmos_time, |
74 | .set_wallclock = mach_set_rtc_mmss, | 81 | .set_wallclock = mach_set_rtc_mmss, |
82 | .iommu_shutdown = iommu_shutdown_noop, | ||
75 | }; | 83 | }; |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ae07d261527c..4fc80174191c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -42,6 +42,7 @@ | |||
42 | #define CREATE_TRACE_POINTS | 42 | #define CREATE_TRACE_POINTS |
43 | #include "trace.h" | 43 | #include "trace.h" |
44 | 44 | ||
45 | #include <asm/debugreg.h> | ||
45 | #include <asm/uaccess.h> | 46 | #include <asm/uaccess.h> |
46 | #include <asm/msr.h> | 47 | #include <asm/msr.h> |
47 | #include <asm/desc.h> | 48 | #include <asm/desc.h> |
@@ -3643,14 +3644,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3643 | trace_kvm_entry(vcpu->vcpu_id); | 3644 | trace_kvm_entry(vcpu->vcpu_id); |
3644 | kvm_x86_ops->run(vcpu, kvm_run); | 3645 | kvm_x86_ops->run(vcpu, kvm_run); |
3645 | 3646 | ||
3646 | if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) { | 3647 | /* |
3647 | set_debugreg(current->thread.debugreg0, 0); | 3648 | * If the guest has used debug registers, at least dr7 |
3648 | set_debugreg(current->thread.debugreg1, 1); | 3649 | * will be disabled while returning to the host. |
3649 | set_debugreg(current->thread.debugreg2, 2); | 3650 | * If we don't have active breakpoints in the host, we don't |
3650 | set_debugreg(current->thread.debugreg3, 3); | 3651 | * care about the messed up debug address registers. But if |
3651 | set_debugreg(current->thread.debugreg6, 6); | 3652 | * we have some of them active, restore the old state. |
3652 | set_debugreg(current->thread.debugreg7, 7); | 3653 | */ |
3653 | } | 3654 | if (hw_breakpoint_active()) |
3655 | hw_breakpoint_restore(); | ||
3654 | 3656 | ||
3655 | set_bit(KVM_REQ_KICK, &vcpu->requests); | 3657 | set_bit(KVM_REQ_KICK, &vcpu->requests); |
3656 | local_irq_enable(); | 3658 | local_irq_enable(); |
diff --git a/arch/x86/lib/.gitignore b/arch/x86/lib/.gitignore new file mode 100644 index 000000000000..8df89f0a3fe6 --- /dev/null +++ b/arch/x86/lib/.gitignore | |||
@@ -0,0 +1 @@ | |||
inat-tables.c | |||
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 85f5db95c60f..a2d6472895fb 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile | |||
@@ -2,12 +2,25 @@ | |||
2 | # Makefile for x86 specific library files. | 2 | # Makefile for x86 specific library files. |
3 | # | 3 | # |
4 | 4 | ||
5 | inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk | ||
6 | inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt | ||
7 | quiet_cmd_inat_tables = GEN $@ | ||
8 | cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ | ||
9 | |||
10 | $(obj)/inat-tables.c: $(inat_tables_script) $(inat_tables_maps) | ||
11 | $(call cmd,inat_tables) | ||
12 | |||
13 | $(obj)/inat.o: $(obj)/inat-tables.c | ||
14 | |||
15 | clean-files := inat-tables.c | ||
16 | |||
5 | obj-$(CONFIG_SMP) := msr.o | 17 | obj-$(CONFIG_SMP) := msr.o |
6 | 18 | ||
7 | lib-y := delay.o | 19 | lib-y := delay.o |
8 | lib-y += thunk_$(BITS).o | 20 | lib-y += thunk_$(BITS).o |
9 | lib-y += usercopy_$(BITS).o getuser.o putuser.o | 21 | lib-y += usercopy_$(BITS).o getuser.o putuser.o |
10 | lib-y += memcpy_$(BITS).o | 22 | lib-y += memcpy_$(BITS).o |
23 | lib-y += insn.o inat.o | ||
11 | 24 | ||
12 | obj-y += msr-reg.o msr-reg-export.o | 25 | obj-y += msr-reg.o msr-reg-export.o |
13 | 26 | ||
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 6ba0f7bb85ea..cf889d4e076a 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S | |||
@@ -65,7 +65,7 @@ | |||
65 | .endm | 65 | .endm |
66 | 66 | ||
67 | /* Standard copy_to_user with segment limit checking */ | 67 | /* Standard copy_to_user with segment limit checking */ |
68 | ENTRY(copy_to_user) | 68 | ENTRY(_copy_to_user) |
69 | CFI_STARTPROC | 69 | CFI_STARTPROC |
70 | GET_THREAD_INFO(%rax) | 70 | GET_THREAD_INFO(%rax) |
71 | movq %rdi,%rcx | 71 | movq %rdi,%rcx |
@@ -75,10 +75,10 @@ ENTRY(copy_to_user) | |||
75 | jae bad_to_user | 75 | jae bad_to_user |
76 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string | 76 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string |
77 | CFI_ENDPROC | 77 | CFI_ENDPROC |
78 | ENDPROC(copy_to_user) | 78 | ENDPROC(_copy_to_user) |
79 | 79 | ||
80 | /* Standard copy_from_user with segment limit checking */ | 80 | /* Standard copy_from_user with segment limit checking */ |
81 | ENTRY(copy_from_user) | 81 | ENTRY(_copy_from_user) |
82 | CFI_STARTPROC | 82 | CFI_STARTPROC |
83 | GET_THREAD_INFO(%rax) | 83 | GET_THREAD_INFO(%rax) |
84 | movq %rsi,%rcx | 84 | movq %rsi,%rcx |
@@ -88,7 +88,7 @@ ENTRY(copy_from_user) | |||
88 | jae bad_from_user | 88 | jae bad_from_user |
89 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string | 89 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string |
90 | CFI_ENDPROC | 90 | CFI_ENDPROC |
91 | ENDPROC(copy_from_user) | 91 | ENDPROC(_copy_from_user) |
92 | 92 | ||
93 | ENTRY(copy_user_generic) | 93 | ENTRY(copy_user_generic) |
94 | CFI_STARTPROC | 94 | CFI_STARTPROC |
@@ -96,12 +96,6 @@ ENTRY(copy_user_generic) | |||
96 | CFI_ENDPROC | 96 | CFI_ENDPROC |
97 | ENDPROC(copy_user_generic) | 97 | ENDPROC(copy_user_generic) |
98 | 98 | ||
99 | ENTRY(__copy_from_user_inatomic) | ||
100 | CFI_STARTPROC | ||
101 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string | ||
102 | CFI_ENDPROC | ||
103 | ENDPROC(__copy_from_user_inatomic) | ||
104 | |||
105 | .section .fixup,"ax" | 99 | .section .fixup,"ax" |
106 | /* must zero dest */ | 100 | /* must zero dest */ |
107 | ENTRY(bad_from_user) | 101 | ENTRY(bad_from_user) |
diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c new file mode 100644 index 000000000000..46fc4ee09fc4 --- /dev/null +++ b/arch/x86/lib/inat.c | |||
@@ -0,0 +1,90 @@ | |||
1 | /* | ||
2 | * x86 instruction attribute tables | ||
3 | * | ||
4 | * Written by Masami Hiramatsu <mhiramat@redhat.com> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
19 | * | ||
20 | */ | ||
21 | #include <asm/insn.h> | ||
22 | |||
23 | /* Attribute tables are generated from opcode map */ | ||
24 | #include "inat-tables.c" | ||
25 | |||
26 | /* Attribute search APIs */ | ||
27 | insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode) | ||
28 | { | ||
29 | return inat_primary_table[opcode]; | ||
30 | } | ||
31 | |||
32 | insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, insn_byte_t last_pfx, | ||
33 | insn_attr_t esc_attr) | ||
34 | { | ||
35 | const insn_attr_t *table; | ||
36 | insn_attr_t lpfx_attr; | ||
37 | int n, m = 0; | ||
38 | |||
39 | n = inat_escape_id(esc_attr); | ||
40 | if (last_pfx) { | ||
41 | lpfx_attr = inat_get_opcode_attribute(last_pfx); | ||
42 | m = inat_last_prefix_id(lpfx_attr); | ||
43 | } | ||
44 | table = inat_escape_tables[n][0]; | ||
45 | if (!table) | ||
46 | return 0; | ||
47 | if (inat_has_variant(table[opcode]) && m) { | ||
48 | table = inat_escape_tables[n][m]; | ||
49 | if (!table) | ||
50 | return 0; | ||
51 | } | ||
52 | return table[opcode]; | ||
53 | } | ||
54 | |||
55 | insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx, | ||
56 | insn_attr_t grp_attr) | ||
57 | { | ||
58 | const insn_attr_t *table; | ||
59 | insn_attr_t lpfx_attr; | ||
60 | int n, m = 0; | ||
61 | |||
62 | n = inat_group_id(grp_attr); | ||
63 | if (last_pfx) { | ||
64 | lpfx_attr = inat_get_opcode_attribute(last_pfx); | ||
65 | m = inat_last_prefix_id(lpfx_attr); | ||
66 | } | ||
67 | table = inat_group_tables[n][0]; | ||
68 | if (!table) | ||
69 | return inat_group_common_attribute(grp_attr); | ||
70 | if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && m) { | ||
71 | table = inat_group_tables[n][m]; | ||
72 | if (!table) | ||
73 | return inat_group_common_attribute(grp_attr); | ||
74 | } | ||
75 | return table[X86_MODRM_REG(modrm)] | | ||
76 | inat_group_common_attribute(grp_attr); | ||
77 | } | ||
78 | |||
79 | insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, | ||
80 | insn_byte_t vex_p) | ||
81 | { | ||
82 | const insn_attr_t *table; | ||
83 | if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) | ||
84 | return 0; | ||
85 | table = inat_avx_tables[vex_m][vex_p]; | ||
86 | if (!table) | ||
87 | return 0; | ||
88 | return table[opcode]; | ||
89 | } | ||
90 | |||
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c new file mode 100644 index 000000000000..9f33b984d0ef --- /dev/null +++ b/arch/x86/lib/insn.c | |||
@@ -0,0 +1,516 @@ | |||
1 | /* | ||
2 | * x86 instruction analysis | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright (C) IBM Corporation, 2002, 2004, 2009 | ||
19 | */ | ||
20 | |||
21 | #include <linux/string.h> | ||
22 | #include <asm/inat.h> | ||
23 | #include <asm/insn.h> | ||
24 | |||
25 | #define get_next(t, insn) \ | ||
26 | ({t r; r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) | ||
27 | |||
28 | #define peek_next(t, insn) \ | ||
29 | ({t r; r = *(t*)insn->next_byte; r; }) | ||
30 | |||
31 | #define peek_nbyte_next(t, insn, n) \ | ||
32 | ({t r; r = *(t*)((insn)->next_byte + n); r; }) | ||
33 | |||
34 | /** | ||
35 | * insn_init() - initialize struct insn | ||
36 | * @insn: &struct insn to be initialized | ||
37 | * @kaddr: address (in kernel memory) of instruction (or copy thereof) | ||
38 | * @x86_64: !0 for 64-bit kernel or 64-bit app | ||
39 | */ | ||
40 | void insn_init(struct insn *insn, const void *kaddr, int x86_64) | ||
41 | { | ||
42 | memset(insn, 0, sizeof(*insn)); | ||
43 | insn->kaddr = kaddr; | ||
44 | insn->next_byte = kaddr; | ||
45 | insn->x86_64 = x86_64 ? 1 : 0; | ||
46 | insn->opnd_bytes = 4; | ||
47 | if (x86_64) | ||
48 | insn->addr_bytes = 8; | ||
49 | else | ||
50 | insn->addr_bytes = 4; | ||
51 | } | ||
52 | |||
53 | /** | ||
54 | * insn_get_prefixes - scan x86 instruction prefix bytes | ||
55 | * @insn: &struct insn containing instruction | ||
56 | * | ||
57 | * Populates the @insn->prefixes bitmap, and updates @insn->next_byte | ||
58 | * to point to the (first) opcode. No effect if @insn->prefixes.got | ||
59 | * is already set. | ||
60 | */ | ||
61 | void insn_get_prefixes(struct insn *insn) | ||
62 | { | ||
63 | struct insn_field *prefixes = &insn->prefixes; | ||
64 | insn_attr_t attr; | ||
65 | insn_byte_t b, lb; | ||
66 | int i, nb; | ||
67 | |||
68 | if (prefixes->got) | ||
69 | return; | ||
70 | |||
71 | nb = 0; | ||
72 | lb = 0; | ||
73 | b = peek_next(insn_byte_t, insn); | ||
74 | attr = inat_get_opcode_attribute(b); | ||
75 | while (inat_is_legacy_prefix(attr)) { | ||
76 | /* Skip if same prefix */ | ||
77 | for (i = 0; i < nb; i++) | ||
78 | if (prefixes->bytes[i] == b) | ||
79 | goto found; | ||
80 | if (nb == 4) | ||
81 | /* Invalid instruction */ | ||
82 | break; | ||
83 | prefixes->bytes[nb++] = b; | ||
84 | if (inat_is_address_size_prefix(attr)) { | ||
85 | /* address size switches 2/4 or 4/8 */ | ||
86 | if (insn->x86_64) | ||
87 | insn->addr_bytes ^= 12; | ||
88 | else | ||
89 | insn->addr_bytes ^= 6; | ||
90 | } else if (inat_is_operand_size_prefix(attr)) { | ||
91 | /* oprand size switches 2/4 */ | ||
92 | insn->opnd_bytes ^= 6; | ||
93 | } | ||
94 | found: | ||
95 | prefixes->nbytes++; | ||
96 | insn->next_byte++; | ||
97 | lb = b; | ||
98 | b = peek_next(insn_byte_t, insn); | ||
99 | attr = inat_get_opcode_attribute(b); | ||
100 | } | ||
101 | /* Set the last prefix */ | ||
102 | if (lb && lb != insn->prefixes.bytes[3]) { | ||
103 | if (unlikely(insn->prefixes.bytes[3])) { | ||
104 | /* Swap the last prefix */ | ||
105 | b = insn->prefixes.bytes[3]; | ||
106 | for (i = 0; i < nb; i++) | ||
107 | if (prefixes->bytes[i] == lb) | ||
108 | prefixes->bytes[i] = b; | ||
109 | } | ||
110 | insn->prefixes.bytes[3] = lb; | ||
111 | } | ||
112 | |||
113 | /* Decode REX prefix */ | ||
114 | if (insn->x86_64) { | ||
115 | b = peek_next(insn_byte_t, insn); | ||
116 | attr = inat_get_opcode_attribute(b); | ||
117 | if (inat_is_rex_prefix(attr)) { | ||
118 | insn->rex_prefix.value = b; | ||
119 | insn->rex_prefix.nbytes = 1; | ||
120 | insn->next_byte++; | ||
121 | if (X86_REX_W(b)) | ||
122 | /* REX.W overrides opnd_size */ | ||
123 | insn->opnd_bytes = 8; | ||
124 | } | ||
125 | } | ||
126 | insn->rex_prefix.got = 1; | ||
127 | |||
128 | /* Decode VEX prefix */ | ||
129 | b = peek_next(insn_byte_t, insn); | ||
130 | attr = inat_get_opcode_attribute(b); | ||
131 | if (inat_is_vex_prefix(attr)) { | ||
132 | insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1); | ||
133 | if (!insn->x86_64) { | ||
134 | /* | ||
135 | * In 32-bits mode, if the [7:6] bits (mod bits of | ||
136 | * ModRM) on the second byte are not 11b, it is | ||
137 | * LDS or LES. | ||
138 | */ | ||
139 | if (X86_MODRM_MOD(b2) != 3) | ||
140 | goto vex_end; | ||
141 | } | ||
142 | insn->vex_prefix.bytes[0] = b; | ||
143 | insn->vex_prefix.bytes[1] = b2; | ||
144 | if (inat_is_vex3_prefix(attr)) { | ||
145 | b2 = peek_nbyte_next(insn_byte_t, insn, 2); | ||
146 | insn->vex_prefix.bytes[2] = b2; | ||
147 | insn->vex_prefix.nbytes = 3; | ||
148 | insn->next_byte += 3; | ||
149 | if (insn->x86_64 && X86_VEX_W(b2)) | ||
150 | /* VEX.W overrides opnd_size */ | ||
151 | insn->opnd_bytes = 8; | ||
152 | } else { | ||
153 | insn->vex_prefix.nbytes = 2; | ||
154 | insn->next_byte += 2; | ||
155 | } | ||
156 | } | ||
157 | vex_end: | ||
158 | insn->vex_prefix.got = 1; | ||
159 | |||
160 | prefixes->got = 1; | ||
161 | return; | ||
162 | } | ||
163 | |||
164 | /** | ||
165 | * insn_get_opcode - collect opcode(s) | ||
166 | * @insn: &struct insn containing instruction | ||
167 | * | ||
168 | * Populates @insn->opcode, updates @insn->next_byte to point past the | ||
169 | * opcode byte(s), and set @insn->attr (except for groups). | ||
170 | * If necessary, first collects any preceding (prefix) bytes. | ||
171 | * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got | ||
172 | * is already 1. | ||
173 | */ | ||
174 | void insn_get_opcode(struct insn *insn) | ||
175 | { | ||
176 | struct insn_field *opcode = &insn->opcode; | ||
177 | insn_byte_t op, pfx; | ||
178 | if (opcode->got) | ||
179 | return; | ||
180 | if (!insn->prefixes.got) | ||
181 | insn_get_prefixes(insn); | ||
182 | |||
183 | /* Get first opcode */ | ||
184 | op = get_next(insn_byte_t, insn); | ||
185 | opcode->bytes[0] = op; | ||
186 | opcode->nbytes = 1; | ||
187 | |||
188 | /* Check if there is VEX prefix or not */ | ||
189 | if (insn_is_avx(insn)) { | ||
190 | insn_byte_t m, p; | ||
191 | m = insn_vex_m_bits(insn); | ||
192 | p = insn_vex_p_bits(insn); | ||
193 | insn->attr = inat_get_avx_attribute(op, m, p); | ||
194 | if (!inat_accept_vex(insn->attr)) | ||
195 | insn->attr = 0; /* This instruction is bad */ | ||
196 | goto end; /* VEX has only 1 byte for opcode */ | ||
197 | } | ||
198 | |||
199 | insn->attr = inat_get_opcode_attribute(op); | ||
200 | while (inat_is_escape(insn->attr)) { | ||
201 | /* Get escaped opcode */ | ||
202 | op = get_next(insn_byte_t, insn); | ||
203 | opcode->bytes[opcode->nbytes++] = op; | ||
204 | pfx = insn_last_prefix(insn); | ||
205 | insn->attr = inat_get_escape_attribute(op, pfx, insn->attr); | ||
206 | } | ||
207 | if (inat_must_vex(insn->attr)) | ||
208 | insn->attr = 0; /* This instruction is bad */ | ||
209 | end: | ||
210 | opcode->got = 1; | ||
211 | } | ||
212 | |||
213 | /** | ||
214 | * insn_get_modrm - collect ModRM byte, if any | ||
215 | * @insn: &struct insn containing instruction | ||
216 | * | ||
217 | * Populates @insn->modrm and updates @insn->next_byte to point past the | ||
218 | * ModRM byte, if any. If necessary, first collects the preceding bytes | ||
219 | * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. | ||
220 | */ | ||
221 | void insn_get_modrm(struct insn *insn) | ||
222 | { | ||
223 | struct insn_field *modrm = &insn->modrm; | ||
224 | insn_byte_t pfx, mod; | ||
225 | if (modrm->got) | ||
226 | return; | ||
227 | if (!insn->opcode.got) | ||
228 | insn_get_opcode(insn); | ||
229 | |||
230 | if (inat_has_modrm(insn->attr)) { | ||
231 | mod = get_next(insn_byte_t, insn); | ||
232 | modrm->value = mod; | ||
233 | modrm->nbytes = 1; | ||
234 | if (inat_is_group(insn->attr)) { | ||
235 | pfx = insn_last_prefix(insn); | ||
236 | insn->attr = inat_get_group_attribute(mod, pfx, | ||
237 | insn->attr); | ||
238 | } | ||
239 | } | ||
240 | |||
241 | if (insn->x86_64 && inat_is_force64(insn->attr)) | ||
242 | insn->opnd_bytes = 8; | ||
243 | modrm->got = 1; | ||
244 | } | ||
245 | |||
246 | |||
247 | /** | ||
248 | * insn_rip_relative() - Does instruction use RIP-relative addressing mode? | ||
249 | * @insn: &struct insn containing instruction | ||
250 | * | ||
251 | * If necessary, first collects the instruction up to and including the | ||
252 | * ModRM byte. No effect if @insn->x86_64 is 0. | ||
253 | */ | ||
254 | int insn_rip_relative(struct insn *insn) | ||
255 | { | ||
256 | struct insn_field *modrm = &insn->modrm; | ||
257 | |||
258 | if (!insn->x86_64) | ||
259 | return 0; | ||
260 | if (!modrm->got) | ||
261 | insn_get_modrm(insn); | ||
262 | /* | ||
263 | * For rip-relative instructions, the mod field (top 2 bits) | ||
264 | * is zero and the r/m field (bottom 3 bits) is 0x5. | ||
265 | */ | ||
266 | return (modrm->nbytes && (modrm->value & 0xc7) == 0x5); | ||
267 | } | ||
268 | |||
269 | /** | ||
270 | * insn_get_sib() - Get the SIB byte of instruction | ||
271 | * @insn: &struct insn containing instruction | ||
272 | * | ||
273 | * If necessary, first collects the instruction up to and including the | ||
274 | * ModRM byte. | ||
275 | */ | ||
276 | void insn_get_sib(struct insn *insn) | ||
277 | { | ||
278 | insn_byte_t modrm; | ||
279 | |||
280 | if (insn->sib.got) | ||
281 | return; | ||
282 | if (!insn->modrm.got) | ||
283 | insn_get_modrm(insn); | ||
284 | if (insn->modrm.nbytes) { | ||
285 | modrm = (insn_byte_t)insn->modrm.value; | ||
286 | if (insn->addr_bytes != 2 && | ||
287 | X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { | ||
288 | insn->sib.value = get_next(insn_byte_t, insn); | ||
289 | insn->sib.nbytes = 1; | ||
290 | } | ||
291 | } | ||
292 | insn->sib.got = 1; | ||
293 | } | ||
294 | |||
295 | |||
296 | /** | ||
297 | * insn_get_displacement() - Get the displacement of instruction | ||
298 | * @insn: &struct insn containing instruction | ||
299 | * | ||
300 | * If necessary, first collects the instruction up to and including the | ||
301 | * SIB byte. | ||
302 | * Displacement value is sign-expanded. | ||
303 | */ | ||
304 | void insn_get_displacement(struct insn *insn) | ||
305 | { | ||
306 | insn_byte_t mod, rm, base; | ||
307 | |||
308 | if (insn->displacement.got) | ||
309 | return; | ||
310 | if (!insn->sib.got) | ||
311 | insn_get_sib(insn); | ||
312 | if (insn->modrm.nbytes) { | ||
313 | /* | ||
314 | * Interpreting the modrm byte: | ||
315 | * mod = 00 - no displacement fields (exceptions below) | ||
316 | * mod = 01 - 1-byte displacement field | ||
317 | * mod = 10 - displacement field is 4 bytes, or 2 bytes if | ||
318 | * address size = 2 (0x67 prefix in 32-bit mode) | ||
319 | * mod = 11 - no memory operand | ||
320 | * | ||
321 | * If address size = 2... | ||
322 | * mod = 00, r/m = 110 - displacement field is 2 bytes | ||
323 | * | ||
324 | * If address size != 2... | ||
325 | * mod != 11, r/m = 100 - SIB byte exists | ||
326 | * mod = 00, SIB base = 101 - displacement field is 4 bytes | ||
327 | * mod = 00, r/m = 101 - rip-relative addressing, displacement | ||
328 | * field is 4 bytes | ||
329 | */ | ||
330 | mod = X86_MODRM_MOD(insn->modrm.value); | ||
331 | rm = X86_MODRM_RM(insn->modrm.value); | ||
332 | base = X86_SIB_BASE(insn->sib.value); | ||
333 | if (mod == 3) | ||
334 | goto out; | ||
335 | if (mod == 1) { | ||
336 | insn->displacement.value = get_next(char, insn); | ||
337 | insn->displacement.nbytes = 1; | ||
338 | } else if (insn->addr_bytes == 2) { | ||
339 | if ((mod == 0 && rm == 6) || mod == 2) { | ||
340 | insn->displacement.value = | ||
341 | get_next(short, insn); | ||
342 | insn->displacement.nbytes = 2; | ||
343 | } | ||
344 | } else { | ||
345 | if ((mod == 0 && rm == 5) || mod == 2 || | ||
346 | (mod == 0 && base == 5)) { | ||
347 | insn->displacement.value = get_next(int, insn); | ||
348 | insn->displacement.nbytes = 4; | ||
349 | } | ||
350 | } | ||
351 | } | ||
352 | out: | ||
353 | insn->displacement.got = 1; | ||
354 | } | ||
355 | |||
356 | /* Decode moffset16/32/64 */ | ||
357 | static void __get_moffset(struct insn *insn) | ||
358 | { | ||
359 | switch (insn->addr_bytes) { | ||
360 | case 2: | ||
361 | insn->moffset1.value = get_next(short, insn); | ||
362 | insn->moffset1.nbytes = 2; | ||
363 | break; | ||
364 | case 4: | ||
365 | insn->moffset1.value = get_next(int, insn); | ||
366 | insn->moffset1.nbytes = 4; | ||
367 | break; | ||
368 | case 8: | ||
369 | insn->moffset1.value = get_next(int, insn); | ||
370 | insn->moffset1.nbytes = 4; | ||
371 | insn->moffset2.value = get_next(int, insn); | ||
372 | insn->moffset2.nbytes = 4; | ||
373 | break; | ||
374 | } | ||
375 | insn->moffset1.got = insn->moffset2.got = 1; | ||
376 | } | ||
377 | |||
378 | /* Decode imm v32(Iz) */ | ||
379 | static void __get_immv32(struct insn *insn) | ||
380 | { | ||
381 | switch (insn->opnd_bytes) { | ||
382 | case 2: | ||
383 | insn->immediate.value = get_next(short, insn); | ||
384 | insn->immediate.nbytes = 2; | ||
385 | break; | ||
386 | case 4: | ||
387 | case 8: | ||
388 | insn->immediate.value = get_next(int, insn); | ||
389 | insn->immediate.nbytes = 4; | ||
390 | break; | ||
391 | } | ||
392 | } | ||
393 | |||
394 | /* Decode imm v64(Iv/Ov) */ | ||
395 | static void __get_immv(struct insn *insn) | ||
396 | { | ||
397 | switch (insn->opnd_bytes) { | ||
398 | case 2: | ||
399 | insn->immediate1.value = get_next(short, insn); | ||
400 | insn->immediate1.nbytes = 2; | ||
401 | break; | ||
402 | case 4: | ||
403 | insn->immediate1.value = get_next(int, insn); | ||
404 | insn->immediate1.nbytes = 4; | ||
405 | break; | ||
406 | case 8: | ||
407 | insn->immediate1.value = get_next(int, insn); | ||
408 | insn->immediate1.nbytes = 4; | ||
409 | insn->immediate2.value = get_next(int, insn); | ||
410 | insn->immediate2.nbytes = 4; | ||
411 | break; | ||
412 | } | ||
413 | insn->immediate1.got = insn->immediate2.got = 1; | ||
414 | } | ||
415 | |||
416 | /* Decode ptr16:16/32(Ap) */ | ||
417 | static void __get_immptr(struct insn *insn) | ||
418 | { | ||
419 | switch (insn->opnd_bytes) { | ||
420 | case 2: | ||
421 | insn->immediate1.value = get_next(short, insn); | ||
422 | insn->immediate1.nbytes = 2; | ||
423 | break; | ||
424 | case 4: | ||
425 | insn->immediate1.value = get_next(int, insn); | ||
426 | insn->immediate1.nbytes = 4; | ||
427 | break; | ||
428 | case 8: | ||
429 | /* ptr16:64 is not exist (no segment) */ | ||
430 | return; | ||
431 | } | ||
432 | insn->immediate2.value = get_next(unsigned short, insn); | ||
433 | insn->immediate2.nbytes = 2; | ||
434 | insn->immediate1.got = insn->immediate2.got = 1; | ||
435 | } | ||
436 | |||
437 | /** | ||
438 | * insn_get_immediate() - Get the immediates of instruction | ||
439 | * @insn: &struct insn containing instruction | ||
440 | * | ||
441 | * If necessary, first collects the instruction up to and including the | ||
442 | * displacement bytes. | ||
443 | * Basically, most of immediates are sign-expanded. Unsigned-value can be | ||
444 | * get by bit masking with ((1 << (nbytes * 8)) - 1) | ||
445 | */ | ||
446 | void insn_get_immediate(struct insn *insn) | ||
447 | { | ||
448 | if (insn->immediate.got) | ||
449 | return; | ||
450 | if (!insn->displacement.got) | ||
451 | insn_get_displacement(insn); | ||
452 | |||
453 | if (inat_has_moffset(insn->attr)) { | ||
454 | __get_moffset(insn); | ||
455 | goto done; | ||
456 | } | ||
457 | |||
458 | if (!inat_has_immediate(insn->attr)) | ||
459 | /* no immediates */ | ||
460 | goto done; | ||
461 | |||
462 | switch (inat_immediate_size(insn->attr)) { | ||
463 | case INAT_IMM_BYTE: | ||
464 | insn->immediate.value = get_next(char, insn); | ||
465 | insn->immediate.nbytes = 1; | ||
466 | break; | ||
467 | case INAT_IMM_WORD: | ||
468 | insn->immediate.value = get_next(short, insn); | ||
469 | insn->immediate.nbytes = 2; | ||
470 | break; | ||
471 | case INAT_IMM_DWORD: | ||
472 | insn->immediate.value = get_next(int, insn); | ||
473 | insn->immediate.nbytes = 4; | ||
474 | break; | ||
475 | case INAT_IMM_QWORD: | ||
476 | insn->immediate1.value = get_next(int, insn); | ||
477 | insn->immediate1.nbytes = 4; | ||
478 | insn->immediate2.value = get_next(int, insn); | ||
479 | insn->immediate2.nbytes = 4; | ||
480 | break; | ||
481 | case INAT_IMM_PTR: | ||
482 | __get_immptr(insn); | ||
483 | break; | ||
484 | case INAT_IMM_VWORD32: | ||
485 | __get_immv32(insn); | ||
486 | break; | ||
487 | case INAT_IMM_VWORD: | ||
488 | __get_immv(insn); | ||
489 | break; | ||
490 | default: | ||
491 | break; | ||
492 | } | ||
493 | if (inat_has_second_immediate(insn->attr)) { | ||
494 | insn->immediate2.value = get_next(char, insn); | ||
495 | insn->immediate2.nbytes = 1; | ||
496 | } | ||
497 | done: | ||
498 | insn->immediate.got = 1; | ||
499 | } | ||
500 | |||
501 | /** | ||
502 | * insn_get_length() - Get the length of instruction | ||
503 | * @insn: &struct insn containing instruction | ||
504 | * | ||
505 | * If necessary, first collects the instruction up to and including the | ||
506 | * immediates bytes. | ||
507 | */ | ||
508 | void insn_get_length(struct insn *insn) | ||
509 | { | ||
510 | if (insn->length) | ||
511 | return; | ||
512 | if (!insn->immediate.got) | ||
513 | insn_get_immediate(insn); | ||
514 | insn->length = (unsigned char)((unsigned long)insn->next_byte | ||
515 | - (unsigned long)insn->kaddr); | ||
516 | } | ||
diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c index 33a1e3ca22d8..41628b104b9e 100644 --- a/arch/x86/lib/msr.c +++ b/arch/x86/lib/msr.c | |||
@@ -71,14 +71,9 @@ int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) | |||
71 | } | 71 | } |
72 | EXPORT_SYMBOL(wrmsr_on_cpu); | 72 | EXPORT_SYMBOL(wrmsr_on_cpu); |
73 | 73 | ||
74 | /* rdmsr on a bunch of CPUs | 74 | static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no, |
75 | * | 75 | struct msr *msrs, |
76 | * @mask: which CPUs | 76 | void (*msr_func) (void *info)) |
77 | * @msr_no: which MSR | ||
78 | * @msrs: array of MSR values | ||
79 | * | ||
80 | */ | ||
81 | void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs) | ||
82 | { | 77 | { |
83 | struct msr_info rv; | 78 | struct msr_info rv; |
84 | int this_cpu; | 79 | int this_cpu; |
@@ -92,11 +87,23 @@ void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs) | |||
92 | this_cpu = get_cpu(); | 87 | this_cpu = get_cpu(); |
93 | 88 | ||
94 | if (cpumask_test_cpu(this_cpu, mask)) | 89 | if (cpumask_test_cpu(this_cpu, mask)) |
95 | __rdmsr_on_cpu(&rv); | 90 | msr_func(&rv); |
96 | 91 | ||
97 | smp_call_function_many(mask, __rdmsr_on_cpu, &rv, 1); | 92 | smp_call_function_many(mask, msr_func, &rv, 1); |
98 | put_cpu(); | 93 | put_cpu(); |
99 | } | 94 | } |
95 | |||
96 | /* rdmsr on a bunch of CPUs | ||
97 | * | ||
98 | * @mask: which CPUs | ||
99 | * @msr_no: which MSR | ||
100 | * @msrs: array of MSR values | ||
101 | * | ||
102 | */ | ||
103 | void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) | ||
104 | { | ||
105 | __rwmsr_on_cpus(mask, msr_no, msrs, __rdmsr_on_cpu); | ||
106 | } | ||
100 | EXPORT_SYMBOL(rdmsr_on_cpus); | 107 | EXPORT_SYMBOL(rdmsr_on_cpus); |
101 | 108 | ||
102 | /* | 109 | /* |
@@ -107,24 +114,9 @@ EXPORT_SYMBOL(rdmsr_on_cpus); | |||
107 | * @msrs: array of MSR values | 114 | * @msrs: array of MSR values |
108 | * | 115 | * |
109 | */ | 116 | */ |
110 | void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs) | 117 | void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) |
111 | { | 118 | { |
112 | struct msr_info rv; | 119 | __rwmsr_on_cpus(mask, msr_no, msrs, __wrmsr_on_cpu); |
113 | int this_cpu; | ||
114 | |||
115 | memset(&rv, 0, sizeof(rv)); | ||
116 | |||
117 | rv.off = cpumask_first(mask); | ||
118 | rv.msrs = msrs; | ||
119 | rv.msr_no = msr_no; | ||
120 | |||
121 | this_cpu = get_cpu(); | ||
122 | |||
123 | if (cpumask_test_cpu(this_cpu, mask)) | ||
124 | __wrmsr_on_cpu(&rv); | ||
125 | |||
126 | smp_call_function_many(mask, __wrmsr_on_cpu, &rv, 1); | ||
127 | put_cpu(); | ||
128 | } | 120 | } |
129 | EXPORT_SYMBOL(wrmsr_on_cpus); | 121 | EXPORT_SYMBOL(wrmsr_on_cpus); |
130 | 122 | ||
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 1f118d462acc..e218d5df85ff 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c | |||
@@ -874,7 +874,7 @@ EXPORT_SYMBOL(copy_to_user); | |||
874 | * data to the requested size using zero bytes. | 874 | * data to the requested size using zero bytes. |
875 | */ | 875 | */ |
876 | unsigned long | 876 | unsigned long |
877 | copy_from_user(void *to, const void __user *from, unsigned long n) | 877 | _copy_from_user(void *to, const void __user *from, unsigned long n) |
878 | { | 878 | { |
879 | if (access_ok(VERIFY_READ, from, n)) | 879 | if (access_ok(VERIFY_READ, from, n)) |
880 | n = __copy_from_user(to, from, n); | 880 | n = __copy_from_user(to, from, n); |
@@ -882,4 +882,10 @@ copy_from_user(void *to, const void __user *from, unsigned long n) | |||
882 | memset(to, 0, n); | 882 | memset(to, 0, n); |
883 | return n; | 883 | return n; |
884 | } | 884 | } |
885 | EXPORT_SYMBOL(copy_from_user); | 885 | EXPORT_SYMBOL(_copy_from_user); |
886 | |||
887 | void copy_from_user_overflow(void) | ||
888 | { | ||
889 | WARN(1, "Buffer overflow detected!\n"); | ||
890 | } | ||
891 | EXPORT_SYMBOL(copy_from_user_overflow); | ||
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt new file mode 100644 index 000000000000..a793da5e560e --- /dev/null +++ b/arch/x86/lib/x86-opcode-map.txt | |||
@@ -0,0 +1,893 @@ | |||
1 | # x86 Opcode Maps | ||
2 | # | ||
3 | #<Opcode maps> | ||
4 | # Table: table-name | ||
5 | # Referrer: escaped-name | ||
6 | # AVXcode: avx-code | ||
7 | # opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] | ||
8 | # (or) | ||
9 | # opcode: escape # escaped-name | ||
10 | # EndTable | ||
11 | # | ||
12 | #<group maps> | ||
13 | # GrpTable: GrpXXX | ||
14 | # reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] | ||
15 | # EndTable | ||
16 | # | ||
17 | # AVX Superscripts | ||
18 | # (VEX): this opcode can accept VEX prefix. | ||
19 | # (oVEX): this opcode requires VEX prefix. | ||
20 | # (o128): this opcode only supports 128bit VEX. | ||
21 | # (o256): this opcode only supports 256bit VEX. | ||
22 | # | ||
23 | |||
24 | Table: one byte opcode | ||
25 | Referrer: | ||
26 | AVXcode: | ||
27 | # 0x00 - 0x0f | ||
28 | 00: ADD Eb,Gb | ||
29 | 01: ADD Ev,Gv | ||
30 | 02: ADD Gb,Eb | ||
31 | 03: ADD Gv,Ev | ||
32 | 04: ADD AL,Ib | ||
33 | 05: ADD rAX,Iz | ||
34 | 06: PUSH ES (i64) | ||
35 | 07: POP ES (i64) | ||
36 | 08: OR Eb,Gb | ||
37 | 09: OR Ev,Gv | ||
38 | 0a: OR Gb,Eb | ||
39 | 0b: OR Gv,Ev | ||
40 | 0c: OR AL,Ib | ||
41 | 0d: OR rAX,Iz | ||
42 | 0e: PUSH CS (i64) | ||
43 | 0f: escape # 2-byte escape | ||
44 | # 0x10 - 0x1f | ||
45 | 10: ADC Eb,Gb | ||
46 | 11: ADC Ev,Gv | ||
47 | 12: ADC Gb,Eb | ||
48 | 13: ADC Gv,Ev | ||
49 | 14: ADC AL,Ib | ||
50 | 15: ADC rAX,Iz | ||
51 | 16: PUSH SS (i64) | ||
52 | 17: POP SS (i64) | ||
53 | 18: SBB Eb,Gb | ||
54 | 19: SBB Ev,Gv | ||
55 | 1a: SBB Gb,Eb | ||
56 | 1b: SBB Gv,Ev | ||
57 | 1c: SBB AL,Ib | ||
58 | 1d: SBB rAX,Iz | ||
59 | 1e: PUSH DS (i64) | ||
60 | 1f: POP DS (i64) | ||
61 | # 0x20 - 0x2f | ||
62 | 20: AND Eb,Gb | ||
63 | 21: AND Ev,Gv | ||
64 | 22: AND Gb,Eb | ||
65 | 23: AND Gv,Ev | ||
66 | 24: AND AL,Ib | ||
67 | 25: AND rAx,Iz | ||
68 | 26: SEG=ES (Prefix) | ||
69 | 27: DAA (i64) | ||
70 | 28: SUB Eb,Gb | ||
71 | 29: SUB Ev,Gv | ||
72 | 2a: SUB Gb,Eb | ||
73 | 2b: SUB Gv,Ev | ||
74 | 2c: SUB AL,Ib | ||
75 | 2d: SUB rAX,Iz | ||
76 | 2e: SEG=CS (Prefix) | ||
77 | 2f: DAS (i64) | ||
78 | # 0x30 - 0x3f | ||
79 | 30: XOR Eb,Gb | ||
80 | 31: XOR Ev,Gv | ||
81 | 32: XOR Gb,Eb | ||
82 | 33: XOR Gv,Ev | ||
83 | 34: XOR AL,Ib | ||
84 | 35: XOR rAX,Iz | ||
85 | 36: SEG=SS (Prefix) | ||
86 | 37: AAA (i64) | ||
87 | 38: CMP Eb,Gb | ||
88 | 39: CMP Ev,Gv | ||
89 | 3a: CMP Gb,Eb | ||
90 | 3b: CMP Gv,Ev | ||
91 | 3c: CMP AL,Ib | ||
92 | 3d: CMP rAX,Iz | ||
93 | 3e: SEG=DS (Prefix) | ||
94 | 3f: AAS (i64) | ||
95 | # 0x40 - 0x4f | ||
96 | 40: INC eAX (i64) | REX (o64) | ||
97 | 41: INC eCX (i64) | REX.B (o64) | ||
98 | 42: INC eDX (i64) | REX.X (o64) | ||
99 | 43: INC eBX (i64) | REX.XB (o64) | ||
100 | 44: INC eSP (i64) | REX.R (o64) | ||
101 | 45: INC eBP (i64) | REX.RB (o64) | ||
102 | 46: INC eSI (i64) | REX.RX (o64) | ||
103 | 47: INC eDI (i64) | REX.RXB (o64) | ||
104 | 48: DEC eAX (i64) | REX.W (o64) | ||
105 | 49: DEC eCX (i64) | REX.WB (o64) | ||
106 | 4a: DEC eDX (i64) | REX.WX (o64) | ||
107 | 4b: DEC eBX (i64) | REX.WXB (o64) | ||
108 | 4c: DEC eSP (i64) | REX.WR (o64) | ||
109 | 4d: DEC eBP (i64) | REX.WRB (o64) | ||
110 | 4e: DEC eSI (i64) | REX.WRX (o64) | ||
111 | 4f: DEC eDI (i64) | REX.WRXB (o64) | ||
112 | # 0x50 - 0x5f | ||
113 | 50: PUSH rAX/r8 (d64) | ||
114 | 51: PUSH rCX/r9 (d64) | ||
115 | 52: PUSH rDX/r10 (d64) | ||
116 | 53: PUSH rBX/r11 (d64) | ||
117 | 54: PUSH rSP/r12 (d64) | ||
118 | 55: PUSH rBP/r13 (d64) | ||
119 | 56: PUSH rSI/r14 (d64) | ||
120 | 57: PUSH rDI/r15 (d64) | ||
121 | 58: POP rAX/r8 (d64) | ||
122 | 59: POP rCX/r9 (d64) | ||
123 | 5a: POP rDX/r10 (d64) | ||
124 | 5b: POP rBX/r11 (d64) | ||
125 | 5c: POP rSP/r12 (d64) | ||
126 | 5d: POP rBP/r13 (d64) | ||
127 | 5e: POP rSI/r14 (d64) | ||
128 | 5f: POP rDI/r15 (d64) | ||
129 | # 0x60 - 0x6f | ||
130 | 60: PUSHA/PUSHAD (i64) | ||
131 | 61: POPA/POPAD (i64) | ||
132 | 62: BOUND Gv,Ma (i64) | ||
133 | 63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64) | ||
134 | 64: SEG=FS (Prefix) | ||
135 | 65: SEG=GS (Prefix) | ||
136 | 66: Operand-Size (Prefix) | ||
137 | 67: Address-Size (Prefix) | ||
138 | 68: PUSH Iz (d64) | ||
139 | 69: IMUL Gv,Ev,Iz | ||
140 | 6a: PUSH Ib (d64) | ||
141 | 6b: IMUL Gv,Ev,Ib | ||
142 | 6c: INS/INSB Yb,DX | ||
143 | 6d: INS/INSW/INSD Yz,DX | ||
144 | 6e: OUTS/OUTSB DX,Xb | ||
145 | 6f: OUTS/OUTSW/OUTSD DX,Xz | ||
146 | # 0x70 - 0x7f | ||
147 | 70: JO Jb | ||
148 | 71: JNO Jb | ||
149 | 72: JB/JNAE/JC Jb | ||
150 | 73: JNB/JAE/JNC Jb | ||
151 | 74: JZ/JE Jb | ||
152 | 75: JNZ/JNE Jb | ||
153 | 76: JBE/JNA Jb | ||
154 | 77: JNBE/JA Jb | ||
155 | 78: JS Jb | ||
156 | 79: JNS Jb | ||
157 | 7a: JP/JPE Jb | ||
158 | 7b: JNP/JPO Jb | ||
159 | 7c: JL/JNGE Jb | ||
160 | 7d: JNL/JGE Jb | ||
161 | 7e: JLE/JNG Jb | ||
162 | 7f: JNLE/JG Jb | ||
163 | # 0x80 - 0x8f | ||
164 | 80: Grp1 Eb,Ib (1A) | ||
165 | 81: Grp1 Ev,Iz (1A) | ||
166 | 82: Grp1 Eb,Ib (1A),(i64) | ||
167 | 83: Grp1 Ev,Ib (1A) | ||
168 | 84: TEST Eb,Gb | ||
169 | 85: TEST Ev,Gv | ||
170 | 86: XCHG Eb,Gb | ||
171 | 87: XCHG Ev,Gv | ||
172 | 88: MOV Eb,Gb | ||
173 | 89: MOV Ev,Gv | ||
174 | 8a: MOV Gb,Eb | ||
175 | 8b: MOV Gv,Ev | ||
176 | 8c: MOV Ev,Sw | ||
177 | 8d: LEA Gv,M | ||
178 | 8e: MOV Sw,Ew | ||
179 | 8f: Grp1A (1A) | POP Ev (d64) | ||
180 | # 0x90 - 0x9f | ||
181 | 90: NOP | PAUSE (F3) | XCHG r8,rAX | ||
182 | 91: XCHG rCX/r9,rAX | ||
183 | 92: XCHG rDX/r10,rAX | ||
184 | 93: XCHG rBX/r11,rAX | ||
185 | 94: XCHG rSP/r12,rAX | ||
186 | 95: XCHG rBP/r13,rAX | ||
187 | 96: XCHG rSI/r14,rAX | ||
188 | 97: XCHG rDI/r15,rAX | ||
189 | 98: CBW/CWDE/CDQE | ||
190 | 99: CWD/CDQ/CQO | ||
191 | 9a: CALLF Ap (i64) | ||
192 | 9b: FWAIT/WAIT | ||
193 | 9c: PUSHF/D/Q Fv (d64) | ||
194 | 9d: POPF/D/Q Fv (d64) | ||
195 | 9e: SAHF | ||
196 | 9f: LAHF | ||
197 | # 0xa0 - 0xaf | ||
198 | a0: MOV AL,Ob | ||
199 | a1: MOV rAX,Ov | ||
200 | a2: MOV Ob,AL | ||
201 | a3: MOV Ov,rAX | ||
202 | a4: MOVS/B Xb,Yb | ||
203 | a5: MOVS/W/D/Q Xv,Yv | ||
204 | a6: CMPS/B Xb,Yb | ||
205 | a7: CMPS/W/D Xv,Yv | ||
206 | a8: TEST AL,Ib | ||
207 | a9: TEST rAX,Iz | ||
208 | aa: STOS/B Yb,AL | ||
209 | ab: STOS/W/D/Q Yv,rAX | ||
210 | ac: LODS/B AL,Xb | ||
211 | ad: LODS/W/D/Q rAX,Xv | ||
212 | ae: SCAS/B AL,Yb | ||
213 | af: SCAS/W/D/Q rAX,Xv | ||
214 | # 0xb0 - 0xbf | ||
215 | b0: MOV AL/R8L,Ib | ||
216 | b1: MOV CL/R9L,Ib | ||
217 | b2: MOV DL/R10L,Ib | ||
218 | b3: MOV BL/R11L,Ib | ||
219 | b4: MOV AH/R12L,Ib | ||
220 | b5: MOV CH/R13L,Ib | ||
221 | b6: MOV DH/R14L,Ib | ||
222 | b7: MOV BH/R15L,Ib | ||
223 | b8: MOV rAX/r8,Iv | ||
224 | b9: MOV rCX/r9,Iv | ||
225 | ba: MOV rDX/r10,Iv | ||
226 | bb: MOV rBX/r11,Iv | ||
227 | bc: MOV rSP/r12,Iv | ||
228 | bd: MOV rBP/r13,Iv | ||
229 | be: MOV rSI/r14,Iv | ||
230 | bf: MOV rDI/r15,Iv | ||
231 | # 0xc0 - 0xcf | ||
232 | c0: Grp2 Eb,Ib (1A) | ||
233 | c1: Grp2 Ev,Ib (1A) | ||
234 | c2: RETN Iw (f64) | ||
235 | c3: RETN | ||
236 | c4: LES Gz,Mp (i64) | 3bytes-VEX (Prefix) | ||
237 | c5: LDS Gz,Mp (i64) | 2bytes-VEX (Prefix) | ||
238 | c6: Grp11 Eb,Ib (1A) | ||
239 | c7: Grp11 Ev,Iz (1A) | ||
240 | c8: ENTER Iw,Ib | ||
241 | c9: LEAVE (d64) | ||
242 | ca: RETF Iw | ||
243 | cb: RETF | ||
244 | cc: INT3 | ||
245 | cd: INT Ib | ||
246 | ce: INTO (i64) | ||
247 | cf: IRET/D/Q | ||
248 | # 0xd0 - 0xdf | ||
249 | d0: Grp2 Eb,1 (1A) | ||
250 | d1: Grp2 Ev,1 (1A) | ||
251 | d2: Grp2 Eb,CL (1A) | ||
252 | d3: Grp2 Ev,CL (1A) | ||
253 | d4: AAM Ib (i64) | ||
254 | d5: AAD Ib (i64) | ||
255 | d6: | ||
256 | d7: XLAT/XLATB | ||
257 | d8: ESC | ||
258 | d9: ESC | ||
259 | da: ESC | ||
260 | db: ESC | ||
261 | dc: ESC | ||
262 | dd: ESC | ||
263 | de: ESC | ||
264 | df: ESC | ||
265 | # 0xe0 - 0xef | ||
266 | e0: LOOPNE/LOOPNZ Jb (f64) | ||
267 | e1: LOOPE/LOOPZ Jb (f64) | ||
268 | e2: LOOP Jb (f64) | ||
269 | e3: JrCXZ Jb (f64) | ||
270 | e4: IN AL,Ib | ||
271 | e5: IN eAX,Ib | ||
272 | e6: OUT Ib,AL | ||
273 | e7: OUT Ib,eAX | ||
274 | e8: CALL Jz (f64) | ||
275 | e9: JMP-near Jz (f64) | ||
276 | ea: JMP-far Ap (i64) | ||
277 | eb: JMP-short Jb (f64) | ||
278 | ec: IN AL,DX | ||
279 | ed: IN eAX,DX | ||
280 | ee: OUT DX,AL | ||
281 | ef: OUT DX,eAX | ||
282 | # 0xf0 - 0xff | ||
283 | f0: LOCK (Prefix) | ||
284 | f1: | ||
285 | f2: REPNE (Prefix) | ||
286 | f3: REP/REPE (Prefix) | ||
287 | f4: HLT | ||
288 | f5: CMC | ||
289 | f6: Grp3_1 Eb (1A) | ||
290 | f7: Grp3_2 Ev (1A) | ||
291 | f8: CLC | ||
292 | f9: STC | ||
293 | fa: CLI | ||
294 | fb: STI | ||
295 | fc: CLD | ||
296 | fd: STD | ||
297 | fe: Grp4 (1A) | ||
298 | ff: Grp5 (1A) | ||
299 | EndTable | ||
300 | |||
301 | Table: 2-byte opcode (0x0f) | ||
302 | Referrer: 2-byte escape | ||
303 | AVXcode: 1 | ||
304 | # 0x0f 0x00-0x0f | ||
305 | 00: Grp6 (1A) | ||
306 | 01: Grp7 (1A) | ||
307 | 02: LAR Gv,Ew | ||
308 | 03: LSL Gv,Ew | ||
309 | 04: | ||
310 | 05: SYSCALL (o64) | ||
311 | 06: CLTS | ||
312 | 07: SYSRET (o64) | ||
313 | 08: INVD | ||
314 | 09: WBINVD | ||
315 | 0a: | ||
316 | 0b: UD2 (1B) | ||
317 | 0c: | ||
318 | 0d: NOP Ev | GrpP | ||
319 | 0e: FEMMS | ||
320 | # 3DNow! uses the last imm byte as opcode extension. | ||
321 | 0f: 3DNow! Pq,Qq,Ib | ||
322 | # 0x0f 0x10-0x1f | ||
323 | 10: movups Vps,Wps (VEX) | movss Vss,Wss (F3),(VEX),(o128) | movupd Vpd,Wpd (66),(VEX) | movsd Vsd,Wsd (F2),(VEX),(o128) | ||
324 | 11: movups Wps,Vps (VEX) | movss Wss,Vss (F3),(VEX),(o128) | movupd Wpd,Vpd (66),(VEX) | movsd Wsd,Vsd (F2),(VEX),(o128) | ||
325 | 12: movlps Vq,Mq (VEX),(o128) | movlpd Vq,Mq (66),(VEX),(o128) | movhlps Vq,Uq (VEX),(o128) | movddup Vq,Wq (F2),(VEX) | movsldup Vq,Wq (F3),(VEX) | ||
326 | 13: mpvlps Mq,Vq (VEX),(o128) | movlpd Mq,Vq (66),(VEX),(o128) | ||
327 | 14: unpcklps Vps,Wq (VEX) | unpcklpd Vpd,Wq (66),(VEX) | ||
328 | 15: unpckhps Vps,Wq (VEX) | unpckhpd Vpd,Wq (66),(VEX) | ||
329 | 16: movhps Vq,Mq (VEX),(o128) | movhpd Vq,Mq (66),(VEX),(o128) | movlsps Vq,Uq (VEX),(o128) | movshdup Vq,Wq (F3),(VEX) | ||
330 | 17: movhps Mq,Vq (VEX),(o128) | movhpd Mq,Vq (66),(VEX),(o128) | ||
331 | 18: Grp16 (1A) | ||
332 | 19: | ||
333 | 1a: | ||
334 | 1b: | ||
335 | 1c: | ||
336 | 1d: | ||
337 | 1e: | ||
338 | 1f: NOP Ev | ||
339 | # 0x0f 0x20-0x2f | ||
340 | 20: MOV Rd,Cd | ||
341 | 21: MOV Rd,Dd | ||
342 | 22: MOV Cd,Rd | ||
343 | 23: MOV Dd,Rd | ||
344 | 24: | ||
345 | 25: | ||
346 | 26: | ||
347 | 27: | ||
348 | 28: movaps Vps,Wps (VEX) | movapd Vpd,Wpd (66),(VEX) | ||
349 | 29: movaps Wps,Vps (VEX) | movapd Wpd,Vpd (66),(VEX) | ||
350 | 2a: cvtpi2ps Vps,Qpi | cvtsi2ss Vss,Ed/q (F3),(VEX),(o128) | cvtpi2pd Vpd,Qpi (66) | cvtsi2sd Vsd,Ed/q (F2),(VEX),(o128) | ||
351 | 2b: movntps Mps,Vps (VEX) | movntpd Mpd,Vpd (66),(VEX) | ||
352 | 2c: cvttps2pi Ppi,Wps | cvttss2si Gd/q,Wss (F3),(VEX),(o128) | cvttpd2pi Ppi,Wpd (66) | cvttsd2si Gd/q,Wsd (F2),(VEX),(o128) | ||
353 | 2d: cvtps2pi Ppi,Wps | cvtss2si Gd/q,Wss (F3),(VEX),(o128) | cvtpd2pi Qpi,Wpd (66) | cvtsd2si Gd/q,Wsd (F2),(VEX),(o128) | ||
354 | 2e: ucomiss Vss,Wss (VEX),(o128) | ucomisd Vsd,Wsd (66),(VEX),(o128) | ||
355 | 2f: comiss Vss,Wss (VEX),(o128) | comisd Vsd,Wsd (66),(VEX),(o128) | ||
356 | # 0x0f 0x30-0x3f | ||
357 | 30: WRMSR | ||
358 | 31: RDTSC | ||
359 | 32: RDMSR | ||
360 | 33: RDPMC | ||
361 | 34: SYSENTER | ||
362 | 35: SYSEXIT | ||
363 | 36: | ||
364 | 37: GETSEC | ||
365 | 38: escape # 3-byte escape 1 | ||
366 | 39: | ||
367 | 3a: escape # 3-byte escape 2 | ||
368 | 3b: | ||
369 | 3c: | ||
370 | 3d: | ||
371 | 3e: | ||
372 | 3f: | ||
373 | # 0x0f 0x40-0x4f | ||
374 | 40: CMOVO Gv,Ev | ||
375 | 41: CMOVNO Gv,Ev | ||
376 | 42: CMOVB/C/NAE Gv,Ev | ||
377 | 43: CMOVAE/NB/NC Gv,Ev | ||
378 | 44: CMOVE/Z Gv,Ev | ||
379 | 45: CMOVNE/NZ Gv,Ev | ||
380 | 46: CMOVBE/NA Gv,Ev | ||
381 | 47: CMOVA/NBE Gv,Ev | ||
382 | 48: CMOVS Gv,Ev | ||
383 | 49: CMOVNS Gv,Ev | ||
384 | 4a: CMOVP/PE Gv,Ev | ||
385 | 4b: CMOVNP/PO Gv,Ev | ||
386 | 4c: CMOVL/NGE Gv,Ev | ||
387 | 4d: CMOVNL/GE Gv,Ev | ||
388 | 4e: CMOVLE/NG Gv,Ev | ||
389 | 4f: CMOVNLE/G Gv,Ev | ||
390 | # 0x0f 0x50-0x5f | ||
391 | 50: movmskps Gd/q,Ups (VEX) | movmskpd Gd/q,Upd (66),(VEX) | ||
392 | 51: sqrtps Vps,Wps (VEX) | sqrtss Vss,Wss (F3),(VEX),(o128) | sqrtpd Vpd,Wpd (66),(VEX) | sqrtsd Vsd,Wsd (F2),(VEX),(o128) | ||
393 | 52: rsqrtps Vps,Wps (VEX) | rsqrtss Vss,Wss (F3),(VEX),(o128) | ||
394 | 53: rcpps Vps,Wps (VEX) | rcpss Vss,Wss (F3),(VEX),(o128) | ||
395 | 54: andps Vps,Wps (VEX) | andpd Vpd,Wpd (66),(VEX) | ||
396 | 55: andnps Vps,Wps (VEX) | andnpd Vpd,Wpd (66),(VEX) | ||
397 | 56: orps Vps,Wps (VEX) | orpd Vpd,Wpd (66),(VEX) | ||
398 | 57: xorps Vps,Wps (VEX) | xorpd Vpd,Wpd (66),(VEX) | ||
399 | 58: addps Vps,Wps (VEX) | addss Vss,Wss (F3),(VEX),(o128) | addpd Vpd,Wpd (66),(VEX) | addsd Vsd,Wsd (F2),(VEX),(o128) | ||
400 | 59: mulps Vps,Wps (VEX) | mulss Vss,Wss (F3),(VEX),(o128) | mulpd Vpd,Wpd (66),(VEX) | mulsd Vsd,Wsd (F2),(VEX),(o128) | ||
401 | 5a: cvtps2pd Vpd,Wps (VEX) | cvtss2sd Vsd,Wss (F3),(VEX),(o128) | cvtpd2ps Vps,Wpd (66),(VEX) | cvtsd2ss Vsd,Wsd (F2),(VEX),(o128) | ||
402 | 5b: cvtdq2ps Vps,Wdq (VEX) | cvtps2dq Vdq,Wps (66),(VEX) | cvttps2dq Vdq,Wps (F3),(VEX) | ||
403 | 5c: subps Vps,Wps (VEX) | subss Vss,Wss (F3),(VEX),(o128) | subpd Vpd,Wpd (66),(VEX) | subsd Vsd,Wsd (F2),(VEX),(o128) | ||
404 | 5d: minps Vps,Wps (VEX) | minss Vss,Wss (F3),(VEX),(o128) | minpd Vpd,Wpd (66),(VEX) | minsd Vsd,Wsd (F2),(VEX),(o128) | ||
405 | 5e: divps Vps,Wps (VEX) | divss Vss,Wss (F3),(VEX),(o128) | divpd Vpd,Wpd (66),(VEX) | divsd Vsd,Wsd (F2),(VEX),(o128) | ||
406 | 5f: maxps Vps,Wps (VEX) | maxss Vss,Wss (F3),(VEX),(o128) | maxpd Vpd,Wpd (66),(VEX) | maxsd Vsd,Wsd (F2),(VEX),(o128) | ||
407 | # 0x0f 0x60-0x6f | ||
408 | 60: punpcklbw Pq,Qd | punpcklbw Vdq,Wdq (66),(VEX),(o128) | ||
409 | 61: punpcklwd Pq,Qd | punpcklwd Vdq,Wdq (66),(VEX),(o128) | ||
410 | 62: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66),(VEX),(o128) | ||
411 | 63: packsswb Pq,Qq | packsswb Vdq,Wdq (66),(VEX),(o128) | ||
412 | 64: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66),(VEX),(o128) | ||
413 | 65: pcmpgtw Pq,Qq | pcmpgtw Vdq,Wdq (66),(VEX),(o128) | ||
414 | 66: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66),(VEX),(o128) | ||
415 | 67: packuswb Pq,Qq | packuswb Vdq,Wdq (66),(VEX),(o128) | ||
416 | 68: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66),(VEX),(o128) | ||
417 | 69: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66),(VEX),(o128) | ||
418 | 6a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66),(VEX),(o128) | ||
419 | 6b: packssdw Pq,Qd | packssdw Vdq,Wdq (66),(VEX),(o128) | ||
420 | 6c: punpcklqdq Vdq,Wdq (66),(VEX),(o128) | ||
421 | 6d: punpckhqdq Vdq,Wdq (66),(VEX),(o128) | ||
422 | 6e: movd/q/ Pd,Ed/q | movd/q Vdq,Ed/q (66),(VEX),(o128) | ||
423 | 6f: movq Pq,Qq | movdqa Vdq,Wdq (66),(VEX) | movdqu Vdq,Wdq (F3),(VEX) | ||
424 | # 0x0f 0x70-0x7f | ||
425 | 70: pshufw Pq,Qq,Ib | pshufd Vdq,Wdq,Ib (66),(VEX),(o128) | pshufhw Vdq,Wdq,Ib (F3),(VEX),(o128) | pshuflw VdqWdq,Ib (F2),(VEX),(o128) | ||
426 | 71: Grp12 (1A) | ||
427 | 72: Grp13 (1A) | ||
428 | 73: Grp14 (1A) | ||
429 | 74: pcmpeqb Pq,Qq | pcmpeqb Vdq,Wdq (66),(VEX),(o128) | ||
430 | 75: pcmpeqw Pq,Qq | pcmpeqw Vdq,Wdq (66),(VEX),(o128) | ||
431 | 76: pcmpeqd Pq,Qq | pcmpeqd Vdq,Wdq (66),(VEX),(o128) | ||
432 | 77: emms/vzeroupper/vzeroall (VEX) | ||
433 | 78: VMREAD Ed/q,Gd/q | ||
434 | 79: VMWRITE Gd/q,Ed/q | ||
435 | 7a: | ||
436 | 7b: | ||
437 | 7c: haddps Vps,Wps (F2),(VEX) | haddpd Vpd,Wpd (66),(VEX) | ||
438 | 7d: hsubps Vps,Wps (F2),(VEX) | hsubpd Vpd,Wpd (66),(VEX) | ||
439 | 7e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66),(VEX),(o128) | movq Vq,Wq (F3),(VEX),(o128) | ||
440 | 7f: movq Qq,Pq | movdqa Wdq,Vdq (66),(VEX) | movdqu Wdq,Vdq (F3),(VEX) | ||
441 | # 0x0f 0x80-0x8f | ||
442 | 80: JO Jz (f64) | ||
443 | 81: JNO Jz (f64) | ||
444 | 82: JB/JNAE/JC Jz (f64) | ||
445 | 83: JNB/JAE/JNC Jz (f64) | ||
446 | 84: JZ/JE Jz (f64) | ||
447 | 85: JNZ/JNE Jz (f64) | ||
448 | 86: JBE/JNA Jz (f64) | ||
449 | 87: JNBE/JA Jz (f64) | ||
450 | 88: JS Jz (f64) | ||
451 | 89: JNS Jz (f64) | ||
452 | 8a: JP/JPE Jz (f64) | ||
453 | 8b: JNP/JPO Jz (f64) | ||
454 | 8c: JL/JNGE Jz (f64) | ||
455 | 8d: JNL/JGE Jz (f64) | ||
456 | 8e: JLE/JNG Jz (f64) | ||
457 | 8f: JNLE/JG Jz (f64) | ||
458 | # 0x0f 0x90-0x9f | ||
459 | 90: SETO Eb | ||
460 | 91: SETNO Eb | ||
461 | 92: SETB/C/NAE Eb | ||
462 | 93: SETAE/NB/NC Eb | ||
463 | 94: SETE/Z Eb | ||
464 | 95: SETNE/NZ Eb | ||
465 | 96: SETBE/NA Eb | ||
466 | 97: SETA/NBE Eb | ||
467 | 98: SETS Eb | ||
468 | 99: SETNS Eb | ||
469 | 9a: SETP/PE Eb | ||
470 | 9b: SETNP/PO Eb | ||
471 | 9c: SETL/NGE Eb | ||
472 | 9d: SETNL/GE Eb | ||
473 | 9e: SETLE/NG Eb | ||
474 | 9f: SETNLE/G Eb | ||
475 | # 0x0f 0xa0-0xaf | ||
476 | a0: PUSH FS (d64) | ||
477 | a1: POP FS (d64) | ||
478 | a2: CPUID | ||
479 | a3: BT Ev,Gv | ||
480 | a4: SHLD Ev,Gv,Ib | ||
481 | a5: SHLD Ev,Gv,CL | ||
482 | a6: GrpPDLK | ||
483 | a7: GrpRNG | ||
484 | a8: PUSH GS (d64) | ||
485 | a9: POP GS (d64) | ||
486 | aa: RSM | ||
487 | ab: BTS Ev,Gv | ||
488 | ac: SHRD Ev,Gv,Ib | ||
489 | ad: SHRD Ev,Gv,CL | ||
490 | ae: Grp15 (1A),(1C) | ||
491 | af: IMUL Gv,Ev | ||
492 | # 0x0f 0xb0-0xbf | ||
493 | b0: CMPXCHG Eb,Gb | ||
494 | b1: CMPXCHG Ev,Gv | ||
495 | b2: LSS Gv,Mp | ||
496 | b3: BTR Ev,Gv | ||
497 | b4: LFS Gv,Mp | ||
498 | b5: LGS Gv,Mp | ||
499 | b6: MOVZX Gv,Eb | ||
500 | b7: MOVZX Gv,Ew | ||
501 | b8: JMPE | POPCNT Gv,Ev (F3) | ||
502 | b9: Grp10 (1A) | ||
503 | ba: Grp8 Ev,Ib (1A) | ||
504 | bb: BTC Ev,Gv | ||
505 | bc: BSF Gv,Ev | ||
506 | bd: BSR Gv,Ev | ||
507 | be: MOVSX Gv,Eb | ||
508 | bf: MOVSX Gv,Ew | ||
509 | # 0x0f 0xc0-0xcf | ||
510 | c0: XADD Eb,Gb | ||
511 | c1: XADD Ev,Gv | ||
512 | c2: cmpps Vps,Wps,Ib (VEX) | cmpss Vss,Wss,Ib (F3),(VEX),(o128) | cmppd Vpd,Wpd,Ib (66),(VEX) | cmpsd Vsd,Wsd,Ib (F2),(VEX) | ||
513 | c3: movnti Md/q,Gd/q | ||
514 | c4: pinsrw Pq,Rd/q/Mw,Ib | pinsrw Vdq,Rd/q/Mw,Ib (66),(VEX),(o128) | ||
515 | c5: pextrw Gd,Nq,Ib | pextrw Gd,Udq,Ib (66),(VEX),(o128) | ||
516 | c6: shufps Vps,Wps,Ib (VEX) | shufpd Vpd,Wpd,Ib (66),(VEX) | ||
517 | c7: Grp9 (1A) | ||
518 | c8: BSWAP RAX/EAX/R8/R8D | ||
519 | c9: BSWAP RCX/ECX/R9/R9D | ||
520 | ca: BSWAP RDX/EDX/R10/R10D | ||
521 | cb: BSWAP RBX/EBX/R11/R11D | ||
522 | cc: BSWAP RSP/ESP/R12/R12D | ||
523 | cd: BSWAP RBP/EBP/R13/R13D | ||
524 | ce: BSWAP RSI/ESI/R14/R14D | ||
525 | cf: BSWAP RDI/EDI/R15/R15D | ||
526 | # 0x0f 0xd0-0xdf | ||
527 | d0: addsubps Vps,Wps (F2),(VEX) | addsubpd Vpd,Wpd (66),(VEX) | ||
528 | d1: psrlw Pq,Qq | psrlw Vdq,Wdq (66),(VEX),(o128) | ||
529 | d2: psrld Pq,Qq | psrld Vdq,Wdq (66),(VEX),(o128) | ||
530 | d3: psrlq Pq,Qq | psrlq Vdq,Wdq (66),(VEX),(o128) | ||
531 | d4: paddq Pq,Qq | paddq Vdq,Wdq (66),(VEX),(o128) | ||
532 | d5: pmullw Pq,Qq | pmullw Vdq,Wdq (66),(VEX),(o128) | ||
533 | d6: movq Wq,Vq (66),(VEX),(o128) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) | ||
534 | d7: pmovmskb Gd,Nq | pmovmskb Gd,Udq (66),(VEX),(o128) | ||
535 | d8: psubusb Pq,Qq | psubusb Vdq,Wdq (66),(VEX),(o128) | ||
536 | d9: psubusw Pq,Qq | psubusw Vdq,Wdq (66),(VEX),(o128) | ||
537 | da: pminub Pq,Qq | pminub Vdq,Wdq (66),(VEX),(o128) | ||
538 | db: pand Pq,Qq | pand Vdq,Wdq (66),(VEX),(o128) | ||
539 | dc: paddusb Pq,Qq | paddusb Vdq,Wdq (66),(VEX),(o128) | ||
540 | dd: paddusw Pq,Qq | paddusw Vdq,Wdq (66),(VEX),(o128) | ||
541 | de: pmaxub Pq,Qq | pmaxub Vdq,Wdq (66),(VEX),(o128) | ||
542 | df: pandn Pq,Qq | pandn Vdq,Wdq (66),(VEX),(o128) | ||
543 | # 0x0f 0xe0-0xef | ||
544 | e0: pavgb Pq,Qq | pavgb Vdq,Wdq (66),(VEX),(o128) | ||
545 | e1: psraw Pq,Qq | psraw Vdq,Wdq (66),(VEX),(o128) | ||
546 | e2: psrad Pq,Qq | psrad Vdq,Wdq (66),(VEX),(o128) | ||
547 | e3: pavgw Pq,Qq | pavgw Vdq,Wdq (66),(VEX),(o128) | ||
548 | e4: pmulhuw Pq,Qq | pmulhuw Vdq,Wdq (66),(VEX),(o128) | ||
549 | e5: pmulhw Pq,Qq | pmulhw Vdq,Wdq (66),(VEX),(o128) | ||
550 | e6: cvtpd2dq Vdq,Wpd (F2),(VEX) | cvttpd2dq Vdq,Wpd (66),(VEX) | cvtdq2pd Vpd,Wdq (F3),(VEX) | ||
551 | e7: movntq Mq,Pq | movntdq Mdq,Vdq (66),(VEX) | ||
552 | e8: psubsb Pq,Qq | psubsb Vdq,Wdq (66),(VEX),(o128) | ||
553 | e9: psubsw Pq,Qq | psubsw Vdq,Wdq (66),(VEX),(o128) | ||
554 | ea: pminsw Pq,Qq | pminsw Vdq,Wdq (66),(VEX),(o128) | ||
555 | eb: por Pq,Qq | por Vdq,Wdq (66),(VEX),(o128) | ||
556 | ec: paddsb Pq,Qq | paddsb Vdq,Wdq (66),(VEX),(o128) | ||
557 | ed: paddsw Pq,Qq | paddsw Vdq,Wdq (66),(VEX),(o128) | ||
558 | ee: pmaxsw Pq,Qq | pmaxsw Vdq,Wdq (66),(VEX),(o128) | ||
559 | ef: pxor Pq,Qq | pxor Vdq,Wdq (66),(VEX),(o128) | ||
560 | # 0x0f 0xf0-0xff | ||
561 | f0: lddqu Vdq,Mdq (F2),(VEX) | ||
562 | f1: psllw Pq,Qq | psllw Vdq,Wdq (66),(VEX),(o128) | ||
563 | f2: pslld Pq,Qq | pslld Vdq,Wdq (66),(VEX),(o128) | ||
564 | f3: psllq Pq,Qq | psllq Vdq,Wdq (66),(VEX),(o128) | ||
565 | f4: pmuludq Pq,Qq | pmuludq Vdq,Wdq (66),(VEX),(o128) | ||
566 | f5: pmaddwd Pq,Qq | pmaddwd Vdq,Wdq (66),(VEX),(o128) | ||
567 | f6: psadbw Pq,Qq | psadbw Vdq,Wdq (66),(VEX),(o128) | ||
568 | f7: maskmovq Pq,Nq | maskmovdqu Vdq,Udq (66),(VEX),(o128) | ||
569 | f8: psubb Pq,Qq | psubb Vdq,Wdq (66),(VEX),(o128) | ||
570 | f9: psubw Pq,Qq | psubw Vdq,Wdq (66),(VEX),(o128) | ||
571 | fa: psubd Pq,Qq | psubd Vdq,Wdq (66),(VEX),(o128) | ||
572 | fb: psubq Pq,Qq | psubq Vdq,Wdq (66),(VEX),(o128) | ||
573 | fc: paddb Pq,Qq | paddb Vdq,Wdq (66),(VEX),(o128) | ||
574 | fd: paddw Pq,Qq | paddw Vdq,Wdq (66),(VEX),(o128) | ||
575 | fe: paddd Pq,Qq | paddd Vdq,Wdq (66),(VEX),(o128) | ||
576 | ff: | ||
577 | EndTable | ||
578 | |||
579 | Table: 3-byte opcode 1 (0x0f 0x38) | ||
580 | Referrer: 3-byte escape 1 | ||
581 | AVXcode: 2 | ||
582 | # 0x0f 0x38 0x00-0x0f | ||
583 | 00: pshufb Pq,Qq | pshufb Vdq,Wdq (66),(VEX),(o128) | ||
584 | 01: phaddw Pq,Qq | phaddw Vdq,Wdq (66),(VEX),(o128) | ||
585 | 02: phaddd Pq,Qq | phaddd Vdq,Wdq (66),(VEX),(o128) | ||
586 | 03: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66),(VEX),(o128) | ||
587 | 04: pmaddubsw Pq,Qq | pmaddubsw Vdq,Wdq (66),(VEX),(o128) | ||
588 | 05: phsubw Pq,Qq | phsubw Vdq,Wdq (66),(VEX),(o128) | ||
589 | 06: phsubd Pq,Qq | phsubd Vdq,Wdq (66),(VEX),(o128) | ||
590 | 07: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66),(VEX),(o128) | ||
591 | 08: psignb Pq,Qq | psignb Vdq,Wdq (66),(VEX),(o128) | ||
592 | 09: psignw Pq,Qq | psignw Vdq,Wdq (66),(VEX),(o128) | ||
593 | 0a: psignd Pq,Qq | psignd Vdq,Wdq (66),(VEX),(o128) | ||
594 | 0b: pmulhrsw Pq,Qq | pmulhrsw Vdq,Wdq (66),(VEX),(o128) | ||
595 | 0c: Vpermilps /r (66),(oVEX) | ||
596 | 0d: Vpermilpd /r (66),(oVEX) | ||
597 | 0e: vtestps /r (66),(oVEX) | ||
598 | 0f: vtestpd /r (66),(oVEX) | ||
599 | # 0x0f 0x38 0x10-0x1f | ||
600 | 10: pblendvb Vdq,Wdq (66) | ||
601 | 11: | ||
602 | 12: | ||
603 | 13: | ||
604 | 14: blendvps Vdq,Wdq (66) | ||
605 | 15: blendvpd Vdq,Wdq (66) | ||
606 | 16: | ||
607 | 17: ptest Vdq,Wdq (66),(VEX) | ||
608 | 18: vbroadcastss /r (66),(oVEX) | ||
609 | 19: vbroadcastsd /r (66),(oVEX),(o256) | ||
610 | 1a: vbroadcastf128 /r (66),(oVEX),(o256) | ||
611 | 1b: | ||
612 | 1c: pabsb Pq,Qq | pabsb Vdq,Wdq (66),(VEX),(o128) | ||
613 | 1d: pabsw Pq,Qq | pabsw Vdq,Wdq (66),(VEX),(o128) | ||
614 | 1e: pabsd Pq,Qq | pabsd Vdq,Wdq (66),(VEX),(o128) | ||
615 | 1f: | ||
616 | # 0x0f 0x38 0x20-0x2f | ||
617 | 20: pmovsxbw Vdq,Udq/Mq (66),(VEX),(o128) | ||
618 | 21: pmovsxbd Vdq,Udq/Md (66),(VEX),(o128) | ||
619 | 22: pmovsxbq Vdq,Udq/Mw (66),(VEX),(o128) | ||
620 | 23: pmovsxwd Vdq,Udq/Mq (66),(VEX),(o128) | ||
621 | 24: pmovsxwq Vdq,Udq/Md (66),(VEX),(o128) | ||
622 | 25: pmovsxdq Vdq,Udq/Mq (66),(VEX),(o128) | ||
623 | 26: | ||
624 | 27: | ||
625 | 28: pmuldq Vdq,Wdq (66),(VEX),(o128) | ||
626 | 29: pcmpeqq Vdq,Wdq (66),(VEX),(o128) | ||
627 | 2a: movntdqa Vdq,Mdq (66),(VEX),(o128) | ||
628 | 2b: packusdw Vdq,Wdq (66),(VEX),(o128) | ||
629 | 2c: vmaskmovps(ld) /r (66),(oVEX) | ||
630 | 2d: vmaskmovpd(ld) /r (66),(oVEX) | ||
631 | 2e: vmaskmovps(st) /r (66),(oVEX) | ||
632 | 2f: vmaskmovpd(st) /r (66),(oVEX) | ||
633 | # 0x0f 0x38 0x30-0x3f | ||
634 | 30: pmovzxbw Vdq,Udq/Mq (66),(VEX),(o128) | ||
635 | 31: pmovzxbd Vdq,Udq/Md (66),(VEX),(o128) | ||
636 | 32: pmovzxbq Vdq,Udq/Mw (66),(VEX),(o128) | ||
637 | 33: pmovzxwd Vdq,Udq/Mq (66),(VEX),(o128) | ||
638 | 34: pmovzxwq Vdq,Udq/Md (66),(VEX),(o128) | ||
639 | 35: pmovzxdq Vdq,Udq/Mq (66),(VEX),(o128) | ||
640 | 36: | ||
641 | 37: pcmpgtq Vdq,Wdq (66),(VEX),(o128) | ||
642 | 38: pminsb Vdq,Wdq (66),(VEX),(o128) | ||
643 | 39: pminsd Vdq,Wdq (66),(VEX),(o128) | ||
644 | 3a: pminuw Vdq,Wdq (66),(VEX),(o128) | ||
645 | 3b: pminud Vdq,Wdq (66),(VEX),(o128) | ||
646 | 3c: pmaxsb Vdq,Wdq (66),(VEX),(o128) | ||
647 | 3d: pmaxsd Vdq,Wdq (66),(VEX),(o128) | ||
648 | 3e: pmaxuw Vdq,Wdq (66),(VEX),(o128) | ||
649 | 3f: pmaxud Vdq,Wdq (66),(VEX),(o128) | ||
650 | # 0x0f 0x38 0x40-0x8f | ||
651 | 40: pmulld Vdq,Wdq (66),(VEX),(o128) | ||
652 | 41: phminposuw Vdq,Wdq (66),(VEX),(o128) | ||
653 | 80: INVEPT Gd/q,Mdq (66) | ||
654 | 81: INVPID Gd/q,Mdq (66) | ||
655 | # 0x0f 0x38 0x90-0xbf (FMA) | ||
656 | 96: vfmaddsub132pd/ps /r (66),(VEX) | ||
657 | 97: vfmsubadd132pd/ps /r (66),(VEX) | ||
658 | 98: vfmadd132pd/ps /r (66),(VEX) | ||
659 | 99: vfmadd132sd/ss /r (66),(VEX),(o128) | ||
660 | 9a: vfmsub132pd/ps /r (66),(VEX) | ||
661 | 9b: vfmsub132sd/ss /r (66),(VEX),(o128) | ||
662 | 9c: vfnmadd132pd/ps /r (66),(VEX) | ||
663 | 9d: vfnmadd132sd/ss /r (66),(VEX),(o128) | ||
664 | 9e: vfnmsub132pd/ps /r (66),(VEX) | ||
665 | 9f: vfnmsub132sd/ss /r (66),(VEX),(o128) | ||
666 | a6: vfmaddsub213pd/ps /r (66),(VEX) | ||
667 | a7: vfmsubadd213pd/ps /r (66),(VEX) | ||
668 | a8: vfmadd213pd/ps /r (66),(VEX) | ||
669 | a9: vfmadd213sd/ss /r (66),(VEX),(o128) | ||
670 | aa: vfmsub213pd/ps /r (66),(VEX) | ||
671 | ab: vfmsub213sd/ss /r (66),(VEX),(o128) | ||
672 | ac: vfnmadd213pd/ps /r (66),(VEX) | ||
673 | ad: vfnmadd213sd/ss /r (66),(VEX),(o128) | ||
674 | ae: vfnmsub213pd/ps /r (66),(VEX) | ||
675 | af: vfnmsub213sd/ss /r (66),(VEX),(o128) | ||
676 | b6: vfmaddsub231pd/ps /r (66),(VEX) | ||
677 | b7: vfmsubadd231pd/ps /r (66),(VEX) | ||
678 | b8: vfmadd231pd/ps /r (66),(VEX) | ||
679 | b9: vfmadd231sd/ss /r (66),(VEX),(o128) | ||
680 | ba: vfmsub231pd/ps /r (66),(VEX) | ||
681 | bb: vfmsub231sd/ss /r (66),(VEX),(o128) | ||
682 | bc: vfnmadd231pd/ps /r (66),(VEX) | ||
683 | bd: vfnmadd231sd/ss /r (66),(VEX),(o128) | ||
684 | be: vfnmsub231pd/ps /r (66),(VEX) | ||
685 | bf: vfnmsub231sd/ss /r (66),(VEX),(o128) | ||
686 | # 0x0f 0x38 0xc0-0xff | ||
687 | db: aesimc Vdq,Wdq (66),(VEX),(o128) | ||
688 | dc: aesenc Vdq,Wdq (66),(VEX),(o128) | ||
689 | dd: aesenclast Vdq,Wdq (66),(VEX),(o128) | ||
690 | de: aesdec Vdq,Wdq (66),(VEX),(o128) | ||
691 | df: aesdeclast Vdq,Wdq (66),(VEX),(o128) | ||
692 | f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2) | ||
693 | f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2) | ||
694 | EndTable | ||
695 | |||
696 | Table: 3-byte opcode 2 (0x0f 0x3a) | ||
697 | Referrer: 3-byte escape 2 | ||
698 | AVXcode: 3 | ||
699 | # 0x0f 0x3a 0x00-0xff | ||
700 | 04: vpermilps /r,Ib (66),(oVEX) | ||
701 | 05: vpermilpd /r,Ib (66),(oVEX) | ||
702 | 06: vperm2f128 /r,Ib (66),(oVEX),(o256) | ||
703 | 08: roundps Vdq,Wdq,Ib (66),(VEX) | ||
704 | 09: roundpd Vdq,Wdq,Ib (66),(VEX) | ||
705 | 0a: roundss Vss,Wss,Ib (66),(VEX),(o128) | ||
706 | 0b: roundsd Vsd,Wsd,Ib (66),(VEX),(o128) | ||
707 | 0c: blendps Vdq,Wdq,Ib (66),(VEX) | ||
708 | 0d: blendpd Vdq,Wdq,Ib (66),(VEX) | ||
709 | 0e: pblendw Vdq,Wdq,Ib (66),(VEX),(o128) | ||
710 | 0f: palignr Pq,Qq,Ib | palignr Vdq,Wdq,Ib (66),(VEX),(o128) | ||
711 | 14: pextrb Rd/Mb,Vdq,Ib (66),(VEX),(o128) | ||
712 | 15: pextrw Rd/Mw,Vdq,Ib (66),(VEX),(o128) | ||
713 | 16: pextrd/pextrq Ed/q,Vdq,Ib (66),(VEX),(o128) | ||
714 | 17: extractps Ed,Vdq,Ib (66),(VEX),(o128) | ||
715 | 18: vinsertf128 /r,Ib (66),(oVEX),(o256) | ||
716 | 19: vextractf128 /r,Ib (66),(oVEX),(o256) | ||
717 | 20: pinsrb Vdq,Rd/q/Mb,Ib (66),(VEX),(o128) | ||
718 | 21: insertps Vdq,Udq/Md,Ib (66),(VEX),(o128) | ||
719 | 22: pinsrd/pinsrq Vdq,Ed/q,Ib (66),(VEX),(o128) | ||
720 | 40: dpps Vdq,Wdq,Ib (66),(VEX) | ||
721 | 41: dppd Vdq,Wdq,Ib (66),(VEX),(o128) | ||
722 | 42: mpsadbw Vdq,Wdq,Ib (66),(VEX),(o128) | ||
723 | 44: pclmulq Vdq,Wdq,Ib (66),(VEX),(o128) | ||
724 | 4a: vblendvps /r,Ib (66),(oVEX) | ||
725 | 4b: vblendvpd /r,Ib (66),(oVEX) | ||
726 | 4c: vpblendvb /r,Ib (66),(oVEX),(o128) | ||
727 | 60: pcmpestrm Vdq,Wdq,Ib (66),(VEX),(o128) | ||
728 | 61: pcmpestri Vdq,Wdq,Ib (66),(VEX),(o128) | ||
729 | 62: pcmpistrm Vdq,Wdq,Ib (66),(VEX),(o128) | ||
730 | 63: pcmpistri Vdq,Wdq,Ib (66),(VEX),(o128) | ||
731 | df: aeskeygenassist Vdq,Wdq,Ib (66),(VEX),(o128) | ||
732 | EndTable | ||
733 | |||
734 | GrpTable: Grp1 | ||
735 | 0: ADD | ||
736 | 1: OR | ||
737 | 2: ADC | ||
738 | 3: SBB | ||
739 | 4: AND | ||
740 | 5: SUB | ||
741 | 6: XOR | ||
742 | 7: CMP | ||
743 | EndTable | ||
744 | |||
745 | GrpTable: Grp1A | ||
746 | 0: POP | ||
747 | EndTable | ||
748 | |||
749 | GrpTable: Grp2 | ||
750 | 0: ROL | ||
751 | 1: ROR | ||
752 | 2: RCL | ||
753 | 3: RCR | ||
754 | 4: SHL/SAL | ||
755 | 5: SHR | ||
756 | 6: | ||
757 | 7: SAR | ||
758 | EndTable | ||
759 | |||
760 | GrpTable: Grp3_1 | ||
761 | 0: TEST Eb,Ib | ||
762 | 1: | ||
763 | 2: NOT Eb | ||
764 | 3: NEG Eb | ||
765 | 4: MUL AL,Eb | ||
766 | 5: IMUL AL,Eb | ||
767 | 6: DIV AL,Eb | ||
768 | 7: IDIV AL,Eb | ||
769 | EndTable | ||
770 | |||
771 | GrpTable: Grp3_2 | ||
772 | 0: TEST Ev,Iz | ||
773 | 1: | ||
774 | 2: NOT Ev | ||
775 | 3: NEG Ev | ||
776 | 4: MUL rAX,Ev | ||
777 | 5: IMUL rAX,Ev | ||
778 | 6: DIV rAX,Ev | ||
779 | 7: IDIV rAX,Ev | ||
780 | EndTable | ||
781 | |||
782 | GrpTable: Grp4 | ||
783 | 0: INC Eb | ||
784 | 1: DEC Eb | ||
785 | EndTable | ||
786 | |||
787 | GrpTable: Grp5 | ||
788 | 0: INC Ev | ||
789 | 1: DEC Ev | ||
790 | 2: CALLN Ev (f64) | ||
791 | 3: CALLF Ep | ||
792 | 4: JMPN Ev (f64) | ||
793 | 5: JMPF Ep | ||
794 | 6: PUSH Ev (d64) | ||
795 | 7: | ||
796 | EndTable | ||
797 | |||
798 | GrpTable: Grp6 | ||
799 | 0: SLDT Rv/Mw | ||
800 | 1: STR Rv/Mw | ||
801 | 2: LLDT Ew | ||
802 | 3: LTR Ew | ||
803 | 4: VERR Ew | ||
804 | 5: VERW Ew | ||
805 | EndTable | ||
806 | |||
807 | GrpTable: Grp7 | ||
808 | 0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) | ||
809 | 1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001) | ||
810 | 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | ||
811 | 3: LIDT Ms | ||
812 | 4: SMSW Mw/Rv | ||
813 | 5: | ||
814 | 6: LMSW Ew | ||
815 | 7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) | ||
816 | EndTable | ||
817 | |||
818 | GrpTable: Grp8 | ||
819 | 4: BT | ||
820 | 5: BTS | ||
821 | 6: BTR | ||
822 | 7: BTC | ||
823 | EndTable | ||
824 | |||
825 | GrpTable: Grp9 | ||
826 | 1: CMPXCHG8B/16B Mq/Mdq | ||
827 | 6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | ||
828 | 7: VMPTRST Mq | ||
829 | EndTable | ||
830 | |||
831 | GrpTable: Grp10 | ||
832 | EndTable | ||
833 | |||
834 | GrpTable: Grp11 | ||
835 | 0: MOV | ||
836 | EndTable | ||
837 | |||
838 | GrpTable: Grp12 | ||
839 | 2: psrlw Nq,Ib (11B) | psrlw Udq,Ib (66),(11B),(VEX),(o128) | ||
840 | 4: psraw Nq,Ib (11B) | psraw Udq,Ib (66),(11B),(VEX),(o128) | ||
841 | 6: psllw Nq,Ib (11B) | psllw Udq,Ib (66),(11B),(VEX),(o128) | ||
842 | EndTable | ||
843 | |||
844 | GrpTable: Grp13 | ||
845 | 2: psrld Nq,Ib (11B) | psrld Udq,Ib (66),(11B),(VEX),(o128) | ||
846 | 4: psrad Nq,Ib (11B) | psrad Udq,Ib (66),(11B),(VEX),(o128) | ||
847 | 6: pslld Nq,Ib (11B) | pslld Udq,Ib (66),(11B),(VEX),(o128) | ||
848 | EndTable | ||
849 | |||
850 | GrpTable: Grp14 | ||
851 | 2: psrlq Nq,Ib (11B) | psrlq Udq,Ib (66),(11B),(VEX),(o128) | ||
852 | 3: psrldq Udq,Ib (66),(11B),(VEX),(o128) | ||
853 | 6: psllq Nq,Ib (11B) | psllq Udq,Ib (66),(11B),(VEX),(o128) | ||
854 | 7: pslldq Udq,Ib (66),(11B),(VEX),(o128) | ||
855 | EndTable | ||
856 | |||
857 | GrpTable: Grp15 | ||
858 | 0: fxsave | ||
859 | 1: fxstor | ||
860 | 2: ldmxcsr (VEX) | ||
861 | 3: stmxcsr (VEX) | ||
862 | 4: XSAVE | ||
863 | 5: XRSTOR | lfence (11B) | ||
864 | 6: mfence (11B) | ||
865 | 7: clflush | sfence (11B) | ||
866 | EndTable | ||
867 | |||
868 | GrpTable: Grp16 | ||
869 | 0: prefetch NTA | ||
870 | 1: prefetch T0 | ||
871 | 2: prefetch T1 | ||
872 | 3: prefetch T2 | ||
873 | EndTable | ||
874 | |||
875 | # AMD's Prefetch Group | ||
876 | GrpTable: GrpP | ||
877 | 0: PREFETCH | ||
878 | 1: PREFETCHW | ||
879 | EndTable | ||
880 | |||
881 | GrpTable: GrpPDLK | ||
882 | 0: MONTMUL | ||
883 | 1: XSHA1 | ||
884 | 2: XSHA2 | ||
885 | EndTable | ||
886 | |||
887 | GrpTable: GrpRNG | ||
888 | 0: xstore-rng | ||
889 | 1: xcrypt-ecb | ||
890 | 2: xcrypt-cbc | ||
891 | 4: xcrypt-cfb | ||
892 | 5: xcrypt-ofb | ||
893 | EndTable | ||
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 61b41ca3b5a2..d0474ad2a6e5 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c | |||
@@ -35,34 +35,3 @@ int fixup_exception(struct pt_regs *regs) | |||
35 | 35 | ||
36 | return 0; | 36 | return 0; |
37 | } | 37 | } |
38 | |||
39 | #ifdef CONFIG_X86_64 | ||
40 | /* | ||
41 | * Need to defined our own search_extable on X86_64 to work around | ||
42 | * a B stepping K8 bug. | ||
43 | */ | ||
44 | const struct exception_table_entry * | ||
45 | search_extable(const struct exception_table_entry *first, | ||
46 | const struct exception_table_entry *last, | ||
47 | unsigned long value) | ||
48 | { | ||
49 | /* B stepping K8 bug */ | ||
50 | if ((value >> 32) == 0) | ||
51 | value |= 0xffffffffUL << 32; | ||
52 | |||
53 | while (first <= last) { | ||
54 | const struct exception_table_entry *mid; | ||
55 | long diff; | ||
56 | |||
57 | mid = (last - first) / 2 + first; | ||
58 | diff = mid->insn - value; | ||
59 | if (diff == 0) | ||
60 | return mid; | ||
61 | else if (diff < 0) | ||
62 | first = mid+1; | ||
63 | else | ||
64 | last = mid-1; | ||
65 | } | ||
66 | return NULL; | ||
67 | } | ||
68 | #endif | ||
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index f4cee9028cf0..f62777940dfb 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -38,7 +38,8 @@ enum x86_pf_error_code { | |||
38 | * Returns 0 if mmiotrace is disabled, or if the fault is not | 38 | * Returns 0 if mmiotrace is disabled, or if the fault is not |
39 | * handled by mmiotrace: | 39 | * handled by mmiotrace: |
40 | */ | 40 | */ |
41 | static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) | 41 | static inline int __kprobes |
42 | kmmio_fault(struct pt_regs *regs, unsigned long addr) | ||
42 | { | 43 | { |
43 | if (unlikely(is_kmmio_active())) | 44 | if (unlikely(is_kmmio_active())) |
44 | if (kmmio_handler(regs, addr) == 1) | 45 | if (kmmio_handler(regs, addr) == 1) |
@@ -46,7 +47,7 @@ static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) | |||
46 | return 0; | 47 | return 0; |
47 | } | 48 | } |
48 | 49 | ||
49 | static inline int notify_page_fault(struct pt_regs *regs) | 50 | static inline int __kprobes notify_page_fault(struct pt_regs *regs) |
50 | { | 51 | { |
51 | int ret = 0; | 52 | int ret = 0; |
52 | 53 | ||
@@ -240,7 +241,7 @@ void vmalloc_sync_all(void) | |||
240 | * | 241 | * |
241 | * Handle a fault on the vmalloc or module mapping area | 242 | * Handle a fault on the vmalloc or module mapping area |
242 | */ | 243 | */ |
243 | static noinline int vmalloc_fault(unsigned long address) | 244 | static noinline __kprobes int vmalloc_fault(unsigned long address) |
244 | { | 245 | { |
245 | unsigned long pgd_paddr; | 246 | unsigned long pgd_paddr; |
246 | pmd_t *pmd_k; | 247 | pmd_t *pmd_k; |
@@ -357,7 +358,7 @@ void vmalloc_sync_all(void) | |||
357 | * | 358 | * |
358 | * This assumes no large pages in there. | 359 | * This assumes no large pages in there. |
359 | */ | 360 | */ |
360 | static noinline int vmalloc_fault(unsigned long address) | 361 | static noinline __kprobes int vmalloc_fault(unsigned long address) |
361 | { | 362 | { |
362 | pgd_t *pgd, *pgd_ref; | 363 | pgd_t *pgd, *pgd_ref; |
363 | pud_t *pud, *pud_ref; | 364 | pud_t *pud, *pud_ref; |
@@ -658,7 +659,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, | |||
658 | show_fault_oops(regs, error_code, address); | 659 | show_fault_oops(regs, error_code, address); |
659 | 660 | ||
660 | stackend = end_of_stack(tsk); | 661 | stackend = end_of_stack(tsk); |
661 | if (*stackend != STACK_END_MAGIC) | 662 | if (tsk != &init_task && *stackend != STACK_END_MAGIC) |
662 | printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); | 663 | printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); |
663 | 664 | ||
664 | tsk->thread.cr2 = address; | 665 | tsk->thread.cr2 = address; |
@@ -860,7 +861,7 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte) | |||
860 | * There are no security implications to leaving a stale TLB when | 861 | * There are no security implications to leaving a stale TLB when |
861 | * increasing the permissions on a page. | 862 | * increasing the permissions on a page. |
862 | */ | 863 | */ |
863 | static noinline int | 864 | static noinline __kprobes int |
864 | spurious_fault(unsigned long error_code, unsigned long address) | 865 | spurious_fault(unsigned long error_code, unsigned long address) |
865 | { | 866 | { |
866 | pgd_t *pgd; | 867 | pgd_t *pgd; |
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 16ccbd77917f..11a4ad4d6253 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c | |||
@@ -540,8 +540,14 @@ kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args) | |||
540 | struct die_args *arg = args; | 540 | struct die_args *arg = args; |
541 | 541 | ||
542 | if (val == DIE_DEBUG && (arg->err & DR_STEP)) | 542 | if (val == DIE_DEBUG && (arg->err & DR_STEP)) |
543 | if (post_kmmio_handler(arg->err, arg->regs) == 1) | 543 | if (post_kmmio_handler(arg->err, arg->regs) == 1) { |
544 | /* | ||
545 | * Reset the BS bit in dr6 (pointed by args->err) to | ||
546 | * denote completion of processing | ||
547 | */ | ||
548 | (*(unsigned long *)ERR_PTR(arg->err)) &= ~DR_STEP; | ||
544 | return NOTIFY_STOP; | 549 | return NOTIFY_STOP; |
550 | } | ||
545 | 551 | ||
546 | return NOTIFY_DONE; | 552 | return NOTIFY_DONE; |
547 | } | 553 | } |
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index dbb5381f7b3b..9d7ce96e5a5c 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -136,7 +136,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) | |||
136 | apicid_to_node[apic_id] = node; | 136 | apicid_to_node[apic_id] = node; |
137 | node_set(node, cpu_nodes_parsed); | 137 | node_set(node, cpu_nodes_parsed); |
138 | acpi_numa = 1; | 138 | acpi_numa = 1; |
139 | printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n", | 139 | printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n", |
140 | pxm, apic_id, node); | 140 | pxm, apic_id, node); |
141 | } | 141 | } |
142 | 142 | ||
@@ -170,7 +170,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) | |||
170 | apicid_to_node[apic_id] = node; | 170 | apicid_to_node[apic_id] = node; |
171 | node_set(node, cpu_nodes_parsed); | 171 | node_set(node, cpu_nodes_parsed); |
172 | acpi_numa = 1; | 172 | acpi_numa = 1; |
173 | printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n", | 173 | printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n", |
174 | pxm, apic_id, node); | 174 | pxm, apic_id, node); |
175 | } | 175 | } |
176 | 176 | ||
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c index 427fd1b56df5..8565d944f7cf 100644 --- a/arch/x86/mm/testmmiotrace.c +++ b/arch/x86/mm/testmmiotrace.c | |||
@@ -1,12 +1,13 @@ | |||
1 | /* | 1 | /* |
2 | * Written by Pekka Paalanen, 2008-2009 <pq@iki.fi> | 2 | * Written by Pekka Paalanen, 2008-2009 <pq@iki.fi> |
3 | */ | 3 | */ |
4 | |||
5 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
6 | |||
4 | #include <linux/module.h> | 7 | #include <linux/module.h> |
5 | #include <linux/io.h> | 8 | #include <linux/io.h> |
6 | #include <linux/mmiotrace.h> | 9 | #include <linux/mmiotrace.h> |
7 | 10 | ||
8 | #define MODULE_NAME "testmmiotrace" | ||
9 | |||
10 | static unsigned long mmio_address; | 11 | static unsigned long mmio_address; |
11 | module_param(mmio_address, ulong, 0); | 12 | module_param(mmio_address, ulong, 0); |
12 | MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB " | 13 | MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB " |
@@ -30,7 +31,7 @@ static unsigned v32(unsigned i) | |||
30 | static void do_write_test(void __iomem *p) | 31 | static void do_write_test(void __iomem *p) |
31 | { | 32 | { |
32 | unsigned int i; | 33 | unsigned int i; |
33 | pr_info(MODULE_NAME ": write test.\n"); | 34 | pr_info("write test.\n"); |
34 | mmiotrace_printk("Write test.\n"); | 35 | mmiotrace_printk("Write test.\n"); |
35 | 36 | ||
36 | for (i = 0; i < 256; i++) | 37 | for (i = 0; i < 256; i++) |
@@ -47,7 +48,7 @@ static void do_read_test(void __iomem *p) | |||
47 | { | 48 | { |
48 | unsigned int i; | 49 | unsigned int i; |
49 | unsigned errs[3] = { 0 }; | 50 | unsigned errs[3] = { 0 }; |
50 | pr_info(MODULE_NAME ": read test.\n"); | 51 | pr_info("read test.\n"); |
51 | mmiotrace_printk("Read test.\n"); | 52 | mmiotrace_printk("Read test.\n"); |
52 | 53 | ||
53 | for (i = 0; i < 256; i++) | 54 | for (i = 0; i < 256; i++) |
@@ -68,7 +69,7 @@ static void do_read_test(void __iomem *p) | |||
68 | 69 | ||
69 | static void do_read_far_test(void __iomem *p) | 70 | static void do_read_far_test(void __iomem *p) |
70 | { | 71 | { |
71 | pr_info(MODULE_NAME ": read far test.\n"); | 72 | pr_info("read far test.\n"); |
72 | mmiotrace_printk("Read far test.\n"); | 73 | mmiotrace_printk("Read far test.\n"); |
73 | 74 | ||
74 | ioread32(p + read_far); | 75 | ioread32(p + read_far); |
@@ -78,7 +79,7 @@ static void do_test(unsigned long size) | |||
78 | { | 79 | { |
79 | void __iomem *p = ioremap_nocache(mmio_address, size); | 80 | void __iomem *p = ioremap_nocache(mmio_address, size); |
80 | if (!p) { | 81 | if (!p) { |
81 | pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); | 82 | pr_err("could not ioremap, aborting.\n"); |
82 | return; | 83 | return; |
83 | } | 84 | } |
84 | mmiotrace_printk("ioremap returned %p.\n", p); | 85 | mmiotrace_printk("ioremap returned %p.\n", p); |
@@ -94,24 +95,22 @@ static int __init init(void) | |||
94 | unsigned long size = (read_far) ? (8 << 20) : (16 << 10); | 95 | unsigned long size = (read_far) ? (8 << 20) : (16 << 10); |
95 | 96 | ||
96 | if (mmio_address == 0) { | 97 | if (mmio_address == 0) { |
97 | pr_err(MODULE_NAME ": you have to use the module argument " | 98 | pr_err("you have to use the module argument mmio_address.\n"); |
98 | "mmio_address.\n"); | 99 | pr_err("DO NOT LOAD THIS MODULE UNLESS YOU REALLY KNOW WHAT YOU ARE DOING!\n"); |
99 | pr_err(MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS" | ||
100 | " YOU REALLY KNOW WHAT YOU ARE DOING!\n"); | ||
101 | return -ENXIO; | 100 | return -ENXIO; |
102 | } | 101 | } |
103 | 102 | ||
104 | pr_warning(MODULE_NAME ": WARNING: mapping %lu kB @ 0x%08lx in PCI " | 103 | pr_warning("WARNING: mapping %lu kB @ 0x%08lx in PCI address space, " |
105 | "address space, and writing 16 kB of rubbish in there.\n", | 104 | "and writing 16 kB of rubbish in there.\n", |
106 | size >> 10, mmio_address); | 105 | size >> 10, mmio_address); |
107 | do_test(size); | 106 | do_test(size); |
108 | pr_info(MODULE_NAME ": All done.\n"); | 107 | pr_info("All done.\n"); |
109 | return 0; | 108 | return 0; |
110 | } | 109 | } |
111 | 110 | ||
112 | static void __exit cleanup(void) | 111 | static void __exit cleanup(void) |
113 | { | 112 | { |
114 | pr_debug(MODULE_NAME ": unloaded.\n"); | 113 | pr_debug("unloaded.\n"); |
115 | } | 114 | } |
116 | 115 | ||
117 | module_init(init); | 116 | module_init(init); |
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 8aa85f17667e..0a979f3e5b8a 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <asm/mce.h> | 18 | #include <asm/mce.h> |
19 | #include <asm/xcr.h> | 19 | #include <asm/xcr.h> |
20 | #include <asm/suspend.h> | 20 | #include <asm/suspend.h> |
21 | #include <asm/debugreg.h> | ||
21 | 22 | ||
22 | #ifdef CONFIG_X86_32 | 23 | #ifdef CONFIG_X86_32 |
23 | static struct saved_context saved_context; | 24 | static struct saved_context saved_context; |
@@ -142,31 +143,6 @@ static void fix_processor_context(void) | |||
142 | #endif | 143 | #endif |
143 | load_TR_desc(); /* This does ltr */ | 144 | load_TR_desc(); /* This does ltr */ |
144 | load_LDT(¤t->active_mm->context); /* This does lldt */ | 145 | load_LDT(¤t->active_mm->context); /* This does lldt */ |
145 | |||
146 | /* | ||
147 | * Now maybe reload the debug registers | ||
148 | */ | ||
149 | if (current->thread.debugreg7) { | ||
150 | #ifdef CONFIG_X86_32 | ||
151 | set_debugreg(current->thread.debugreg0, 0); | ||
152 | set_debugreg(current->thread.debugreg1, 1); | ||
153 | set_debugreg(current->thread.debugreg2, 2); | ||
154 | set_debugreg(current->thread.debugreg3, 3); | ||
155 | /* no 4 and 5 */ | ||
156 | set_debugreg(current->thread.debugreg6, 6); | ||
157 | set_debugreg(current->thread.debugreg7, 7); | ||
158 | #else | ||
159 | /* CONFIG_X86_64 */ | ||
160 | loaddebug(¤t->thread, 0); | ||
161 | loaddebug(¤t->thread, 1); | ||
162 | loaddebug(¤t->thread, 2); | ||
163 | loaddebug(¤t->thread, 3); | ||
164 | /* no 4 and 5 */ | ||
165 | loaddebug(¤t->thread, 6); | ||
166 | loaddebug(¤t->thread, 7); | ||
167 | #endif | ||
168 | } | ||
169 | |||
170 | } | 146 | } |
171 | 147 | ||
172 | /** | 148 | /** |
diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile new file mode 100644 index 000000000000..f82082677337 --- /dev/null +++ b/arch/x86/tools/Makefile | |||
@@ -0,0 +1,31 @@ | |||
1 | PHONY += posttest | ||
2 | |||
3 | ifeq ($(KBUILD_VERBOSE),1) | ||
4 | posttest_verbose = -v | ||
5 | else | ||
6 | posttest_verbose = | ||
7 | endif | ||
8 | |||
9 | ifeq ($(CONFIG_64BIT),y) | ||
10 | posttest_64bit = -y | ||
11 | else | ||
12 | posttest_64bit = -n | ||
13 | endif | ||
14 | |||
15 | distill_awk = $(srctree)/arch/x86/tools/distill.awk | ||
16 | chkobjdump = $(srctree)/arch/x86/tools/chkobjdump.awk | ||
17 | |||
18 | quiet_cmd_posttest = TEST $@ | ||
19 | cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(distill_awk) | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose) | ||
20 | |||
21 | posttest: $(obj)/test_get_len vmlinux | ||
22 | $(call cmd,posttest) | ||
23 | |||
24 | hostprogs-y := test_get_len | ||
25 | |||
26 | # -I needed for generated C source and C source which in the kernel tree. | ||
27 | HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ | ||
28 | |||
29 | # Dependencies are also needed. | ||
30 | $(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c | ||
31 | |||
diff --git a/arch/x86/tools/chkobjdump.awk b/arch/x86/tools/chkobjdump.awk new file mode 100644 index 000000000000..0d13cd9fdcff --- /dev/null +++ b/arch/x86/tools/chkobjdump.awk | |||
@@ -0,0 +1,23 @@ | |||
1 | # GNU objdump version checker | ||
2 | # | ||
3 | # Usage: | ||
4 | # objdump -v | awk -f chkobjdump.awk | ||
5 | BEGIN { | ||
6 | # objdump version 2.19 or later is OK for the test. | ||
7 | od_ver = 2; | ||
8 | od_sver = 19; | ||
9 | } | ||
10 | |||
11 | /^GNU/ { | ||
12 | split($4, ver, "."); | ||
13 | if (ver[1] > od_ver || | ||
14 | (ver[1] == od_ver && ver[2] >= od_sver)) { | ||
15 | exit 1; | ||
16 | } else { | ||
17 | printf("Warning: objdump version %s is older than %d.%d\n", | ||
18 | $4, od_ver, od_sver); | ||
19 | print("Warning: Skipping posttest."); | ||
20 | # Logic is inverted, because we just skip test without error. | ||
21 | exit 0; | ||
22 | } | ||
23 | } | ||
diff --git a/arch/x86/tools/distill.awk b/arch/x86/tools/distill.awk new file mode 100644 index 000000000000..c13c0ee48ab4 --- /dev/null +++ b/arch/x86/tools/distill.awk | |||
@@ -0,0 +1,47 @@ | |||
1 | #!/bin/awk -f | ||
2 | # Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len | ||
3 | # Distills the disassembly as follows: | ||
4 | # - Removes all lines except the disassembled instructions. | ||
5 | # - For instructions that exceed 1 line (7 bytes), crams all the hex bytes | ||
6 | # into a single line. | ||
7 | # - Remove bad(or prefix only) instructions | ||
8 | |||
9 | BEGIN { | ||
10 | prev_addr = "" | ||
11 | prev_hex = "" | ||
12 | prev_mnemonic = "" | ||
13 | bad_expr = "(\\(bad\\)|^rex|^.byte|^rep(z|nz)$|^lock$|^es$|^cs$|^ss$|^ds$|^fs$|^gs$|^data(16|32)$|^addr(16|32|64))" | ||
14 | fwait_expr = "^9b " | ||
15 | fwait_str="9b\tfwait" | ||
16 | } | ||
17 | |||
18 | /^ *[0-9a-f]+ <[^>]*>:/ { | ||
19 | # Symbol entry | ||
20 | printf("%s%s\n", $2, $1) | ||
21 | } | ||
22 | |||
23 | /^ *[0-9a-f]+:/ { | ||
24 | if (split($0, field, "\t") < 3) { | ||
25 | # This is a continuation of the same insn. | ||
26 | prev_hex = prev_hex field[2] | ||
27 | } else { | ||
28 | # Skip bad instructions | ||
29 | if (match(prev_mnemonic, bad_expr)) | ||
30 | prev_addr = "" | ||
31 | # Split fwait from other f* instructions | ||
32 | if (match(prev_hex, fwait_expr) && prev_mnemonic != "fwait") { | ||
33 | printf "%s\t%s\n", prev_addr, fwait_str | ||
34 | sub(fwait_expr, "", prev_hex) | ||
35 | } | ||
36 | if (prev_addr != "") | ||
37 | printf "%s\t%s\t%s\n", prev_addr, prev_hex, prev_mnemonic | ||
38 | prev_addr = field[1] | ||
39 | prev_hex = field[2] | ||
40 | prev_mnemonic = field[3] | ||
41 | } | ||
42 | } | ||
43 | |||
44 | END { | ||
45 | if (prev_addr != "") | ||
46 | printf "%s\t%s\t%s\n", prev_addr, prev_hex, prev_mnemonic | ||
47 | } | ||
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk new file mode 100644 index 000000000000..e34e92a28eb6 --- /dev/null +++ b/arch/x86/tools/gen-insn-attr-x86.awk | |||
@@ -0,0 +1,380 @@ | |||
1 | #!/bin/awk -f | ||
2 | # gen-insn-attr-x86.awk: Instruction attribute table generator | ||
3 | # Written by Masami Hiramatsu <mhiramat@redhat.com> | ||
4 | # | ||
5 | # Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c | ||
6 | |||
7 | # Awk implementation sanity check | ||
8 | function check_awk_implement() { | ||
9 | if (!match("abc", "[[:lower:]]+")) | ||
10 | return "Your awk doesn't support charactor-class." | ||
11 | if (sprintf("%x", 0) != "0") | ||
12 | return "Your awk has a printf-format problem." | ||
13 | return "" | ||
14 | } | ||
15 | |||
16 | # Clear working vars | ||
17 | function clear_vars() { | ||
18 | delete table | ||
19 | delete lptable2 | ||
20 | delete lptable1 | ||
21 | delete lptable3 | ||
22 | eid = -1 # escape id | ||
23 | gid = -1 # group id | ||
24 | aid = -1 # AVX id | ||
25 | tname = "" | ||
26 | } | ||
27 | |||
28 | BEGIN { | ||
29 | # Implementation error checking | ||
30 | awkchecked = check_awk_implement() | ||
31 | if (awkchecked != "") { | ||
32 | print "Error: " awkchecked > "/dev/stderr" | ||
33 | print "Please try to use gawk." > "/dev/stderr" | ||
34 | exit 1 | ||
35 | } | ||
36 | |||
37 | # Setup generating tables | ||
38 | print "/* x86 opcode map generated from x86-opcode-map.txt */" | ||
39 | print "/* Do not change this code. */\n" | ||
40 | ggid = 1 | ||
41 | geid = 1 | ||
42 | gaid = 0 | ||
43 | delete etable | ||
44 | delete gtable | ||
45 | delete atable | ||
46 | |||
47 | opnd_expr = "^[[:alpha:]/]" | ||
48 | ext_expr = "^\\(" | ||
49 | sep_expr = "^\\|$" | ||
50 | group_expr = "^Grp[[:alnum:]]+" | ||
51 | |||
52 | imm_expr = "^[IJAO][[:lower:]]" | ||
53 | imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" | ||
54 | imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" | ||
55 | imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)" | ||
56 | imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)" | ||
57 | imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)" | ||
58 | imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)" | ||
59 | imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" | ||
60 | imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" | ||
61 | imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)" | ||
62 | imm_flag["Ob"] = "INAT_MOFFSET" | ||
63 | imm_flag["Ov"] = "INAT_MOFFSET" | ||
64 | |||
65 | modrm_expr = "^([CDEGMNPQRSUVW/][[:lower:]]+|NTA|T[012])" | ||
66 | force64_expr = "\\([df]64\\)" | ||
67 | rex_expr = "^REX(\\.[XRWB]+)*" | ||
68 | fpu_expr = "^ESC" # TODO | ||
69 | |||
70 | lprefix1_expr = "\\(66\\)" | ||
71 | lprefix2_expr = "\\(F3\\)" | ||
72 | lprefix3_expr = "\\(F2\\)" | ||
73 | max_lprefix = 4 | ||
74 | |||
75 | vexok_expr = "\\(VEX\\)" | ||
76 | vexonly_expr = "\\(oVEX\\)" | ||
77 | |||
78 | prefix_expr = "\\(Prefix\\)" | ||
79 | prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" | ||
80 | prefix_num["REPNE"] = "INAT_PFX_REPNE" | ||
81 | prefix_num["REP/REPE"] = "INAT_PFX_REPE" | ||
82 | prefix_num["LOCK"] = "INAT_PFX_LOCK" | ||
83 | prefix_num["SEG=CS"] = "INAT_PFX_CS" | ||
84 | prefix_num["SEG=DS"] = "INAT_PFX_DS" | ||
85 | prefix_num["SEG=ES"] = "INAT_PFX_ES" | ||
86 | prefix_num["SEG=FS"] = "INAT_PFX_FS" | ||
87 | prefix_num["SEG=GS"] = "INAT_PFX_GS" | ||
88 | prefix_num["SEG=SS"] = "INAT_PFX_SS" | ||
89 | prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" | ||
90 | prefix_num["2bytes-VEX"] = "INAT_PFX_VEX2" | ||
91 | prefix_num["3bytes-VEX"] = "INAT_PFX_VEX3" | ||
92 | |||
93 | clear_vars() | ||
94 | } | ||
95 | |||
96 | function semantic_error(msg) { | ||
97 | print "Semantic error at " NR ": " msg > "/dev/stderr" | ||
98 | exit 1 | ||
99 | } | ||
100 | |||
101 | function debug(msg) { | ||
102 | print "DEBUG: " msg | ||
103 | } | ||
104 | |||
105 | function array_size(arr, i,c) { | ||
106 | c = 0 | ||
107 | for (i in arr) | ||
108 | c++ | ||
109 | return c | ||
110 | } | ||
111 | |||
112 | /^Table:/ { | ||
113 | print "/* " $0 " */" | ||
114 | if (tname != "") | ||
115 | semantic_error("Hit Table: before EndTable:."); | ||
116 | } | ||
117 | |||
118 | /^Referrer:/ { | ||
119 | if (NF != 1) { | ||
120 | # escape opcode table | ||
121 | ref = "" | ||
122 | for (i = 2; i <= NF; i++) | ||
123 | ref = ref $i | ||
124 | eid = escape[ref] | ||
125 | tname = sprintf("inat_escape_table_%d", eid) | ||
126 | } | ||
127 | } | ||
128 | |||
129 | /^AVXcode:/ { | ||
130 | if (NF != 1) { | ||
131 | # AVX/escape opcode table | ||
132 | aid = $2 | ||
133 | if (gaid <= aid) | ||
134 | gaid = aid + 1 | ||
135 | if (tname == "") # AVX only opcode table | ||
136 | tname = sprintf("inat_avx_table_%d", $2) | ||
137 | } | ||
138 | if (aid == -1 && eid == -1) # primary opcode table | ||
139 | tname = "inat_primary_table" | ||
140 | } | ||
141 | |||
142 | /^GrpTable:/ { | ||
143 | print "/* " $0 " */" | ||
144 | if (!($2 in group)) | ||
145 | semantic_error("No group: " $2 ) | ||
146 | gid = group[$2] | ||
147 | tname = "inat_group_table_" gid | ||
148 | } | ||
149 | |||
150 | function print_table(tbl,name,fmt,n) | ||
151 | { | ||
152 | print "const insn_attr_t " name " = {" | ||
153 | for (i = 0; i < n; i++) { | ||
154 | id = sprintf(fmt, i) | ||
155 | if (tbl[id]) | ||
156 | print " [" id "] = " tbl[id] "," | ||
157 | } | ||
158 | print "};" | ||
159 | } | ||
160 | |||
161 | /^EndTable/ { | ||
162 | if (gid != -1) { | ||
163 | # print group tables | ||
164 | if (array_size(table) != 0) { | ||
165 | print_table(table, tname "[INAT_GROUP_TABLE_SIZE]", | ||
166 | "0x%x", 8) | ||
167 | gtable[gid,0] = tname | ||
168 | } | ||
169 | if (array_size(lptable1) != 0) { | ||
170 | print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]", | ||
171 | "0x%x", 8) | ||
172 | gtable[gid,1] = tname "_1" | ||
173 | } | ||
174 | if (array_size(lptable2) != 0) { | ||
175 | print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]", | ||
176 | "0x%x", 8) | ||
177 | gtable[gid,2] = tname "_2" | ||
178 | } | ||
179 | if (array_size(lptable3) != 0) { | ||
180 | print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]", | ||
181 | "0x%x", 8) | ||
182 | gtable[gid,3] = tname "_3" | ||
183 | } | ||
184 | } else { | ||
185 | # print primary/escaped tables | ||
186 | if (array_size(table) != 0) { | ||
187 | print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]", | ||
188 | "0x%02x", 256) | ||
189 | etable[eid,0] = tname | ||
190 | if (aid >= 0) | ||
191 | atable[aid,0] = tname | ||
192 | } | ||
193 | if (array_size(lptable1) != 0) { | ||
194 | print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]", | ||
195 | "0x%02x", 256) | ||
196 | etable[eid,1] = tname "_1" | ||
197 | if (aid >= 0) | ||
198 | atable[aid,1] = tname "_1" | ||
199 | } | ||
200 | if (array_size(lptable2) != 0) { | ||
201 | print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]", | ||
202 | "0x%02x", 256) | ||
203 | etable[eid,2] = tname "_2" | ||
204 | if (aid >= 0) | ||
205 | atable[aid,2] = tname "_2" | ||
206 | } | ||
207 | if (array_size(lptable3) != 0) { | ||
208 | print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]", | ||
209 | "0x%02x", 256) | ||
210 | etable[eid,3] = tname "_3" | ||
211 | if (aid >= 0) | ||
212 | atable[aid,3] = tname "_3" | ||
213 | } | ||
214 | } | ||
215 | print "" | ||
216 | clear_vars() | ||
217 | } | ||
218 | |||
219 | function add_flags(old,new) { | ||
220 | if (old && new) | ||
221 | return old " | " new | ||
222 | else if (old) | ||
223 | return old | ||
224 | else | ||
225 | return new | ||
226 | } | ||
227 | |||
228 | # convert operands to flags. | ||
229 | function convert_operands(opnd, i,imm,mod) | ||
230 | { | ||
231 | imm = null | ||
232 | mod = null | ||
233 | for (i in opnd) { | ||
234 | i = opnd[i] | ||
235 | if (match(i, imm_expr) == 1) { | ||
236 | if (!imm_flag[i]) | ||
237 | semantic_error("Unknown imm opnd: " i) | ||
238 | if (imm) { | ||
239 | if (i != "Ib") | ||
240 | semantic_error("Second IMM error") | ||
241 | imm = add_flags(imm, "INAT_SCNDIMM") | ||
242 | } else | ||
243 | imm = imm_flag[i] | ||
244 | } else if (match(i, modrm_expr)) | ||
245 | mod = "INAT_MODRM" | ||
246 | } | ||
247 | return add_flags(imm, mod) | ||
248 | } | ||
249 | |||
250 | /^[0-9a-f]+\:/ { | ||
251 | if (NR == 1) | ||
252 | next | ||
253 | # get index | ||
254 | idx = "0x" substr($1, 1, index($1,":") - 1) | ||
255 | if (idx in table) | ||
256 | semantic_error("Redefine " idx " in " tname) | ||
257 | |||
258 | # check if escaped opcode | ||
259 | if ("escape" == $2) { | ||
260 | if ($3 != "#") | ||
261 | semantic_error("No escaped name") | ||
262 | ref = "" | ||
263 | for (i = 4; i <= NF; i++) | ||
264 | ref = ref $i | ||
265 | if (ref in escape) | ||
266 | semantic_error("Redefine escape (" ref ")") | ||
267 | escape[ref] = geid | ||
268 | geid++ | ||
269 | table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")" | ||
270 | next | ||
271 | } | ||
272 | |||
273 | variant = null | ||
274 | # converts | ||
275 | i = 2 | ||
276 | while (i <= NF) { | ||
277 | opcode = $(i++) | ||
278 | delete opnds | ||
279 | ext = null | ||
280 | flags = null | ||
281 | opnd = null | ||
282 | # parse one opcode | ||
283 | if (match($i, opnd_expr)) { | ||
284 | opnd = $i | ||
285 | split($(i++), opnds, ",") | ||
286 | flags = convert_operands(opnds) | ||
287 | } | ||
288 | if (match($i, ext_expr)) | ||
289 | ext = $(i++) | ||
290 | if (match($i, sep_expr)) | ||
291 | i++ | ||
292 | else if (i < NF) | ||
293 | semantic_error($i " is not a separator") | ||
294 | |||
295 | # check if group opcode | ||
296 | if (match(opcode, group_expr)) { | ||
297 | if (!(opcode in group)) { | ||
298 | group[opcode] = ggid | ||
299 | ggid++ | ||
300 | } | ||
301 | flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")") | ||
302 | } | ||
303 | # check force(or default) 64bit | ||
304 | if (match(ext, force64_expr)) | ||
305 | flags = add_flags(flags, "INAT_FORCE64") | ||
306 | |||
307 | # check REX prefix | ||
308 | if (match(opcode, rex_expr)) | ||
309 | flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)") | ||
310 | |||
311 | # check coprocessor escape : TODO | ||
312 | if (match(opcode, fpu_expr)) | ||
313 | flags = add_flags(flags, "INAT_MODRM") | ||
314 | |||
315 | # check VEX only code | ||
316 | if (match(ext, vexonly_expr)) | ||
317 | flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") | ||
318 | |||
319 | # check VEX only code | ||
320 | if (match(ext, vexok_expr)) | ||
321 | flags = add_flags(flags, "INAT_VEXOK") | ||
322 | |||
323 | # check prefixes | ||
324 | if (match(ext, prefix_expr)) { | ||
325 | if (!prefix_num[opcode]) | ||
326 | semantic_error("Unknown prefix: " opcode) | ||
327 | flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")") | ||
328 | } | ||
329 | if (length(flags) == 0) | ||
330 | continue | ||
331 | # check if last prefix | ||
332 | if (match(ext, lprefix1_expr)) { | ||
333 | lptable1[idx] = add_flags(lptable1[idx],flags) | ||
334 | variant = "INAT_VARIANT" | ||
335 | } else if (match(ext, lprefix2_expr)) { | ||
336 | lptable2[idx] = add_flags(lptable2[idx],flags) | ||
337 | variant = "INAT_VARIANT" | ||
338 | } else if (match(ext, lprefix3_expr)) { | ||
339 | lptable3[idx] = add_flags(lptable3[idx],flags) | ||
340 | variant = "INAT_VARIANT" | ||
341 | } else { | ||
342 | table[idx] = add_flags(table[idx],flags) | ||
343 | } | ||
344 | } | ||
345 | if (variant) | ||
346 | table[idx] = add_flags(table[idx],variant) | ||
347 | } | ||
348 | |||
349 | END { | ||
350 | if (awkchecked != "") | ||
351 | exit 1 | ||
352 | # print escape opcode map's array | ||
353 | print "/* Escape opcode map array */" | ||
354 | print "const insn_attr_t const *inat_escape_tables[INAT_ESC_MAX + 1]" \ | ||
355 | "[INAT_LSTPFX_MAX + 1] = {" | ||
356 | for (i = 0; i < geid; i++) | ||
357 | for (j = 0; j < max_lprefix; j++) | ||
358 | if (etable[i,j]) | ||
359 | print " ["i"]["j"] = "etable[i,j]"," | ||
360 | print "};\n" | ||
361 | # print group opcode map's array | ||
362 | print "/* Group opcode map array */" | ||
363 | print "const insn_attr_t const *inat_group_tables[INAT_GRP_MAX + 1]"\ | ||
364 | "[INAT_LSTPFX_MAX + 1] = {" | ||
365 | for (i = 0; i < ggid; i++) | ||
366 | for (j = 0; j < max_lprefix; j++) | ||
367 | if (gtable[i,j]) | ||
368 | print " ["i"]["j"] = "gtable[i,j]"," | ||
369 | print "};\n" | ||
370 | # print AVX opcode map's array | ||
371 | print "/* AVX opcode map array */" | ||
372 | print "const insn_attr_t const *inat_avx_tables[X86_VEX_M_MAX + 1]"\ | ||
373 | "[INAT_LSTPFX_MAX + 1] = {" | ||
374 | for (i = 0; i < gaid; i++) | ||
375 | for (j = 0; j < max_lprefix; j++) | ||
376 | if (atable[i,j]) | ||
377 | print " ["i"]["j"] = "atable[i,j]"," | ||
378 | print "};" | ||
379 | } | ||
380 | |||
diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c new file mode 100644 index 000000000000..d8214dc03fa7 --- /dev/null +++ b/arch/x86/tools/test_get_len.c | |||
@@ -0,0 +1,173 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License as published by | ||
4 | * the Free Software Foundation; either version 2 of the License, or | ||
5 | * (at your option) any later version. | ||
6 | * | ||
7 | * This program is distributed in the hope that it will be useful, | ||
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
10 | * GNU General Public License for more details. | ||
11 | * | ||
12 | * You should have received a copy of the GNU General Public License | ||
13 | * along with this program; if not, write to the Free Software | ||
14 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
15 | * | ||
16 | * Copyright (C) IBM Corporation, 2009 | ||
17 | */ | ||
18 | |||
19 | #include <stdlib.h> | ||
20 | #include <stdio.h> | ||
21 | #include <string.h> | ||
22 | #include <assert.h> | ||
23 | #include <unistd.h> | ||
24 | |||
25 | #define unlikely(cond) (cond) | ||
26 | |||
27 | #include <asm/insn.h> | ||
28 | #include <inat.c> | ||
29 | #include <insn.c> | ||
30 | |||
31 | /* | ||
32 | * Test of instruction analysis in general and insn_get_length() in | ||
33 | * particular. See if insn_get_length() and the disassembler agree | ||
34 | * on the length of each instruction in an elf disassembly. | ||
35 | * | ||
36 | * Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len | ||
37 | */ | ||
38 | |||
39 | const char *prog; | ||
40 | static int verbose; | ||
41 | static int x86_64; | ||
42 | |||
43 | static void usage(void) | ||
44 | { | ||
45 | fprintf(stderr, "Usage: objdump -d a.out | awk -f distill.awk |" | ||
46 | " %s [-y|-n] [-v] \n", prog); | ||
47 | fprintf(stderr, "\t-y 64bit mode\n"); | ||
48 | fprintf(stderr, "\t-n 32bit mode\n"); | ||
49 | fprintf(stderr, "\t-v verbose mode\n"); | ||
50 | exit(1); | ||
51 | } | ||
52 | |||
53 | static void malformed_line(const char *line, int line_nr) | ||
54 | { | ||
55 | fprintf(stderr, "%s: malformed line %d:\n%s", prog, line_nr, line); | ||
56 | exit(3); | ||
57 | } | ||
58 | |||
59 | static void dump_field(FILE *fp, const char *name, const char *indent, | ||
60 | struct insn_field *field) | ||
61 | { | ||
62 | fprintf(fp, "%s.%s = {\n", indent, name); | ||
63 | fprintf(fp, "%s\t.value = %d, bytes[] = {%x, %x, %x, %x},\n", | ||
64 | indent, field->value, field->bytes[0], field->bytes[1], | ||
65 | field->bytes[2], field->bytes[3]); | ||
66 | fprintf(fp, "%s\t.got = %d, .nbytes = %d},\n", indent, | ||
67 | field->got, field->nbytes); | ||
68 | } | ||
69 | |||
70 | static void dump_insn(FILE *fp, struct insn *insn) | ||
71 | { | ||
72 | fprintf(fp, "Instruction = { \n"); | ||
73 | dump_field(fp, "prefixes", "\t", &insn->prefixes); | ||
74 | dump_field(fp, "rex_prefix", "\t", &insn->rex_prefix); | ||
75 | dump_field(fp, "vex_prefix", "\t", &insn->vex_prefix); | ||
76 | dump_field(fp, "opcode", "\t", &insn->opcode); | ||
77 | dump_field(fp, "modrm", "\t", &insn->modrm); | ||
78 | dump_field(fp, "sib", "\t", &insn->sib); | ||
79 | dump_field(fp, "displacement", "\t", &insn->displacement); | ||
80 | dump_field(fp, "immediate1", "\t", &insn->immediate1); | ||
81 | dump_field(fp, "immediate2", "\t", &insn->immediate2); | ||
82 | fprintf(fp, "\t.attr = %x, .opnd_bytes = %d, .addr_bytes = %d,\n", | ||
83 | insn->attr, insn->opnd_bytes, insn->addr_bytes); | ||
84 | fprintf(fp, "\t.length = %d, .x86_64 = %d, .kaddr = %p}\n", | ||
85 | insn->length, insn->x86_64, insn->kaddr); | ||
86 | } | ||
87 | |||
88 | static void parse_args(int argc, char **argv) | ||
89 | { | ||
90 | int c; | ||
91 | prog = argv[0]; | ||
92 | while ((c = getopt(argc, argv, "ynv")) != -1) { | ||
93 | switch (c) { | ||
94 | case 'y': | ||
95 | x86_64 = 1; | ||
96 | break; | ||
97 | case 'n': | ||
98 | x86_64 = 0; | ||
99 | break; | ||
100 | case 'v': | ||
101 | verbose = 1; | ||
102 | break; | ||
103 | default: | ||
104 | usage(); | ||
105 | } | ||
106 | } | ||
107 | } | ||
108 | |||
109 | #define BUFSIZE 256 | ||
110 | |||
111 | int main(int argc, char **argv) | ||
112 | { | ||
113 | char line[BUFSIZE], sym[BUFSIZE] = "<unknown>"; | ||
114 | unsigned char insn_buf[16]; | ||
115 | struct insn insn; | ||
116 | int insns = 0, c; | ||
117 | int warnings = 0; | ||
118 | |||
119 | parse_args(argc, argv); | ||
120 | |||
121 | while (fgets(line, BUFSIZE, stdin)) { | ||
122 | char copy[BUFSIZE], *s, *tab1, *tab2; | ||
123 | int nb = 0; | ||
124 | unsigned int b; | ||
125 | |||
126 | if (line[0] == '<') { | ||
127 | /* Symbol line */ | ||
128 | strcpy(sym, line); | ||
129 | continue; | ||
130 | } | ||
131 | |||
132 | insns++; | ||
133 | memset(insn_buf, 0, 16); | ||
134 | strcpy(copy, line); | ||
135 | tab1 = strchr(copy, '\t'); | ||
136 | if (!tab1) | ||
137 | malformed_line(line, insns); | ||
138 | s = tab1 + 1; | ||
139 | s += strspn(s, " "); | ||
140 | tab2 = strchr(s, '\t'); | ||
141 | if (!tab2) | ||
142 | malformed_line(line, insns); | ||
143 | *tab2 = '\0'; /* Characters beyond tab2 aren't examined */ | ||
144 | while (s < tab2) { | ||
145 | if (sscanf(s, "%x", &b) == 1) { | ||
146 | insn_buf[nb++] = (unsigned char) b; | ||
147 | s += 3; | ||
148 | } else | ||
149 | break; | ||
150 | } | ||
151 | /* Decode an instruction */ | ||
152 | insn_init(&insn, insn_buf, x86_64); | ||
153 | insn_get_length(&insn); | ||
154 | if (insn.length != nb) { | ||
155 | warnings++; | ||
156 | fprintf(stderr, "Warning: %s found difference at %s\n", | ||
157 | prog, sym); | ||
158 | fprintf(stderr, "Warning: %s", line); | ||
159 | fprintf(stderr, "Warning: objdump says %d bytes, but " | ||
160 | "insn_get_length() says %d\n", nb, | ||
161 | insn.length); | ||
162 | if (verbose) | ||
163 | dump_insn(stderr, &insn); | ||
164 | } | ||
165 | } | ||
166 | if (warnings) | ||
167 | fprintf(stderr, "Warning: decoded and checked %d" | ||
168 | " instructions with %d warnings\n", insns, warnings); | ||
169 | else | ||
170 | fprintf(stderr, "Succeed: decoded and checked %d" | ||
171 | " instructions\n", insns); | ||
172 | return 0; | ||
173 | } | ||