Diffstat (limited to 'arch/x86')
29 files changed, 1193 insertions, 546 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 76f5a466547a..8443c50fbbf6 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -40,7 +40,6 @@ config X86 | |||
40 | select HAVE_FUNCTION_GRAPH_TRACER | 40 | select HAVE_FUNCTION_GRAPH_TRACER |
41 | select HAVE_FUNCTION_GRAPH_FP_TEST | 41 | select HAVE_FUNCTION_GRAPH_FP_TEST |
42 | select HAVE_FUNCTION_TRACE_MCOUNT_TEST | 42 | select HAVE_FUNCTION_TRACE_MCOUNT_TEST |
43 | select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE | ||
44 | select HAVE_SYSCALL_TRACEPOINTS | 43 | select HAVE_SYSCALL_TRACEPOINTS |
45 | select HAVE_KVM | 44 | select HAVE_KVM |
46 | select HAVE_ARCH_KGDB | 45 | select HAVE_ARCH_KGDB |
diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um
index 4be406abeefd..36b62bc52638 100644
--- a/arch/x86/Makefile.um
+++ b/arch/x86/Makefile.um
@@ -14,6 +14,9 @@ LINK-y += $(call cc-option,-m32) | |||
14 | 14 | ||
15 | export LDFLAGS | 15 | export LDFLAGS |
16 | 16 | ||
17 | LDS_EXTRA := -Ui386 | ||
18 | export LDS_EXTRA | ||
19 | |||
17 | # First of all, tune CFLAGS for the specific CPU. This actually sets cflags-y. | 20 | # First of all, tune CFLAGS for the specific CPU. This actually sets cflags-y. |
18 | include $(srctree)/arch/x86/Makefile_32.cpu | 21 | include $(srctree)/arch/x86/Makefile_32.cpu |
19 | 22 | ||
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index d511d951a052..4824fb45560f 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -119,9 +119,7 @@ static void set_brk(unsigned long start, unsigned long end) | |||
119 | end = PAGE_ALIGN(end); | 119 | end = PAGE_ALIGN(end); |
120 | if (end <= start) | 120 | if (end <= start) |
121 | return; | 121 | return; |
122 | down_write(&current->mm->mmap_sem); | 122 | vm_brk(start, end - start); |
123 | do_brk(start, end - start); | ||
124 | up_write(&current->mm->mmap_sem); | ||
125 | } | 123 | } |
126 | 124 | ||
127 | #ifdef CORE_DUMP | 125 | #ifdef CORE_DUMP |
@@ -332,9 +330,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
332 | pos = 32; | 330 | pos = 32; |
333 | map_size = ex.a_text+ex.a_data; | 331 | map_size = ex.a_text+ex.a_data; |
334 | 332 | ||
335 | down_write(&current->mm->mmap_sem); | 333 | error = vm_brk(text_addr & PAGE_MASK, map_size); |
336 | error = do_brk(text_addr & PAGE_MASK, map_size); | ||
337 | up_write(&current->mm->mmap_sem); | ||
338 | 334 | ||
339 | if (error != (text_addr & PAGE_MASK)) { | 335 | if (error != (text_addr & PAGE_MASK)) { |
340 | send_sig(SIGKILL, current, 0); | 336 | send_sig(SIGKILL, current, 0); |
@@ -373,9 +369,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
373 | if (!bprm->file->f_op->mmap || (fd_offset & ~PAGE_MASK) != 0) { | 369 | if (!bprm->file->f_op->mmap || (fd_offset & ~PAGE_MASK) != 0) { |
374 | loff_t pos = fd_offset; | 370 | loff_t pos = fd_offset; |
375 | 371 | ||
376 | down_write(&current->mm->mmap_sem); | 372 | vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); |
377 | do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); | ||
378 | up_write(&current->mm->mmap_sem); | ||
379 | bprm->file->f_op->read(bprm->file, | 373 | bprm->file->f_op->read(bprm->file, |
380 | (char __user *)N_TXTADDR(ex), | 374 | (char __user *)N_TXTADDR(ex), |
381 | ex.a_text+ex.a_data, &pos); | 375 | ex.a_text+ex.a_data, &pos); |
@@ -385,26 +379,22 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
385 | goto beyond_if; | 379 | goto beyond_if; |
386 | } | 380 | } |
387 | 381 | ||
388 | down_write(&current->mm->mmap_sem); | 382 | error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text, |
389 | error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text, | ||
390 | PROT_READ | PROT_EXEC, | 383 | PROT_READ | PROT_EXEC, |
391 | MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | | 384 | MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | |
392 | MAP_EXECUTABLE | MAP_32BIT, | 385 | MAP_EXECUTABLE | MAP_32BIT, |
393 | fd_offset); | 386 | fd_offset); |
394 | up_write(&current->mm->mmap_sem); | ||
395 | 387 | ||
396 | if (error != N_TXTADDR(ex)) { | 388 | if (error != N_TXTADDR(ex)) { |
397 | send_sig(SIGKILL, current, 0); | 389 | send_sig(SIGKILL, current, 0); |
398 | return error; | 390 | return error; |
399 | } | 391 | } |
400 | 392 | ||
401 | down_write(&current->mm->mmap_sem); | 393 | error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data, |
402 | error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data, | ||
403 | PROT_READ | PROT_WRITE | PROT_EXEC, | 394 | PROT_READ | PROT_WRITE | PROT_EXEC, |
404 | MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | | 395 | MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | |
405 | MAP_EXECUTABLE | MAP_32BIT, | 396 | MAP_EXECUTABLE | MAP_32BIT, |
406 | fd_offset + ex.a_text); | 397 | fd_offset + ex.a_text); |
407 | up_write(&current->mm->mmap_sem); | ||
408 | if (error != N_DATADDR(ex)) { | 398 | if (error != N_DATADDR(ex)) { |
409 | send_sig(SIGKILL, current, 0); | 399 | send_sig(SIGKILL, current, 0); |
410 | return error; | 400 | return error; |
@@ -476,9 +466,7 @@ static int load_aout_library(struct file *file) | |||
476 | error_time = jiffies; | 466 | error_time = jiffies; |
477 | } | 467 | } |
478 | #endif | 468 | #endif |
479 | down_write(&current->mm->mmap_sem); | 469 | vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); |
480 | do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); | ||
481 | up_write(&current->mm->mmap_sem); | ||
482 | 470 | ||
483 | file->f_op->read(file, (char __user *)start_addr, | 471 | file->f_op->read(file, (char __user *)start_addr, |
484 | ex.a_text + ex.a_data, &pos); | 472 | ex.a_text + ex.a_data, &pos); |
@@ -490,12 +478,10 @@ static int load_aout_library(struct file *file) | |||
490 | goto out; | 478 | goto out; |
491 | } | 479 | } |
492 | /* Now use mmap to map the library into memory. */ | 480 | /* Now use mmap to map the library into memory. */ |
493 | down_write(&current->mm->mmap_sem); | 481 | error = vm_mmap(file, start_addr, ex.a_text + ex.a_data, |
494 | error = do_mmap(file, start_addr, ex.a_text + ex.a_data, | ||
495 | PROT_READ | PROT_WRITE | PROT_EXEC, | 482 | PROT_READ | PROT_WRITE | PROT_EXEC, |
496 | MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_32BIT, | 483 | MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_32BIT, |
497 | N_TXTOFF(ex)); | 484 | N_TXTOFF(ex)); |
498 | up_write(&current->mm->mmap_sem); | ||
499 | retval = error; | 485 | retval = error; |
500 | if (error != start_addr) | 486 | if (error != start_addr) |
501 | goto out; | 487 | goto out; |
@@ -503,9 +489,7 @@ static int load_aout_library(struct file *file) | |||
503 | len = PAGE_ALIGN(ex.a_text + ex.a_data); | 489 | len = PAGE_ALIGN(ex.a_text + ex.a_data); |
504 | bss = ex.a_text + ex.a_data + ex.a_bss; | 490 | bss = ex.a_text + ex.a_data + ex.a_bss; |
505 | if (bss > len) { | 491 | if (bss > len) { |
506 | down_write(&current->mm->mmap_sem); | 492 | error = vm_brk(start_addr + len, bss - len); |
507 | error = do_brk(start_addr + len, bss - len); | ||
508 | up_write(&current->mm->mmap_sem); | ||
509 | retval = error; | 493 | retval = error; |
510 | if (error != start_addr + len) | 494 | if (error != start_addr + len) |
511 | goto out; | 495 | goto out; |
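
All of the hunks above replace the same idiom: open-coded mmap_sem locking around do_mmap()/do_brk() gives way to vm_mmap()/vm_brk(), which take and drop the semaphore internally while keeping the same return convention (the start address on success). A minimal sketch of the before/after pattern, kernel context assumed and the mapping arguments and addr/err names used only as placeholders:

        /* old: caller holds mmap_sem for write around do_mmap()/do_brk() */
        down_write(&current->mm->mmap_sem);
        addr = do_mmap(file, start, len, prot, flags, offset);
        up_write(&current->mm->mmap_sem);

        /* new: vm_mmap()/vm_brk() handle the locking themselves */
        addr = vm_mmap(file, start, len, prot, flags, offset);
        err  = vm_brk(start, len);      /* also returns the start address on success */
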
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index b3b733262909..99480e55973d 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -43,7 +43,7 @@ extern void __add_wrong_size(void) | |||
43 | switch (sizeof(*(ptr))) { \ | 43 | switch (sizeof(*(ptr))) { \ |
44 | case __X86_CASE_B: \ | 44 | case __X86_CASE_B: \ |
45 | asm volatile (lock #op "b %b0, %1\n" \ | 45 | asm volatile (lock #op "b %b0, %1\n" \ |
46 | : "+r" (__ret), "+m" (*(ptr)) \ | 46 | : "+q" (__ret), "+m" (*(ptr)) \ |
47 | : : "memory", "cc"); \ | 47 | : : "memory", "cc"); \ |
48 | break; \ | 48 | break; \ |
49 | case __X86_CASE_W: \ | 49 | case __X86_CASE_W: \ |
@@ -173,7 +173,7 @@ extern void __add_wrong_size(void) | |||
173 | switch (sizeof(*(ptr))) { \ | 173 | switch (sizeof(*(ptr))) { \ |
174 | case __X86_CASE_B: \ | 174 | case __X86_CASE_B: \ |
175 | asm volatile (lock "addb %b1, %0\n" \ | 175 | asm volatile (lock "addb %b1, %0\n" \ |
176 | : "+m" (*(ptr)) : "ri" (inc) \ | 176 | : "+m" (*(ptr)) : "qi" (inc) \ |
177 | : "memory", "cc"); \ | 177 | : "memory", "cc"); \ |
178 | break; \ | 178 | break; \ |
179 | case __X86_CASE_W: \ | 179 | case __X86_CASE_W: \ |
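
The constraint change from "+r"/"ri" to "+q"/"qi" matters only for the byte-sized cases: on 32-bit x86 only %eax..%edx have 8-bit subregisters, so the "q" class keeps gcc from picking %esi or %edi, where the "%b0" operand modifier would have no valid byte register. A standalone illustration of the same constraint (a hypothetical helper, not the kernel macro itself):

        /* xchg_byte() is a made-up example; "q" restricts v to a/b/c/d registers */
        static inline unsigned char xchg_byte(unsigned char *p, unsigned char v)
        {
                asm volatile("xchgb %b0, %1"
                             : "+q" (v), "+m" (*p)
                             : : "memory");
                return v;
        }
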
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 268c783ab1c0..18d9005d9e4f 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -34,6 +34,7 @@ | |||
34 | 34 | ||
35 | #ifndef __ASSEMBLY__ | 35 | #ifndef __ASSEMBLY__ |
36 | extern void mcount(void); | 36 | extern void mcount(void); |
37 | extern int modifying_ftrace_code; | ||
37 | 38 | ||
38 | static inline unsigned long ftrace_call_adjust(unsigned long addr) | 39 | static inline unsigned long ftrace_call_adjust(unsigned long addr) |
39 | { | 40 | { |
@@ -50,6 +51,8 @@ struct dyn_arch_ftrace { | |||
50 | /* No extra data needed for x86 */ | 51 | /* No extra data needed for x86 */ |
51 | }; | 52 | }; |
52 | 53 | ||
54 | int ftrace_int3_handler(struct pt_regs *regs); | ||
55 | |||
53 | #endif /* CONFIG_DYNAMIC_FTRACE */ | 56 | #endif /* CONFIG_DYNAMIC_FTRACE */ |
54 | #endif /* __ASSEMBLY__ */ | 57 | #endif /* __ASSEMBLY__ */ |
55 | #endif /* CONFIG_FUNCTION_TRACER */ | 58 | #endif /* CONFIG_FUNCTION_TRACER */ |
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index ccb805966f68..957ec87385af 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -134,6 +134,8 @@ | |||
134 | #define MSR_AMD64_IBSFETCHCTL 0xc0011030 | 134 | #define MSR_AMD64_IBSFETCHCTL 0xc0011030 |
135 | #define MSR_AMD64_IBSFETCHLINAD 0xc0011031 | 135 | #define MSR_AMD64_IBSFETCHLINAD 0xc0011031 |
136 | #define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 | 136 | #define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 |
137 | #define MSR_AMD64_IBSFETCH_REG_COUNT 3 | ||
138 | #define MSR_AMD64_IBSFETCH_REG_MASK ((1UL<<MSR_AMD64_IBSFETCH_REG_COUNT)-1) | ||
137 | #define MSR_AMD64_IBSOPCTL 0xc0011033 | 139 | #define MSR_AMD64_IBSOPCTL 0xc0011033 |
138 | #define MSR_AMD64_IBSOPRIP 0xc0011034 | 140 | #define MSR_AMD64_IBSOPRIP 0xc0011034 |
139 | #define MSR_AMD64_IBSOPDATA 0xc0011035 | 141 | #define MSR_AMD64_IBSOPDATA 0xc0011035 |
@@ -141,8 +143,11 @@ | |||
141 | #define MSR_AMD64_IBSOPDATA3 0xc0011037 | 143 | #define MSR_AMD64_IBSOPDATA3 0xc0011037 |
142 | #define MSR_AMD64_IBSDCLINAD 0xc0011038 | 144 | #define MSR_AMD64_IBSDCLINAD 0xc0011038 |
143 | #define MSR_AMD64_IBSDCPHYSAD 0xc0011039 | 145 | #define MSR_AMD64_IBSDCPHYSAD 0xc0011039 |
146 | #define MSR_AMD64_IBSOP_REG_COUNT 7 | ||
147 | #define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1) | ||
144 | #define MSR_AMD64_IBSCTL 0xc001103a | 148 | #define MSR_AMD64_IBSCTL 0xc001103a |
145 | #define MSR_AMD64_IBSBRTARGET 0xc001103b | 149 | #define MSR_AMD64_IBSBRTARGET 0xc001103b |
150 | #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ | ||
146 | 151 | ||
147 | /* Fam 15h MSRs */ | 152 | /* Fam 15h MSRs */ |
148 | #define MSR_F15H_PERF_CTL 0xc0010200 | 153 | #define MSR_F15H_PERF_CTL 0xc0010200 |
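
The new *_REG_COUNT/*_REG_MASK constants describe the contiguous MSR blocks that start at IBSFETCHCTL and IBSOPCTL; the mask carries one bit per register so the IBS driver added below can walk the block with find_next_bit(). A small user-space check of the arithmetic, with the values copied from the definitions above:

        #include <stdio.h>

        #define MSR_AMD64_IBSFETCH_REG_COUNT    3
        #define MSR_AMD64_IBSFETCH_REG_MASK     ((1UL<<MSR_AMD64_IBSFETCH_REG_COUNT)-1)
        #define MSR_AMD64_IBSOP_REG_COUNT       7
        #define MSR_AMD64_IBSOP_REG_MASK        ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1)

        int main(void)
        {
                /* one bit per MSR, counted from the control register at offset 0 */
                printf("fetch mask: %#lx\n", MSR_AMD64_IBSFETCH_REG_MASK);  /* 0x7  */
                printf("op mask:    %#lx\n", MSR_AMD64_IBSOP_REG_MASK);     /* 0x7f */
                return 0;
        }
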
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 2291895b1836..4e40a64315c9 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -158,6 +158,7 @@ struct x86_pmu_capability { | |||
158 | #define IBS_CAPS_OPCNT (1U<<4) | 158 | #define IBS_CAPS_OPCNT (1U<<4) |
159 | #define IBS_CAPS_BRNTRGT (1U<<5) | 159 | #define IBS_CAPS_BRNTRGT (1U<<5) |
160 | #define IBS_CAPS_OPCNTEXT (1U<<6) | 160 | #define IBS_CAPS_OPCNTEXT (1U<<6) |
161 | #define IBS_CAPS_RIPINVALIDCHK (1U<<7) | ||
161 | 162 | ||
162 | #define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \ | 163 | #define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \ |
163 | | IBS_CAPS_FETCHSAM \ | 164 | | IBS_CAPS_FETCHSAM \ |
@@ -170,19 +171,22 @@ struct x86_pmu_capability { | |||
170 | #define IBSCTL_LVT_OFFSET_VALID (1ULL<<8) | 171 | #define IBSCTL_LVT_OFFSET_VALID (1ULL<<8) |
171 | #define IBSCTL_LVT_OFFSET_MASK 0x0F | 172 | #define IBSCTL_LVT_OFFSET_MASK 0x0F |
172 | 173 | ||
173 | /* IbsFetchCtl bits/masks */ | 174 | /* ibs fetch bits/masks */ |
174 | #define IBS_FETCH_RAND_EN (1ULL<<57) | 175 | #define IBS_FETCH_RAND_EN (1ULL<<57) |
175 | #define IBS_FETCH_VAL (1ULL<<49) | 176 | #define IBS_FETCH_VAL (1ULL<<49) |
176 | #define IBS_FETCH_ENABLE (1ULL<<48) | 177 | #define IBS_FETCH_ENABLE (1ULL<<48) |
177 | #define IBS_FETCH_CNT 0xFFFF0000ULL | 178 | #define IBS_FETCH_CNT 0xFFFF0000ULL |
178 | #define IBS_FETCH_MAX_CNT 0x0000FFFFULL | 179 | #define IBS_FETCH_MAX_CNT 0x0000FFFFULL |
179 | 180 | ||
180 | /* IbsOpCtl bits */ | 181 | /* ibs op bits/masks */ |
182 | /* lower 4 bits of the current count are ignored: */ | ||
183 | #define IBS_OP_CUR_CNT (0xFFFF0ULL<<32) | ||
181 | #define IBS_OP_CNT_CTL (1ULL<<19) | 184 | #define IBS_OP_CNT_CTL (1ULL<<19) |
182 | #define IBS_OP_VAL (1ULL<<18) | 185 | #define IBS_OP_VAL (1ULL<<18) |
183 | #define IBS_OP_ENABLE (1ULL<<17) | 186 | #define IBS_OP_ENABLE (1ULL<<17) |
184 | #define IBS_OP_MAX_CNT 0x0000FFFFULL | 187 | #define IBS_OP_MAX_CNT 0x0000FFFFULL |
185 | #define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ | 188 | #define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ |
189 | #define IBS_RIP_INVALID (1ULL<<38) | ||
186 | 190 | ||
187 | extern u32 get_ibs_caps(void); | 191 | extern u32 get_ibs_caps(void); |
188 | 192 | ||
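
IBS_OP_CUR_CNT exposes the IbsOpCurCnt field (bits 51:32 of IbsOpCtl, with the lowest 4 bits ignored), and IBS_RIP_INVALID flags samples whose IbsOpRip could not be recorded. A simplified user-space mirror of get_ibs_op_count() from this patch shows how the count is assembled (the IBS_CAPS_RDWROPCNT capability check is left out here):

        #include <stdint.h>
        #include <stdio.h>

        #define IBS_OP_VAL      (1ULL<<18)
        #define IBS_OP_MAX_CNT  0x0000FFFFULL
        #define IBS_OP_CUR_CNT  (0xFFFF0ULL<<32)

        static uint64_t ibs_op_count(uint64_t ctl)
        {
                uint64_t count = 0;

                if (ctl & IBS_OP_VAL)
                        count += (ctl & IBS_OP_MAX_CNT) << 4;   /* counter rolled over */
                count += (ctl & IBS_OP_CUR_CNT) >> 32;          /* current count */
                return count;
        }

        int main(void)
        {
                uint64_t ctl = IBS_OP_VAL | 0x1000 | (0xABC0ULL << 32);

                printf("op count: %llu\n", (unsigned long long)ibs_op_count(ctl));
                return 0;
        }
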
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 8be5f54d9360..e0544597cfe7 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -557,6 +557,8 @@ struct __large_struct { unsigned long buf[100]; }; | |||
557 | 557 | ||
558 | extern unsigned long | 558 | extern unsigned long |
559 | copy_from_user_nmi(void *to, const void __user *from, unsigned long n); | 559 | copy_from_user_nmi(void *to, const void __user *from, unsigned long n); |
560 | extern __must_check long | ||
561 | strncpy_from_user(char *dst, const char __user *src, long count); | ||
560 | 562 | ||
561 | /* | 563 | /* |
562 | * movsl can be slow when source and dest are not both 8-byte aligned | 564 | * movsl can be slow when source and dest are not both 8-byte aligned |
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 566e803cc602..8084bc73b18c 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -213,11 +213,6 @@ static inline unsigned long __must_check copy_from_user(void *to, | |||
213 | return n; | 213 | return n; |
214 | } | 214 | } |
215 | 215 | ||
216 | long __must_check strncpy_from_user(char *dst, const char __user *src, | ||
217 | long count); | ||
218 | long __must_check __strncpy_from_user(char *dst, | ||
219 | const char __user *src, long count); | ||
220 | |||
221 | /** | 216 | /** |
222 | * strlen_user: - Get the size of a string in user space. | 217 | * strlen_user: - Get the size of a string in user space. |
223 | * @str: The string to measure. | 218 | * @str: The string to measure. |
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 1c66d30971ad..fcd4b6f3ef02 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -208,10 +208,6 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) | |||
208 | } | 208 | } |
209 | } | 209 | } |
210 | 210 | ||
211 | __must_check long | ||
212 | strncpy_from_user(char *dst, const char __user *src, long count); | ||
213 | __must_check long | ||
214 | __strncpy_from_user(char *dst, const char __user *src, long count); | ||
215 | __must_check long strnlen_user(const char __user *str, long n); | 211 | __must_check long strnlen_user(const char __user *str, long n); |
216 | __must_check long __strnlen_user(const char __user *str, long n); | 212 | __must_check long __strnlen_user(const char __user *str, long n); |
217 | __must_check long strlen_user(const char __user *str); | 213 | __must_check long strlen_user(const char __user *str); |
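
Together with the uaccess_32.h hunk above, this leaves a single __must_check prototype for strncpy_from_user() in the common header; the per-arch and double-underscore variants are no longer declared here. A hedged usage sketch, kernel context assumed and 'uptr' standing in for some __user pointer:

        char name[64];
        long len;

        len = strncpy_from_user(name, uptr, sizeof(name));
        if (len < 0)
                return len;             /* -EFAULT: faulted while reading user memory */
        if (len == sizeof(name))
                return -ENAMETOOLONG;   /* hit the limit, string not NUL-terminated */
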
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index bb8e03407e18..e049d6da0183 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -484,9 +484,6 @@ static int __x86_pmu_event_init(struct perf_event *event) | |||
484 | 484 | ||
485 | /* mark unused */ | 485 | /* mark unused */ |
486 | event->hw.extra_reg.idx = EXTRA_REG_NONE; | 486 | event->hw.extra_reg.idx = EXTRA_REG_NONE; |
487 | |||
488 | /* mark not used */ | ||
489 | event->hw.extra_reg.idx = EXTRA_REG_NONE; | ||
490 | event->hw.branch_reg.idx = EXTRA_REG_NONE; | 487 | event->hw.branch_reg.idx = EXTRA_REG_NONE; |
491 | 488 | ||
492 | return x86_pmu.hw_config(event); | 489 | return x86_pmu.hw_config(event); |
@@ -1186,8 +1183,6 @@ int x86_pmu_handle_irq(struct pt_regs *regs) | |||
1186 | int idx, handled = 0; | 1183 | int idx, handled = 0; |
1187 | u64 val; | 1184 | u64 val; |
1188 | 1185 | ||
1189 | perf_sample_data_init(&data, 0); | ||
1190 | |||
1191 | cpuc = &__get_cpu_var(cpu_hw_events); | 1186 | cpuc = &__get_cpu_var(cpu_hw_events); |
1192 | 1187 | ||
1193 | /* | 1188 | /* |
@@ -1222,7 +1217,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs) | |||
1222 | * event overflow | 1217 | * event overflow |
1223 | */ | 1218 | */ |
1224 | handled++; | 1219 | handled++; |
1225 | data.period = event->hw.last_period; | 1220 | perf_sample_data_init(&data, 0, event->hw.last_period); |
1226 | 1221 | ||
1227 | if (!x86_perf_event_set_period(event)) | 1222 | if (!x86_perf_event_set_period(event)) |
1228 | continue; | 1223 | continue; |
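
perf_sample_data_init() now takes the sample period as a third argument, so the overflow handlers no longer initialise the data up front and patch data.period afterwards; the init moves into the overflow path and runs only for the event that actually overflowed. Assuming the 3-argument form this series introduces:

        struct perf_sample_data data;

        /* old: unconditional init on every PMI, period patched afterwards */
        perf_sample_data_init(&data, 0);
        data.period = event->hw.last_period;

        /* new: one call, only on the overflow path */
        perf_sample_data_init(&data, 0, event->hw.last_period);
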
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 95e7fe1c5f0b..65652265fffd 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -134,8 +134,13 @@ static u64 amd_pmu_event_map(int hw_event) | |||
134 | 134 | ||
135 | static int amd_pmu_hw_config(struct perf_event *event) | 135 | static int amd_pmu_hw_config(struct perf_event *event) |
136 | { | 136 | { |
137 | int ret = x86_pmu_hw_config(event); | 137 | int ret; |
138 | 138 | ||
139 | /* pass precise event sampling to ibs: */ | ||
140 | if (event->attr.precise_ip && get_ibs_caps()) | ||
141 | return -ENOENT; | ||
142 | |||
143 | ret = x86_pmu_hw_config(event); | ||
139 | if (ret) | 144 | if (ret) |
140 | return ret; | 145 | return ret; |
141 | 146 | ||
@@ -205,10 +210,8 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc, | |||
205 | * when we come here | 210 | * when we come here |
206 | */ | 211 | */ |
207 | for (i = 0; i < x86_pmu.num_counters; i++) { | 212 | for (i = 0; i < x86_pmu.num_counters; i++) { |
208 | if (nb->owners[i] == event) { | 213 | if (cmpxchg(nb->owners + i, event, NULL) == event) |
209 | cmpxchg(nb->owners+i, event, NULL); | ||
210 | break; | 214 | break; |
211 | } | ||
212 | } | 215 | } |
213 | } | 216 | } |
214 | 217 | ||
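
Two independent cleanups here: precise_ip requests are bounced with -ENOENT when IBS is present, so the perf core can fall back to the ibs_op PMU registered below (perf_ibs_init() picks such events up via perf_ibs_precise_event()), and the constraint release now folds the ownership test into the cmpxchg() itself. A standalone illustration of that second pattern, using C11 atomics rather than the kernel primitives:

        #include <stdatomic.h>
        #include <stddef.h>

        static _Atomic(void *) owners[4];

        static void put_slot(void *event)
        {
                for (size_t i = 0; i < 4; i++) {
                        void *expected = event;

                        /* clears owners[i] only if it still holds this event */
                        if (atomic_compare_exchange_strong(&owners[i], &expected, NULL))
                                break;
                }
        }
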
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index 3b8a2d30d14e..da9bcdcd9856 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/perf_event.h> | 9 | #include <linux/perf_event.h> |
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | #include <linux/pci.h> | 11 | #include <linux/pci.h> |
12 | #include <linux/ptrace.h> | ||
12 | 13 | ||
13 | #include <asm/apic.h> | 14 | #include <asm/apic.h> |
14 | 15 | ||
@@ -16,36 +17,591 @@ static u32 ibs_caps; | |||
16 | 17 | ||
17 | #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) | 18 | #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) |
18 | 19 | ||
19 | static struct pmu perf_ibs; | 20 | #include <linux/kprobes.h> |
21 | #include <linux/hardirq.h> | ||
22 | |||
23 | #include <asm/nmi.h> | ||
24 | |||
25 | #define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT) | ||
26 | #define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT | ||
27 | |||
28 | enum ibs_states { | ||
29 | IBS_ENABLED = 0, | ||
30 | IBS_STARTED = 1, | ||
31 | IBS_STOPPING = 2, | ||
32 | |||
33 | IBS_MAX_STATES, | ||
34 | }; | ||
35 | |||
36 | struct cpu_perf_ibs { | ||
37 | struct perf_event *event; | ||
38 | unsigned long state[BITS_TO_LONGS(IBS_MAX_STATES)]; | ||
39 | }; | ||
40 | |||
41 | struct perf_ibs { | ||
42 | struct pmu pmu; | ||
43 | unsigned int msr; | ||
44 | u64 config_mask; | ||
45 | u64 cnt_mask; | ||
46 | u64 enable_mask; | ||
47 | u64 valid_mask; | ||
48 | u64 max_period; | ||
49 | unsigned long offset_mask[1]; | ||
50 | int offset_max; | ||
51 | struct cpu_perf_ibs __percpu *pcpu; | ||
52 | u64 (*get_count)(u64 config); | ||
53 | }; | ||
54 | |||
55 | struct perf_ibs_data { | ||
56 | u32 size; | ||
57 | union { | ||
58 | u32 data[0]; /* data buffer starts here */ | ||
59 | u32 caps; | ||
60 | }; | ||
61 | u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX]; | ||
62 | }; | ||
63 | |||
64 | static int | ||
65 | perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period) | ||
66 | { | ||
67 | s64 left = local64_read(&hwc->period_left); | ||
68 | s64 period = hwc->sample_period; | ||
69 | int overflow = 0; | ||
70 | |||
71 | /* | ||
72 | * If we are way outside a reasonable range then just skip forward: | ||
73 | */ | ||
74 | if (unlikely(left <= -period)) { | ||
75 | left = period; | ||
76 | local64_set(&hwc->period_left, left); | ||
77 | hwc->last_period = period; | ||
78 | overflow = 1; | ||
79 | } | ||
80 | |||
81 | if (unlikely(left < (s64)min)) { | ||
82 | left += period; | ||
83 | local64_set(&hwc->period_left, left); | ||
84 | hwc->last_period = period; | ||
85 | overflow = 1; | ||
86 | } | ||
87 | |||
88 | /* | ||
89 | * If the hw period that triggers the sw overflow is too short | ||
90 | * we might hit the irq handler. This biases the results. | ||
91 | * Thus we shorten the next-to-last period and set the last | ||
92 | * period to the max period. | ||
93 | */ | ||
94 | if (left > max) { | ||
95 | left -= max; | ||
96 | if (left > max) | ||
97 | left = max; | ||
98 | else if (left < min) | ||
99 | left = min; | ||
100 | } | ||
101 | |||
102 | *hw_period = (u64)left; | ||
103 | |||
104 | return overflow; | ||
105 | } | ||
106 | |||
107 | static int | ||
108 | perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width) | ||
109 | { | ||
110 | struct hw_perf_event *hwc = &event->hw; | ||
111 | int shift = 64 - width; | ||
112 | u64 prev_raw_count; | ||
113 | u64 delta; | ||
114 | |||
115 | /* | ||
116 | * Careful: an NMI might modify the previous event value. | ||
117 | * | ||
118 | * Our tactic to handle this is to first atomically read and | ||
119 | * exchange a new raw count - then add that new-prev delta | ||
120 | * count to the generic event atomically: | ||
121 | */ | ||
122 | prev_raw_count = local64_read(&hwc->prev_count); | ||
123 | if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, | ||
124 | new_raw_count) != prev_raw_count) | ||
125 | return 0; | ||
126 | |||
127 | /* | ||
128 | * Now we have the new raw value and have updated the prev | ||
129 | * timestamp already. We can now calculate the elapsed delta | ||
130 | * (event-)time and add that to the generic event. | ||
131 | * | ||
132 | * Careful, not all hw sign-extends above the physical width | ||
133 | * of the count. | ||
134 | */ | ||
135 | delta = (new_raw_count << shift) - (prev_raw_count << shift); | ||
136 | delta >>= shift; | ||
137 | |||
138 | local64_add(delta, &event->count); | ||
139 | local64_sub(delta, &hwc->period_left); | ||
140 | |||
141 | return 1; | ||
142 | } | ||
143 | |||
144 | static struct perf_ibs perf_ibs_fetch; | ||
145 | static struct perf_ibs perf_ibs_op; | ||
146 | |||
147 | static struct perf_ibs *get_ibs_pmu(int type) | ||
148 | { | ||
149 | if (perf_ibs_fetch.pmu.type == type) | ||
150 | return &perf_ibs_fetch; | ||
151 | if (perf_ibs_op.pmu.type == type) | ||
152 | return &perf_ibs_op; | ||
153 | return NULL; | ||
154 | } | ||
155 | |||
156 | /* | ||
157 | * Use IBS for precise event sampling: | ||
158 | * | ||
159 | * perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count | ||
160 | * perf record -a -e r076:p ... # same as -e cpu-cycles:p | ||
161 | * perf record -a -e r0C1:p ... # use ibs op counting micro-ops | ||
162 | * | ||
163 | * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl, | ||
164 | * MSRC001_1033) is used to select either cycle or micro-ops counting | ||
165 | * mode. | ||
166 | * | ||
167 | * The rip of IBS samples has skid 0. Thus, IBS supports precise | ||
168 | * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the | ||
169 | * rip is invalid when IBS was not able to record the rip correctly. | ||
170 | * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then. | ||
171 | * | ||
172 | */ | ||
173 | static int perf_ibs_precise_event(struct perf_event *event, u64 *config) | ||
174 | { | ||
175 | switch (event->attr.precise_ip) { | ||
176 | case 0: | ||
177 | return -ENOENT; | ||
178 | case 1: | ||
179 | case 2: | ||
180 | break; | ||
181 | default: | ||
182 | return -EOPNOTSUPP; | ||
183 | } | ||
184 | |||
185 | switch (event->attr.type) { | ||
186 | case PERF_TYPE_HARDWARE: | ||
187 | switch (event->attr.config) { | ||
188 | case PERF_COUNT_HW_CPU_CYCLES: | ||
189 | *config = 0; | ||
190 | return 0; | ||
191 | } | ||
192 | break; | ||
193 | case PERF_TYPE_RAW: | ||
194 | switch (event->attr.config) { | ||
195 | case 0x0076: | ||
196 | *config = 0; | ||
197 | return 0; | ||
198 | case 0x00C1: | ||
199 | *config = IBS_OP_CNT_CTL; | ||
200 | return 0; | ||
201 | } | ||
202 | break; | ||
203 | default: | ||
204 | return -ENOENT; | ||
205 | } | ||
206 | |||
207 | return -EOPNOTSUPP; | ||
208 | } | ||
20 | 209 | ||
21 | static int perf_ibs_init(struct perf_event *event) | 210 | static int perf_ibs_init(struct perf_event *event) |
22 | { | 211 | { |
23 | if (perf_ibs.type != event->attr.type) | 212 | struct hw_perf_event *hwc = &event->hw; |
213 | struct perf_ibs *perf_ibs; | ||
214 | u64 max_cnt, config; | ||
215 | int ret; | ||
216 | |||
217 | perf_ibs = get_ibs_pmu(event->attr.type); | ||
218 | if (perf_ibs) { | ||
219 | config = event->attr.config; | ||
220 | } else { | ||
221 | perf_ibs = &perf_ibs_op; | ||
222 | ret = perf_ibs_precise_event(event, &config); | ||
223 | if (ret) | ||
224 | return ret; | ||
225 | } | ||
226 | |||
227 | if (event->pmu != &perf_ibs->pmu) | ||
24 | return -ENOENT; | 228 | return -ENOENT; |
229 | |||
230 | if (config & ~perf_ibs->config_mask) | ||
231 | return -EINVAL; | ||
232 | |||
233 | if (hwc->sample_period) { | ||
234 | if (config & perf_ibs->cnt_mask) | ||
235 | /* raw max_cnt may not be set */ | ||
236 | return -EINVAL; | ||
237 | if (!event->attr.sample_freq && hwc->sample_period & 0x0f) | ||
238 | /* | ||
239 | * lower 4 bits can not be set in ibs max cnt, | ||
240 | * but allowing it in case we adjust the | ||
241 | * sample period to set a frequency. | ||
242 | */ | ||
243 | return -EINVAL; | ||
244 | hwc->sample_period &= ~0x0FULL; | ||
245 | if (!hwc->sample_period) | ||
246 | hwc->sample_period = 0x10; | ||
247 | } else { | ||
248 | max_cnt = config & perf_ibs->cnt_mask; | ||
249 | config &= ~perf_ibs->cnt_mask; | ||
250 | event->attr.sample_period = max_cnt << 4; | ||
251 | hwc->sample_period = event->attr.sample_period; | ||
252 | } | ||
253 | |||
254 | if (!hwc->sample_period) | ||
255 | return -EINVAL; | ||
256 | |||
257 | /* | ||
258 | * If we modify hwc->sample_period, we also need to update | ||
259 | * hwc->last_period and hwc->period_left. | ||
260 | */ | ||
261 | hwc->last_period = hwc->sample_period; | ||
262 | local64_set(&hwc->period_left, hwc->sample_period); | ||
263 | |||
264 | hwc->config_base = perf_ibs->msr; | ||
265 | hwc->config = config; | ||
266 | |||
25 | return 0; | 267 | return 0; |
26 | } | 268 | } |
27 | 269 | ||
270 | static int perf_ibs_set_period(struct perf_ibs *perf_ibs, | ||
271 | struct hw_perf_event *hwc, u64 *period) | ||
272 | { | ||
273 | int overflow; | ||
274 | |||
275 | /* ignore lower 4 bits in min count: */ | ||
276 | overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period); | ||
277 | local64_set(&hwc->prev_count, 0); | ||
278 | |||
279 | return overflow; | ||
280 | } | ||
281 | |||
282 | static u64 get_ibs_fetch_count(u64 config) | ||
283 | { | ||
284 | return (config & IBS_FETCH_CNT) >> 12; | ||
285 | } | ||
286 | |||
287 | static u64 get_ibs_op_count(u64 config) | ||
288 | { | ||
289 | u64 count = 0; | ||
290 | |||
291 | if (config & IBS_OP_VAL) | ||
292 | count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */ | ||
293 | |||
294 | if (ibs_caps & IBS_CAPS_RDWROPCNT) | ||
295 | count += (config & IBS_OP_CUR_CNT) >> 32; | ||
296 | |||
297 | return count; | ||
298 | } | ||
299 | |||
300 | static void | ||
301 | perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event, | ||
302 | u64 *config) | ||
303 | { | ||
304 | u64 count = perf_ibs->get_count(*config); | ||
305 | |||
306 | /* | ||
307 | * Set width to 64 since we do not overflow on max width but | ||
308 | * instead on max count. In perf_ibs_set_period() we clear | ||
309 | * prev count manually on overflow. | ||
310 | */ | ||
311 | while (!perf_event_try_update(event, count, 64)) { | ||
312 | rdmsrl(event->hw.config_base, *config); | ||
313 | count = perf_ibs->get_count(*config); | ||
314 | } | ||
315 | } | ||
316 | |||
317 | static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs, | ||
318 | struct hw_perf_event *hwc, u64 config) | ||
319 | { | ||
320 | wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask); | ||
321 | } | ||
322 | |||
323 | /* | ||
324 | * Erratum #420 Instruction-Based Sampling Engine May Generate | ||
325 | * Interrupt that Cannot Be Cleared: | ||
326 | * | ||
327 | * Must clear counter mask first, then clear the enable bit. See | ||
328 | * Revision Guide for AMD Family 10h Processors, Publication #41322. | ||
329 | */ | ||
330 | static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs, | ||
331 | struct hw_perf_event *hwc, u64 config) | ||
332 | { | ||
333 | config &= ~perf_ibs->cnt_mask; | ||
334 | wrmsrl(hwc->config_base, config); | ||
335 | config &= ~perf_ibs->enable_mask; | ||
336 | wrmsrl(hwc->config_base, config); | ||
337 | } | ||
338 | |||
339 | /* | ||
340 | * We cannot restore the ibs pmu state, so we always need to update | ||
341 | * the event while stopping it and then reset the state when starting | ||
342 | * again. Thus, ignoring PERF_EF_RELOAD and PERF_EF_UPDATE flags in | ||
343 | * perf_ibs_start()/perf_ibs_stop() and instead always do it. | ||
344 | */ | ||
345 | static void perf_ibs_start(struct perf_event *event, int flags) | ||
346 | { | ||
347 | struct hw_perf_event *hwc = &event->hw; | ||
348 | struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); | ||
349 | struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); | ||
350 | u64 period; | ||
351 | |||
352 | if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) | ||
353 | return; | ||
354 | |||
355 | WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); | ||
356 | hwc->state = 0; | ||
357 | |||
358 | perf_ibs_set_period(perf_ibs, hwc, &period); | ||
359 | set_bit(IBS_STARTED, pcpu->state); | ||
360 | perf_ibs_enable_event(perf_ibs, hwc, period >> 4); | ||
361 | |||
362 | perf_event_update_userpage(event); | ||
363 | } | ||
364 | |||
365 | static void perf_ibs_stop(struct perf_event *event, int flags) | ||
366 | { | ||
367 | struct hw_perf_event *hwc = &event->hw; | ||
368 | struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); | ||
369 | struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); | ||
370 | u64 config; | ||
371 | int stopping; | ||
372 | |||
373 | stopping = test_and_clear_bit(IBS_STARTED, pcpu->state); | ||
374 | |||
375 | if (!stopping && (hwc->state & PERF_HES_UPTODATE)) | ||
376 | return; | ||
377 | |||
378 | rdmsrl(hwc->config_base, config); | ||
379 | |||
380 | if (stopping) { | ||
381 | set_bit(IBS_STOPPING, pcpu->state); | ||
382 | perf_ibs_disable_event(perf_ibs, hwc, config); | ||
383 | WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); | ||
384 | hwc->state |= PERF_HES_STOPPED; | ||
385 | } | ||
386 | |||
387 | if (hwc->state & PERF_HES_UPTODATE) | ||
388 | return; | ||
389 | |||
390 | /* | ||
391 | * Clear valid bit to not count rollovers on update, rollovers | ||
392 | * are only updated in the irq handler. | ||
393 | */ | ||
394 | config &= ~perf_ibs->valid_mask; | ||
395 | |||
396 | perf_ibs_event_update(perf_ibs, event, &config); | ||
397 | hwc->state |= PERF_HES_UPTODATE; | ||
398 | } | ||
399 | |||
28 | static int perf_ibs_add(struct perf_event *event, int flags) | 400 | static int perf_ibs_add(struct perf_event *event, int flags) |
29 | { | 401 | { |
402 | struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); | ||
403 | struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); | ||
404 | |||
405 | if (test_and_set_bit(IBS_ENABLED, pcpu->state)) | ||
406 | return -ENOSPC; | ||
407 | |||
408 | event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; | ||
409 | |||
410 | pcpu->event = event; | ||
411 | |||
412 | if (flags & PERF_EF_START) | ||
413 | perf_ibs_start(event, PERF_EF_RELOAD); | ||
414 | |||
30 | return 0; | 415 | return 0; |
31 | } | 416 | } |
32 | 417 | ||
33 | static void perf_ibs_del(struct perf_event *event, int flags) | 418 | static void perf_ibs_del(struct perf_event *event, int flags) |
34 | { | 419 | { |
420 | struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); | ||
421 | struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); | ||
422 | |||
423 | if (!test_and_clear_bit(IBS_ENABLED, pcpu->state)) | ||
424 | return; | ||
425 | |||
426 | perf_ibs_stop(event, PERF_EF_UPDATE); | ||
427 | |||
428 | pcpu->event = NULL; | ||
429 | |||
430 | perf_event_update_userpage(event); | ||
35 | } | 431 | } |
36 | 432 | ||
37 | static struct pmu perf_ibs = { | 433 | static void perf_ibs_read(struct perf_event *event) { } |
38 | .event_init= perf_ibs_init, | 434 | |
39 | .add= perf_ibs_add, | 435 | static struct perf_ibs perf_ibs_fetch = { |
40 | .del= perf_ibs_del, | 436 | .pmu = { |
437 | .task_ctx_nr = perf_invalid_context, | ||
438 | |||
439 | .event_init = perf_ibs_init, | ||
440 | .add = perf_ibs_add, | ||
441 | .del = perf_ibs_del, | ||
442 | .start = perf_ibs_start, | ||
443 | .stop = perf_ibs_stop, | ||
444 | .read = perf_ibs_read, | ||
445 | }, | ||
446 | .msr = MSR_AMD64_IBSFETCHCTL, | ||
447 | .config_mask = IBS_FETCH_CONFIG_MASK, | ||
448 | .cnt_mask = IBS_FETCH_MAX_CNT, | ||
449 | .enable_mask = IBS_FETCH_ENABLE, | ||
450 | .valid_mask = IBS_FETCH_VAL, | ||
451 | .max_period = IBS_FETCH_MAX_CNT << 4, | ||
452 | .offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK }, | ||
453 | .offset_max = MSR_AMD64_IBSFETCH_REG_COUNT, | ||
454 | |||
455 | .get_count = get_ibs_fetch_count, | ||
41 | }; | 456 | }; |
42 | 457 | ||
458 | static struct perf_ibs perf_ibs_op = { | ||
459 | .pmu = { | ||
460 | .task_ctx_nr = perf_invalid_context, | ||
461 | |||
462 | .event_init = perf_ibs_init, | ||
463 | .add = perf_ibs_add, | ||
464 | .del = perf_ibs_del, | ||
465 | .start = perf_ibs_start, | ||
466 | .stop = perf_ibs_stop, | ||
467 | .read = perf_ibs_read, | ||
468 | }, | ||
469 | .msr = MSR_AMD64_IBSOPCTL, | ||
470 | .config_mask = IBS_OP_CONFIG_MASK, | ||
471 | .cnt_mask = IBS_OP_MAX_CNT, | ||
472 | .enable_mask = IBS_OP_ENABLE, | ||
473 | .valid_mask = IBS_OP_VAL, | ||
474 | .max_period = IBS_OP_MAX_CNT << 4, | ||
475 | .offset_mask = { MSR_AMD64_IBSOP_REG_MASK }, | ||
476 | .offset_max = MSR_AMD64_IBSOP_REG_COUNT, | ||
477 | |||
478 | .get_count = get_ibs_op_count, | ||
479 | }; | ||
480 | |||
481 | static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) | ||
482 | { | ||
483 | struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); | ||
484 | struct perf_event *event = pcpu->event; | ||
485 | struct hw_perf_event *hwc = &event->hw; | ||
486 | struct perf_sample_data data; | ||
487 | struct perf_raw_record raw; | ||
488 | struct pt_regs regs; | ||
489 | struct perf_ibs_data ibs_data; | ||
490 | int offset, size, check_rip, offset_max, throttle = 0; | ||
491 | unsigned int msr; | ||
492 | u64 *buf, *config, period; | ||
493 | |||
494 | if (!test_bit(IBS_STARTED, pcpu->state)) { | ||
495 | /* | ||
496 | * Catch spurious interrupts after stopping IBS: After | ||
497 | * disabling IBS there could still be incoming NMIs | ||
498 | * with samples that even have the valid bit cleared. | ||
499 | * Mark all these NMIs as handled. | ||
500 | */ | ||
501 | return test_and_clear_bit(IBS_STOPPING, pcpu->state) ? 1 : 0; | ||
502 | } | ||
503 | |||
504 | msr = hwc->config_base; | ||
505 | buf = ibs_data.regs; | ||
506 | rdmsrl(msr, *buf); | ||
507 | if (!(*buf++ & perf_ibs->valid_mask)) | ||
508 | return 0; | ||
509 | |||
510 | config = &ibs_data.regs[0]; | ||
511 | perf_ibs_event_update(perf_ibs, event, config); | ||
512 | perf_sample_data_init(&data, 0, hwc->last_period); | ||
513 | if (!perf_ibs_set_period(perf_ibs, hwc, &period)) | ||
514 | goto out; /* no sw counter overflow */ | ||
515 | |||
516 | ibs_data.caps = ibs_caps; | ||
517 | size = 1; | ||
518 | offset = 1; | ||
519 | check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK)); | ||
520 | if (event->attr.sample_type & PERF_SAMPLE_RAW) | ||
521 | offset_max = perf_ibs->offset_max; | ||
522 | else if (check_rip) | ||
523 | offset_max = 2; | ||
524 | else | ||
525 | offset_max = 1; | ||
526 | do { | ||
527 | rdmsrl(msr + offset, *buf++); | ||
528 | size++; | ||
529 | offset = find_next_bit(perf_ibs->offset_mask, | ||
530 | perf_ibs->offset_max, | ||
531 | offset + 1); | ||
532 | } while (offset < offset_max); | ||
533 | ibs_data.size = sizeof(u64) * size; | ||
534 | |||
535 | regs = *iregs; | ||
536 | if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) { | ||
537 | regs.flags &= ~PERF_EFLAGS_EXACT; | ||
538 | } else { | ||
539 | instruction_pointer_set(&regs, ibs_data.regs[1]); | ||
540 | regs.flags |= PERF_EFLAGS_EXACT; | ||
541 | } | ||
542 | |||
543 | if (event->attr.sample_type & PERF_SAMPLE_RAW) { | ||
544 | raw.size = sizeof(u32) + ibs_data.size; | ||
545 | raw.data = ibs_data.data; | ||
546 | data.raw = &raw; | ||
547 | } | ||
548 | |||
549 | throttle = perf_event_overflow(event, &data, &regs); | ||
550 | out: | ||
551 | if (throttle) | ||
552 | perf_ibs_disable_event(perf_ibs, hwc, *config); | ||
553 | else | ||
554 | perf_ibs_enable_event(perf_ibs, hwc, period >> 4); | ||
555 | |||
556 | perf_event_update_userpage(event); | ||
557 | |||
558 | return 1; | ||
559 | } | ||
560 | |||
561 | static int __kprobes | ||
562 | perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs) | ||
563 | { | ||
564 | int handled = 0; | ||
565 | |||
566 | handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs); | ||
567 | handled += perf_ibs_handle_irq(&perf_ibs_op, regs); | ||
568 | |||
569 | if (handled) | ||
570 | inc_irq_stat(apic_perf_irqs); | ||
571 | |||
572 | return handled; | ||
573 | } | ||
574 | |||
575 | static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name) | ||
576 | { | ||
577 | struct cpu_perf_ibs __percpu *pcpu; | ||
578 | int ret; | ||
579 | |||
580 | pcpu = alloc_percpu(struct cpu_perf_ibs); | ||
581 | if (!pcpu) | ||
582 | return -ENOMEM; | ||
583 | |||
584 | perf_ibs->pcpu = pcpu; | ||
585 | |||
586 | ret = perf_pmu_register(&perf_ibs->pmu, name, -1); | ||
587 | if (ret) { | ||
588 | perf_ibs->pcpu = NULL; | ||
589 | free_percpu(pcpu); | ||
590 | } | ||
591 | |||
592 | return ret; | ||
593 | } | ||
594 | |||
43 | static __init int perf_event_ibs_init(void) | 595 | static __init int perf_event_ibs_init(void) |
44 | { | 596 | { |
45 | if (!ibs_caps) | 597 | if (!ibs_caps) |
46 | return -ENODEV; /* ibs not supported by the cpu */ | 598 | return -ENODEV; /* ibs not supported by the cpu */ |
47 | 599 | ||
48 | perf_pmu_register(&perf_ibs, "ibs", -1); | 600 | perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); |
601 | if (ibs_caps & IBS_CAPS_OPCNT) | ||
602 | perf_ibs_op.config_mask |= IBS_OP_CNT_CTL; | ||
603 | perf_ibs_pmu_init(&perf_ibs_op, "ibs_op"); | ||
604 | register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs"); | ||
49 | printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps); | 605 | printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps); |
50 | 606 | ||
51 | return 0; | 607 | return 0; |
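
One detail worth calling out from the driver above: the IBS MaxCnt fields count in units of 16, which is why perf_ibs_init() masks off the lower 4 bits of the sample period (minimum 0x10) and the start/irq paths program the hardware with period >> 4. A small user-space check of that arithmetic:

        #include <stdint.h>
        #include <stdio.h>

        int main(void)
        {
                uint64_t period = 100007;       /* as requested by user space */

                period &= ~0x0FULL;             /* 16-op/16-fetch granularity */
                if (!period)
                        period = 0x10;          /* enforced minimum */

                printf("period %llu -> MaxCnt field %llu\n",
                       (unsigned long long)period,
                       (unsigned long long)(period >> 4));
                return 0;
        }
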
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 26b3e2fef104..166546ec6aef 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1027,8 +1027,6 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) | |||
1027 | u64 status; | 1027 | u64 status; |
1028 | int handled; | 1028 | int handled; |
1029 | 1029 | ||
1030 | perf_sample_data_init(&data, 0); | ||
1031 | |||
1032 | cpuc = &__get_cpu_var(cpu_hw_events); | 1030 | cpuc = &__get_cpu_var(cpu_hw_events); |
1033 | 1031 | ||
1034 | /* | 1032 | /* |
@@ -1082,7 +1080,7 @@ again: | |||
1082 | if (!intel_pmu_save_and_restart(event)) | 1080 | if (!intel_pmu_save_and_restart(event)) |
1083 | continue; | 1081 | continue; |
1084 | 1082 | ||
1085 | data.period = event->hw.last_period; | 1083 | perf_sample_data_init(&data, 0, event->hw.last_period); |
1086 | 1084 | ||
1087 | if (has_branch_stack(event)) | 1085 | if (has_branch_stack(event)) |
1088 | data.br_stack = &cpuc->lbr_stack; | 1086 | data.br_stack = &cpuc->lbr_stack; |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 7f64df19e7dd..5a3edc27f6e5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -316,8 +316,7 @@ int intel_pmu_drain_bts_buffer(void) | |||
316 | 316 | ||
317 | ds->bts_index = ds->bts_buffer_base; | 317 | ds->bts_index = ds->bts_buffer_base; |
318 | 318 | ||
319 | perf_sample_data_init(&data, 0); | 319 | perf_sample_data_init(&data, 0, event->hw.last_period); |
320 | data.period = event->hw.last_period; | ||
321 | regs.ip = 0; | 320 | regs.ip = 0; |
322 | 321 | ||
323 | /* | 322 | /* |
@@ -564,8 +563,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, | |||
564 | if (!intel_pmu_save_and_restart(event)) | 563 | if (!intel_pmu_save_and_restart(event)) |
565 | return; | 564 | return; |
566 | 565 | ||
567 | perf_sample_data_init(&data, 0); | 566 | perf_sample_data_init(&data, 0, event->hw.last_period); |
568 | data.period = event->hw.last_period; | ||
569 | 567 | ||
570 | /* | 568 | /* |
571 | * We use the interrupt regs as a base because the PEBS record | 569 | * We use the interrupt regs as a base because the PEBS record |
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index a2dfacfd7103..47124a73dd73 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -1005,8 +1005,6 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) | |||
1005 | int idx, handled = 0; | 1005 | int idx, handled = 0; |
1006 | u64 val; | 1006 | u64 val; |
1007 | 1007 | ||
1008 | perf_sample_data_init(&data, 0); | ||
1009 | |||
1010 | cpuc = &__get_cpu_var(cpu_hw_events); | 1008 | cpuc = &__get_cpu_var(cpu_hw_events); |
1011 | 1009 | ||
1012 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | 1010 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
@@ -1034,10 +1032,12 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) | |||
1034 | handled += overflow; | 1032 | handled += overflow; |
1035 | 1033 | ||
1036 | /* event overflow for sure */ | 1034 | /* event overflow for sure */ |
1037 | data.period = event->hw.last_period; | 1035 | perf_sample_data_init(&data, 0, hwc->last_period); |
1038 | 1036 | ||
1039 | if (!x86_perf_event_set_period(event)) | 1037 | if (!x86_perf_event_set_period(event)) |
1040 | continue; | 1038 | continue; |
1039 | |||
1040 | |||
1041 | if (perf_event_overflow(event, &data, regs)) | 1041 | if (perf_event_overflow(event, &data, regs)) |
1042 | x86_pmu_stop(event, 0); | 1042 | x86_pmu_stop(event, 0); |
1043 | } | 1043 | } |
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index c9a281f272fd..4243e8bbdcb1 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -24,40 +24,21 @@ | |||
24 | #include <trace/syscall.h> | 24 | #include <trace/syscall.h> |
25 | 25 | ||
26 | #include <asm/cacheflush.h> | 26 | #include <asm/cacheflush.h> |
27 | #include <asm/kprobes.h> | ||
27 | #include <asm/ftrace.h> | 28 | #include <asm/ftrace.h> |
28 | #include <asm/nops.h> | 29 | #include <asm/nops.h> |
29 | #include <asm/nmi.h> | ||
30 | |||
31 | 30 | ||
32 | #ifdef CONFIG_DYNAMIC_FTRACE | 31 | #ifdef CONFIG_DYNAMIC_FTRACE |
33 | 32 | ||
34 | /* | ||
35 | * modifying_code is set to notify NMIs that they need to use | ||
36 | * memory barriers when entering or exiting. But we don't want | ||
37 | * to burden NMIs with unnecessary memory barriers when code | ||
38 | * modification is not being done (which is most of the time). | ||
39 | * | ||
40 | * A mutex is already held when ftrace_arch_code_modify_prepare | ||
41 | * and post_process are called. No locks need to be taken here. | ||
42 | * | ||
43 | * Stop machine will make sure currently running NMIs are done | ||
44 | * and new NMIs will see the updated variable before we need | ||
45 | * to worry about NMIs doing memory barriers. | ||
46 | */ | ||
47 | static int modifying_code __read_mostly; | ||
48 | static DEFINE_PER_CPU(int, save_modifying_code); | ||
49 | |||
50 | int ftrace_arch_code_modify_prepare(void) | 33 | int ftrace_arch_code_modify_prepare(void) |
51 | { | 34 | { |
52 | set_kernel_text_rw(); | 35 | set_kernel_text_rw(); |
53 | set_all_modules_text_rw(); | 36 | set_all_modules_text_rw(); |
54 | modifying_code = 1; | ||
55 | return 0; | 37 | return 0; |
56 | } | 38 | } |
57 | 39 | ||
58 | int ftrace_arch_code_modify_post_process(void) | 40 | int ftrace_arch_code_modify_post_process(void) |
59 | { | 41 | { |
60 | modifying_code = 0; | ||
61 | set_all_modules_text_ro(); | 42 | set_all_modules_text_ro(); |
62 | set_kernel_text_ro(); | 43 | set_kernel_text_ro(); |
63 | return 0; | 44 | return 0; |
@@ -90,134 +71,6 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) | |||
90 | return calc.code; | 71 | return calc.code; |
91 | } | 72 | } |
92 | 73 | ||
93 | /* | ||
94 | * Modifying code must take extra care. On an SMP machine, if | ||
95 | * the code being modified is also being executed on another CPU | ||
96 | * that CPU will have undefined results and possibly take a GPF. | ||
97 | * We use kstop_machine to stop other CPUs from executing code. | ||
98 | * But this does not stop NMIs from happening. We still need | ||
99 | * to protect against that. We separate out the modification of | ||
100 | * the code to take care of this. | ||
101 | * | ||
102 | * Two buffers are added: An IP buffer and a "code" buffer. | ||
103 | * | ||
104 | * 1) Put the instruction pointer into the IP buffer | ||
105 | * and the new code into the "code" buffer. | ||
106 | * 2) Wait for any running NMIs to finish and set a flag that says | ||
107 | * we are modifying code, it is done in an atomic operation. | ||
108 | * 3) Write the code | ||
109 | * 4) clear the flag. | ||
110 | * 5) Wait for any running NMIs to finish. | ||
111 | * | ||
112 | * If an NMI is executed, the first thing it does is to call | ||
113 | * "ftrace_nmi_enter". This will check if the flag is set to write | ||
114 | * and if it is, it will write what is in the IP and "code" buffers. | ||
115 | * | ||
116 | * The trick is, it does not matter if everyone is writing the same | ||
117 | * content to the code location. Also, if a CPU is executing code | ||
118 | * it is OK to write to that code location if the contents being written | ||
119 | * are the same as what exists. | ||
120 | */ | ||
121 | |||
122 | #define MOD_CODE_WRITE_FLAG (1 << 31) /* set when NMI should do the write */ | ||
123 | static atomic_t nmi_running = ATOMIC_INIT(0); | ||
124 | static int mod_code_status; /* holds return value of text write */ | ||
125 | static void *mod_code_ip; /* holds the IP to write to */ | ||
126 | static const void *mod_code_newcode; /* holds the text to write to the IP */ | ||
127 | |||
128 | static unsigned nmi_wait_count; | ||
129 | static atomic_t nmi_update_count = ATOMIC_INIT(0); | ||
130 | |||
131 | int ftrace_arch_read_dyn_info(char *buf, int size) | ||
132 | { | ||
133 | int r; | ||
134 | |||
135 | r = snprintf(buf, size, "%u %u", | ||
136 | nmi_wait_count, | ||
137 | atomic_read(&nmi_update_count)); | ||
138 | return r; | ||
139 | } | ||
140 | |||
141 | static void clear_mod_flag(void) | ||
142 | { | ||
143 | int old = atomic_read(&nmi_running); | ||
144 | |||
145 | for (;;) { | ||
146 | int new = old & ~MOD_CODE_WRITE_FLAG; | ||
147 | |||
148 | if (old == new) | ||
149 | break; | ||
150 | |||
151 | old = atomic_cmpxchg(&nmi_running, old, new); | ||
152 | } | ||
153 | } | ||
154 | |||
155 | static void ftrace_mod_code(void) | ||
156 | { | ||
157 | /* | ||
158 | * Yes, more than one CPU process can be writing to mod_code_status. | ||
159 | * (and the code itself) | ||
160 | * But if one were to fail, then they all should, and if one were | ||
161 | * to succeed, then they all should. | ||
162 | */ | ||
163 | mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode, | ||
164 | MCOUNT_INSN_SIZE); | ||
165 | |||
166 | /* if we fail, then kill any new writers */ | ||
167 | if (mod_code_status) | ||
168 | clear_mod_flag(); | ||
169 | } | ||
170 | |||
171 | void ftrace_nmi_enter(void) | ||
172 | { | ||
173 | __this_cpu_write(save_modifying_code, modifying_code); | ||
174 | |||
175 | if (!__this_cpu_read(save_modifying_code)) | ||
176 | return; | ||
177 | |||
178 | if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) { | ||
179 | smp_rmb(); | ||
180 | ftrace_mod_code(); | ||
181 | atomic_inc(&nmi_update_count); | ||
182 | } | ||
183 | /* Must have previous changes seen before executions */ | ||
184 | smp_mb(); | ||
185 | } | ||
186 | |||
187 | void ftrace_nmi_exit(void) | ||
188 | { | ||
189 | if (!__this_cpu_read(save_modifying_code)) | ||
190 | return; | ||
191 | |||
192 | /* Finish all executions before clearing nmi_running */ | ||
193 | smp_mb(); | ||
194 | atomic_dec(&nmi_running); | ||
195 | } | ||
196 | |||
197 | static void wait_for_nmi_and_set_mod_flag(void) | ||
198 | { | ||
199 | if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG)) | ||
200 | return; | ||
201 | |||
202 | do { | ||
203 | cpu_relax(); | ||
204 | } while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG)); | ||
205 | |||
206 | nmi_wait_count++; | ||
207 | } | ||
208 | |||
209 | static void wait_for_nmi(void) | ||
210 | { | ||
211 | if (!atomic_read(&nmi_running)) | ||
212 | return; | ||
213 | |||
214 | do { | ||
215 | cpu_relax(); | ||
216 | } while (atomic_read(&nmi_running)); | ||
217 | |||
218 | nmi_wait_count++; | ||
219 | } | ||
220 | |||
221 | static inline int | 74 | static inline int |
222 | within(unsigned long addr, unsigned long start, unsigned long end) | 75 | within(unsigned long addr, unsigned long start, unsigned long end) |
223 | { | 76 | { |
@@ -238,26 +91,7 @@ do_ftrace_mod_code(unsigned long ip, const void *new_code) | |||
238 | if (within(ip, (unsigned long)_text, (unsigned long)_etext)) | 91 | if (within(ip, (unsigned long)_text, (unsigned long)_etext)) |
239 | ip = (unsigned long)__va(__pa(ip)); | 92 | ip = (unsigned long)__va(__pa(ip)); |
240 | 93 | ||
241 | mod_code_ip = (void *)ip; | 94 | return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE); |
242 | mod_code_newcode = new_code; | ||
243 | |||
244 | /* The buffers need to be visible before we let NMIs write them */ | ||
245 | smp_mb(); | ||
246 | |||
247 | wait_for_nmi_and_set_mod_flag(); | ||
248 | |||
249 | /* Make sure all running NMIs have finished before we write the code */ | ||
250 | smp_mb(); | ||
251 | |||
252 | ftrace_mod_code(); | ||
253 | |||
254 | /* Make sure the write happens before clearing the bit */ | ||
255 | smp_mb(); | ||
256 | |||
257 | clear_mod_flag(); | ||
258 | wait_for_nmi(); | ||
259 | |||
260 | return mod_code_status; | ||
261 | } | 95 | } |
262 | 96 | ||
263 | static const unsigned char *ftrace_nop_replace(void) | 97 | static const unsigned char *ftrace_nop_replace(void) |
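
This removal pairs with the Kconfig hunk that drops HAVE_FTRACE_NMI_ENTER: instead of synchronising NMIs around the code write, the mcount sites are now patched live under an int3 breakpoint, and ftrace_int3_handler() below lets any CPU that traps on a site in mid-update simply skip the 5-byte instruction. A rough sketch of the per-record sequence, built only from the helpers added in the next hunk; the cross-CPU serialisation required between the steps is deliberately left abstract here:

        /*
         * 'old' and 'new' are the 5-byte sequences from ftrace_nop_replace()
         * and ftrace_call_replace(); every step must be visible on all CPUs
         * before the next one runs.
         */
        add_break(rec->ip, old);        /* 1) first byte becomes int3                  */
        add_update_code(rec->ip, new);  /* 2) rewrite the trailing 4 bytes             */
        ftrace_write(rec->ip, new, 1);  /* 3) restore the first byte (finish_update_*) */
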
@@ -334,6 +168,347 @@ int ftrace_update_ftrace_func(ftrace_func_t func) | |||
334 | return ret; | 168 | return ret; |
335 | } | 169 | } |
336 | 170 | ||
171 | int modifying_ftrace_code __read_mostly; | ||
172 | |||
173 | /* | ||
174 | * A breakpoint was added to the code address we are about to | ||
175 | * modify, and this is the handler that will just skip over it. | ||
176 | * We are either changing a nop into a trace call, or a trace | ||
177 | * call to a nop. While the change is taking place, we treat | ||
178 | * it just like it was a nop. | ||
179 | */ | ||
180 | int ftrace_int3_handler(struct pt_regs *regs) | ||
181 | { | ||
182 | if (WARN_ON_ONCE(!regs)) | ||
183 | return 0; | ||
184 | |||
185 | if (!ftrace_location(regs->ip - 1)) | ||
186 | return 0; | ||
187 | |||
188 | regs->ip += MCOUNT_INSN_SIZE - 1; | ||
189 | |||
190 | return 1; | ||
191 | } | ||
192 | |||
193 | static int ftrace_write(unsigned long ip, const char *val, int size) | ||
194 | { | ||
195 | /* | ||
196 | * On x86_64, kernel text mappings are mapped read-only with | ||
197 | * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead | ||
198 | * of the kernel text mapping to modify the kernel text. | ||
199 | * | ||
200 | * For 32bit kernels, these mappings are same and we can use | ||
201 | * kernel identity mapping to modify code. | ||
202 | */ | ||
203 | if (within(ip, (unsigned long)_text, (unsigned long)_etext)) | ||
204 | ip = (unsigned long)__va(__pa(ip)); | ||
205 | |||
206 | return probe_kernel_write((void *)ip, val, size); | ||
207 | } | ||
208 | |||
209 | static int add_break(unsigned long ip, const char *old) | ||
210 | { | ||
211 | unsigned char replaced[MCOUNT_INSN_SIZE]; | ||
212 | unsigned char brk = BREAKPOINT_INSTRUCTION; | ||
213 | |||
214 | if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) | ||
215 | return -EFAULT; | ||
216 | |||
217 | /* Make sure it is what we expect it to be */ | ||
218 | if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0) | ||
219 | return -EINVAL; | ||
220 | |||
221 | if (ftrace_write(ip, &brk, 1)) | ||
222 | return -EPERM; | ||
223 | |||
224 | return 0; | ||
225 | } | ||
226 | |||
227 | static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr) | ||
228 | { | ||
229 | unsigned const char *old; | ||
230 | unsigned long ip = rec->ip; | ||
231 | |||
232 | old = ftrace_call_replace(ip, addr); | ||
233 | |||
234 | return add_break(rec->ip, old); | ||
235 | } | ||
236 | |||
237 | |||
238 | static int add_brk_on_nop(struct dyn_ftrace *rec) | ||
239 | { | ||
240 | unsigned const char *old; | ||
241 | |||
242 | old = ftrace_nop_replace(); | ||
243 | |||
244 | return add_break(rec->ip, old); | ||
245 | } | ||
246 | |||
247 | static int add_breakpoints(struct dyn_ftrace *rec, int enable) | ||
248 | { | ||
249 | unsigned long ftrace_addr; | ||
250 | int ret; | ||
251 | |||
252 | ret = ftrace_test_record(rec, enable); | ||
253 | |||
254 | ftrace_addr = (unsigned long)FTRACE_ADDR; | ||
255 | |||
256 | switch (ret) { | ||
257 | case FTRACE_UPDATE_IGNORE: | ||
258 | return 0; | ||
259 | |||
260 | case FTRACE_UPDATE_MAKE_CALL: | ||
261 | /* converting nop to call */ | ||
262 | return add_brk_on_nop(rec); | ||
263 | |||
264 | case FTRACE_UPDATE_MAKE_NOP: | ||
265 | /* converting a call to a nop */ | ||
266 | return add_brk_on_call(rec, ftrace_addr); | ||
267 | } | ||
268 | return 0; | ||
269 | } | ||
270 | |||
271 | /* | ||
272 | * On error, we need to remove breakpoints. This needs to | ||
273 | * be done carefully. If the address does not currently have a | ||
274 | * breakpoint, we know we are done. Otherwise, we look at the | ||
275 | * remaining 4 bytes of the instruction. If it matches a nop | ||
276 | * we replace the breakpoint with the nop. Otherwise we replace | ||
277 | * it with the call instruction. | ||
278 | */ | ||
279 | static int remove_breakpoint(struct dyn_ftrace *rec) | ||
280 | { | ||
281 | unsigned char ins[MCOUNT_INSN_SIZE]; | ||
282 | unsigned char brk = BREAKPOINT_INSTRUCTION; | ||
283 | const unsigned char *nop; | ||
284 | unsigned long ftrace_addr; | ||
285 | unsigned long ip = rec->ip; | ||
286 | |||
287 | /* If we fail the read, just give up */ | ||
288 | if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE)) | ||
289 | return -EFAULT; | ||
290 | |||
291 | /* If this does not have a breakpoint, we are done */ | ||
292 | if (ins[0] != brk) | ||
293 | return -1; | ||
294 | |||
295 | nop = ftrace_nop_replace(); | ||
296 | |||
297 | /* | ||
298 | * If the last 4 bytes of the instruction do not match | ||
299 | * a nop, then we assume that this is a call to ftrace_addr. | ||
300 | */ | ||
301 | if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) { | ||
302 | /* | ||
303 | * To be extra paranoid, we check if the breakpoint is on | ||
304 | * a call that would actually jump to the ftrace_addr. | ||
305 | * If not, don't touch the breakpoint; we may just create | ||
306 | * a disaster. | ||
307 | */ | ||
308 | ftrace_addr = (unsigned long)FTRACE_ADDR; | ||
309 | nop = ftrace_call_replace(ip, ftrace_addr); | ||
310 | |||
311 | if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) | ||
312 | return -EINVAL; | ||
313 | } | ||
314 | |||
315 | return probe_kernel_write((void *)ip, &nop[0], 1); | ||
316 | } | ||
317 | |||
318 | static int add_update_code(unsigned long ip, unsigned const char *new) | ||
319 | { | ||
320 | /* skip breakpoint */ | ||
321 | ip++; | ||
322 | new++; | ||
323 | if (ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1)) | ||
324 | return -EPERM; | ||
325 | return 0; | ||
326 | } | ||
327 | |||
328 | static int add_update_call(struct dyn_ftrace *rec, unsigned long addr) | ||
329 | { | ||
330 | unsigned long ip = rec->ip; | ||
331 | unsigned const char *new; | ||
332 | |||
333 | new = ftrace_call_replace(ip, addr); | ||
334 | return add_update_code(ip, new); | ||
335 | } | ||
336 | |||
337 | static int add_update_nop(struct dyn_ftrace *rec) | ||
338 | { | ||
339 | unsigned long ip = rec->ip; | ||
340 | unsigned const char *new; | ||
341 | |||
342 | new = ftrace_nop_replace(); | ||
343 | return add_update_code(ip, new); | ||
344 | } | ||
345 | |||
346 | static int add_update(struct dyn_ftrace *rec, int enable) | ||
347 | { | ||
348 | unsigned long ftrace_addr; | ||
349 | int ret; | ||
350 | |||
351 | ret = ftrace_test_record(rec, enable); | ||
352 | |||
353 | ftrace_addr = (unsigned long)FTRACE_ADDR; | ||
354 | |||
355 | switch (ret) { | ||
356 | case FTRACE_UPDATE_IGNORE: | ||
357 | return 0; | ||
358 | |||
359 | case FTRACE_UPDATE_MAKE_CALL: | ||
360 | /* converting nop to call */ | ||
361 | return add_update_call(rec, ftrace_addr); | ||
362 | |||
363 | case FTRACE_UPDATE_MAKE_NOP: | ||
364 | /* converting a call to a nop */ | ||
365 | return add_update_nop(rec); | ||
366 | } | ||
367 | |||
368 | return 0; | ||
369 | } | ||
370 | |||
371 | static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr) | ||
372 | { | ||
373 | unsigned long ip = rec->ip; | ||
374 | unsigned const char *new; | ||
375 | |||
376 | new = ftrace_call_replace(ip, addr); | ||
377 | |||
378 | if (ftrace_write(ip, new, 1)) | ||
379 | return -EPERM; | ||
380 | |||
381 | return 0; | ||
382 | } | ||
383 | |||
384 | static int finish_update_nop(struct dyn_ftrace *rec) | ||
385 | { | ||
386 | unsigned long ip = rec->ip; | ||
387 | unsigned const char *new; | ||
388 | |||
389 | new = ftrace_nop_replace(); | ||
390 | |||
391 | if (ftrace_write(ip, new, 1)) | ||
392 | return -EPERM; | ||
393 | return 0; | ||
394 | } | ||
395 | |||
396 | static int finish_update(struct dyn_ftrace *rec, int enable) | ||
397 | { | ||
398 | unsigned long ftrace_addr; | ||
399 | int ret; | ||
400 | |||
401 | ret = ftrace_update_record(rec, enable); | ||
402 | |||
403 | ftrace_addr = (unsigned long)FTRACE_ADDR; | ||
404 | |||
405 | switch (ret) { | ||
406 | case FTRACE_UPDATE_IGNORE: | ||
407 | return 0; | ||
408 | |||
409 | case FTRACE_UPDATE_MAKE_CALL: | ||
410 | /* converting nop to call */ | ||
411 | return finish_update_call(rec, ftrace_addr); | ||
412 | |||
413 | case FTRACE_UPDATE_MAKE_NOP: | ||
414 | /* converting a call to a nop */ | ||
415 | return finish_update_nop(rec); | ||
416 | } | ||
417 | |||
418 | return 0; | ||
419 | } | ||
420 | |||
421 | static void do_sync_core(void *data) | ||
422 | { | ||
423 | sync_core(); | ||
424 | } | ||
425 | |||
426 | static void run_sync(void) | ||
427 | { | ||
428 | int enable_irqs = irqs_disabled(); | ||
429 | |||
430 | /* We may be called with interrupts disabled (on bootup). */ | ||
431 | if (enable_irqs) | ||
432 | local_irq_enable(); | ||
433 | on_each_cpu(do_sync_core, NULL, 1); | ||
434 | if (enable_irqs) | ||
435 | local_irq_disable(); | ||
436 | } | ||
437 | |||
438 | static void ftrace_replace_code(int enable) | ||
439 | { | ||
440 | struct ftrace_rec_iter *iter; | ||
441 | struct dyn_ftrace *rec; | ||
442 | const char *report = "adding breakpoints"; | ||
443 | int count = 0; | ||
444 | int ret; | ||
445 | |||
446 | for_ftrace_rec_iter(iter) { | ||
447 | rec = ftrace_rec_iter_record(iter); | ||
448 | |||
449 | ret = add_breakpoints(rec, enable); | ||
450 | if (ret) | ||
451 | goto remove_breakpoints; | ||
452 | count++; | ||
453 | } | ||
454 | |||
455 | run_sync(); | ||
456 | |||
457 | report = "updating code"; | ||
458 | |||
459 | for_ftrace_rec_iter(iter) { | ||
460 | rec = ftrace_rec_iter_record(iter); | ||
461 | |||
462 | ret = add_update(rec, enable); | ||
463 | if (ret) | ||
464 | goto remove_breakpoints; | ||
465 | } | ||
466 | |||
467 | run_sync(); | ||
468 | |||
469 | report = "removing breakpoints"; | ||
470 | |||
471 | for_ftrace_rec_iter(iter) { | ||
472 | rec = ftrace_rec_iter_record(iter); | ||
473 | |||
474 | ret = finish_update(rec, enable); | ||
475 | if (ret) | ||
476 | goto remove_breakpoints; | ||
477 | } | ||
478 | |||
479 | run_sync(); | ||
480 | |||
481 | return; | ||
482 | |||
483 | remove_breakpoints: | ||
484 | ftrace_bug(ret, rec ? rec->ip : 0); | ||
485 | printk(KERN_WARNING "Failed on %s (%d):\n", report, count); | ||
486 | for_ftrace_rec_iter(iter) { | ||
487 | rec = ftrace_rec_iter_record(iter); | ||
488 | remove_breakpoint(rec); | ||
489 | } | ||
490 | } | ||
491 | |||
492 | void arch_ftrace_update_code(int command) | ||
493 | { | ||
494 | modifying_ftrace_code++; | ||
495 | |||
496 | if (command & FTRACE_UPDATE_CALLS) | ||
497 | ftrace_replace_code(1); | ||
498 | else if (command & FTRACE_DISABLE_CALLS) | ||
499 | ftrace_replace_code(0); | ||
500 | |||
501 | if (command & FTRACE_UPDATE_TRACE_FUNC) | ||
502 | ftrace_update_ftrace_func(ftrace_trace_function); | ||
503 | |||
504 | if (command & FTRACE_START_FUNC_RET) | ||
505 | ftrace_enable_ftrace_graph_caller(); | ||
506 | else if (command & FTRACE_STOP_FUNC_RET) | ||
507 | ftrace_disable_ftrace_graph_caller(); | ||
508 | |||
509 | modifying_ftrace_code--; | ||
510 | } | ||
511 | |||
337 | int __init ftrace_dyn_arch_init(void *data) | 512 | int __init ftrace_dyn_arch_init(void *data) |
338 | { | 513 | { |
339 | /* The return code is returned via data */ | 514
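The ftrace_replace_code() machinery above patches every mcount call site live, in three passes with a sync of all CPUs between them: add_breakpoints() turns the first opcode byte into an int3, add_update() rewrites the remaining four bytes behind the breakpoint, and finish_update() restores the first byte of the new instruction. Any CPU that executes a site mid-update traps into do_int3(), where the ftrace_int3_handler() hook (added to traps.c further down) simply steps over the instruction. A minimal user-space sketch of the same three-phase idea, patching a 5-byte buffer instead of kernel text (the byte values and the empty sync step are illustrative stand-ins, not the kernel's code):

#include <stdio.h>
#include <string.h>

#define INSN_SIZE 5
#define INT3      0xcc  /* stand-in for BREAKPOINT_INSTRUCTION */

/* a 5-byte nop, as found at an mcount call site before tracing is enabled */
static unsigned char site[INSN_SIZE] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };
/* a fake "call ftrace_caller" (opcode 0xe8 plus a made-up rel32) */
static const unsigned char call[INSN_SIZE] = { 0xe8, 0x11, 0x22, 0x33, 0x44 };

static void sync_cpus(void) { /* kernel: run_sync() -> sync_core() on every CPU */ }

static void dump(const char *phase)
{
    printf("%-18s:", phase);
    for (int i = 0; i < INSN_SIZE; i++)
        printf(" %02x", site[i]);
    printf("\n");
}

int main(void)
{
    dump("initial (nop)");

    site[0] = INT3;                            /* phase 1: add_breakpoints() */
    sync_cpus();
    dump("breakpoint added");

    memcpy(site + 1, call + 1, INSN_SIZE - 1); /* phase 2: add_update() */
    sync_cpus();
    dump("tail updated");

    site[0] = call[0];                         /* phase 3: finish_update() */
    sync_cpus();
    dump("final (call)");
    return 0;
}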
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 47acaf319165..eb1539eac393 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c | |||
@@ -84,7 +84,7 @@ __setup("unknown_nmi_panic", setup_unknown_nmi_panic); | |||
84 | 84 | ||
85 | #define nmi_to_desc(type) (&nmi_desc[type]) | 85 | #define nmi_to_desc(type) (&nmi_desc[type]) |
86 | 86 | ||
87 | static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) | 87 | static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) |
88 | { | 88 | { |
89 | struct nmi_desc *desc = nmi_to_desc(type); | 89 | struct nmi_desc *desc = nmi_to_desc(type); |
90 | struct nmiaction *a; | 90 | struct nmiaction *a; |
@@ -209,7 +209,7 @@ void unregister_nmi_handler(unsigned int type, const char *name) | |||
209 | 209 | ||
210 | EXPORT_SYMBOL_GPL(unregister_nmi_handler); | 210 | EXPORT_SYMBOL_GPL(unregister_nmi_handler); |
211 | 211 | ||
212 | static notrace __kprobes void | 212 | static __kprobes void |
213 | pci_serr_error(unsigned char reason, struct pt_regs *regs) | 213 | pci_serr_error(unsigned char reason, struct pt_regs *regs) |
214 | { | 214 | { |
215 | pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", | 215 | pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", |
@@ -236,7 +236,7 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs) | |||
236 | outb(reason, NMI_REASON_PORT); | 236 | outb(reason, NMI_REASON_PORT); |
237 | } | 237 | } |
238 | 238 | ||
239 | static notrace __kprobes void | 239 | static __kprobes void |
240 | io_check_error(unsigned char reason, struct pt_regs *regs) | 240 | io_check_error(unsigned char reason, struct pt_regs *regs) |
241 | { | 241 | { |
242 | unsigned long i; | 242 | unsigned long i; |
@@ -263,7 +263,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs) | |||
263 | outb(reason, NMI_REASON_PORT); | 263 | outb(reason, NMI_REASON_PORT); |
264 | } | 264 | } |
265 | 265 | ||
266 | static notrace __kprobes void | 266 | static __kprobes void |
267 | unknown_nmi_error(unsigned char reason, struct pt_regs *regs) | 267 | unknown_nmi_error(unsigned char reason, struct pt_regs *regs) |
268 | { | 268 | { |
269 | int handled; | 269 | int handled; |
@@ -305,7 +305,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) | |||
305 | static DEFINE_PER_CPU(bool, swallow_nmi); | 305 | static DEFINE_PER_CPU(bool, swallow_nmi); |
306 | static DEFINE_PER_CPU(unsigned long, last_nmi_rip); | 306 | static DEFINE_PER_CPU(unsigned long, last_nmi_rip); |
307 | 307 | ||
308 | static notrace __kprobes void default_do_nmi(struct pt_regs *regs) | 308 | static __kprobes void default_do_nmi(struct pt_regs *regs) |
309 | { | 309 | { |
310 | unsigned char reason = 0; | 310 | unsigned char reason = 0; |
311 | int handled; | 311 | int handled; |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ff9281f16029..92d5756d85fc 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -50,6 +50,7 @@ | |||
50 | #include <asm/processor.h> | 50 | #include <asm/processor.h> |
51 | #include <asm/debugreg.h> | 51 | #include <asm/debugreg.h> |
52 | #include <linux/atomic.h> | 52 | #include <linux/atomic.h> |
53 | #include <asm/ftrace.h> | ||
53 | #include <asm/traps.h> | 54 | #include <asm/traps.h> |
54 | #include <asm/desc.h> | 55 | #include <asm/desc.h> |
55 | #include <asm/i387.h> | 56 | #include <asm/i387.h> |
@@ -303,8 +304,13 @@ gp_in_kernel: | |||
303 | } | 304 | } |
304 | 305 | ||
305 | /* May run on IST stack. */ | 306 | /* May run on IST stack. */ |
306 | dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) | 307 | dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code) |
307 | { | 308 | { |
309 | #ifdef CONFIG_DYNAMIC_FTRACE | ||
310 | /* ftrace must be first, everything else may cause a recursive crash */ | ||
311 | if (unlikely(modifying_ftrace_code) && ftrace_int3_handler(regs)) | ||
312 | return; | ||
313 | #endif | ||
308 | #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP | 314 | #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP |
309 | if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, | 315 | if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, |
310 | SIGTRAP) == NOTIFY_STOP) | 316 | SIGTRAP) == NOTIFY_STOP) |
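The early return added to do_int3() hands the trap to ftrace_int3_handler() (shown near the top of the ftrace.c hunk) before kgdb and the die notifiers run, since those paths may themselves be traced and recurse. The handler's pointer arithmetic relies on the CPU leaving regs->ip one byte past the int3: subtracting one recovers the patched call site for the ftrace_location() lookup, and adding MCOUNT_INSN_SIZE - 1 (the call is 5 bytes on x86) resumes execution just past the site, so the half-patched instruction behaves as a nop. A small sketch of that arithmetic with a made-up 64-bit kernel address:

#include <stdio.h>

#define MCOUNT_INSN_SIZE 5  /* size of the x86 call instruction */

int main(void)
{
    unsigned long site = 0xffffffff81000100UL; /* hypothetical mcount call site */
    unsigned long ip   = site + 1;             /* regs->ip on entry to do_int3() */

    printf("patched site: %#lx  (ip - 1)\n", ip - 1);
    printf("resume at   : %#lx  (ip + MCOUNT_INSN_SIZE - 1)\n",
           ip + MCOUNT_INSN_SIZE - 1);         /* == site + MCOUNT_INSN_SIZE */
    return 0;
}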
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index f386dc49f988..7515cf0e1805 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -216,9 +216,9 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) | |||
216 | current_thread_info()->sig_on_uaccess_error = 1; | 216 | current_thread_info()->sig_on_uaccess_error = 1; |
217 | 217 | ||
218 | /* | 218 | /* |
219 | * 0 is a valid user pointer (in the access_ok sense) on 32-bit and | 219 | * NULL is a valid user pointer (in the access_ok sense) on 32-bit and |
220 | * 64-bit, so we don't need to special-case it here. For all the | 220 | * 64-bit, so we don't need to special-case it here. For all the |
221 | * vsyscalls, 0 means "don't write anything" not "write it at | 221 | * vsyscalls, NULL means "don't write anything" not "write it at |
222 | * address 0". | 222 | * address 0". |
223 | */ | 223 | */ |
224 | ret = -EFAULT; | 224 | ret = -EFAULT; |
@@ -247,7 +247,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) | |||
247 | 247 | ||
248 | ret = sys_getcpu((unsigned __user *)regs->di, | 248 | ret = sys_getcpu((unsigned __user *)regs->di, |
249 | (unsigned __user *)regs->si, | 249 | (unsigned __user *)regs->si, |
250 | 0); | 250 | NULL); |
251 | break; | 251 | break; |
252 | } | 252 | } |
253 | 253 | ||
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 173df38dbda5..2e88438ffd83 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c | |||
@@ -459,17 +459,17 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu) | |||
459 | pmu->available_event_types = ~entry->ebx & ((1ull << bitmap_len) - 1); | 459 | pmu->available_event_types = ~entry->ebx & ((1ull << bitmap_len) - 1); |
460 | 460 | ||
461 | if (pmu->version == 1) { | 461 | if (pmu->version == 1) { |
462 | pmu->global_ctrl = (1 << pmu->nr_arch_gp_counters) - 1; | 462 | pmu->nr_arch_fixed_counters = 0; |
463 | return; | 463 | } else { |
464 | pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f), | ||
465 | X86_PMC_MAX_FIXED); | ||
466 | pmu->counter_bitmask[KVM_PMC_FIXED] = | ||
467 | ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1; | ||
464 | } | 468 | } |
465 | 469 | ||
466 | pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f), | 470 | pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) | |
467 | X86_PMC_MAX_FIXED); | 471 | (((1ull << pmu->nr_arch_fixed_counters) - 1) << X86_PMC_IDX_FIXED); |
468 | pmu->counter_bitmask[KVM_PMC_FIXED] = | 472 | pmu->global_ctrl_mask = ~pmu->global_ctrl; |
469 | ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1; | ||
470 | pmu->global_ctrl_mask = ~(((1 << pmu->nr_arch_gp_counters) - 1) | ||
471 | | (((1ull << pmu->nr_arch_fixed_counters) - 1) | ||
472 | << X86_PMC_IDX_FIXED)); | ||
473 | } | 473 | } |
474 | 474 | ||
475 | void kvm_pmu_init(struct kvm_vcpu *vcpu) | 475 | void kvm_pmu_init(struct kvm_vcpu *vcpu) |
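The reworked tail of kvm_pmu_cpuid_update() now derives global_ctrl in one place for both PMU versions: the low bits cover the general-purpose counters, the fixed-counter bits start at X86_PMC_IDX_FIXED (bit 32 on Intel), and a version 1 PMU simply contributes zero fixed counters. A short sketch of the bit arithmetic, using example CPUID-reported counts of 4 GP and 3 fixed counters:

#include <stdio.h>
#include <stdint.h>

#define X86_PMC_IDX_FIXED 32  /* fixed counters occupy bits 32.. in GLOBAL_CTRL */

int main(void)
{
    int nr_gp = 4, nr_fixed = 3;  /* example values from CPUID leaf 0xa */

    uint64_t global_ctrl = ((1ULL << nr_gp) - 1) |
                           (((1ULL << nr_fixed) - 1) << X86_PMC_IDX_FIXED);
    uint64_t global_ctrl_mask = ~global_ctrl;

    printf("global_ctrl      = %#018llx\n", (unsigned long long)global_ctrl);
    printf("global_ctrl_mask = %#018llx\n", (unsigned long long)global_ctrl_mask);
    return 0;  /* prints 0x000000070000000f and its complement */
}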
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ad85adfef843..4ff0ab9bc3c8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -2210,9 +2210,12 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
2210 | msr = find_msr_entry(vmx, msr_index); | 2210 | msr = find_msr_entry(vmx, msr_index); |
2211 | if (msr) { | 2211 | if (msr) { |
2212 | msr->data = data; | 2212 | msr->data = data; |
2213 | if (msr - vmx->guest_msrs < vmx->save_nmsrs) | 2213 | if (msr - vmx->guest_msrs < vmx->save_nmsrs) { |
2214 | preempt_disable(); | ||
2214 | kvm_set_shared_msr(msr->index, msr->data, | 2215 | kvm_set_shared_msr(msr->index, msr->data, |
2215 | msr->mask); | 2216 | msr->mask); |
2217 | preempt_enable(); | ||
2218 | } | ||
2216 | break; | 2219 | break; |
2217 | } | 2220 | } |
2218 | ret = kvm_set_msr_common(vcpu, msr_index, data); | 2221 | ret = kvm_set_msr_common(vcpu, msr_index, data); |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4044ce0bf7c1..91a5e989abcf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -6336,13 +6336,11 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, | |||
6336 | if (npages && !old.rmap) { | 6336 | if (npages && !old.rmap) { |
6337 | unsigned long userspace_addr; | 6337 | unsigned long userspace_addr; |
6338 | 6338 | ||
6339 | down_write(¤t->mm->mmap_sem); | 6339 | userspace_addr = vm_mmap(NULL, 0, |
6340 | userspace_addr = do_mmap(NULL, 0, | ||
6341 | npages * PAGE_SIZE, | 6340 | npages * PAGE_SIZE, |
6342 | PROT_READ | PROT_WRITE, | 6341 | PROT_READ | PROT_WRITE, |
6343 | map_flags, | 6342 | map_flags, |
6344 | 0); | 6343 | 0); |
6345 | up_write(¤t->mm->mmap_sem); | ||
6346 | 6344 | ||
6347 | if (IS_ERR((void *)userspace_addr)) | 6345 | if (IS_ERR((void *)userspace_addr)) |
6348 | return PTR_ERR((void *)userspace_addr); | 6346 | return PTR_ERR((void *)userspace_addr); |
@@ -6366,10 +6364,8 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
6366 | if (!user_alloc && !old.user_alloc && old.rmap && !npages) { | 6364 | if (!user_alloc && !old.user_alloc && old.rmap && !npages) { |
6367 | int ret; | 6365 | int ret; |
6368 | 6366 | ||
6369 | down_write(¤t->mm->mmap_sem); | 6367 | ret = vm_munmap(old.userspace_addr, |
6370 | ret = do_munmap(current->mm, old.userspace_addr, | ||
6371 | old.npages * PAGE_SIZE); | 6368 | old.npages * PAGE_SIZE); |
6372 | up_write(¤t->mm->mmap_sem); | ||
6373 | if (ret < 0) | 6369 | if (ret < 0) |
6374 | printk(KERN_WARNING | 6370 | printk(KERN_WARNING |
6375 | "kvm_vm_ioctl_set_memory_region: " | 6371 | "kvm_vm_ioctl_set_memory_region: " |
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 25feb1ae71c5..b1e6c4b2e8eb 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c | |||
@@ -379,8 +379,8 @@ err_out: | |||
379 | return; | 379 | return; |
380 | } | 380 | } |
381 | 381 | ||
382 | /* Decode moffset16/32/64 */ | 382 | /* Decode moffset16/32/64. Return 0 if failed */ |
383 | static void __get_moffset(struct insn *insn) | 383 | static int __get_moffset(struct insn *insn) |
384 | { | 384 | { |
385 | switch (insn->addr_bytes) { | 385 | switch (insn->addr_bytes) { |
386 | case 2: | 386 | case 2: |
@@ -397,15 +397,19 @@ static void __get_moffset(struct insn *insn) | |||
397 | insn->moffset2.value = get_next(int, insn); | 397 | insn->moffset2.value = get_next(int, insn); |
398 | insn->moffset2.nbytes = 4; | 398 | insn->moffset2.nbytes = 4; |
399 | break; | 399 | break; |
400 | default: /* opnd_bytes must be modified manually */ | ||
401 | goto err_out; | ||
400 | } | 402 | } |
401 | insn->moffset1.got = insn->moffset2.got = 1; | 403 | insn->moffset1.got = insn->moffset2.got = 1; |
402 | 404 | ||
405 | return 1; | ||
406 | |||
403 | err_out: | 407 | err_out: |
404 | return; | 408 | return 0; |
405 | } | 409 | } |
406 | 410 | ||
407 | /* Decode imm v32(Iz) */ | 411 | /* Decode imm v32(Iz). Return 0 if failed */ |
408 | static void __get_immv32(struct insn *insn) | 412 | static int __get_immv32(struct insn *insn) |
409 | { | 413 | { |
410 | switch (insn->opnd_bytes) { | 414 | switch (insn->opnd_bytes) { |
411 | case 2: | 415 | case 2: |
@@ -417,14 +421,18 @@ static void __get_immv32(struct insn *insn) | |||
417 | insn->immediate.value = get_next(int, insn); | 421 | insn->immediate.value = get_next(int, insn); |
418 | insn->immediate.nbytes = 4; | 422 | insn->immediate.nbytes = 4; |
419 | break; | 423 | break; |
424 | default: /* opnd_bytes must be modified manually */ | ||
425 | goto err_out; | ||
420 | } | 426 | } |
421 | 427 | ||
428 | return 1; | ||
429 | |||
422 | err_out: | 430 | err_out: |
423 | return; | 431 | return 0; |
424 | } | 432 | } |
425 | 433 | ||
426 | /* Decode imm v64(Iv/Ov) */ | 434 | /* Decode imm v64(Iv/Ov), Return 0 if failed */ |
427 | static void __get_immv(struct insn *insn) | 435 | static int __get_immv(struct insn *insn) |
428 | { | 436 | { |
429 | switch (insn->opnd_bytes) { | 437 | switch (insn->opnd_bytes) { |
430 | case 2: | 438 | case 2: |
@@ -441,15 +449,18 @@ static void __get_immv(struct insn *insn) | |||
441 | insn->immediate2.value = get_next(int, insn); | 449 | insn->immediate2.value = get_next(int, insn); |
442 | insn->immediate2.nbytes = 4; | 450 | insn->immediate2.nbytes = 4; |
443 | break; | 451 | break; |
452 | default: /* opnd_bytes must be modified manually */ | ||
453 | goto err_out; | ||
444 | } | 454 | } |
445 | insn->immediate1.got = insn->immediate2.got = 1; | 455 | insn->immediate1.got = insn->immediate2.got = 1; |
446 | 456 | ||
457 | return 1; | ||
447 | err_out: | 458 | err_out: |
448 | return; | 459 | return 0; |
449 | } | 460 | } |
450 | 461 | ||
451 | /* Decode ptr16:16/32(Ap) */ | 462 | /* Decode ptr16:16/32(Ap) */ |
452 | static void __get_immptr(struct insn *insn) | 463 | static int __get_immptr(struct insn *insn) |
453 | { | 464 | { |
454 | switch (insn->opnd_bytes) { | 465 | switch (insn->opnd_bytes) { |
455 | case 2: | 466 | case 2: |
@@ -462,14 +473,17 @@ static void __get_immptr(struct insn *insn) | |||
462 | break; | 473 | break; |
463 | case 8: | 474 | case 8: |
464 | /* ptr16:64 does not exist (no segment) */ | 475
465 | return; | 476 | return 0; |
477 | default: /* opnd_bytes must be modified manually */ | ||
478 | goto err_out; | ||
466 | } | 479 | } |
467 | insn->immediate2.value = get_next(unsigned short, insn); | 480 | insn->immediate2.value = get_next(unsigned short, insn); |
468 | insn->immediate2.nbytes = 2; | 481 | insn->immediate2.nbytes = 2; |
469 | insn->immediate1.got = insn->immediate2.got = 1; | 482 | insn->immediate1.got = insn->immediate2.got = 1; |
470 | 483 | ||
484 | return 1; | ||
471 | err_out: | 485 | err_out: |
472 | return; | 486 | return 0; |
473 | } | 487 | } |
474 | 488 | ||
475 | /** | 489 | /** |
@@ -489,7 +503,8 @@ void insn_get_immediate(struct insn *insn) | |||
489 | insn_get_displacement(insn); | 503 | insn_get_displacement(insn); |
490 | 504 | ||
491 | if (inat_has_moffset(insn->attr)) { | 505 | if (inat_has_moffset(insn->attr)) { |
492 | __get_moffset(insn); | 506 | if (!__get_moffset(insn)) |
507 | goto err_out; | ||
493 | goto done; | 508 | goto done; |
494 | } | 509 | } |
495 | 510 | ||
@@ -517,16 +532,20 @@ void insn_get_immediate(struct insn *insn) | |||
517 | insn->immediate2.nbytes = 4; | 532 | insn->immediate2.nbytes = 4; |
518 | break; | 533 | break; |
519 | case INAT_IMM_PTR: | 534 | case INAT_IMM_PTR: |
520 | __get_immptr(insn); | 535 | if (!__get_immptr(insn)) |
536 | goto err_out; | ||
521 | break; | 537 | break; |
522 | case INAT_IMM_VWORD32: | 538 | case INAT_IMM_VWORD32: |
523 | __get_immv32(insn); | 539 | if (!__get_immv32(insn)) |
540 | goto err_out; | ||
524 | break; | 541 | break; |
525 | case INAT_IMM_VWORD: | 542 | case INAT_IMM_VWORD: |
526 | __get_immv(insn); | 543 | if (!__get_immv(insn)) |
544 | goto err_out; | ||
527 | break; | 545 | break; |
528 | default: | 546 | default: |
529 | break; | 547 | /* Here, insn must have an immediate, but failed */ |
548 | goto err_out; | ||
530 | } | 549 | } |
531 | if (inat_has_second_immediate(insn->attr)) { | 550 | if (inat_has_second_immediate(insn->attr)) { |
532 | insn->immediate2.value = get_next(char, insn); | 551 | insn->immediate2.value = get_next(char, insn); |
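The insn.c changes are mechanical: every decode helper that used to silently return on an unexpected opnd_bytes/addr_bytes value now reports failure by returning 0, and insn_get_immediate() propagates that by jumping to err_out instead of marking the immediate as decoded. A stripped-down sketch of the same convention (the toy struct and byte source are invented for illustration, not the real struct insn):

#include <stdio.h>

struct toy_insn {
    int opnd_bytes;          /* 2, 4 or 8 */
    unsigned long immediate;
    int got;
};

/* Return 0 if failed, mirroring the new __get_immv32() contract. */
static int toy_get_imm(struct toy_insn *insn, const unsigned char *p)
{
    switch (insn->opnd_bytes) {
    case 2:
        insn->immediate = p[0] | (p[1] << 8);
        break;
    case 4:
    case 8:
        insn->immediate = p[0] | (p[1] << 8) | (p[2] << 16) |
                          ((unsigned long)p[3] << 24);
        break;
    default:                 /* opnd_bytes must be modified manually */
        return 0;
    }
    insn->got = 1;
    return 1;
}

int main(void)
{
    unsigned char bytes[] = { 0x78, 0x56, 0x34, 0x12 };
    struct toy_insn insn = { .opnd_bytes = 4 };

    /* the caller bails out on failure, as insn_get_immediate() now does */
    if (!toy_get_imm(&insn, bytes))
        return 1;
    printf("imm = %#lx, got = %d\n", insn.immediate, insn.got);
    return 0;
}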
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c index 97be9cb54483..d6ae30bbd7bb 100644 --- a/arch/x86/lib/usercopy.c +++ b/arch/x86/lib/usercopy.c | |||
@@ -7,6 +7,8 @@ | |||
7 | #include <linux/highmem.h> | 7 | #include <linux/highmem.h> |
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | 9 | ||
10 | #include <asm/word-at-a-time.h> | ||
11 | |||
10 | /* | 12 | /* |
11 | * best effort, GUP based copy_from_user() that is NMI-safe | 13 | * best effort, GUP based copy_from_user() that is NMI-safe |
12 | */ | 14 | */ |
@@ -41,3 +43,104 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) | |||
41 | return len; | 43 | return len; |
42 | } | 44 | } |
43 | EXPORT_SYMBOL_GPL(copy_from_user_nmi); | 45 | EXPORT_SYMBOL_GPL(copy_from_user_nmi); |
46 | |||
47 | static inline unsigned long count_bytes(unsigned long mask) | ||
48 | { | ||
49 | mask = (mask - 1) & ~mask; | ||
50 | mask >>= 7; | ||
51 | return count_masked_bytes(mask); | ||
52 | } | ||
53 | |||
54 | /* | ||
55 | * Do a strncpy, return length of string without final '\0'. | ||
56 | * 'count' is the user-supplied count (return 'count' if we | ||
57 | * hit it), 'max' is the address space maximum (and we return | ||
58 | * -EFAULT if we hit it). | ||
59 | */ | ||
60 | static inline long do_strncpy_from_user(char *dst, const char __user *src, long count, unsigned long max) | ||
61 | { | ||
62 | long res = 0; | ||
63 | |||
64 | /* | ||
65 | * Truncate 'max' to the user-specified limit, so that | ||
66 | * we only have one limit we need to check in the loop | ||
67 | */ | ||
68 | if (max > count) | ||
69 | max = count; | ||
70 | |||
71 | while (max >= sizeof(unsigned long)) { | ||
72 | unsigned long c; | ||
73 | |||
74 | /* Fall back to byte-at-a-time if we get a page fault */ | ||
75 | if (unlikely(__get_user(c,(unsigned long __user *)(src+res)))) | ||
76 | break; | ||
77 | /* This can write a few bytes past the NUL character, but that's ok */ | ||
78 | *(unsigned long *)(dst+res) = c; | ||
79 | c = has_zero(c); | ||
80 | if (c) | ||
81 | return res + count_bytes(c); | ||
82 | res += sizeof(unsigned long); | ||
83 | max -= sizeof(unsigned long); | ||
84 | } | ||
85 | |||
86 | while (max) { | ||
87 | char c; | ||
88 | |||
89 | if (unlikely(__get_user(c,src+res))) | ||
90 | return -EFAULT; | ||
91 | dst[res] = c; | ||
92 | if (!c) | ||
93 | return res; | ||
94 | res++; | ||
95 | max--; | ||
96 | } | ||
97 | |||
98 | /* | ||
99 | * Uhhuh. We hit 'max'. But was that the user-specified maximum | ||
100 | * too? If so, that's ok - we got as much as the user asked for. | ||
101 | */ | ||
102 | if (res >= count) | ||
103 | return res; | ||
104 | |||
105 | /* | ||
106 | * Nope: we hit the address space limit, and we still had more | ||
107 | * characters the caller would have wanted. That's an EFAULT. | ||
108 | */ | ||
109 | return -EFAULT; | ||
110 | } | ||
111 | |||
112 | /** | ||
113 | * strncpy_from_user: - Copy a NUL terminated string from userspace. | ||
114 | * @dst: Destination address, in kernel space. This buffer must be at | ||
115 | * least @count bytes long. | ||
116 | * @src: Source address, in user space. | ||
117 | * @count: Maximum number of bytes to copy, including the trailing NUL. | ||
118 | * | ||
119 | * Copies a NUL-terminated string from userspace to kernel space. | ||
120 | * | ||
121 | * On success, returns the length of the string (not including the trailing | ||
122 | * NUL). | ||
123 | * | ||
124 | * If access to userspace fails, returns -EFAULT (some data may have been | ||
125 | * copied). | ||
126 | * | ||
127 | * If @count is smaller than the length of the string, copies @count bytes | ||
128 | * and returns @count. | ||
129 | */ | ||
130 | long | ||
131 | strncpy_from_user(char *dst, const char __user *src, long count) | ||
132 | { | ||
133 | unsigned long max_addr, src_addr; | ||
134 | |||
135 | if (unlikely(count <= 0)) | ||
136 | return 0; | ||
137 | |||
138 | max_addr = current_thread_info()->addr_limit.seg; | ||
139 | src_addr = (unsigned long)src; | ||
140 | if (likely(src_addr < max_addr)) { | ||
141 | unsigned long max = max_addr - src_addr; | ||
142 | return do_strncpy_from_user(dst, src, count, max); | ||
143 | } | ||
144 | return -EFAULT; | ||
145 | } | ||
146 | EXPORT_SYMBOL(strncpy_from_user); | ||
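The new x86 strncpy_from_user() copies one word per iteration with __get_user() and drops to byte-at-a-time copying only when that faults; has_zero() and count_bytes() then locate the NUL inside the word it just stored. A user-space sketch of that zero-byte detection on a little-endian unsigned long (the has-zero-byte test is the classic bit trick; count_bytes() itself is architecture-specific, so a ctz-based stand-in is used here):

#include <stdio.h>
#include <string.h>

/* Each byte of the result has 0x80 set iff the corresponding byte of v may
 * be zero; the lowest set bit always marks the first real NUL byte. */
static unsigned long has_zero(unsigned long v)
{
    const unsigned long ones  = ~0UL / 255;   /* 0x0101...01 */
    const unsigned long highs = ones * 0x80;  /* 0x8080...80 */
    return (v - ones) & ~v & highs;
}

/* Index of the first zero byte, little-endian (stand-in for count_bytes()). */
static unsigned long first_zero_byte(unsigned long mask)
{
    return (unsigned long)__builtin_ctzl(mask) / 8;
}

int main(void)
{
    char src[sizeof(unsigned long)] = "abc";  /* NUL-padded to a full word */
    unsigned long c, zmask;

    memcpy(&c, src, sizeof(c));               /* like the __get_user() word load */
    zmask = has_zero(c);
    if (zmask)
        printf("string length within this word: %lu\n", first_zero_byte(zmask));
    return 0;                                 /* prints 3 for "abc" */
}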
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index d9b094ca7aaa..ef2a6a5d78e3 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c | |||
@@ -33,93 +33,6 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon | |||
33 | __movsl_is_ok((unsigned long)(a1), (unsigned long)(a2), (n)) | 33 | __movsl_is_ok((unsigned long)(a1), (unsigned long)(a2), (n)) |
34 | 34 | ||
35 | /* | 35 | /* |
36 | * Copy a null terminated string from userspace. | ||
37 | */ | ||
38 | |||
39 | #define __do_strncpy_from_user(dst, src, count, res) \ | ||
40 | do { \ | ||
41 | int __d0, __d1, __d2; \ | ||
42 | might_fault(); \ | ||
43 | __asm__ __volatile__( \ | ||
44 | " testl %1,%1\n" \ | ||
45 | " jz 2f\n" \ | ||
46 | "0: lodsb\n" \ | ||
47 | " stosb\n" \ | ||
48 | " testb %%al,%%al\n" \ | ||
49 | " jz 1f\n" \ | ||
50 | " decl %1\n" \ | ||
51 | " jnz 0b\n" \ | ||
52 | "1: subl %1,%0\n" \ | ||
53 | "2:\n" \ | ||
54 | ".section .fixup,\"ax\"\n" \ | ||
55 | "3: movl %5,%0\n" \ | ||
56 | " jmp 2b\n" \ | ||
57 | ".previous\n" \ | ||
58 | _ASM_EXTABLE(0b,3b) \ | ||
59 | : "=&d"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), \ | ||
60 | "=&D" (__d2) \ | ||
61 | : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ | ||
62 | : "memory"); \ | ||
63 | } while (0) | ||
64 | |||
65 | /** | ||
66 | * __strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking. | ||
67 | * @dst: Destination address, in kernel space. This buffer must be at | ||
68 | * least @count bytes long. | ||
69 | * @src: Source address, in user space. | ||
70 | * @count: Maximum number of bytes to copy, including the trailing NUL. | ||
71 | * | ||
72 | * Copies a NUL-terminated string from userspace to kernel space. | ||
73 | * Caller must check the specified block with access_ok() before calling | ||
74 | * this function. | ||
75 | * | ||
76 | * On success, returns the length of the string (not including the trailing | ||
77 | * NUL). | ||
78 | * | ||
79 | * If access to userspace fails, returns -EFAULT (some data may have been | ||
80 | * copied). | ||
81 | * | ||
82 | * If @count is smaller than the length of the string, copies @count bytes | ||
83 | * and returns @count. | ||
84 | */ | ||
85 | long | ||
86 | __strncpy_from_user(char *dst, const char __user *src, long count) | ||
87 | { | ||
88 | long res; | ||
89 | __do_strncpy_from_user(dst, src, count, res); | ||
90 | return res; | ||
91 | } | ||
92 | EXPORT_SYMBOL(__strncpy_from_user); | ||
93 | |||
94 | /** | ||
95 | * strncpy_from_user: - Copy a NUL terminated string from userspace. | ||
96 | * @dst: Destination address, in kernel space. This buffer must be at | ||
97 | * least @count bytes long. | ||
98 | * @src: Source address, in user space. | ||
99 | * @count: Maximum number of bytes to copy, including the trailing NUL. | ||
100 | * | ||
101 | * Copies a NUL-terminated string from userspace to kernel space. | ||
102 | * | ||
103 | * On success, returns the length of the string (not including the trailing | ||
104 | * NUL). | ||
105 | * | ||
106 | * If access to userspace fails, returns -EFAULT (some data may have been | ||
107 | * copied). | ||
108 | * | ||
109 | * If @count is smaller than the length of the string, copies @count bytes | ||
110 | * and returns @count. | ||
111 | */ | ||
112 | long | ||
113 | strncpy_from_user(char *dst, const char __user *src, long count) | ||
114 | { | ||
115 | long res = -EFAULT; | ||
116 | if (access_ok(VERIFY_READ, src, 1)) | ||
117 | __do_strncpy_from_user(dst, src, count, res); | ||
118 | return res; | ||
119 | } | ||
120 | EXPORT_SYMBOL(strncpy_from_user); | ||
121 | |||
122 | /* | ||
123 | * Zero Userspace | 36 | * Zero Userspace |
124 | */ | 37 | */ |
125 | 38 | ||
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index b7c2849ffb66..0d0326f388c0 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c | |||
@@ -9,55 +9,6 @@ | |||
9 | #include <asm/uaccess.h> | 9 | #include <asm/uaccess.h> |
10 | 10 | ||
11 | /* | 11 | /* |
12 | * Copy a null terminated string from userspace. | ||
13 | */ | ||
14 | |||
15 | #define __do_strncpy_from_user(dst,src,count,res) \ | ||
16 | do { \ | ||
17 | long __d0, __d1, __d2; \ | ||
18 | might_fault(); \ | ||
19 | __asm__ __volatile__( \ | ||
20 | " testq %1,%1\n" \ | ||
21 | " jz 2f\n" \ | ||
22 | "0: lodsb\n" \ | ||
23 | " stosb\n" \ | ||
24 | " testb %%al,%%al\n" \ | ||
25 | " jz 1f\n" \ | ||
26 | " decq %1\n" \ | ||
27 | " jnz 0b\n" \ | ||
28 | "1: subq %1,%0\n" \ | ||
29 | "2:\n" \ | ||
30 | ".section .fixup,\"ax\"\n" \ | ||
31 | "3: movq %5,%0\n" \ | ||
32 | " jmp 2b\n" \ | ||
33 | ".previous\n" \ | ||
34 | _ASM_EXTABLE(0b,3b) \ | ||
35 | : "=&r"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), \ | ||
36 | "=&D" (__d2) \ | ||
37 | : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ | ||
38 | : "memory"); \ | ||
39 | } while (0) | ||
40 | |||
41 | long | ||
42 | __strncpy_from_user(char *dst, const char __user *src, long count) | ||
43 | { | ||
44 | long res; | ||
45 | __do_strncpy_from_user(dst, src, count, res); | ||
46 | return res; | ||
47 | } | ||
48 | EXPORT_SYMBOL(__strncpy_from_user); | ||
49 | |||
50 | long | ||
51 | strncpy_from_user(char *dst, const char __user *src, long count) | ||
52 | { | ||
53 | long res = -EFAULT; | ||
54 | if (access_ok(VERIFY_READ, src, 1)) | ||
55 | return __strncpy_from_user(dst, src, count); | ||
56 | return res; | ||
57 | } | ||
58 | EXPORT_SYMBOL(strncpy_from_user); | ||
59 | |||
60 | /* | ||
61 | * Zero Userspace | 12 | * Zero Userspace |
62 | */ | 13 | */ |
63 | 14 | ||
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h new file mode 100644 index 000000000000..7d01b8c56c00 --- /dev/null +++ b/arch/x86/um/asm/barrier.h | |||
@@ -0,0 +1,75 @@ | |||
1 | #ifndef _ASM_UM_BARRIER_H_ | ||
2 | #define _ASM_UM_BARRIER_H_ | ||
3 | |||
4 | #include <asm/asm.h> | ||
5 | #include <asm/segment.h> | ||
6 | #include <asm/cpufeature.h> | ||
7 | #include <asm/cmpxchg.h> | ||
8 | #include <asm/nops.h> | ||
9 | |||
10 | #include <linux/kernel.h> | ||
11 | #include <linux/irqflags.h> | ||
12 | |||
13 | /* | ||
14 | * Force strict CPU ordering. | ||
15 | * And yes, this is required on UP too when we're talking | ||
16 | * to devices. | ||
17 | */ | ||
18 | #ifdef CONFIG_X86_32 | ||
19 | |||
20 | #define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) | ||
21 | #define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) | ||
22 | #define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) | ||
23 | |||
24 | #else /* CONFIG_X86_32 */ | ||
25 | |||
26 | #define mb() asm volatile("mfence" : : : "memory") | ||
27 | #define rmb() asm volatile("lfence" : : : "memory") | ||
28 | #define wmb() asm volatile("sfence" : : : "memory") | ||
29 | |||
30 | #endif /* CONFIG_X86_32 */ | ||
31 | |||
32 | #define read_barrier_depends() do { } while (0) | ||
33 | |||
34 | #ifdef CONFIG_SMP | ||
35 | |||
36 | #define smp_mb() mb() | ||
37 | #ifdef CONFIG_X86_PPRO_FENCE | ||
38 | #define smp_rmb() rmb() | ||
39 | #else /* CONFIG_X86_PPRO_FENCE */ | ||
40 | #define smp_rmb() barrier() | ||
41 | #endif /* CONFIG_X86_PPRO_FENCE */ | ||
42 | |||
43 | #ifdef CONFIG_X86_OOSTORE | ||
44 | #define smp_wmb() wmb() | ||
45 | #else /* CONFIG_X86_OOSTORE */ | ||
46 | #define smp_wmb() barrier() | ||
47 | #endif /* CONFIG_X86_OOSTORE */ | ||
48 | |||
49 | #define smp_read_barrier_depends() read_barrier_depends() | ||
50 | #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) | ||
51 | |||
52 | #else /* CONFIG_SMP */ | ||
53 | |||
54 | #define smp_mb() barrier() | ||
55 | #define smp_rmb() barrier() | ||
56 | #define smp_wmb() barrier() | ||
57 | #define smp_read_barrier_depends() do { } while (0) | ||
58 | #define set_mb(var, value) do { var = value; barrier(); } while (0) | ||
59 | |||
60 | #endif /* CONFIG_SMP */ | ||
61 | |||
62 | /* | ||
63 | * Stop RDTSC speculation. This is needed when you need to use RDTSC | ||
64 | * (or get_cycles or vread that possibly accesses the TSC) in a defined | ||
65 | * code region. | ||
66 | * | ||
67 | * (Could use an alternative three way for this if there was one.) | ||
68 | */ | ||
69 | static inline void rdtsc_barrier(void) | ||
70 | { | ||
71 | alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); | ||
72 | alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); | ||
73 | } | ||
74 | |||
75 | #endif | ||
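The new UML barrier.h keeps only the ordering primitives from the system.h header that is deleted below. The usual consumer of smp_wmb()/smp_rmb() is a publish/consume pair: the writer stores the payload, issues a write barrier, then sets a flag; the reader waits for the flag, issues a read barrier, then reads the payload. A didactic user-space sketch of that pattern, using GCC __atomic fences as stand-ins for the kernel macros (this header is kernel-only, and the sketch is not a strictly data-race-free C11 program):

#include <pthread.h>
#include <stdio.h>

/* User-space stand-ins for the kernel's smp_wmb()/smp_rmb(). */
#define smp_wmb() __atomic_thread_fence(__ATOMIC_RELEASE)
#define smp_rmb() __atomic_thread_fence(__ATOMIC_ACQUIRE)

static int payload;
static volatile int ready;

static void *producer(void *arg)
{
    (void)arg;
    payload = 42;   /* write the data ... */
    smp_wmb();      /* ... and make it visible before the flag */
    ready = 1;
    return NULL;
}

static void *consumer(void *arg)
{
    (void)arg;
    while (!ready)
        ;           /* spin until published */
    smp_rmb();      /* order the flag read before the payload read */
    printf("payload = %d\n", payload);
    return NULL;
}

int main(void)      /* build with: cc -pthread sketch.c */
{
    pthread_t p, c;

    pthread_create(&c, NULL, consumer, NULL);
    pthread_create(&p, NULL, producer, NULL);
    pthread_join(p, NULL);
    pthread_join(c, NULL);
    return 0;
}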
diff --git a/arch/x86/um/asm/system.h b/arch/x86/um/asm/system.h deleted file mode 100644 index a459fd9b7598..000000000000 --- a/arch/x86/um/asm/system.h +++ /dev/null | |||
@@ -1,135 +0,0 @@ | |||
1 | #ifndef _ASM_X86_SYSTEM_H_ | ||
2 | #define _ASM_X86_SYSTEM_H_ | ||
3 | |||
4 | #include <asm/asm.h> | ||
5 | #include <asm/segment.h> | ||
6 | #include <asm/cpufeature.h> | ||
7 | #include <asm/cmpxchg.h> | ||
8 | #include <asm/nops.h> | ||
9 | |||
10 | #include <linux/kernel.h> | ||
11 | #include <linux/irqflags.h> | ||
12 | |||
13 | /* entries in ARCH_DLINFO: */ | ||
14 | #ifdef CONFIG_IA32_EMULATION | ||
15 | # define AT_VECTOR_SIZE_ARCH 2 | ||
16 | #else | ||
17 | # define AT_VECTOR_SIZE_ARCH 1 | ||
18 | #endif | ||
19 | |||
20 | extern unsigned long arch_align_stack(unsigned long sp); | ||
21 | |||
22 | void default_idle(void); | ||
23 | |||
24 | /* | ||
25 | * Force strict CPU ordering. | ||
26 | * And yes, this is required on UP too when we're talking | ||
27 | * to devices. | ||
28 | */ | ||
29 | #ifdef CONFIG_X86_32 | ||
30 | /* | ||
31 | * Some non-Intel clones support out of order store. wmb() ceases to be a | ||
32 | * nop for these. | ||
33 | */ | ||
34 | #define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) | ||
35 | #define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) | ||
36 | #define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) | ||
37 | #else | ||
38 | #define mb() asm volatile("mfence":::"memory") | ||
39 | #define rmb() asm volatile("lfence":::"memory") | ||
40 | #define wmb() asm volatile("sfence" ::: "memory") | ||
41 | #endif | ||
42 | |||
43 | /** | ||
44 | * read_barrier_depends - Flush all pending reads that subsequent reads | ||
45 | * depend on. | ||
46 | * | ||
47 | * No data-dependent reads from memory-like regions are ever reordered | ||
48 | * over this barrier. All reads preceding this primitive are guaranteed | ||
49 | * to access memory (but not necessarily other CPUs' caches) before any | ||
50 | * reads following this primitive that depend on the data returned by | ||
51 | * any of the preceding reads. This primitive is much lighter weight than | ||
52 | * rmb() on most CPUs, and is never heavier weight than is | ||
53 | * rmb(). | ||
54 | * | ||
55 | * These ordering constraints are respected by both the local CPU | ||
56 | * and the compiler. | ||
57 | * | ||
58 | * Ordering is not guaranteed by anything other than these primitives, | ||
59 | * not even by data dependencies. See the documentation for | ||
60 | * memory_barrier() for examples and URLs to more information. | ||
61 | * | ||
62 | * For example, the following code would force ordering (the initial | ||
63 | * value of "a" is zero, "b" is one, and "p" is "&a"): | ||
64 | * | ||
65 | * <programlisting> | ||
66 | * CPU 0 CPU 1 | ||
67 | * | ||
68 | * b = 2; | ||
69 | * memory_barrier(); | ||
70 | * p = &b; q = p; | ||
71 | * read_barrier_depends(); | ||
72 | * d = *q; | ||
73 | * </programlisting> | ||
74 | * | ||
75 | * because the read of "*q" depends on the read of "p" and these | ||
76 | * two reads are separated by a read_barrier_depends(). However, | ||
77 | * the following code, with the same initial values for "a" and "b": | ||
78 | * | ||
79 | * <programlisting> | ||
80 | * CPU 0 CPU 1 | ||
81 | * | ||
82 | * a = 2; | ||
83 | * memory_barrier(); | ||
84 | * b = 3; y = b; | ||
85 | * read_barrier_depends(); | ||
86 | * x = a; | ||
87 | * </programlisting> | ||
88 | * | ||
89 | * does not enforce ordering, since there is no data dependency between | ||
90 | * the read of "a" and the read of "b". Therefore, on some CPUs, such | ||
91 | * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() | ||
92 | * in cases like this where there are no data dependencies. | ||
93 | **/ | ||
94 | |||
95 | #define read_barrier_depends() do { } while (0) | ||
96 | |||
97 | #ifdef CONFIG_SMP | ||
98 | #define smp_mb() mb() | ||
99 | #ifdef CONFIG_X86_PPRO_FENCE | ||
100 | # define smp_rmb() rmb() | ||
101 | #else | ||
102 | # define smp_rmb() barrier() | ||
103 | #endif | ||
104 | #ifdef CONFIG_X86_OOSTORE | ||
105 | # define smp_wmb() wmb() | ||
106 | #else | ||
107 | # define smp_wmb() barrier() | ||
108 | #endif | ||
109 | #define smp_read_barrier_depends() read_barrier_depends() | ||
110 | #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) | ||
111 | #else | ||
112 | #define smp_mb() barrier() | ||
113 | #define smp_rmb() barrier() | ||
114 | #define smp_wmb() barrier() | ||
115 | #define smp_read_barrier_depends() do { } while (0) | ||
116 | #define set_mb(var, value) do { var = value; barrier(); } while (0) | ||
117 | #endif | ||
118 | |||
119 | /* | ||
120 | * Stop RDTSC speculation. This is needed when you need to use RDTSC | ||
121 | * (or get_cycles or vread that possibly accesses the TSC) in a defined | ||
122 | * code region. | ||
123 | * | ||
124 | * (Could use an alternative three way for this if there was one.) | ||
125 | */ | ||
126 | static inline void rdtsc_barrier(void) | ||
127 | { | ||
128 | alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); | ||
129 | alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); | ||
130 | } | ||
131 | |||
132 | extern void *_switch_to(void *prev, void *next, void *last); | ||
133 | #define switch_to(prev, next, last) prev = _switch_to(prev, next, last) | ||
134 | |||
135 | #endif | ||