aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/Makefile.um3
-rw-r--r--arch/x86/ia32/ia32_aout.c32
-rw-r--r--arch/x86/include/asm/cmpxchg.h4
-rw-r--r--arch/x86/include/asm/ftrace.h3
-rw-r--r--arch/x86/include/asm/msr-index.h5
-rw-r--r--arch/x86/include/asm/perf_event.h8
-rw-r--r--arch/x86/include/asm/uaccess.h2
-rw-r--r--arch/x86/include/asm/uaccess_32.h5
-rw-r--r--arch/x86/include/asm/uaccess_64.h4
-rw-r--r--arch/x86/kernel/cpu/perf_event.c7
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c11
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd_ibs.c570
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c4
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c6
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c6
-rw-r--r--arch/x86/kernel/ftrace.c511
-rw-r--r--arch/x86/kernel/nmi.c10
-rw-r--r--arch/x86/kernel/traps.c8
-rw-r--r--arch/x86/kernel/vsyscall_64.c6
-rw-r--r--arch/x86/kvm/pmu.c18
-rw-r--r--arch/x86/kvm/vmx.c5
-rw-r--r--arch/x86/kvm/x86.c8
-rw-r--r--arch/x86/lib/insn.c53
-rw-r--r--arch/x86/lib/usercopy.c103
-rw-r--r--arch/x86/lib/usercopy_32.c87
-rw-r--r--arch/x86/lib/usercopy_64.c49
-rw-r--r--arch/x86/um/asm/barrier.h75
-rw-r--r--arch/x86/um/asm/system.h135
29 files changed, 1193 insertions, 546 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 76f5a466547a..8443c50fbbf6 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -40,7 +40,6 @@ config X86
40 select HAVE_FUNCTION_GRAPH_TRACER 40 select HAVE_FUNCTION_GRAPH_TRACER
41 select HAVE_FUNCTION_GRAPH_FP_TEST 41 select HAVE_FUNCTION_GRAPH_FP_TEST
42 select HAVE_FUNCTION_TRACE_MCOUNT_TEST 42 select HAVE_FUNCTION_TRACE_MCOUNT_TEST
43 select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE
44 select HAVE_SYSCALL_TRACEPOINTS 43 select HAVE_SYSCALL_TRACEPOINTS
45 select HAVE_KVM 44 select HAVE_KVM
46 select HAVE_ARCH_KGDB 45 select HAVE_ARCH_KGDB
diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um
index 4be406abeefd..36b62bc52638 100644
--- a/arch/x86/Makefile.um
+++ b/arch/x86/Makefile.um
@@ -14,6 +14,9 @@ LINK-y += $(call cc-option,-m32)
14 14
15export LDFLAGS 15export LDFLAGS
16 16
17LDS_EXTRA := -Ui386
18export LDS_EXTRA
19
17# First of all, tune CFLAGS for the specific CPU. This actually sets cflags-y. 20# First of all, tune CFLAGS for the specific CPU. This actually sets cflags-y.
18include $(srctree)/arch/x86/Makefile_32.cpu 21include $(srctree)/arch/x86/Makefile_32.cpu
19 22
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index d511d951a052..4824fb45560f 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -119,9 +119,7 @@ static void set_brk(unsigned long start, unsigned long end)
119 end = PAGE_ALIGN(end); 119 end = PAGE_ALIGN(end);
120 if (end <= start) 120 if (end <= start)
121 return; 121 return;
122 down_write(&current->mm->mmap_sem); 122 vm_brk(start, end - start);
123 do_brk(start, end - start);
124 up_write(&current->mm->mmap_sem);
125} 123}
126 124
127#ifdef CORE_DUMP 125#ifdef CORE_DUMP
@@ -332,9 +330,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
332 pos = 32; 330 pos = 32;
333 map_size = ex.a_text+ex.a_data; 331 map_size = ex.a_text+ex.a_data;
334 332
335 down_write(&current->mm->mmap_sem); 333 error = vm_brk(text_addr & PAGE_MASK, map_size);
336 error = do_brk(text_addr & PAGE_MASK, map_size);
337 up_write(&current->mm->mmap_sem);
338 334
339 if (error != (text_addr & PAGE_MASK)) { 335 if (error != (text_addr & PAGE_MASK)) {
340 send_sig(SIGKILL, current, 0); 336 send_sig(SIGKILL, current, 0);
@@ -373,9 +369,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
373 if (!bprm->file->f_op->mmap || (fd_offset & ~PAGE_MASK) != 0) { 369 if (!bprm->file->f_op->mmap || (fd_offset & ~PAGE_MASK) != 0) {
374 loff_t pos = fd_offset; 370 loff_t pos = fd_offset;
375 371
376 down_write(&current->mm->mmap_sem); 372 vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
377 do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
378 up_write(&current->mm->mmap_sem);
379 bprm->file->f_op->read(bprm->file, 373 bprm->file->f_op->read(bprm->file,
380 (char __user *)N_TXTADDR(ex), 374 (char __user *)N_TXTADDR(ex),
381 ex.a_text+ex.a_data, &pos); 375 ex.a_text+ex.a_data, &pos);
@@ -385,26 +379,22 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
385 goto beyond_if; 379 goto beyond_if;
386 } 380 }
387 381
388 down_write(&current->mm->mmap_sem); 382 error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
389 error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
390 PROT_READ | PROT_EXEC, 383 PROT_READ | PROT_EXEC,
391 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | 384 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE |
392 MAP_EXECUTABLE | MAP_32BIT, 385 MAP_EXECUTABLE | MAP_32BIT,
393 fd_offset); 386 fd_offset);
394 up_write(&current->mm->mmap_sem);
395 387
396 if (error != N_TXTADDR(ex)) { 388 if (error != N_TXTADDR(ex)) {
397 send_sig(SIGKILL, current, 0); 389 send_sig(SIGKILL, current, 0);
398 return error; 390 return error;
399 } 391 }
400 392
401 down_write(&current->mm->mmap_sem); 393 error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
402 error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
403 PROT_READ | PROT_WRITE | PROT_EXEC, 394 PROT_READ | PROT_WRITE | PROT_EXEC,
404 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | 395 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE |
405 MAP_EXECUTABLE | MAP_32BIT, 396 MAP_EXECUTABLE | MAP_32BIT,
406 fd_offset + ex.a_text); 397 fd_offset + ex.a_text);
407 up_write(&current->mm->mmap_sem);
408 if (error != N_DATADDR(ex)) { 398 if (error != N_DATADDR(ex)) {
409 send_sig(SIGKILL, current, 0); 399 send_sig(SIGKILL, current, 0);
410 return error; 400 return error;
@@ -476,9 +466,7 @@ static int load_aout_library(struct file *file)
476 error_time = jiffies; 466 error_time = jiffies;
477 } 467 }
478#endif 468#endif
479 down_write(&current->mm->mmap_sem); 469 vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
480 do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
481 up_write(&current->mm->mmap_sem);
482 470
483 file->f_op->read(file, (char __user *)start_addr, 471 file->f_op->read(file, (char __user *)start_addr,
484 ex.a_text + ex.a_data, &pos); 472 ex.a_text + ex.a_data, &pos);
@@ -490,12 +478,10 @@ static int load_aout_library(struct file *file)
490 goto out; 478 goto out;
491 } 479 }
492 /* Now use mmap to map the library into memory. */ 480 /* Now use mmap to map the library into memory. */
493 down_write(&current->mm->mmap_sem); 481 error = vm_mmap(file, start_addr, ex.a_text + ex.a_data,
494 error = do_mmap(file, start_addr, ex.a_text + ex.a_data,
495 PROT_READ | PROT_WRITE | PROT_EXEC, 482 PROT_READ | PROT_WRITE | PROT_EXEC,
496 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_32BIT, 483 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_32BIT,
497 N_TXTOFF(ex)); 484 N_TXTOFF(ex));
498 up_write(&current->mm->mmap_sem);
499 retval = error; 485 retval = error;
500 if (error != start_addr) 486 if (error != start_addr)
501 goto out; 487 goto out;
@@ -503,9 +489,7 @@ static int load_aout_library(struct file *file)
503 len = PAGE_ALIGN(ex.a_text + ex.a_data); 489 len = PAGE_ALIGN(ex.a_text + ex.a_data);
504 bss = ex.a_text + ex.a_data + ex.a_bss; 490 bss = ex.a_text + ex.a_data + ex.a_bss;
505 if (bss > len) { 491 if (bss > len) {
506 down_write(&current->mm->mmap_sem); 492 error = vm_brk(start_addr + len, bss - len);
507 error = do_brk(start_addr + len, bss - len);
508 up_write(&current->mm->mmap_sem);
509 retval = error; 493 retval = error;
510 if (error != start_addr + len) 494 if (error != start_addr + len)
511 goto out; 495 goto out;
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index b3b733262909..99480e55973d 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -43,7 +43,7 @@ extern void __add_wrong_size(void)
43 switch (sizeof(*(ptr))) { \ 43 switch (sizeof(*(ptr))) { \
44 case __X86_CASE_B: \ 44 case __X86_CASE_B: \
45 asm volatile (lock #op "b %b0, %1\n" \ 45 asm volatile (lock #op "b %b0, %1\n" \
46 : "+r" (__ret), "+m" (*(ptr)) \ 46 : "+q" (__ret), "+m" (*(ptr)) \
47 : : "memory", "cc"); \ 47 : : "memory", "cc"); \
48 break; \ 48 break; \
49 case __X86_CASE_W: \ 49 case __X86_CASE_W: \
@@ -173,7 +173,7 @@ extern void __add_wrong_size(void)
173 switch (sizeof(*(ptr))) { \ 173 switch (sizeof(*(ptr))) { \
174 case __X86_CASE_B: \ 174 case __X86_CASE_B: \
175 asm volatile (lock "addb %b1, %0\n" \ 175 asm volatile (lock "addb %b1, %0\n" \
176 : "+m" (*(ptr)) : "ri" (inc) \ 176 : "+m" (*(ptr)) : "qi" (inc) \
177 : "memory", "cc"); \ 177 : "memory", "cc"); \
178 break; \ 178 break; \
179 case __X86_CASE_W: \ 179 case __X86_CASE_W: \
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 268c783ab1c0..18d9005d9e4f 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -34,6 +34,7 @@
34 34
35#ifndef __ASSEMBLY__ 35#ifndef __ASSEMBLY__
36extern void mcount(void); 36extern void mcount(void);
37extern int modifying_ftrace_code;
37 38
38static inline unsigned long ftrace_call_adjust(unsigned long addr) 39static inline unsigned long ftrace_call_adjust(unsigned long addr)
39{ 40{
@@ -50,6 +51,8 @@ struct dyn_arch_ftrace {
50 /* No extra data needed for x86 */ 51 /* No extra data needed for x86 */
51}; 52};
52 53
54int ftrace_int3_handler(struct pt_regs *regs);
55
53#endif /* CONFIG_DYNAMIC_FTRACE */ 56#endif /* CONFIG_DYNAMIC_FTRACE */
54#endif /* __ASSEMBLY__ */ 57#endif /* __ASSEMBLY__ */
55#endif /* CONFIG_FUNCTION_TRACER */ 58#endif /* CONFIG_FUNCTION_TRACER */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index ccb805966f68..957ec87385af 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -134,6 +134,8 @@
134#define MSR_AMD64_IBSFETCHCTL 0xc0011030 134#define MSR_AMD64_IBSFETCHCTL 0xc0011030
135#define MSR_AMD64_IBSFETCHLINAD 0xc0011031 135#define MSR_AMD64_IBSFETCHLINAD 0xc0011031
136#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 136#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032
137#define MSR_AMD64_IBSFETCH_REG_COUNT 3
138#define MSR_AMD64_IBSFETCH_REG_MASK ((1UL<<MSR_AMD64_IBSFETCH_REG_COUNT)-1)
137#define MSR_AMD64_IBSOPCTL 0xc0011033 139#define MSR_AMD64_IBSOPCTL 0xc0011033
138#define MSR_AMD64_IBSOPRIP 0xc0011034 140#define MSR_AMD64_IBSOPRIP 0xc0011034
139#define MSR_AMD64_IBSOPDATA 0xc0011035 141#define MSR_AMD64_IBSOPDATA 0xc0011035
@@ -141,8 +143,11 @@
141#define MSR_AMD64_IBSOPDATA3 0xc0011037 143#define MSR_AMD64_IBSOPDATA3 0xc0011037
142#define MSR_AMD64_IBSDCLINAD 0xc0011038 144#define MSR_AMD64_IBSDCLINAD 0xc0011038
143#define MSR_AMD64_IBSDCPHYSAD 0xc0011039 145#define MSR_AMD64_IBSDCPHYSAD 0xc0011039
146#define MSR_AMD64_IBSOP_REG_COUNT 7
147#define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1)
144#define MSR_AMD64_IBSCTL 0xc001103a 148#define MSR_AMD64_IBSCTL 0xc001103a
145#define MSR_AMD64_IBSBRTARGET 0xc001103b 149#define MSR_AMD64_IBSBRTARGET 0xc001103b
150#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
146 151
147/* Fam 15h MSRs */ 152/* Fam 15h MSRs */
148#define MSR_F15H_PERF_CTL 0xc0010200 153#define MSR_F15H_PERF_CTL 0xc0010200
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 2291895b1836..4e40a64315c9 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -158,6 +158,7 @@ struct x86_pmu_capability {
158#define IBS_CAPS_OPCNT (1U<<4) 158#define IBS_CAPS_OPCNT (1U<<4)
159#define IBS_CAPS_BRNTRGT (1U<<5) 159#define IBS_CAPS_BRNTRGT (1U<<5)
160#define IBS_CAPS_OPCNTEXT (1U<<6) 160#define IBS_CAPS_OPCNTEXT (1U<<6)
161#define IBS_CAPS_RIPINVALIDCHK (1U<<7)
161 162
162#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \ 163#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \
163 | IBS_CAPS_FETCHSAM \ 164 | IBS_CAPS_FETCHSAM \
@@ -170,19 +171,22 @@ struct x86_pmu_capability {
170#define IBSCTL_LVT_OFFSET_VALID (1ULL<<8) 171#define IBSCTL_LVT_OFFSET_VALID (1ULL<<8)
171#define IBSCTL_LVT_OFFSET_MASK 0x0F 172#define IBSCTL_LVT_OFFSET_MASK 0x0F
172 173
173/* IbsFetchCtl bits/masks */ 174/* ibs fetch bits/masks */
174#define IBS_FETCH_RAND_EN (1ULL<<57) 175#define IBS_FETCH_RAND_EN (1ULL<<57)
175#define IBS_FETCH_VAL (1ULL<<49) 176#define IBS_FETCH_VAL (1ULL<<49)
176#define IBS_FETCH_ENABLE (1ULL<<48) 177#define IBS_FETCH_ENABLE (1ULL<<48)
177#define IBS_FETCH_CNT 0xFFFF0000ULL 178#define IBS_FETCH_CNT 0xFFFF0000ULL
178#define IBS_FETCH_MAX_CNT 0x0000FFFFULL 179#define IBS_FETCH_MAX_CNT 0x0000FFFFULL
179 180
180/* IbsOpCtl bits */ 181/* ibs op bits/masks */
182/* lower 4 bits of the current count are ignored: */
183#define IBS_OP_CUR_CNT (0xFFFF0ULL<<32)
181#define IBS_OP_CNT_CTL (1ULL<<19) 184#define IBS_OP_CNT_CTL (1ULL<<19)
182#define IBS_OP_VAL (1ULL<<18) 185#define IBS_OP_VAL (1ULL<<18)
183#define IBS_OP_ENABLE (1ULL<<17) 186#define IBS_OP_ENABLE (1ULL<<17)
184#define IBS_OP_MAX_CNT 0x0000FFFFULL 187#define IBS_OP_MAX_CNT 0x0000FFFFULL
185#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ 188#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */
189#define IBS_RIP_INVALID (1ULL<<38)
186 190
187extern u32 get_ibs_caps(void); 191extern u32 get_ibs_caps(void);
188 192
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 8be5f54d9360..e0544597cfe7 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -557,6 +557,8 @@ struct __large_struct { unsigned long buf[100]; };
557 557
558extern unsigned long 558extern unsigned long
559copy_from_user_nmi(void *to, const void __user *from, unsigned long n); 559copy_from_user_nmi(void *to, const void __user *from, unsigned long n);
560extern __must_check long
561strncpy_from_user(char *dst, const char __user *src, long count);
560 562
561/* 563/*
562 * movsl can be slow when source and dest are not both 8-byte aligned 564 * movsl can be slow when source and dest are not both 8-byte aligned
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 566e803cc602..8084bc73b18c 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -213,11 +213,6 @@ static inline unsigned long __must_check copy_from_user(void *to,
213 return n; 213 return n;
214} 214}
215 215
216long __must_check strncpy_from_user(char *dst, const char __user *src,
217 long count);
218long __must_check __strncpy_from_user(char *dst,
219 const char __user *src, long count);
220
221/** 216/**
222 * strlen_user: - Get the size of a string in user space. 217 * strlen_user: - Get the size of a string in user space.
223 * @str: The string to measure. 218 * @str: The string to measure.
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 1c66d30971ad..fcd4b6f3ef02 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -208,10 +208,6 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
208 } 208 }
209} 209}
210 210
211__must_check long
212strncpy_from_user(char *dst, const char __user *src, long count);
213__must_check long
214__strncpy_from_user(char *dst, const char __user *src, long count);
215__must_check long strnlen_user(const char __user *str, long n); 211__must_check long strnlen_user(const char __user *str, long n);
216__must_check long __strnlen_user(const char __user *str, long n); 212__must_check long __strnlen_user(const char __user *str, long n);
217__must_check long strlen_user(const char __user *str); 213__must_check long strlen_user(const char __user *str);
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index bb8e03407e18..e049d6da0183 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -484,9 +484,6 @@ static int __x86_pmu_event_init(struct perf_event *event)
484 484
485 /* mark unused */ 485 /* mark unused */
486 event->hw.extra_reg.idx = EXTRA_REG_NONE; 486 event->hw.extra_reg.idx = EXTRA_REG_NONE;
487
488 /* mark not used */
489 event->hw.extra_reg.idx = EXTRA_REG_NONE;
490 event->hw.branch_reg.idx = EXTRA_REG_NONE; 487 event->hw.branch_reg.idx = EXTRA_REG_NONE;
491 488
492 return x86_pmu.hw_config(event); 489 return x86_pmu.hw_config(event);
@@ -1186,8 +1183,6 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
1186 int idx, handled = 0; 1183 int idx, handled = 0;
1187 u64 val; 1184 u64 val;
1188 1185
1189 perf_sample_data_init(&data, 0);
1190
1191 cpuc = &__get_cpu_var(cpu_hw_events); 1186 cpuc = &__get_cpu_var(cpu_hw_events);
1192 1187
1193 /* 1188 /*
@@ -1222,7 +1217,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
1222 * event overflow 1217 * event overflow
1223 */ 1218 */
1224 handled++; 1219 handled++;
1225 data.period = event->hw.last_period; 1220 perf_sample_data_init(&data, 0, event->hw.last_period);
1226 1221
1227 if (!x86_perf_event_set_period(event)) 1222 if (!x86_perf_event_set_period(event))
1228 continue; 1223 continue;
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 95e7fe1c5f0b..65652265fffd 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -134,8 +134,13 @@ static u64 amd_pmu_event_map(int hw_event)
134 134
135static int amd_pmu_hw_config(struct perf_event *event) 135static int amd_pmu_hw_config(struct perf_event *event)
136{ 136{
137 int ret = x86_pmu_hw_config(event); 137 int ret;
138 138
139 /* pass precise event sampling to ibs: */
140 if (event->attr.precise_ip && get_ibs_caps())
141 return -ENOENT;
142
143 ret = x86_pmu_hw_config(event);
139 if (ret) 144 if (ret)
140 return ret; 145 return ret;
141 146
@@ -205,10 +210,8 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
205 * when we come here 210 * when we come here
206 */ 211 */
207 for (i = 0; i < x86_pmu.num_counters; i++) { 212 for (i = 0; i < x86_pmu.num_counters; i++) {
208 if (nb->owners[i] == event) { 213 if (cmpxchg(nb->owners + i, event, NULL) == event)
209 cmpxchg(nb->owners+i, event, NULL);
210 break; 214 break;
211 }
212 } 215 }
213} 216}
214 217
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index 3b8a2d30d14e..da9bcdcd9856 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -9,6 +9,7 @@
9#include <linux/perf_event.h> 9#include <linux/perf_event.h>
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/pci.h> 11#include <linux/pci.h>
12#include <linux/ptrace.h>
12 13
13#include <asm/apic.h> 14#include <asm/apic.h>
14 15
@@ -16,36 +17,591 @@ static u32 ibs_caps;
16 17
17#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) 18#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
18 19
19static struct pmu perf_ibs; 20#include <linux/kprobes.h>
21#include <linux/hardirq.h>
22
23#include <asm/nmi.h>
24
25#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
26#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT
27
28enum ibs_states {
29 IBS_ENABLED = 0,
30 IBS_STARTED = 1,
31 IBS_STOPPING = 2,
32
33 IBS_MAX_STATES,
34};
35
36struct cpu_perf_ibs {
37 struct perf_event *event;
38 unsigned long state[BITS_TO_LONGS(IBS_MAX_STATES)];
39};
40
41struct perf_ibs {
42 struct pmu pmu;
43 unsigned int msr;
44 u64 config_mask;
45 u64 cnt_mask;
46 u64 enable_mask;
47 u64 valid_mask;
48 u64 max_period;
49 unsigned long offset_mask[1];
50 int offset_max;
51 struct cpu_perf_ibs __percpu *pcpu;
52 u64 (*get_count)(u64 config);
53};
54
55struct perf_ibs_data {
56 u32 size;
57 union {
58 u32 data[0]; /* data buffer starts here */
59 u32 caps;
60 };
61 u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
62};
63
64static int
65perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
66{
67 s64 left = local64_read(&hwc->period_left);
68 s64 period = hwc->sample_period;
69 int overflow = 0;
70
71 /*
72 * If we are way outside a reasonable range then just skip forward:
73 */
74 if (unlikely(left <= -period)) {
75 left = period;
76 local64_set(&hwc->period_left, left);
77 hwc->last_period = period;
78 overflow = 1;
79 }
80
81 if (unlikely(left < (s64)min)) {
82 left += period;
83 local64_set(&hwc->period_left, left);
84 hwc->last_period = period;
85 overflow = 1;
86 }
87
88 /*
89 * If the hw period that triggers the sw overflow is too short
90 * we might hit the irq handler. This biases the results.
91 * Thus we shorten the next-to-last period and set the last
92 * period to the max period.
93 */
94 if (left > max) {
95 left -= max;
96 if (left > max)
97 left = max;
98 else if (left < min)
99 left = min;
100 }
101
102 *hw_period = (u64)left;
103
104 return overflow;
105}
106
107static int
108perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
109{
110 struct hw_perf_event *hwc = &event->hw;
111 int shift = 64 - width;
112 u64 prev_raw_count;
113 u64 delta;
114
115 /*
116 * Careful: an NMI might modify the previous event value.
117 *
118 * Our tactic to handle this is to first atomically read and
119 * exchange a new raw count - then add that new-prev delta
120 * count to the generic event atomically:
121 */
122 prev_raw_count = local64_read(&hwc->prev_count);
123 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
124 new_raw_count) != prev_raw_count)
125 return 0;
126
127 /*
128 * Now we have the new raw value and have updated the prev
129 * timestamp already. We can now calculate the elapsed delta
130 * (event-)time and add that to the generic event.
131 *
132 * Careful, not all hw sign-extends above the physical width
133 * of the count.
134 */
135 delta = (new_raw_count << shift) - (prev_raw_count << shift);
136 delta >>= shift;
137
138 local64_add(delta, &event->count);
139 local64_sub(delta, &hwc->period_left);
140
141 return 1;
142}
143
144static struct perf_ibs perf_ibs_fetch;
145static struct perf_ibs perf_ibs_op;
146
147static struct perf_ibs *get_ibs_pmu(int type)
148{
149 if (perf_ibs_fetch.pmu.type == type)
150 return &perf_ibs_fetch;
151 if (perf_ibs_op.pmu.type == type)
152 return &perf_ibs_op;
153 return NULL;
154}
155
156/*
157 * Use IBS for precise event sampling:
158 *
159 * perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count
160 * perf record -a -e r076:p ... # same as -e cpu-cycles:p
161 * perf record -a -e r0C1:p ... # use ibs op counting micro-ops
162 *
163 * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
164 * MSRC001_1033) is used to select either cycle or micro-ops counting
165 * mode.
166 *
167 * The rip of IBS samples has skid 0. Thus, IBS supports precise
168 * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
169 * rip is invalid when IBS was not able to record the rip correctly.
170 * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
171 *
172 */
173static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
174{
175 switch (event->attr.precise_ip) {
176 case 0:
177 return -ENOENT;
178 case 1:
179 case 2:
180 break;
181 default:
182 return -EOPNOTSUPP;
183 }
184
185 switch (event->attr.type) {
186 case PERF_TYPE_HARDWARE:
187 switch (event->attr.config) {
188 case PERF_COUNT_HW_CPU_CYCLES:
189 *config = 0;
190 return 0;
191 }
192 break;
193 case PERF_TYPE_RAW:
194 switch (event->attr.config) {
195 case 0x0076:
196 *config = 0;
197 return 0;
198 case 0x00C1:
199 *config = IBS_OP_CNT_CTL;
200 return 0;
201 }
202 break;
203 default:
204 return -ENOENT;
205 }
206
207 return -EOPNOTSUPP;
208}
20 209
21static int perf_ibs_init(struct perf_event *event) 210static int perf_ibs_init(struct perf_event *event)
22{ 211{
23 if (perf_ibs.type != event->attr.type) 212 struct hw_perf_event *hwc = &event->hw;
213 struct perf_ibs *perf_ibs;
214 u64 max_cnt, config;
215 int ret;
216
217 perf_ibs = get_ibs_pmu(event->attr.type);
218 if (perf_ibs) {
219 config = event->attr.config;
220 } else {
221 perf_ibs = &perf_ibs_op;
222 ret = perf_ibs_precise_event(event, &config);
223 if (ret)
224 return ret;
225 }
226
227 if (event->pmu != &perf_ibs->pmu)
24 return -ENOENT; 228 return -ENOENT;
229
230 if (config & ~perf_ibs->config_mask)
231 return -EINVAL;
232
233 if (hwc->sample_period) {
234 if (config & perf_ibs->cnt_mask)
235 /* raw max_cnt may not be set */
236 return -EINVAL;
237 if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
238 /*
239 * lower 4 bits can not be set in ibs max cnt,
240 * but allowing it in case we adjust the
241 * sample period to set a frequency.
242 */
243 return -EINVAL;
244 hwc->sample_period &= ~0x0FULL;
245 if (!hwc->sample_period)
246 hwc->sample_period = 0x10;
247 } else {
248 max_cnt = config & perf_ibs->cnt_mask;
249 config &= ~perf_ibs->cnt_mask;
250 event->attr.sample_period = max_cnt << 4;
251 hwc->sample_period = event->attr.sample_period;
252 }
253
254 if (!hwc->sample_period)
255 return -EINVAL;
256
257 /*
258 * If we modify hwc->sample_period, we also need to update
259 * hwc->last_period and hwc->period_left.
260 */
261 hwc->last_period = hwc->sample_period;
262 local64_set(&hwc->period_left, hwc->sample_period);
263
264 hwc->config_base = perf_ibs->msr;
265 hwc->config = config;
266
25 return 0; 267 return 0;
26} 268}
27 269
270static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
271 struct hw_perf_event *hwc, u64 *period)
272{
273 int overflow;
274
275 /* ignore lower 4 bits in min count: */
276 overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
277 local64_set(&hwc->prev_count, 0);
278
279 return overflow;
280}
281
282static u64 get_ibs_fetch_count(u64 config)
283{
284 return (config & IBS_FETCH_CNT) >> 12;
285}
286
287static u64 get_ibs_op_count(u64 config)
288{
289 u64 count = 0;
290
291 if (config & IBS_OP_VAL)
292 count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */
293
294 if (ibs_caps & IBS_CAPS_RDWROPCNT)
295 count += (config & IBS_OP_CUR_CNT) >> 32;
296
297 return count;
298}
299
300static void
301perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
302 u64 *config)
303{
304 u64 count = perf_ibs->get_count(*config);
305
306 /*
307 * Set width to 64 since we do not overflow on max width but
308 * instead on max count. In perf_ibs_set_period() we clear
309 * prev count manually on overflow.
310 */
311 while (!perf_event_try_update(event, count, 64)) {
312 rdmsrl(event->hw.config_base, *config);
313 count = perf_ibs->get_count(*config);
314 }
315}
316
317static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
318 struct hw_perf_event *hwc, u64 config)
319{
320 wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
321}
322
323/*
324 * Erratum #420 Instruction-Based Sampling Engine May Generate
325 * Interrupt that Cannot Be Cleared:
326 *
327 * Must clear counter mask first, then clear the enable bit. See
328 * Revision Guide for AMD Family 10h Processors, Publication #41322.
329 */
330static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
331 struct hw_perf_event *hwc, u64 config)
332{
333 config &= ~perf_ibs->cnt_mask;
334 wrmsrl(hwc->config_base, config);
335 config &= ~perf_ibs->enable_mask;
336 wrmsrl(hwc->config_base, config);
337}
338
339/*
340 * We cannot restore the ibs pmu state, so we always need to update
341 * the event while stopping it and then reset the state when starting
342 * again. Thus, ignoring PERF_EF_RELOAD and PERF_EF_UPDATE flags in
343 * perf_ibs_start()/perf_ibs_stop() and instead always do it.
344 */
345static void perf_ibs_start(struct perf_event *event, int flags)
346{
347 struct hw_perf_event *hwc = &event->hw;
348 struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
349 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
350 u64 period;
351
352 if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
353 return;
354
355 WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
356 hwc->state = 0;
357
358 perf_ibs_set_period(perf_ibs, hwc, &period);
359 set_bit(IBS_STARTED, pcpu->state);
360 perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
361
362 perf_event_update_userpage(event);
363}
364
365static void perf_ibs_stop(struct perf_event *event, int flags)
366{
367 struct hw_perf_event *hwc = &event->hw;
368 struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
369 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
370 u64 config;
371 int stopping;
372
373 stopping = test_and_clear_bit(IBS_STARTED, pcpu->state);
374
375 if (!stopping && (hwc->state & PERF_HES_UPTODATE))
376 return;
377
378 rdmsrl(hwc->config_base, config);
379
380 if (stopping) {
381 set_bit(IBS_STOPPING, pcpu->state);
382 perf_ibs_disable_event(perf_ibs, hwc, config);
383 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
384 hwc->state |= PERF_HES_STOPPED;
385 }
386
387 if (hwc->state & PERF_HES_UPTODATE)
388 return;
389
390 /*
391 * Clear valid bit to not count rollovers on update, rollovers
392 * are only updated in the irq handler.
393 */
394 config &= ~perf_ibs->valid_mask;
395
396 perf_ibs_event_update(perf_ibs, event, &config);
397 hwc->state |= PERF_HES_UPTODATE;
398}
399
28static int perf_ibs_add(struct perf_event *event, int flags) 400static int perf_ibs_add(struct perf_event *event, int flags)
29{ 401{
402 struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
403 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
404
405 if (test_and_set_bit(IBS_ENABLED, pcpu->state))
406 return -ENOSPC;
407
408 event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
409
410 pcpu->event = event;
411
412 if (flags & PERF_EF_START)
413 perf_ibs_start(event, PERF_EF_RELOAD);
414
30 return 0; 415 return 0;
31} 416}
32 417
33static void perf_ibs_del(struct perf_event *event, int flags) 418static void perf_ibs_del(struct perf_event *event, int flags)
34{ 419{
420 struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
421 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
422
423 if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
424 return;
425
426 perf_ibs_stop(event, PERF_EF_UPDATE);
427
428 pcpu->event = NULL;
429
430 perf_event_update_userpage(event);
35} 431}
36 432
37static struct pmu perf_ibs = { 433static void perf_ibs_read(struct perf_event *event) { }
38 .event_init= perf_ibs_init, 434
39 .add= perf_ibs_add, 435static struct perf_ibs perf_ibs_fetch = {
40 .del= perf_ibs_del, 436 .pmu = {
437 .task_ctx_nr = perf_invalid_context,
438
439 .event_init = perf_ibs_init,
440 .add = perf_ibs_add,
441 .del = perf_ibs_del,
442 .start = perf_ibs_start,
443 .stop = perf_ibs_stop,
444 .read = perf_ibs_read,
445 },
446 .msr = MSR_AMD64_IBSFETCHCTL,
447 .config_mask = IBS_FETCH_CONFIG_MASK,
448 .cnt_mask = IBS_FETCH_MAX_CNT,
449 .enable_mask = IBS_FETCH_ENABLE,
450 .valid_mask = IBS_FETCH_VAL,
451 .max_period = IBS_FETCH_MAX_CNT << 4,
452 .offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK },
453 .offset_max = MSR_AMD64_IBSFETCH_REG_COUNT,
454
455 .get_count = get_ibs_fetch_count,
41}; 456};
42 457
458static struct perf_ibs perf_ibs_op = {
459 .pmu = {
460 .task_ctx_nr = perf_invalid_context,
461
462 .event_init = perf_ibs_init,
463 .add = perf_ibs_add,
464 .del = perf_ibs_del,
465 .start = perf_ibs_start,
466 .stop = perf_ibs_stop,
467 .read = perf_ibs_read,
468 },
469 .msr = MSR_AMD64_IBSOPCTL,
470 .config_mask = IBS_OP_CONFIG_MASK,
471 .cnt_mask = IBS_OP_MAX_CNT,
472 .enable_mask = IBS_OP_ENABLE,
473 .valid_mask = IBS_OP_VAL,
474 .max_period = IBS_OP_MAX_CNT << 4,
475 .offset_mask = { MSR_AMD64_IBSOP_REG_MASK },
476 .offset_max = MSR_AMD64_IBSOP_REG_COUNT,
477
478 .get_count = get_ibs_op_count,
479};
480
481static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
482{
483 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
484 struct perf_event *event = pcpu->event;
485 struct hw_perf_event *hwc = &event->hw;
486 struct perf_sample_data data;
487 struct perf_raw_record raw;
488 struct pt_regs regs;
489 struct perf_ibs_data ibs_data;
490 int offset, size, check_rip, offset_max, throttle = 0;
491 unsigned int msr;
492 u64 *buf, *config, period;
493
494 if (!test_bit(IBS_STARTED, pcpu->state)) {
495 /*
496 * Catch spurious interrupts after stopping IBS: After
497 * disabling IBS there could be still incomming NMIs
498 * with samples that even have the valid bit cleared.
499 * Mark all this NMIs as handled.
500 */
501 return test_and_clear_bit(IBS_STOPPING, pcpu->state) ? 1 : 0;
502 }
503
504 msr = hwc->config_base;
505 buf = ibs_data.regs;
506 rdmsrl(msr, *buf);
507 if (!(*buf++ & perf_ibs->valid_mask))
508 return 0;
509
510 config = &ibs_data.regs[0];
511 perf_ibs_event_update(perf_ibs, event, config);
512 perf_sample_data_init(&data, 0, hwc->last_period);
513 if (!perf_ibs_set_period(perf_ibs, hwc, &period))
514 goto out; /* no sw counter overflow */
515
516 ibs_data.caps = ibs_caps;
517 size = 1;
518 offset = 1;
519 check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
520 if (event->attr.sample_type & PERF_SAMPLE_RAW)
521 offset_max = perf_ibs->offset_max;
522 else if (check_rip)
523 offset_max = 2;
524 else
525 offset_max = 1;
526 do {
527 rdmsrl(msr + offset, *buf++);
528 size++;
529 offset = find_next_bit(perf_ibs->offset_mask,
530 perf_ibs->offset_max,
531 offset + 1);
532 } while (offset < offset_max);
533 ibs_data.size = sizeof(u64) * size;
534
535 regs = *iregs;
536 if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
537 regs.flags &= ~PERF_EFLAGS_EXACT;
538 } else {
539 instruction_pointer_set(&regs, ibs_data.regs[1]);
540 regs.flags |= PERF_EFLAGS_EXACT;
541 }
542
543 if (event->attr.sample_type & PERF_SAMPLE_RAW) {
544 raw.size = sizeof(u32) + ibs_data.size;
545 raw.data = ibs_data.data;
546 data.raw = &raw;
547 }
548
549 throttle = perf_event_overflow(event, &data, &regs);
550out:
551 if (throttle)
552 perf_ibs_disable_event(perf_ibs, hwc, *config);
553 else
554 perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
555
556 perf_event_update_userpage(event);
557
558 return 1;
559}
560
561static int __kprobes
562perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
563{
564 int handled = 0;
565
566 handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
567 handled += perf_ibs_handle_irq(&perf_ibs_op, regs);
568
569 if (handled)
570 inc_irq_stat(apic_perf_irqs);
571
572 return handled;
573}
574
575static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
576{
577 struct cpu_perf_ibs __percpu *pcpu;
578 int ret;
579
580 pcpu = alloc_percpu(struct cpu_perf_ibs);
581 if (!pcpu)
582 return -ENOMEM;
583
584 perf_ibs->pcpu = pcpu;
585
586 ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
587 if (ret) {
588 perf_ibs->pcpu = NULL;
589 free_percpu(pcpu);
590 }
591
592 return ret;
593}
594
43static __init int perf_event_ibs_init(void) 595static __init int perf_event_ibs_init(void)
44{ 596{
45 if (!ibs_caps) 597 if (!ibs_caps)
46 return -ENODEV; /* ibs not supported by the cpu */ 598 return -ENODEV; /* ibs not supported by the cpu */
47 599
48 perf_pmu_register(&perf_ibs, "ibs", -1); 600 perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
601 if (ibs_caps & IBS_CAPS_OPCNT)
602 perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
603 perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
604 register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
49 printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps); 605 printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
50 606
51 return 0; 607 return 0;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 26b3e2fef104..166546ec6aef 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1027,8 +1027,6 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
1027 u64 status; 1027 u64 status;
1028 int handled; 1028 int handled;
1029 1029
1030 perf_sample_data_init(&data, 0);
1031
1032 cpuc = &__get_cpu_var(cpu_hw_events); 1030 cpuc = &__get_cpu_var(cpu_hw_events);
1033 1031
1034 /* 1032 /*
@@ -1082,7 +1080,7 @@ again:
1082 if (!intel_pmu_save_and_restart(event)) 1080 if (!intel_pmu_save_and_restart(event))
1083 continue; 1081 continue;
1084 1082
1085 data.period = event->hw.last_period; 1083 perf_sample_data_init(&data, 0, event->hw.last_period);
1086 1084
1087 if (has_branch_stack(event)) 1085 if (has_branch_stack(event))
1088 data.br_stack = &cpuc->lbr_stack; 1086 data.br_stack = &cpuc->lbr_stack;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 7f64df19e7dd..5a3edc27f6e5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -316,8 +316,7 @@ int intel_pmu_drain_bts_buffer(void)
316 316
317 ds->bts_index = ds->bts_buffer_base; 317 ds->bts_index = ds->bts_buffer_base;
318 318
319 perf_sample_data_init(&data, 0); 319 perf_sample_data_init(&data, 0, event->hw.last_period);
320 data.period = event->hw.last_period;
321 regs.ip = 0; 320 regs.ip = 0;
322 321
323 /* 322 /*
@@ -564,8 +563,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
564 if (!intel_pmu_save_and_restart(event)) 563 if (!intel_pmu_save_and_restart(event))
565 return; 564 return;
566 565
567 perf_sample_data_init(&data, 0); 566 perf_sample_data_init(&data, 0, event->hw.last_period);
568 data.period = event->hw.last_period;
569 567
570 /* 568 /*
571 * We use the interrupt regs as a base because the PEBS record 569 * We use the interrupt regs as a base because the PEBS record
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index a2dfacfd7103..47124a73dd73 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -1005,8 +1005,6 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
1005 int idx, handled = 0; 1005 int idx, handled = 0;
1006 u64 val; 1006 u64 val;
1007 1007
1008 perf_sample_data_init(&data, 0);
1009
1010 cpuc = &__get_cpu_var(cpu_hw_events); 1008 cpuc = &__get_cpu_var(cpu_hw_events);
1011 1009
1012 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 1010 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
@@ -1034,10 +1032,12 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
1034 handled += overflow; 1032 handled += overflow;
1035 1033
1036 /* event overflow for sure */ 1034 /* event overflow for sure */
1037 data.period = event->hw.last_period; 1035 perf_sample_data_init(&data, 0, hwc->last_period);
1038 1036
1039 if (!x86_perf_event_set_period(event)) 1037 if (!x86_perf_event_set_period(event))
1040 continue; 1038 continue;
1039
1040
1041 if (perf_event_overflow(event, &data, regs)) 1041 if (perf_event_overflow(event, &data, regs))
1042 x86_pmu_stop(event, 0); 1042 x86_pmu_stop(event, 0);
1043 } 1043 }
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index c9a281f272fd..4243e8bbdcb1 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -24,40 +24,21 @@
24#include <trace/syscall.h> 24#include <trace/syscall.h>
25 25
26#include <asm/cacheflush.h> 26#include <asm/cacheflush.h>
27#include <asm/kprobes.h>
27#include <asm/ftrace.h> 28#include <asm/ftrace.h>
28#include <asm/nops.h> 29#include <asm/nops.h>
29#include <asm/nmi.h>
30
31 30
32#ifdef CONFIG_DYNAMIC_FTRACE 31#ifdef CONFIG_DYNAMIC_FTRACE
33 32
34/*
35 * modifying_code is set to notify NMIs that they need to use
36 * memory barriers when entering or exiting. But we don't want
37 * to burden NMIs with unnecessary memory barriers when code
38 * modification is not being done (which is most of the time).
39 *
40 * A mutex is already held when ftrace_arch_code_modify_prepare
41 * and post_process are called. No locks need to be taken here.
42 *
43 * Stop machine will make sure currently running NMIs are done
44 * and new NMIs will see the updated variable before we need
45 * to worry about NMIs doing memory barriers.
46 */
47static int modifying_code __read_mostly;
48static DEFINE_PER_CPU(int, save_modifying_code);
49
50int ftrace_arch_code_modify_prepare(void) 33int ftrace_arch_code_modify_prepare(void)
51{ 34{
52 set_kernel_text_rw(); 35 set_kernel_text_rw();
53 set_all_modules_text_rw(); 36 set_all_modules_text_rw();
54 modifying_code = 1;
55 return 0; 37 return 0;
56} 38}
57 39
58int ftrace_arch_code_modify_post_process(void) 40int ftrace_arch_code_modify_post_process(void)
59{ 41{
60 modifying_code = 0;
61 set_all_modules_text_ro(); 42 set_all_modules_text_ro();
62 set_kernel_text_ro(); 43 set_kernel_text_ro();
63 return 0; 44 return 0;
@@ -90,134 +71,6 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
90 return calc.code; 71 return calc.code;
91} 72}
92 73
93/*
94 * Modifying code must take extra care. On an SMP machine, if
95 * the code being modified is also being executed on another CPU
96 * that CPU will have undefined results and possibly take a GPF.
97 * We use kstop_machine to stop other CPUS from exectuing code.
98 * But this does not stop NMIs from happening. We still need
99 * to protect against that. We separate out the modification of
100 * the code to take care of this.
101 *
102 * Two buffers are added: An IP buffer and a "code" buffer.
103 *
104 * 1) Put the instruction pointer into the IP buffer
105 * and the new code into the "code" buffer.
106 * 2) Wait for any running NMIs to finish and set a flag that says
107 * we are modifying code, it is done in an atomic operation.
108 * 3) Write the code
109 * 4) clear the flag.
110 * 5) Wait for any running NMIs to finish.
111 *
112 * If an NMI is executed, the first thing it does is to call
113 * "ftrace_nmi_enter". This will check if the flag is set to write
114 * and if it is, it will write what is in the IP and "code" buffers.
115 *
116 * The trick is, it does not matter if everyone is writing the same
117 * content to the code location. Also, if a CPU is executing code
118 * it is OK to write to that code location if the contents being written
119 * are the same as what exists.
120 */
121
122#define MOD_CODE_WRITE_FLAG (1 << 31) /* set when NMI should do the write */
123static atomic_t nmi_running = ATOMIC_INIT(0);
124static int mod_code_status; /* holds return value of text write */
125static void *mod_code_ip; /* holds the IP to write to */
126static const void *mod_code_newcode; /* holds the text to write to the IP */
127
128static unsigned nmi_wait_count;
129static atomic_t nmi_update_count = ATOMIC_INIT(0);
130
131int ftrace_arch_read_dyn_info(char *buf, int size)
132{
133 int r;
134
135 r = snprintf(buf, size, "%u %u",
136 nmi_wait_count,
137 atomic_read(&nmi_update_count));
138 return r;
139}
140
141static void clear_mod_flag(void)
142{
143 int old = atomic_read(&nmi_running);
144
145 for (;;) {
146 int new = old & ~MOD_CODE_WRITE_FLAG;
147
148 if (old == new)
149 break;
150
151 old = atomic_cmpxchg(&nmi_running, old, new);
152 }
153}
154
155static void ftrace_mod_code(void)
156{
157 /*
158 * Yes, more than one CPU process can be writing to mod_code_status.
159 * (and the code itself)
160 * But if one were to fail, then they all should, and if one were
161 * to succeed, then they all should.
162 */
163 mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
164 MCOUNT_INSN_SIZE);
165
166 /* if we fail, then kill any new writers */
167 if (mod_code_status)
168 clear_mod_flag();
169}
170
171void ftrace_nmi_enter(void)
172{
173 __this_cpu_write(save_modifying_code, modifying_code);
174
175 if (!__this_cpu_read(save_modifying_code))
176 return;
177
178 if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
179 smp_rmb();
180 ftrace_mod_code();
181 atomic_inc(&nmi_update_count);
182 }
183 /* Must have previous changes seen before executions */
184 smp_mb();
185}
186
187void ftrace_nmi_exit(void)
188{
189 if (!__this_cpu_read(save_modifying_code))
190 return;
191
192 /* Finish all executions before clearing nmi_running */
193 smp_mb();
194 atomic_dec(&nmi_running);
195}
196
197static void wait_for_nmi_and_set_mod_flag(void)
198{
199 if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
200 return;
201
202 do {
203 cpu_relax();
204 } while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));
205
206 nmi_wait_count++;
207}
208
209static void wait_for_nmi(void)
210{
211 if (!atomic_read(&nmi_running))
212 return;
213
214 do {
215 cpu_relax();
216 } while (atomic_read(&nmi_running));
217
218 nmi_wait_count++;
219}
220
221static inline int 74static inline int
222within(unsigned long addr, unsigned long start, unsigned long end) 75within(unsigned long addr, unsigned long start, unsigned long end)
223{ 76{
@@ -238,26 +91,7 @@ do_ftrace_mod_code(unsigned long ip, const void *new_code)
238 if (within(ip, (unsigned long)_text, (unsigned long)_etext)) 91 if (within(ip, (unsigned long)_text, (unsigned long)_etext))
239 ip = (unsigned long)__va(__pa(ip)); 92 ip = (unsigned long)__va(__pa(ip));
240 93
241 mod_code_ip = (void *)ip; 94 return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE);
242 mod_code_newcode = new_code;
243
244 /* The buffers need to be visible before we let NMIs write them */
245 smp_mb();
246
247 wait_for_nmi_and_set_mod_flag();
248
249 /* Make sure all running NMIs have finished before we write the code */
250 smp_mb();
251
252 ftrace_mod_code();
253
254 /* Make sure the write happens before clearing the bit */
255 smp_mb();
256
257 clear_mod_flag();
258 wait_for_nmi();
259
260 return mod_code_status;
261} 95}
262 96
263static const unsigned char *ftrace_nop_replace(void) 97static const unsigned char *ftrace_nop_replace(void)
@@ -334,6 +168,347 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
334 return ret; 168 return ret;
335} 169}
336 170
171int modifying_ftrace_code __read_mostly;
172
173/*
174 * A breakpoint was added to the code address we are about to
175 * modify, and this is the handler that will just skip over it.
176 * We are either changing a nop into a trace call, or a trace
177 * call to a nop. While the change is taking place, we treat
178 * it just like it was a nop.
179 */
180int ftrace_int3_handler(struct pt_regs *regs)
181{
182 if (WARN_ON_ONCE(!regs))
183 return 0;
184
185 if (!ftrace_location(regs->ip - 1))
186 return 0;
187
188 regs->ip += MCOUNT_INSN_SIZE - 1;
189
190 return 1;
191}
192
193static int ftrace_write(unsigned long ip, const char *val, int size)
194{
195 /*
196 * On x86_64, kernel text mappings are mapped read-only with
197 * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
198 * of the kernel text mapping to modify the kernel text.
199 *
200 * For 32bit kernels, these mappings are same and we can use
201 * kernel identity mapping to modify code.
202 */
203 if (within(ip, (unsigned long)_text, (unsigned long)_etext))
204 ip = (unsigned long)__va(__pa(ip));
205
206 return probe_kernel_write((void *)ip, val, size);
207}
208
209static int add_break(unsigned long ip, const char *old)
210{
211 unsigned char replaced[MCOUNT_INSN_SIZE];
212 unsigned char brk = BREAKPOINT_INSTRUCTION;
213
214 if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
215 return -EFAULT;
216
217 /* Make sure it is what we expect it to be */
218 if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0)
219 return -EINVAL;
220
221 if (ftrace_write(ip, &brk, 1))
222 return -EPERM;
223
224 return 0;
225}
226
227static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr)
228{
229 unsigned const char *old;
230 unsigned long ip = rec->ip;
231
232 old = ftrace_call_replace(ip, addr);
233
234 return add_break(rec->ip, old);
235}
236
237
238static int add_brk_on_nop(struct dyn_ftrace *rec)
239{
240 unsigned const char *old;
241
242 old = ftrace_nop_replace();
243
244 return add_break(rec->ip, old);
245}
246
247static int add_breakpoints(struct dyn_ftrace *rec, int enable)
248{
249 unsigned long ftrace_addr;
250 int ret;
251
252 ret = ftrace_test_record(rec, enable);
253
254 ftrace_addr = (unsigned long)FTRACE_ADDR;
255
256 switch (ret) {
257 case FTRACE_UPDATE_IGNORE:
258 return 0;
259
260 case FTRACE_UPDATE_MAKE_CALL:
261 /* converting nop to call */
262 return add_brk_on_nop(rec);
263
264 case FTRACE_UPDATE_MAKE_NOP:
265 /* converting a call to a nop */
266 return add_brk_on_call(rec, ftrace_addr);
267 }
268 return 0;
269}
270
271/*
272 * On error, we need to remove breakpoints. This needs to
273 * be done carefully. If the address does not currently have a
274 * breakpoint, we know we are done. Otherwise, we look at the
275 * remaining 4 bytes of the instruction. If it matches a nop
276 * we replace the breakpoint with the nop. Otherwise we replace
277 * it with the call instruction.
278 */
279static int remove_breakpoint(struct dyn_ftrace *rec)
280{
281 unsigned char ins[MCOUNT_INSN_SIZE];
282 unsigned char brk = BREAKPOINT_INSTRUCTION;
283 const unsigned char *nop;
284 unsigned long ftrace_addr;
285 unsigned long ip = rec->ip;
286
287 /* If we fail the read, just give up */
288 if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE))
289 return -EFAULT;
290
291 /* If this does not have a breakpoint, we are done */
292 if (ins[0] != brk)
293 return -1;
294
295 nop = ftrace_nop_replace();
296
297 /*
298 * If the last 4 bytes of the instruction do not match
299 * a nop, then we assume that this is a call to ftrace_addr.
300 */
301 if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) {
302 /*
303 * For extra paranoia, we check if the breakpoint is on
304 * a call that would actually jump to the ftrace_addr.
305 * If not, don't touch the breakpoint, we may just create
306 * a disaster.
307 */
308 ftrace_addr = (unsigned long)FTRACE_ADDR;
309 nop = ftrace_call_replace(ip, ftrace_addr);
310
311 if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
312 return -EINVAL;
313 }
314
315 return probe_kernel_write((void *)ip, &nop[0], 1);
316}
317
318static int add_update_code(unsigned long ip, unsigned const char *new)
319{
320 /* skip breakpoint */
321 ip++;
322 new++;
323 if (ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1))
324 return -EPERM;
325 return 0;
326}
327
328static int add_update_call(struct dyn_ftrace *rec, unsigned long addr)
329{
330 unsigned long ip = rec->ip;
331 unsigned const char *new;
332
333 new = ftrace_call_replace(ip, addr);
334 return add_update_code(ip, new);
335}
336
337static int add_update_nop(struct dyn_ftrace *rec)
338{
339 unsigned long ip = rec->ip;
340 unsigned const char *new;
341
342 new = ftrace_nop_replace();
343 return add_update_code(ip, new);
344}
345
346static int add_update(struct dyn_ftrace *rec, int enable)
347{
348 unsigned long ftrace_addr;
349 int ret;
350
351 ret = ftrace_test_record(rec, enable);
352
353 ftrace_addr = (unsigned long)FTRACE_ADDR;
354
355 switch (ret) {
356 case FTRACE_UPDATE_IGNORE:
357 return 0;
358
359 case FTRACE_UPDATE_MAKE_CALL:
360 /* converting nop to call */
361 return add_update_call(rec, ftrace_addr);
362
363 case FTRACE_UPDATE_MAKE_NOP:
364 /* converting a call to a nop */
365 return add_update_nop(rec);
366 }
367
368 return 0;
369}
370
371static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr)
372{
373 unsigned long ip = rec->ip;
374 unsigned const char *new;
375
376 new = ftrace_call_replace(ip, addr);
377
378 if (ftrace_write(ip, new, 1))
379 return -EPERM;
380
381 return 0;
382}
383
384static int finish_update_nop(struct dyn_ftrace *rec)
385{
386 unsigned long ip = rec->ip;
387 unsigned const char *new;
388
389 new = ftrace_nop_replace();
390
391 if (ftrace_write(ip, new, 1))
392 return -EPERM;
393 return 0;
394}
395
396static int finish_update(struct dyn_ftrace *rec, int enable)
397{
398 unsigned long ftrace_addr;
399 int ret;
400
401 ret = ftrace_update_record(rec, enable);
402
403 ftrace_addr = (unsigned long)FTRACE_ADDR;
404
405 switch (ret) {
406 case FTRACE_UPDATE_IGNORE:
407 return 0;
408
409 case FTRACE_UPDATE_MAKE_CALL:
410 /* converting nop to call */
411 return finish_update_call(rec, ftrace_addr);
412
413 case FTRACE_UPDATE_MAKE_NOP:
414 /* converting a call to a nop */
415 return finish_update_nop(rec);
416 }
417
418 return 0;
419}
420
421static void do_sync_core(void *data)
422{
423 sync_core();
424}
425
426static void run_sync(void)
427{
428 int enable_irqs = irqs_disabled();
429
430 /* We may be called with interrupts disabled (on bootup). */
431 if (enable_irqs)
432 local_irq_enable();
433 on_each_cpu(do_sync_core, NULL, 1);
434 if (enable_irqs)
435 local_irq_disable();
436}
437
438static void ftrace_replace_code(int enable)
439{
440 struct ftrace_rec_iter *iter;
441 struct dyn_ftrace *rec;
442 const char *report = "adding breakpoints";
443 int count = 0;
444 int ret;
445
446 for_ftrace_rec_iter(iter) {
447 rec = ftrace_rec_iter_record(iter);
448
449 ret = add_breakpoints(rec, enable);
450 if (ret)
451 goto remove_breakpoints;
452 count++;
453 }
454
455 run_sync();
456
457 report = "updating code";
458
459 for_ftrace_rec_iter(iter) {
460 rec = ftrace_rec_iter_record(iter);
461
462 ret = add_update(rec, enable);
463 if (ret)
464 goto remove_breakpoints;
465 }
466
467 run_sync();
468
469 report = "removing breakpoints";
470
471 for_ftrace_rec_iter(iter) {
472 rec = ftrace_rec_iter_record(iter);
473
474 ret = finish_update(rec, enable);
475 if (ret)
476 goto remove_breakpoints;
477 }
478
479 run_sync();
480
481 return;
482
483 remove_breakpoints:
484 ftrace_bug(ret, rec ? rec->ip : 0);
485 printk(KERN_WARNING "Failed on %s (%d):\n", report, count);
486 for_ftrace_rec_iter(iter) {
487 rec = ftrace_rec_iter_record(iter);
488 remove_breakpoint(rec);
489 }
490}
491
492void arch_ftrace_update_code(int command)
493{
494 modifying_ftrace_code++;
495
496 if (command & FTRACE_UPDATE_CALLS)
497 ftrace_replace_code(1);
498 else if (command & FTRACE_DISABLE_CALLS)
499 ftrace_replace_code(0);
500
501 if (command & FTRACE_UPDATE_TRACE_FUNC)
502 ftrace_update_ftrace_func(ftrace_trace_function);
503
504 if (command & FTRACE_START_FUNC_RET)
505 ftrace_enable_ftrace_graph_caller();
506 else if (command & FTRACE_STOP_FUNC_RET)
507 ftrace_disable_ftrace_graph_caller();
508
509 modifying_ftrace_code--;
510}
511
337int __init ftrace_dyn_arch_init(void *data) 512int __init ftrace_dyn_arch_init(void *data)
338{ 513{
339 /* The return code is returned via data */ 514 /* The return code is returned via data */
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 47acaf319165..eb1539eac393 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -84,7 +84,7 @@ __setup("unknown_nmi_panic", setup_unknown_nmi_panic);
84 84
85#define nmi_to_desc(type) (&nmi_desc[type]) 85#define nmi_to_desc(type) (&nmi_desc[type])
86 86
87static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) 87static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
88{ 88{
89 struct nmi_desc *desc = nmi_to_desc(type); 89 struct nmi_desc *desc = nmi_to_desc(type);
90 struct nmiaction *a; 90 struct nmiaction *a;
@@ -209,7 +209,7 @@ void unregister_nmi_handler(unsigned int type, const char *name)
209 209
210EXPORT_SYMBOL_GPL(unregister_nmi_handler); 210EXPORT_SYMBOL_GPL(unregister_nmi_handler);
211 211
212static notrace __kprobes void 212static __kprobes void
213pci_serr_error(unsigned char reason, struct pt_regs *regs) 213pci_serr_error(unsigned char reason, struct pt_regs *regs)
214{ 214{
215 pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", 215 pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
@@ -236,7 +236,7 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs)
236 outb(reason, NMI_REASON_PORT); 236 outb(reason, NMI_REASON_PORT);
237} 237}
238 238
239static notrace __kprobes void 239static __kprobes void
240io_check_error(unsigned char reason, struct pt_regs *regs) 240io_check_error(unsigned char reason, struct pt_regs *regs)
241{ 241{
242 unsigned long i; 242 unsigned long i;
@@ -263,7 +263,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
263 outb(reason, NMI_REASON_PORT); 263 outb(reason, NMI_REASON_PORT);
264} 264}
265 265
266static notrace __kprobes void 266static __kprobes void
267unknown_nmi_error(unsigned char reason, struct pt_regs *regs) 267unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
268{ 268{
269 int handled; 269 int handled;
@@ -305,7 +305,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
305static DEFINE_PER_CPU(bool, swallow_nmi); 305static DEFINE_PER_CPU(bool, swallow_nmi);
306static DEFINE_PER_CPU(unsigned long, last_nmi_rip); 306static DEFINE_PER_CPU(unsigned long, last_nmi_rip);
307 307
308static notrace __kprobes void default_do_nmi(struct pt_regs *regs) 308static __kprobes void default_do_nmi(struct pt_regs *regs)
309{ 309{
310 unsigned char reason = 0; 310 unsigned char reason = 0;
311 int handled; 311 int handled;
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ff9281f16029..92d5756d85fc 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -50,6 +50,7 @@
50#include <asm/processor.h> 50#include <asm/processor.h>
51#include <asm/debugreg.h> 51#include <asm/debugreg.h>
52#include <linux/atomic.h> 52#include <linux/atomic.h>
53#include <asm/ftrace.h>
53#include <asm/traps.h> 54#include <asm/traps.h>
54#include <asm/desc.h> 55#include <asm/desc.h>
55#include <asm/i387.h> 56#include <asm/i387.h>
@@ -303,8 +304,13 @@ gp_in_kernel:
303} 304}
304 305
305/* May run on IST stack. */ 306/* May run on IST stack. */
306dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) 307dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code)
307{ 308{
309#ifdef CONFIG_DYNAMIC_FTRACE
310 /* ftrace must be first, everything else may cause a recursive crash */
311 if (unlikely(modifying_ftrace_code) && ftrace_int3_handler(regs))
312 return;
313#endif
308#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP 314#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
309 if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, 315 if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
310 SIGTRAP) == NOTIFY_STOP) 316 SIGTRAP) == NOTIFY_STOP)
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index f386dc49f988..7515cf0e1805 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -216,9 +216,9 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
216 current_thread_info()->sig_on_uaccess_error = 1; 216 current_thread_info()->sig_on_uaccess_error = 1;
217 217
218 /* 218 /*
219 * 0 is a valid user pointer (in the access_ok sense) on 32-bit and 219 * NULL is a valid user pointer (in the access_ok sense) on 32-bit and
220 * 64-bit, so we don't need to special-case it here. For all the 220 * 64-bit, so we don't need to special-case it here. For all the
221 * vsyscalls, 0 means "don't write anything" not "write it at 221 * vsyscalls, NULL means "don't write anything" not "write it at
222 * address 0". 222 * address 0".
223 */ 223 */
224 ret = -EFAULT; 224 ret = -EFAULT;
@@ -247,7 +247,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
247 247
248 ret = sys_getcpu((unsigned __user *)regs->di, 248 ret = sys_getcpu((unsigned __user *)regs->di,
249 (unsigned __user *)regs->si, 249 (unsigned __user *)regs->si,
250 0); 250 NULL);
251 break; 251 break;
252 } 252 }
253 253
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 173df38dbda5..2e88438ffd83 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -459,17 +459,17 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu)
459 pmu->available_event_types = ~entry->ebx & ((1ull << bitmap_len) - 1); 459 pmu->available_event_types = ~entry->ebx & ((1ull << bitmap_len) - 1);
460 460
461 if (pmu->version == 1) { 461 if (pmu->version == 1) {
462 pmu->global_ctrl = (1 << pmu->nr_arch_gp_counters) - 1; 462 pmu->nr_arch_fixed_counters = 0;
463 return; 463 } else {
464 pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f),
465 X86_PMC_MAX_FIXED);
466 pmu->counter_bitmask[KVM_PMC_FIXED] =
467 ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1;
464 } 468 }
465 469
466 pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f), 470 pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) |
467 X86_PMC_MAX_FIXED); 471 (((1ull << pmu->nr_arch_fixed_counters) - 1) << X86_PMC_IDX_FIXED);
468 pmu->counter_bitmask[KVM_PMC_FIXED] = 472 pmu->global_ctrl_mask = ~pmu->global_ctrl;
469 ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1;
470 pmu->global_ctrl_mask = ~(((1 << pmu->nr_arch_gp_counters) - 1)
471 | (((1ull << pmu->nr_arch_fixed_counters) - 1)
472 << X86_PMC_IDX_FIXED));
473} 473}
474 474
475void kvm_pmu_init(struct kvm_vcpu *vcpu) 475void kvm_pmu_init(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ad85adfef843..4ff0ab9bc3c8 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2210,9 +2210,12 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
2210 msr = find_msr_entry(vmx, msr_index); 2210 msr = find_msr_entry(vmx, msr_index);
2211 if (msr) { 2211 if (msr) {
2212 msr->data = data; 2212 msr->data = data;
2213 if (msr - vmx->guest_msrs < vmx->save_nmsrs) 2213 if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
2214 preempt_disable();
2214 kvm_set_shared_msr(msr->index, msr->data, 2215 kvm_set_shared_msr(msr->index, msr->data,
2215 msr->mask); 2216 msr->mask);
2217 preempt_enable();
2218 }
2216 break; 2219 break;
2217 } 2220 }
2218 ret = kvm_set_msr_common(vcpu, msr_index, data); 2221 ret = kvm_set_msr_common(vcpu, msr_index, data);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4044ce0bf7c1..91a5e989abcf 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6336,13 +6336,11 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
6336 if (npages && !old.rmap) { 6336 if (npages && !old.rmap) {
6337 unsigned long userspace_addr; 6337 unsigned long userspace_addr;
6338 6338
6339 down_write(&current->mm->mmap_sem); 6339 userspace_addr = vm_mmap(NULL, 0,
6340 userspace_addr = do_mmap(NULL, 0,
6341 npages * PAGE_SIZE, 6340 npages * PAGE_SIZE,
6342 PROT_READ | PROT_WRITE, 6341 PROT_READ | PROT_WRITE,
6343 map_flags, 6342 map_flags,
6344 0); 6343 0);
6345 up_write(&current->mm->mmap_sem);
6346 6344
6347 if (IS_ERR((void *)userspace_addr)) 6345 if (IS_ERR((void *)userspace_addr))
6348 return PTR_ERR((void *)userspace_addr); 6346 return PTR_ERR((void *)userspace_addr);
@@ -6366,10 +6364,8 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
6366 if (!user_alloc && !old.user_alloc && old.rmap && !npages) { 6364 if (!user_alloc && !old.user_alloc && old.rmap && !npages) {
6367 int ret; 6365 int ret;
6368 6366
6369 down_write(&current->mm->mmap_sem); 6367 ret = vm_munmap(old.userspace_addr,
6370 ret = do_munmap(current->mm, old.userspace_addr,
6371 old.npages * PAGE_SIZE); 6368 old.npages * PAGE_SIZE);
6372 up_write(&current->mm->mmap_sem);
6373 if (ret < 0) 6369 if (ret < 0)
6374 printk(KERN_WARNING 6370 printk(KERN_WARNING
6375 "kvm_vm_ioctl_set_memory_region: " 6371 "kvm_vm_ioctl_set_memory_region: "
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 25feb1ae71c5..b1e6c4b2e8eb 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -379,8 +379,8 @@ err_out:
379 return; 379 return;
380} 380}
381 381
382/* Decode moffset16/32/64 */ 382/* Decode moffset16/32/64. Return 0 if failed */
383static void __get_moffset(struct insn *insn) 383static int __get_moffset(struct insn *insn)
384{ 384{
385 switch (insn->addr_bytes) { 385 switch (insn->addr_bytes) {
386 case 2: 386 case 2:
@@ -397,15 +397,19 @@ static void __get_moffset(struct insn *insn)
397 insn->moffset2.value = get_next(int, insn); 397 insn->moffset2.value = get_next(int, insn);
398 insn->moffset2.nbytes = 4; 398 insn->moffset2.nbytes = 4;
399 break; 399 break;
400 default: /* addr_bytes must be modified manually */
401 goto err_out;
400 } 402 }
401 insn->moffset1.got = insn->moffset2.got = 1; 403 insn->moffset1.got = insn->moffset2.got = 1;
402 404
405 return 1;
406
403err_out: 407err_out:
404 return; 408 return 0;
405} 409}
406 410
407/* Decode imm v32(Iz) */ 411/* Decode imm v32(Iz). Return 0 if failed */
408static void __get_immv32(struct insn *insn) 412static int __get_immv32(struct insn *insn)
409{ 413{
410 switch (insn->opnd_bytes) { 414 switch (insn->opnd_bytes) {
411 case 2: 415 case 2:
@@ -417,14 +421,18 @@ static void __get_immv32(struct insn *insn)
417 insn->immediate.value = get_next(int, insn); 421 insn->immediate.value = get_next(int, insn);
418 insn->immediate.nbytes = 4; 422 insn->immediate.nbytes = 4;
419 break; 423 break;
424 default: /* opnd_bytes must be modified manually */
425 goto err_out;
420 } 426 }
421 427
428 return 1;
429
422err_out: 430err_out:
423 return; 431 return 0;
424} 432}
425 433
426/* Decode imm v64(Iv/Ov) */ 434/* Decode imm v64(Iv/Ov), Return 0 if failed */
427static void __get_immv(struct insn *insn) 435static int __get_immv(struct insn *insn)
428{ 436{
429 switch (insn->opnd_bytes) { 437 switch (insn->opnd_bytes) {
430 case 2: 438 case 2:
@@ -441,15 +449,18 @@ static void __get_immv(struct insn *insn)
441 insn->immediate2.value = get_next(int, insn); 449 insn->immediate2.value = get_next(int, insn);
442 insn->immediate2.nbytes = 4; 450 insn->immediate2.nbytes = 4;
443 break; 451 break;
452 default: /* opnd_bytes must be modified manually */
453 goto err_out;
444 } 454 }
445 insn->immediate1.got = insn->immediate2.got = 1; 455 insn->immediate1.got = insn->immediate2.got = 1;
446 456
457 return 1;
447err_out: 458err_out:
448 return; 459 return 0;
449} 460}
450 461
451/* Decode ptr16:16/32(Ap) */ 462/* Decode ptr16:16/32(Ap) */
452static void __get_immptr(struct insn *insn) 463static int __get_immptr(struct insn *insn)
453{ 464{
454 switch (insn->opnd_bytes) { 465 switch (insn->opnd_bytes) {
455 case 2: 466 case 2:
@@ -462,14 +473,17 @@ static void __get_immptr(struct insn *insn)
462 break; 473 break;
463 case 8: 474 case 8:
464 /* ptr16:64 does not exist (no segment) */ 475 /* ptr16:64 does not exist (no segment) */
465 return; 476 return 0;
477 default: /* opnd_bytes must be modified manually */
478 goto err_out;
466 } 479 }
467 insn->immediate2.value = get_next(unsigned short, insn); 480 insn->immediate2.value = get_next(unsigned short, insn);
468 insn->immediate2.nbytes = 2; 481 insn->immediate2.nbytes = 2;
469 insn->immediate1.got = insn->immediate2.got = 1; 482 insn->immediate1.got = insn->immediate2.got = 1;
470 483
484 return 1;
471err_out: 485err_out:
472 return; 486 return 0;
473} 487}
474 488
475/** 489/**
@@ -489,7 +503,8 @@ void insn_get_immediate(struct insn *insn)
489 insn_get_displacement(insn); 503 insn_get_displacement(insn);
490 504
491 if (inat_has_moffset(insn->attr)) { 505 if (inat_has_moffset(insn->attr)) {
492 __get_moffset(insn); 506 if (!__get_moffset(insn))
507 goto err_out;
493 goto done; 508 goto done;
494 } 509 }
495 510
@@ -517,16 +532,20 @@ void insn_get_immediate(struct insn *insn)
517 insn->immediate2.nbytes = 4; 532 insn->immediate2.nbytes = 4;
518 break; 533 break;
519 case INAT_IMM_PTR: 534 case INAT_IMM_PTR:
520 __get_immptr(insn); 535 if (!__get_immptr(insn))
536 goto err_out;
521 break; 537 break;
522 case INAT_IMM_VWORD32: 538 case INAT_IMM_VWORD32:
523 __get_immv32(insn); 539 if (!__get_immv32(insn))
540 goto err_out;
524 break; 541 break;
525 case INAT_IMM_VWORD: 542 case INAT_IMM_VWORD:
526 __get_immv(insn); 543 if (!__get_immv(insn))
544 goto err_out;
527 break; 545 break;
528 default: 546 default:
529 break; 547 /* Here, insn must have an immediate, but failed */
548 goto err_out;
530 } 549 }
531 if (inat_has_second_immediate(insn->attr)) { 550 if (inat_has_second_immediate(insn->attr)) {
532 insn->immediate2.value = get_next(char, insn); 551 insn->immediate2.value = get_next(char, insn);
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index 97be9cb54483..d6ae30bbd7bb 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -7,6 +7,8 @@
7#include <linux/highmem.h> 7#include <linux/highmem.h>
8#include <linux/module.h> 8#include <linux/module.h>
9 9
10#include <asm/word-at-a-time.h>
11
10/* 12/*
11 * best effort, GUP based copy_from_user() that is NMI-safe 13 * best effort, GUP based copy_from_user() that is NMI-safe
12 */ 14 */
@@ -41,3 +43,104 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
41 return len; 43 return len;
42} 44}
43EXPORT_SYMBOL_GPL(copy_from_user_nmi); 45EXPORT_SYMBOL_GPL(copy_from_user_nmi);
46
47static inline unsigned long count_bytes(unsigned long mask)
48{
49 mask = (mask - 1) & ~mask;
50 mask >>= 7;
51 return count_masked_bytes(mask);
52}
53
54/*
55 * Do a strncpy, return length of string without final '\0'.
56 * 'count' is the user-supplied count (return 'count' if we
57 * hit it), 'max' is the address space maximum (and we return
58 * -EFAULT if we hit it).
59 */
60static inline long do_strncpy_from_user(char *dst, const char __user *src, long count, unsigned long max)
61{
62 long res = 0;
63
64 /*
65 * Truncate 'max' to the user-specified limit, so that
66 * we only have one limit we need to check in the loop
67 */
68 if (max > count)
69 max = count;
70
71 while (max >= sizeof(unsigned long)) {
72 unsigned long c;
73
74 /* Fall back to byte-at-a-time if we get a page fault */
75 if (unlikely(__get_user(c,(unsigned long __user *)(src+res))))
76 break;
77 /* This can write a few bytes past the NUL character, but that's ok */
78 *(unsigned long *)(dst+res) = c;
79 c = has_zero(c);
80 if (c)
81 return res + count_bytes(c);
82 res += sizeof(unsigned long);
83 max -= sizeof(unsigned long);
84 }
85
86 while (max) {
87 char c;
88
89 if (unlikely(__get_user(c,src+res)))
90 return -EFAULT;
91 dst[res] = c;
92 if (!c)
93 return res;
94 res++;
95 max--;
96 }
97
98 /*
99 * Uhhuh. We hit 'max'. But was that the user-specified maximum
100 * too? If so, that's ok - we got as much as the user asked for.
101 */
102 if (res >= count)
103 return res;
104
105 /*
106 * Nope: we hit the address space limit, and we still had more
107 * characters the caller would have wanted. That's an EFAULT.
108 */
109 return -EFAULT;
110}
111
112/**
113 * strncpy_from_user: - Copy a NUL terminated string from userspace.
114 * @dst: Destination address, in kernel space. This buffer must be at
115 * least @count bytes long.
116 * @src: Source address, in user space.
117 * @count: Maximum number of bytes to copy, including the trailing NUL.
118 *
119 * Copies a NUL-terminated string from userspace to kernel space.
120 *
121 * On success, returns the length of the string (not including the trailing
122 * NUL).
123 *
124 * If access to userspace fails, returns -EFAULT (some data may have been
125 * copied).
126 *
127 * If @count is smaller than the length of the string, copies @count bytes
128 * and returns @count.
129 */
130long
131strncpy_from_user(char *dst, const char __user *src, long count)
132{
133 unsigned long max_addr, src_addr;
134
135 if (unlikely(count <= 0))
136 return 0;
137
138 max_addr = current_thread_info()->addr_limit.seg;
139 src_addr = (unsigned long)src;
140 if (likely(src_addr < max_addr)) {
141 unsigned long max = max_addr - src_addr;
142 return do_strncpy_from_user(dst, src, count, max);
143 }
144 return -EFAULT;
145}
146EXPORT_SYMBOL(strncpy_from_user);
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index d9b094ca7aaa..ef2a6a5d78e3 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -33,93 +33,6 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon
33 __movsl_is_ok((unsigned long)(a1), (unsigned long)(a2), (n)) 33 __movsl_is_ok((unsigned long)(a1), (unsigned long)(a2), (n))
34 34
35/* 35/*
36 * Copy a null terminated string from userspace.
37 */
38
39#define __do_strncpy_from_user(dst, src, count, res) \
40do { \
41 int __d0, __d1, __d2; \
42 might_fault(); \
43 __asm__ __volatile__( \
44 " testl %1,%1\n" \
45 " jz 2f\n" \
46 "0: lodsb\n" \
47 " stosb\n" \
48 " testb %%al,%%al\n" \
49 " jz 1f\n" \
50 " decl %1\n" \
51 " jnz 0b\n" \
52 "1: subl %1,%0\n" \
53 "2:\n" \
54 ".section .fixup,\"ax\"\n" \
55 "3: movl %5,%0\n" \
56 " jmp 2b\n" \
57 ".previous\n" \
58 _ASM_EXTABLE(0b,3b) \
59 : "=&d"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), \
60 "=&D" (__d2) \
61 : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
62 : "memory"); \
63} while (0)
64
65/**
66 * __strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking.
67 * @dst: Destination address, in kernel space. This buffer must be at
68 * least @count bytes long.
69 * @src: Source address, in user space.
70 * @count: Maximum number of bytes to copy, including the trailing NUL.
71 *
72 * Copies a NUL-terminated string from userspace to kernel space.
73 * Caller must check the specified block with access_ok() before calling
74 * this function.
75 *
76 * On success, returns the length of the string (not including the trailing
77 * NUL).
78 *
79 * If access to userspace fails, returns -EFAULT (some data may have been
80 * copied).
81 *
82 * If @count is smaller than the length of the string, copies @count bytes
83 * and returns @count.
84 */
85long
86__strncpy_from_user(char *dst, const char __user *src, long count)
87{
88 long res;
89 __do_strncpy_from_user(dst, src, count, res);
90 return res;
91}
92EXPORT_SYMBOL(__strncpy_from_user);
93
94/**
95 * strncpy_from_user: - Copy a NUL terminated string from userspace.
96 * @dst: Destination address, in kernel space. This buffer must be at
97 * least @count bytes long.
98 * @src: Source address, in user space.
99 * @count: Maximum number of bytes to copy, including the trailing NUL.
100 *
101 * Copies a NUL-terminated string from userspace to kernel space.
102 *
103 * On success, returns the length of the string (not including the trailing
104 * NUL).
105 *
106 * If access to userspace fails, returns -EFAULT (some data may have been
107 * copied).
108 *
109 * If @count is smaller than the length of the string, copies @count bytes
110 * and returns @count.
111 */
112long
113strncpy_from_user(char *dst, const char __user *src, long count)
114{
115 long res = -EFAULT;
116 if (access_ok(VERIFY_READ, src, 1))
117 __do_strncpy_from_user(dst, src, count, res);
118 return res;
119}
120EXPORT_SYMBOL(strncpy_from_user);
121
122/*
123 * Zero Userspace 36 * Zero Userspace
124 */ 37 */
125 38
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index b7c2849ffb66..0d0326f388c0 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -9,55 +9,6 @@
9#include <asm/uaccess.h> 9#include <asm/uaccess.h>
10 10
11/* 11/*
12 * Copy a null terminated string from userspace.
13 */
14
15#define __do_strncpy_from_user(dst,src,count,res) \
16do { \
17 long __d0, __d1, __d2; \
18 might_fault(); \
19 __asm__ __volatile__( \
20 " testq %1,%1\n" \
21 " jz 2f\n" \
22 "0: lodsb\n" \
23 " stosb\n" \
24 " testb %%al,%%al\n" \
25 " jz 1f\n" \
26 " decq %1\n" \
27 " jnz 0b\n" \
28 "1: subq %1,%0\n" \
29 "2:\n" \
30 ".section .fixup,\"ax\"\n" \
31 "3: movq %5,%0\n" \
32 " jmp 2b\n" \
33 ".previous\n" \
34 _ASM_EXTABLE(0b,3b) \
35 : "=&r"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), \
36 "=&D" (__d2) \
37 : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
38 : "memory"); \
39} while (0)
40
41long
42__strncpy_from_user(char *dst, const char __user *src, long count)
43{
44 long res;
45 __do_strncpy_from_user(dst, src, count, res);
46 return res;
47}
48EXPORT_SYMBOL(__strncpy_from_user);
49
50long
51strncpy_from_user(char *dst, const char __user *src, long count)
52{
53 long res = -EFAULT;
54 if (access_ok(VERIFY_READ, src, 1))
55 return __strncpy_from_user(dst, src, count);
56 return res;
57}
58EXPORT_SYMBOL(strncpy_from_user);
59
60/*
61 * Zero Userspace 12 * Zero Userspace
62 */ 13 */
63 14
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h
new file mode 100644
index 000000000000..7d01b8c56c00
--- /dev/null
+++ b/arch/x86/um/asm/barrier.h
@@ -0,0 +1,75 @@
1#ifndef _ASM_UM_BARRIER_H_
2#define _ASM_UM_BARRIER_H_
3
4#include <asm/asm.h>
5#include <asm/segment.h>
6#include <asm/cpufeature.h>
7#include <asm/cmpxchg.h>
8#include <asm/nops.h>
9
10#include <linux/kernel.h>
11#include <linux/irqflags.h>
12
13/*
14 * Force strict CPU ordering.
15 * And yes, this is required on UP too when we're talking
16 * to devices.
17 */
18#ifdef CONFIG_X86_32
19
20#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
21#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
22#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
23
24#else /* CONFIG_X86_32 */
25
26#define mb() asm volatile("mfence" : : : "memory")
27#define rmb() asm volatile("lfence" : : : "memory")
28#define wmb() asm volatile("sfence" : : : "memory")
29
30#endif /* CONFIG_X86_32 */
31
32#define read_barrier_depends() do { } while (0)
33
34#ifdef CONFIG_SMP
35
36#define smp_mb() mb()
37#ifdef CONFIG_X86_PPRO_FENCE
38#define smp_rmb() rmb()
39#else /* CONFIG_X86_PPRO_FENCE */
40#define smp_rmb() barrier()
41#endif /* CONFIG_X86_PPRO_FENCE */
42
43#ifdef CONFIG_X86_OOSTORE
44#define smp_wmb() wmb()
45#else /* CONFIG_X86_OOSTORE */
46#define smp_wmb() barrier()
47#endif /* CONFIG_X86_OOSTORE */
48
49#define smp_read_barrier_depends() read_barrier_depends()
50#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
51
52#else /* CONFIG_SMP */
53
54#define smp_mb() barrier()
55#define smp_rmb() barrier()
56#define smp_wmb() barrier()
57#define smp_read_barrier_depends() do { } while (0)
58#define set_mb(var, value) do { var = value; barrier(); } while (0)
59
60#endif /* CONFIG_SMP */
61
62/*
63 * Stop RDTSC speculation. This is needed when you need to use RDTSC
64 * (or get_cycles or vread that possibly accesses the TSC) in a defined
65 * code region.
66 *
67 * (Could use an alternative three way for this if there was one.)
68 */
69static inline void rdtsc_barrier(void)
70{
71 alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
72 alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
73}
74
75#endif
diff --git a/arch/x86/um/asm/system.h b/arch/x86/um/asm/system.h
deleted file mode 100644
index a459fd9b7598..000000000000
--- a/arch/x86/um/asm/system.h
+++ /dev/null
@@ -1,135 +0,0 @@
1#ifndef _ASM_X86_SYSTEM_H_
2#define _ASM_X86_SYSTEM_H_
3
4#include <asm/asm.h>
5#include <asm/segment.h>
6#include <asm/cpufeature.h>
7#include <asm/cmpxchg.h>
8#include <asm/nops.h>
9
10#include <linux/kernel.h>
11#include <linux/irqflags.h>
12
13/* entries in ARCH_DLINFO: */
14#ifdef CONFIG_IA32_EMULATION
15# define AT_VECTOR_SIZE_ARCH 2
16#else
17# define AT_VECTOR_SIZE_ARCH 1
18#endif
19
20extern unsigned long arch_align_stack(unsigned long sp);
21
22void default_idle(void);
23
24/*
25 * Force strict CPU ordering.
26 * And yes, this is required on UP too when we're talking
27 * to devices.
28 */
29#ifdef CONFIG_X86_32
30/*
31 * Some non-Intel clones support out of order store. wmb() ceases to be a
32 * nop for these.
33 */
34#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
35#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
36#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
37#else
38#define mb() asm volatile("mfence":::"memory")
39#define rmb() asm volatile("lfence":::"memory")
40#define wmb() asm volatile("sfence" ::: "memory")
41#endif
42
43/**
44 * read_barrier_depends - Flush all pending reads that subsequents reads
45 * depend on.
46 *
47 * No data-dependent reads from memory-like regions are ever reordered
48 * over this barrier. All reads preceding this primitive are guaranteed
49 * to access memory (but not necessarily other CPUs' caches) before any
50 * reads following this primitive that depend on the data return by
51 * any of the preceding reads. This primitive is much lighter weight than
52 * rmb() on most CPUs, and is never heavier weight than is
53 * rmb().
54 *
55 * These ordering constraints are respected by both the local CPU
56 * and the compiler.
57 *
58 * Ordering is not guaranteed by anything other than these primitives,
59 * not even by data dependencies. See the documentation for
60 * memory_barrier() for examples and URLs to more information.
61 *
62 * For example, the following code would force ordering (the initial
63 * value of "a" is zero, "b" is one, and "p" is "&a"):
64 *
65 * <programlisting>
66 * CPU 0 CPU 1
67 *
68 * b = 2;
69 * memory_barrier();
70 * p = &b; q = p;
71 * read_barrier_depends();
72 * d = *q;
73 * </programlisting>
74 *
75 * because the read of "*q" depends on the read of "p" and these
76 * two reads are separated by a read_barrier_depends(). However,
77 * the following code, with the same initial values for "a" and "b":
78 *
79 * <programlisting>
80 * CPU 0 CPU 1
81 *
82 * a = 2;
83 * memory_barrier();
84 * b = 3; y = b;
85 * read_barrier_depends();
86 * x = a;
87 * </programlisting>
88 *
89 * does not enforce ordering, since there is no data dependency between
90 * the read of "a" and the read of "b". Therefore, on some CPUs, such
91 * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
92 * in cases like this where there are no data dependencies.
93 **/
94
95#define read_barrier_depends() do { } while (0)
96
97#ifdef CONFIG_SMP
98#define smp_mb() mb()
99#ifdef CONFIG_X86_PPRO_FENCE
100# define smp_rmb() rmb()
101#else
102# define smp_rmb() barrier()
103#endif
104#ifdef CONFIG_X86_OOSTORE
105# define smp_wmb() wmb()
106#else
107# define smp_wmb() barrier()
108#endif
109#define smp_read_barrier_depends() read_barrier_depends()
110#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
111#else
112#define smp_mb() barrier()
113#define smp_rmb() barrier()
114#define smp_wmb() barrier()
115#define smp_read_barrier_depends() do { } while (0)
116#define set_mb(var, value) do { var = value; barrier(); } while (0)
117#endif
118
119/*
120 * Stop RDTSC speculation. This is needed when you need to use RDTSC
121 * (or get_cycles or vread that possibly accesses the TSC) in a defined
122 * code region.
123 *
124 * (Could use an alternative three way for this if there was one.)
125 */
126static inline void rdtsc_barrier(void)
127{
128 alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
129 alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
130}
131
132extern void *_switch_to(void *prev, void *next, void *last);
133#define switch_to(prev, next, last) prev = _switch_to(prev, next, last)
134
135#endif