author	Linus Torvalds <torvalds@linux-foundation.org>	2011-07-22 19:44:39 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-07-22 19:44:39 -0400
commit	4d4abdcb1dee03a4f9d6d2021622ed07e14dfd17 (patch)
tree	4ed4c74b70240451065165fda5fb2059f8c6b1e5 /arch
parent	0342cbcfced2ee937d7c8e1c63f3d3082da7c7dc (diff)
parent	7fcfd1abd6480d3b9ef17f5759c175e036e835cf (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (123 commits)
  perf: Remove the nmi parameter from the oprofile_perf backend
  x86, perf: Make copy_from_user_nmi() a library function
  perf: Remove perf_event_attr::type check
  x86, perf: P4 PMU - Fix typos in comments and style cleanup
  perf tools: Make test use the preset debugfs path
  perf tools: Add automated tests for events parsing
  perf tools: De-opt the parse_events function
  perf script: Fix display of IP address for non-callchain path
  perf tools: Fix endian conversion reading event attr from file header
  perf tools: Add missing 'node' alias to the hw_cache[] array
  perf probe: Support adding probes on offline kernel modules
  perf probe: Add probed module in front of function
  perf probe: Introduce debuginfo to encapsulate dwarf information
  perf-probe: Move dwarf library routines to dwarf-aux.{c, h}
  perf probe: Remove redundant dwarf functions
  perf probe: Move strtailcmp to string.c
  perf probe: Rename DIE_FIND_CB_FOUND to DIE_FIND_CB_END
  tracing/kprobe: Update symbol reference when loading module
  tracing/kprobes: Support module init function probing
  kprobes: Return -ENOENT if probe point doesn't exist
  ...
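
The change that touches almost every file below is the removal of the explicit nmi argument from perf_event_overflow() and perf_sw_event(). A minimal before/after sketch of the recurring pattern, paraphrased from the hunks that follow rather than taken verbatim from any one file:

	/* before: callers passed an explicit nmi flag */
	if (perf_event_overflow(event, 1 /* nmi */, &data, regs))
		armpmu->disable(hwc, idx);
	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0 /* nmi */, regs, addr);

	/* after: the flag is gone; NMI context is tracked by the perf core */
	if (perf_event_overflow(event, &data, regs))
		armpmu->disable(hwc, idx);
	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);

The other visible theme is the new C(NODE) slot in each architecture's cache event map, filled with CACHE_OP_UNSUPPORTED / -1 placeholders where no node-level counters exist.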
Diffstat (limited to 'arch')
-rw-r--r--  arch/alpha/kernel/perf_event.c | 2
-rw-r--r--  arch/alpha/kernel/time.c | 2
-rw-r--r--  arch/arm/kernel/perf_event_v6.c | 30
-rw-r--r--  arch/arm/kernel/perf_event_v7.c | 30
-rw-r--r--  arch/arm/kernel/perf_event_xscale.c | 18
-rw-r--r--  arch/arm/kernel/ptrace.c | 5
-rw-r--r--  arch/arm/kernel/swp_emulate.c | 2
-rw-r--r--  arch/arm/mm/fault.c | 6
-rw-r--r--  arch/mips/include/asm/stacktrace.h | 4
-rw-r--r--  arch/mips/kernel/perf_event.c | 2
-rw-r--r--  arch/mips/kernel/perf_event_mipsxx.c | 28
-rw-r--r--  arch/mips/kernel/process.c | 19
-rw-r--r--  arch/mips/kernel/traps.c | 8
-rw-r--r--  arch/mips/kernel/unaligned.c | 5
-rw-r--r--  arch/mips/math-emu/cp1emu.c | 3
-rw-r--r--  arch/mips/mm/fault.c | 8
-rw-r--r--  arch/mips/oprofile/Makefile | 2
-rw-r--r--  arch/mips/oprofile/backtrace.c | 175
-rw-r--r--  arch/mips/oprofile/common.c | 1
-rw-r--r--  arch/mips/oprofile/op_impl.h | 2
-rw-r--r--  arch/powerpc/include/asm/emulated_ops.h | 4
-rw-r--r--  arch/powerpc/include/asm/hw_breakpoint.h | 2
-rw-r--r--  arch/powerpc/kernel/e500-pmu.c | 5
-rw-r--r--  arch/powerpc/kernel/mpc7450-pmu.c | 5
-rw-r--r--  arch/powerpc/kernel/perf_event.c | 6
-rw-r--r--  arch/powerpc/kernel/perf_event_fsl_emb.c | 6
-rw-r--r--  arch/powerpc/kernel/power4-pmu.c | 5
-rw-r--r--  arch/powerpc/kernel/power5+-pmu.c | 5
-rw-r--r--  arch/powerpc/kernel/power5-pmu.c | 5
-rw-r--r--  arch/powerpc/kernel/power6-pmu.c | 5
-rw-r--r--  arch/powerpc/kernel/power7-pmu.c | 5
-rw-r--r--  arch/powerpc/kernel/ppc970-pmu.c | 5
-rw-r--r--  arch/powerpc/kernel/ptrace.c | 4
-rw-r--r--  arch/powerpc/kernel/time.c | 2
-rw-r--r--  arch/powerpc/mm/fault.c | 6
-rw-r--r--  arch/s390/mm/fault.c | 6
-rw-r--r--  arch/sh/kernel/cpu/sh4/perf_event.c | 15
-rw-r--r--  arch/sh/kernel/cpu/sh4a/perf_event.c | 15
-rw-r--r--  arch/sh/kernel/ptrace_32.c | 5
-rw-r--r--  arch/sh/kernel/traps_32.c | 2
-rw-r--r--  arch/sh/kernel/traps_64.c | 8
-rw-r--r--  arch/sh/math-emu/math.c | 2
-rw-r--r--  arch/sh/mm/fault_32.c | 6
-rw-r--r--  arch/sh/mm/tlbflush_64.c | 6
-rw-r--r--  arch/sparc/kernel/perf_event.c | 44
-rw-r--r--  arch/sparc/kernel/unaligned_32.c | 4
-rw-r--r--  arch/sparc/kernel/unaligned_64.c | 12
-rw-r--r--  arch/sparc/kernel/visemul.c | 2
-rw-r--r--  arch/sparc/math-emu/math_32.c | 2
-rw-r--r--  arch/sparc/math-emu/math_64.c | 2
-rw-r--r--  arch/sparc/mm/fault_32.c | 8
-rw-r--r--  arch/sparc/mm/fault_64.c | 8
-rw-r--r--  arch/x86/include/asm/irqflags.h | 11
-rw-r--r--  arch/x86/include/asm/perf_event.h | 5
-rw-r--r--  arch/x86/include/asm/perf_event_p4.h | 33
-rw-r--r--  arch/x86/include/asm/uaccess.h | 3
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 168
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c | 14
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c | 385
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c | 4
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p4.c | 119
-rw-r--r--  arch/x86/kernel/dumpstack_64.c | 37
-rw-r--r--  arch/x86/kernel/entry_64.S | 69
-rw-r--r--  arch/x86/kernel/kgdb.c | 4
-rw-r--r--  arch/x86/kernel/ptrace.c | 5
-rw-r--r--  arch/x86/kernel/stacktrace.c | 2
-rw-r--r--  arch/x86/lib/Makefile | 2
-rw-r--r--  arch/x86/lib/usercopy.c | 43
-rw-r--r--  arch/x86/mm/fault.c | 6
-rw-r--r--  arch/x86/mm/kmemcheck/error.c | 2
-rw-r--r--  arch/x86/oprofile/backtrace.c | 21
71 files changed, 1115 insertions, 387 deletions
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index 90561c45e7d8..8e47709160f8 100644
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -847,7 +847,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
 		data.period = event->hw.last_period;
 
 		if (alpha_perf_event_set_period(event, hwc, idx)) {
-			if (perf_event_overflow(event, 1, &data, regs)) {
+			if (perf_event_overflow(event, &data, regs)) {
 				/* Interrupts coming too quickly; "throttle" the
 				 * counter, i.e., disable it for a little while.
 				 */
diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index 818e74ed45dc..f20d1b5396b8 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -91,7 +91,7 @@ DEFINE_PER_CPU(u8, irq_work_pending);
 #define test_irq_work_pending()		__get_cpu_var(irq_work_pending)
 #define clear_irq_work_pending()	__get_cpu_var(irq_work_pending) = 0
 
-void set_irq_work_pending(void)
+void arch_irq_work_raise(void)
 {
 	set_irq_work_pending_flag();
 }
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index f1e8dd94afe8..dd7f3b9f4cb3 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -173,6 +173,20 @@ static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
 		},
 	},
+	[C(NODE)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
 };
 
 enum armv6mpcore_perf_types {
@@ -310,6 +324,20 @@ static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
 		},
 	},
+	[C(NODE)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
 };
 
 static inline unsigned long
@@ -479,7 +507,7 @@ armv6pmu_handle_irq(int irq_num,
 		if (!armpmu_event_set_period(event, hwc, idx))
 			continue;
 
-		if (perf_event_overflow(event, 0, &data, regs))
+		if (perf_event_overflow(event, &data, regs))
 			armpmu->disable(hwc, idx);
 	}
 
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 4960686afb58..e20ca9cafef5 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -255,6 +255,20 @@ static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
 		},
 	},
+	[C(NODE)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
 };
 
 /*
@@ -371,6 +385,20 @@ static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
 		},
 	},
+	[C(NODE)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
 };
 
 /*
@@ -787,7 +815,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 		if (!armpmu_event_set_period(event, hwc, idx))
 			continue;
 
-		if (perf_event_overflow(event, 0, &data, regs))
+		if (perf_event_overflow(event, &data, regs))
 			armpmu->disable(hwc, idx);
 	}
 
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
index 39affbe4fdb2..3c4397491d08 100644
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -144,6 +144,20 @@ static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
 		},
 	},
+	[C(NODE)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
 };
 
 #define	XSCALE_PMU_ENABLE	0x001
@@ -251,7 +265,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
 		if (!armpmu_event_set_period(event, hwc, idx))
 			continue;
 
-		if (perf_event_overflow(event, 0, &data, regs))
+		if (perf_event_overflow(event, &data, regs))
 			armpmu->disable(hwc, idx);
 	}
 
@@ -583,7 +597,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
 		if (!armpmu_event_set_period(event, hwc, idx))
 			continue;
 
-		if (perf_event_overflow(event, 0, &data, regs))
+		if (perf_event_overflow(event, &data, regs))
 			armpmu->disable(hwc, idx);
 	}
 
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 97260060bf26..5c199610719f 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -396,7 +396,7 @@ static long ptrace_hbp_idx_to_num(int idx)
 /*
  * Handle hitting a HW-breakpoint.
  */
-static void ptrace_hbptriggered(struct perf_event *bp, int unused,
+static void ptrace_hbptriggered(struct perf_event *bp,
 				struct perf_sample_data *data,
 				struct pt_regs *regs)
 {
@@ -479,7 +479,8 @@ static struct perf_event *ptrace_hbp_create(struct task_struct *tsk, int type)
 	attr.bp_type	= type;
 	attr.disabled	= 1;
 
-	return register_user_hw_breakpoint(&attr, ptrace_hbptriggered, tsk);
+	return register_user_hw_breakpoint(&attr, ptrace_hbptriggered, NULL,
+					   tsk);
 }
 
 static int ptrace_gethbpregs(struct task_struct *tsk, long num,
diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c
index 40ee7e5045e4..5f452f8fde05 100644
--- a/arch/arm/kernel/swp_emulate.c
+++ b/arch/arm/kernel/swp_emulate.c
@@ -183,7 +183,7 @@ static int swp_handler(struct pt_regs *regs, unsigned int instr)
 	unsigned int address, destreg, data, type;
 	unsigned int res = 0;
 
-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, regs->ARM_pc);
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->ARM_pc);
 
 	if (current->pid != previous_pid) {
 		pr_debug("\"%s\" (%ld) uses deprecated SWP{B} instruction\n",
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index bc0e1d88fd3b..9ea4f7ddd665 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -318,11 +318,11 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 	fault = __do_page_fault(mm, addr, fsr, tsk);
 	up_read(&mm->mmap_sem);
 
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, addr);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
 	if (fault & VM_FAULT_MAJOR)
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, addr);
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, addr);
 	else if (fault & VM_FAULT_MINOR)
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, addr);
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, addr);
 
 	/*
 	 * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR
diff --git a/arch/mips/include/asm/stacktrace.h b/arch/mips/include/asm/stacktrace.h
index 0bf82818aa53..780ee2c2a2ac 100644
--- a/arch/mips/include/asm/stacktrace.h
+++ b/arch/mips/include/asm/stacktrace.h
@@ -7,6 +7,10 @@
 extern int raw_show_trace;
 extern unsigned long unwind_stack(struct task_struct *task, unsigned long *sp,
 				  unsigned long pc, unsigned long *ra);
+extern unsigned long unwind_stack_by_address(unsigned long stack_page,
+					     unsigned long *sp,
+					     unsigned long pc,
+					     unsigned long *ra);
 #else
 #define raw_show_trace 1
 static inline unsigned long unwind_stack(struct task_struct *task,
diff --git a/arch/mips/kernel/perf_event.c b/arch/mips/kernel/perf_event.c
index a8244854d3dc..d0deaab9ace2 100644
--- a/arch/mips/kernel/perf_event.c
+++ b/arch/mips/kernel/perf_event.c
@@ -527,7 +527,7 @@ handle_associated_event(struct cpu_hw_events *cpuc,
 	if (!mipspmu_event_set_period(event, hwc, idx))
 		return;
 
-	if (perf_event_overflow(event, 0, data, regs))
+	if (perf_event_overflow(event, data, regs))
 		mipspmu->disable_event(idx);
 }
 
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index 75266ff4cc33..e5ad09a9baf7 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -377,6 +377,20 @@ static const struct mips_perf_event mipsxxcore_cache_map
 		[C(RESULT_MISS)]	= { UNSUPPORTED_PERF_EVENT_ID },
 	},
 },
+[C(NODE)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)]	= { UNSUPPORTED_PERF_EVENT_ID },
+		[C(RESULT_MISS)]	= { UNSUPPORTED_PERF_EVENT_ID },
+	},
+	[C(OP_WRITE)] = {
+		[C(RESULT_ACCESS)]	= { UNSUPPORTED_PERF_EVENT_ID },
+		[C(RESULT_MISS)]	= { UNSUPPORTED_PERF_EVENT_ID },
+	},
+	[C(OP_PREFETCH)] = {
+		[C(RESULT_ACCESS)]	= { UNSUPPORTED_PERF_EVENT_ID },
+		[C(RESULT_MISS)]	= { UNSUPPORTED_PERF_EVENT_ID },
+	},
+},
 };
 
 /* 74K core has completely different cache event map. */
@@ -480,6 +494,20 @@ static const struct mips_perf_event mipsxx74Kcore_cache_map
 		[C(RESULT_MISS)]	= { UNSUPPORTED_PERF_EVENT_ID },
 	},
 },
+[C(NODE)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)]	= { UNSUPPORTED_PERF_EVENT_ID },
+		[C(RESULT_MISS)]	= { UNSUPPORTED_PERF_EVENT_ID },
+	},
+	[C(OP_WRITE)] = {
+		[C(RESULT_ACCESS)]	= { UNSUPPORTED_PERF_EVENT_ID },
+		[C(RESULT_MISS)]	= { UNSUPPORTED_PERF_EVENT_ID },
+	},
+	[C(OP_PREFETCH)] = {
+		[C(RESULT_ACCESS)]	= { UNSUPPORTED_PERF_EVENT_ID },
+		[C(RESULT_MISS)]	= { UNSUPPORTED_PERF_EVENT_ID },
+	},
+},
 };
 
 #ifdef CONFIG_MIPS_MT_SMP
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index d2112d3cf115..c28fbe6107bc 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -373,18 +373,18 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
 
 
 #ifdef CONFIG_KALLSYMS
-/* used by show_backtrace() */
-unsigned long unwind_stack(struct task_struct *task, unsigned long *sp,
-			   unsigned long pc, unsigned long *ra)
+/* generic stack unwinding function */
+unsigned long notrace unwind_stack_by_address(unsigned long stack_page,
+					      unsigned long *sp,
+					      unsigned long pc,
+					      unsigned long *ra)
 {
-	unsigned long stack_page;
 	struct mips_frame_info info;
 	unsigned long size, ofs;
 	int leaf;
 	extern void ret_from_irq(void);
 	extern void ret_from_exception(void);
 
-	stack_page = (unsigned long)task_stack_page(task);
 	if (!stack_page)
 		return 0;
 
@@ -443,6 +443,15 @@ unsigned long unwind_stack(struct task_struct *task, unsigned long *sp,
 	*ra = 0;
 	return __kernel_text_address(pc) ? pc : 0;
 }
+EXPORT_SYMBOL(unwind_stack_by_address);
+
+/* used by show_backtrace() */
+unsigned long unwind_stack(struct task_struct *task, unsigned long *sp,
+			   unsigned long pc, unsigned long *ra)
+{
+	unsigned long stack_page = (unsigned long)task_stack_page(task);
+	return unwind_stack_by_address(stack_page, sp, pc, ra);
+}
 #endif
 
 /*
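
The process.c change above splits the MIPS unwinder: unwind_stack() keeps its task-based interface, while the new unwind_stack_by_address() works from a raw stack-page address so callers that don't hold a task_struct (such as the oprofile backtrace code added below) can reuse it. A hypothetical caller, sketched from the signatures in this hunk (register names per the MIPS ABI; not verbatim kernel code):

	unsigned long sp = regs->regs[29];	/* $29 = stack pointer */
	unsigned long ra = regs->regs[31];	/* $31 = return address */
	unsigned long pc = regs->cp0_epc;
	unsigned long page = (unsigned long)task_stack_page(current);

	/* walk one frame; returns the caller's pc, or 0 when the walk ends */
	pc = unwind_stack_by_address(page, &sp, pc, &ra);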
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index e9b3af27d844..b7517e3abc85 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -578,12 +578,12 @@ static int simulate_llsc(struct pt_regs *regs, unsigned int opcode)
 {
 	if ((opcode & OPCODE) == LL) {
 		perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,
-				1, 0, regs, 0);
+				1, regs, 0);
 		return simulate_ll(regs, opcode);
 	}
 	if ((opcode & OPCODE) == SC) {
 		perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,
-				1, 0, regs, 0);
+				1, regs, 0);
 		return simulate_sc(regs, opcode);
 	}
 
@@ -602,7 +602,7 @@ static int simulate_rdhwr(struct pt_regs *regs, unsigned int opcode)
 	int rd = (opcode & RD) >> 11;
 	int rt = (opcode & RT) >> 16;
 	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,
-			1, 0, regs, 0);
+			1, regs, 0);
 	switch (rd) {
 	case 0:		/* CPU number */
 		regs->regs[rt] = smp_processor_id();
@@ -640,7 +640,7 @@ static int simulate_sync(struct pt_regs *regs, unsigned int opcode)
 {
 	if ((opcode & OPCODE) == SPEC0 && (opcode & FUNC) == SYNC) {
 		perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,
-				1, 0, regs, 0);
+				1, regs, 0);
 		return 0;
 	}
 
diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c
index cfea1adfa153..eb319b580353 100644
--- a/arch/mips/kernel/unaligned.c
+++ b/arch/mips/kernel/unaligned.c
@@ -111,8 +111,7 @@ static void emulate_load_store_insn(struct pt_regs *regs,
 	unsigned long value;
 	unsigned int res;
 
-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,
-			1, 0, regs, 0);
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
 
 	/*
 	 * This load never faults.
@@ -517,7 +516,7 @@ asmlinkage void do_ade(struct pt_regs *regs)
 	mm_segment_t seg;
 
 	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS,
-			1, 0, regs, regs->cp0_badvaddr);
+			1, regs, regs->cp0_badvaddr);
 	/*
 	 * Did we catch a fault trying to load an instruction?
 	 * Or are we running in MIPS16 mode?
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index d32cb0503110..dbf2f93a5091 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -272,8 +272,7 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 	}
 
       emul:
-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,
-			1, 0, xcp, 0);
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, xcp, 0);
 	MIPS_FPU_EMU_INC_STATS(emulated);
 	switch (MIPSInst_OPCODE(ir)) {
 	case ldc1_op:{
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index 137ee76a0045..937cf3368164 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -145,7 +145,7 @@ good_area:
 	 * the fault.
 	 */
 	fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0);
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
@@ -154,12 +154,10 @@ good_area:
 		BUG();
 	}
 	if (fault & VM_FAULT_MAJOR) {
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
-				1, 0, regs, address);
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
 		tsk->maj_flt++;
 	} else {
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
-				1, 0, regs, address);
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
 		tsk->min_flt++;
 	}
 
diff --git a/arch/mips/oprofile/Makefile b/arch/mips/oprofile/Makefile
index 4b9d7044e26c..29f2f13eb31c 100644
--- a/arch/mips/oprofile/Makefile
+++ b/arch/mips/oprofile/Makefile
@@ -8,7 +8,7 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
 		oprofilefs.o oprofile_stats.o \
 		timer_int.o )
 
-oprofile-y				:= $(DRIVER_OBJS) common.o
+oprofile-y				:= $(DRIVER_OBJS) common.o backtrace.o
 
 oprofile-$(CONFIG_CPU_MIPS32)		+= op_model_mipsxx.o
 oprofile-$(CONFIG_CPU_MIPS64)		+= op_model_mipsxx.o
diff --git a/arch/mips/oprofile/backtrace.c b/arch/mips/oprofile/backtrace.c
new file mode 100644
index 000000000000..6854ed5097d2
--- /dev/null
+++ b/arch/mips/oprofile/backtrace.c
@@ -0,0 +1,175 @@
+#include <linux/oprofile.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+#include <asm/ptrace.h>
+#include <asm/stacktrace.h>
+#include <linux/stacktrace.h>
+#include <linux/kernel.h>
+#include <asm/sections.h>
+#include <asm/inst.h>
+
+struct stackframe {
+	unsigned long sp;
+	unsigned long pc;
+	unsigned long ra;
+};
+
+static inline int get_mem(unsigned long addr, unsigned long *result)
+{
+	unsigned long *address = (unsigned long *) addr;
+	if (!access_ok(VERIFY_READ, addr, sizeof(unsigned long)))
+		return -1;
+	if (__copy_from_user_inatomic(result, address, sizeof(unsigned long)))
+		return -3;
+	return 0;
+}
+
+/*
+ * These two instruction helpers were taken from process.c
+ */
+static inline int is_ra_save_ins(union mips_instruction *ip)
+{
+	/* sw / sd $ra, offset($sp) */
+	return (ip->i_format.opcode == sw_op || ip->i_format.opcode == sd_op)
+		&& ip->i_format.rs == 29 && ip->i_format.rt == 31;
+}
+
+static inline int is_sp_move_ins(union mips_instruction *ip)
+{
+	/* addiu/daddiu sp,sp,-imm */
+	if (ip->i_format.rs != 29 || ip->i_format.rt != 29)
+		return 0;
+	if (ip->i_format.opcode == addiu_op || ip->i_format.opcode == daddiu_op)
+		return 1;
+	return 0;
+}
+
+/*
+ * Looks for specific instructions that mark the end of a function.
+ * This usually means we ran into the code area of the previous function.
+ */
+static inline int is_end_of_function_marker(union mips_instruction *ip)
+{
+	/* jr ra */
+	if (ip->r_format.func == jr_op && ip->r_format.rs == 31)
+		return 1;
+	/* lui gp */
+	if (ip->i_format.opcode == lui_op && ip->i_format.rt == 28)
+		return 1;
+	return 0;
+}
+
+/*
+ * TODO for userspace stack unwinding:
+ * - handle cases where the stack is adjusted inside a function
+ *	(generally doesn't happen)
+ * - find optimal value for max_instr_check
+ * - try to find a way to handle leaf functions
+ */
+
+static inline int unwind_user_frame(struct stackframe *old_frame,
+				    const unsigned int max_instr_check)
+{
+	struct stackframe new_frame = *old_frame;
+	off_t ra_offset = 0;
+	size_t stack_size = 0;
+	unsigned long addr;
+
+	if (old_frame->pc == 0 || old_frame->sp == 0 || old_frame->ra == 0)
+		return -9;
+
+	for (addr = new_frame.pc; (addr + max_instr_check > new_frame.pc)
+		&& (!ra_offset || !stack_size); --addr) {
+		union mips_instruction ip;
+
+		if (get_mem(addr, (unsigned long *) &ip))
+			return -11;
+
+		if (is_sp_move_ins(&ip)) {
+			int stack_adjustment = ip.i_format.simmediate;
+			if (stack_adjustment > 0)
+				/* This marks the end of the previous function,
+				   which means we overran. */
+				break;
+			stack_size = (unsigned) stack_adjustment;
+		} else if (is_ra_save_ins(&ip)) {
+			int ra_slot = ip.i_format.simmediate;
+			if (ra_slot < 0)
+				/* This shouldn't happen. */
+				break;
+			ra_offset = ra_slot;
+		} else if (is_end_of_function_marker(&ip))
+			break;
+	}
+
+	if (!ra_offset || !stack_size)
+		return -1;
+
+	if (ra_offset) {
+		new_frame.ra = old_frame->sp + ra_offset;
+		if (get_mem(new_frame.ra, &(new_frame.ra)))
+			return -13;
+	}
+
+	if (stack_size) {
+		new_frame.sp = old_frame->sp + stack_size;
+		if (get_mem(new_frame.sp, &(new_frame.sp)))
+			return -14;
+	}
+
+	if (new_frame.sp > old_frame->sp)
+		return -2;
+
+	new_frame.pc = old_frame->ra;
+	*old_frame = new_frame;
+
+	return 0;
+}
+
+static inline void do_user_backtrace(unsigned long low_addr,
+				     struct stackframe *frame,
+				     unsigned int depth)
+{
+	const unsigned int max_instr_check = 512;
+	const unsigned long high_addr = low_addr + THREAD_SIZE;
+
+	while (depth-- && !unwind_user_frame(frame, max_instr_check)) {
+		oprofile_add_trace(frame->ra);
+		if (frame->sp < low_addr || frame->sp > high_addr)
+			break;
+	}
+}
+
+#ifndef CONFIG_KALLSYMS
+static inline void do_kernel_backtrace(unsigned long low_addr,
+				       struct stackframe *frame,
+				       unsigned int depth) { }
+#else
+static inline void do_kernel_backtrace(unsigned long low_addr,
+				       struct stackframe *frame,
+				       unsigned int depth)
+{
+	while (depth-- && frame->pc) {
+		frame->pc = unwind_stack_by_address(low_addr,
+						    &(frame->sp),
+						    frame->pc,
+						    &(frame->ra));
+		oprofile_add_trace(frame->ra);
+	}
+}
+#endif
+
+void notrace op_mips_backtrace(struct pt_regs *const regs, unsigned int depth)
+{
+	struct stackframe frame = { .sp = regs->regs[29],
+				    .pc = regs->cp0_epc,
+				    .ra = regs->regs[31] };
+	const int userspace = user_mode(regs);
+	const unsigned long low_addr = ALIGN(frame.sp, THREAD_SIZE);
+
+	if (userspace)
+		do_user_backtrace(low_addr, &frame, depth);
+	else
+		do_kernel_backtrace(low_addr, &frame, depth);
+}
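
The user-space unwinder in this new file has no frame pointers or debug info to lean on, so unwind_user_frame() scans backwards from the PC for the two prologue instructions that establish a stack frame. A sketch of the pattern it matches, with illustrative offsets only:

	/*
	 * Typical MIPS prologue the scanner recognizes:
	 *
	 *	addiu	sp, sp, -32	<- is_sp_move_ins(): stack_size = 32
	 *	sw	ra, 28(sp)	<- is_ra_save_ins(): ra_offset = 28
	 *
	 * unwind_user_frame() then recovers the caller's frame roughly as:
	 *	new.ra = *(old.sp + ra_offset);
	 *	new.sp =   old.sp + stack_size;
	 *	new.pc =   old.ra;
	 */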
diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c
index f9eb1aba6345..d1f2d4c52d42 100644
--- a/arch/mips/oprofile/common.c
+++ b/arch/mips/oprofile/common.c
@@ -115,6 +115,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
 	ops->start		= op_mips_start;
 	ops->stop		= op_mips_stop;
 	ops->cpu_type		= lmodel->cpu_type;
+	ops->backtrace		= op_mips_backtrace;
 
 	printk(KERN_INFO "oprofile: using %s performance monitoring.\n",
 	       lmodel->cpu_type);
diff --git a/arch/mips/oprofile/op_impl.h b/arch/mips/oprofile/op_impl.h
index f04b54fb37d1..7c2da27ece04 100644
--- a/arch/mips/oprofile/op_impl.h
+++ b/arch/mips/oprofile/op_impl.h
@@ -36,4 +36,6 @@ struct op_mips_model {
 	unsigned char num_counters;
 };
 
+void op_mips_backtrace(struct pt_regs * const regs, unsigned int depth);
+
 #endif
diff --git a/arch/powerpc/include/asm/emulated_ops.h b/arch/powerpc/include/asm/emulated_ops.h
index 45921672b97a..2cc41c715d2b 100644
--- a/arch/powerpc/include/asm/emulated_ops.h
+++ b/arch/powerpc/include/asm/emulated_ops.h
@@ -78,14 +78,14 @@ extern void ppc_warn_emulated_print(const char *type);
 #define PPC_WARN_EMULATED(type, regs)					\
 	do {								\
 		perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,		\
-			1, 0, regs, 0);					\
+			1, regs, 0);					\
 		__PPC_WARN_EMULATED(type);				\
 	} while (0)
 
 #define PPC_WARN_ALIGNMENT(type, regs)					\
 	do {								\
 		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS,		\
-			1, 0, regs, regs->dar);				\
+			1, regs, regs->dar);				\
 		__PPC_WARN_EMULATED(type);				\
 	} while (0)
 
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
index 1c33ec17ca36..80fd4d2b4a62 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -57,7 +57,7 @@ void hw_breakpoint_pmu_read(struct perf_event *bp);
 extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
 
 extern struct pmu perf_ops_bp;
-extern void ptrace_triggered(struct perf_event *bp, int nmi,
+extern void ptrace_triggered(struct perf_event *bp,
 			struct perf_sample_data *data, struct pt_regs *regs);
 static inline void hw_breakpoint_disable(void)
 {
diff --git a/arch/powerpc/kernel/e500-pmu.c b/arch/powerpc/kernel/e500-pmu.c
index b150b510510f..cb2e2949c8d1 100644
--- a/arch/powerpc/kernel/e500-pmu.c
+++ b/arch/powerpc/kernel/e500-pmu.c
@@ -75,6 +75,11 @@ static int e500_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 		[C(OP_WRITE)] = { -1, -1 },
 		[C(OP_PREFETCH)] = { -1, -1 },
 	},
+	[C(NODE)] = {		/* RESULT_ACCESS  RESULT_MISS */
+		[C(OP_READ)] = { -1, -1 },
+		[C(OP_WRITE)] = { -1, -1 },
+		[C(OP_PREFETCH)] = { -1, -1 },
+	},
 };
 
 static int num_events = 128;
diff --git a/arch/powerpc/kernel/mpc7450-pmu.c b/arch/powerpc/kernel/mpc7450-pmu.c
index 2cc5e0301d0b..845a58478890 100644
--- a/arch/powerpc/kernel/mpc7450-pmu.c
+++ b/arch/powerpc/kernel/mpc7450-pmu.c
@@ -388,6 +388,11 @@ static int mpc7450_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 		[C(OP_WRITE)] = { -1, -1 },
 		[C(OP_PREFETCH)] = { -1, -1 },
 	},
+	[C(NODE)] = {		/* RESULT_ACCESS  RESULT_MISS */
+		[C(OP_READ)] = { -1, -1 },
+		[C(OP_WRITE)] = { -1, -1 },
+		[C(OP_PREFETCH)] = { -1, -1 },
+	},
 };
 
 struct power_pmu mpc7450_pmu = {
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 822f63008ae1..14967de98876 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -1207,7 +1207,7 @@ struct pmu power_pmu = {
  * here so there is no possibility of being interrupted.
  */
 static void record_and_restart(struct perf_event *event, unsigned long val,
-			       struct pt_regs *regs, int nmi)
+			       struct pt_regs *regs)
 {
 	u64 period = event->hw.sample_period;
 	s64 prev, delta, left;
@@ -1258,7 +1258,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 		if (event->attr.sample_type & PERF_SAMPLE_ADDR)
 			perf_get_data_addr(regs, &data.addr);
 
-		if (perf_event_overflow(event, nmi, &data, regs))
+		if (perf_event_overflow(event, &data, regs))
 			power_pmu_stop(event, 0);
 	}
 }
@@ -1346,7 +1346,7 @@ static void perf_event_interrupt(struct pt_regs *regs)
 		if ((int)val < 0) {
 			/* event has overflowed */
 			found = 1;
-			record_and_restart(event, val, regs, nmi);
+			record_and_restart(event, val, regs);
 		}
 	}
 
diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c
index b0dc8f7069cd..0a6d2a9d569c 100644
--- a/arch/powerpc/kernel/perf_event_fsl_emb.c
+++ b/arch/powerpc/kernel/perf_event_fsl_emb.c
@@ -568,7 +568,7 @@ static struct pmu fsl_emb_pmu = {
  * here so there is no possibility of being interrupted.
  */
 static void record_and_restart(struct perf_event *event, unsigned long val,
-			       struct pt_regs *regs, int nmi)
+			       struct pt_regs *regs)
 {
 	u64 period = event->hw.sample_period;
 	s64 prev, delta, left;
@@ -616,7 +616,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 		perf_sample_data_init(&data, 0);
 		data.period = event->hw.last_period;
 
-		if (perf_event_overflow(event, nmi, &data, regs))
+		if (perf_event_overflow(event, &data, regs))
 			fsl_emb_pmu_stop(event, 0);
 	}
 }
@@ -644,7 +644,7 @@ static void perf_event_interrupt(struct pt_regs *regs)
 		if (event) {
 			/* event has overflowed */
 			found = 1;
-			record_and_restart(event, val, regs, nmi);
+			record_and_restart(event, val, regs);
 		} else {
 			/*
 			 * Disabled counter is negative,
diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c
index ead8b3c2649e..e9dbc2d35c9c 100644
--- a/arch/powerpc/kernel/power4-pmu.c
+++ b/arch/powerpc/kernel/power4-pmu.c
@@ -587,6 +587,11 @@ static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 		[C(OP_WRITE)] = { -1, -1 },
 		[C(OP_PREFETCH)] = { -1, -1 },
 	},
+	[C(NODE)] = {		/* RESULT_ACCESS  RESULT_MISS */
+		[C(OP_READ)] = { -1, -1 },
+		[C(OP_WRITE)] = { -1, -1 },
+		[C(OP_PREFETCH)] = { -1, -1 },
+	},
 };
 
 static struct power_pmu power4_pmu = {
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
index eca0ac595cb6..f58a2bd41b59 100644
--- a/arch/powerpc/kernel/power5+-pmu.c
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -653,6 +653,11 @@ static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 		[C(OP_WRITE)] = { -1, -1 },
 		[C(OP_PREFETCH)] = { -1, -1 },
 	},
+	[C(NODE)] = {		/* RESULT_ACCESS  RESULT_MISS */
+		[C(OP_READ)] = { -1, -1 },
+		[C(OP_WRITE)] = { -1, -1 },
+		[C(OP_PREFETCH)] = { -1, -1 },
+	},
 };
 
 static struct power_pmu power5p_pmu = {
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c
index d5ff0f64a5e6..b1acab684142 100644
--- a/arch/powerpc/kernel/power5-pmu.c
+++ b/arch/powerpc/kernel/power5-pmu.c
@@ -595,6 +595,11 @@ static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 		[C(OP_WRITE)] = { -1, -1 },
 		[C(OP_PREFETCH)] = { -1, -1 },
 	},
+	[C(NODE)] = {		/* RESULT_ACCESS  RESULT_MISS */
+		[C(OP_READ)] = { -1, -1 },
+		[C(OP_WRITE)] = { -1, -1 },
+		[C(OP_PREFETCH)] = { -1, -1 },
+	},
 };
 
 static struct power_pmu power5_pmu = {
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
index 31603927e376..b24a3a23d073 100644
--- a/arch/powerpc/kernel/power6-pmu.c
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -516,6 +516,11 @@ static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 		[C(OP_WRITE)] = { -1, -1 },
 		[C(OP_PREFETCH)] = { -1, -1 },
 	},
+	[C(NODE)] = {		/* RESULT_ACCESS  RESULT_MISS */
+		[C(OP_READ)] = { -1, -1 },
+		[C(OP_WRITE)] = { -1, -1 },
+		[C(OP_PREFETCH)] = { -1, -1 },
+	},
 };
 
 static struct power_pmu power6_pmu = {
diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c
index 593740fcb799..6d9dccb2ea59 100644
--- a/arch/powerpc/kernel/power7-pmu.c
+++ b/arch/powerpc/kernel/power7-pmu.c
@@ -342,6 +342,11 @@ static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 		[C(OP_WRITE)] = { -1, -1 },
 		[C(OP_PREFETCH)] = { -1, -1 },
 	},
+	[C(NODE)] = {		/* RESULT_ACCESS  RESULT_MISS */
+		[C(OP_READ)] = { -1, -1 },
+		[C(OP_WRITE)] = { -1, -1 },
+		[C(OP_PREFETCH)] = { -1, -1 },
+	},
 };
 
 static struct power_pmu power7_pmu = {
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c
index 9a6e093858fe..b121de9658eb 100644
--- a/arch/powerpc/kernel/ppc970-pmu.c
+++ b/arch/powerpc/kernel/ppc970-pmu.c
@@ -467,6 +467,11 @@ static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 		[C(OP_WRITE)] = { -1, -1 },
 		[C(OP_PREFETCH)] = { -1, -1 },
 	},
+	[C(NODE)] = {		/* RESULT_ACCESS  RESULT_MISS */
+		[C(OP_READ)] = { -1, -1 },
+		[C(OP_WRITE)] = { -1, -1 },
+		[C(OP_PREFETCH)] = { -1, -1 },
+	},
 };
 
 static struct power_pmu ppc970_pmu = {
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index cb22024f2b42..05b7dd217f60 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -882,7 +882,7 @@ void user_disable_single_step(struct task_struct *task)
 }
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
-void ptrace_triggered(struct perf_event *bp, int nmi,
+void ptrace_triggered(struct perf_event *bp,
 		      struct perf_sample_data *data, struct pt_regs *regs)
 {
 	struct perf_event_attr attr;
@@ -973,7 +973,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 							&attr.bp_type);
 
 	thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr,
-							ptrace_triggered, task);
+						ptrace_triggered, NULL, task);
 	if (IS_ERR(bp)) {
 		thread->ptrace_bps[0] = NULL;
 		ptrace_put_breakpoints(task);
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index f33acfd872ad..03b29a6759ab 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -544,7 +544,7 @@ DEFINE_PER_CPU(u8, irq_work_pending);
 
 #endif /* 32 vs 64 bit */
 
-void set_irq_work_pending(void)
+void arch_irq_work_raise(void)
 {
 	preempt_disable();
 	set_irq_work_pending_flag();
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index ad35f66c69e8..5efe8c96d37f 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -174,7 +174,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
 		die("Weird page fault", regs, SIGSEGV);
 	}
 
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
 	/* When running in the kernel we expect faults to occur only to
 	 * addresses in user space. All other faults represent errors in the
@@ -320,7 +320,7 @@ good_area:
 	}
 	if (ret & VM_FAULT_MAJOR) {
 		current->maj_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
 				     regs, address);
 #ifdef CONFIG_PPC_SMLPAR
 		if (firmware_has_feature(FW_FEATURE_CMO)) {
@@ -331,7 +331,7 @@ good_area:
 #endif
 	} else {
 		current->min_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
 				     regs, address);
 	}
 	up_read(&mm->mmap_sem);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index fe103e891e7a..095f782a5512 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -299,7 +299,7 @@ static inline int do_exception(struct pt_regs *regs, int access,
 		goto out;
 
 	address = trans_exc_code & __FAIL_ADDR_MASK;
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 	flags = FAULT_FLAG_ALLOW_RETRY;
 	if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
 		flags |= FAULT_FLAG_WRITE;
@@ -345,11 +345,11 @@ retry:
 	if (flags & FAULT_FLAG_ALLOW_RETRY) {
 		if (fault & VM_FAULT_MAJOR) {
 			tsk->maj_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
 				      regs, address);
 		} else {
 			tsk->min_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
 				      regs, address);
 		}
 		if (fault & VM_FAULT_RETRY) {
diff --git a/arch/sh/kernel/cpu/sh4/perf_event.c b/arch/sh/kernel/cpu/sh4/perf_event.c
index 748955df018d..fa4f724b295a 100644
--- a/arch/sh/kernel/cpu/sh4/perf_event.c
+++ b/arch/sh/kernel/cpu/sh4/perf_event.c
@@ -180,6 +180,21 @@ static const int sh7750_cache_events
 		[ C(RESULT_MISS)   ] = -1,
 	},
 },
+
+[ C(NODE) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+},
 };
 
 static int sh7750_event_map(int event)
diff --git a/arch/sh/kernel/cpu/sh4a/perf_event.c b/arch/sh/kernel/cpu/sh4a/perf_event.c
index 17e6bebfede0..84a2c396ceee 100644
--- a/arch/sh/kernel/cpu/sh4a/perf_event.c
+++ b/arch/sh/kernel/cpu/sh4a/perf_event.c
@@ -205,6 +205,21 @@ static const int sh4a_cache_events
 		[ C(RESULT_MISS)   ] = -1,
 	},
 },
+
+[ C(NODE) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+},
 };
 
 static int sh4a_event_map(int event)
diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c
index 3d7b209b2178..92b3c276339a 100644
--- a/arch/sh/kernel/ptrace_32.c
+++ b/arch/sh/kernel/ptrace_32.c
@@ -63,7 +63,7 @@ static inline int put_stack_long(struct task_struct *task, int offset,
 	return 0;
 }
 
-void ptrace_triggered(struct perf_event *bp, int nmi,
+void ptrace_triggered(struct perf_event *bp,
 		      struct perf_sample_data *data, struct pt_regs *regs)
 {
 	struct perf_event_attr attr;
@@ -91,7 +91,8 @@ static int set_single_step(struct task_struct *tsk, unsigned long addr)
 	attr.bp_len = HW_BREAKPOINT_LEN_2;
 	attr.bp_type = HW_BREAKPOINT_R;
 
-	bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk);
+	bp = register_user_hw_breakpoint(&attr, ptrace_triggered,
+					 NULL, tsk);
 	if (IS_ERR(bp))
 		return PTR_ERR(bp);
 
diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c
index b51a17104b5f..d9006f8ffc14 100644
--- a/arch/sh/kernel/traps_32.c
+++ b/arch/sh/kernel/traps_32.c
@@ -393,7 +393,7 @@ int handle_unaligned_access(insn_size_t instruction, struct pt_regs *regs,
 	 */
 	if (!expected) {
 		unaligned_fixups_notify(current, instruction, regs);
-		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1,
 			      regs, address);
 	}
 
diff --git a/arch/sh/kernel/traps_64.c b/arch/sh/kernel/traps_64.c
index 6713ca97e553..67110be83fd7 100644
--- a/arch/sh/kernel/traps_64.c
+++ b/arch/sh/kernel/traps_64.c
@@ -434,7 +434,7 @@ static int misaligned_load(struct pt_regs *regs,
 		return error;
 	}
 
-	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, address);
 
 	destreg = (opcode >> 4) & 0x3f;
 	if (user_mode(regs)) {
@@ -512,7 +512,7 @@ static int misaligned_store(struct pt_regs *regs,
 		return error;
 	}
 
-	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, address);
 
 	srcreg = (opcode >> 4) & 0x3f;
 	if (user_mode(regs)) {
@@ -588,7 +588,7 @@ static int misaligned_fpu_load(struct pt_regs *regs,
 		return error;
 	}
 
-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, address);
 
 	destreg = (opcode >> 4) & 0x3f;
 	if (user_mode(regs)) {
@@ -665,7 +665,7 @@ static int misaligned_fpu_store(struct pt_regs *regs,
 		return error;
 	}
 
-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, address);
 
 	srcreg = (opcode >> 4) & 0x3f;
 	if (user_mode(regs)) {
diff --git a/arch/sh/math-emu/math.c b/arch/sh/math-emu/math.c
index f76a5090d5d1..977195210653 100644
--- a/arch/sh/math-emu/math.c
+++ b/arch/sh/math-emu/math.c
@@ -620,7 +620,7 @@ int do_fpu_inst(unsigned short inst, struct pt_regs *regs)
 	struct task_struct *tsk = current;
 	struct sh_fpu_soft_struct *fpu = &(tsk->thread.xstate->softfpu);
 
-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0);
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
 
 	if (!(task_thread_info(tsk)->status & TS_USEDFPU)) {
 		/* initialize once. */
diff --git a/arch/sh/mm/fault_32.c b/arch/sh/mm/fault_32.c
index d4c34d757f0d..7bebd044f2a1 100644
--- a/arch/sh/mm/fault_32.c
+++ b/arch/sh/mm/fault_32.c
@@ -160,7 +160,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 	if ((regs->sr & SR_IMASK) != SR_IMASK)
 		local_irq_enable();
 
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
 	/*
 	 * If we're in an interrupt, have no user context or are running
@@ -210,11 +210,11 @@ good_area:
 	}
 	if (fault & VM_FAULT_MAJOR) {
 		tsk->maj_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
 				     regs, address);
 	} else {
 		tsk->min_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
 				     regs, address);
 	}
 
diff --git a/arch/sh/mm/tlbflush_64.c b/arch/sh/mm/tlbflush_64.c
index 7f5810f5dfdc..e3430e093d43 100644
--- a/arch/sh/mm/tlbflush_64.c
+++ b/arch/sh/mm/tlbflush_64.c
@@ -116,7 +116,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
 	/* Not an IO address, so reenable interrupts */
 	local_irq_enable();
 
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
 	/*
 	 * If we're in an interrupt or have no user
@@ -200,11 +200,11 @@ good_area:
 
 	if (fault & VM_FAULT_MAJOR) {
 		tsk->maj_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
 				     regs, address);
 	} else {
 		tsk->min_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
 				     regs, address);
 	}
 
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 2cb0e1c001e2..62a034318b18 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -246,6 +246,20 @@ static const cache_map_t ultra3_cache_map = {
 	[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
 	},
 },
+[C(NODE)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
+	},
+},
 };
 
 static const struct sparc_pmu ultra3_pmu = {
@@ -361,6 +375,20 @@ static const cache_map_t niagara1_cache_map = {
 	[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
 	},
 },
+[C(NODE)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
+	},
+},
 };
 
 static const struct sparc_pmu niagara1_pmu = {
@@ -473,6 +501,20 @@ static const cache_map_t niagara2_cache_map = {
 	[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
 	},
 },
+[C(NODE)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
+	},
+},
 };
 
 static const struct sparc_pmu niagara2_pmu = {
@@ -1277,7 +1319,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
1277 if (!sparc_perf_event_set_period(event, hwc, idx)) 1319 if (!sparc_perf_event_set_period(event, hwc, idx))
1278 continue; 1320 continue;
1279 1321
1280 if (perf_event_overflow(event, 1, &data, regs)) 1322 if (perf_event_overflow(event, &data, regs))
1281 sparc_pmu_stop(event, 0); 1323 sparc_pmu_stop(event, 0);
1282 } 1324 }
1283 1325
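
The C(NODE) rows added to each sparc cache_map_t wire up the generic node (local-memory) cache events introduced by this series; these chips expose no suitable counter encoding, so every op/result cell is CACHE_OP_UNSUPPORTED and event creation fails cleanly. For reference, a sketch of how user space requests such an event via the generic cache-event encoding (id | op << 8 | result << 16, documented in include/linux/perf_event.h); PERF_COUNT_HW_CACHE_NODE is the identifier this series adds:

        #include <linux/perf_event.h>

        /* request "node read misses" via the generic cache-event encoding */
        struct perf_event_attr attr = {
                .type   = PERF_TYPE_HW_CACHE,
                .size   = sizeof(struct perf_event_attr),
                .config = PERF_COUNT_HW_CACHE_NODE |
                          (PERF_COUNT_HW_CACHE_OP_READ << 8) |
                          (PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
        };
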
diff --git a/arch/sparc/kernel/unaligned_32.c b/arch/sparc/kernel/unaligned_32.c
index 4491f4cb2695..7efbb2f9e77f 100644
--- a/arch/sparc/kernel/unaligned_32.c
+++ b/arch/sparc/kernel/unaligned_32.c
@@ -247,7 +247,7 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn)
247 unsigned long addr = compute_effective_address(regs, insn); 247 unsigned long addr = compute_effective_address(regs, insn);
248 int err; 248 int err;
249 249
250 perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, addr); 250 perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
251 switch (dir) { 251 switch (dir) {
252 case load: 252 case load:
253 err = do_int_load(fetch_reg_addr(((insn>>25)&0x1f), 253 err = do_int_load(fetch_reg_addr(((insn>>25)&0x1f),
@@ -338,7 +338,7 @@ asmlinkage void user_unaligned_trap(struct pt_regs *regs, unsigned int insn)
338 } 338 }
339 339
340 addr = compute_effective_address(regs, insn); 340 addr = compute_effective_address(regs, insn);
341 perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, addr); 341 perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
342 switch(dir) { 342 switch(dir) {
343 case load: 343 case load:
344 err = do_int_load(fetch_reg_addr(((insn>>25)&0x1f), 344 err = do_int_load(fetch_reg_addr(((insn>>25)&0x1f),
diff --git a/arch/sparc/kernel/unaligned_64.c b/arch/sparc/kernel/unaligned_64.c
index b2b019ea8caa..35cff1673aa4 100644
--- a/arch/sparc/kernel/unaligned_64.c
+++ b/arch/sparc/kernel/unaligned_64.c
@@ -317,7 +317,7 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn)
317 317
318 addr = compute_effective_address(regs, insn, 318 addr = compute_effective_address(regs, insn,
319 ((insn >> 25) & 0x1f)); 319 ((insn >> 25) & 0x1f));
320 perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, addr); 320 perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
321 switch (asi) { 321 switch (asi) {
322 case ASI_NL: 322 case ASI_NL:
323 case ASI_AIUPL: 323 case ASI_AIUPL:
@@ -384,7 +384,7 @@ int handle_popc(u32 insn, struct pt_regs *regs)
384 int ret, i, rd = ((insn >> 25) & 0x1f); 384 int ret, i, rd = ((insn >> 25) & 0x1f);
385 int from_kernel = (regs->tstate & TSTATE_PRIV) != 0; 385 int from_kernel = (regs->tstate & TSTATE_PRIV) != 0;
386 386
387 perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0); 387 perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
388 if (insn & 0x2000) { 388 if (insn & 0x2000) {
389 maybe_flush_windows(0, 0, rd, from_kernel); 389 maybe_flush_windows(0, 0, rd, from_kernel);
390 value = sign_extend_imm13(insn); 390 value = sign_extend_imm13(insn);
@@ -431,7 +431,7 @@ int handle_ldf_stq(u32 insn, struct pt_regs *regs)
431 int asi = decode_asi(insn, regs); 431 int asi = decode_asi(insn, regs);
432 int flag = (freg < 32) ? FPRS_DL : FPRS_DU; 432 int flag = (freg < 32) ? FPRS_DL : FPRS_DU;
433 433
434 perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0); 434 perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
435 435
436 save_and_clear_fpu(); 436 save_and_clear_fpu();
437 current_thread_info()->xfsr[0] &= ~0x1c000; 437 current_thread_info()->xfsr[0] &= ~0x1c000;
@@ -554,7 +554,7 @@ void handle_ld_nf(u32 insn, struct pt_regs *regs)
554 int from_kernel = (regs->tstate & TSTATE_PRIV) != 0; 554 int from_kernel = (regs->tstate & TSTATE_PRIV) != 0;
555 unsigned long *reg; 555 unsigned long *reg;
556 556
557 perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0); 557 perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
558 558
559 maybe_flush_windows(0, 0, rd, from_kernel); 559 maybe_flush_windows(0, 0, rd, from_kernel);
560 reg = fetch_reg_addr(rd, regs); 560 reg = fetch_reg_addr(rd, regs);
@@ -586,7 +586,7 @@ void handle_lddfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr
586 586
587 if (tstate & TSTATE_PRIV) 587 if (tstate & TSTATE_PRIV)
588 die_if_kernel("lddfmna from kernel", regs); 588 die_if_kernel("lddfmna from kernel", regs);
589 perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, sfar); 589 perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, sfar);
590 if (test_thread_flag(TIF_32BIT)) 590 if (test_thread_flag(TIF_32BIT))
591 pc = (u32)pc; 591 pc = (u32)pc;
592 if (get_user(insn, (u32 __user *) pc) != -EFAULT) { 592 if (get_user(insn, (u32 __user *) pc) != -EFAULT) {
@@ -647,7 +647,7 @@ void handle_stdfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr
647 647
648 if (tstate & TSTATE_PRIV) 648 if (tstate & TSTATE_PRIV)
649 die_if_kernel("stdfmna from kernel", regs); 649 die_if_kernel("stdfmna from kernel", regs);
650 perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, sfar); 650 perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, sfar);
651 if (test_thread_flag(TIF_32BIT)) 651 if (test_thread_flag(TIF_32BIT))
652 pc = (u32)pc; 652 pc = (u32)pc;
653 if (get_user(insn, (u32 __user *) pc) != -EFAULT) { 653 if (get_user(insn, (u32 __user *) pc) != -EFAULT) {
diff --git a/arch/sparc/kernel/visemul.c b/arch/sparc/kernel/visemul.c
index 36357717d691..32b626c9d815 100644
--- a/arch/sparc/kernel/visemul.c
+++ b/arch/sparc/kernel/visemul.c
@@ -802,7 +802,7 @@ int vis_emul(struct pt_regs *regs, unsigned int insn)
802 802
803 BUG_ON(regs->tstate & TSTATE_PRIV); 803 BUG_ON(regs->tstate & TSTATE_PRIV);
804 804
805 perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0); 805 perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
806 806
807 if (test_thread_flag(TIF_32BIT)) 807 if (test_thread_flag(TIF_32BIT))
808 pc = (u32)pc; 808 pc = (u32)pc;
diff --git a/arch/sparc/math-emu/math_32.c b/arch/sparc/math-emu/math_32.c
index a3fccde894ec..aa4d55b0bdf0 100644
--- a/arch/sparc/math-emu/math_32.c
+++ b/arch/sparc/math-emu/math_32.c
@@ -164,7 +164,7 @@ int do_mathemu(struct pt_regs *regs, struct task_struct *fpt)
164 int retcode = 0; /* assume all succeed */ 164 int retcode = 0; /* assume all succeed */
165 unsigned long insn; 165 unsigned long insn;
166 166
167 perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0); 167 perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
168 168
169#ifdef DEBUG_MATHEMU 169#ifdef DEBUG_MATHEMU
170 printk("In do_mathemu()... pc is %08lx\n", regs->pc); 170 printk("In do_mathemu()... pc is %08lx\n", regs->pc);
diff --git a/arch/sparc/math-emu/math_64.c b/arch/sparc/math-emu/math_64.c
index 56d2c44747b8..e575bd2fe381 100644
--- a/arch/sparc/math-emu/math_64.c
+++ b/arch/sparc/math-emu/math_64.c
@@ -184,7 +184,7 @@ int do_mathemu(struct pt_regs *regs, struct fpustate *f)
184 184
185 if (tstate & TSTATE_PRIV) 185 if (tstate & TSTATE_PRIV)
186 die_if_kernel("unfinished/unimplemented FPop from kernel", regs); 186 die_if_kernel("unfinished/unimplemented FPop from kernel", regs);
187 perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0); 187 perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
188 if (test_thread_flag(TIF_32BIT)) 188 if (test_thread_flag(TIF_32BIT))
189 pc = (u32)pc; 189 pc = (u32)pc;
190 if (get_user(insn, (u32 __user *) pc) != -EFAULT) { 190 if (get_user(insn, (u32 __user *) pc) != -EFAULT) {
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index 7543ddbdadb2..aa1c1b1ce5cc 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -251,7 +251,7 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
251 if (in_atomic() || !mm) 251 if (in_atomic() || !mm)
252 goto no_context; 252 goto no_context;
253 253
254 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); 254 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
255 255
256 down_read(&mm->mmap_sem); 256 down_read(&mm->mmap_sem);
257 257
@@ -301,12 +301,10 @@ good_area:
301 } 301 }
302 if (fault & VM_FAULT_MAJOR) { 302 if (fault & VM_FAULT_MAJOR) {
303 current->maj_flt++; 303 current->maj_flt++;
304 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, 304 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
305 regs, address);
306 } else { 305 } else {
307 current->min_flt++; 306 current->min_flt++;
308 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, 307 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
309 regs, address);
310 } 308 }
311 up_read(&mm->mmap_sem); 309 up_read(&mm->mmap_sem);
312 return; 310 return;
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index f92ce56a8b22..504c0622f729 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -325,7 +325,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
325 if (in_atomic() || !mm) 325 if (in_atomic() || !mm)
326 goto intr_or_no_mm; 326 goto intr_or_no_mm;
327 327
328 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); 328 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
329 329
330 if (!down_read_trylock(&mm->mmap_sem)) { 330 if (!down_read_trylock(&mm->mmap_sem)) {
331 if ((regs->tstate & TSTATE_PRIV) && 331 if ((regs->tstate & TSTATE_PRIV) &&
@@ -433,12 +433,10 @@ good_area:
433 } 433 }
434 if (fault & VM_FAULT_MAJOR) { 434 if (fault & VM_FAULT_MAJOR) {
435 current->maj_flt++; 435 current->maj_flt++;
436 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, 436 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
437 regs, address);
438 } else { 437 } else {
439 current->min_flt++; 438 current->min_flt++;
440 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, 439 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
441 regs, address);
442 } 440 }
443 up_read(&mm->mmap_sem); 441 up_read(&mm->mmap_sem);
444 442
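
The fault-path hunks above all converge on the same accounting shape, which the shorter perf_sw_event() signature now lets several architectures fold back onto a single line. A condensed sketch of that canonical pattern (names as in the handlers above):

        /* canonical page-fault accounting, post-merge (condensed sketch) */
        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);

        fault = handle_mm_fault(mm, vma, address, flags);

        if (fault & VM_FAULT_MAJOR) {
                current->maj_flt++;     /* fault required disk I/O */
                perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
        } else {
                current->min_flt++;     /* satisfied from memory */
                perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
        }
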
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 5745ce8bf108..bba3cf88e624 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -60,23 +60,24 @@ static inline void native_halt(void)
60#include <asm/paravirt.h> 60#include <asm/paravirt.h>
61#else 61#else
62#ifndef __ASSEMBLY__ 62#ifndef __ASSEMBLY__
63#include <linux/types.h>
63 64
64static inline unsigned long arch_local_save_flags(void) 65static inline notrace unsigned long arch_local_save_flags(void)
65{ 66{
66 return native_save_fl(); 67 return native_save_fl();
67} 68}
68 69
69static inline void arch_local_irq_restore(unsigned long flags) 70static inline notrace void arch_local_irq_restore(unsigned long flags)
70{ 71{
71 native_restore_fl(flags); 72 native_restore_fl(flags);
72} 73}
73 74
74static inline void arch_local_irq_disable(void) 75static inline notrace void arch_local_irq_disable(void)
75{ 76{
76 native_irq_disable(); 77 native_irq_disable();
77} 78}
78 79
79static inline void arch_local_irq_enable(void) 80static inline notrace void arch_local_irq_enable(void)
80{ 81{
81 native_irq_enable(); 82 native_irq_enable();
82} 83}
@@ -102,7 +103,7 @@ static inline void halt(void)
102/* 103/*
103 * For spinlocks, etc: 104 * For spinlocks, etc:
104 */ 105 */
105static inline unsigned long arch_local_irq_save(void) 106static inline notrace unsigned long arch_local_irq_save(void)
106{ 107{
107 unsigned long flags = arch_local_save_flags(); 108 unsigned long flags = arch_local_save_flags();
108 arch_local_irq_disable(); 109 arch_local_irq_disable();
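
Tagging the irqflags accessors notrace matters because the tracers themselves (irqsoff, function tracer) call them on every entry and exit; instrumenting them would recurse. In the kernel, notrace is simply the attribute below (include/linux/compiler.h):

        /* what the annotation expands to (include/linux/compiler.h) */
        #define notrace __attribute__((no_instrument_function))

        /* sketch: an accessor the tracer can call without re-entering itself */
        static inline notrace unsigned long arch_local_irq_save(void)
        {
                unsigned long flags = arch_local_save_flags();

                arch_local_irq_disable();
                return flags;
        }
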
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index d9d4dae305f6..094fb30817ab 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -152,6 +152,11 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
152 (regs)->bp = caller_frame_pointer(); \ 152 (regs)->bp = caller_frame_pointer(); \
153 (regs)->cs = __KERNEL_CS; \ 153 (regs)->cs = __KERNEL_CS; \
154 regs->flags = 0; \ 154 regs->flags = 0; \
155 asm volatile( \
156 _ASM_MOV "%%"_ASM_SP ", %0\n" \
157 : "=m" ((regs)->sp) \
158 :: "memory" \
159 ); \
155} 160}
156 161
157#else 162#else
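
The added asm snapshots the live stack pointer into the faked pt_regs, so callchains taken from software events start from a real frame instead of a zero sp. A standalone rendering of the same idiom, assuming x86-64 (the _ASM_MOV/_ASM_SP macros above select the 32- or 64-bit spelling automatically):

        #include <stdio.h>

        int main(void)
        {
                unsigned long sp;

                /* store %rsp straight to memory through an "=m" output */
                asm volatile("movq %%rsp, %0" : "=m" (sp) :: "memory");
                printf("sp = %#lx\n", sp);
                return 0;
        }
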
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index 56fd9e3abbda..4f7e67e2345e 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -102,6 +102,14 @@
102#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) 102#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT)
103 103
104/* 104/*
 105 * If an event has an alias it should be marked
106 * with a special bit. (Don't forget to check
107 * P4_PEBS_CONFIG_MASK and related bits on
108 * modification.)
109 */
110#define P4_CONFIG_ALIASABLE (1 << 9)
111
112/*
105 * The bits we allow to pass for RAW events 113 * The bits we allow to pass for RAW events
106 */ 114 */
107#define P4_CONFIG_MASK_ESCR \ 115#define P4_CONFIG_MASK_ESCR \
@@ -123,6 +131,31 @@
123 (p4_config_pack_escr(P4_CONFIG_MASK_ESCR)) | \ 131 (p4_config_pack_escr(P4_CONFIG_MASK_ESCR)) | \
124 (p4_config_pack_cccr(P4_CONFIG_MASK_CCCR)) 132 (p4_config_pack_cccr(P4_CONFIG_MASK_CCCR))
125 133
134/*
135 * In case of event aliasing we need to preserve some
 136 * caller bits; otherwise the mapping won't be complete.
137 */
138#define P4_CONFIG_EVENT_ALIAS_MASK \
139 (p4_config_pack_escr(P4_CONFIG_MASK_ESCR) | \
140 p4_config_pack_cccr(P4_CCCR_EDGE | \
141 P4_CCCR_THRESHOLD_MASK | \
142 P4_CCCR_COMPLEMENT | \
143 P4_CCCR_COMPARE))
144
145#define P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS \
146 ((P4_CONFIG_HT) | \
147 p4_config_pack_escr(P4_ESCR_T0_OS | \
148 P4_ESCR_T0_USR | \
149 P4_ESCR_T1_OS | \
150 P4_ESCR_T1_USR) | \
151 p4_config_pack_cccr(P4_CCCR_OVF | \
152 P4_CCCR_CASCADE | \
153 P4_CCCR_FORCE_OVF | \
154 P4_CCCR_THREAD_ANY | \
155 P4_CCCR_OVF_PMI_T0 | \
156 P4_CCCR_OVF_PMI_T1 | \
157 P4_CONFIG_ALIASABLE))
158
126static inline bool p4_is_event_cascaded(u64 config) 159static inline bool p4_is_event_cascaded(u64 config)
127{ 160{
128 u32 cccr = p4_config_unpack_cccr(config); 161 u32 cccr = p4_config_unpack_cccr(config);
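
The two new masks split an event config in half: P4_CONFIG_EVENT_ALIAS_MASK selects the bits that identify the event for alias matching, while P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS selects the caller and state bits that must survive a swap. Roughly, the intended combination looks like this (a sketch; find_alias() is a hypothetical stand-in for the table lookup that lands in perf_event_p4.c below):

        /* hedged sketch; find_alias() is a hypothetical stand-in */
        static u64 p4_swap_alias(u64 config)
        {
                u64 match = config & P4_CONFIG_EVENT_ALIAS_MASK;           /* identity */
                u64 keep  = config & P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS; /* caller bits */
                u64 alt   = find_alias(match);

                return alt ? (alt | keep) : 0;
        }
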
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 99ddd148a760..36361bf6fdd1 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -555,6 +555,9 @@ struct __large_struct { unsigned long buf[100]; };
555 555
556#endif /* CONFIG_X86_WP_WORKS_OK */ 556#endif /* CONFIG_X86_WP_WORKS_OK */
557 557
558extern unsigned long
559copy_from_user_nmi(void *to, const void __user *from, unsigned long n);
560
558/* 561/*
559 * movsl can be slow when source and dest are not both 8-byte aligned 562 * movsl can be slow when source and dest are not both 8-byte aligned
560 */ 563 */
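
The prototype lands here because copy_from_user_nmi() graduates from a perf-private helper to a shared library function (its GUP-based body is removed from perf_event.c further down). Its main customer is user-stack unwinding from NMI context; a sketch of the typical caller pattern, modeled on the perf callchain walker (the frame-layout name is illustrative):

        /* sketch: follow one saved frame pointer from NMI context */
        struct user_frame {                     /* illustrative layout */
                void __user *next_fp;
                unsigned long return_address;
        };

        static int walk_one_user_frame(const void __user *fp, unsigned long *ip)
        {
                struct user_frame frame;
                unsigned long bytes;

                /* returns the number of bytes actually copied */
                bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
                if (bytes != sizeof(frame))
                        return -EFAULT;

                *ip = frame.return_address;
                return 0;
        }
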
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 3a0338b4b179..4ee3abf20ed6 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -22,7 +22,6 @@
22#include <linux/sched.h> 22#include <linux/sched.h>
23#include <linux/uaccess.h> 23#include <linux/uaccess.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/highmem.h>
26#include <linux/cpu.h> 25#include <linux/cpu.h>
27#include <linux/bitops.h> 26#include <linux/bitops.h>
28 27
@@ -45,38 +44,27 @@ do { \
45#endif 44#endif
46 45
47/* 46/*
48 * best effort, GUP based copy_from_user() that assumes IRQ or NMI context 47 * | NHM/WSM | SNB |
48 * register -------------------------------
49 * | HT | no HT | HT | no HT |
50 *-----------------------------------------
51 * offcore | core | core | cpu | core |
52 * lbr_sel | core | core | cpu | core |
53 * ld_lat | cpu | core | cpu | core |
54 *-----------------------------------------
55 *
56 * Given that there is a small number of shared regs,
57 * we can pre-allocate their slot in the per-cpu
58 * per-core reg tables.
49 */ 59 */
50static unsigned long 60enum extra_reg_type {
51copy_from_user_nmi(void *to, const void __user *from, unsigned long n) 61 EXTRA_REG_NONE = -1, /* not used */
52{
53 unsigned long offset, addr = (unsigned long)from;
54 unsigned long size, len = 0;
55 struct page *page;
56 void *map;
57 int ret;
58
59 do {
60 ret = __get_user_pages_fast(addr, 1, 0, &page);
61 if (!ret)
62 break;
63
64 offset = addr & (PAGE_SIZE - 1);
65 size = min(PAGE_SIZE - offset, n - len);
66
67 map = kmap_atomic(page);
68 memcpy(to, map+offset, size);
69 kunmap_atomic(map);
70 put_page(page);
71 62
72 len += size; 63 EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */
73 to += size; 64 EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
74 addr += size;
75 65
76 } while (len < n); 66 EXTRA_REG_MAX /* number of entries needed */
77 67};
78 return len;
79}
80 68
81struct event_constraint { 69struct event_constraint {
82 union { 70 union {
@@ -132,11 +120,10 @@ struct cpu_hw_events {
132 struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; 120 struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
133 121
134 /* 122 /*
135 * Intel percore register state. 123 * manage shared (per-core, per-cpu) registers
136 * Coordinate shared resources between HT threads. 124 * used on Intel NHM/WSM/SNB
137 */ 125 */
138 int percore_used; /* Used by this CPU? */ 126 struct intel_shared_regs *shared_regs;
139 struct intel_percore *per_core;
140 127
141 /* 128 /*
142 * AMD specific bits 129 * AMD specific bits
@@ -187,26 +174,45 @@ struct cpu_hw_events {
187 for ((e) = (c); (e)->weight; (e)++) 174 for ((e) = (c); (e)->weight; (e)++)
188 175
189/* 176/*
177 * Per register state.
178 */
179struct er_account {
180 raw_spinlock_t lock; /* per-core: protect structure */
181 u64 config; /* extra MSR config */
182 u64 reg; /* extra MSR number */
183 atomic_t ref; /* reference count */
184};
185
186/*
190 * Extra registers for specific events. 187 * Extra registers for specific events.
188 *
191 * Some events need large masks and require external MSRs. 189 * Some events need large masks and require external MSRs.
 192 * Define a mapping to these extra registers. 190 * a PMU and sometimes between PMUs of sibling HT threads.
191 * a PMU and sometimes between PMU of sibling HT threads.
192 * In either case, the kernel needs to handle conflicting
193 * accesses to those extra, shared, regs. The data structure
194 * to manage those registers is stored in cpu_hw_event.
193 */ 195 */
194struct extra_reg { 196struct extra_reg {
195 unsigned int event; 197 unsigned int event;
196 unsigned int msr; 198 unsigned int msr;
197 u64 config_mask; 199 u64 config_mask;
198 u64 valid_mask; 200 u64 valid_mask;
201 int idx; /* per_xxx->regs[] reg index */
199}; 202};
200 203
201#define EVENT_EXTRA_REG(e, ms, m, vm) { \ 204#define EVENT_EXTRA_REG(e, ms, m, vm, i) { \
202 .event = (e), \ 205 .event = (e), \
203 .msr = (ms), \ 206 .msr = (ms), \
204 .config_mask = (m), \ 207 .config_mask = (m), \
205 .valid_mask = (vm), \ 208 .valid_mask = (vm), \
209 .idx = EXTRA_REG_##i \
206 } 210 }
207#define INTEL_EVENT_EXTRA_REG(event, msr, vm) \ 211
208 EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm) 212#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \
209#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0) 213 EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
214
215#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
210 216
211union perf_capabilities { 217union perf_capabilities {
212 struct { 218 struct {
@@ -252,7 +258,6 @@ struct x86_pmu {
252 void (*put_event_constraints)(struct cpu_hw_events *cpuc, 258 void (*put_event_constraints)(struct cpu_hw_events *cpuc,
253 struct perf_event *event); 259 struct perf_event *event);
254 struct event_constraint *event_constraints; 260 struct event_constraint *event_constraints;
255 struct event_constraint *percore_constraints;
256 void (*quirks)(void); 261 void (*quirks)(void);
257 int perfctr_second_write; 262 int perfctr_second_write;
258 263
@@ -286,8 +291,12 @@ struct x86_pmu {
286 * Extra registers for events 291 * Extra registers for events
287 */ 292 */
288 struct extra_reg *extra_regs; 293 struct extra_reg *extra_regs;
294 unsigned int er_flags;
289}; 295};
290 296
297#define ERF_NO_HT_SHARING 1
298#define ERF_HAS_RSP_1 2
299
291static struct x86_pmu x86_pmu __read_mostly; 300static struct x86_pmu x86_pmu __read_mostly;
292 301
293static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { 302static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
@@ -393,10 +402,10 @@ static inline unsigned int x86_pmu_event_addr(int index)
393 */ 402 */
394static int x86_pmu_extra_regs(u64 config, struct perf_event *event) 403static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
395{ 404{
405 struct hw_perf_event_extra *reg;
396 struct extra_reg *er; 406 struct extra_reg *er;
397 407
398 event->hw.extra_reg = 0; 408 reg = &event->hw.extra_reg;
399 event->hw.extra_config = 0;
400 409
401 if (!x86_pmu.extra_regs) 410 if (!x86_pmu.extra_regs)
402 return 0; 411 return 0;
@@ -406,8 +415,10 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
406 continue; 415 continue;
407 if (event->attr.config1 & ~er->valid_mask) 416 if (event->attr.config1 & ~er->valid_mask)
408 return -EINVAL; 417 return -EINVAL;
409 event->hw.extra_reg = er->msr; 418
410 event->hw.extra_config = event->attr.config1; 419 reg->idx = er->idx;
420 reg->config = event->attr.config1;
421 reg->reg = er->msr;
411 break; 422 break;
412 } 423 }
413 return 0; 424 return 0;
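
The loop above now fills a self-contained per-event descriptor, event->hw.extra_reg, instead of the two loose extra_reg/extra_config fields. Its definition lives in include/linux/perf_event.h, outside this arch diff; reconstructed here from usage (hedged):

        /* reconstructed from usage in this file; see include/linux/perf_event.h */
        struct hw_perf_event_extra {
                u64             config; /* value to write into the extra MSR */
                unsigned int    reg;    /* address of the extra MSR */
                int             alloc;  /* already allocated for this event? */
                int             idx;    /* slot in shared_regs->regs[] */
        };
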
@@ -706,6 +717,9 @@ static int __x86_pmu_event_init(struct perf_event *event)
706 event->hw.last_cpu = -1; 717 event->hw.last_cpu = -1;
707 event->hw.last_tag = ~0ULL; 718 event->hw.last_tag = ~0ULL;
708 719
720 /* mark unused */
721 event->hw.extra_reg.idx = EXTRA_REG_NONE;
722
709 return x86_pmu.hw_config(event); 723 return x86_pmu.hw_config(event);
710} 724}
711 725
@@ -747,8 +761,8 @@ static void x86_pmu_disable(struct pmu *pmu)
747static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, 761static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
748 u64 enable_mask) 762 u64 enable_mask)
749{ 763{
750 if (hwc->extra_reg) 764 if (hwc->extra_reg.reg)
751 wrmsrl(hwc->extra_reg, hwc->extra_config); 765 wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
752 wrmsrl(hwc->config_base, hwc->config | enable_mask); 766 wrmsrl(hwc->config_base, hwc->config | enable_mask);
753} 767}
754 768
@@ -1332,7 +1346,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1332 if (!x86_perf_event_set_period(event)) 1346 if (!x86_perf_event_set_period(event))
1333 continue; 1347 continue;
1334 1348
1335 if (perf_event_overflow(event, 1, &data, regs)) 1349 if (perf_event_overflow(event, &data, regs))
1336 x86_pmu_stop(event, 0); 1350 x86_pmu_stop(event, 0);
1337 } 1351 }
1338 1352
@@ -1637,6 +1651,40 @@ static int x86_pmu_commit_txn(struct pmu *pmu)
1637 perf_pmu_enable(pmu); 1651 perf_pmu_enable(pmu);
1638 return 0; 1652 return 0;
1639} 1653}
1654/*
1655 * a fake_cpuc is used to validate event groups. Due to
1656 * the extra reg logic, we need to also allocate a fake
1657 * per_core and per_cpu structure. Otherwise, group events
1658 * using extra reg may conflict without the kernel being
1659 * able to catch this when the last event gets added to
1660 * the group.
1661 */
1662static void free_fake_cpuc(struct cpu_hw_events *cpuc)
1663{
1664 kfree(cpuc->shared_regs);
1665 kfree(cpuc);
1666}
1667
1668static struct cpu_hw_events *allocate_fake_cpuc(void)
1669{
1670 struct cpu_hw_events *cpuc;
1671 int cpu = raw_smp_processor_id();
1672
1673 cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL);
1674 if (!cpuc)
1675 return ERR_PTR(-ENOMEM);
1676
 1677 /* only needed if we have extra_regs */
1678 if (x86_pmu.extra_regs) {
1679 cpuc->shared_regs = allocate_shared_regs(cpu);
1680 if (!cpuc->shared_regs)
1681 goto error;
1682 }
1683 return cpuc;
1684error:
1685 free_fake_cpuc(cpuc);
1686 return ERR_PTR(-ENOMEM);
1687}
1640 1688
1641/* 1689/*
1642 * validate that we can schedule this event 1690 * validate that we can schedule this event
@@ -1647,9 +1695,9 @@ static int validate_event(struct perf_event *event)
1647 struct event_constraint *c; 1695 struct event_constraint *c;
1648 int ret = 0; 1696 int ret = 0;
1649 1697
1650 fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO); 1698 fake_cpuc = allocate_fake_cpuc();
1651 if (!fake_cpuc) 1699 if (IS_ERR(fake_cpuc))
1652 return -ENOMEM; 1700 return PTR_ERR(fake_cpuc);
1653 1701
1654 c = x86_pmu.get_event_constraints(fake_cpuc, event); 1702 c = x86_pmu.get_event_constraints(fake_cpuc, event);
1655 1703
@@ -1659,7 +1707,7 @@ static int validate_event(struct perf_event *event)
1659 if (x86_pmu.put_event_constraints) 1707 if (x86_pmu.put_event_constraints)
1660 x86_pmu.put_event_constraints(fake_cpuc, event); 1708 x86_pmu.put_event_constraints(fake_cpuc, event);
1661 1709
1662 kfree(fake_cpuc); 1710 free_fake_cpuc(fake_cpuc);
1663 1711
1664 return ret; 1712 return ret;
1665} 1713}
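
Note the error-convention change: allocate_fake_cpuc() reports failure through an error pointer rather than NULL, so validate_event() and validate_group() can propagate a real errno. The idiom in miniature:

        #include <linux/err.h>
        #include <linux/slab.h>

        /* encode an errno in a (non-dereferenceable) pointer value */
        static struct cpu_hw_events *example_alloc(void)
        {
                struct cpu_hw_events *cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL);

                if (!cpuc)
                        return ERR_PTR(-ENOMEM);
                return cpuc;
        }

        /* caller side: IS_ERR() tests the pointer, PTR_ERR() recovers the errno */
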
@@ -1679,36 +1727,32 @@ static int validate_group(struct perf_event *event)
1679{ 1727{
1680 struct perf_event *leader = event->group_leader; 1728 struct perf_event *leader = event->group_leader;
1681 struct cpu_hw_events *fake_cpuc; 1729 struct cpu_hw_events *fake_cpuc;
1682 int ret, n; 1730 int ret = -ENOSPC, n;
1683
1684 ret = -ENOMEM;
1685 fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
1686 if (!fake_cpuc)
1687 goto out;
1688 1731
1732 fake_cpuc = allocate_fake_cpuc();
1733 if (IS_ERR(fake_cpuc))
1734 return PTR_ERR(fake_cpuc);
1689 /* 1735 /*
1690 * the event is not yet connected with its 1736 * the event is not yet connected with its
1691 * siblings therefore we must first collect 1737 * siblings therefore we must first collect
1692 * existing siblings, then add the new event 1738 * existing siblings, then add the new event
1693 * before we can simulate the scheduling 1739 * before we can simulate the scheduling
1694 */ 1740 */
1695 ret = -ENOSPC;
1696 n = collect_events(fake_cpuc, leader, true); 1741 n = collect_events(fake_cpuc, leader, true);
1697 if (n < 0) 1742 if (n < 0)
1698 goto out_free; 1743 goto out;
1699 1744
1700 fake_cpuc->n_events = n; 1745 fake_cpuc->n_events = n;
1701 n = collect_events(fake_cpuc, event, false); 1746 n = collect_events(fake_cpuc, event, false);
1702 if (n < 0) 1747 if (n < 0)
1703 goto out_free; 1748 goto out;
1704 1749
1705 fake_cpuc->n_events = n; 1750 fake_cpuc->n_events = n;
1706 1751
1707 ret = x86_pmu.schedule_events(fake_cpuc, n, NULL); 1752 ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
1708 1753
1709out_free:
1710 kfree(fake_cpuc);
1711out: 1754out:
1755 free_fake_cpuc(fake_cpuc);
1712 return ret; 1756 return ret;
1713} 1757}
1714 1758
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index fe29c1d2219e..941caa2e449b 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -89,6 +89,20 @@ static __initconst const u64 amd_hw_cache_event_ids
89 [ C(RESULT_MISS) ] = -1, 89 [ C(RESULT_MISS) ] = -1,
90 }, 90 },
91 }, 91 },
92 [ C(NODE) ] = {
93 [ C(OP_READ) ] = {
94 [ C(RESULT_ACCESS) ] = 0xb8e9, /* CPU Request to Memory, l+r */
95 [ C(RESULT_MISS) ] = 0x98e9, /* CPU Request to Memory, r */
96 },
97 [ C(OP_WRITE) ] = {
98 [ C(RESULT_ACCESS) ] = -1,
99 [ C(RESULT_MISS) ] = -1,
100 },
101 [ C(OP_PREFETCH) ] = {
102 [ C(RESULT_ACCESS) ] = -1,
103 [ C(RESULT_MISS) ] = -1,
104 },
105 },
92}; 106};
93 107
94/* 108/*
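
On AMD the NODE row is backed by the northbridge "CPU Request to Memory" events: local-plus-remote traffic counts as an access (0xb8e9) and the remote-only subset as a miss (0x98e9); -1 marks op/result combinations the hardware cannot count. These tables are consumed by the generic x86 cache-event lookup, roughly as below (trimmed sketch of set_ext_hw_attr() from perf_event.c; signature simplified, bounds checks and the extra-regs hook omitted):

        static int set_ext_hw_attr(struct hw_perf_event *hwc, u64 config)
        {
                unsigned int type   = (config >>  0) & 0xff;
                unsigned int op     = (config >>  8) & 0xff;
                unsigned int result = (config >> 16) & 0xff;
                u64 val;

                val = hw_cache_event_ids[type][op][result];
                if (val == 0)
                        return -ENOENT;         /* cell never filled in */
                if (val == -1)
                        return -EINVAL;         /* cell explicitly unsupported */

                hwc->config |= val;
                return 0;
        }
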
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 41178c826c48..45fbb8f7f549 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1,25 +1,15 @@
1#ifdef CONFIG_CPU_SUP_INTEL 1#ifdef CONFIG_CPU_SUP_INTEL
2 2
3#define MAX_EXTRA_REGS 2
4
5/*
6 * Per register state.
7 */
8struct er_account {
9 int ref; /* reference count */
10 unsigned int extra_reg; /* extra MSR number */
11 u64 extra_config; /* extra MSR config */
12};
13
14/* 3/*
15 * Per core state 4 * Per core/cpu state
16 * This used to coordinate shared registers for HT threads. 5 *
6 * Used to coordinate shared registers between HT threads or
7 * among events on a single PMU.
17 */ 8 */
18struct intel_percore { 9struct intel_shared_regs {
19 raw_spinlock_t lock; /* protect structure */ 10 struct er_account regs[EXTRA_REG_MAX];
20 struct er_account regs[MAX_EXTRA_REGS]; 11 int refcnt; /* per-core: #HT threads */
21 int refcnt; /* number of threads */ 12 unsigned core_id; /* per-core: core id */
22 unsigned core_id;
23}; 13};
24 14
25/* 15/*
@@ -88,16 +78,10 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
88 78
89static struct extra_reg intel_nehalem_extra_regs[] __read_mostly = 79static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
90{ 80{
91 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), 81 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
92 EVENT_EXTRA_END 82 EVENT_EXTRA_END
93}; 83};
94 84
95static struct event_constraint intel_nehalem_percore_constraints[] __read_mostly =
96{
97 INTEL_EVENT_CONSTRAINT(0xb7, 0),
98 EVENT_CONSTRAINT_END
99};
100
101static struct event_constraint intel_westmere_event_constraints[] __read_mostly = 85static struct event_constraint intel_westmere_event_constraints[] __read_mostly =
102{ 86{
103 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 87 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -116,8 +100,6 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
116 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 100 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
117 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ 101 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
118 INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ 102 INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
119 INTEL_EVENT_CONSTRAINT(0xb7, 0x1), /* OFF_CORE_RESPONSE_0 */
120 INTEL_EVENT_CONSTRAINT(0xbb, 0x8), /* OFF_CORE_RESPONSE_1 */
121 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ 103 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
122 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ 104 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
123 EVENT_CONSTRAINT_END 105 EVENT_CONSTRAINT_END
@@ -125,15 +107,13 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
125 107
126static struct extra_reg intel_westmere_extra_regs[] __read_mostly = 108static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
127{ 109{
128 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), 110 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
129 INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff), 111 INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
130 EVENT_EXTRA_END 112 EVENT_EXTRA_END
131}; 113};
132 114
133static struct event_constraint intel_westmere_percore_constraints[] __read_mostly = 115static struct event_constraint intel_v1_event_constraints[] __read_mostly =
134{ 116{
135 INTEL_EVENT_CONSTRAINT(0xb7, 0),
136 INTEL_EVENT_CONSTRAINT(0xbb, 0),
137 EVENT_CONSTRAINT_END 117 EVENT_CONSTRAINT_END
138}; 118};
139 119
@@ -145,6 +125,12 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
145 EVENT_CONSTRAINT_END 125 EVENT_CONSTRAINT_END
146}; 126};
147 127
128static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
129 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
130 INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
131 EVENT_EXTRA_END
132};
133
148static u64 intel_pmu_event_map(int hw_event) 134static u64 intel_pmu_event_map(int hw_event)
149{ 135{
150 return intel_perfmon_event_map[hw_event]; 136 return intel_perfmon_event_map[hw_event];
@@ -245,6 +231,21 @@ static __initconst const u64 snb_hw_cache_event_ids
245 [ C(RESULT_MISS) ] = -1, 231 [ C(RESULT_MISS) ] = -1,
246 }, 232 },
247 }, 233 },
234 [ C(NODE) ] = {
235 [ C(OP_READ) ] = {
236 [ C(RESULT_ACCESS) ] = -1,
237 [ C(RESULT_MISS) ] = -1,
238 },
239 [ C(OP_WRITE) ] = {
240 [ C(RESULT_ACCESS) ] = -1,
241 [ C(RESULT_MISS) ] = -1,
242 },
243 [ C(OP_PREFETCH) ] = {
244 [ C(RESULT_ACCESS) ] = -1,
245 [ C(RESULT_MISS) ] = -1,
246 },
247 },
248
248}; 249};
249 250
250static __initconst const u64 westmere_hw_cache_event_ids 251static __initconst const u64 westmere_hw_cache_event_ids
@@ -346,6 +347,20 @@ static __initconst const u64 westmere_hw_cache_event_ids
346 [ C(RESULT_MISS) ] = -1, 347 [ C(RESULT_MISS) ] = -1,
347 }, 348 },
348 }, 349 },
350 [ C(NODE) ] = {
351 [ C(OP_READ) ] = {
352 [ C(RESULT_ACCESS) ] = 0x01b7,
353 [ C(RESULT_MISS) ] = 0x01b7,
354 },
355 [ C(OP_WRITE) ] = {
356 [ C(RESULT_ACCESS) ] = 0x01b7,
357 [ C(RESULT_MISS) ] = 0x01b7,
358 },
359 [ C(OP_PREFETCH) ] = {
360 [ C(RESULT_ACCESS) ] = 0x01b7,
361 [ C(RESULT_MISS) ] = 0x01b7,
362 },
363 },
349}; 364};
350 365
351/* 366/*
@@ -398,7 +413,21 @@ static __initconst const u64 nehalem_hw_cache_extra_regs
398 [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS, 413 [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
399 [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_L3_MISS, 414 [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
400 }, 415 },
401 } 416 },
417 [ C(NODE) ] = {
418 [ C(OP_READ) ] = {
419 [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_ALL_DRAM,
420 [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_REMOTE_DRAM,
421 },
422 [ C(OP_WRITE) ] = {
423 [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_ALL_DRAM,
424 [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE_DRAM,
425 },
426 [ C(OP_PREFETCH) ] = {
427 [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_ALL_DRAM,
428 [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE_DRAM,
429 },
430 },
402}; 431};
403 432
404static __initconst const u64 nehalem_hw_cache_event_ids 433static __initconst const u64 nehalem_hw_cache_event_ids
@@ -500,6 +529,20 @@ static __initconst const u64 nehalem_hw_cache_event_ids
500 [ C(RESULT_MISS) ] = -1, 529 [ C(RESULT_MISS) ] = -1,
501 }, 530 },
502 }, 531 },
532 [ C(NODE) ] = {
533 [ C(OP_READ) ] = {
534 [ C(RESULT_ACCESS) ] = 0x01b7,
535 [ C(RESULT_MISS) ] = 0x01b7,
536 },
537 [ C(OP_WRITE) ] = {
538 [ C(RESULT_ACCESS) ] = 0x01b7,
539 [ C(RESULT_MISS) ] = 0x01b7,
540 },
541 [ C(OP_PREFETCH) ] = {
542 [ C(RESULT_ACCESS) ] = 0x01b7,
543 [ C(RESULT_MISS) ] = 0x01b7,
544 },
545 },
503}; 546};
504 547
505static __initconst const u64 core2_hw_cache_event_ids 548static __initconst const u64 core2_hw_cache_event_ids
@@ -1003,7 +1046,7 @@ again:
1003 1046
1004 data.period = event->hw.last_period; 1047 data.period = event->hw.last_period;
1005 1048
1006 if (perf_event_overflow(event, 1, &data, regs)) 1049 if (perf_event_overflow(event, &data, regs))
1007 x86_pmu_stop(event, 0); 1050 x86_pmu_stop(event, 0);
1008 } 1051 }
1009 1052
@@ -1037,65 +1080,121 @@ intel_bts_constraints(struct perf_event *event)
1037 return NULL; 1080 return NULL;
1038} 1081}
1039 1082
1083static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
1084{
1085 if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
1086 return false;
1087
1088 if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
1089 event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
1090 event->hw.config |= 0x01bb;
1091 event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
1092 event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
1093 } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
1094 event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
1095 event->hw.config |= 0x01b7;
1096 event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
1097 event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
1098 }
1099
1100 if (event->hw.extra_reg.idx == orig_idx)
1101 return false;
1102
1103 return true;
1104}
1105
1106/*
1107 * manage allocation of shared extra msr for certain events
1108 *
1109 * sharing can be:
1110 * per-cpu: to be shared between the various events on a single PMU
1111 * per-core: per-cpu + shared by HT threads
1112 */
1040static struct event_constraint * 1113static struct event_constraint *
1041intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) 1114__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
1115 struct perf_event *event)
1042{ 1116{
1043 struct hw_perf_event *hwc = &event->hw; 1117 struct event_constraint *c = &emptyconstraint;
1044 unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT; 1118 struct hw_perf_event_extra *reg = &event->hw.extra_reg;
1045 struct event_constraint *c;
1046 struct intel_percore *pc;
1047 struct er_account *era; 1119 struct er_account *era;
1048 int i; 1120 unsigned long flags;
1049 int free_slot; 1121 int orig_idx = reg->idx;
1050 int found;
1051 1122
1052 if (!x86_pmu.percore_constraints || hwc->extra_alloc) 1123 /* already allocated shared msr */
1053 return NULL; 1124 if (reg->alloc)
1125 return &unconstrained;
1054 1126
1055 for (c = x86_pmu.percore_constraints; c->cmask; c++) { 1127again:
1056 if (e != c->code) 1128 era = &cpuc->shared_regs->regs[reg->idx];
1057 continue; 1129 /*
 1130 * we use raw_spin_lock_irqsave() to avoid lockdep issues when
1131 * passing a fake cpuc
1132 */
1133 raw_spin_lock_irqsave(&era->lock, flags);
1134
1135 if (!atomic_read(&era->ref) || era->config == reg->config) {
1136
1137 /* lock in msr value */
1138 era->config = reg->config;
1139 era->reg = reg->reg;
1140
1141 /* one more user */
1142 atomic_inc(&era->ref);
1143
1144 /* no need to reallocate during incremental event scheduling */
1145 reg->alloc = 1;
1058 1146
1059 /* 1147 /*
1060 * Allocate resource per core. 1148 * All events using extra_reg are unconstrained.
1149 * Avoids calling x86_get_event_constraints()
1150 *
1151 * Must revisit if extra_reg controlling events
1152 * ever have constraints. Worst case we go through
1153 * the regular event constraint table.
1061 */ 1154 */
1062 pc = cpuc->per_core; 1155 c = &unconstrained;
1063 if (!pc) 1156 } else if (intel_try_alt_er(event, orig_idx)) {
1064 break; 1157 raw_spin_unlock(&era->lock);
1065 c = &emptyconstraint; 1158 goto again;
1066 raw_spin_lock(&pc->lock);
1067 free_slot = -1;
1068 found = 0;
1069 for (i = 0; i < MAX_EXTRA_REGS; i++) {
1070 era = &pc->regs[i];
1071 if (era->ref > 0 && hwc->extra_reg == era->extra_reg) {
1072 /* Allow sharing same config */
1073 if (hwc->extra_config == era->extra_config) {
1074 era->ref++;
1075 cpuc->percore_used = 1;
1076 hwc->extra_alloc = 1;
1077 c = NULL;
1078 }
1079 /* else conflict */
1080 found = 1;
1081 break;
1082 } else if (era->ref == 0 && free_slot == -1)
1083 free_slot = i;
1084 }
1085 if (!found && free_slot != -1) {
1086 era = &pc->regs[free_slot];
1087 era->ref = 1;
1088 era->extra_reg = hwc->extra_reg;
1089 era->extra_config = hwc->extra_config;
1090 cpuc->percore_used = 1;
1091 hwc->extra_alloc = 1;
1092 c = NULL;
1093 }
1094 raw_spin_unlock(&pc->lock);
1095 return c;
1096 } 1159 }
1160 raw_spin_unlock_irqrestore(&era->lock, flags);
1097 1161
1098 return NULL; 1162 return c;
1163}
1164
1165static void
1166__intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
1167 struct hw_perf_event_extra *reg)
1168{
1169 struct er_account *era;
1170
1171 /*
1172 * only put constraint if extra reg was actually
1173 * allocated. Also takes care of event which do
1174 * not use an extra shared reg
1175 */
1176 if (!reg->alloc)
1177 return;
1178
1179 era = &cpuc->shared_regs->regs[reg->idx];
1180
1181 /* one fewer user */
1182 atomic_dec(&era->ref);
1183
1184 /* allocate again next time */
1185 reg->alloc = 0;
1186}
1187
1188static struct event_constraint *
1189intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
1190 struct perf_event *event)
1191{
1192 struct event_constraint *c = NULL;
1193
1194 if (event->hw.extra_reg.idx != EXTRA_REG_NONE)
1195 c = __intel_shared_reg_get_constraints(cpuc, event);
1196
1197 return c;
1099} 1198}
1100 1199
1101static struct event_constraint * 1200static struct event_constraint *
@@ -1111,49 +1210,28 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
1111 if (c) 1210 if (c)
1112 return c; 1211 return c;
1113 1212
1114 c = intel_percore_constraints(cpuc, event); 1213 c = intel_shared_regs_constraints(cpuc, event);
1115 if (c) 1214 if (c)
1116 return c; 1215 return c;
1117 1216
1118 return x86_get_event_constraints(cpuc, event); 1217 return x86_get_event_constraints(cpuc, event);
1119} 1218}
1120 1219
1121static void intel_put_event_constraints(struct cpu_hw_events *cpuc, 1220static void
1221intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
1122 struct perf_event *event) 1222 struct perf_event *event)
1123{ 1223{
1124 struct extra_reg *er; 1224 struct hw_perf_event_extra *reg;
1125 struct intel_percore *pc;
1126 struct er_account *era;
1127 struct hw_perf_event *hwc = &event->hw;
1128 int i, allref;
1129 1225
1130 if (!cpuc->percore_used) 1226 reg = &event->hw.extra_reg;
1131 return; 1227 if (reg->idx != EXTRA_REG_NONE)
1132 1228 __intel_shared_reg_put_constraints(cpuc, reg);
1133 for (er = x86_pmu.extra_regs; er->msr; er++) { 1229}
1134 if (er->event != (hwc->config & er->config_mask))
1135 continue;
1136 1230
1137 pc = cpuc->per_core; 1231static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
1138 raw_spin_lock(&pc->lock); 1232 struct perf_event *event)
1139 for (i = 0; i < MAX_EXTRA_REGS; i++) { 1233{
1140 era = &pc->regs[i]; 1234 intel_put_shared_regs_event_constraints(cpuc, event);
1141 if (era->ref > 0 &&
1142 era->extra_config == hwc->extra_config &&
1143 era->extra_reg == er->msr) {
1144 era->ref--;
1145 hwc->extra_alloc = 0;
1146 break;
1147 }
1148 }
1149 allref = 0;
1150 for (i = 0; i < MAX_EXTRA_REGS; i++)
1151 allref += pc->regs[i].ref;
1152 if (allref == 0)
1153 cpuc->percore_used = 0;
1154 raw_spin_unlock(&pc->lock);
1155 break;
1156 }
1157} 1235}
1158 1236
1159static int intel_pmu_hw_config(struct perf_event *event) 1237static int intel_pmu_hw_config(struct perf_event *event)
@@ -1231,20 +1309,36 @@ static __initconst const struct x86_pmu core_pmu = {
1231 .event_constraints = intel_core_event_constraints, 1309 .event_constraints = intel_core_event_constraints,
1232}; 1310};
1233 1311
1312static struct intel_shared_regs *allocate_shared_regs(int cpu)
1313{
1314 struct intel_shared_regs *regs;
1315 int i;
1316
1317 regs = kzalloc_node(sizeof(struct intel_shared_regs),
1318 GFP_KERNEL, cpu_to_node(cpu));
1319 if (regs) {
1320 /*
1321 * initialize the locks to keep lockdep happy
1322 */
1323 for (i = 0; i < EXTRA_REG_MAX; i++)
1324 raw_spin_lock_init(&regs->regs[i].lock);
1325
1326 regs->core_id = -1;
1327 }
1328 return regs;
1329}
1330
1234static int intel_pmu_cpu_prepare(int cpu) 1331static int intel_pmu_cpu_prepare(int cpu)
1235{ 1332{
1236 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); 1333 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
1237 1334
1238 if (!cpu_has_ht_siblings()) 1335 if (!x86_pmu.extra_regs)
1239 return NOTIFY_OK; 1336 return NOTIFY_OK;
1240 1337
1241 cpuc->per_core = kzalloc_node(sizeof(struct intel_percore), 1338 cpuc->shared_regs = allocate_shared_regs(cpu);
1242 GFP_KERNEL, cpu_to_node(cpu)); 1339 if (!cpuc->shared_regs)
1243 if (!cpuc->per_core)
1244 return NOTIFY_BAD; 1340 return NOTIFY_BAD;
1245 1341
1246 raw_spin_lock_init(&cpuc->per_core->lock);
1247 cpuc->per_core->core_id = -1;
1248 return NOTIFY_OK; 1342 return NOTIFY_OK;
1249} 1343}
1250 1344
@@ -1260,32 +1354,34 @@ static void intel_pmu_cpu_starting(int cpu)
1260 */ 1354 */
1261 intel_pmu_lbr_reset(); 1355 intel_pmu_lbr_reset();
1262 1356
1263 if (!cpu_has_ht_siblings()) 1357 if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING))
1264 return; 1358 return;
1265 1359
1266 for_each_cpu(i, topology_thread_cpumask(cpu)) { 1360 for_each_cpu(i, topology_thread_cpumask(cpu)) {
1267 struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core; 1361 struct intel_shared_regs *pc;
1268 1362
1363 pc = per_cpu(cpu_hw_events, i).shared_regs;
1269 if (pc && pc->core_id == core_id) { 1364 if (pc && pc->core_id == core_id) {
1270 kfree(cpuc->per_core); 1365 kfree(cpuc->shared_regs);
1271 cpuc->per_core = pc; 1366 cpuc->shared_regs = pc;
1272 break; 1367 break;
1273 } 1368 }
1274 } 1369 }
1275 1370
1276 cpuc->per_core->core_id = core_id; 1371 cpuc->shared_regs->core_id = core_id;
1277 cpuc->per_core->refcnt++; 1372 cpuc->shared_regs->refcnt++;
1278} 1373}
1279 1374
1280static void intel_pmu_cpu_dying(int cpu) 1375static void intel_pmu_cpu_dying(int cpu)
1281{ 1376{
1282 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); 1377 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
1283 struct intel_percore *pc = cpuc->per_core; 1378 struct intel_shared_regs *pc;
1284 1379
1380 pc = cpuc->shared_regs;
1285 if (pc) { 1381 if (pc) {
1286 if (pc->core_id == -1 || --pc->refcnt == 0) 1382 if (pc->core_id == -1 || --pc->refcnt == 0)
1287 kfree(pc); 1383 kfree(pc);
1288 cpuc->per_core = NULL; 1384 cpuc->shared_regs = NULL;
1289 } 1385 }
1290 1386
1291 fini_debug_store_on_cpu(cpu); 1387 fini_debug_store_on_cpu(cpu);
@@ -1436,7 +1532,6 @@ static __init int intel_pmu_init(void)
1436 1532
1437 x86_pmu.event_constraints = intel_nehalem_event_constraints; 1533 x86_pmu.event_constraints = intel_nehalem_event_constraints;
1438 x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints; 1534 x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
1439 x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
1440 x86_pmu.enable_all = intel_pmu_nhm_enable_all; 1535 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
1441 x86_pmu.extra_regs = intel_nehalem_extra_regs; 1536 x86_pmu.extra_regs = intel_nehalem_extra_regs;
1442 1537
@@ -1481,10 +1576,10 @@ static __init int intel_pmu_init(void)
1481 intel_pmu_lbr_init_nhm(); 1576 intel_pmu_lbr_init_nhm();
1482 1577
1483 x86_pmu.event_constraints = intel_westmere_event_constraints; 1578 x86_pmu.event_constraints = intel_westmere_event_constraints;
1484 x86_pmu.percore_constraints = intel_westmere_percore_constraints;
1485 x86_pmu.enable_all = intel_pmu_nhm_enable_all; 1579 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
1486 x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; 1580 x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
1487 x86_pmu.extra_regs = intel_westmere_extra_regs; 1581 x86_pmu.extra_regs = intel_westmere_extra_regs;
1582 x86_pmu.er_flags |= ERF_HAS_RSP_1;
1488 1583
1489 /* UOPS_ISSUED.STALLED_CYCLES */ 1584 /* UOPS_ISSUED.STALLED_CYCLES */
1490 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; 1585 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
@@ -1502,6 +1597,10 @@ static __init int intel_pmu_init(void)
1502 1597
1503 x86_pmu.event_constraints = intel_snb_event_constraints; 1598 x86_pmu.event_constraints = intel_snb_event_constraints;
1504 x86_pmu.pebs_constraints = intel_snb_pebs_events; 1599 x86_pmu.pebs_constraints = intel_snb_pebs_events;
1600 x86_pmu.extra_regs = intel_snb_extra_regs;
1601 /* all extra regs are per-cpu when HT is on */
1602 x86_pmu.er_flags |= ERF_HAS_RSP_1;
1603 x86_pmu.er_flags |= ERF_NO_HT_SHARING;
1505 1604
1506 /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ 1605 /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
1507 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; 1606 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
@@ -1512,11 +1611,19 @@ static __init int intel_pmu_init(void)
1512 break; 1611 break;
1513 1612
1514 default: 1613 default:
1515 /* 1614 switch (x86_pmu.version) {
1516 * default constraints for v2 and up 1615 case 1:
1517 */ 1616 x86_pmu.event_constraints = intel_v1_event_constraints;
1518 x86_pmu.event_constraints = intel_gen_event_constraints; 1617 pr_cont("generic architected perfmon v1, ");
1519 pr_cont("generic architected perfmon, "); 1618 break;
1619 default:
1620 /*
1621 * default constraints for v2 and up
1622 */
1623 x86_pmu.event_constraints = intel_gen_event_constraints;
1624 pr_cont("generic architected perfmon, ");
1625 break;
1626 }
1520 } 1627 }
1521 return 0; 1628 return 0;
1522} 1629}
@@ -1528,4 +1635,8 @@ static int intel_pmu_init(void)
1528 return 0; 1635 return 0;
1529} 1636}
1530 1637
1638static struct intel_shared_regs *allocate_shared_regs(int cpu)
1639{
1640 return NULL;
1641}
1531#endif /* CONFIG_CPU_SUP_INTEL */ 1642#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index bab491b8ee25..1b1ef3addcfd 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -340,7 +340,7 @@ static int intel_pmu_drain_bts_buffer(void)
340 */ 340 */
341 perf_prepare_sample(&header, &data, event, &regs); 341 perf_prepare_sample(&header, &data, event, &regs);
342 342
343 if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) 343 if (perf_output_begin(&handle, event, header.size * (top - at)))
344 return 1; 344 return 1;
345 345
346 for (; at < top; at++) { 346 for (; at < top; at++) {
@@ -616,7 +616,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
616 else 616 else
617 regs.flags &= ~PERF_EFLAGS_EXACT; 617 regs.flags &= ~PERF_EFLAGS_EXACT;
618 618
619 if (perf_event_overflow(event, 1, &data, &regs)) 619 if (perf_event_overflow(event, &data, &regs))
620 x86_pmu_stop(event, 0); 620 x86_pmu_stop(event, 0);
621} 621}
622 622
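
The perf_output_begin() change is the output-path half of the same interface diet: its trailing nmi and sample flags are dropped, leaving only the handle, the event and the size. Before/after, per the hunk above:

        /* before: explicit nmi/sample flags */
        if (perf_output_begin(&handle, event, size, 1, 1))
                return 1;

        /* after: the core derives both, so only the size remains */
        if (perf_output_begin(&handle, event, size))
                return 1;
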
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index ead584fb6a7d..7809d2bcb209 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -554,13 +554,102 @@ static __initconst const u64 p4_hw_cache_event_ids
554 [ C(RESULT_MISS) ] = -1, 554 [ C(RESULT_MISS) ] = -1,
555 }, 555 },
556 }, 556 },
557 [ C(NODE) ] = {
558 [ C(OP_READ) ] = {
559 [ C(RESULT_ACCESS) ] = -1,
560 [ C(RESULT_MISS) ] = -1,
561 },
562 [ C(OP_WRITE) ] = {
563 [ C(RESULT_ACCESS) ] = -1,
564 [ C(RESULT_MISS) ] = -1,
565 },
566 [ C(OP_PREFETCH) ] = {
567 [ C(RESULT_ACCESS) ] = -1,
568 [ C(RESULT_MISS) ] = -1,
569 },
570 },
557}; 571};
558 572
573/*
 574 * Because Netburst is quite restricted in how many
 575 * identical events may run simultaneously, we introduce event aliases,
 576 * i.e. different events which have the same functionality but
 577 * utilize non-intersecting resources (ESCR/CCCR/counter registers).
 578 *
 579 * This allows us to relax restrictions a bit and run two or more
580 * identical events together.
581 *
582 * Never set any custom internal bits such as P4_CONFIG_HT,
 583 * P4_CONFIG_ALIASABLE or bits for P4_PEBS_METRIC; they are
584 * either up to date automatically or not applicable at all.
585 */
586struct p4_event_alias {
587 u64 original;
588 u64 alternative;
589} p4_event_aliases[] = {
590 {
591 /*
592 * Non-halted cycles can be substituted with non-sleeping cycles (see
593 * Intel SDM Vol3b for details). We need this alias to be able
594 * to run nmi-watchdog and 'perf top' (or any other user space tool
595 * which is interested in running PERF_COUNT_HW_CPU_CYCLES)
596 * simultaneously.
597 */
598 .original =
599 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) |
600 P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)),
601 .alternative =
602 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_EXECUTION_EVENT) |
603 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0)|
604 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1)|
605 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2)|
606 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3)|
607 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) |
608 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) |
609 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) |
610 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3))|
611 p4_config_pack_cccr(P4_CCCR_THRESHOLD(15) | P4_CCCR_COMPLEMENT |
612 P4_CCCR_COMPARE),
613 },
614};
615
616static u64 p4_get_alias_event(u64 config)
617{
618 u64 config_match;
619 int i;
620
621 /*
 622 * Only an event carrying the special mark is allowed;
 623 * this makes sure it didn't come in as a malformed
 624 * RAW event.
625 */
626 if (!(config & P4_CONFIG_ALIASABLE))
627 return 0;
628
629 config_match = config & P4_CONFIG_EVENT_ALIAS_MASK;
630
631 for (i = 0; i < ARRAY_SIZE(p4_event_aliases); i++) {
632 if (config_match == p4_event_aliases[i].original) {
633 config_match = p4_event_aliases[i].alternative;
634 break;
635 } else if (config_match == p4_event_aliases[i].alternative) {
636 config_match = p4_event_aliases[i].original;
637 break;
638 }
639 }
640
641 if (i >= ARRAY_SIZE(p4_event_aliases))
642 return 0;
643
644 return config_match | (config & P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS);
645}
646
559static u64 p4_general_events[PERF_COUNT_HW_MAX] = { 647static u64 p4_general_events[PERF_COUNT_HW_MAX] = {
560 /* non-halted CPU clocks */ 648 /* non-halted CPU clocks */
561 [PERF_COUNT_HW_CPU_CYCLES] = 649 [PERF_COUNT_HW_CPU_CYCLES] =
562 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) | 650 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) |
563 P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)), 651 P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)) |
652 P4_CONFIG_ALIASABLE,
564 653
565 /* 654 /*
566 * retired instructions 655 * retired instructions
@@ -945,7 +1034,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
945 1034
946 if (!x86_perf_event_set_period(event)) 1035 if (!x86_perf_event_set_period(event))
947 continue; 1036 continue;
948 if (perf_event_overflow(event, 1, &data, regs)) 1037 if (perf_event_overflow(event, &data, regs))
949 x86_pmu_stop(event, 0); 1038 x86_pmu_stop(event, 0);
950 } 1039 }
951 1040
@@ -1120,6 +1209,8 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign
1120 struct p4_event_bind *bind; 1209 struct p4_event_bind *bind;
1121 unsigned int i, thread, num; 1210 unsigned int i, thread, num;
1122 int cntr_idx, escr_idx; 1211 int cntr_idx, escr_idx;
1212 u64 config_alias;
1213 int pass;
1123 1214
1124 bitmap_zero(used_mask, X86_PMC_IDX_MAX); 1215 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
1125 bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE); 1216 bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE);
@@ -1128,6 +1219,17 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign
1128 1219
1129 hwc = &cpuc->event_list[i]->hw; 1220 hwc = &cpuc->event_list[i]->hw;
1130 thread = p4_ht_thread(cpu); 1221 thread = p4_ht_thread(cpu);
1222 pass = 0;
1223
1224again:
1225 /*
 1226 * It's possible to bounce between the original
 1227 * and alternative events forever if both are
 1228 * already scheduled, hence the bounded retries.
1229 */
1230 if (pass > 2)
1231 goto done;
1232
1131 bind = p4_config_get_bind(hwc->config); 1233 bind = p4_config_get_bind(hwc->config);
1132 escr_idx = p4_get_escr_idx(bind->escr_msr[thread]); 1234 escr_idx = p4_get_escr_idx(bind->escr_msr[thread]);
1133 if (unlikely(escr_idx == -1)) 1235 if (unlikely(escr_idx == -1))
@@ -1141,8 +1243,17 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign
1141 } 1243 }
1142 1244
1143 cntr_idx = p4_next_cntr(thread, used_mask, bind); 1245 cntr_idx = p4_next_cntr(thread, used_mask, bind);
1144 if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) 1246 if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) {
1145 goto done; 1247 /*
1248 * Check whether an event alias is still available.
1249 */
1250 config_alias = p4_get_alias_event(hwc->config);
1251 if (!config_alias)
1252 goto done;
1253 hwc->config = config_alias;
1254 pass++;
1255 goto again;
1256 }
1146 1257
1147 p4_pmu_swap_config_ts(hwc, cpu); 1258 p4_pmu_swap_config_ts(hwc, cpu);
1148 if (assign) 1259 if (assign)
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index e71c98d3c0d2..19853ad8afc5 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -105,34 +105,6 @@ in_irq_stack(unsigned long *stack, unsigned long *irq_stack,
 }
 
 /*
- * We are returning from the irq stack and go to the previous one.
- * If the previous stack is also in the irq stack, then bp in the first
- * frame of the irq stack points to the previous, interrupted one.
- * Otherwise we have another level of indirection: We first save
- * the bp of the previous stack, then we switch the stack to the irq one
- * and save a new bp that links to the previous one.
- * (See save_args())
- */
-static inline unsigned long
-fixup_bp_irq_link(unsigned long bp, unsigned long *stack,
-		  unsigned long *irq_stack, unsigned long *irq_stack_end)
-{
-#ifdef CONFIG_FRAME_POINTER
-	struct stack_frame *frame = (struct stack_frame *)bp;
-	unsigned long next;
-
-	if (!in_irq_stack(stack, irq_stack, irq_stack_end)) {
-		if (!probe_kernel_address(&frame->next_frame, next))
-			return next;
-		else
-			WARN_ONCE(1, "Perf: bad frame pointer = %p in "
-				"callchain\n", &frame->next_frame);
-	}
-#endif
-	return bp;
-}
-
-/*
  * x86-64 can have up to three kernel stacks:
  *  process stack
  *  interrupt stack
@@ -155,9 +127,12 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 		task = current;
 
 	if (!stack) {
-		stack = &dummy;
-		if (task && task != current)
+		if (regs)
+			stack = (unsigned long *)regs->sp;
+		else if (task && task != current)
 			stack = (unsigned long *)task->thread.sp;
+		else
+			stack = &dummy;
 	}
 
 	if (!bp)
@@ -205,8 +180,6 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 		 * pointer (index -1 to end) in the IRQ stack:
 		 */
 		stack = (unsigned long *) (irq_stack_end[-1]);
-		bp = fixup_bp_irq_link(bp, stack, irq_stack,
-				       irq_stack_end);
 		irq_stack_end = NULL;
 		ops->stack(data, "EOI");
 		continue;
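(Editorial note: fixup_bp_irq_link() can go away because the interrupt entry path — see the entry_64.S hunks below — now stores the interrupted stack pointer in the top slot of the irq stack, so the unwinder hops stacks with a direct load instead of patching bp. A one-line model using the names from the hunk above:

	/* top-of-irq-stack slot holds the interrupted stack pointer */
	stack = (unsigned long *)irq_stack_end[-1];
)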
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 8a445a0c989e..d656f68371a4 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -297,27 +297,26 @@ ENDPROC(native_usergs_sysret64)
 	.endm
 
 /* save partial stack frame */
-	.pushsection .kprobes.text, "ax"
-ENTRY(save_args)
-	XCPT_FRAME
+	.macro SAVE_ARGS_IRQ
 	cld
-	/*
-	 * start from rbp in pt_regs and jump over
-	 * return address.
-	 */
-	movq_cfi rdi, RDI+8-RBP
-	movq_cfi rsi, RSI+8-RBP
-	movq_cfi rdx, RDX+8-RBP
-	movq_cfi rcx, RCX+8-RBP
-	movq_cfi rax, RAX+8-RBP
-	movq_cfi r8, R8+8-RBP
-	movq_cfi r9, R9+8-RBP
-	movq_cfi r10, R10+8-RBP
-	movq_cfi r11, R11+8-RBP
-
-	leaq -RBP+8(%rsp),%rdi	/* arg1 for handler */
-	movq_cfi rbp, 8		/* push %rbp */
-	leaq 8(%rsp), %rbp	/* mov %rsp, %ebp */
+	/* start from rbp in pt_regs and jump over */
+	movq_cfi rdi, RDI-RBP
+	movq_cfi rsi, RSI-RBP
+	movq_cfi rdx, RDX-RBP
+	movq_cfi rcx, RCX-RBP
+	movq_cfi rax, RAX-RBP
+	movq_cfi r8, R8-RBP
+	movq_cfi r9, R9-RBP
+	movq_cfi r10, R10-RBP
+	movq_cfi r11, R11-RBP
+
+	/* Save rbp so that we can unwind from get_irq_regs() */
+	movq_cfi rbp, 0
+
+	/* Save previous stack value */
+	movq %rsp, %rsi
+
+	leaq -RBP(%rsp),%rdi	/* arg1 for handler */
 	testl $3, CS(%rdi)
 	je 1f
 	SWAPGS
@@ -329,19 +328,14 @@ ENTRY(save_args)
 	 */
 1:	incl PER_CPU_VAR(irq_count)
 	jne 2f
-	popq_cfi %rax			/* move return address... */
 	mov PER_CPU_VAR(irq_stack_ptr),%rsp
 	EMPTY_FRAME 0
-	pushq_cfi %rbp			/* backlink for unwinder */
-	pushq_cfi %rax			/* ... to the new stack */
-	/*
-	 * We entered an interrupt context - irqs are off:
-	 */
-2:	TRACE_IRQS_OFF
-	ret
-	CFI_ENDPROC
-END(save_args)
-	.popsection
+
+2:	/* Store previous stack value */
+	pushq %rsi
+	/* We entered an interrupt context - irqs are off: */
+	TRACE_IRQS_OFF
+	.endm
 
 ENTRY(save_rest)
 	PARTIAL_FRAME 1 REST_SKIP+8
@@ -791,7 +785,7 @@ END(interrupt)
 	/* reserve pt_regs for scratch regs and rbp */
 	subq $ORIG_RAX-RBP, %rsp
 	CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
-	call save_args
+	SAVE_ARGS_IRQ
 	PARTIAL_FRAME 0
 	call \func
 	.endm
@@ -814,15 +808,14 @@ ret_from_intr:
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	decl PER_CPU_VAR(irq_count)
-	leaveq
 
-	CFI_RESTORE		rbp
+	/* Restore saved previous stack */
+	popq %rsi
+	leaq 16(%rsi), %rsp
+
 	CFI_DEF_CFA_REGISTER	rsp
-	CFI_ADJUST_CFA_OFFSET	-8
+	CFI_ADJUST_CFA_OFFSET	-16
 
-	/* we did not save rbx, restore only from ARGOFFSET */
-	addq $8, %rsp
-	CFI_ADJUST_CFA_OFFSET	-8
 exit_intr:
 	GET_THREAD_INFO(%rcx)
 	testl $3,CS-ARGOFFSET(%rsp)
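(Editorial sketch: SAVE_ARGS_IRQ pushes the pre-switch %rsp onto the irq stack and ret_from_intr pops it back, replacing the old leaveq/%rbp backlink, so returning works without frame pointers. A toy C model of that hand-off under stated assumptions — the array, names, and the literal 16 mirroring the leaq above are illustrative, not kernel code:

	static unsigned long irq_stack[512];
	static unsigned long *irq_sp = &irq_stack[512];

	static void irq_enter_model(unsigned long old_sp)
	{
		*--irq_sp = old_sp;			/* pushq %rsi */
	}

	static unsigned long irq_exit_model(void)
	{
		unsigned long old_sp = *irq_sp++;	/* popq %rsi */
		return old_sp + 16;			/* leaq 16(%rsi), %rsp */
	}
)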
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 5f9ecff328b5..00354d4919a9 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -608,7 +608,7 @@ int kgdb_arch_init(void)
 	return register_die_notifier(&kgdb_notifier);
 }
 
-static void kgdb_hw_overflow_handler(struct perf_event *event, int nmi,
+static void kgdb_hw_overflow_handler(struct perf_event *event,
 		struct perf_sample_data *data, struct pt_regs *regs)
 {
 	struct task_struct *tsk = current;
@@ -638,7 +638,7 @@ void kgdb_arch_late(void)
 	for (i = 0; i < HBP_NUM; i++) {
 		if (breakinfo[i].pev)
 			continue;
-		breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL);
+		breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL, NULL);
 		if (IS_ERR((void * __force)breakinfo[i].pev)) {
 			printk(KERN_ERR "kgdb: Could not allocate hw"
 				"breakpoints\nDisabling the kernel debugger\n");
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 807c2a2b80f1..82528799c5de 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -528,7 +528,7 @@ static int genregs_set(struct task_struct *target,
 	return ret;
 }
 
-static void ptrace_triggered(struct perf_event *bp, int nmi,
+static void ptrace_triggered(struct perf_event *bp,
 			     struct perf_sample_data *data,
 			     struct pt_regs *regs)
 {
@@ -715,7 +715,8 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
 	attr.bp_type = HW_BREAKPOINT_W;
 	attr.disabled = 1;
 
-	bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk);
+	bp = register_user_hw_breakpoint(&attr, ptrace_triggered,
+					 NULL, tsk);
 
 	/*
 	 * CHECKME: the previous code returned -EIO if the addr wasn't
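(Editorial sketch: the breakpoint registration helpers here and in the kgdb hunk above gain an extra argument — a context pointer handed through to the overflow handler — and existing callers pass NULL. A hedged sketch of a caller under the widened signature; install_watchpoint and my_triggered are hypothetical names, attr setup is elided to the fields shown above:

	static int install_watchpoint(struct perf_event_attr *attr,
				      struct task_struct *tsk)
	{
		struct perf_event *bp;

		/* the extra NULL is the new context argument */
		bp = register_user_hw_breakpoint(attr, my_triggered,
						 NULL, tsk);
		if (IS_ERR(bp))
			return PTR_ERR(bp);
		return 0;
	}
)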
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 55d9bc03f696..fdd0c6430e5a 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -66,7 +66,7 @@ void save_stack_trace(struct stack_trace *trace)
 }
 EXPORT_SYMBOL_GPL(save_stack_trace);
 
-void save_stack_trace_regs(struct stack_trace *trace, struct pt_regs *regs)
+void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
 {
 	dump_trace(current, regs, NULL, 0, &save_stack_ops, trace);
 	if (trace->nr_entries < trace->max_entries)
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index f2479f19ddde..6ba477342b8e 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -18,7 +18,7 @@ obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
 
 lib-y := delay.o
 lib-y += thunk_$(BITS).o
-lib-y += usercopy_$(BITS).o getuser.o putuser.o
+lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
 lib-y += memcpy_$(BITS).o
 lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
 
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
new file mode 100644
index 000000000000..97be9cb54483
--- /dev/null
+++ b/arch/x86/lib/usercopy.c
@@ -0,0 +1,43 @@
+/*
+ * User address space access functions.
+ *
+ * For licencing details see kernel-base/COPYING
+ */
+
+#include <linux/highmem.h>
+#include <linux/module.h>
+
+/*
+ * best effort, GUP based copy_from_user() that is NMI-safe
+ */
+unsigned long
+copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
+{
+	unsigned long offset, addr = (unsigned long)from;
+	unsigned long size, len = 0;
+	struct page *page;
+	void *map;
+	int ret;
+
+	do {
+		ret = __get_user_pages_fast(addr, 1, 0, &page);
+		if (!ret)
+			break;
+
+		offset = addr & (PAGE_SIZE - 1);
+		size = min(PAGE_SIZE - offset, n - len);
+
+		map = kmap_atomic(page);
+		memcpy(to, map+offset, size);
+		kunmap_atomic(map);
+		put_page(page);
+
+		len  += size;
+		to   += size;
+		addr += size;
+
+	} while (len < n);
+
+	return len;
+}
+EXPORT_SYMBOL_GPL(copy_from_user_nmi);
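(Editorial sketch: copy_from_user_nmi() returns the number of bytes actually copied, so callers must treat a short return as failure, exactly as the oprofile hunks below do. A minimal usage sketch under that assumption; read_user_frame is a hypothetical wrapper:

	static int read_user_frame(void *dst, const void __user *src,
				   unsigned long len)
	{
		/* short copy => a page could not be pinned or mapped */
		if (copy_from_user_nmi(dst, src, len) != len)
			return -EFAULT;
		return 0;
	}
)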
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 2dbf6bf4c7e5..4d09df054e39 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1059,7 +1059,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	if (unlikely(error_code & PF_RSVD))
 		pgtable_bad(regs, error_code, address);
 
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
 	/*
 	 * If we're in an interrupt, have no user context or are running
@@ -1161,11 +1161,11 @@ good_area:
 	if (flags & FAULT_FLAG_ALLOW_RETRY) {
 		if (fault & VM_FAULT_MAJOR) {
 			tsk->maj_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
 				      regs, address);
 		} else {
 			tsk->min_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
 				      regs, address);
 		}
 		if (fault & VM_FAULT_RETRY) {
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c
index 704a37cedddb..dab41876cdd5 100644
--- a/arch/x86/mm/kmemcheck/error.c
+++ b/arch/x86/mm/kmemcheck/error.c
@@ -185,7 +185,7 @@ void kmemcheck_error_save(enum kmemcheck_shadow state,
 	e->trace.entries	= e->trace_entries;
 	e->trace.max_entries	= ARRAY_SIZE(e->trace_entries);
 	e->trace.skip		= 0;
-	save_stack_trace_regs(&e->trace, regs);
+	save_stack_trace_regs(regs, &e->trace);
 
 	/* Round address down to nearest 16 bytes */
 	shadow_copy = kmemcheck_shadow_lookup(address
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index a5b64ab4cd6e..bff89dfe3619 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -11,10 +11,11 @@
 #include <linux/oprofile.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
+#include <linux/compat.h>
+#include <linux/uaccess.h>
+
 #include <asm/ptrace.h>
-#include <asm/uaccess.h>
 #include <asm/stacktrace.h>
-#include <linux/compat.h>
 
 static int backtrace_stack(void *data, char *name)
 {
@@ -40,13 +41,13 @@ static struct stacktrace_ops backtrace_ops = {
 static struct stack_frame_ia32 *
 dump_user_backtrace_32(struct stack_frame_ia32 *head)
 {
+	/* Also check accessibility of one struct frame_head beyond: */
 	struct stack_frame_ia32 bufhead[2];
 	struct stack_frame_ia32 *fp;
+	unsigned long bytes;
 
-	/* Also check accessibility of one struct frame_head beyond */
-	if (!access_ok(VERIFY_READ, head, sizeof(bufhead)))
-		return NULL;
-	if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead)))
+	bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead));
+	if (bytes != sizeof(bufhead))
 		return NULL;
 
 	fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame);
@@ -87,12 +88,12 @@ x86_backtrace_32(struct pt_regs * const regs, unsigned int depth)
 
 static struct stack_frame *dump_user_backtrace(struct stack_frame *head)
 {
+	/* Also check accessibility of one struct frame_head beyond: */
 	struct stack_frame bufhead[2];
+	unsigned long bytes;
 
-	/* Also check accessibility of one struct stack_frame beyond */
-	if (!access_ok(VERIFY_READ, head, sizeof(bufhead)))
-		return NULL;
-	if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead)))
+	bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead));
+	if (bytes != sizeof(bufhead))
 		return NULL;
 
 	oprofile_add_trace(bufhead[0].return_address);
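(Editorial sketch: with copy_from_user_nmi() available as a library routine, the user-mode backtracers drop their access_ok()/__copy_from_user_inatomic() pairs. A hedged sketch of the overall loop shape; walk_user_frames, the depth bound, and the strictly-upward termination test are illustrative of typical frame-pointer walkers, not a copy of this file:

	static void walk_user_frames(struct stack_frame *head,
				     unsigned int depth)
	{
		struct stack_frame buf;

		while (depth--) {
			if (copy_from_user_nmi(&buf, head, sizeof(buf))
					!= sizeof(buf))
				break;
			oprofile_add_trace(buf.return_address);
			/* frame pointers must move up the stack; bail
			 * out on cycles or corrupt chains */
			if (buf.next_frame <= head)
				break;
			head = buf.next_frame;
		}
	}
)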