author     Linus Torvalds <torvalds@linux-foundation.org>   2010-08-19 12:06:49 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2010-08-19 12:06:49 -0400
commit     b3ea36b7a242773d7a05cec6c071a47b6581c178 (patch)
tree       53edfe621b34f30b6f5212f4479d21e2c68dc743 /arch/x86
parent     763008c4357b73c8d18396dfd8d79dc58fa3f99d (diff)
parent     737480a0d525dae13306296da08029dff545bc72 (diff)
Merge branch 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  kprobes/x86: Fix the return address of multiple kretprobes
  perf tools: Fix build error on read only source.
  perf, x86: Fix Intel-nhm PMU programming errata workaround
Diffstat (limited to 'arch/x86')
-rw-r--r--   arch/x86/kernel/cpu/perf_event_intel.c   81
-rw-r--r--   arch/x86/kernel/kprobes.c                25
2 files changed, 85 insertions(+), 21 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 214ac860ebe0..d8d86d014008 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -491,33 +491,78 @@ static void intel_pmu_enable_all(int added)
  * Intel Errata AAP53 (model 30)
  * Intel Errata BD53 (model 44)
  *
- * These chips need to be 'reset' when adding counters by programming
- * the magic three (non counting) events 0x4300D2, 0x4300B1 and 0x4300B5
- * either in sequence on the same PMC or on different PMCs.
+ * The official story:
+ *   These chips need to be 'reset' when adding counters by programming the
+ *   magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
+ *   in sequence on the same PMC or on different PMCs.
+ *
+ * In practise it appears some of these events do in fact count, and
+ * we need to programm all 4 events.
  */
-static void intel_pmu_nhm_enable_all(int added)
+static void intel_pmu_nhm_workaround(void)
 {
-	if (added) {
-		struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-		int i;
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	static const unsigned long nhm_magic[4] = {
+		0x4300B5,
+		0x4300D2,
+		0x4300B1,
+		0x4300B1
+	};
+	struct perf_event *event;
+	int i;
 
-		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 0, 0x4300D2);
-		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x4300B1);
-		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x4300B5);
+	/*
+	 * The Errata requires below steps:
+	 * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
+	 * 2) Configure 4 PERFEVTSELx with the magic events and clear
+	 *    the corresponding PMCx;
+	 * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL;
+	 * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
+	 * 5) Clear 4 pairs of ERFEVTSELx and PMCx;
+	 */
 
-		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
-		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
+	/*
+	 * The real steps we choose are a little different from above.
+	 * A) To reduce MSR operations, we don't run step 1) as they
+	 *    are already cleared before this function is called;
+	 * B) Call x86_perf_event_update to save PMCx before configuring
+	 *    PERFEVTSELx with magic number;
+	 * C) With step 5), we do clear only when the PERFEVTSELx is
+	 *    not used currently.
+	 * D) Call x86_perf_event_set_period to restore PMCx;
+	 */
 
-		for (i = 0; i < 3; i++) {
-			struct perf_event *event = cpuc->events[i];
+	/* We always operate 4 pairs of PERF Counters */
+	for (i = 0; i < 4; i++) {
+		event = cpuc->events[i];
+		if (event)
+			x86_perf_event_update(event);
+	}
 
-			if (!event)
-				continue;
+	for (i = 0; i < 4; i++) {
+		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
+		wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
+	}
 
+	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
+	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
+
+	for (i = 0; i < 4; i++) {
+		event = cpuc->events[i];
+
+		if (event) {
+			x86_perf_event_set_period(event);
 			__x86_pmu_enable_event(&event->hw,
 					ARCH_PERFMON_EVENTSEL_ENABLE);
-		}
+		} else
+			wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
 	}
+}
+
+static void intel_pmu_nhm_enable_all(int added)
+{
+	if (added)
+		intel_pmu_nhm_workaround();
 	intel_pmu_enable_all(added);
 }
 
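The comments added in intel_pmu_nhm_workaround() above spell out the errata sequence (steps 1-5) and the shortcuts actually taken (A-D). As a rough illustration only, the ordering boils down to the following self-contained user-space sketch; wrmsr_stub() and the saved-counter comments are hypothetical stand-ins for the kernel's wrmsrl(), x86_perf_event_update() and x86_perf_event_set_period(), not the real implementation.

/* Schematic sketch of the Nehalem workaround order (not kernel code). */
#include <stdio.h>

#define MSR_PERFEVTSEL0  0x186   /* IA32_PERFEVTSEL0 */
#define MSR_PERFCTR0     0x0c1   /* IA32_PMC0 */
#define MSR_GLOBAL_CTRL  0x38f   /* IA32_PERF_GLOBAL_CTRL */

static void wrmsr_stub(unsigned int msr, unsigned long long val)
{
	/* hypothetical stand-in for wrmsrl(); just prints the write */
	printf("wrmsr 0x%03x <- 0x%llx\n", msr, val);
}

int main(void)
{
	static const unsigned long long magic[4] = {
		0x4300B5, 0x4300D2, 0x4300B1, 0x4300B1
	};
	int i;

	/* B) the kernel saves live counter values here (x86_perf_event_update) */

	/* 2) program the four magic events and zero the matching counters */
	for (i = 0; i < 4; i++) {
		wrmsr_stub(MSR_PERFEVTSEL0 + i, magic[i]);
		wrmsr_stub(MSR_PERFCTR0 + i, 0);
	}

	/* 3) + 4) briefly enable counters 0-3 via GLOBAL_CTRL, then clear it */
	wrmsr_stub(MSR_GLOBAL_CTRL, 0xf);
	wrmsr_stub(MSR_GLOBAL_CTRL, 0x0);

	/* 5)/C)/D) clear selectors that are unused; counters that were in use
	 * are instead reprogrammed and restored (x86_perf_event_set_period) */
	for (i = 0; i < 4; i++)
		wrmsr_stub(MSR_PERFEVTSEL0 + i, 0);

	return 0;
}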
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 1bfb6cf4dd55..770ebfb349e9 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -709,6 +709,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 	struct hlist_node *node, *tmp;
 	unsigned long flags, orig_ret_address = 0;
 	unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
+	kprobe_opcode_t *correct_ret_addr = NULL;
 
 	INIT_HLIST_HEAD(&empty_rp);
 	kretprobe_hash_lock(current, &head, &flags);
@@ -740,14 +741,34 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 			/* another task is sharing our hash bucket */
 			continue;
 
+		orig_ret_address = (unsigned long)ri->ret_addr;
+
+		if (orig_ret_address != trampoline_address)
+			/*
+			 * This is the real return address. Any other
+			 * instances associated with this task are for
+			 * other calls deeper on the call stack
+			 */
+			break;
+	}
+
+	kretprobe_assert(ri, orig_ret_address, trampoline_address);
+
+	correct_ret_addr = ri->ret_addr;
+	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+		if (ri->task != current)
+			/* another task is sharing our hash bucket */
+			continue;
+
+		orig_ret_address = (unsigned long)ri->ret_addr;
 		if (ri->rp && ri->rp->handler) {
 			__get_cpu_var(current_kprobe) = &ri->rp->kp;
 			get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
+			ri->ret_addr = correct_ret_addr;
 			ri->rp->handler(ri, regs);
 			__get_cpu_var(current_kprobe) = NULL;
 		}
 
-		orig_ret_address = (unsigned long)ri->ret_addr;
 		recycle_rp_inst(ri, &empty_rp);
 
 		if (orig_ret_address != trampoline_address)
@@ -759,8 +780,6 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 			break;
 	}
 
-	kretprobe_assert(ri, orig_ret_address, trampoline_address);
-
 	kretprobe_hash_unlock(current, &flags);
 
 	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
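The kprobes change replaces the single walk over the per-task kretprobe instances with two passes: the first pass only locates the real return address (the first instance whose saved address is not the trampoline), and the second pass runs the user handlers with ri->ret_addr pointed at that corrected address before each instance is recycled. A minimal stand-alone sketch of that two-pass idea follows; the instance struct, handler() and the addresses are hypothetical stand-ins for struct kretprobe_instance and ri->rp->handler, not the kernel API.

/* Minimal model of the two-pass kretprobe return-address fix (not kernel code). */
#include <stdio.h>
#include <stddef.h>

#define TRAMPOLINE ((void *)0x1000)	/* stand-in for kretprobe_trampoline */

struct instance {
	void *ret_addr;		/* saved return address (may be the trampoline) */
	int owned_by_current;	/* stand-in for ri->task == current */
};

static void handler(struct instance *ri, void *correct_ret)
{
	/* the user handler now sees the real return address, not the
	 * trampoline left behind by a nested kretprobe */
	printf("handler for %p sees return address %p\n", (void *)ri, correct_ret);
}

int main(void)
{
	struct instance insts[3] = {
		{ TRAMPOLINE,       1 },	/* nested probe: points back at the trampoline */
		{ (void *)0x400500, 1 },	/* the real return address */
		{ (void *)0xdead,   0 },	/* other task sharing the bucket: skipped */
	};
	void *correct_ret = NULL;
	size_t i;

	/* Pass 1: find the first instance for this task whose saved address
	 * is not the trampoline; that is the real return address. */
	for (i = 0; i < 3; i++) {
		if (!insts[i].owned_by_current)
			continue;
		correct_ret = insts[i].ret_addr;
		if (correct_ret != TRAMPOLINE)
			break;
	}

	/* Pass 2: run each handler with the corrected address, then stop once
	 * the instance that held the real address has been processed. */
	for (i = 0; i < 3; i++) {
		if (!insts[i].owned_by_current)
			continue;
		handler(&insts[i], correct_ret);
		if (insts[i].ret_addr != TRAMPOLINE)
			break;
	}
	return 0;
}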