diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-08-19 12:06:49 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-08-19 12:06:49 -0400 |
commit | b3ea36b7a242773d7a05cec6c071a47b6581c178 (patch) | |
tree | 53edfe621b34f30b6f5212f4479d21e2c68dc743 | |
parent | 763008c4357b73c8d18396dfd8d79dc58fa3f99d (diff) | |
parent | 737480a0d525dae13306296da08029dff545bc72 (diff) |
Merge branch 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
kprobes/x86: Fix the return address of multiple kretprobes
perf tools: Fix build error on read only source.
perf, x86: Fix Intel-nhm PMU programming errata workaround
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel.c | 81 | ||||
-rw-r--r-- | arch/x86/kernel/kprobes.c | 25 | ||||
-rw-r--r-- | tools/perf/Makefile | 14 | ||||
-rw-r--r-- | tools/perf/feature-tests.mak | 2 |
4 files changed, 96 insertions, 26 deletions
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 214ac860ebe0..d8d86d014008 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -491,33 +491,78 @@ static void intel_pmu_enable_all(int added) | |||
491 | * Intel Errata AAP53 (model 30) | 491 | * Intel Errata AAP53 (model 30) |
492 | * Intel Errata BD53 (model 44) | 492 | * Intel Errata BD53 (model 44) |
493 | * | 493 | * |
494 | * These chips need to be 'reset' when adding counters by programming | 494 | * The official story: |
495 | * the magic three (non counting) events 0x4300D2, 0x4300B1 and 0x4300B5 | 495 | * These chips need to be 'reset' when adding counters by programming the |
496 | * either in sequence on the same PMC or on different PMCs. | 496 | * magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either |
497 | * in sequence on the same PMC or on different PMCs. | ||
498 | * | ||
499 | * In practice it appears some of these events do in fact count, and | ||
500 | * we need to program all 4 events. | ||
497 | */ | 501 | */ |
498 | static void intel_pmu_nhm_enable_all(int added) | 502 | static void intel_pmu_nhm_workaround(void) |
499 | { | 503 | { |
500 | if (added) { | 504 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
501 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 505 | static const unsigned long nhm_magic[4] = { |
502 | int i; | 506 | 0x4300B5, |
507 | 0x4300D2, | ||
508 | 0x4300B1, | ||
509 | 0x4300B1 | ||
510 | }; | ||
511 | struct perf_event *event; | ||
512 | int i; | ||
513 | |||
514 | /* | ||
515 | * The errata requires the steps below: | ||
516 | * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL; | ||
517 | * 2) Configure 4 PERFEVTSELx with the magic events and clear | ||
518 | * the corresponding PMCx; | ||
519 | * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL; | ||
520 | * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL; | ||
521 | * 5) Clear 4 pairs of PERFEVTSELx and PMCx; | ||
522 | */ | ||
523 | |||
524 | /* | ||
525 | * The real steps we choose are a little different from above. | ||
526 | * A) To reduce MSR operations, we don't run step 1) as they | ||
527 | * are already cleared before this function is called; | ||
528 | * B) Call x86_perf_event_update to save PMCx before configuring | ||
529 | * PERFEVTSELx with magic number; | ||
530 | * C) With step 5), we do clear only when the PERFEVTSELx is | ||
531 | * not used currently. | ||
532 | * D) Call x86_perf_event_set_period to restore PMCx; | ||
533 | */ | ||
503 | 534 | ||
504 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 0, 0x4300D2); | 535 | /* We always operate 4 pairs of PERF Counters */ |
505 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x4300B1); | 536 | for (i = 0; i < 4; i++) { |
506 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x4300B5); | 537 | event = cpuc->events[i]; |
538 | if (event) | ||
539 | x86_perf_event_update(event); | ||
540 | } | ||
507 | 541 | ||
508 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x3); | 542 | for (i = 0; i < 4; i++) { |
509 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0); | 543 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]); |
544 | wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0); | ||
545 | } | ||
510 | 546 | ||
511 | for (i = 0; i < 3; i++) { | 547 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf); |
512 | struct perf_event *event = cpuc->events[i]; | 548 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0); |
513 | 549 | ||
514 | if (!event) | 550 | for (i = 0; i < 4; i++) { |
515 | continue; | 551 | event = cpuc->events[i]; |
516 | 552 | ||
553 | if (event) { | ||
554 | x86_perf_event_set_period(event); | ||
517 | __x86_pmu_enable_event(&event->hw, | 555 | __x86_pmu_enable_event(&event->hw, |
518 | ARCH_PERFMON_EVENTSEL_ENABLE); | 556 | ARCH_PERFMON_EVENTSEL_ENABLE); |
519 | } | 557 | } else |
558 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0); | ||
520 | } | 559 | } |
560 | } | ||
561 | |||
562 | static void intel_pmu_nhm_enable_all(int added) | ||
563 | { | ||
564 | if (added) | ||
565 | intel_pmu_nhm_workaround(); | ||
521 | intel_pmu_enable_all(added); | 566 | intel_pmu_enable_all(added); |
522 | } | 567 | } |
523 | 568 | ||
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 1bfb6cf4dd55..770ebfb349e9 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -709,6 +709,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) | |||
709 | struct hlist_node *node, *tmp; | 709 | struct hlist_node *node, *tmp; |
710 | unsigned long flags, orig_ret_address = 0; | 710 | unsigned long flags, orig_ret_address = 0; |
711 | unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; | 711 | unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; |
712 | kprobe_opcode_t *correct_ret_addr = NULL; | ||
712 | 713 | ||
713 | INIT_HLIST_HEAD(&empty_rp); | 714 | INIT_HLIST_HEAD(&empty_rp); |
714 | kretprobe_hash_lock(current, &head, &flags); | 715 | kretprobe_hash_lock(current, &head, &flags); |
@@ -740,14 +741,34 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) | |||
740 | /* another task is sharing our hash bucket */ | 741 | /* another task is sharing our hash bucket */ |
741 | continue; | 742 | continue; |
742 | 743 | ||
744 | orig_ret_address = (unsigned long)ri->ret_addr; | ||
745 | |||
746 | if (orig_ret_address != trampoline_address) | ||
747 | /* | ||
748 | * This is the real return address. Any other | ||
749 | * instances associated with this task are for | ||
750 | * other calls deeper on the call stack | ||
751 | */ | ||
752 | break; | ||
753 | } | ||
754 | |||
755 | kretprobe_assert(ri, orig_ret_address, trampoline_address); | ||
756 | |||
757 | correct_ret_addr = ri->ret_addr; | ||
758 | hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { | ||
759 | if (ri->task != current) | ||
760 | /* another task is sharing our hash bucket */ | ||
761 | continue; | ||
762 | |||
763 | orig_ret_address = (unsigned long)ri->ret_addr; | ||
743 | if (ri->rp && ri->rp->handler) { | 764 | if (ri->rp && ri->rp->handler) { |
744 | __get_cpu_var(current_kprobe) = &ri->rp->kp; | 765 | __get_cpu_var(current_kprobe) = &ri->rp->kp; |
745 | get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; | 766 | get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; |
767 | ri->ret_addr = correct_ret_addr; | ||
746 | ri->rp->handler(ri, regs); | 768 | ri->rp->handler(ri, regs); |
747 | __get_cpu_var(current_kprobe) = NULL; | 769 | __get_cpu_var(current_kprobe) = NULL; |
748 | } | 770 | } |
749 | 771 | ||
750 | orig_ret_address = (unsigned long)ri->ret_addr; | ||
751 | recycle_rp_inst(ri, &empty_rp); | 772 | recycle_rp_inst(ri, &empty_rp); |
752 | 773 | ||
753 | if (orig_ret_address != trampoline_address) | 774 | if (orig_ret_address != trampoline_address) |
@@ -759,8 +780,6 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) | |||
759 | break; | 780 | break; |
760 | } | 781 | } |
761 | 782 | ||
762 | kretprobe_assert(ri, orig_ret_address, trampoline_address); | ||
763 | |||
764 | kretprobe_hash_unlock(current, &flags); | 783 | kretprobe_hash_unlock(current, &flags); |
765 | 784 | ||
766 | hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { | 785 | hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { |
diff --git a/tools/perf/Makefile b/tools/perf/Makefile index dcb9700b88d2..4f1fa77c1feb 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile | |||
@@ -5,6 +5,12 @@ endif | |||
5 | # The default target of this Makefile is... | 5 | # The default target of this Makefile is... |
6 | all:: | 6 | all:: |
7 | 7 | ||
8 | ifneq ($(OUTPUT),) | ||
9 | # check that the output directory actually exists | ||
10 | OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd) | ||
11 | $(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist)) | ||
12 | endif | ||
13 | |||
8 | # Define V=1 to have a more verbose compile. | 14 | # Define V=1 to have a more verbose compile. |
9 | # Define V=2 to have an even more verbose compile. | 15 | # Define V=2 to have an even more verbose compile. |
10 | # | 16 | # |
@@ -931,15 +937,15 @@ $(OUTPUT)common-cmds.h: $(wildcard Documentation/perf-*.txt) | |||
931 | $(QUIET_GEN). util/generate-cmdlist.sh > $@+ && mv $@+ $@ | 937 | $(QUIET_GEN). util/generate-cmdlist.sh > $@+ && mv $@+ $@ |
932 | 938 | ||
933 | $(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh | 939 | $(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh |
934 | $(QUIET_GEN)$(RM) $@ $@+ && \ | 940 | $(QUIET_GEN)$(RM) $(OUTPUT)$@ $(OUTPUT)$@+ && \ |
935 | sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \ | 941 | sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \ |
936 | -e 's|@SHELL_PATH@|$(SHELL_PATH_SQ)|' \ | 942 | -e 's|@SHELL_PATH@|$(SHELL_PATH_SQ)|' \ |
937 | -e 's|@@PERL@@|$(PERL_PATH_SQ)|g' \ | 943 | -e 's|@@PERL@@|$(PERL_PATH_SQ)|g' \ |
938 | -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \ | 944 | -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \ |
939 | -e 's/@@NO_CURL@@/$(NO_CURL)/g' \ | 945 | -e 's/@@NO_CURL@@/$(NO_CURL)/g' \ |
940 | $@.sh >$@+ && \ | 946 | $@.sh > $(OUTPUT)$@+ && \ |
941 | chmod +x $@+ && \ | 947 | chmod +x $(OUTPUT)$@+ && \ |
942 | mv $@+ $(OUTPUT)$@ | 948 | mv $(OUTPUT)$@+ $(OUTPUT)$@ |
943 | 949 | ||
944 | configure: configure.ac | 950 | configure: configure.ac |
945 | $(QUIET_GEN)$(RM) $@ $<+ && \ | 951 | $(QUIET_GEN)$(RM) $@ $<+ && \ |
diff --git a/tools/perf/feature-tests.mak b/tools/perf/feature-tests.mak index ddb68e601f0e..7a7b60859053 100644 --- a/tools/perf/feature-tests.mak +++ b/tools/perf/feature-tests.mak | |||
@@ -113,7 +113,7 @@ endef | |||
113 | # try-cc | 113 | # try-cc |
114 | # Usage: option = $(call try-cc, source-to-build, cc-options) | 114 | # Usage: option = $(call try-cc, source-to-build, cc-options) |
115 | try-cc = $(shell sh -c \ | 115 | try-cc = $(shell sh -c \ |
116 | 'TMP="$(TMPOUT).$$$$"; \ | 116 | 'TMP="$(OUTPUT)$(TMPOUT).$$$$"; \ |
117 | echo "$(1)" | \ | 117 | echo "$(1)" | \ |
118 | $(CC) -x c - $(2) -o "$$TMP" > /dev/null 2>&1 && echo y; \ | 118 | $(CC) -x c - $(2) -o "$$TMP" > /dev/null 2>&1 && echo y; \ |
119 | rm -f "$$TMP"') | 119 | rm -f "$$TMP"') |