diff options
| -rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel.c | 81 | ||||
| -rw-r--r-- | arch/x86/kernel/kprobes.c | 25 | ||||
| -rw-r--r-- | tools/perf/Makefile | 14 | ||||
| -rw-r--r-- | tools/perf/feature-tests.mak | 2 |
4 files changed, 96 insertions, 26 deletions
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 214ac860ebe0..d8d86d014008 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
| @@ -491,33 +491,78 @@ static void intel_pmu_enable_all(int added) | |||
| 491 | * Intel Errata AAP53 (model 30) | 491 | * Intel Errata AAP53 (model 30) |
| 492 | * Intel Errata BD53 (model 44) | 492 | * Intel Errata BD53 (model 44) |
| 493 | * | 493 | * |
| 494 | * These chips need to be 'reset' when adding counters by programming | 494 | * The official story: |
| 495 | * the magic three (non counting) events 0x4300D2, 0x4300B1 and 0x4300B5 | 495 | * These chips need to be 'reset' when adding counters by programming the |
| 496 | * either in sequence on the same PMC or on different PMCs. | 496 | * magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either |
| 497 | * in sequence on the same PMC or on different PMCs. | ||
| 498 | * | ||
| 499 | * In practice it appears some of these events do in fact count, and | ||
| 500 | * we need to program all 4 events. | ||
| 497 | */ | 501 | */ |
| 498 | static void intel_pmu_nhm_enable_all(int added) | 502 | static void intel_pmu_nhm_workaround(void) |
| 499 | { | 503 | { |
| 500 | if (added) { | 504 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 501 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 505 | static const unsigned long nhm_magic[4] = { |
| 502 | int i; | 506 | 0x4300B5, |
| 507 | 0x4300D2, | ||
| 508 | 0x4300B1, | ||
| 509 | 0x4300B1 | ||
| 510 | }; | ||
| 511 | struct perf_event *event; | ||
| 512 | int i; | ||
| 513 | |||
| 514 | /* | ||
| 515 | * The Errata requires below steps: | ||
| 516 | * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL; | ||
| 517 | * 2) Configure 4 PERFEVTSELx with the magic events and clear | ||
| 518 | * the corresponding PMCx; | ||
| 519 | * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL; | ||
| 520 | * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL; | ||
| 521 | * 5) Clear 4 pairs of PERFEVTSELx and PMCx; | ||
| 522 | */ | ||
| 523 | |||
| 524 | /* | ||
| 525 | * The real steps we choose are a little different from above. | ||
| 526 | * A) To reduce MSR operations, we don't run step 1) as they | ||
| 527 | * are already cleared before this function is called; | ||
| 528 | * B) Call x86_perf_event_update to save PMCx before configuring | ||
| 529 | * PERFEVTSELx with magic number; | ||
| 530 | * C) With step 5), we do clear only when the PERFEVTSELx is | ||
| 531 | * not used currently. | ||
| 532 | * D) Call x86_perf_event_set_period to restore PMCx; | ||
| 533 | */ | ||
| 503 | 534 | ||
| 504 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 0, 0x4300D2); | 535 | /* We always operate 4 pairs of PERF Counters */ |
| 505 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x4300B1); | 536 | for (i = 0; i < 4; i++) { |
| 506 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x4300B5); | 537 | event = cpuc->events[i]; |
| 538 | if (event) | ||
| 539 | x86_perf_event_update(event); | ||
| 540 | } | ||
| 507 | 541 | ||
| 508 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x3); | 542 | for (i = 0; i < 4; i++) { |
| 509 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0); | 543 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]); |
| 544 | wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0); | ||
| 545 | } | ||
| 510 | 546 | ||
| 511 | for (i = 0; i < 3; i++) { | 547 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf); |
| 512 | struct perf_event *event = cpuc->events[i]; | 548 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0); |
| 513 | 549 | ||
| 514 | if (!event) | 550 | for (i = 0; i < 4; i++) { |
| 515 | continue; | 551 | event = cpuc->events[i]; |
| 516 | 552 | ||
| 553 | if (event) { | ||
| 554 | x86_perf_event_set_period(event); | ||
| 517 | __x86_pmu_enable_event(&event->hw, | 555 | __x86_pmu_enable_event(&event->hw, |
| 518 | ARCH_PERFMON_EVENTSEL_ENABLE); | 556 | ARCH_PERFMON_EVENTSEL_ENABLE); |
| 519 | } | 557 | } else |
| 558 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0); | ||
| 520 | } | 559 | } |
| 560 | } | ||
| 561 | |||
| 562 | static void intel_pmu_nhm_enable_all(int added) | ||
| 563 | { | ||
| 564 | if (added) | ||
| 565 | intel_pmu_nhm_workaround(); | ||
| 521 | intel_pmu_enable_all(added); | 566 | intel_pmu_enable_all(added); |
| 522 | } | 567 | } |
| 523 | 568 | ||
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 1bfb6cf4dd55..770ebfb349e9 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
| @@ -709,6 +709,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) | |||
| 709 | struct hlist_node *node, *tmp; | 709 | struct hlist_node *node, *tmp; |
| 710 | unsigned long flags, orig_ret_address = 0; | 710 | unsigned long flags, orig_ret_address = 0; |
| 711 | unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; | 711 | unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; |
| 712 | kprobe_opcode_t *correct_ret_addr = NULL; | ||
| 712 | 713 | ||
| 713 | INIT_HLIST_HEAD(&empty_rp); | 714 | INIT_HLIST_HEAD(&empty_rp); |
| 714 | kretprobe_hash_lock(current, &head, &flags); | 715 | kretprobe_hash_lock(current, &head, &flags); |
| @@ -740,14 +741,34 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) | |||
| 740 | /* another task is sharing our hash bucket */ | 741 | /* another task is sharing our hash bucket */ |
| 741 | continue; | 742 | continue; |
| 742 | 743 | ||
| 744 | orig_ret_address = (unsigned long)ri->ret_addr; | ||
| 745 | |||
| 746 | if (orig_ret_address != trampoline_address) | ||
| 747 | /* | ||
| 748 | * This is the real return address. Any other | ||
| 749 | * instances associated with this task are for | ||
| 750 | * other calls deeper on the call stack | ||
| 751 | */ | ||
| 752 | break; | ||
| 753 | } | ||
| 754 | |||
| 755 | kretprobe_assert(ri, orig_ret_address, trampoline_address); | ||
| 756 | |||
| 757 | correct_ret_addr = ri->ret_addr; | ||
| 758 | hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { | ||
| 759 | if (ri->task != current) | ||
| 760 | /* another task is sharing our hash bucket */ | ||
| 761 | continue; | ||
| 762 | |||
| 763 | orig_ret_address = (unsigned long)ri->ret_addr; | ||
| 743 | if (ri->rp && ri->rp->handler) { | 764 | if (ri->rp && ri->rp->handler) { |
| 744 | __get_cpu_var(current_kprobe) = &ri->rp->kp; | 765 | __get_cpu_var(current_kprobe) = &ri->rp->kp; |
| 745 | get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; | 766 | get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; |
| 767 | ri->ret_addr = correct_ret_addr; | ||
| 746 | ri->rp->handler(ri, regs); | 768 | ri->rp->handler(ri, regs); |
| 747 | __get_cpu_var(current_kprobe) = NULL; | 769 | __get_cpu_var(current_kprobe) = NULL; |
| 748 | } | 770 | } |
| 749 | 771 | ||
| 750 | orig_ret_address = (unsigned long)ri->ret_addr; | ||
| 751 | recycle_rp_inst(ri, &empty_rp); | 772 | recycle_rp_inst(ri, &empty_rp); |
| 752 | 773 | ||
| 753 | if (orig_ret_address != trampoline_address) | 774 | if (orig_ret_address != trampoline_address) |
| @@ -759,8 +780,6 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) | |||
| 759 | break; | 780 | break; |
| 760 | } | 781 | } |
| 761 | 782 | ||
| 762 | kretprobe_assert(ri, orig_ret_address, trampoline_address); | ||
| 763 | |||
| 764 | kretprobe_hash_unlock(current, &flags); | 783 | kretprobe_hash_unlock(current, &flags); |
| 765 | 784 | ||
| 766 | hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { | 785 | hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { |
diff --git a/tools/perf/Makefile b/tools/perf/Makefile index dcb9700b88d2..4f1fa77c1feb 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile | |||
| @@ -5,6 +5,12 @@ endif | |||
| 5 | # The default target of this Makefile is... | 5 | # The default target of this Makefile is... |
| 6 | all:: | 6 | all:: |
| 7 | 7 | ||
| 8 | ifneq ($(OUTPUT),) | ||
| 9 | # check that the output directory actually exists | ||
| 10 | OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd) | ||
| 11 | $(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist)) | ||
| 12 | endif | ||
| 13 | |||
| 8 | # Define V=1 to have a more verbose compile. | 14 | # Define V=1 to have a more verbose compile. |
| 9 | # Define V=2 to have an even more verbose compile. | 15 | # Define V=2 to have an even more verbose compile. |
| 10 | # | 16 | # |
| @@ -931,15 +937,15 @@ $(OUTPUT)common-cmds.h: $(wildcard Documentation/perf-*.txt) | |||
| 931 | $(QUIET_GEN). util/generate-cmdlist.sh > $@+ && mv $@+ $@ | 937 | $(QUIET_GEN). util/generate-cmdlist.sh > $@+ && mv $@+ $@ |
| 932 | 938 | ||
| 933 | $(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh | 939 | $(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh |
| 934 | $(QUIET_GEN)$(RM) $@ $@+ && \ | 940 | $(QUIET_GEN)$(RM) $(OUTPUT)$@ $(OUTPUT)$@+ && \ |
| 935 | sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \ | 941 | sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \ |
| 936 | -e 's|@SHELL_PATH@|$(SHELL_PATH_SQ)|' \ | 942 | -e 's|@SHELL_PATH@|$(SHELL_PATH_SQ)|' \ |
| 937 | -e 's|@@PERL@@|$(PERL_PATH_SQ)|g' \ | 943 | -e 's|@@PERL@@|$(PERL_PATH_SQ)|g' \ |
| 938 | -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \ | 944 | -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \ |
| 939 | -e 's/@@NO_CURL@@/$(NO_CURL)/g' \ | 945 | -e 's/@@NO_CURL@@/$(NO_CURL)/g' \ |
| 940 | $@.sh >$@+ && \ | 946 | $@.sh > $(OUTPUT)$@+ && \ |
| 941 | chmod +x $@+ && \ | 947 | chmod +x $(OUTPUT)$@+ && \ |
| 942 | mv $@+ $(OUTPUT)$@ | 948 | mv $(OUTPUT)$@+ $(OUTPUT)$@ |
| 943 | 949 | ||
| 944 | configure: configure.ac | 950 | configure: configure.ac |
| 945 | $(QUIET_GEN)$(RM) $@ $<+ && \ | 951 | $(QUIET_GEN)$(RM) $@ $<+ && \ |
diff --git a/tools/perf/feature-tests.mak b/tools/perf/feature-tests.mak index ddb68e601f0e..7a7b60859053 100644 --- a/tools/perf/feature-tests.mak +++ b/tools/perf/feature-tests.mak | |||
| @@ -113,7 +113,7 @@ endef | |||
| 113 | # try-cc | 113 | # try-cc |
| 114 | # Usage: option = $(call try-cc, source-to-build, cc-options) | 114 | # Usage: option = $(call try-cc, source-to-build, cc-options) |
| 115 | try-cc = $(shell sh -c \ | 115 | try-cc = $(shell sh -c \ |
| 116 | 'TMP="$(TMPOUT).$$$$"; \ | 116 | 'TMP="$(OUTPUT)$(TMPOUT).$$$$"; \ |
| 117 | echo "$(1)" | \ | 117 | echo "$(1)" | \ |
| 118 | $(CC) -x c - $(2) -o "$$TMP" > /dev/null 2>&1 && echo y; \ | 118 | $(CC) -x c - $(2) -o "$$TMP" > /dev/null 2>&1 && echo y; \ |
| 119 | rm -f "$$TMP"') | 119 | rm -f "$$TMP"') |
