author		Andi Kleen <ak@linux.intel.com>		2013-06-17 20:36:47 -0400
committer	Ingo Molnar <mingo@kernel.org>		2013-06-19 08:43:32 -0400
commit		130768b8c93cd8d21390a136ec8cef417153ca14 (patch)
tree		01900496205e221dd6ad855352e3b1df6d6be1c6 /arch/x86/kernel
parent		b2fa344d0c275ea4436bfc3a97708f2c938ac0eb (diff)
perf/x86/intel: Add Haswell PEBS record support
Add support for the Haswell extended (fmt2) PEBS format. It contains
a superset of the nhm (fmt1) PEBS fields, but the record is longer,
so the code paths need to be adjusted.
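As a reading aid only (not part of the patch text), a minimal sketch of how the fmt2 record extends the fmt1 layout; the field names mirror the struct this patch adds below:

/* Sketch only: mirrors the pebs_record_hsw definition added by this patch. */
struct pebs_record_hsw {
	struct pebs_record_nhm nhm;	/* all fmt1 (nhm) fields, unchanged */
	u64 real_ip;			/* "EventingRip": IP of the eventing instruction */
	u64 tsx_tuning;			/* TSX abort cycles and abort flags */
};
/*
 * The record is sizeof(struct pebs_record_nhm) + 16 bytes, which is why
 * the drain path below advances by x86_pmu.pebs_record_size instead of
 * assuming the nhm record size.
 */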
The main advantage is the new "EventingRip" field, which directly
reports the IP of the instruction that caused the event rather than
the off-by-one (next) instruction. With precise == 2 we use it
directly and no longer need to fall back to the LBRs and walking
basic blocks to correct the IP. This lowers the overhead of precise
sampling significantly.
Some other features are added in later patches.
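For context only, a minimal user-space sketch of what "precise == 2" means at the ABI level; it uses the standard perf_event_open() interface, and the helper name is illustrative, not part of the patch:

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/*
 * Sketch: request the most precise IP (precise_ip == 2). On Haswell,
 * with this patch, the kernel can take the IP straight from the fmt2
 * PEBS record instead of correcting it via the LBRs.
 */
static int open_precise_cycles(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100000;
	attr.sample_type = PERF_SAMPLE_IP;
	attr.precise_ip = 2;		/* exact IP requested */

	/* args: attr, pid (0 = this task), cpu (-1 = any), group_fd, flags */
	return syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
}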
Reviewed-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Andi Kleen <ak@linux.jf.intel.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Link: http://lkml.kernel.org/r/1371515812-9646-2-git-send-email-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--	arch/x86/kernel/cpu/perf_event.c		   3
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel_ds.c	 110
2 files changed, 91 insertions(+), 22 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index e52a9e577783..ab3395295224 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -403,7 +403,8 @@ int x86_pmu_hw_config(struct perf_event *event)
 		 * check that PEBS LBR correction does not conflict with
 		 * whatever the user is asking with attr->branch_sample_type
 		 */
-		if (event->attr.precise_ip > 1) {
+		if (event->attr.precise_ip > 1 &&
+		    x86_pmu.intel_cap.pebs_format < 2) {
 			u64 *br_type = &event->attr.branch_sample_type;
 
 			if (has_branch_stack(event)) {
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 60250f687052..2a63d1307804 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -165,6 +165,22 @@ struct pebs_record_nhm {
 	u64 status, dla, dse, lat;
 };
 
+/*
+ * Same as pebs_record_nhm, with two additional fields.
+ */
+struct pebs_record_hsw {
+	struct pebs_record_nhm nhm;
+	/*
+	 * Real IP of the event. In the Intel documentation this
+	 * is called eventingrip.
+	 */
+	u64 real_ip;
+	/*
+	 * TSX tuning information field: abort cycles and abort flags.
+	 */
+	u64 tsx_tuning;
+};
+
 void init_debug_store_on_cpu(int cpu)
 {
 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
@@ -697,6 +713,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	 */
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct pebs_record_nhm *pebs = __pebs;
+	struct pebs_record_hsw *pebs_hsw = __pebs;
 	struct perf_sample_data data;
 	struct pt_regs regs;
 	u64 sample_type;
@@ -753,7 +770,10 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	regs.bp = pebs->bp;
 	regs.sp = pebs->sp;
 
-	if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
+	if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
+		regs.ip = pebs_hsw->real_ip;
+		regs.flags |= PERF_EFLAGS_EXACT;
+	} else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
 		regs.flags |= PERF_EFLAGS_EXACT;
 	else
 		regs.flags &= ~PERF_EFLAGS_EXACT;
@@ -806,35 +826,22 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
 		__intel_pmu_pebs_event(event, iregs, at);
 }
 
-static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
+static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at,
+					void *top)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct debug_store *ds = cpuc->ds;
-	struct pebs_record_nhm *at, *top;
 	struct perf_event *event = NULL;
 	u64 status = 0;
-	int bit, n;
-
-	if (!x86_pmu.pebs_active)
-		return;
-
-	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
-	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
+	int bit;
 
 	ds->pebs_index = ds->pebs_buffer_base;
 
-	n = top - at;
-	if (n <= 0)
-		return;
-
-	/*
-	 * Should not happen, we program the threshold at 1 and do not
-	 * set a reset value.
-	 */
-	WARN_ONCE(n > x86_pmu.max_pebs_events, "Unexpected number of pebs records %d\n", n);
-
-	for ( ; at < top; at++) {
-		for_each_set_bit(bit, (unsigned long *)&at->status, x86_pmu.max_pebs_events) {
+	for (; at < top; at += x86_pmu.pebs_record_size) {
+		struct pebs_record_nhm *p = at;
+
+		for_each_set_bit(bit, (unsigned long *)&p->status,
+				 x86_pmu.max_pebs_events) {
 			event = cpuc->events[bit];
 			if (!test_bit(bit, cpuc->active_mask))
 				continue;
@@ -857,6 +864,61 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 	}
 }
 
+static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct debug_store *ds = cpuc->ds;
+	struct pebs_record_nhm *at, *top;
+	int n;
+
+	if (!x86_pmu.pebs_active)
+		return;
+
+	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
+	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
+
+	ds->pebs_index = ds->pebs_buffer_base;
+
+	n = top - at;
+	if (n <= 0)
+		return;
+
+	/*
+	 * Should not happen, we program the threshold at 1 and do not
+	 * set a reset value.
+	 */
+	WARN_ONCE(n > x86_pmu.max_pebs_events,
+		  "Unexpected number of pebs records %d\n", n);
+
+	return __intel_pmu_drain_pebs_nhm(iregs, at, top);
+}
+
+static void intel_pmu_drain_pebs_hsw(struct pt_regs *iregs)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct debug_store *ds = cpuc->ds;
+	struct pebs_record_hsw *at, *top;
+	int n;
+
+	if (!x86_pmu.pebs_active)
+		return;
+
+	at  = (struct pebs_record_hsw *)(unsigned long)ds->pebs_buffer_base;
+	top = (struct pebs_record_hsw *)(unsigned long)ds->pebs_index;
+
+	n = top - at;
+	if (n <= 0)
+		return;
+	/*
+	 * Should not happen, we program the threshold at 1 and do not
+	 * set a reset value.
+	 */
+	WARN_ONCE(n > x86_pmu.max_pebs_events,
+		  "Unexpected number of pebs records %d\n", n);
+
+	return __intel_pmu_drain_pebs_nhm(iregs, at, top);
+}
+
 /*
  * BTS, PEBS probe and setup
  */
@@ -888,6 +950,12 @@ void intel_ds_init(void)
 			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
 			break;
 
+		case 2:
+			pr_cont("PEBS fmt2%c, ", pebs_type);
+			x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
+			x86_pmu.drain_pebs = intel_pmu_drain_pebs_hsw;
+			break;
+
 		default:
 			printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
 			x86_pmu.pebs = 0;