aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Andi Kleen <ak@linux.intel.com> 2013-09-05 23:37:39 -0400
committer: Ingo Molnar <mingo@kernel.org> 2013-09-12 13:13:34 -0400
commit: 748e86aa90edfddfa6016f1cf383ff5bc6aada91 (patch)
tree: 593a34c47e1ba18adfc11108f57fdc2d90f68727
parent: 2dbf0116aa8c7bfa900352d3f7b2609748fcc1c5 (diff)
perf/x86: Report TSX transaction abort cost as weight
Use the existing weight reporting facility to report the transaction
abort cost, that is the number of cycles wasted in aborts.
Haswell reports this in the PEBS record.

This was in fact the original user for weight.

This is a very useful sort key to concentrate on the most
costly aborts and a good metric for TSX tuning.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1378438661-24765-3-git-send-email-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel_ds.c 55
1 files changed, 42 insertions, 13 deletions
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 63438aad177f..104cbba3b595 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -182,16 +182,29 @@ struct pebs_record_nhm {
182 * Same as pebs_record_nhm, with two additional fields. 182 * Same as pebs_record_nhm, with two additional fields.
183 */ 183 */
184struct pebs_record_hsw { 184struct pebs_record_hsw {
185 struct pebs_record_nhm nhm; 185 u64 flags, ip;
186 /* 186 u64 ax, bx, cx, dx;
187 * Real IP of the event. In the Intel documentation this 187 u64 si, di, bp, sp;
188 * is called eventingrip. 188 u64 r8, r9, r10, r11;
189 */ 189 u64 r12, r13, r14, r15;
190 u64 real_ip; 190 u64 status, dla, dse, lat;
191 /* 191 u64 real_ip; /* the actual eventing ip */
192 * TSX tuning information field: abort cycles and abort flags. 192 u64 tsx_tuning; /* TSX abort cycles and flags */
193 */ 193};
194 u64 tsx_tuning; 194
195union hsw_tsx_tuning {
196 struct {
197 u32 cycles_last_block : 32,
198 hle_abort : 1,
199 rtm_abort : 1,
200 instruction_abort : 1,
201 non_instruction_abort : 1,
202 retry : 1,
203 data_conflict : 1,
204 capacity_writes : 1,
205 capacity_reads : 1;
206 };
207 u64 value;
195}; 208};
196 209
197void init_debug_store_on_cpu(int cpu) 210void init_debug_store_on_cpu(int cpu)
@@ -785,16 +798,26 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
785 return 0; 798 return 0;
786} 799}
787 800
801static inline u64 intel_hsw_weight(struct pebs_record_hsw *pebs)
802{
803 if (pebs->tsx_tuning) {
804 union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
805 return tsx.cycles_last_block;
806 }
807 return 0;
808}
809
788static void __intel_pmu_pebs_event(struct perf_event *event, 810static void __intel_pmu_pebs_event(struct perf_event *event,
789 struct pt_regs *iregs, void *__pebs) 811 struct pt_regs *iregs, void *__pebs)
790{ 812{
791 /* 813 /*
792 * We cast to pebs_record_nhm to get the load latency data 814 * We cast to pebs_record_nhm to get the load latency data
793 * if extra_reg MSR_PEBS_LD_LAT_THRESHOLD used 815 * if extra_reg MSR_PEBS_LD_LAT_THRESHOLD used
816 * We cast to the biggest PEBS record but are careful not
817 * to access out-of-bounds members.
794 */ 818 */
795 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 819 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
796 struct pebs_record_nhm *pebs = __pebs; 820 struct pebs_record_hsw *pebs = __pebs;
797 struct pebs_record_hsw *pebs_hsw = __pebs;
798 struct perf_sample_data data; 821 struct perf_sample_data data;
799 struct pt_regs regs; 822 struct pt_regs regs;
800 u64 sample_type; 823 u64 sample_type;
@@ -853,7 +876,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
853 regs.sp = pebs->sp; 876 regs.sp = pebs->sp;
854 877
855 if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) { 878 if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
856 regs.ip = pebs_hsw->real_ip; 879 regs.ip = pebs->real_ip;
857 regs.flags |= PERF_EFLAGS_EXACT; 880 regs.flags |= PERF_EFLAGS_EXACT;
858 } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs)) 881 } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
859 regs.flags |= PERF_EFLAGS_EXACT; 882 regs.flags |= PERF_EFLAGS_EXACT;
@@ -864,6 +887,12 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
864 x86_pmu.intel_cap.pebs_format >= 1) 887 x86_pmu.intel_cap.pebs_format >= 1)
865 data.addr = pebs->dla; 888 data.addr = pebs->dla;
866 889
890 /* Only set the TSX weight when no memory weight was requested. */
891 if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) &&
892 !fll &&
893 (x86_pmu.intel_cap.pebs_format >= 2))
894 data.weight = intel_hsw_weight(pebs);
895
867 if (has_branch_stack(event)) 896 if (has_branch_stack(event))
868 data.br_stack = &cpuc->lbr_stack; 897 data.br_stack = &cpuc->lbr_stack;
869 898