author	Andi Kleen <ak@linux.intel.com>	2013-06-17 20:36:52 -0400
committer	Ingo Molnar <mingo@kernel.org>	2013-06-19 08:43:35 -0400
commit	f9134f36aed59ab55c0ab1a4618dd455f15aef5f (patch)
tree	bdccb1042167cb72e257127e1097c506c5b4468e
parent	135c5612c460f89657c4698fe2ea753f6f667963 (diff)
perf/x86/intel: Add mem-loads/stores support for Haswell
mem-loads is basically the same as on Sandy Bridge, but we use a separate string so it can be changed later.

Haswell doesn't support the full precise store mode, so we emulate it using the "DataLA" facility. This lets us do everything, but for data sources we can only detect whether it was an L1 hit or not.

There is no explicit enable bit anymore, so we have to tie it to a perf-internal-only flag.

The address is supported for all memory-related PEBS events with DataLA. Instead of logging it only for the load and store events, we allow logging it for all of them (it will simply be 0 if the current event does not support it).

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Andi Kleen <ak@linux.jf.intel.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Link: http://lkml.kernel.org/r/1371515812-9646-7-git-send-email-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
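For reference, here is a minimal user-space sketch (not part of the patch) of how the sampling support wired up below can be consumed. It opens the raw ALL_STORES event that the mem-stores alias maps to (event 0xd0, umask 0x82, taken from the patch), sets precise_ip so PEBS/DataLA is used, and requests PERF_SAMPLE_ADDR and PERF_SAMPLE_DATA_SRC, which this patch fills in from pebs->dla and precise_store_data_hsw(). The sample period and the other attr settings are illustrative assumptions.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;
	attr.config = 0x82d0;			/* MEM_UOPS_RETIRED.ALL_STORES */
	attr.sample_period = 10000;		/* arbitrary example period */
	attr.precise_ip = 2;			/* request PEBS so DataLA is used */
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_ADDR |
			   PERF_SAMPLE_DATA_SRC;
	attr.exclude_kernel = 1;

	/* measure the current task on any CPU */
	int fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	/* ... mmap a ring buffer and read PERF_RECORD_SAMPLEs here ... */
	close(fd);
	return 0;
}

Equivalently, once the sysfs aliases below are in place, something like "perf record -e cpu/mem-stores/pp -d" should exercise the same path via the perf tool.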
-rw-r--r--	arch/x86/kernel/cpu/perf_event.h	6
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel.c	10
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel_ds.c	32
3 files changed, 41 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index f43473c50f52..108dc75124d9 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -67,6 +67,7 @@ struct event_constraint {
  */
 #define PERF_X86_EVENT_PEBS_LDLAT	0x1 /* ld+ldlat data address sampling */
 #define PERF_X86_EVENT_PEBS_ST		0x2 /* st data address sampling */
+#define PERF_X86_EVENT_PEBS_ST_HSW	0x4 /* haswell style st data sampling */
 
 struct amd_nb {
 	int nb_id;  /* NorthBridge id */
@@ -250,6 +251,11 @@ struct cpu_hw_events {
 	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
 			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
 
+/* DataLA version of store sampling without extra enable bit. */
+#define INTEL_PST_HSW_CONSTRAINT(c, n)	\
+	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
+
 #define EVENT_CONSTRAINT_END		\
 	EVENT_CONSTRAINT(0, 0, 0)
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 877672c43347..a6eccf1da42f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2036,6 +2036,15 @@ static __init void intel_nehalem_quirk(void)
 	}
 }
 
+EVENT_ATTR_STR(mem-loads,	mem_ld_hsw,	"event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores,	mem_st_hsw,	"event=0xd0,umask=0x82")
+
+static struct attribute *hsw_events_attrs[] = {
+	EVENT_PTR(mem_ld_hsw),
+	EVENT_PTR(mem_st_hsw),
+	NULL
+};
+
 __init int intel_pmu_init(void)
 {
 	union cpuid10_edx edx;
@@ -2279,6 +2288,7 @@ __init int intel_pmu_init(void)
 
 		x86_pmu.hw_config = hsw_hw_config;
 		x86_pmu.get_event_constraints = hsw_get_event_constraints;
+		x86_pmu.cpu_events = hsw_events_attrs;
 		pr_cont("Haswell events, ");
 		break;
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index e83148ffe392..ed3e5533ce33 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -107,6 +107,19 @@ static u64 precise_store_data(u64 status)
 	return val;
 }
 
+static u64 precise_store_data_hsw(u64 status)
+{
+	union perf_mem_data_src dse;
+
+	dse.val = 0;
+	dse.mem_op = PERF_MEM_OP_STORE;
+	dse.mem_lvl = PERF_MEM_LVL_NA;
+	if (status & 1)
+		dse.mem_lvl = PERF_MEM_LVL_L1;
+	/* Nothing else supported. Sorry. */
+	return dse.val;
+}
+
 static u64 load_latency_data(u64 status)
 {
 	union intel_x86_pebs_dse dse;
@@ -566,13 +579,13 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
 
 struct event_constraint intel_hsw_pebs_event_constraints[] = {
 	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
-	INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
+	INTEL_PST_HSW_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
 	INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
 	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
 	INTEL_UEVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */
 	INTEL_UEVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */
 	INTEL_UEVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.NEAR_TAKEN */
-	INTEL_EVENT_CONSTRAINT(0xcd, 0x8),    /* MEM_TRANS_RETIRED.* */
+	INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.* */
 	/* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
 	INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf),
 	/* MEM_UOPS_RETIRED.STLB_MISS_STORES */
@@ -582,7 +595,7 @@ struct event_constraint intel_hsw_pebs_event_constraints[] = {
 	/* MEM_UOPS_RETIRED.SPLIT_STORES */
 	INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf),
 	INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
+	INTEL_PST_HSW_CONSTRAINT(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
 	INTEL_UEVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */
 	INTEL_UEVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */
 	INTEL_UEVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L3_HIT */
@@ -759,7 +772,8 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 		return;
 
 	fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
-	fst = event->hw.flags & PERF_X86_EVENT_PEBS_ST;
+	fst = event->hw.flags & (PERF_X86_EVENT_PEBS_ST |
+				 PERF_X86_EVENT_PEBS_ST_HSW);
 
 	perf_sample_data_init(&data, 0, event->hw.last_period);
 
@@ -770,9 +784,6 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	 * if PEBS-LL or PreciseStore
 	 */
 	if (fll || fst) {
-		if (sample_type & PERF_SAMPLE_ADDR)
-			data.addr = pebs->dla;
-
 		/*
 		 * Use latency for weight (only avail with PEBS-LL)
 		 */
@@ -785,6 +796,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 		if (sample_type & PERF_SAMPLE_DATA_SRC) {
 			if (fll)
 				data.data_src.val = load_latency_data(pebs->dse);
+			else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
+				data.data_src.val =
+					precise_store_data_hsw(pebs->dse);
 			else
 				data.data_src.val = precise_store_data(pebs->dse);
 		}
@@ -814,6 +828,10 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	else
 		regs.flags &= ~PERF_EFLAGS_EXACT;
 
+	if ((event->attr.sample_type & PERF_SAMPLE_ADDR) &&
+	    x86_pmu.intel_cap.pebs_format >= 1)
+		data.addr = pebs->dla;
+
 	if (has_branch_stack(event))
 		data.br_stack = &cpuc->lbr_stack;
 