author	Stephane Eranian <eranian@google.com>	2013-01-24 10:10:34 -0500
committer	Arnaldo Carvalho de Melo <acme@redhat.com>	2013-04-01 11:17:06 -0400
commit	9ad64c0f481c37a63dd39842a0fd264bee44a097 (patch)
tree	d52a4e74229c970a7237ebb8824068e850d1680d
parent	a63fcab45273174e665e6a8c9fa1a79a9046d0d5 (diff)
perf/x86: Add support for PEBS Precise Store
This patch adds support for PEBS Precise Store, which is available on
Intel Sandy Bridge and Ivy Bridge processors.

To use Precise Store, the proper PEBS event must be used:
mem_trans_retired:precise_stores. For the perf tool, the generic
mem-stores event exported via sysfs can be used directly.

Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: peterz@infradead.org
Cc: ak@linux.intel.com
Cc: acme@redhat.com
Cc: jolsa@redhat.com
Cc: namhyung.kim@lge.com
Link: http://lkml.kernel.org/r/1359040242-8269-11-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
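For illustration only (not part of this patch), a userspace consumer could program the same event directly through perf_event_open(): raw config 0x02cd encodes event=0xcd, umask=0x2 (MEM_TRANS_RETIRED.PRECISE_STORES), matching the constraint added below, and a non-zero precise_ip requests PEBS. The sample period, sample_type mix and exclude bits in this sketch are illustrative assumptions; with the sysfs alias added here, the perf tool can instead use the mem-stores event name directly.

/*
 * Illustrative sketch only: open MEM_TRANS_RETIRED.PRECISE_STORES as a
 * precise (PEBS) sampling event with data address and data source
 * sampling. Period, exclude bits and sample_type are arbitrary choices.
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <string.h>
#include <unistd.h>

static int open_precise_stores(pid_t pid)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size           = sizeof(attr);
	attr.type           = PERF_TYPE_RAW;
	attr.config         = 0x02cd;	/* event=0xcd, umask=0x2 */
	attr.sample_period  = 10000;	/* assumed period */
	attr.sample_type    = PERF_SAMPLE_IP | PERF_SAMPLE_ADDR |
			      PERF_SAMPLE_DATA_SRC;
	attr.precise_ip     = 2;	/* request PEBS */
	attr.exclude_kernel = 1;
	attr.disabled       = 1;

	/* perf_event_open has no glibc wrapper; use the raw syscall */
	return syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0);
}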
-rw-r--r--	arch/x86/kernel/cpu/perf_event.h	5
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel.c	2
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel_ds.c	49
3 files changed, 54 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index f3a9a94e4d22..ba9aadfa683b 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -66,6 +66,7 @@ struct event_constraint {
  * struct event_constraint flags
  */
 #define PERF_X86_EVENT_PEBS_LDLAT	0x1 /* ld+ldlat data address sampling */
+#define PERF_X86_EVENT_PEBS_ST		0x2 /* st data address sampling */
 
 struct amd_nb {
 	int nb_id;  /* NorthBridge id */
@@ -242,6 +243,10 @@ struct cpu_hw_events {
 	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
 			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
 
+#define INTEL_PST_CONSTRAINT(c, n)	\
+	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
+
 #define EVENT_CONSTRAINT_END		\
 	EVENT_CONSTRAINT(0, 0, 0)
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index ae6096b175b9..e84c4ba44b59 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -163,6 +163,7 @@ static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
 
 EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
 EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
 
 struct attribute *nhm_events_attrs[] = {
 	EVENT_PTR(mem_ld_nhm),
@@ -171,6 +172,7 @@ struct attribute *nhm_events_attrs[] = {
 
 struct attribute *snb_events_attrs[] = {
 	EVENT_PTR(mem_ld_snb),
+	EVENT_PTR(mem_st_snb),
 	NULL,
 };
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index a6400bd0463c..36dc13d1ad02 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -69,6 +69,44 @@ static const u64 pebs_data_source[] = {
 	OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
 };
 
+static u64 precise_store_data(u64 status)
+{
+	union intel_x86_pebs_dse dse;
+	u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);
+
+	dse.val = status;
+
+	/*
+	 * bit 4: TLB access
+	 * 1 = stored missed 2nd level TLB
+	 *
+	 * so it either hit the walker or the OS
+	 * otherwise hit 2nd level TLB
+	 */
+	if (dse.st_stlb_miss)
+		val |= P(TLB, MISS);
+	else
+		val |= P(TLB, HIT);
+
+	/*
+	 * bit 0: hit L1 data cache
+	 * if not set, then all we know is that
+	 * it missed L1D
+	 */
+	if (dse.st_l1d_hit)
+		val |= P(LVL, HIT);
+	else
+		val |= P(LVL, MISS);
+
+	/*
+	 * bit 5: Locked prefix
+	 */
+	if (dse.st_locked)
+		val |= P(LOCK, LOCKED);
+
+	return val;
+}
+
 static u64 load_latency_data(u64 status)
 {
 	union intel_x86_pebs_dse dse;
@@ -486,6 +524,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
 	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
 	INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
+	INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
 	INTEL_EVENT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -500,6 +539,7 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
 	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
 	INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
+	INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
 	INTEL_EVENT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -537,6 +577,8 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 
 	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
 		cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
+	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
+		cpuc->pebs_enabled |= 1ULL << 63;
 }
 
 void intel_pmu_pebs_disable(struct perf_event *event)
@@ -657,12 +699,13 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	struct perf_sample_data data;
 	struct pt_regs regs;
 	u64 sample_type;
-	int fll;
+	int fll, fst;
 
 	if (!intel_pmu_save_and_restart(event))
 		return;
 
 	fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
+	fst = event->hw.flags & PERF_X86_EVENT_PEBS_ST;
 
 	perf_sample_data_init(&data, 0, event->hw.last_period);
 
@@ -672,7 +715,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	/*
 	 * if PEBS-LL or PreciseStore
 	 */
-	if (fll) {
+	if (fll || fst) {
 		if (sample_type & PERF_SAMPLE_ADDR)
 			data.addr = pebs->dla;
 
@@ -688,6 +731,8 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 		if (sample_type & PERF_SAMPLE_DATA_SRC) {
 			if (fll)
 				data.data_src.val = load_latency_data(pebs->dse);
+			else
+				data.data_src.val = precise_store_data(pebs->dse);
 		}
 	}
 
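As a usage note beyond the patch itself (a rough sketch, assuming the generic memory-sampling definitions from the same patch series), the value assembled by precise_store_data() reaches userspace in the PERF_SAMPLE_DATA_SRC field of the sample and can be unpacked with the perf_mem_data_src union from the UAPI header; the output format below is a hypothetical example, not perf tool output.

/*
 * Rough sketch: interpret a PERF_SAMPLE_DATA_SRC value taken from a
 * precise store sample. Field and constant names come from
 * <linux/perf_event.h>; the output format is an arbitrary choice.
 */
#include <linux/perf_event.h>
#include <stdio.h>

static void print_store_source(__u64 src)
{
	union perf_mem_data_src d = { .val = src };

	if (!(d.mem_op & PERF_MEM_OP_STORE))
		return;

	printf("store: L1 %s, dTLB %s, %s\n",
	       (d.mem_lvl  & PERF_MEM_LVL_HIT)     ? "hit" : "miss",
	       (d.mem_dtlb & PERF_MEM_TLB_HIT)     ? "hit" : "miss",
	       (d.mem_lock & PERF_MEM_LOCK_LOCKED) ? "locked" : "not locked");
}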