-rw-r--r--  arch/x86/kernel/cpu/perf_event.h       | 32
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c | 71
2 files changed, 98 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 24a65057c1c0..f31f90e2d859 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -71,6 +71,7 @@ struct event_constraint {
 #define PERF_X86_EVENT_COMMITTED	0x8 /* event passed commit_txn */
 #define PERF_X86_EVENT_PEBS_LD_HSW	0x10 /* haswell style datala, load */
 #define PERF_X86_EVENT_PEBS_NA_HSW	0x20 /* haswell style datala, unknown */
+#define PERF_X86_EVENT_EXCL		0x40 /* HT exclusivity on counter */
 #define PERF_X86_EVENT_RDPMC_ALLOWED	0x40 /* grant rdpmc permission */
 
 
@@ -123,6 +124,26 @@ struct intel_shared_regs {
 	unsigned	core_id;	/* per-core: core id */
 };
 
+enum intel_excl_state_type {
+	INTEL_EXCL_UNUSED    = 0, /* counter is unused */
+	INTEL_EXCL_SHARED    = 1, /* counter can be used by both threads */
+	INTEL_EXCL_EXCLUSIVE = 2, /* counter can be used by one thread only */
+};
+
+struct intel_excl_states {
+	enum intel_excl_state_type init_state[X86_PMC_IDX_MAX];
+	enum intel_excl_state_type state[X86_PMC_IDX_MAX];
+};
+
+struct intel_excl_cntrs {
+	raw_spinlock_t	lock;
+
+	struct intel_excl_states states[2];
+
+	int		refcnt;		/* per-core: #HT threads */
+	unsigned	core_id;	/* per-core: core id */
+};
+
 #define MAX_LBR_ENTRIES		16
 
 enum {
@@ -185,6 +206,12 @@ struct cpu_hw_events {
 	 * used on Intel NHM/WSM/SNB
 	 */
 	struct intel_shared_regs	*shared_regs;
+	/*
+	 * manage exclusive counter access between hyperthread
+	 */
+	struct event_constraint *constraint_list; /* in enable order */
+	struct intel_excl_cntrs *excl_cntrs;
+	int excl_thread_id; /* 0 or 1 */
 
 	/*
 	 * AMD specific bits
@@ -208,6 +235,10 @@ struct cpu_hw_events {
 #define EVENT_CONSTRAINT(c, n, m)	\
 	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)
 
+#define INTEL_EXCLEVT_CONSTRAINT(c, n)	\
+	__EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT, HWEIGHT(n),\
+			   0, PERF_X86_EVENT_EXCL)
+
 /*
  * The overlap flag marks event constraints with overlapping counter
  * masks. This is the case if the counter mask of such an event is not
@@ -566,6 +597,7 @@ do { \
  */
 #define PMU_FL_NO_HT_SHARING	0x1 /* no hyper-threading resource sharing */
 #define PMU_FL_HAS_RSP_1	0x2 /* has 2 equivalent offcore_rsp regs */
+#define PMU_FL_EXCL_CNTRS	0x4 /* has exclusive counter requirements */
 
 #define EVENT_VAR(_id) event_attr_##_id
 #define EVENT_PTR(_id) &event_attr_##_id.attr.attr
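The header hunks above only add bookkeeping: a three-state ownership marker per counter (intel_excl_state_type), one state array per hyperthread (intel_excl_states), and a per-core container (intel_excl_cntrs) guarded by a raw spinlock. The scheduler code that consumes these arrays is not part of this patch; the standalone userspace sketch below only illustrates the invariant the state array is meant to encode, namely that a counter one sibling marks exclusive must appear unusable to the other. The helpers mark_exclusive()/counter_usable() are invented for illustration, and a pthread mutex stands in for raw_spinlock_t.

/*
 * Standalone, userspace sketch of the cross-HT exclusion state added above.
 * Hypothetical helpers (mark_exclusive, counter_usable) -- not kernel code;
 * a pthread mutex stands in for raw_spinlock_t.  Build: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define X86_PMC_IDX_MAX 64

enum intel_excl_state_type {
	INTEL_EXCL_UNUSED    = 0,	/* counter is unused */
	INTEL_EXCL_SHARED    = 1,	/* counter can be used by both threads */
	INTEL_EXCL_EXCLUSIVE = 2,	/* counter can be used by one thread only */
};

struct intel_excl_states {
	enum intel_excl_state_type state[X86_PMC_IDX_MAX];
};

struct intel_excl_cntrs {
	pthread_mutex_t lock;			/* raw_spinlock_t in the kernel */
	struct intel_excl_states states[2];	/* one slot per HT sibling */
};

/* Thread 'tid' (0 or 1) claims counter 'idx' exclusively. */
static void mark_exclusive(struct intel_excl_cntrs *c, int tid, int idx)
{
	pthread_mutex_lock(&c->lock);
	c->states[tid].state[idx] = INTEL_EXCL_EXCLUSIVE;
	pthread_mutex_unlock(&c->lock);
}

/*
 * Can thread 'tid' schedule an event on counter 'idx'?  It can unless the
 * sibling thread holds that counter exclusively.
 */
static bool counter_usable(struct intel_excl_cntrs *c, int tid, int idx)
{
	bool ok;

	pthread_mutex_lock(&c->lock);
	ok = c->states[tid ^ 1].state[idx] != INTEL_EXCL_EXCLUSIVE;
	pthread_mutex_unlock(&c->lock);
	return ok;
}

int main(void)
{
	struct intel_excl_cntrs c = { .lock = PTHREAD_MUTEX_INITIALIZER };

	mark_exclusive(&c, 0, 2);	/* sibling 0 owns counter 2 */
	printf("thread 1, counter 2: %s\n",
	       counter_usable(&c, 1, 2) ? "usable" : "blocked");
	printf("thread 1, counter 3: %s\n",
	       counter_usable(&c, 1, 3) ? "usable" : "blocked");
	return 0;
}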
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 2dd34b57d3ff..7f54000fd0f1 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2224,16 +2224,52 @@ struct intel_shared_regs *allocate_shared_regs(int cpu)
 	return regs;
 }
 
+static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
+{
+	struct intel_excl_cntrs *c;
+	int i;
+
+	c = kzalloc_node(sizeof(struct intel_excl_cntrs),
+			 GFP_KERNEL, cpu_to_node(cpu));
+	if (c) {
+		raw_spin_lock_init(&c->lock);
+		for (i = 0; i < X86_PMC_IDX_MAX; i++) {
+			c->states[0].state[i] = INTEL_EXCL_UNUSED;
+			c->states[0].init_state[i] = INTEL_EXCL_UNUSED;
+
+			c->states[1].state[i] = INTEL_EXCL_UNUSED;
+			c->states[1].init_state[i] = INTEL_EXCL_UNUSED;
+		}
+		c->core_id = -1;
+	}
+	return c;
+}
+
 static int intel_pmu_cpu_prepare(int cpu)
 {
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 
-	if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map))
-		return NOTIFY_OK;
+	if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) {
+		cpuc->shared_regs = allocate_shared_regs(cpu);
+		if (!cpuc->shared_regs)
+			return NOTIFY_BAD;
+	}
 
-	cpuc->shared_regs = allocate_shared_regs(cpu);
-	if (!cpuc->shared_regs)
-		return NOTIFY_BAD;
+	if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
+		size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
+
+		cpuc->constraint_list = kzalloc(sz, GFP_KERNEL);
+		if (!cpuc->constraint_list)
+			return NOTIFY_BAD;
+
+		cpuc->excl_cntrs = allocate_excl_cntrs(cpu);
+		if (!cpuc->excl_cntrs) {
+			kfree(cpuc->constraint_list);
+			kfree(cpuc->shared_regs);
+			return NOTIFY_BAD;
+		}
+		cpuc->excl_thread_id = 0;
+	}
 
 	return NOTIFY_OK;
 }
@@ -2274,12 +2310,29 @@ static void intel_pmu_cpu_starting(int cpu)
 
 	if (x86_pmu.lbr_sel_map)
 		cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
+
+	if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
+		for_each_cpu(i, topology_thread_cpumask(cpu)) {
+			struct intel_excl_cntrs *c;
+
+			c = per_cpu(cpu_hw_events, i).excl_cntrs;
+			if (c && c->core_id == core_id) {
+				cpuc->kfree_on_online[1] = cpuc->excl_cntrs;
+				cpuc->excl_cntrs = c;
+				cpuc->excl_thread_id = 1;
+				break;
+			}
+		}
+		cpuc->excl_cntrs->core_id = core_id;
+		cpuc->excl_cntrs->refcnt++;
+	}
 }
 
 static void intel_pmu_cpu_dying(int cpu)
 {
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 	struct intel_shared_regs *pc;
+	struct intel_excl_cntrs *c;
 
 	pc = cpuc->shared_regs;
 	if (pc) {
@@ -2287,6 +2340,14 @@ static void intel_pmu_cpu_dying(int cpu)
 		kfree(pc);
 		cpuc->shared_regs = NULL;
 	}
+	c = cpuc->excl_cntrs;
+	if (c) {
+		if (c->core_id == -1 || --c->refcnt == 0)
+			kfree(c);
+		cpuc->excl_cntrs = NULL;
+		kfree(cpuc->constraint_list);
+		cpuc->constraint_list = NULL;
+	}
 
 	fini_debug_store_on_cpu(cpu);
 }
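Taken together, intel_pmu_cpu_prepare(), intel_pmu_cpu_starting() and intel_pmu_cpu_dying() implement a per-core sharing scheme: each logical CPU pre-allocates its own intel_excl_cntrs, the second hyperthread of a core then discards its copy, attaches to its sibling's structure and bumps refcnt, and the last sibling to go offline frees it. A minimal userspace sketch of that attach/detach pattern follows; cpu_attach()/cpu_detach() and the fixed CPU-to-core map are invented for illustration (the kernel walks topology_thread_cpumask() and defers the free via kfree_on_online).

/*
 * Userspace sketch of the attach/detach pattern used in
 * intel_pmu_cpu_starting()/intel_pmu_cpu_dying() above.  Helper names and
 * the static cpu->core mapping are invented for illustration only.
 */
#include <stdio.h>
#include <stdlib.h>

struct excl_cntrs {
	int refcnt;		/* #HT threads sharing this structure */
	int core_id;		/* -1 until a CPU attaches */
};

#define NR_CPUS 4
static struct excl_cntrs *per_cpu_excl[NR_CPUS];
static const int cpu_core[NR_CPUS] = { 0, 0, 1, 1 };	/* 2 cores, 2 HT each */

static void cpu_attach(int cpu)
{
	struct excl_cntrs *own = calloc(1, sizeof(*own));
	int sib;

	own->core_id = -1;
	per_cpu_excl[cpu] = own;

	/* If a sibling on the same core already attached, share its copy. */
	for (sib = 0; sib < NR_CPUS; sib++) {
		struct excl_cntrs *c = per_cpu_excl[sib];

		if (sib != cpu && c && c->core_id == cpu_core[cpu]) {
			free(own);		/* kfree_on_online in the kernel */
			per_cpu_excl[cpu] = c;
			break;
		}
	}
	per_cpu_excl[cpu]->core_id = cpu_core[cpu];
	per_cpu_excl[cpu]->refcnt++;
}

static void cpu_detach(int cpu)
{
	struct excl_cntrs *c = per_cpu_excl[cpu];

	if (c && (c->core_id == -1 || --c->refcnt == 0))
		free(c);			/* last sibling frees it */
	per_cpu_excl[cpu] = NULL;
}

int main(void)
{
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		cpu_attach(cpu);
	printf("core 0 refcnt: %d\n", per_cpu_excl[0]->refcnt);	/* prints 2 */
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		cpu_detach(cpu);
	return 0;
}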