diff options
author | Stephane Eranian <eranian@google.com> | 2012-02-09 17:20:53 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2012-03-05 08:55:40 -0500 |
commit | b36817e8863090f1f24e538106ca50fa1d9e4003 (patch) | |
tree | 9d92c33bfe3db80ca7b262f673ddee884974085f /arch/x86/kernel/cpu | |
parent | 225ce53910edc3c2322b1e4f2ed049a9196cd0b3 (diff) |
perf/x86: Add Intel LBR sharing logic
The Intel LBR on some recent processors is capable
of filtering branches by type. The filter is configurable
via the LBR_SELECT MSR register.
There are limitations on how this register can be used.
On Nehalem/Westmere, the LBR_SELECT is shared by the two HT threads
when HT is on. It is private to each core when HT is off.
On SandyBridge, the LBR_SELECT register is private to each thread
when HT is on. It is private to each core when HT is off.
The kernel must manage the sharing of LBR_SELECT. It allows
multiple users on the same logical CPU to use LBR_SELECT as
long as they program it with the same value. Across sibling
CPUs (HT threads), the same restriction applies on NHM/WSM.
This patch implements this sharing logic by leveraging the
mechanism put in place for managing the offcore_response
shared MSR.
We modify __intel_shared_reg_get_constraints() to cause
x86_get_event_constraint() to be called because LBR may
be associated with events that may be counter constrained.
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1328826068-11713-4-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.h | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel.c | 70 |
3 files changed, 52 insertions, 26 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index f8bddb5b0600..377931354ac7 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -426,6 +426,10 @@ static int __x86_pmu_event_init(struct perf_event *event) | |||
426 | /* mark unused */ | 426 | /* mark unused */ |
427 | event->hw.extra_reg.idx = EXTRA_REG_NONE; | 427 | event->hw.extra_reg.idx = EXTRA_REG_NONE; |
428 | 428 | ||
429 | /* mark not used */ | ||
430 | event->hw.extra_reg.idx = EXTRA_REG_NONE; | ||
431 | event->hw.branch_reg.idx = EXTRA_REG_NONE; | ||
432 | |||
429 | return x86_pmu.hw_config(event); | 433 | return x86_pmu.hw_config(event); |
430 | } | 434 | } |
431 | 435 | ||
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 82db83b5c3bc..9b9c580a7ab8 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h | |||
@@ -33,6 +33,7 @@ enum extra_reg_type { | |||
33 | 33 | ||
34 | EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ | 34 | EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ |
35 | EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ | 35 | EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ |
36 | EXTRA_REG_LBR = 2, /* lbr_select */ | ||
36 | 37 | ||
37 | EXTRA_REG_MAX /* number of entries needed */ | 38 | EXTRA_REG_MAX /* number of entries needed */ |
38 | }; | 39 | }; |
@@ -130,6 +131,7 @@ struct cpu_hw_events { | |||
130 | void *lbr_context; | 131 | void *lbr_context; |
131 | struct perf_branch_stack lbr_stack; | 132 | struct perf_branch_stack lbr_stack; |
132 | struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; | 133 | struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; |
134 | struct er_account *lbr_sel; | ||
133 | 135 | ||
134 | /* | 136 | /* |
135 | * Intel host/guest exclude bits | 137 | * Intel host/guest exclude bits |
@@ -342,6 +344,8 @@ struct x86_pmu { | |||
342 | */ | 344 | */ |
343 | unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ | 345 | unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ |
344 | int lbr_nr; /* hardware stack size */ | 346 | int lbr_nr; /* hardware stack size */ |
347 | u64 lbr_sel_mask; /* LBR_SELECT valid bits */ | ||
348 | const int *lbr_sel_map; /* lbr_select mappings */ | ||
345 | 349 | ||
346 | /* | 350 | /* |
347 | * Extra registers for events | 351 | * Extra registers for events |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 3bd37bdf1b8e..97f7bb587519 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -1123,17 +1123,17 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx) | |||
1123 | */ | 1123 | */ |
1124 | static struct event_constraint * | 1124 | static struct event_constraint * |
1125 | __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, | 1125 | __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, |
1126 | struct perf_event *event) | 1126 | struct perf_event *event, |
1127 | struct hw_perf_event_extra *reg) | ||
1127 | { | 1128 | { |
1128 | struct event_constraint *c = &emptyconstraint; | 1129 | struct event_constraint *c = &emptyconstraint; |
1129 | struct hw_perf_event_extra *reg = &event->hw.extra_reg; | ||
1130 | struct er_account *era; | 1130 | struct er_account *era; |
1131 | unsigned long flags; | 1131 | unsigned long flags; |
1132 | int orig_idx = reg->idx; | 1132 | int orig_idx = reg->idx; |
1133 | 1133 | ||
1134 | /* already allocated shared msr */ | 1134 | /* already allocated shared msr */ |
1135 | if (reg->alloc) | 1135 | if (reg->alloc) |
1136 | return &unconstrained; | 1136 | return NULL; /* call x86_get_event_constraint() */ |
1137 | 1137 | ||
1138 | again: | 1138 | again: |
1139 | era = &cpuc->shared_regs->regs[reg->idx]; | 1139 | era = &cpuc->shared_regs->regs[reg->idx]; |
@@ -1156,14 +1156,10 @@ again: | |||
1156 | reg->alloc = 1; | 1156 | reg->alloc = 1; |
1157 | 1157 | ||
1158 | /* | 1158 | /* |
1159 | * All events using extra_reg are unconstrained. | 1159 | * need to call x86_get_event_constraint() |
1160 | * Avoids calling x86_get_event_constraints() | 1160 | * to check if associated event has constraints |
1161 | * | ||
1162 | * Must revisit if extra_reg controlling events | ||
1163 | * ever have constraints. Worst case we go through | ||
1164 | * the regular event constraint table. | ||
1165 | */ | 1161 | */ |
1166 | c = &unconstrained; | 1162 | c = NULL; |
1167 | } else if (intel_try_alt_er(event, orig_idx)) { | 1163 | } else if (intel_try_alt_er(event, orig_idx)) { |
1168 | raw_spin_unlock_irqrestore(&era->lock, flags); | 1164 | raw_spin_unlock_irqrestore(&era->lock, flags); |
1169 | goto again; | 1165 | goto again; |
@@ -1200,11 +1196,23 @@ static struct event_constraint * | |||
1200 | intel_shared_regs_constraints(struct cpu_hw_events *cpuc, | 1196 | intel_shared_regs_constraints(struct cpu_hw_events *cpuc, |
1201 | struct perf_event *event) | 1197 | struct perf_event *event) |
1202 | { | 1198 | { |
1203 | struct event_constraint *c = NULL; | 1199 | struct event_constraint *c = NULL, *d; |
1204 | 1200 | struct hw_perf_event_extra *xreg, *breg; | |
1205 | if (event->hw.extra_reg.idx != EXTRA_REG_NONE) | 1201 | |
1206 | c = __intel_shared_reg_get_constraints(cpuc, event); | 1202 | xreg = &event->hw.extra_reg; |
1207 | 1203 | if (xreg->idx != EXTRA_REG_NONE) { | |
1204 | c = __intel_shared_reg_get_constraints(cpuc, event, xreg); | ||
1205 | if (c == &emptyconstraint) | ||
1206 | return c; | ||
1207 | } | ||
1208 | breg = &event->hw.branch_reg; | ||
1209 | if (breg->idx != EXTRA_REG_NONE) { | ||
1210 | d = __intel_shared_reg_get_constraints(cpuc, event, breg); | ||
1211 | if (d == &emptyconstraint) { | ||
1212 | __intel_shared_reg_put_constraints(cpuc, xreg); | ||
1213 | c = d; | ||
1214 | } | ||
1215 | } | ||
1208 | return c; | 1216 | return c; |
1209 | } | 1217 | } |
1210 | 1218 | ||
@@ -1252,6 +1260,10 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc, | |||
1252 | reg = &event->hw.extra_reg; | 1260 | reg = &event->hw.extra_reg; |
1253 | if (reg->idx != EXTRA_REG_NONE) | 1261 | if (reg->idx != EXTRA_REG_NONE) |
1254 | __intel_shared_reg_put_constraints(cpuc, reg); | 1262 | __intel_shared_reg_put_constraints(cpuc, reg); |
1263 | |||
1264 | reg = &event->hw.branch_reg; | ||
1265 | if (reg->idx != EXTRA_REG_NONE) | ||
1266 | __intel_shared_reg_put_constraints(cpuc, reg); | ||
1255 | } | 1267 | } |
1256 | 1268 | ||
1257 | static void intel_put_event_constraints(struct cpu_hw_events *cpuc, | 1269 | static void intel_put_event_constraints(struct cpu_hw_events *cpuc, |
@@ -1431,7 +1443,7 @@ static int intel_pmu_cpu_prepare(int cpu) | |||
1431 | { | 1443 | { |
1432 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); | 1444 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); |
1433 | 1445 | ||
1434 | if (!x86_pmu.extra_regs) | 1446 | if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map)) |
1435 | return NOTIFY_OK; | 1447 | return NOTIFY_OK; |
1436 | 1448 | ||
1437 | cpuc->shared_regs = allocate_shared_regs(cpu); | 1449 | cpuc->shared_regs = allocate_shared_regs(cpu); |
@@ -1453,22 +1465,28 @@ static void intel_pmu_cpu_starting(int cpu) | |||
1453 | */ | 1465 | */ |
1454 | intel_pmu_lbr_reset(); | 1466 | intel_pmu_lbr_reset(); |
1455 | 1467 | ||
1456 | if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING)) | 1468 | cpuc->lbr_sel = NULL; |
1469 | |||
1470 | if (!cpuc->shared_regs) | ||
1457 | return; | 1471 | return; |
1458 | 1472 | ||
1459 | for_each_cpu(i, topology_thread_cpumask(cpu)) { | 1473 | if (!(x86_pmu.er_flags & ERF_NO_HT_SHARING)) { |
1460 | struct intel_shared_regs *pc; | 1474 | for_each_cpu(i, topology_thread_cpumask(cpu)) { |
1475 | struct intel_shared_regs *pc; | ||
1461 | 1476 | ||
1462 | pc = per_cpu(cpu_hw_events, i).shared_regs; | 1477 | pc = per_cpu(cpu_hw_events, i).shared_regs; |
1463 | if (pc && pc->core_id == core_id) { | 1478 | if (pc && pc->core_id == core_id) { |
1464 | cpuc->kfree_on_online = cpuc->shared_regs; | 1479 | cpuc->kfree_on_online = cpuc->shared_regs; |
1465 | cpuc->shared_regs = pc; | 1480 | cpuc->shared_regs = pc; |
1466 | break; | 1481 | break; |
1482 | } | ||
1467 | } | 1483 | } |
1484 | cpuc->shared_regs->core_id = core_id; | ||
1485 | cpuc->shared_regs->refcnt++; | ||
1468 | } | 1486 | } |
1469 | 1487 | ||
1470 | cpuc->shared_regs->core_id = core_id; | 1488 | if (x86_pmu.lbr_sel_map) |
1471 | cpuc->shared_regs->refcnt++; | 1489 | cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR]; |
1472 | } | 1490 | } |
1473 | 1491 | ||
1474 | static void intel_pmu_cpu_dying(int cpu) | 1492 | static void intel_pmu_cpu_dying(int cpu) |