author		Stephane Eranian <eranian@google.com>	2011-06-06 10:57:03 -0400
committer	Ingo Molnar <mingo@elte.hu>	2011-07-01 05:06:36 -0400
commit		efc9f05df2dd171280dcb736a4d973ffefd5508e (patch)
tree		ccc1cee8f1cc0ad5391732eb3637b685b4b155a0
parent		a7ac67ea021b4603095d2aa458bc41641238f22c (diff)
perf_events: Update Intel extra regs shared constraints management
This patch improves the code managing the extra shared registers
used for offcore_response events on Intel Nehalem/Westmere. The
idea is to use static allocation instead of dynamic allocation,
which greatly simplifies the get and put constraint routines for
those events.

The patch also renames per_core to shared_regs because the same
data structure is used whether or not HT is on. When HT is off,
those events still need coordination because they use an extra
MSR that has to be shared within an event group.
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20110606145703.GA7258@quad
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--	arch/x86/kernel/cpu/perf_event.c        |  78
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel.c  | 260
-rw-r--r--	include/linux/perf_event.h              |  14
3 files changed, 200 insertions(+), 152 deletions(-)
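Before the diff, a minimal userspace-flavoured sketch of the data layout this patch introduces: one statically sized er_account slot per extra register, grouped in an intel_shared_regs block that is allocated per CPU and later shared per core. This is an illustration only; the function name alloc_shared_regs and the pthread/calloc/C11-atomic stand-ins for the kernel's raw_spinlock_t, kzalloc_node and atomic_t are assumptions of this sketch, not the kernel code itself.

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

/* Mirrors the extra_reg_type enum added by the patch. */
enum extra_reg_type {
	EXTRA_REG_NONE  = -1,	/* not used */
	EXTRA_REG_RSP_0 =  0,	/* offcore_response_0 */
	EXTRA_REG_RSP_1 =  1,	/* offcore_response_1 */
	EXTRA_REG_MAX		/* number of entries needed */
};

/* Per extra-register state: statically sized, no free-slot search needed. */
struct er_account {
	pthread_mutex_t    lock;	/* stand-in for raw_spinlock_t */
	unsigned long long config;	/* value currently locked into the MSR */
	unsigned long long reg;		/* extra MSR number */
	atomic_int         ref;		/* number of events using this value */
};

/* One block per CPU; when HT is on, sibling threads end up sharing one. */
struct intel_shared_regs {
	struct er_account regs[EXTRA_REG_MAX];
	int               refcnt;	/* #HT threads attached to the block */
	unsigned          core_id;
};

/* Rough analogue of allocate_shared_regs(): zeroed block, locks initialized. */
static struct intel_shared_regs *alloc_shared_regs(void)
{
	struct intel_shared_regs *regs = calloc(1, sizeof(*regs));

	if (regs) {
		for (int i = 0; i < EXTRA_REG_MAX; i++)
			pthread_mutex_init(&regs->regs[i].lock, NULL);
		regs->core_id = (unsigned)-1;
	}
	return regs;
}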
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 5b86ec51534c..019fda7489e7 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -45,6 +45,29 @@ do { \
 #endif
 
 /*
+ *          | NHM/WSM    |      SNB     |
+ * register -------------------------------
+ *          |  HT  | no HT |  HT  | no HT |
+ *-----------------------------------------
+ * offcore  | core | core  | cpu  | core  |
+ * lbr_sel  | core | core  | cpu  | core  |
+ * ld_lat   | cpu  | core  | cpu  | core  |
+ *-----------------------------------------
+ *
+ * Given that there is a small number of shared regs,
+ * we can pre-allocate their slot in the per-cpu
+ * per-core reg tables.
+ */
+enum extra_reg_type {
+	EXTRA_REG_NONE  = -1,	/* not used */
+
+	EXTRA_REG_RSP_0 =  0,	/* offcore_response_0 */
+	EXTRA_REG_RSP_1 =  1,	/* offcore_response_1 */
+
+	EXTRA_REG_MAX		/* number of entries needed */
+};
+
+/*
  * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
  */
 static unsigned long
@@ -132,11 +155,10 @@ struct cpu_hw_events {
 	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
 
 	/*
-	 * Intel percore register state.
-	 * Coordinate shared resources between HT threads.
+	 * manage shared (per-core, per-cpu) registers
+	 * used on Intel NHM/WSM/SNB
 	 */
-	int				percore_used; /* Used by this CPU? */
-	struct intel_percore		*per_core;
+	struct intel_shared_regs	*shared_regs;
 
 	/*
 	 * AMD specific bits
@@ -187,26 +209,45 @@ struct cpu_hw_events {
 	for ((e) = (c); (e)->weight; (e)++)
 
 /*
+ * Per register state.
+ */
+struct er_account {
+	raw_spinlock_t		lock;	/* per-core: protect structure */
+	u64			config;	/* extra MSR config */
+	u64			reg;	/* extra MSR number */
+	atomic_t		ref;	/* reference count */
+};
+
+/*
  * Extra registers for specific events.
+ *
  * Some events need large masks and require external MSRs.
- * Define a mapping to these extra registers.
+ * Those extra MSRs end up being shared for all events on
+ * a PMU and sometimes between PMU of sibling HT threads.
+ * In either case, the kernel needs to handle conflicting
+ * accesses to those extra, shared, regs. The data structure
+ * to manage those registers is stored in cpu_hw_event.
  */
 struct extra_reg {
 	unsigned int		event;
 	unsigned int		msr;
 	u64			config_mask;
 	u64			valid_mask;
+	int			idx;  /* per_xxx->regs[] reg index */
 };
 
-#define EVENT_EXTRA_REG(e, ms, m, vm) {	\
+#define EVENT_EXTRA_REG(e, ms, m, vm, i) {	\
 	.event = (e),		\
 	.msr = (ms),		\
 	.config_mask = (m),	\
 	.valid_mask = (vm),	\
+	.idx = EXTRA_REG_##i	\
 	}
-#define INTEL_EVENT_EXTRA_REG(event, msr, vm)	\
-	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm)
-#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0)
+
+#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx)	\
+	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
+
+#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
 
 union perf_capabilities {
 	struct {
@@ -253,7 +294,6 @@ struct x86_pmu {
 	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
 						 struct perf_event *event);
 	struct event_constraint *event_constraints;
-	struct event_constraint *percore_constraints;
 	void		(*quirks)(void);
 	int		perfctr_second_write;
 
@@ -400,10 +440,10 @@ static inline unsigned int x86_pmu_event_addr(int index)
  */
 static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
 {
+	struct hw_perf_event_extra *reg;
 	struct extra_reg *er;
 
-	event->hw.extra_reg = 0;
-	event->hw.extra_config = 0;
+	reg = &event->hw.extra_reg;
 
 	if (!x86_pmu.extra_regs)
 		return 0;
@@ -413,8 +453,10 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
 			continue;
 		if (event->attr.config1 & ~er->valid_mask)
 			return -EINVAL;
-		event->hw.extra_reg = er->msr;
-		event->hw.extra_config = event->attr.config1;
+
+		reg->idx = er->idx;
+		reg->config = event->attr.config1;
+		reg->reg = er->msr;
 		break;
 	}
 	return 0;
@@ -713,6 +755,9 @@ static int __x86_pmu_event_init(struct perf_event *event)
 	event->hw.last_cpu = -1;
 	event->hw.last_tag = ~0ULL;
 
+	/* mark unused */
+	event->hw.extra_reg.idx = EXTRA_REG_NONE;
+
 	return x86_pmu.hw_config(event);
 }
 
@@ -754,8 +799,8 @@ static void x86_pmu_disable(struct pmu *pmu)
 static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
 					  u64 enable_mask)
 {
-	if (hwc->extra_reg)
-		wrmsrl(hwc->extra_reg, hwc->extra_config);
+	if (hwc->extra_reg.reg)
+		wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
 	wrmsrl(hwc->config_base, hwc->config | enable_mask);
 }
 
@@ -1692,7 +1737,6 @@ static int validate_group(struct perf_event *event)
 	fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
 	if (!fake_cpuc)
 		goto out;
-
 	/*
 	 * the event is not yet connected with its
 	 * siblings therefore we must first collect
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index d38b0020f775..6ad95baff856 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1,25 +1,15 @@
 #ifdef CONFIG_CPU_SUP_INTEL
 
-#define MAX_EXTRA_REGS 2
-
-/*
- * Per register state.
- */
-struct er_account {
-	int			ref;		/* reference count */
-	unsigned int		extra_reg;	/* extra MSR number */
-	u64			extra_config;	/* extra MSR config */
-};
-
 /*
- * Per core state
- * This used to coordinate shared registers for HT threads.
+ * Per core/cpu state
+ *
+ * Used to coordinate shared registers between HT threads or
+ * among events on a single PMU.
  */
-struct intel_percore {
-	raw_spinlock_t		lock;		/* protect structure */
-	struct er_account	regs[MAX_EXTRA_REGS];
-	int			refcnt;		/* number of threads */
-	unsigned		core_id;
+struct intel_shared_regs {
+	struct er_account	regs[EXTRA_REG_MAX];
+	int			refcnt;		/* per-core: #HT threads */
+	unsigned		core_id;	/* per-core: core id */
 };
 
 /*
@@ -88,16 +78,10 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
 
 static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
 {
-	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
+	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
 	EVENT_EXTRA_END
 };
 
-static struct event_constraint intel_nehalem_percore_constraints[] __read_mostly =
-{
-	INTEL_EVENT_CONSTRAINT(0xb7, 0),
-	EVENT_CONSTRAINT_END
-};
-
 static struct event_constraint intel_westmere_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -125,18 +109,11 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
 
 static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
 {
-	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
-	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff),
+	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
+	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
 	EVENT_EXTRA_END
 };
 
-static struct event_constraint intel_westmere_percore_constraints[] __read_mostly =
-{
-	INTEL_EVENT_CONSTRAINT(0xb7, 0),
-	INTEL_EVENT_CONSTRAINT(0xbb, 0),
-	EVENT_CONSTRAINT_END
-};
-
 static struct event_constraint intel_gen_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -1037,65 +1014,89 @@ intel_bts_constraints(struct perf_event *event)
 	return NULL;
 }
 
+/*
+ * manage allocation of shared extra msr for certain events
+ *
+ * sharing can be:
+ * per-cpu: to be shared between the various events on a single PMU
+ * per-core: per-cpu + shared by HT threads
+ */
 static struct event_constraint *
-intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
+				   struct hw_perf_event_extra *reg)
 {
-	struct hw_perf_event *hwc = &event->hw;
-	unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
-	struct event_constraint *c;
-	struct intel_percore *pc;
+	struct event_constraint *c = &emptyconstraint;
 	struct er_account *era;
-	int i;
-	int free_slot;
-	int found;
 
-	if (!x86_pmu.percore_constraints || hwc->extra_alloc)
-		return NULL;
+	/* already allocated shared msr */
+	if (reg->alloc || !cpuc->shared_regs)
+		return &unconstrained;
 
-	for (c = x86_pmu.percore_constraints; c->cmask; c++) {
-		if (e != c->code)
-			continue;
+	era = &cpuc->shared_regs->regs[reg->idx];
+
+	raw_spin_lock(&era->lock);
+
+	if (!atomic_read(&era->ref) || era->config == reg->config) {
+
+		/* lock in msr value */
+		era->config = reg->config;
+		era->reg = reg->reg;
+
+		/* one more user */
+		atomic_inc(&era->ref);
+
+		/* no need to reallocate during incremental event scheduling */
+		reg->alloc = 1;
 
 		/*
-		 * Allocate resource per core.
+		 * All events using extra_reg are unconstrained.
+		 * Avoids calling x86_get_event_constraints()
+		 *
+		 * Must revisit if extra_reg controlling events
+		 * ever have constraints. Worst case we go through
+		 * the regular event constraint table.
 		 */
-		pc = cpuc->per_core;
-		if (!pc)
-			break;
-		c = &emptyconstraint;
-		raw_spin_lock(&pc->lock);
-		free_slot = -1;
-		found = 0;
-		for (i = 0; i < MAX_EXTRA_REGS; i++) {
-			era = &pc->regs[i];
-			if (era->ref > 0 && hwc->extra_reg == era->extra_reg) {
-				/* Allow sharing same config */
-				if (hwc->extra_config == era->extra_config) {
-					era->ref++;
-					cpuc->percore_used = 1;
-					hwc->extra_alloc = 1;
-					c = NULL;
-				}
-				/* else conflict */
-				found = 1;
-				break;
-			} else if (era->ref == 0 && free_slot == -1)
-				free_slot = i;
-		}
-		if (!found && free_slot != -1) {
-			era = &pc->regs[free_slot];
-			era->ref = 1;
-			era->extra_reg = hwc->extra_reg;
-			era->extra_config = hwc->extra_config;
-			cpuc->percore_used = 1;
-			hwc->extra_alloc = 1;
-			c = NULL;
-		}
-		raw_spin_unlock(&pc->lock);
-		return c;
+		c = &unconstrained;
 	}
+	raw_spin_unlock(&era->lock);
 
-	return NULL;
+	return c;
+}
+
+static void
+__intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
+				   struct hw_perf_event_extra *reg)
+{
+	struct er_account *era;
+
+	/*
+	 * only put constraint if extra reg was actually
+	 * allocated. Also takes care of event which do
+	 * not use an extra shared reg
+	 */
+	if (!reg->alloc)
+		return;
+
+	era = &cpuc->shared_regs->regs[reg->idx];
+
+	/* one fewer user */
+	atomic_dec(&era->ref);
+
+	/* allocate again next time */
+	reg->alloc = 0;
+}
+
+static struct event_constraint *
+intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
+			      struct perf_event *event)
+{
+	struct event_constraint *c = NULL;
+	struct hw_perf_event_extra *xreg;
+
+	xreg = &event->hw.extra_reg;
+	if (xreg->idx != EXTRA_REG_NONE)
+		c = __intel_shared_reg_get_constraints(cpuc, xreg);
+	return c;
 }
 
 static struct event_constraint *
@@ -1111,49 +1112,28 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
 	if (c)
 		return c;
 
-	c = intel_percore_constraints(cpuc, event);
+	c = intel_shared_regs_constraints(cpuc, event);
 	if (c)
 		return c;
 
 	return x86_get_event_constraints(cpuc, event);
 }
 
-static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
+static void
+intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
 					struct perf_event *event)
 {
-	struct extra_reg *er;
-	struct intel_percore *pc;
-	struct er_account *era;
-	struct hw_perf_event *hwc = &event->hw;
-	int i, allref;
-
-	if (!cpuc->percore_used)
-		return;
+	struct hw_perf_event_extra *reg;
 
-	for (er = x86_pmu.extra_regs; er->msr; er++) {
-		if (er->event != (hwc->config & er->config_mask))
-			continue;
+	reg = &event->hw.extra_reg;
+	if (reg->idx != EXTRA_REG_NONE)
+		__intel_shared_reg_put_constraints(cpuc, reg);
+}
 
-		pc = cpuc->per_core;
-		raw_spin_lock(&pc->lock);
-		for (i = 0; i < MAX_EXTRA_REGS; i++) {
-			era = &pc->regs[i];
-			if (era->ref > 0 &&
-			    era->extra_config == hwc->extra_config &&
-			    era->extra_reg == er->msr) {
-				era->ref--;
-				hwc->extra_alloc = 0;
-				break;
-			}
-		}
-		allref = 0;
-		for (i = 0; i < MAX_EXTRA_REGS; i++)
-			allref += pc->regs[i].ref;
-		if (allref == 0)
-			cpuc->percore_used = 0;
-		raw_spin_unlock(&pc->lock);
-		break;
-	}
+static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
+					struct perf_event *event)
+{
+	intel_put_shared_regs_event_constraints(cpuc, event);
 }
 
 static int intel_pmu_hw_config(struct perf_event *event)
@@ -1231,20 +1211,36 @@ static __initconst const struct x86_pmu core_pmu = {
 	.event_constraints	= intel_core_event_constraints,
 };
 
+static struct intel_shared_regs *allocate_shared_regs(int cpu)
+{
+	struct intel_shared_regs *regs;
+	int i;
+
+	regs = kzalloc_node(sizeof(struct intel_shared_regs),
+			    GFP_KERNEL, cpu_to_node(cpu));
+	if (regs) {
+		/*
+		 * initialize the locks to keep lockdep happy
+		 */
+		for (i = 0; i < EXTRA_REG_MAX; i++)
+			raw_spin_lock_init(&regs->regs[i].lock);
+
+		regs->core_id = -1;
+	}
+	return regs;
+}
+
 static int intel_pmu_cpu_prepare(int cpu)
 {
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 
-	if (!cpu_has_ht_siblings())
+	if (!x86_pmu.extra_regs)
 		return NOTIFY_OK;
 
-	cpuc->per_core = kzalloc_node(sizeof(struct intel_percore),
-				      GFP_KERNEL, cpu_to_node(cpu));
-	if (!cpuc->per_core)
+	cpuc->shared_regs = allocate_shared_regs(cpu);
+	if (!cpuc->shared_regs)
 		return NOTIFY_BAD;
 
-	raw_spin_lock_init(&cpuc->per_core->lock);
-	cpuc->per_core->core_id = -1;
 	return NOTIFY_OK;
 }
 
@@ -1260,32 +1256,34 @@ static void intel_pmu_cpu_starting(int cpu)
 	 */
 	intel_pmu_lbr_reset();
 
-	if (!cpu_has_ht_siblings())
+	if (!cpuc->shared_regs)
 		return;
 
 	for_each_cpu(i, topology_thread_cpumask(cpu)) {
-		struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core;
+		struct intel_shared_regs *pc;
 
+		pc = per_cpu(cpu_hw_events, i).shared_regs;
 		if (pc && pc->core_id == core_id) {
-			kfree(cpuc->per_core);
-			cpuc->per_core = pc;
+			kfree(cpuc->shared_regs);
+			cpuc->shared_regs = pc;
 			break;
 		}
 	}
 
-	cpuc->per_core->core_id = core_id;
-	cpuc->per_core->refcnt++;
+	cpuc->shared_regs->core_id = core_id;
+	cpuc->shared_regs->refcnt++;
 }
 
 static void intel_pmu_cpu_dying(int cpu)
 {
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
-	struct intel_percore *pc = cpuc->per_core;
+	struct intel_shared_regs *pc;
 
+	pc = cpuc->shared_regs;
 	if (pc) {
 		if (pc->core_id == -1 || --pc->refcnt == 0)
 			kfree(pc);
-		cpuc->per_core = NULL;
+		cpuc->shared_regs = NULL;
 	}
 
 	fini_debug_store_on_cpu(cpu);
@@ -1436,7 +1434,6 @@ static __init int intel_pmu_init(void)
 
 		x86_pmu.event_constraints = intel_nehalem_event_constraints;
 		x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
-		x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.extra_regs = intel_nehalem_extra_regs;
 
@@ -1481,7 +1478,6 @@ static __init int intel_pmu_init(void)
 		intel_pmu_lbr_init_nhm();
 
 		x86_pmu.event_constraints = intel_westmere_event_constraints;
-		x86_pmu.percore_constraints = intel_westmere_percore_constraints;
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
 		x86_pmu.extra_regs = intel_westmere_extra_regs;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 771b0b2845e4..069315eefb22 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -536,6 +536,16 @@ struct perf_branch_stack {
 
 struct task_struct;
 
+/*
+ * extra PMU register associated with an event
+ */
+struct hw_perf_event_extra {
+	u64		config;	/* register value */
+	unsigned int	reg;	/* register address or index */
+	int		alloc;	/* extra register already allocated */
+	int		idx;	/* index in shared_regs->regs[] */
+};
+
 /**
  * struct hw_perf_event - performance event hardware details:
  */
@@ -549,9 +559,7 @@ struct hw_perf_event {
 		unsigned long	event_base;
 		int		idx;
 		int		last_cpu;
-		unsigned int	extra_reg;
-		u64		extra_config;
-		int		extra_alloc;
+		struct hw_perf_event_extra extra_reg;
 	};
 	struct { /* software */
 		struct hrtimer	hrtimer;
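To round out the picture, here is a sketch of the get/put behaviour the patch introduces: an extra-register slot is granted when it is unused or already programmed with the same config, and is simply reference-counted; a conflicting config is refused (the kernel then returns the empty constraint). This reuses the illustrative structures from the sketch after the diffstat; the names shared_reg_get and shared_reg_put are assumptions of this sketch, not the kernel's functions.

/* Grant the slot if it is free or already carries the same config;
 * mirrors the spirit of __intel_shared_reg_get_constraints(). */
static int shared_reg_get(struct intel_shared_regs *sr, int idx,
			  unsigned long long config, unsigned long long msr)
{
	struct er_account *era = &sr->regs[idx];
	int granted = 0;

	pthread_mutex_lock(&era->lock);
	if (atomic_load(&era->ref) == 0 || era->config == config) {
		era->config = config;		/* lock in the MSR value */
		era->reg    = msr;
		atomic_fetch_add(&era->ref, 1);	/* one more user */
		granted = 1;
	}
	pthread_mutex_unlock(&era->lock);
	return granted;	/* 0 means conflict: the event cannot be scheduled */
}

/* Counterpart of __intel_shared_reg_put_constraints(): drop one reference
 * so the slot can be reprogrammed once the last user is gone. */
static void shared_reg_put(struct intel_shared_regs *sr, int idx)
{
	atomic_fetch_sub(&sr->regs[idx].ref, 1);
}

In this model, two events requesting the same offcore_response config both succeed on EXTRA_REG_RSP_0, while a third event with a different config is refused until the first two have dropped their references, which is the sharing behaviour the commit message describes for both the HT and non-HT cases.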