Diffstat (limited to 'arch')

 arch/arm/kernel/perf_event.c                |   4 -
 arch/powerpc/include/asm/uprobes.h          |   5 +-
 arch/powerpc/kernel/uprobes.c               |   2 +-
 arch/x86/kernel/cpu/Makefile                |   2 +-
 arch/x86/kernel/cpu/perf_event_intel_rapl.c | 679 +++++++++++++++++++++++++++
 5 files changed, 683 insertions(+), 9 deletions(-)
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index bc3f2efa0d86..789d846a9184 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -99,10 +99,6 @@ int armpmu_event_set_period(struct perf_event *event)
         s64 period = hwc->sample_period;
         int ret = 0;
 
-        /* The period may have been changed by PERF_EVENT_IOC_PERIOD */
-        if (unlikely(period != hwc->last_period))
-                left = period - (hwc->last_period - left);
-
         if (unlikely(left <= -period)) {
                 left = period;
                 local64_set(&hwc->period_left, left);
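
The four lines removed from armpmu_event_set_period() rebased the remaining count whenever the sample period had been changed via PERF_EVENT_IOC_PERIOD; presumably the adjustment is now performed generically by the perf core, which makes the architecture-specific copy redundant. As a worked example of what the removed arithmetic did: with an old period of 1000, 600 events already counted (left = 400), and a new period of 500, left = 500 - (1000 - 400) = -100, which then falls into the retained "left <= -period" branch and restarts the counter with the new period.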
diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h
index 75c6ecdb8f37..7422a999a39a 100644
--- a/arch/powerpc/include/asm/uprobes.h
+++ b/arch/powerpc/include/asm/uprobes.h
@@ -36,9 +36,8 @@ typedef ppc_opcode_t uprobe_opcode_t;
 
 struct arch_uprobe {
         union {
-                u8 insn[MAX_UINSN_BYTES];
-                u8 ixol[MAX_UINSN_BYTES];
-                u32 ainsn;
+                u32 insn;
+                u32 ixol;
         };
 };
 
diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c
index 59f419b935f2..003b20964ea0 100644
--- a/arch/powerpc/kernel/uprobes.c
+++ b/arch/powerpc/kernel/uprobes.c
@@ -186,7 +186,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
          * emulate_step() returns 1 if the insn was successfully emulated.
          * For all other cases, we need to single-step in hardware.
          */
-        ret = emulate_step(regs, auprobe->ainsn);
+        ret = emulate_step(regs, auprobe->insn);
         if (ret > 0)
                 return true;
 
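
Both powerpc hunks follow from the same fact: powerpc instructions are a fixed 4 bytes wide, so the u8 arrays and the extra u32 view in the old union all named the same 32-bit word, and collapsing the union to two u32 members lets emulate_step() take auprobe->insn directly without a cast. A minimal userspace sketch of the layout equivalence, assuming MAX_UINSN_BYTES is 4 on this architecture:

#include <assert.h>
#include <stdint.h>

/* Illustration only: stand-ins for the kernel's struct arch_uprobe. */
struct old_layout {
        union {
                uint8_t  insn[4];       /* MAX_UINSN_BYTES == 4 assumed */
                uint8_t  ixol[4];
                uint32_t ainsn;
        };
};

struct new_layout {
        union {
                uint32_t insn;          /* same storage, no cast needed */
                uint32_t ixol;
        };
};

int main(void)
{
        /* Both unions occupy a single 32-bit word. */
        assert(sizeof(struct old_layout) == sizeof(struct new_layout));

        struct old_layout o = { .ainsn = 0x60000000 };  /* ppc "nop" */
        struct new_layout n = { .insn  = 0x60000000 };

        assert(o.ainsn == n.insn);
        return 0;
}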
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 47b56a7e99cb..6359506a19ee 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -36,7 +36,7 @@ obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o
 endif
 obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o
 obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
-obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o
+obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o perf_event_intel_rapl.o
 endif
 
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
new file mode 100644
index 000000000000..5ad35ad94d0f
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -0,0 +1,679 @@
+/*
+ * perf_event_intel_rapl.c: support Intel RAPL energy consumption counters
+ * Copyright (C) 2013 Google, Inc., Stephane Eranian
+ *
+ * The Intel RAPL interface is specified in the IA-32 Manual Vol3b,
+ * section 14.7.1 (September 2013).
+ *
+ * RAPL provides more controls than just reporting energy consumption,
+ * however here we only expose the free running energy consumption
+ * counters (pp0, pkg, dram, pp1).
+ *
+ * Each of those counters increments in an energy unit defined by the
+ * RAPL_POWER_UNIT MSR. On SandyBridge, this unit is 1/(2^16) Joules,
+ * but it can vary.
+ *
+ * Counter to rapl events mappings:
+ *
+ *  pp0 counter: consumption of all physical cores (power plane 0)
+ *        event: rapl_energy_cores
+ *    perf code: 0x1
+ *
+ *  pkg counter: consumption of the whole processor package
+ *        event: rapl_energy_pkg
+ *    perf code: 0x2
+ *
+ * dram counter: consumption of the dram domain (servers only)
+ *        event: rapl_energy_dram
+ *    perf code: 0x3
+ *
+ *  gpu counter: consumption of the builtin-gpu domain (clients only)
+ *        event: rapl_energy_gpu
+ *    perf code: 0x4
+ *
+ * We manage those counters as free running (read-only). They may be
+ * used simultaneously by other tools, such as turbostat.
+ *
+ * The events only support system-wide mode counting. There is no
+ * sampling support because it does not make sense and is not
+ * supported by the RAPL hardware.
+ *
+ * Because we want to avoid floating-point operations in the kernel,
+ * the events are all reported in fixed point arithmetic (32.32).
+ * Tools must convert the counts to Joules, e.g. with
+ * ldexp(raw_count, -32), and divide by the duration of the
+ * measurement to obtain Watts.
+ */
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/perf_event.h>
+#include <asm/cpu_device_id.h>
+#include "perf_event.h"
+
+/*
+ * RAPL energy status counters
+ */
+#define RAPL_IDX_PP0_NRG_STAT   0       /* all cores */
+#define INTEL_RAPL_PP0          0x1     /* pseudo-encoding */
+#define RAPL_IDX_PKG_NRG_STAT   1       /* entire package */
+#define INTEL_RAPL_PKG          0x2     /* pseudo-encoding */
+#define RAPL_IDX_RAM_NRG_STAT   2       /* DRAM */
+#define INTEL_RAPL_RAM          0x3     /* pseudo-encoding */
+#define RAPL_IDX_PP1_NRG_STAT   3       /* gpu */
+#define INTEL_RAPL_PP1          0x4     /* pseudo-encoding */
+
+/* Clients have PP0, PKG, PP1 */
+#define RAPL_IDX_CLN    (1<<RAPL_IDX_PP0_NRG_STAT|\
+                         1<<RAPL_IDX_PKG_NRG_STAT|\
+                         1<<RAPL_IDX_PP1_NRG_STAT)
+
+/* Servers have PP0, PKG, RAM */
+#define RAPL_IDX_SRV    (1<<RAPL_IDX_PP0_NRG_STAT|\
+                         1<<RAPL_IDX_PKG_NRG_STAT|\
+                         1<<RAPL_IDX_RAM_NRG_STAT)
+
+/*
+ * event code: LSB 8 bits, passed in attr->config
+ * any other bit is reserved
+ */
+#define RAPL_EVENT_MASK 0xFFULL
+
+#define DEFINE_RAPL_FORMAT_ATTR(_var, _name, _format)           \
+static ssize_t __rapl_##_var##_show(struct kobject *kobj,       \
+                                struct kobj_attribute *attr,    \
+                                char *page)                     \
+{                                                               \
+        BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);             \
+        return sprintf(page, _format "\n");                     \
+}                                                               \
+static struct kobj_attribute format_attr_##_var =               \
+        __ATTR(_name, 0444, __rapl_##_var##_show, NULL)
+
+#define RAPL_EVENT_DESC(_name, _config)                         \
+{                                                               \
+        .attr   = __ATTR(_name, 0444, rapl_event_show, NULL),   \
+        .config = _config,                                      \
+}
+
+#define RAPL_CNTR_WIDTH 32 /* 32-bit rapl counters */
+
+struct rapl_pmu {
+        spinlock_t       lock;
+        int              hw_unit;  /* 1/2^hw_unit Joule */
+        int              n_active; /* number of active events */
+        struct list_head active_list;
+        struct pmu       *pmu;     /* pointer to rapl_pmu_class */
+        ktime_t          timer_interval; /* in ktime_t unit */
+        struct hrtimer   hrtimer;
+};
+
+static struct pmu rapl_pmu_class;
+static cpumask_t rapl_cpu_mask;
+static int rapl_cntr_mask;
+
+static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu);
+static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free);
+
+static inline u64 rapl_read_counter(struct perf_event *event)
+{
+        u64 raw;
+
+        rdmsrl(event->hw.event_base, raw);
+        return raw;
+}
+
+static inline u64 rapl_scale(u64 v)
+{
+        /*
+         * scale delta to the smallest unit (1/2^32);
+         * users must then scale back: count * 2^-32 to get Joules,
+         * e.g. with ldexp(count, -32).
+         * Watts = Joules/Time delta
+         */
+        return v << (32 - __get_cpu_var(rapl_pmu)->hw_unit);
+}
+
+static u64 rapl_event_update(struct perf_event *event)
+{
+        struct hw_perf_event *hwc = &event->hw;
+        u64 prev_raw_count, new_raw_count;
+        s64 delta, sdelta;
+        int shift = RAPL_CNTR_WIDTH;
+
+again:
+        prev_raw_count = local64_read(&hwc->prev_count);
+        rdmsrl(event->hw.event_base, new_raw_count);
+
+        if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+                            new_raw_count) != prev_raw_count) {
+                cpu_relax();
+                goto again;
+        }
+
+        /*
+         * Now we have the new raw value and have updated the prev
+         * timestamp already. We can now calculate the elapsed delta
+         * (event-)time and add that to the generic event.
+         *
+         * Careful, not all hw sign-extends above the physical width
+         * of the count.
+         */
+        delta = (new_raw_count << shift) - (prev_raw_count << shift);
+        delta >>= shift;
+
+        sdelta = rapl_scale(delta);
+
+        local64_add(sdelta, &event->count);
+
+        return new_raw_count;
+}
+
+static void rapl_start_hrtimer(struct rapl_pmu *pmu)
+{
+        __hrtimer_start_range_ns(&pmu->hrtimer,
+                                 pmu->timer_interval, 0,
+                                 HRTIMER_MODE_REL_PINNED, 0);
+}
+
+static void rapl_stop_hrtimer(struct rapl_pmu *pmu)
+{
+        hrtimer_cancel(&pmu->hrtimer);
+}
+
+static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer)
+{
+        struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu);
+        struct perf_event *event;
+        unsigned long flags;
+
+        if (!pmu->n_active)
+                return HRTIMER_NORESTART;
+
+        spin_lock_irqsave(&pmu->lock, flags);
+
+        list_for_each_entry(event, &pmu->active_list, active_entry)
+                rapl_event_update(event);
+
+        spin_unlock_irqrestore(&pmu->lock, flags);
+
+        hrtimer_forward_now(hrtimer, pmu->timer_interval);
+
+        return HRTIMER_RESTART;
+}
+
+static void rapl_hrtimer_init(struct rapl_pmu *pmu)
+{
+        struct hrtimer *hr = &pmu->hrtimer;
+
+        hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+        hr->function = rapl_hrtimer_handle;
+}
+
+static void __rapl_pmu_event_start(struct rapl_pmu *pmu,
+                                   struct perf_event *event)
+{
+        if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+                return;
+
+        event->hw.state = 0;
+
+        list_add_tail(&event->active_entry, &pmu->active_list);
+
+        local64_set(&event->hw.prev_count, rapl_read_counter(event));
+
+        pmu->n_active++;
+        if (pmu->n_active == 1)
+                rapl_start_hrtimer(pmu);
+}
+
+static void rapl_pmu_event_start(struct perf_event *event, int mode)
+{
+        struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu);
+        unsigned long flags;
+
+        spin_lock_irqsave(&pmu->lock, flags);
+        __rapl_pmu_event_start(pmu, event);
+        spin_unlock_irqrestore(&pmu->lock, flags);
+}
+
+static void rapl_pmu_event_stop(struct perf_event *event, int mode)
+{
+        struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu);
+        struct hw_perf_event *hwc = &event->hw;
+        unsigned long flags;
+
+        spin_lock_irqsave(&pmu->lock, flags);
+
+        /* mark event as deactivated and stopped */
+        if (!(hwc->state & PERF_HES_STOPPED)) {
+                WARN_ON_ONCE(pmu->n_active <= 0);
+                pmu->n_active--;
+                if (pmu->n_active == 0)
+                        rapl_stop_hrtimer(pmu);
+
+                list_del(&event->active_entry);
+
+                WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+                hwc->state |= PERF_HES_STOPPED;
+        }
+
+        /* check if update of sw counter is necessary */
+        if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+                /*
+                 * Drain the remaining delta count out of an event
+                 * that we are disabling:
+                 */
+                rapl_event_update(event);
+                hwc->state |= PERF_HES_UPTODATE;
+        }
+
+        spin_unlock_irqrestore(&pmu->lock, flags);
+}
+
+static int rapl_pmu_event_add(struct perf_event *event, int mode)
+{
+        struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu);
+        struct hw_perf_event *hwc = &event->hw;
+        unsigned long flags;
+
+        spin_lock_irqsave(&pmu->lock, flags);
+
+        hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+        if (mode & PERF_EF_START)
+                __rapl_pmu_event_start(pmu, event);
+
+        spin_unlock_irqrestore(&pmu->lock, flags);
+
+        return 0;
+}
+
+static void rapl_pmu_event_del(struct perf_event *event, int flags)
+{
+        rapl_pmu_event_stop(event, PERF_EF_UPDATE);
+}
+
+static int rapl_pmu_event_init(struct perf_event *event)
+{
+        u64 cfg = event->attr.config & RAPL_EVENT_MASK;
+        int bit, msr, ret = 0;
+
+        /* only look at RAPL events */
+        if (event->attr.type != rapl_pmu_class.type)
+                return -ENOENT;
+
+        /* check only supported bits are set */
+        if (event->attr.config & ~RAPL_EVENT_MASK)
+                return -EINVAL;
+
+        /*
+         * check event is known (determines counter)
+         */
+        switch (cfg) {
+        case INTEL_RAPL_PP0:
+                bit = RAPL_IDX_PP0_NRG_STAT;
+                msr = MSR_PP0_ENERGY_STATUS;
+                break;
+        case INTEL_RAPL_PKG:
+                bit = RAPL_IDX_PKG_NRG_STAT;
+                msr = MSR_PKG_ENERGY_STATUS;
+                break;
+        case INTEL_RAPL_RAM:
+                bit = RAPL_IDX_RAM_NRG_STAT;
+                msr = MSR_DRAM_ENERGY_STATUS;
+                break;
+        case INTEL_RAPL_PP1:
+                bit = RAPL_IDX_PP1_NRG_STAT;
+                msr = MSR_PP1_ENERGY_STATUS;
+                break;
+        default:
+                return -EINVAL;
+        }
+        /* check event supported */
+        if (!(rapl_cntr_mask & (1 << bit)))
+                return -EINVAL;
+
+        /* unsupported modes and filters */
+        if (event->attr.exclude_user   ||
+            event->attr.exclude_kernel ||
+            event->attr.exclude_hv     ||
+            event->attr.exclude_idle   ||
+            event->attr.exclude_host   ||
+            event->attr.exclude_guest  ||
+            event->attr.sample_period) /* no sampling */
+                return -EINVAL;
+
+        /* must be done before validate_group */
+        event->hw.event_base = msr;
+        event->hw.config = cfg;
+        event->hw.idx = bit;
+
+        return ret;
+}
+
+static void rapl_pmu_event_read(struct perf_event *event)
+{
+        rapl_event_update(event);
+}
+
+static ssize_t rapl_get_attr_cpumask(struct device *dev,
+                                struct device_attribute *attr, char *buf)
+{
+        int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &rapl_cpu_mask);
+
+        buf[n++] = '\n';
+        buf[n] = '\0';
+        return n;
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL);
+
+static struct attribute *rapl_pmu_attrs[] = {
+        &dev_attr_cpumask.attr,
+        NULL,
+};
+
+static struct attribute_group rapl_pmu_attr_group = {
+        .attrs = rapl_pmu_attrs,
+};
+
+EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
+EVENT_ATTR_STR(energy-pkg,   rapl_pkg,   "event=0x02");
+EVENT_ATTR_STR(energy-ram,   rapl_ram,   "event=0x03");
+EVENT_ATTR_STR(energy-gpu,   rapl_gpu,   "event=0x04");
+
+EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
+EVENT_ATTR_STR(energy-pkg.unit,   rapl_pkg_unit,   "Joules");
+EVENT_ATTR_STR(energy-ram.unit,   rapl_ram_unit,   "Joules");
+EVENT_ATTR_STR(energy-gpu.unit,   rapl_gpu_unit,   "Joules");
+
+/*
+ * we compute in ~0.23 nJ (2^-32 Joule) increments regardless of
+ * the hardware unit reported by the MSR
+ */
+EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890625e-10");
+EVENT_ATTR_STR(energy-pkg.scale,   rapl_pkg_scale,   "2.3283064365386962890625e-10");
+EVENT_ATTR_STR(energy-ram.scale,   rapl_ram_scale,   "2.3283064365386962890625e-10");
+EVENT_ATTR_STR(energy-gpu.scale,   rapl_gpu_scale,   "2.3283064365386962890625e-10");
+
+static struct attribute *rapl_events_srv_attr[] = {
+        EVENT_PTR(rapl_cores),
+        EVENT_PTR(rapl_pkg),
+        EVENT_PTR(rapl_ram),
+
+        EVENT_PTR(rapl_cores_unit),
+        EVENT_PTR(rapl_pkg_unit),
+        EVENT_PTR(rapl_ram_unit),
+
+        EVENT_PTR(rapl_cores_scale),
+        EVENT_PTR(rapl_pkg_scale),
+        EVENT_PTR(rapl_ram_scale),
+        NULL,
+};
+
+static struct attribute *rapl_events_cln_attr[] = {
+        EVENT_PTR(rapl_cores),
+        EVENT_PTR(rapl_pkg),
+        EVENT_PTR(rapl_gpu),
+
+        EVENT_PTR(rapl_cores_unit),
+        EVENT_PTR(rapl_pkg_unit),
+        EVENT_PTR(rapl_gpu_unit),
+
+        EVENT_PTR(rapl_cores_scale),
+        EVENT_PTR(rapl_pkg_scale),
+        EVENT_PTR(rapl_gpu_scale),
+        NULL,
+};
+
+static struct attribute_group rapl_pmu_events_group = {
+        .name = "events",
+        .attrs = NULL, /* patched at runtime */
+};
+
+DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7");
+static struct attribute *rapl_formats_attr[] = {
+        &format_attr_event.attr,
+        NULL,
+};
+
+static struct attribute_group rapl_pmu_format_group = {
+        .name = "format",
+        .attrs = rapl_formats_attr,
+};
+
+const struct attribute_group *rapl_attr_groups[] = {
+        &rapl_pmu_attr_group,
+        &rapl_pmu_format_group,
+        &rapl_pmu_events_group,
+        NULL,
+};
+
+static struct pmu rapl_pmu_class = {
+        .attr_groups    = rapl_attr_groups,
+        .task_ctx_nr    = perf_invalid_context, /* system-wide only */
+        .event_init     = rapl_pmu_event_init,
+        .add            = rapl_pmu_event_add, /* must have */
+        .del            = rapl_pmu_event_del, /* must have */
+        .start          = rapl_pmu_event_start,
+        .stop           = rapl_pmu_event_stop,
+        .read           = rapl_pmu_event_read,
+};
+
+static void rapl_cpu_exit(int cpu)
+{
+        struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
+        int i, phys_id = topology_physical_package_id(cpu);
+        int target = -1;
+
+        /* find a new cpu on the same package */
+        for_each_online_cpu(i) {
+                if (i == cpu)
+                        continue;
+                if (phys_id == topology_physical_package_id(i)) {
+                        target = i;
+                        break;
+                }
+        }
+        /*
+         * clear cpu from the cpumask; if it was set and another
+         * cpu remains on the package, hand the role to that cpu
+         */
+        if (cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask) && target >= 0)
+                cpumask_set_cpu(target, &rapl_cpu_mask);
+
+        WARN_ON(cpumask_empty(&rapl_cpu_mask));
+        /*
+         * migrate events and context to the new cpu
+         */
+        if (target >= 0)
+                perf_pmu_migrate_context(pmu->pmu, cpu, target);
+
+        /* cancel overflow polling timer for CPU */
+        rapl_stop_hrtimer(pmu);
+}
+
+static void rapl_cpu_init(int cpu)
+{
+        int i, phys_id = topology_physical_package_id(cpu);
+
+        /* check if phys_id is already covered */
+        for_each_cpu(i, &rapl_cpu_mask) {
+                if (phys_id == topology_physical_package_id(i))
+                        return;
+        }
+        /* was not found, so add it */
+        cpumask_set_cpu(cpu, &rapl_cpu_mask);
+}
+
+static int rapl_cpu_prepare(int cpu)
+{
+        struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
+        int phys_id = topology_physical_package_id(cpu);
+        u64 ms;
+
+        if (pmu)
+                return 0;
+
+        if (phys_id < 0)
+                return -1;
+
+        pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
+        if (!pmu)
+                return -1;
+
+        spin_lock_init(&pmu->lock);
+
+        INIT_LIST_HEAD(&pmu->active_list);
+
+        /*
+         * grab the power unit as: 1/2^unit Joules
+         *
+         * we cache it in the local PMU instance
+         */
+        rdmsrl(MSR_RAPL_POWER_UNIT, pmu->hw_unit);
+        pmu->hw_unit = (pmu->hw_unit >> 8) & 0x1FULL;
+        pmu->pmu = &rapl_pmu_class;
+
+        /*
+         * Scale the timeout against a reference power of 200W
+         * (200 Joules/sec) so counter overflows cannot be missed,
+         * and halve the interval to avoid running in lockstep with
+         * the overflow (hence the 2 * 100 below). If the hw unit
+         * were 2^-32 Joules, we would use 2 ms (~1/200/2 sec).
+         */
+        if (pmu->hw_unit < 32)
+                ms = (1000 / (2 * 100)) * (1ULL << (32 - pmu->hw_unit - 1));
+        else
+                ms = 2;
+
+        pmu->timer_interval = ms_to_ktime(ms);
+
+        rapl_hrtimer_init(pmu);
+
+        /* set RAPL pmu for this cpu for now */
+        per_cpu(rapl_pmu, cpu) = pmu;
+        per_cpu(rapl_pmu_to_free, cpu) = NULL;
+
+        return 0;
+}
+
+static void rapl_cpu_kfree(int cpu)
+{
+        struct rapl_pmu *pmu = per_cpu(rapl_pmu_to_free, cpu);
+
+        kfree(pmu);
+
+        per_cpu(rapl_pmu_to_free, cpu) = NULL;
+}
+
+static int rapl_cpu_dying(int cpu)
+{
+        struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
+
+        if (!pmu)
+                return 0;
+
+        per_cpu(rapl_pmu, cpu) = NULL;
+
+        per_cpu(rapl_pmu_to_free, cpu) = pmu;
+
+        return 0;
+}
+
+static int rapl_cpu_notifier(struct notifier_block *self,
+                             unsigned long action, void *hcpu)
+{
+        unsigned int cpu = (long)hcpu;
+
+        switch (action & ~CPU_TASKS_FROZEN) {
+        case CPU_UP_PREPARE:
+                rapl_cpu_prepare(cpu);
+                break;
+        case CPU_STARTING:
+                rapl_cpu_init(cpu);
+                break;
+        case CPU_UP_CANCELED:
+        case CPU_DYING:
+                rapl_cpu_dying(cpu);
+                break;
+        case CPU_ONLINE:
+        case CPU_DEAD:
+                rapl_cpu_kfree(cpu);
+                break;
+        case CPU_DOWN_PREPARE:
+                rapl_cpu_exit(cpu);
+                break;
+        default:
+                break;
+        }
+
+        return NOTIFY_OK;
+}
+
+static const struct x86_cpu_id rapl_cpu_match[] = {
+        [0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
+        [1] = {},
+};
+
+static int __init rapl_pmu_init(void)
+{
+        struct rapl_pmu *pmu;
+        int cpu, ret;
+
+        /*
+         * check for Intel processor family 6
+         */
+        if (!x86_match_cpu(rapl_cpu_match))
+                return 0;
+
+        /* check supported CPU */
+        switch (boot_cpu_data.x86_model) {
+        case 42: /* Sandy Bridge */
+        case 58: /* Ivy Bridge */
+        case 60: /* Haswell */
+        case 69: /* Haswell-Celeron */
+                rapl_cntr_mask = RAPL_IDX_CLN;
+                rapl_pmu_events_group.attrs = rapl_events_cln_attr;
+                break;
+        case 45: /* Sandy Bridge-EP */
+        case 62: /* IvyTown */
+                rapl_cntr_mask = RAPL_IDX_SRV;
+                rapl_pmu_events_group.attrs = rapl_events_srv_attr;
+                break;
+        default:
+                /* unsupported */
+                return 0;
+        }
+        get_online_cpus();
+
+        for_each_online_cpu(cpu) {
+                rapl_cpu_prepare(cpu);
+                rapl_cpu_init(cpu);
+        }
+
+        perf_cpu_notifier(rapl_cpu_notifier);
+
+        ret = perf_pmu_register(&rapl_pmu_class, "power", -1);
+        if (WARN_ON(ret)) {
+                pr_info("RAPL PMU detected, registration failed (%d), RAPL PMU disabled\n", ret);
+                put_online_cpus();
+                return -1;
+        }
+
+        pmu = __get_cpu_var(rapl_pmu);
+
+        pr_info("RAPL PMU detected, hw unit 2^-%d Joules,"
+                " API unit is 2^-32 Joules,"
+                " %d fixed counters,"
+                " %llu ms ovfl timer\n",
+                pmu->hw_unit,
+                hweight32(rapl_cntr_mask),
+                ktime_to_ms(pmu->timer_interval));
+
+        put_online_cpus();
+
+        return 0;
+}
+device_initcall(rapl_pmu_init);
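
A note on consuming the counts: because the driver reports all events in 32.32 fixed point (see the header comment and the 2.3283064365386962890625e-10 scale attributes above), a reader converts a raw count to Joules by multiplying by 2^-32, then divides by the measurement interval for Watts. A minimal userspace sketch of that conversion; the raw count and interval are made-up values for illustration:

#include <math.h>
#include <stdio.h>

int main(void)
{
        /* Hypothetical raw count read from a "power" PMU event. */
        unsigned long long raw_count = 0xb00000000ULL; /* 11 J in 32.32 */
        double interval_sec = 5.0;                     /* measurement window */

        double joules = ldexp((double)raw_count, -32); /* count * 2^-32 */
        double watts  = joules / interval_sec;

        printf("%.2f J over %.1f s = %.2f W\n", joules, interval_sec, watts);
        return 0;
}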
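
rapl_event_update() widens both counter snapshots before subtracting so that a wrap of the 32-bit energy-status counter still yields the correct small positive delta: shifting left by 64 minus the counter width discards whatever sits above the physical width, and the arithmetic shift back down sign-extends the difference. The same idiom in isolation, as a userspace sketch (it relies on arithmetic right shift of signed values, as the kernel does):

#include <stdint.h>
#include <stdio.h>

/* Wrap-safe delta of a 32-bit free-running counter. */
static int64_t delta32(uint64_t prev, uint64_t now)
{
        const int shift = 64 - 32;      /* 64 minus counter width */
        int64_t delta = (now << shift) - (prev << shift);

        return delta >> shift;          /* sign-extending shift */
}

int main(void)
{
        /* The counter wrapped: 0xfffffff0 -> 0x10 is a delta of 0x20. */
        printf("%lld\n", (long long)delta32(0xfffffff0ULL, 0x10ULL));
        return 0;
}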
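
Finally, the polling interval chosen in rapl_cpu_prepare() follows from the counter width and the energy unit: a 32-bit counter counting in 2^-hw_unit Joule steps overflows after 2^(32 - hw_unit) Joules, which at the 200 W reference power takes 2^(32 - hw_unit) / 200 seconds, and the code halves that so the timer cannot run in lockstep with the overflow. With the SandyBridge unit of 2^-16 J, overflow takes 65536 J / 200 W = 327.68 s, and indeed ms = (1000 / (2 * 100)) * 2^(32 - 16 - 1) = 5 * 32768 = 163840 ms, i.e. the hrtimer fires roughly every 164 seconds.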