author    Cody P Schafer <cody@linux.vnet.ibm.com>    2015-01-30 16:46:00 -0500
committer Michael Ellerman <mpe@ellerman.id.au>       2015-02-02 01:56:38 -0500
commit    5c5cd7b502595f6b90509b8aa4bba6f81b69315c (patch)
tree      6e7aeb1fd8e31efad78262c1f10ba58b831f06a4 /arch/powerpc/perf
parent    e08e52824e41fb42e46593450d378ad1b33caedb (diff)
powerpc/perf/hv-24x7: parse catalog and populate sysfs with events
Retrieves and parses the 24x7 catalog on POWER systems that supply it
(right now, only POWER8). Events are exposed via sysfs in the standard
fashion, and are all parameterized.

$ cd /sys/bus/event_source/devices/hv_24x7/events
$ cat HPM_CS_FROM_L4_LDATA__PHYS_CORE
domain=0x2,offset=0xd58,core=?,lpar=0x0
$ cat HPM_TLBIE__VCPU_HOME_CHIP
domain=0x4,offset=0x358,vcpu=?,lpar=?

The user is required to supply values for the fields marked '?' (core,
vcpu, and lpar above) when specifying the event with the perf tool.

The catalog is (at the moment) only parsed at boot. It will need to be
re-parsed when certain hypervisor events occur. At that point we'll also
need to prevent old events from continuing to function (perhaps via a
counter passed in spare space in the config values).

Signed-off-by: Cody P Schafer <cody@linux.vnet.ibm.com>
Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
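For illustration only: once the perf tool supports substituting these '?'
parameters, counting one of the events listed above would look roughly like
the following (the core value is a hypothetical choice; lpar=0x0 is already
fixed in the event string for physical-core events):

$ perf stat -a -e 'hv_24x7/HPM_CS_FROM_L4_LDATA__PHYS_CORE,core=1/' sleep 1

The tool reads domain and offset from the sysfs event string and substitutes
the user-supplied value for each field marked '?'.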
Diffstat (limited to 'arch/powerpc/perf')
 arch/powerpc/perf/hv-24x7-catalog.h |  25 +
 arch/powerpc/perf/hv-24x7-domains.h |  28 +
 arch/powerpc/perf/hv-24x7.c         | 793 +++++++++++++++++++++++++++++++++-
 arch/powerpc/perf/hv-24x7.h         |  12 +-
 4 files changed, 841 insertions(+), 17 deletions(-)
diff --git a/arch/powerpc/perf/hv-24x7-catalog.h b/arch/powerpc/perf/hv-24x7-catalog.h
index 21b19dd86d9c..69e2e1faf902 100644
--- a/arch/powerpc/perf/hv-24x7-catalog.h
+++ b/arch/powerpc/perf/hv-24x7-catalog.h
@@ -30,4 +30,29 @@ struct hv_24x7_catalog_page_0 {
 	__u8 reserved6[2];
 } __packed;
 
+struct hv_24x7_event_data {
+	__be16 length; /* in bytes, must be a multiple of 16 */
+	__u8 reserved1[2];
+	__u8 domain; /* Chip = 1, Core = 2 */
+	__u8 reserved2[1];
+	__be16 event_group_record_offs; /* in bytes, must be 8 byte aligned */
+	__be16 event_group_record_len; /* in bytes */
+
+	/* in bytes, offset from event_group_record */
+	__be16 event_counter_offs;
+
+	/* verified_state, unverified_state, caveat_state, broken_state, ... */
+	__be32 flags;
+
+	__be16 primary_group_ix;
+	__be16 group_count;
+	__be16 event_name_len;
+	__u8 remainder[];
+	/* __u8 event_name[event_name_len - 2]; */
+	/* __be16 event_description_len; */
+	/* __u8 event_desc[event_description_len - 2]; */
+	/* __be16 detailed_desc_len; */
+	/* __u8 detailed_desc[detailed_desc_len - 2]; */
+} __packed;
+
 #endif
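To make the remainder[] layout above concrete, here is a worked example with
hypothetical lengths (event_name_len = 6, event_description_len = 7,
detailed_desc_len = 8); each __be16 length is two bytes larger than the
string it describes, matching the accessors event_name()/event_desc()/
event_long_desc() added to hv-24x7.c below:

	/*
	 * remainder[0..3]    event_name            (6 - 2 = 4 bytes)
	 * remainder[4..5]    event_description_len (__be16 holding 7)
	 * remainder[6..10]   event_desc            (7 - 2 = 5 bytes)
	 * remainder[11..12]  detailed_desc_len     (__be16 holding 8)
	 * remainder[13..18]  detailed_desc         (8 - 2 = 6 bytes)
	 */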
diff --git a/arch/powerpc/perf/hv-24x7-domains.h b/arch/powerpc/perf/hv-24x7-domains.h
new file mode 100644
index 000000000000..49c1efd50045
--- /dev/null
+++ b/arch/powerpc/perf/hv-24x7-domains.h
@@ -0,0 +1,28 @@
+
+/*
+ * DOMAIN(name, num, index_kind, is_physical)
+ *
+ * @name: An all caps token, suitable for use in generating an enum
+ *        member and appending to an event name in sysfs.
+ *
+ * @num: The number corresponding to the domain as given in
+ *       documentation. We assume the catalog domain and the hcall
+ *       domain have the same numbering (so far they do), but this
+ *       may need to be changed in the future.
+ *
+ * @index_kind: A stringifiable token describing the meaning of the index
+ *              within the given domain. Must fit the parsing rules of the
+ *              perf sysfs api.
+ *
+ * @is_physical: True if the domain is physical, false otherwise (if virtual).
+ *
+ * Note: The terms PHYS_CHIP, PHYS_CORE, VCPU correspond to physical chip,
+ *       physical core and virtual processor in 24x7 Counters specifications.
+ */
+
+DOMAIN(PHYS_CHIP, 0x01, chip, true)
+DOMAIN(PHYS_CORE, 0x02, core, true)
+DOMAIN(VCPU_HOME_CORE, 0x03, vcpu, false)
+DOMAIN(VCPU_HOME_CHIP, 0x04, vcpu, false)
+DOMAIN(VCPU_HOME_NODE, 0x05, vcpu, false)
+DOMAIN(VCPU_REMOTE_NODE, 0x06, vcpu, false)
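For reference, a sketch of how this X-macro list is consumed: each includer
defines DOMAIN() to pick out the columns it needs, so the first entry expands
roughly as follows in the two main users added by this patch (expansions
reconstructed from the macro definitions; shown for PHYS_CHIP only):

	/* hv-24x7.h: #define DOMAIN(n, v, x, c) HV_PERF_DOMAIN_##n = v, */
	HV_PERF_DOMAIN_PHYS_CHIP = 0x01,

	/*
	 * hv-24x7.c, event_domain_suffix():
	 * #define DOMAIN(n, v, x, c) case HV_PERF_DOMAIN_##n: return "__" #n;
	 */
	case HV_PERF_DOMAIN_PHYS_CHIP:
		return "__PHYS_CHIP";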
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index f162d0b8eea3..9445a824819e 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -13,16 +13,66 @@
 #define pr_fmt(fmt) "hv-24x7: " fmt
 
 #include <linux/perf_event.h>
+#include <linux/rbtree.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/vmalloc.h>
+
 #include <asm/firmware.h>
 #include <asm/hvcall.h>
 #include <asm/io.h>
+#include <linux/byteorder/generic.h>
 
 #include "hv-24x7.h"
 #include "hv-24x7-catalog.h"
 #include "hv-common.h"
 
+static const char *event_domain_suffix(unsigned domain)
+{
+	switch (domain) {
+#define DOMAIN(n, v, x, c)		\
+	case HV_PERF_DOMAIN_##n:	\
+		return "__" #n;
+#include "hv-24x7-domains.h"
+#undef DOMAIN
+	default:
+		WARN(1, "unknown domain %d\n", domain);
+		return "__UNKNOWN_DOMAIN_SUFFIX";
+	}
+}
+
+static bool domain_is_valid(unsigned domain)
+{
+	switch (domain) {
+#define DOMAIN(n, v, x, c)		\
+	case HV_PERF_DOMAIN_##n:	\
+		/* fall through */
+#include "hv-24x7-domains.h"
+#undef DOMAIN
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool is_physical_domain(unsigned domain)
+{
+	switch (domain) {
+#define DOMAIN(n, v, x, c)		\
+	case HV_PERF_DOMAIN_##n:	\
+		return c;
+#include "hv-24x7-domains.h"
+#undef DOMAIN
+	default:
+		return false;
+	}
+}
+
+static bool catalog_entry_domain_is_valid(unsigned domain)
+{
+	return is_physical_domain(domain);
+}
+
 /*
  * TODO: Merging events:
  * - Think of the hcall as an interface to a 4d array of counters:
@@ -44,13 +94,14 @@
 
 /*
  * Example usage:
- * perf stat -e 'hv_24x7/domain=2,offset=8,starting_index=0,lpar=0xffffffff/'
+ * perf stat -e 'hv_24x7/domain=2,offset=8,vcpu=0,lpar=0xffffffff/'
  */
 
 /* u3 0-6, one of HV_24X7_PERF_DOMAIN */
 EVENT_DEFINE_RANGE_FORMAT(domain, config, 0, 3);
 /* u16 */
-EVENT_DEFINE_RANGE_FORMAT(starting_index, config, 16, 31);
+EVENT_DEFINE_RANGE_FORMAT(core, config, 16, 31);
+EVENT_DEFINE_RANGE_FORMAT(vcpu, config, 16, 31);
 /* u32, see "data_offset" */
 EVENT_DEFINE_RANGE_FORMAT(offset, config, 32, 63);
 /* u16 */
@@ -63,7 +114,8 @@ EVENT_DEFINE_RANGE(reserved3, config2, 0, 63);
 static struct attribute *format_attrs[] = {
 	&format_attr_domain.attr,
 	&format_attr_offset.attr,
-	&format_attr_starting_index.attr,
+	&format_attr_core.attr,
+	&format_attr_vcpu.attr,
 	&format_attr_lpar.attr,
 	NULL,
 };
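With these format attributes registered, perf's standard sysfs layout would
expose the parameter-to-config mappings under the PMU's format directory.
Based on the bit ranges declared above, the files would read roughly as
follows (lpar's range is defined outside the quoted hunks, so it is omitted
here):

	$ cd /sys/bus/event_source/devices/hv_24x7/format
	$ grep . domain core vcpu offset
	domain:config:0-3
	core:config:16-31
	vcpu:config:16-31
	offset:config:32-63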
@@ -73,8 +125,115 @@ static struct attribute_group format_group = {
 	.attrs = format_attrs,
 };
 
+static struct attribute_group event_group = {
+	.name = "events",
+	/* .attrs is set in init */
+};
+
+static struct attribute_group event_desc_group = {
+	.name = "event_descs",
+	/* .attrs is set in init */
+};
+
+static struct attribute_group event_long_desc_group = {
+	.name = "event_long_descs",
+	/* .attrs is set in init */
+};
+
 static struct kmem_cache *hv_page_cache;
 
+static char *event_name(struct hv_24x7_event_data *ev, int *len)
+{
+	*len = be16_to_cpu(ev->event_name_len) - 2;
+	return (char *)ev->remainder;
+}
+
+static char *event_desc(struct hv_24x7_event_data *ev, int *len)
+{
+	unsigned nl = be16_to_cpu(ev->event_name_len);
+	__be16 *desc_len = (__be16 *)(ev->remainder + nl - 2);
+	*len = be16_to_cpu(*desc_len) - 2;
+	return (char *)ev->remainder + nl;
+}
+
+static char *event_long_desc(struct hv_24x7_event_data *ev, int *len)
+{
+	unsigned nl = be16_to_cpu(ev->event_name_len);
+	__be16 *desc_len_ = (__be16 *)(ev->remainder + nl - 2);
+	unsigned desc_len = be16_to_cpu(*desc_len_);
+	__be16 *long_desc_len = (__be16 *)(ev->remainder + nl + desc_len - 2);
+	*len = be16_to_cpu(*long_desc_len) - 2;
+	return (char *)ev->remainder + nl + desc_len;
+}
+
+static bool event_fixed_portion_is_within(struct hv_24x7_event_data *ev,
+					  void *end)
+{
+	void *start = ev;
+
+	return (start + offsetof(struct hv_24x7_event_data, remainder)) < end;
+}
+
+/*
+ * Things we don't check:
+ *  - padding for desc, name, and long/detailed desc is required to be '\0'
+ *    bytes.
+ *
+ * Return NULL if we pass end; otherwise return the address of the byte just
+ * following the event.
+ */
+static void *event_end(struct hv_24x7_event_data *ev, void *end)
+{
+	void *start = ev;
+	__be16 *dl_, *ldl_;
+	unsigned dl, ldl;
+	unsigned nl = be16_to_cpu(ev->event_name_len);
+
+	if (nl < 2) {
+		pr_debug("%s: name length too short: %d", __func__, nl);
+		return NULL;
+	}
+
+	if (start + nl > end) {
+		pr_debug("%s: start=%p + nl=%u > end=%p",
+			 __func__, start, nl, end);
+		return NULL;
+	}
+
+	dl_ = (__be16 *)(ev->remainder + nl - 2);
+	if (!IS_ALIGNED((uintptr_t)dl_, 2))
+		pr_warn("desc len not aligned %p", dl_);
+	dl = be16_to_cpu(*dl_);
+	if (dl < 2) {
+		pr_debug("%s: desc len too short: %d", __func__, dl);
+		return NULL;
+	}
+
+	if (start + nl + dl > end) {
+		pr_debug("%s: (start=%p + nl=%u + dl=%u)=%p > end=%p",
+			 __func__, start, nl, dl, start + nl + dl, end);
+		return NULL;
+	}
+
+	ldl_ = (__be16 *)(ev->remainder + nl + dl - 2);
+	if (!IS_ALIGNED((uintptr_t)ldl_, 2))
+		pr_warn("long desc len not aligned %p", ldl_);
+	ldl = be16_to_cpu(*ldl_);
+	if (ldl < 2) {
+		pr_debug("%s: long desc len too short (ldl=%u)",
+			 __func__, ldl);
+		return NULL;
+	}
+
+	if (start + nl + dl + ldl > end) {
+		pr_debug("%s: start=%p + nl=%u + dl=%u + ldl=%u > end=%p",
+			 __func__, start, nl, dl, ldl, end);
+		return NULL;
+	}
+
+	return start + nl + dl + ldl;
+}
+
 static unsigned long h_get_24x7_catalog_page_(unsigned long phys_4096,
 					      unsigned long version,
 					      unsigned long index)
@@ -97,6 +256,609 @@ static unsigned long h_get_24x7_catalog_page(char page[],
 			version, index);
 }
 
+static unsigned core_domains[] = {
+	HV_PERF_DOMAIN_PHYS_CORE,
+	HV_PERF_DOMAIN_VCPU_HOME_CORE,
+	HV_PERF_DOMAIN_VCPU_HOME_CHIP,
+	HV_PERF_DOMAIN_VCPU_HOME_NODE,
+	HV_PERF_DOMAIN_VCPU_REMOTE_NODE,
+};
+/* chip event data always yields a single event, core yields multiple */
+#define MAX_EVENTS_PER_EVENT_DATA ARRAY_SIZE(core_domains)
+
+static char *event_fmt(struct hv_24x7_event_data *event, unsigned domain)
+{
+	const char *sindex;
+	const char *lpar;
+
+	if (is_physical_domain(domain)) {
+		lpar = "0x0";
+		sindex = "core";
+	} else {
+		lpar = "?";
+		sindex = "vcpu";
+	}
+
+	return kasprintf(GFP_KERNEL,
+			 "domain=0x%x,offset=0x%x,%s=?,lpar=%s",
+			 domain,
+			 be16_to_cpu(event->event_counter_offs) +
+			 be16_to_cpu(event->event_group_record_offs),
+			 sindex,
+			 lpar);
+}
+
+/* Avoid trusting fw to NUL terminate strings */
+static char *memdup_to_str(char *maybe_str, int max_len, gfp_t gfp)
+{
+	return kasprintf(gfp, "%.*s", max_len, maybe_str);
+}
+
+static ssize_t device_show_string(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct dev_ext_attribute *d;
+
+	d = container_of(attr, struct dev_ext_attribute, attr);
+	return sprintf(buf, "%s\n", (char *)d->var);
+}
+
+static struct attribute *device_str_attr_create_(char *name, char *str)
+{
+	struct dev_ext_attribute *attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+
+	if (!attr)
+		return NULL;
+
+	attr->var = str;
+	attr->attr.attr.name = name;
+	attr->attr.attr.mode = 0444;
+	attr->attr.show = device_show_string;
+	return &attr->attr.attr;
+}
+
+static struct attribute *device_str_attr_create(char *name, int name_max,
+						int name_nonce,
+						char *str, size_t str_max)
+{
+	char *n;
+	char *s = memdup_to_str(str, str_max, GFP_KERNEL);
+	struct attribute *a;
+
+	if (!s)
+		return NULL;
+
+	if (!name_nonce)
+		n = kasprintf(GFP_KERNEL, "%.*s", name_max, name);
+	else
+		n = kasprintf(GFP_KERNEL, "%.*s__%d", name_max, name,
+			      name_nonce);
+	if (!n)
+		goto out_s;
+
+	a = device_str_attr_create_(n, s);
+	if (!a)
+		goto out_n;
+
+	return a;
+out_n:
+	kfree(n);
+out_s:
+	kfree(s);
+	return NULL;
+}
+
+static void device_str_attr_destroy(struct attribute *attr)
+{
+	struct dev_ext_attribute *d;
+
+	d = container_of(attr, struct dev_ext_attribute, attr.attr);
+	kfree(d->var);
+	kfree(d->attr.attr.name);
+	kfree(d);
+}
+
+static struct attribute *event_to_attr(unsigned ix,
+				       struct hv_24x7_event_data *event,
+				       unsigned domain,
+				       int nonce)
+{
+	int event_name_len;
+	char *ev_name, *a_ev_name, *val;
+	const char *ev_suffix;
+	struct attribute *attr;
+
+	if (!domain_is_valid(domain)) {
+		pr_warn("catalog event %u has invalid domain %u\n",
+			ix, domain);
+		return NULL;
+	}
+
+	val = event_fmt(event, domain);
+	if (!val)
+		return NULL;
+
+	ev_suffix = event_domain_suffix(domain);
+	ev_name = event_name(event, &event_name_len);
+	if (!nonce)
+		a_ev_name = kasprintf(GFP_KERNEL, "%.*s%s",
+				      (int)event_name_len, ev_name, ev_suffix);
+	else
+		a_ev_name = kasprintf(GFP_KERNEL, "%.*s%s__%d",
+				      (int)event_name_len, ev_name, ev_suffix,
+				      nonce);
+
+	if (!a_ev_name)
+		goto out_val;
+
+	attr = device_str_attr_create_(a_ev_name, val);
+	if (!attr)
+		goto out_name;
+
+	return attr;
+out_name:
+	kfree(a_ev_name);
+out_val:
+	kfree(val);
+	return NULL;
+}
+
+static struct attribute *event_to_desc_attr(struct hv_24x7_event_data *event,
+					    int nonce)
+{
+	int nl, dl;
+	char *name = event_name(event, &nl);
+	char *desc = event_desc(event, &dl);
+
+	/* If there isn't a description, don't create the sysfs file */
+	if (!dl)
+		return NULL;
+
+	return device_str_attr_create(name, nl, nonce, desc, dl);
+}
+
+static struct attribute *
+event_to_long_desc_attr(struct hv_24x7_event_data *event, int nonce)
+{
+	int nl, dl;
+	char *name = event_name(event, &nl);
+	char *desc = event_long_desc(event, &dl);
+
+	/* If there isn't a description, don't create the sysfs file */
+	if (!dl)
+		return NULL;
+
+	return device_str_attr_create(name, nl, nonce, desc, dl);
+}
+
+static ssize_t event_data_to_attrs(unsigned ix, struct attribute **attrs,
+				   struct hv_24x7_event_data *event, int nonce)
+{
+	unsigned i;
+
+	switch (event->domain) {
+	case HV_PERF_DOMAIN_PHYS_CHIP:
+		*attrs = event_to_attr(ix, event, event->domain, nonce);
+		return 1;
+	case HV_PERF_DOMAIN_PHYS_CORE:
+		for (i = 0; i < ARRAY_SIZE(core_domains); i++) {
+			attrs[i] = event_to_attr(ix, event, core_domains[i],
+						 nonce);
+			if (!attrs[i]) {
+				pr_warn("catalog event %u: individual attr %u creation failure\n",
+					ix, i);
+				for (; i; i--)
+					device_str_attr_destroy(attrs[i - 1]);
+				return -1;
+			}
+		}
+		return i;
+	default:
+		pr_warn("catalog event %u: domain %u is not allowed in the catalog\n",
+			ix, event->domain);
+		return -1;
+	}
+}
+
+static size_t event_to_attr_ct(struct hv_24x7_event_data *event)
+{
+	switch (event->domain) {
+	case HV_PERF_DOMAIN_PHYS_CHIP:
+		return 1;
+	case HV_PERF_DOMAIN_PHYS_CORE:
+		return ARRAY_SIZE(core_domains);
+	default:
+		return 0;
+	}
+}
+
+static unsigned long vmalloc_to_phys(void *v)
+{
+	struct page *p = vmalloc_to_page(v);
+
+	BUG_ON(!p);
+	return page_to_phys(p) + offset_in_page(v);
+}
+
+struct event_uniq {
+	struct rb_node node;
+	const char *name;
+	int nl;
+	unsigned ct;
+	unsigned domain;
+};
+
+static int memord(const void *d1, size_t s1, const void *d2, size_t s2)
+{
+	if (s1 < s2)
+		return 1;
+	if (s1 > s2)
+		return -1;
+
+	return memcmp(d1, d2, s1);
+}
+
+static int ev_uniq_ord(const void *v1, size_t s1, unsigned d1, const void *v2,
+		       size_t s2, unsigned d2)
+{
+	int r = memord(v1, s1, v2, s2);
+
+	if (r)
+		return r;
+	if (d1 > d2)
+		return 1;
+	if (d2 > d1)
+		return -1;
+	return 0;
+}
+
+static int event_uniq_add(struct rb_root *root, const char *name, int nl,
+			  unsigned domain)
+{
+	struct rb_node **new = &(root->rb_node), *parent = NULL;
+	struct event_uniq *data;
+
+	/* Figure out where to put new node */
+	while (*new) {
+		struct event_uniq *it;
+		int result;
+
+		it = container_of(*new, struct event_uniq, node);
+		result = ev_uniq_ord(name, nl, domain, it->name, it->nl,
+				     it->domain);
+
+		parent = *new;
+		if (result < 0)
+			new = &((*new)->rb_left);
+		else if (result > 0)
+			new = &((*new)->rb_right);
+		else {
+			it->ct++;
+			pr_info("found a duplicate event %.*s, ct=%u\n", nl,
+				name, it->ct);
+			return it->ct;
+		}
+	}
+
+	data = kmalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	*data = (struct event_uniq) {
+		.name = name,
+		.nl = nl,
+		.ct = 0,
+		.domain = domain,
+	};
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&data->node, parent, new);
+	rb_insert_color(&data->node, root);
+
+	/* data->ct == 0 */
+	return 0;
+}
+
+static void event_uniq_destroy(struct rb_root *root)
+{
+	/*
+	 * the strings we point to are in the giant block of memory filled by
+	 * the catalog, and are freed separately.
+	 */
+	struct event_uniq *pos, *n;
+
+	rbtree_postorder_for_each_entry_safe(pos, n, root, node)
+		kfree(pos);
+}
+
+/*
+ * Ensure the event structure's sizes are self-consistent and don't cause us
+ * to read outside of the event.
+ *
+ * On success, return the event length in bytes.
+ * Otherwise, return -1 (and print as appropriate).
+ */
+static ssize_t catalog_event_len_validate(struct hv_24x7_event_data *event,
+					  size_t event_idx,
+					  size_t event_data_bytes,
+					  size_t event_entry_count,
+					  size_t offset, void *end)
+{
+	ssize_t ev_len;
+	void *ev_end, *calc_ev_end;
+
+	if (offset >= event_data_bytes)
+		return -1;
+
+	if (event_idx >= event_entry_count) {
+		pr_devel("catalog event data has %zu bytes of padding after last event\n",
+			 event_data_bytes - offset);
+		return -1;
+	}
+
+	if (!event_fixed_portion_is_within(event, end)) {
+		pr_warn("event %zu fixed portion is not within range\n",
+			event_idx);
+		return -1;
+	}
+
+	ev_len = be16_to_cpu(event->length);
+
+	if (ev_len % 16)
+		pr_info("event %zu has length %zu not divisible by 16: event=%pK\n",
+			event_idx, ev_len, event);
+
+	ev_end = (__u8 *)event + ev_len;
+	if (ev_end > end) {
+		pr_warn("event %zu has .length=%zu, ends after buffer end: ev_end=%pK > end=%pK, offset=%zu\n",
+			event_idx, ev_len, ev_end, end, offset);
+		return -1;
+	}
+
+	calc_ev_end = event_end(event, end);
+	if (!calc_ev_end) {
+		pr_warn("event %zu has a calculated length which exceeds buffer length %zu: event=%pK end=%pK, offset=%zu\n",
+			event_idx, event_data_bytes, event, end, offset);
+		return -1;
+	}
+
+	if (calc_ev_end > ev_end) {
+		pr_warn("event %zu exceeds its own length: event=%pK, end=%pK, offset=%zu, calc_ev_end=%pK\n",
+			event_idx, event, ev_end, offset, calc_ev_end);
+		return -1;
+	}
+
+	return ev_len;
+}
+
+#define MAX_4K (SIZE_MAX / 4096)
+
+static void create_events_from_catalog(struct attribute ***events_,
+				       struct attribute ***event_descs_,
+				       struct attribute ***event_long_descs_)
+{
+	unsigned long hret;
+	size_t catalog_len, catalog_page_len, event_entry_count,
+	       event_data_len, event_data_offs,
+	       event_data_bytes, junk_events, event_idx, event_attr_ct, i,
+	       attr_max, event_idx_last, desc_ct, long_desc_ct;
+	ssize_t ct, ev_len;
+	uint32_t catalog_version_num;
+	struct attribute **events, **event_descs, **event_long_descs;
+	struct hv_24x7_catalog_page_0 *page_0 =
+		kmem_cache_alloc(hv_page_cache, GFP_KERNEL);
+	void *page = page_0;
+	void *event_data, *end;
+	struct hv_24x7_event_data *event;
+	struct rb_root ev_uniq = RB_ROOT;
+
+	if (!page)
+		goto e_out;
+
+	hret = h_get_24x7_catalog_page(page, 0, 0);
+	if (hret)
+		goto e_free;
+
+	catalog_version_num = be64_to_cpu(page_0->version);
+	catalog_page_len = be32_to_cpu(page_0->length);
+
+	if (MAX_4K < catalog_page_len) {
+		pr_err("invalid page count: %zu\n", catalog_page_len);
+		goto e_free;
+	}
+
+	catalog_len = catalog_page_len * 4096;
+
+	event_entry_count = be16_to_cpu(page_0->event_entry_count);
+	event_data_offs = be16_to_cpu(page_0->event_data_offs);
+	event_data_len = be16_to_cpu(page_0->event_data_len);
+
+	pr_devel("cv %zu cl %zu eec %zu edo %zu edl %zu\n",
+		 (size_t)catalog_version_num, catalog_len,
+		 event_entry_count, event_data_offs, event_data_len);
+
+	if ((MAX_4K < event_data_len)
+	    || (MAX_4K < event_data_offs)
+	    || (MAX_4K - event_data_offs < event_data_len)) {
+		pr_err("invalid event data offs %zu and/or len %zu\n",
+		       event_data_offs, event_data_len);
+		goto e_free;
+	}
+
+	if ((event_data_offs + event_data_len) > catalog_page_len) {
+		pr_err("event data %zu-%zu does not fit inside catalog 0-%zu\n",
+		       event_data_offs,
+		       event_data_offs + event_data_len,
+		       catalog_page_len);
+		goto e_free;
+	}
+
+	if (SIZE_MAX / MAX_EVENTS_PER_EVENT_DATA - 1 < event_entry_count) {
+		pr_err("event_entry_count %zu is invalid\n",
+		       event_entry_count);
+		goto e_free;
+	}
+
+	event_data_bytes = event_data_len * 4096;
+
+	/*
+	 * event data can span several pages, and events can cross page
+	 * boundaries. Use vmalloc to make this easier.
+	 */
+	event_data = vmalloc(event_data_bytes);
+	if (!event_data) {
+		pr_err("could not allocate event data\n");
+		goto e_free;
+	}
+
+	end = event_data + event_data_bytes;
+
+	/*
+	 * using vmalloc_to_phys() like this only works if PAGE_SIZE is
+	 * divisible by 4096
+	 */
+	BUILD_BUG_ON(PAGE_SIZE % 4096);
+
+	for (i = 0; i < event_data_len; i++) {
+		hret = h_get_24x7_catalog_page_(
+				vmalloc_to_phys(event_data + i * 4096),
+				catalog_version_num,
+				i + event_data_offs);
+		if (hret) {
+			pr_err("failed to get event data in page %zu\n",
+			       i + event_data_offs);
+			goto e_event_data;
+		}
+	}
+
+	/*
+	 * scan the catalog to determine the number of attributes we need, and
+	 * verify it at the same time.
+	 */
+	for (junk_events = 0, event = event_data, event_idx = 0, attr_max = 0;
+	     ;
+	     event_idx++, event = (void *)event + ev_len) {
+		size_t offset = (void *)event - (void *)event_data;
+		char *name;
+		int nl;
+
+		ev_len = catalog_event_len_validate(event, event_idx,
+						    event_data_bytes,
+						    event_entry_count,
+						    offset, end);
+		if (ev_len < 0)
+			break;
+
+		name = event_name(event, &nl);
+
+		if (event->event_group_record_len == 0) {
+			pr_devel("invalid event %zu (%.*s): group_record_len == 0, skipping\n",
+				 event_idx, nl, name);
+			junk_events++;
+			continue;
+		}
+
+		if (!catalog_entry_domain_is_valid(event->domain)) {
+			pr_info("event %zu (%.*s) has invalid domain %d\n",
+				event_idx, nl, name, event->domain);
+			junk_events++;
+			continue;
+		}
+
+		attr_max += event_to_attr_ct(event);
+	}
+
+	event_idx_last = event_idx;
+	if (event_idx_last != event_entry_count)
+		pr_warn("event buffer ended before listed # of events were parsed (got %zu, wanted %zu, junk %zu)\n",
+			event_idx_last, event_entry_count, junk_events);
+
+	events = kmalloc_array(attr_max + 1, sizeof(*events), GFP_KERNEL);
+	if (!events)
+		goto e_event_data;
+
+	event_descs = kmalloc_array(event_idx + 1, sizeof(*event_descs),
+				    GFP_KERNEL);
+	if (!event_descs)
+		goto e_event_attrs;
+
+	event_long_descs = kmalloc_array(event_idx + 1,
+					 sizeof(*event_long_descs), GFP_KERNEL);
+	if (!event_long_descs)
+		goto e_event_descs;
+
+	/* Iterate over the catalog filling in the attribute vector */
+	for (junk_events = 0, event_attr_ct = 0, desc_ct = 0, long_desc_ct = 0,
+	     event = event_data, event_idx = 0;
+	     event_idx < event_idx_last;
+	     event_idx++, ev_len = be16_to_cpu(event->length),
+	     event = (void *)event + ev_len) {
+		char *name;
+		int nl;
+		int nonce;
+		/*
+		 * these are the only "bad" events that are intermixed and that
+		 * we can ignore without issue. make sure to skip them here
+		 */
+		if (event->event_group_record_len == 0)
+			continue;
+		if (!catalog_entry_domain_is_valid(event->domain))
+			continue;
+
+		name = event_name(event, &nl);
+		nonce = event_uniq_add(&ev_uniq, name, nl, event->domain);
+		ct = event_data_to_attrs(event_idx, events + event_attr_ct,
+					 event, nonce);
+		if (ct <= 0) {
+			pr_warn("event %zu (%.*s) creation failure, skipping\n",
+				event_idx, nl, name);
+			junk_events++;
+		} else {
+			event_attr_ct += ct;
+			event_descs[desc_ct] = event_to_desc_attr(event, nonce);
+			if (event_descs[desc_ct])
+				desc_ct++;
+			event_long_descs[long_desc_ct] =
+				event_to_long_desc_attr(event, nonce);
+			if (event_long_descs[long_desc_ct])
+				long_desc_ct++;
+		}
+	}
+
+	pr_info("read %zu catalog entries, created %zu event attrs (%zu failures), %zu descs\n",
+		event_idx, event_attr_ct, junk_events, desc_ct);
+
+	events[event_attr_ct] = NULL;
+	event_descs[desc_ct] = NULL;
+	event_long_descs[long_desc_ct] = NULL;
+
+	event_uniq_destroy(&ev_uniq);
+	vfree(event_data);
+	kmem_cache_free(hv_page_cache, page);
+
+	*events_ = events;
+	*event_descs_ = event_descs;
+	*event_long_descs_ = event_long_descs;
+	return;
+
+e_event_descs:
+	kfree(event_descs);
+e_event_attrs:
+	kfree(events);
+e_event_data:
+	vfree(event_data);
+e_free:
+	kmem_cache_free(hv_page_cache, page);
+e_out:
+	*events_ = NULL;
+	*event_descs_ = NULL;
+	*event_long_descs_ = NULL;
+}
+
 static ssize_t catalog_read(struct file *filp, struct kobject *kobj,
 			    struct bin_attribute *bin_attr, char *buf,
 			    loff_t offset, size_t count)
@@ -207,16 +969,13 @@ static struct attribute_group if_group = {
 
 static const struct attribute_group *attr_groups[] = {
 	&format_group,
+	&event_group,
+	&event_desc_group,
+	&event_long_desc_group,
 	&if_group,
 	NULL,
 };
 
-static bool is_physical_domain(int domain)
-{
-	return domain == HV_24X7_PERF_DOMAIN_PHYSICAL_CHIP ||
-		domain == HV_24X7_PERF_DOMAIN_PHYSICAL_CORE;
-}
-
 DEFINE_PER_CPU(char, hv_24x7_reqb[4096]) __aligned(4096);
 DEFINE_PER_CPU(char, hv_24x7_resb[4096]) __aligned(4096);
 
@@ -291,9 +1050,17 @@ out:
 static unsigned long event_24x7_request(struct perf_event *event, u64 *res,
 					bool success_expected)
 {
+	u16 idx;
+	unsigned domain = event_get_domain(event);
+
+	if (is_physical_domain(domain))
+		idx = event_get_core(event);
+	else
+		idx = event_get_vcpu(event);
+
 	return single_24x7_request(event_get_domain(event),
 				   event_get_offset(event),
-				   event_get_starting_index(event),
+				   idx,
 				   event_get_lpar(event),
 				   res,
 				   success_expected);
@@ -356,7 +1123,7 @@ static int h_24x7_event_init(struct perf_event *event)
 		return -EIO;
 	}
 
-	/* PHYSICAL domains & other lpars require extra capabilities */
+	/* Physical domains & other lpars require extra capabilities */
 	if (!caps.collect_privileged && (is_physical_domain(domain) ||
 	    (event_get_lpar(event) != event_get_lpar_max()))) {
 		pr_devel("hv permisions disallow: is_physical_domain:%d, lpar=0x%llx\n",
@@ -452,6 +1219,10 @@ static int hv_24x7_init(void)
 	/* sampling not supported */
 	h_24x7_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
 
+	create_events_from_catalog(&event_group.attrs,
+				   &event_desc_group.attrs,
+				   &event_long_desc_group.attrs);
+
 	r = perf_pmu_register(&h_24x7_pmu, h_24x7_pmu.name, -1);
 	if (r)
 		return r;
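To show how the declared bit ranges compose into a raw event, here is a small
self-contained userspace sketch (not kernel code; the bits() helper is
hypothetical and simply mirrors the ranges declared by
EVENT_DEFINE_RANGE_FORMAT above):

	#include <stdint.h>
	#include <stdio.h>

	/* Extract the inclusive bit range [lo, hi] from a perf config value. */
	static uint64_t bits(uint64_t config, unsigned lo, unsigned hi)
	{
		return (config >> lo) & ((1ULL << (hi - lo + 1)) - 1);
	}

	int main(void)
	{
		/* pack domain=0x2, core=0x1, offset=0xd58, as perf would from
		 * a "domain=0x2,offset=0xd58,core=0x1" event specification */
		uint64_t config = 0x2ULL | (0x1ULL << 16) | (0xd58ULL << 32);

		printf("domain=%#llx core=%#llx offset=%#llx\n",
		       (unsigned long long)bits(config, 0, 3),
		       (unsigned long long)bits(config, 16, 31),
		       (unsigned long long)bits(config, 32, 63));
		return 0;
	}

Running it prints domain=0x2 core=0x1 offset=0xd58, matching the event string
format produced by event_fmt().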
diff --git a/arch/powerpc/perf/hv-24x7.h b/arch/powerpc/perf/hv-24x7.h
index 720ebce4b435..69cd4e690f58 100644
--- a/arch/powerpc/perf/hv-24x7.h
+++ b/arch/powerpc/perf/hv-24x7.h
@@ -3,14 +3,14 @@
 
 #include <linux/types.h>
 
+enum hv_perf_domains {
+#define DOMAIN(n, v, x, c) HV_PERF_DOMAIN_##n = v,
+#include "hv-24x7-domains.h"
+#undef DOMAIN
+};
+
 struct hv_24x7_request {
 	/* PHYSICAL domains require enabling via phyp/hmc. */
-#define HV_24X7_PERF_DOMAIN_PHYSICAL_CHIP 0x01
-#define HV_24X7_PERF_DOMAIN_PHYSICAL_CORE 0x02
-#define HV_24X7_PERF_DOMAIN_VIRTUAL_PROCESSOR_HOME_CORE 0x03
-#define HV_24X7_PERF_DOMAIN_VIRTUAL_PROCESSOR_HOME_CHIP 0x04
-#define HV_24X7_PERF_DOMAIN_VIRTUAL_PROCESSOR_HOME_NODE 0x05
-#define HV_24X7_PERF_DOMAIN_VIRTUAL_PROCESSOR_REMOTE_NODE 0x06
 	__u8 performance_domain;
 	__u8 reserved[0x1];
 