aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexander Shishkin <alexander.shishkin@linux.intel.com>2019-08-06 04:46:00 -0400
committerPeter Zijlstra <peterz@infradead.org>2019-08-28 05:29:38 -0400
commitab43762ef010967e4ccd53627f70a2eecbeafefb (patch)
tree2e04b6310c579f14555a97a779ff39d8087215ec
parent794b8bedca9341569e1081bc880e7ea209dbca5c (diff)
perf: Allow normal events to output AUX data
In some cases, ordinary (non-AUX) events can generate data for AUX events. For example, PEBS events can come out as records in the Intel PT stream instead of their usual DS records, if configured to do so.

One requirement for such events is that they are consistently scheduled together, to ensure that the data from the "AUX output" events isn't lost while their corresponding AUX event is not scheduled. We use grouping to provide this guarantee: an "AUX output" event can be added to a group in which an AUX event is the group leader, provided that the former supports writing to the latter.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: kan.liang@linux.intel.com
Link: https://lkml.kernel.org/r/20190806084606.4021-2-alexander.shishkin@linux.intel.com
-rw-r--r--include/linux/perf_event.h14
-rw-r--r--include/uapi/linux/perf_event.h3
-rw-r--r--kernel/events/core.c93
3 files changed, 109 insertions, 1 deletions
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index e8ad3c590a23..61448c19a132 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -246,6 +246,7 @@ struct perf_event;
246#define PERF_PMU_CAP_ITRACE 0x20 246#define PERF_PMU_CAP_ITRACE 0x20
247#define PERF_PMU_CAP_HETEROGENEOUS_CPUS 0x40 247#define PERF_PMU_CAP_HETEROGENEOUS_CPUS 0x40
248#define PERF_PMU_CAP_NO_EXCLUDE 0x80 248#define PERF_PMU_CAP_NO_EXCLUDE 0x80
249#define PERF_PMU_CAP_AUX_OUTPUT 0x100
249 250
250/** 251/**
251 * struct pmu - generic performance monitoring unit 252 * struct pmu - generic performance monitoring unit
@@ -447,6 +448,16 @@ struct pmu {
447 /* optional */ 448 /* optional */
448 449
449 /* 450 /*
451 * Check if event can be used for aux_output purposes for
452 * events of this PMU.
453 *
454 * Runs from perf_event_open(). Should return 0 for "no match"
455 * or non-zero for "match".
456 */
457 int (*aux_output_match) (struct perf_event *event);
458 /* optional */
459
460 /*
450 * Filter events for PMU-specific reasons. 461 * Filter events for PMU-specific reasons.
451 */ 462 */
452 int (*filter_match) (struct perf_event *event); /* optional */ 463 int (*filter_match) (struct perf_event *event); /* optional */
@@ -681,6 +692,9 @@ struct perf_event {
681 struct perf_addr_filter_range *addr_filter_ranges; 692 struct perf_addr_filter_range *addr_filter_ranges;
682 unsigned long addr_filters_gen; 693 unsigned long addr_filters_gen;
683 694
695 /* for aux_output events */
696 struct perf_event *aux_event;
697
684 void (*destroy)(struct perf_event *); 698 void (*destroy)(struct perf_event *);
685 struct rcu_head rcu_head; 699 struct rcu_head rcu_head;
686 700
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 7198ddd0c6b1..bb7b271397a6 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -374,7 +374,8 @@ struct perf_event_attr {
374 namespaces : 1, /* include namespaces data */ 374 namespaces : 1, /* include namespaces data */
375 ksymbol : 1, /* include ksymbol events */ 375 ksymbol : 1, /* include ksymbol events */
376 bpf_event : 1, /* include bpf events */ 376 bpf_event : 1, /* include bpf events */
377 __reserved_1 : 33; 377 aux_output : 1, /* generate AUX records instead of events */
378 __reserved_1 : 32;
378 379
379 union { 380 union {
380 __u32 wakeup_events; /* wakeup every n events */ 381 __u32 wakeup_events; /* wakeup every n events */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0463c1151bae..2aad959e6def 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1887,6 +1887,89 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
1887 ctx->generation++; 1887 ctx->generation++;
1888} 1888}
1889 1889
1890static int
1891perf_aux_output_match(struct perf_event *event, struct perf_event *aux_event)
1892{
1893 if (!has_aux(aux_event))
1894 return 0;
1895
1896 if (!event->pmu->aux_output_match)
1897 return 0;
1898
1899 return event->pmu->aux_output_match(aux_event);
1900}
1901
1902static void put_event(struct perf_event *event);
1903static void event_sched_out(struct perf_event *event,
1904 struct perf_cpu_context *cpuctx,
1905 struct perf_event_context *ctx);
1906
1907static void perf_put_aux_event(struct perf_event *event)
1908{
1909 struct perf_event_context *ctx = event->ctx;
1910 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
1911 struct perf_event *iter;
1912
1913 /*
1914 * If event uses aux_event tear down the link
1915 */
1916 if (event->aux_event) {
1917 iter = event->aux_event;
1918 event->aux_event = NULL;
1919 put_event(iter);
1920 return;
1921 }
1922
1923 /*
1924 * If the event is an aux_event, tear down all links to
1925 * it from other events.
1926 */
1927 for_each_sibling_event(iter, event->group_leader) {
1928 if (iter->aux_event != event)
1929 continue;
1930
1931 iter->aux_event = NULL;
1932 put_event(event);
1933
1934 /*
1935 * If it's ACTIVE, schedule it out and put it into ERROR
1936 * state so that we don't try to schedule it again. Note
1937 * that perf_event_enable() will clear the ERROR status.
1938 */
1939 event_sched_out(iter, cpuctx, ctx);
1940 perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
1941 }
1942}
1943
1944static int perf_get_aux_event(struct perf_event *event,
1945 struct perf_event *group_leader)
1946{
1947 /*
1948 * Our group leader must be an aux event if we want to be
1949 * an aux_output. This way, the aux event will precede its
1950 * aux_output events in the group, and therefore will always
1951 * schedule first.
1952 */
1953 if (!group_leader)
1954 return 0;
1955
1956 if (!perf_aux_output_match(event, group_leader))
1957 return 0;
1958
1959 if (!atomic_long_inc_not_zero(&group_leader->refcount))
1960 return 0;
1961
1962 /*
1963 * Link aux_outputs to their aux event; this is undone in
1964 * perf_group_detach() by perf_put_aux_event(). When the
1965 * group in torn down, the aux_output events loose their
1966 * link to the aux_event and can't schedule any more.
1967 */
1968 event->aux_event = group_leader;
1969
1970 return 1;
1971}
1972
1890static void perf_group_detach(struct perf_event *event) 1973static void perf_group_detach(struct perf_event *event)
1891{ 1974{
1892 struct perf_event *sibling, *tmp; 1975 struct perf_event *sibling, *tmp;
@@ -1902,6 +1985,8 @@ static void perf_group_detach(struct perf_event *event)
1902 1985
1903 event->attach_state &= ~PERF_ATTACH_GROUP; 1986 event->attach_state &= ~PERF_ATTACH_GROUP;
1904 1987
1988 perf_put_aux_event(event);
1989
1905 /* 1990 /*
1906 * If this is a sibling, remove it from its group. 1991 * If this is a sibling, remove it from its group.
1907 */ 1992 */
@@ -10426,6 +10511,12 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
10426 goto err_ns; 10511 goto err_ns;
10427 } 10512 }
10428 10513
10514 if (event->attr.aux_output &&
10515 !(pmu->capabilities & PERF_PMU_CAP_AUX_OUTPUT)) {
10516 err = -EOPNOTSUPP;
10517 goto err_pmu;
10518 }
10519
10429 err = exclusive_event_init(event); 10520 err = exclusive_event_init(event);
10430 if (err) 10521 if (err)
10431 goto err_pmu; 10522 goto err_pmu;
@@ -11082,6 +11173,8 @@ SYSCALL_DEFINE5(perf_event_open,
11082 } 11173 }
11083 } 11174 }
11084 11175
11176 if (event->attr.aux_output && !perf_get_aux_event(event, group_leader))
11177 goto err_locked;
11085 11178
11086 /* 11179 /*
11087 * Must be under the same ctx::mutex as perf_install_in_context(), 11180 * Must be under the same ctx::mutex as perf_install_in_context(),