aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorDavid Carrillo-Cisneros <davidcc@google.com>2016-08-02 03:48:12 -0400
committerIngo Molnar <mingo@kernel.org>2016-08-10 07:05:52 -0400
commitdb4a835601b73cf8d6cd8986381d966b8e13d2d9 (patch)
tree1b99cff91738db040181bdd92b34dc7760b5184c /kernel
parent0b8f1e2e26bfc6b9abe3f0f3faba2cb0eecb9fb9 (diff)
perf/core: Set cgroup in CPU contexts for new cgroup events
There's a perf stat bug easy to observe on a machine with only one cgroup: $ perf stat -e cycles -I 1000 -C 0 -G / # time counts unit events 1.000161699 <not counted> cycles / 2.000355591 <not counted> cycles / 3.000565154 <not counted> cycles / 4.000951350 <not counted> cycles / We'd expect some output there. The underlying problem is that there is an optimization in perf_cgroup_sched_{in,out}() that skips the switch of cgroup events if the old and new cgroups in a task switch are the same. This optimization interacts with the current code in two ways that cause a CPU context's cgroup (cpuctx->cgrp) to be NULL even if a cgroup event matches the current task. These are: 1. On creation of the first cgroup event in a CPU: In current code, cpuctx->cgrp is only set in perf_cgroup_sched_in, but due to the aforesaid optimization, perf_cgroup_sched_in will not run until the next cgroup switches in that CPU. This may happen late or never happen, depending on system's number of cgroups, CPU load, etc. 2. On deletion of the last cgroup event in a cpuctx: In list_del_event, cpuctx->cgrp is set to NULL. Any new cgroup event will not be sched in because cpuctx->cgrp == NULL until a cgroup switch occurs and perf_cgroup_sched_in is executed (updating cpuctx->cgrp). This patch fixes both problems by setting cpuctx->cgrp in list_add_event, mirroring what list_del_event does when removing a cgroup event from CPU context, as introduced in: commit 68cacd29167b ("perf_events: Fix stale ->cgrp pointer in update_cgrp_time_from_cpuctx()") With this patch, cpuctx->cgrp is always set/cleared when installing/removing the first/last cgroup event in/from the CPU context. With cpuctx->cgrp correctly set, event_filter_match works as intended when events are sched in/out. 
After the fix, the output is as expected: $ perf stat -e cycles -I 1000 -a -G / # time counts unit events 1.004699159 627342882 cycles / 2.007397156 615272690 cycles / 3.010019057 616726074 cycles / Signed-off-by: David Carrillo-Cisneros <davidcc@google.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Kan Liang <kan.liang@intel.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Paul Turner <pjt@google.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vegard Nossum <vegard.nossum@gmail.com> Cc: Vince Weaver <vincent.weaver@maine.edu> Link: http://lkml.kernel.org/r/1470124092-113192-1-git-send-email-davidcc@google.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/events/core.c54
1 files changed, 36 insertions, 18 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 87d02b8cb87e..1903b8f3a705 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -843,6 +843,32 @@ perf_cgroup_mark_enabled(struct perf_event *event,
843 } 843 }
844 } 844 }
845} 845}
846
847/*
848 * Update cpuctx->cgrp so that it is set when first cgroup event is added and
849 * cleared when last cgroup event is removed.
850 */
851static inline void
852list_update_cgroup_event(struct perf_event *event,
853 struct perf_event_context *ctx, bool add)
854{
855 struct perf_cpu_context *cpuctx;
856
857 if (!is_cgroup_event(event))
858 return;
859
860 if (add && ctx->nr_cgroups++)
861 return;
862 else if (!add && --ctx->nr_cgroups)
863 return;
864 /*
865 * Because cgroup events are always per-cpu events,
866 * this will always be called from the right CPU.
867 */
868 cpuctx = __get_cpu_context(ctx);
869 cpuctx->cgrp = add ? event->cgrp : NULL;
870}
871
846#else /* !CONFIG_CGROUP_PERF */ 872#else /* !CONFIG_CGROUP_PERF */
847 873
848static inline bool 874static inline bool
@@ -920,6 +946,13 @@ perf_cgroup_mark_enabled(struct perf_event *event,
920 struct perf_event_context *ctx) 946 struct perf_event_context *ctx)
921{ 947{
922} 948}
949
950static inline void
951list_update_cgroup_event(struct perf_event *event,
952 struct perf_event_context *ctx, bool add)
953{
954}
955
923#endif 956#endif
924 957
925/* 958/*
@@ -1392,6 +1425,7 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
1392static void 1425static void
1393list_add_event(struct perf_event *event, struct perf_event_context *ctx) 1426list_add_event(struct perf_event *event, struct perf_event_context *ctx)
1394{ 1427{
1428
1395 lockdep_assert_held(&ctx->lock); 1429 lockdep_assert_held(&ctx->lock);
1396 1430
1397 WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT); 1431 WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
@@ -1412,8 +1446,7 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
1412 list_add_tail(&event->group_entry, list); 1446 list_add_tail(&event->group_entry, list);
1413 } 1447 }
1414 1448
1415 if (is_cgroup_event(event)) 1449 list_update_cgroup_event(event, ctx, true);
1416 ctx->nr_cgroups++;
1417 1450
1418 list_add_rcu(&event->event_entry, &ctx->event_list); 1451 list_add_rcu(&event->event_entry, &ctx->event_list);
1419 ctx->nr_events++; 1452 ctx->nr_events++;
@@ -1581,8 +1614,6 @@ static void perf_group_attach(struct perf_event *event)
1581static void 1614static void
1582list_del_event(struct perf_event *event, struct perf_event_context *ctx) 1615list_del_event(struct perf_event *event, struct perf_event_context *ctx)
1583{ 1616{
1584 struct perf_cpu_context *cpuctx;
1585
1586 WARN_ON_ONCE(event->ctx != ctx); 1617 WARN_ON_ONCE(event->ctx != ctx);
1587 lockdep_assert_held(&ctx->lock); 1618 lockdep_assert_held(&ctx->lock);
1588 1619
@@ -1594,20 +1625,7 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
1594 1625
1595 event->attach_state &= ~PERF_ATTACH_CONTEXT; 1626 event->attach_state &= ~PERF_ATTACH_CONTEXT;
1596 1627
1597 if (is_cgroup_event(event)) { 1628 list_update_cgroup_event(event, ctx, false);
1598 ctx->nr_cgroups--;
1599 /*
1600 * Because cgroup events are always per-cpu events, this will
1601 * always be called from the right CPU.
1602 */
1603 cpuctx = __get_cpu_context(ctx);
1604 /*
1605 * If there are no more cgroup events then clear cgrp to avoid
1606 * stale pointer in update_cgrp_time_from_cpuctx().
1607 */
1608 if (!ctx->nr_cgroups)
1609 cpuctx->cgrp = NULL;
1610 }
1611 1629
1612 ctx->nr_events--; 1630 ctx->nr_events--;
1613 if (event->attr.inherit_stat) 1631 if (event->attr.inherit_stat)