diff options
author | Gleb Natapov <gleb@redhat.com> | 2011-11-27 10:59:09 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-12-06 02:34:02 -0500 |
commit | b202952075f62603bea9bfb6ebc6b0420db11949 (patch) | |
tree | 9c8e0538b455e68b5c371caba5b1585ed0ef9d8a /kernel/events | |
parent | b79387ef185af2323594920923cecba5753c3817 (diff) |
perf, core: Rate limit perf_sched_events jump_label patching
jump_label patching is a very expensive operation that involves pausing all
cpus. The patching of the perf_sched_events jump_label is easily controllable
from userspace by an unprivileged user.
When the user runs a loop like this:
"while true; do perf stat -e cycles true; done"
... the performance of my test application that just increments a counter
for one second drops by 4%.
This is on a 16 cpu box with my test application using only one of
them. An impact on a real server doing real work will be worse.
Performance of KVM PMU drops nearly 50% due to jump_label for "perf
record" since the KVM PMU implementation creates and destroys perf events
frequently.
This patch introduces a way to rate limit jump_label patching and uses
it to fix the above problem.
I believe that as jump_label use spreads, the problem will become more
common, and thus solving it in generic code is appropriate. Also fixing
it in the perf code would result in moving jump_label accounting logic to
perf code with all the ifdefs in case of JUMP_LABEL=n kernel. With this
patch all details are nicely hidden inside jump_label code.
Signed-off-by: Gleb Natapov <gleb@redhat.com>
Acked-by: Jason Baron <jbaron@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20111127155909.GO2557@redhat.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/events')
-rw-r--r-- | kernel/events/core.c | 13 |
1 files changed, 8 insertions, 5 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c index 3c1541d7a53d..3a3b1a18f490 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -128,7 +128,7 @@ enum event_type_t { | |||
128 | * perf_sched_events : >0 events exist | 128 | * perf_sched_events : >0 events exist |
129 | * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu | 129 | * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu |
130 | */ | 130 | */ |
131 | struct jump_label_key perf_sched_events __read_mostly; | 131 | struct jump_label_key_deferred perf_sched_events __read_mostly; |
132 | static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); | 132 | static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); |
133 | 133 | ||
134 | static atomic_t nr_mmap_events __read_mostly; | 134 | static atomic_t nr_mmap_events __read_mostly; |
@@ -2748,7 +2748,7 @@ static void free_event(struct perf_event *event) | |||
2748 | 2748 | ||
2749 | if (!event->parent) { | 2749 | if (!event->parent) { |
2750 | if (event->attach_state & PERF_ATTACH_TASK) | 2750 | if (event->attach_state & PERF_ATTACH_TASK) |
2751 | jump_label_dec(&perf_sched_events); | 2751 | jump_label_dec_deferred(&perf_sched_events); |
2752 | if (event->attr.mmap || event->attr.mmap_data) | 2752 | if (event->attr.mmap || event->attr.mmap_data) |
2753 | atomic_dec(&nr_mmap_events); | 2753 | atomic_dec(&nr_mmap_events); |
2754 | if (event->attr.comm) | 2754 | if (event->attr.comm) |
@@ -2759,7 +2759,7 @@ static void free_event(struct perf_event *event) | |||
2759 | put_callchain_buffers(); | 2759 | put_callchain_buffers(); |
2760 | if (is_cgroup_event(event)) { | 2760 | if (is_cgroup_event(event)) { |
2761 | atomic_dec(&per_cpu(perf_cgroup_events, event->cpu)); | 2761 | atomic_dec(&per_cpu(perf_cgroup_events, event->cpu)); |
2762 | jump_label_dec(&perf_sched_events); | 2762 | jump_label_dec_deferred(&perf_sched_events); |
2763 | } | 2763 | } |
2764 | } | 2764 | } |
2765 | 2765 | ||
@@ -5784,7 +5784,7 @@ done: | |||
5784 | 5784 | ||
5785 | if (!event->parent) { | 5785 | if (!event->parent) { |
5786 | if (event->attach_state & PERF_ATTACH_TASK) | 5786 | if (event->attach_state & PERF_ATTACH_TASK) |
5787 | jump_label_inc(&perf_sched_events); | 5787 | jump_label_inc(&perf_sched_events.key); |
5788 | if (event->attr.mmap || event->attr.mmap_data) | 5788 | if (event->attr.mmap || event->attr.mmap_data) |
5789 | atomic_inc(&nr_mmap_events); | 5789 | atomic_inc(&nr_mmap_events); |
5790 | if (event->attr.comm) | 5790 | if (event->attr.comm) |
@@ -6022,7 +6022,7 @@ SYSCALL_DEFINE5(perf_event_open, | |||
6022 | * - that may need work on context switch | 6022 | * - that may need work on context switch |
6023 | */ | 6023 | */ |
6024 | atomic_inc(&per_cpu(perf_cgroup_events, event->cpu)); | 6024 | atomic_inc(&per_cpu(perf_cgroup_events, event->cpu)); |
6025 | jump_label_inc(&perf_sched_events); | 6025 | jump_label_inc(&perf_sched_events.key); |
6026 | } | 6026 | } |
6027 | 6027 | ||
6028 | /* | 6028 | /* |
@@ -6868,6 +6868,9 @@ void __init perf_event_init(void) | |||
6868 | 6868 | ||
6869 | ret = init_hw_breakpoint(); | 6869 | ret = init_hw_breakpoint(); |
6870 | WARN(ret, "hw_breakpoint initialization failed with: %d", ret); | 6870 | WARN(ret, "hw_breakpoint initialization failed with: %d", ret); |
6871 | |||
6872 | /* do not patch jump label more than once per second */ | ||
6873 | jump_label_rate_limit(&perf_sched_events, HZ); | ||
6871 | } | 6874 | } |
6872 | 6875 | ||
6873 | static int __init perf_event_sysfs_init(void) | 6876 | static int __init perf_event_sysfs_init(void) |