diff options
author | Gleb Natapov <gleb@redhat.com> | 2011-11-27 10:59:09 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-12-06 02:34:02 -0500 |
commit | b202952075f62603bea9bfb6ebc6b0420db11949 (patch) | |
tree | 9c8e0538b455e68b5c371caba5b1585ed0ef9d8a /kernel | |
parent | b79387ef185af2323594920923cecba5753c3817 (diff) |
perf, core: Rate limit perf_sched_events jump_label patching
jump_label patching is a very expensive operation that involves pausing all
cpus. The patching of the perf_sched_events jump_label is easily controllable
from userspace by an unprivileged user.
When the user runs a loop like this:
"while true; do perf stat -e cycles true; done"
... the performance of my test application that just increments a counter
for one second drops by 4%.
This is on a 16 cpu box with my test application using only one of
them. An impact on a real server doing real work will be worse.
Performance of the KVM PMU drops nearly 50% due to jump_label for "perf
record" since the KVM PMU implementation creates and destroys perf events
frequently.
This patch introduces a way to rate limit jump_label patching and uses
it to fix the above problem.
I believe that as jump_label use will spread the problem will become more
common and thus solving it in a generic code is appropriate. Also fixing
it in the perf code would result in moving jump_label accounting logic to
perf code with all the ifdefs in case of JUMP_LABEL=n kernel. With this
patch all details are nicely hidden inside jump_label code.
Signed-off-by: Gleb Natapov <gleb@redhat.com>
Acked-by: Jason Baron <jbaron@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20111127155909.GO2557@redhat.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/events/core.c | 13 | ||||
-rw-r--r-- | kernel/jump_label.c | 35 |
2 files changed, 41 insertions, 7 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c index 3c1541d7a53d..3a3b1a18f490 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -128,7 +128,7 @@ enum event_type_t { | |||
128 | * perf_sched_events : >0 events exist | 128 | * perf_sched_events : >0 events exist |
129 | * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu | 129 | * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu |
130 | */ | 130 | */ |
131 | struct jump_label_key perf_sched_events __read_mostly; | 131 | struct jump_label_key_deferred perf_sched_events __read_mostly; |
132 | static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); | 132 | static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); |
133 | 133 | ||
134 | static atomic_t nr_mmap_events __read_mostly; | 134 | static atomic_t nr_mmap_events __read_mostly; |
@@ -2748,7 +2748,7 @@ static void free_event(struct perf_event *event) | |||
2748 | 2748 | ||
2749 | if (!event->parent) { | 2749 | if (!event->parent) { |
2750 | if (event->attach_state & PERF_ATTACH_TASK) | 2750 | if (event->attach_state & PERF_ATTACH_TASK) |
2751 | jump_label_dec(&perf_sched_events); | 2751 | jump_label_dec_deferred(&perf_sched_events); |
2752 | if (event->attr.mmap || event->attr.mmap_data) | 2752 | if (event->attr.mmap || event->attr.mmap_data) |
2753 | atomic_dec(&nr_mmap_events); | 2753 | atomic_dec(&nr_mmap_events); |
2754 | if (event->attr.comm) | 2754 | if (event->attr.comm) |
@@ -2759,7 +2759,7 @@ static void free_event(struct perf_event *event) | |||
2759 | put_callchain_buffers(); | 2759 | put_callchain_buffers(); |
2760 | if (is_cgroup_event(event)) { | 2760 | if (is_cgroup_event(event)) { |
2761 | atomic_dec(&per_cpu(perf_cgroup_events, event->cpu)); | 2761 | atomic_dec(&per_cpu(perf_cgroup_events, event->cpu)); |
2762 | jump_label_dec(&perf_sched_events); | 2762 | jump_label_dec_deferred(&perf_sched_events); |
2763 | } | 2763 | } |
2764 | } | 2764 | } |
2765 | 2765 | ||
@@ -5784,7 +5784,7 @@ done: | |||
5784 | 5784 | ||
5785 | if (!event->parent) { | 5785 | if (!event->parent) { |
5786 | if (event->attach_state & PERF_ATTACH_TASK) | 5786 | if (event->attach_state & PERF_ATTACH_TASK) |
5787 | jump_label_inc(&perf_sched_events); | 5787 | jump_label_inc(&perf_sched_events.key); |
5788 | if (event->attr.mmap || event->attr.mmap_data) | 5788 | if (event->attr.mmap || event->attr.mmap_data) |
5789 | atomic_inc(&nr_mmap_events); | 5789 | atomic_inc(&nr_mmap_events); |
5790 | if (event->attr.comm) | 5790 | if (event->attr.comm) |
@@ -6022,7 +6022,7 @@ SYSCALL_DEFINE5(perf_event_open, | |||
6022 | * - that may need work on context switch | 6022 | * - that may need work on context switch |
6023 | */ | 6023 | */ |
6024 | atomic_inc(&per_cpu(perf_cgroup_events, event->cpu)); | 6024 | atomic_inc(&per_cpu(perf_cgroup_events, event->cpu)); |
6025 | jump_label_inc(&perf_sched_events); | 6025 | jump_label_inc(&perf_sched_events.key); |
6026 | } | 6026 | } |
6027 | 6027 | ||
6028 | /* | 6028 | /* |
@@ -6868,6 +6868,9 @@ void __init perf_event_init(void) | |||
6868 | 6868 | ||
6869 | ret = init_hw_breakpoint(); | 6869 | ret = init_hw_breakpoint(); |
6870 | WARN(ret, "hw_breakpoint initialization failed with: %d", ret); | 6870 | WARN(ret, "hw_breakpoint initialization failed with: %d", ret); |
6871 | |||
6872 | /* do not patch jump label more than once per second */ | ||
6873 | jump_label_rate_limit(&perf_sched_events, HZ); | ||
6871 | } | 6874 | } |
6872 | 6875 | ||
6873 | static int __init perf_event_sysfs_init(void) | 6876 | static int __init perf_event_sysfs_init(void) |
diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 66ff7109f697..51a175ab0a03 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c | |||
@@ -72,15 +72,46 @@ void jump_label_inc(struct jump_label_key *key) | |||
72 | jump_label_unlock(); | 72 | jump_label_unlock(); |
73 | } | 73 | } |
74 | 74 | ||
75 | void jump_label_dec(struct jump_label_key *key) | 75 | static void __jump_label_dec(struct jump_label_key *key, |
76 | unsigned long rate_limit, struct delayed_work *work) | ||
76 | { | 77 | { |
77 | if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) | 78 | if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) |
78 | return; | 79 | return; |
79 | 80 | ||
80 | jump_label_update(key, JUMP_LABEL_DISABLE); | 81 | if (rate_limit) { |
82 | atomic_inc(&key->enabled); | ||
83 | schedule_delayed_work(work, rate_limit); | ||
84 | } else | ||
85 | jump_label_update(key, JUMP_LABEL_DISABLE); | ||
86 | |||
81 | jump_label_unlock(); | 87 | jump_label_unlock(); |
82 | } | 88 | } |
83 | 89 | ||
90 | static void jump_label_update_timeout(struct work_struct *work) | ||
91 | { | ||
92 | struct jump_label_key_deferred *key = | ||
93 | container_of(work, struct jump_label_key_deferred, work.work); | ||
94 | __jump_label_dec(&key->key, 0, NULL); | ||
95 | } | ||
96 | |||
97 | void jump_label_dec(struct jump_label_key *key) | ||
98 | { | ||
99 | __jump_label_dec(key, 0, NULL); | ||
100 | } | ||
101 | |||
102 | void jump_label_dec_deferred(struct jump_label_key_deferred *key) | ||
103 | { | ||
104 | __jump_label_dec(&key->key, key->timeout, &key->work); | ||
105 | } | ||
106 | |||
107 | |||
108 | void jump_label_rate_limit(struct jump_label_key_deferred *key, | ||
109 | unsigned long rl) | ||
110 | { | ||
111 | key->timeout = rl; | ||
112 | INIT_DELAYED_WORK(&key->work, jump_label_update_timeout); | ||
113 | } | ||
114 | |||
84 | static int addr_conflict(struct jump_entry *entry, void *start, void *end) | 115 | static int addr_conflict(struct jump_entry *entry, void *start, void *end) |
85 | { | 116 | { |
86 | if (entry->code <= (unsigned long)end && | 117 | if (entry->code <= (unsigned long)end && |