author	Peter Zijlstra <a.p.zijlstra@chello.nl>	2010-10-14 02:01:34 -0400
committer	Ingo Molnar <mingo@elte.hu>	2010-10-18 13:58:50 -0400
commit	e360adbe29241a0194e10e20595360dd7b98a2b3 (patch)
tree	ef5fa5f50a895096bfb25bc11b25949603158238	/kernel/perf_event.c
parent	8e5fc1a7320baf6076391607515dceb61319b36a (diff)
irq_work: Add generic hardirq context callbacks
Provide a mechanism that allows running code in IRQ context. It is
most useful for NMI code that needs to interact with the rest of the
system -- like wakeup a task to drain buffers.

Perf currently has such a mechanism, so extract that and provide it as
a generic feature, independent of perf so that others may also benefit.

The IRQ context callback is generated through self-IPIs where possible,
or on architectures like powerpc the decrementer (the built-in timer
facility) is set to generate an interrupt immediately.

Architectures that don't have anything like this get to do with a
callback from the timer tick. These architectures can call irq_work_run()
at the tail of any IRQ handlers that might enqueue such work (like the
perf IRQ handler) to avoid undue latencies in processing the work.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Kyle McMartin <kyle@mcmartin.ca>
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
[ various fixes ]
Signed-off-by: Huang Ying <ying.huang@intel.com>
LKML-Reference: <1287036094.7768.291.camel@yhuang-dev>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
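For orientation, here is a minimal sketch of how a subsystem might use the new interface, mirroring the perf conversion in the diff below. Only init_irq_work(), irq_work_queue(), irq_work_sync() and irq_work_run() come from this series; the my_* names and the wait queue are illustrative placeholders.

#include <linux/irq_work.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(my_waitq);	/* illustrative wait queue */
static struct irq_work my_work;

/*
 * The callback runs later in hardirq context, where interacting with
 * the rest of the system (wakeups, locks) is allowed.
 */
static void my_irq_work_func(struct irq_work *work)
{
	wake_up_all(&my_waitq);
}

static void my_init(void)
{
	init_irq_work(&my_work, my_irq_work_func);
}

/*
 * Called from NMI (or any other context that cannot take locks):
 * queue the work. It is kicked via a self-IPI where the architecture
 * has one; otherwise it is picked up when the architecture calls
 * irq_work_run() from its timer tick or at the tail of its IRQ handlers.
 */
static void my_nmi_handler(void)
{
	irq_work_queue(&my_work);
}

/* On teardown, wait for a queued callback to complete. */
static void my_teardown(void)
{
	irq_work_sync(&my_work);
}

perf itself follows this exact pattern: struct perf_event embeds a struct irq_work ("pending"), perf_event_alloc() initializes it, the NMI paths queue it, and free_event() syncs it, as the hunks below show.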
Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r--	kernel/perf_event.c	104
1 files changed, 5 insertions, 99 deletions
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 634f86a4b2f9..99b9700e74d0 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -2206,12 +2206,11 @@ static void free_event_rcu(struct rcu_head *head)
 	kfree(event);
 }
 
-static void perf_pending_sync(struct perf_event *event);
 static void perf_buffer_put(struct perf_buffer *buffer);
 
 static void free_event(struct perf_event *event)
 {
-	perf_pending_sync(event);
+	irq_work_sync(&event->pending);
 
 	if (!event->parent) {
 		atomic_dec(&nr_events);
@@ -3162,16 +3161,7 @@ void perf_event_wakeup(struct perf_event *event)
 	}
 }
 
-/*
- * Pending wakeups
- *
- * Handle the case where we need to wakeup up from NMI (or rq->lock) context.
- *
- * The NMI bit means we cannot possibly take locks. Therefore, maintain a
- * single linked list and use cmpxchg() to add entries lockless.
- */
-
-static void perf_pending_event(struct perf_pending_entry *entry)
+static void perf_pending_event(struct irq_work *entry)
 {
 	struct perf_event *event = container_of(entry,
 			struct perf_event, pending);
@@ -3187,89 +3177,6 @@ static void perf_pending_event(struct perf_pending_entry *entry)
 	}
 }
 
-#define PENDING_TAIL ((struct perf_pending_entry *)-1UL)
-
-static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = {
-	PENDING_TAIL,
-};
-
-static void perf_pending_queue(struct perf_pending_entry *entry,
-			       void (*func)(struct perf_pending_entry *))
-{
-	struct perf_pending_entry **head;
-
-	if (cmpxchg(&entry->next, NULL, PENDING_TAIL) != NULL)
-		return;
-
-	entry->func = func;
-
-	head = &get_cpu_var(perf_pending_head);
-
-	do {
-		entry->next = *head;
-	} while (cmpxchg(head, entry->next, entry) != entry->next);
-
-	set_perf_event_pending();
-
-	put_cpu_var(perf_pending_head);
-}
-
-static int __perf_pending_run(void)
-{
-	struct perf_pending_entry *list;
-	int nr = 0;
-
-	list = xchg(&__get_cpu_var(perf_pending_head), PENDING_TAIL);
-	while (list != PENDING_TAIL) {
-		void (*func)(struct perf_pending_entry *);
-		struct perf_pending_entry *entry = list;
-
-		list = list->next;
-
-		func = entry->func;
-		entry->next = NULL;
-		/*
-		 * Ensure we observe the unqueue before we issue the wakeup,
-		 * so that we won't be waiting forever.
-		 * -- see perf_not_pending().
-		 */
-		smp_wmb();
-
-		func(entry);
-		nr++;
-	}
-
-	return nr;
-}
-
-static inline int perf_not_pending(struct perf_event *event)
-{
-	/*
-	 * If we flush on whatever cpu we run, there is a chance we don't
-	 * need to wait.
-	 */
-	get_cpu();
-	__perf_pending_run();
-	put_cpu();
-
-	/*
-	 * Ensure we see the proper queue state before going to sleep
-	 * so that we do not miss the wakeup. -- see perf_pending_handle()
-	 */
-	smp_rmb();
-	return event->pending.next == NULL;
-}
-
-static void perf_pending_sync(struct perf_event *event)
-{
-	wait_event(event->waitq, perf_not_pending(event));
-}
-
-void perf_event_do_pending(void)
-{
-	__perf_pending_run();
-}
-
 /*
  * We assume there is only KVM supporting the callbacks.
  * Later on, we might change it to a list if there is
@@ -3319,8 +3226,7 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
 
 	if (handle->nmi) {
 		handle->event->pending_wakeup = 1;
-		perf_pending_queue(&handle->event->pending,
-				   perf_pending_event);
+		irq_work_queue(&handle->event->pending);
 	} else
 		perf_event_wakeup(handle->event);
 }
@@ -4356,8 +4262,7 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
 		event->pending_kill = POLL_HUP;
 		if (nmi) {
 			event->pending_disable = 1;
-			perf_pending_queue(&event->pending,
-					   perf_pending_event);
+			irq_work_queue(&event->pending);
 		} else
 			perf_event_disable(event);
 	}
@@ -5374,6 +5279,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 	INIT_LIST_HEAD(&event->event_entry);
 	INIT_LIST_HEAD(&event->sibling_list);
 	init_waitqueue_head(&event->waitq);
+	init_irq_work(&event->pending, perf_pending_event);
 
 	mutex_init(&event->mmap_mutex);
 