author     Peter Zijlstra <a.p.zijlstra@chello.nl>    2010-10-14 02:01:34 -0400
committer  Ingo Molnar <mingo@elte.hu>                2010-10-18 13:58:50 -0400
commit     e360adbe29241a0194e10e20595360dd7b98a2b3 (patch)
tree       ef5fa5f50a895096bfb25bc11b25949603158238 /kernel/perf_event.c
parent     8e5fc1a7320baf6076391607515dceb61319b36a (diff)
irq_work: Add generic hardirq context callbacks
Provide a mechanism that allows running code in IRQ context. It is
most useful for NMI code that needs to interact with the rest of the
system -- such as waking up a task to drain buffers.

Perf currently has such a mechanism, so extract it and provide it as
a generic feature, independent of perf, so that others may also
benefit.

The IRQ context callback is generated through self-IPIs where
possible; on architectures like powerpc, the decrementer (the
built-in timer facility) is instead programmed to generate an
interrupt immediately.

Architectures that have neither must make do with a callback from
the timer tick. These architectures can call irq_work_run() at the
tail of any IRQ handlers that might enqueue such work (like the perf
IRQ handler) to avoid undue latencies in processing the work.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Kyle McMartin <kyle@mcmartin.ca>
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
[ various fixes ]
Signed-off-by: Huang Ying <ying.huang@intel.com>
LKML-Reference: <1287036094.7768.291.camel@yhuang-dev>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
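
For readers new to irq_work: below is a minimal sketch of what a user of the
new interface is expected to look like, pieced together from the calls this
patch itself uses (init_irq_work(), irq_work_queue(), irq_work_sync(), and the
irq_work callback signature). The my_buffer/my_drain/my_nmi_handler names are
made up for illustration and are not part of the patch.

#include <linux/irq_work.h>
#include <linux/wait.h>
#include <linux/slab.h>

struct my_buffer {			/* hypothetical user, for illustration only */
	struct irq_work work;		/* embedded, like perf_event::pending */
	wait_queue_head_t waitq;
	/* ... data produced from NMI context ... */
};

/* Runs later, in hardirq context, where waking up tasks is allowed. */
static void my_drain(struct irq_work *work)
{
	struct my_buffer *buf = container_of(work, struct my_buffer, work);

	wake_up(&buf->waitq);
}

static void my_buffer_init(struct my_buffer *buf)
{
	init_waitqueue_head(&buf->waitq);
	init_irq_work(&buf->work, my_drain);	/* mirrors perf_event_alloc() below */
}

/* NMI context: no locks may be taken, so only queue the callback. */
static void my_nmi_handler(struct my_buffer *buf)
{
	irq_work_queue(&buf->work);	/* mirrors perf_output_wakeup() below */
}

static void my_buffer_free(struct my_buffer *buf)
{
	irq_work_sync(&buf->work);	/* wait out a pending callback, as free_event() now does */
	kfree(buf);
}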
Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r--   kernel/perf_event.c   104
1 file changed, 5 insertions(+), 99 deletions(-)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 634f86a4b2f9..99b9700e74d0 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -2206,12 +2206,11 @@ static void free_event_rcu(struct rcu_head *head)
 	kfree(event);
 }
 
-static void perf_pending_sync(struct perf_event *event);
 static void perf_buffer_put(struct perf_buffer *buffer);
 
 static void free_event(struct perf_event *event)
 {
-	perf_pending_sync(event);
+	irq_work_sync(&event->pending);
 
 	if (!event->parent) {
 		atomic_dec(&nr_events);
@@ -3162,16 +3161,7 @@ void perf_event_wakeup(struct perf_event *event)
 	}
 }
 
-/*
- * Pending wakeups
- *
- * Handle the case where we need to wakeup up from NMI (or rq->lock) context.
- *
- * The NMI bit means we cannot possibly take locks.  Therefore, maintain a
- * single linked list and use cmpxchg() to add entries lockless.
- */
-
-static void perf_pending_event(struct perf_pending_entry *entry)
+static void perf_pending_event(struct irq_work *entry)
 {
 	struct perf_event *event = container_of(entry,
 			struct perf_event, pending);
@@ -3187,89 +3177,6 @@ static void perf_pending_event(struct perf_pending_entry *entry)
 	}
 }
 
-#define PENDING_TAIL ((struct perf_pending_entry *)-1UL)
-
-static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = {
-	PENDING_TAIL,
-};
-
-static void perf_pending_queue(struct perf_pending_entry *entry,
-			       void (*func)(struct perf_pending_entry *))
-{
-	struct perf_pending_entry **head;
-
-	if (cmpxchg(&entry->next, NULL, PENDING_TAIL) != NULL)
-		return;
-
-	entry->func = func;
-
-	head = &get_cpu_var(perf_pending_head);
-
-	do {
-		entry->next = *head;
-	} while (cmpxchg(head, entry->next, entry) != entry->next);
-
-	set_perf_event_pending();
-
-	put_cpu_var(perf_pending_head);
-}
-
-static int __perf_pending_run(void)
-{
-	struct perf_pending_entry *list;
-	int nr = 0;
-
-	list = xchg(&__get_cpu_var(perf_pending_head), PENDING_TAIL);
-	while (list != PENDING_TAIL) {
-		void (*func)(struct perf_pending_entry *);
-		struct perf_pending_entry *entry = list;
-
-		list = list->next;
-
-		func = entry->func;
-		entry->next = NULL;
-		/*
-		 * Ensure we observe the unqueue before we issue the wakeup,
-		 * so that we won't be waiting forever.
-		 * -- see perf_not_pending().
-		 */
-		smp_wmb();
-
-		func(entry);
-		nr++;
-	}
-
-	return nr;
-}
-
-static inline int perf_not_pending(struct perf_event *event)
-{
-	/*
-	 * If we flush on whatever cpu we run, there is a chance we don't
-	 * need to wait.
-	 */
-	get_cpu();
-	__perf_pending_run();
-	put_cpu();
-
-	/*
-	 * Ensure we see the proper queue state before going to sleep
-	 * so that we do not miss the wakeup. -- see perf_pending_handle()
-	 */
-	smp_rmb();
-	return event->pending.next == NULL;
-}
-
-static void perf_pending_sync(struct perf_event *event)
-{
-	wait_event(event->waitq, perf_not_pending(event));
-}
-
-void perf_event_do_pending(void)
-{
-	__perf_pending_run();
-}
-
 /*
  * We assume there is only KVM supporting the callbacks.
  * Later on, we might change it to a list if there is
@@ -3319,8 +3226,7 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
 
 	if (handle->nmi) {
 		handle->event->pending_wakeup = 1;
-		perf_pending_queue(&handle->event->pending,
-				   perf_pending_event);
+		irq_work_queue(&handle->event->pending);
 	} else
 		perf_event_wakeup(handle->event);
 }
@@ -4356,8 +4262,7 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
 		event->pending_kill = POLL_HUP;
 		if (nmi) {
 			event->pending_disable = 1;
-			perf_pending_queue(&event->pending,
-					   perf_pending_event);
+			irq_work_queue(&event->pending);
 		} else
 			perf_event_disable(event);
 	}
@@ -5374,6 +5279,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 	INIT_LIST_HEAD(&event->event_entry);
 	INIT_LIST_HEAD(&event->sibling_list);
 	init_waitqueue_head(&event->waitq);
+	init_irq_work(&event->pending, perf_pending_event);
 
 	mutex_init(&event->mmap_mutex);
 
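
One consequence worth spelling out from the changelog: on an architecture with
neither a self-IPI nor a decrementer-style facility, queued callbacks only run
when something calls irq_work_run(). A rough sketch of the suggested hook
point, with a made-up handler name (the real hook lives in per-architecture
code and is not part of this patch):

#include <linux/irq_work.h>

/* Hypothetical arch interrupt handler that may have enqueued irq_work. */
void my_arch_perf_irq_handler(void)
{
	/* ... acknowledge the interrupt and run the perf overflow code ... */

	/*
	 * Flush callbacks queued via irq_work_queue() (e.g. the
	 * perf_pending_event() wakeups queued from NMI context) so they
	 * are not delayed until the next timer tick.
	 */
	irq_work_run();
}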