diff options
author | Rafael J. Wysocki <rjw@sisk.pl> | 2011-12-21 15:59:45 -0500 |
---|---|---|
committer | Rafael J. Wysocki <rjw@sisk.pl> | 2011-12-21 15:59:45 -0500 |
commit | b00f4dc5ff022cb9cbaffd376d9454d7fa1e496f (patch) | |
tree | 40f1b232e2f1e8ac365317a14fdcbcb331722b46 /kernel/events | |
parent | 1eac8111e0763853266a171ce11214da3a347a0a (diff) | |
parent | b9e26dfdad5a4f9cbdaacafac6998614cc9c41bc (diff) |
Merge branch 'master' into pm-sleep
* master: (848 commits)
SELinux: Fix RCU deref check warning in sel_netport_insert()
binary_sysctl(): fix memory leak
mm/vmalloc.c: remove static declaration of va from __get_vm_area_node
ipmi_watchdog: restore settings when BMC reset
oom: fix integer overflow of points in oom_badness
memcg: keep root group unchanged if creation fails
nilfs2: potential integer overflow in nilfs_ioctl_clean_segments()
nilfs2: unbreak compat ioctl
cpusets: stall when updating mems_allowed for mempolicy or disjoint nodemask
evm: prevent racing during tfm allocation
evm: key must be set once during initialization
mmc: vub300: fix type of firmware_rom_wait_states module parameter
Revert "mmc: enable runtime PM by default"
mmc: sdhci: remove "state" argument from sdhci_suspend_host
x86, dumpstack: Fix code bytes breakage due to missing KERN_CONT
IB/qib: Correct sense on freectxts increment and decrement
RDMA/cma: Verify private data length
cgroups: fix a css_set not found bug in cgroup_attach_proc
oprofile: Fix uninitialized memory access when writing to writing to oprofilefs
Revert "xen/pv-on-hvm kexec: add xs_reset_watches to shutdown watches from old kernel"
...
Conflicts:
kernel/cgroup_freezer.c
Diffstat (limited to 'kernel/events')
-rw-r--r-- | kernel/events/core.c | 95 | ||||
-rw-r--r-- | kernel/events/internal.h | 3 | ||||
-rw-r--r-- | kernel/events/ring_buffer.c | 3 |
3 files changed, 97 insertions, 4 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c index 0e8457da6f95..58690af323e4 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -185,6 +185,9 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, | |||
185 | static void update_context_time(struct perf_event_context *ctx); | 185 | static void update_context_time(struct perf_event_context *ctx); |
186 | static u64 perf_event_time(struct perf_event *event); | 186 | static u64 perf_event_time(struct perf_event *event); |
187 | 187 | ||
188 | static void ring_buffer_attach(struct perf_event *event, | ||
189 | struct ring_buffer *rb); | ||
190 | |||
188 | void __weak perf_event_print_debug(void) { } | 191 | void __weak perf_event_print_debug(void) { } |
189 | 192 | ||
190 | extern __weak const char *perf_pmu_name(void) | 193 | extern __weak const char *perf_pmu_name(void) |
@@ -2171,9 +2174,10 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, | |||
2171 | */ | 2174 | */ |
2172 | cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); | 2175 | cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); |
2173 | 2176 | ||
2174 | perf_event_sched_in(cpuctx, ctx, task); | 2177 | if (ctx->nr_events) |
2178 | cpuctx->task_ctx = ctx; | ||
2175 | 2179 | ||
2176 | cpuctx->task_ctx = ctx; | 2180 | perf_event_sched_in(cpuctx, cpuctx->task_ctx, task); |
2177 | 2181 | ||
2178 | perf_pmu_enable(ctx->pmu); | 2182 | perf_pmu_enable(ctx->pmu); |
2179 | perf_ctx_unlock(cpuctx, ctx); | 2183 | perf_ctx_unlock(cpuctx, ctx); |
@@ -3190,12 +3194,33 @@ static unsigned int perf_poll(struct file *file, poll_table *wait) | |||
3190 | struct ring_buffer *rb; | 3194 | struct ring_buffer *rb; |
3191 | unsigned int events = POLL_HUP; | 3195 | unsigned int events = POLL_HUP; |
3192 | 3196 | ||
3197 | /* | ||
3198 | * Race between perf_event_set_output() and perf_poll(): perf_poll() | ||
3199 | * grabs the rb reference but perf_event_set_output() overrides it. | ||
3200 | * Here is the timeline for two threads T1, T2: | ||
3201 | * t0: T1, rb = rcu_dereference(event->rb) | ||
3202 | * t1: T2, old_rb = event->rb | ||
3203 | * t2: T2, event->rb = new rb | ||
3204 | * t3: T2, ring_buffer_detach(old_rb) | ||
3205 | * t4: T1, ring_buffer_attach(rb1) | ||
3206 | * t5: T1, poll_wait(event->waitq) | ||
3207 | * | ||
3208 | * To avoid this problem, we grab mmap_mutex in perf_poll() | ||
3209 | * thereby ensuring that the assignment of the new ring buffer | ||
3210 | * and the detachment of the old buffer appear atomic to perf_poll() | ||
3211 | */ | ||
3212 | mutex_lock(&event->mmap_mutex); | ||
3213 | |||
3193 | rcu_read_lock(); | 3214 | rcu_read_lock(); |
3194 | rb = rcu_dereference(event->rb); | 3215 | rb = rcu_dereference(event->rb); |
3195 | if (rb) | 3216 | if (rb) { |
3217 | ring_buffer_attach(event, rb); | ||
3196 | events = atomic_xchg(&rb->poll, 0); | 3218 | events = atomic_xchg(&rb->poll, 0); |
3219 | } | ||
3197 | rcu_read_unlock(); | 3220 | rcu_read_unlock(); |
3198 | 3221 | ||
3222 | mutex_unlock(&event->mmap_mutex); | ||
3223 | |||
3199 | poll_wait(file, &event->waitq, wait); | 3224 | poll_wait(file, &event->waitq, wait); |
3200 | 3225 | ||
3201 | return events; | 3226 | return events; |
@@ -3496,6 +3521,53 @@ unlock: | |||
3496 | return ret; | 3521 | return ret; |
3497 | } | 3522 | } |
3498 | 3523 | ||
3524 | static void ring_buffer_attach(struct perf_event *event, | ||
3525 | struct ring_buffer *rb) | ||
3526 | { | ||
3527 | unsigned long flags; | ||
3528 | |||
3529 | if (!list_empty(&event->rb_entry)) | ||
3530 | return; | ||
3531 | |||
3532 | spin_lock_irqsave(&rb->event_lock, flags); | ||
3533 | if (!list_empty(&event->rb_entry)) | ||
3534 | goto unlock; | ||
3535 | |||
3536 | list_add(&event->rb_entry, &rb->event_list); | ||
3537 | unlock: | ||
3538 | spin_unlock_irqrestore(&rb->event_lock, flags); | ||
3539 | } | ||
3540 | |||
3541 | static void ring_buffer_detach(struct perf_event *event, | ||
3542 | struct ring_buffer *rb) | ||
3543 | { | ||
3544 | unsigned long flags; | ||
3545 | |||
3546 | if (list_empty(&event->rb_entry)) | ||
3547 | return; | ||
3548 | |||
3549 | spin_lock_irqsave(&rb->event_lock, flags); | ||
3550 | list_del_init(&event->rb_entry); | ||
3551 | wake_up_all(&event->waitq); | ||
3552 | spin_unlock_irqrestore(&rb->event_lock, flags); | ||
3553 | } | ||
3554 | |||
3555 | static void ring_buffer_wakeup(struct perf_event *event) | ||
3556 | { | ||
3557 | struct ring_buffer *rb; | ||
3558 | |||
3559 | rcu_read_lock(); | ||
3560 | rb = rcu_dereference(event->rb); | ||
3561 | if (!rb) | ||
3562 | goto unlock; | ||
3563 | |||
3564 | list_for_each_entry_rcu(event, &rb->event_list, rb_entry) | ||
3565 | wake_up_all(&event->waitq); | ||
3566 | |||
3567 | unlock: | ||
3568 | rcu_read_unlock(); | ||
3569 | } | ||
3570 | |||
3499 | static void rb_free_rcu(struct rcu_head *rcu_head) | 3571 | static void rb_free_rcu(struct rcu_head *rcu_head) |
3500 | { | 3572 | { |
3501 | struct ring_buffer *rb; | 3573 | struct ring_buffer *rb; |
@@ -3521,9 +3593,19 @@ static struct ring_buffer *ring_buffer_get(struct perf_event *event) | |||
3521 | 3593 | ||
3522 | static void ring_buffer_put(struct ring_buffer *rb) | 3594 | static void ring_buffer_put(struct ring_buffer *rb) |
3523 | { | 3595 | { |
3596 | struct perf_event *event, *n; | ||
3597 | unsigned long flags; | ||
3598 | |||
3524 | if (!atomic_dec_and_test(&rb->refcount)) | 3599 | if (!atomic_dec_and_test(&rb->refcount)) |
3525 | return; | 3600 | return; |
3526 | 3601 | ||
3602 | spin_lock_irqsave(&rb->event_lock, flags); | ||
3603 | list_for_each_entry_safe(event, n, &rb->event_list, rb_entry) { | ||
3604 | list_del_init(&event->rb_entry); | ||
3605 | wake_up_all(&event->waitq); | ||
3606 | } | ||
3607 | spin_unlock_irqrestore(&rb->event_lock, flags); | ||
3608 | |||
3527 | call_rcu(&rb->rcu_head, rb_free_rcu); | 3609 | call_rcu(&rb->rcu_head, rb_free_rcu); |
3528 | } | 3610 | } |
3529 | 3611 | ||
@@ -3546,6 +3628,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) | |||
3546 | atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm); | 3628 | atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm); |
3547 | vma->vm_mm->pinned_vm -= event->mmap_locked; | 3629 | vma->vm_mm->pinned_vm -= event->mmap_locked; |
3548 | rcu_assign_pointer(event->rb, NULL); | 3630 | rcu_assign_pointer(event->rb, NULL); |
3631 | ring_buffer_detach(event, rb); | ||
3549 | mutex_unlock(&event->mmap_mutex); | 3632 | mutex_unlock(&event->mmap_mutex); |
3550 | 3633 | ||
3551 | ring_buffer_put(rb); | 3634 | ring_buffer_put(rb); |
@@ -3700,7 +3783,7 @@ static const struct file_operations perf_fops = { | |||
3700 | 3783 | ||
3701 | void perf_event_wakeup(struct perf_event *event) | 3784 | void perf_event_wakeup(struct perf_event *event) |
3702 | { | 3785 | { |
3703 | wake_up_all(&event->waitq); | 3786 | ring_buffer_wakeup(event); |
3704 | 3787 | ||
3705 | if (event->pending_kill) { | 3788 | if (event->pending_kill) { |
3706 | kill_fasync(&event->fasync, SIGIO, event->pending_kill); | 3789 | kill_fasync(&event->fasync, SIGIO, event->pending_kill); |
@@ -5822,6 +5905,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, | |||
5822 | INIT_LIST_HEAD(&event->group_entry); | 5905 | INIT_LIST_HEAD(&event->group_entry); |
5823 | INIT_LIST_HEAD(&event->event_entry); | 5906 | INIT_LIST_HEAD(&event->event_entry); |
5824 | INIT_LIST_HEAD(&event->sibling_list); | 5907 | INIT_LIST_HEAD(&event->sibling_list); |
5908 | INIT_LIST_HEAD(&event->rb_entry); | ||
5909 | |||
5825 | init_waitqueue_head(&event->waitq); | 5910 | init_waitqueue_head(&event->waitq); |
5826 | init_irq_work(&event->pending, perf_pending_event); | 5911 | init_irq_work(&event->pending, perf_pending_event); |
5827 | 5912 | ||
@@ -6028,6 +6113,8 @@ set: | |||
6028 | 6113 | ||
6029 | old_rb = event->rb; | 6114 | old_rb = event->rb; |
6030 | rcu_assign_pointer(event->rb, rb); | 6115 | rcu_assign_pointer(event->rb, rb); |
6116 | if (old_rb) | ||
6117 | ring_buffer_detach(event, old_rb); | ||
6031 | ret = 0; | 6118 | ret = 0; |
6032 | unlock: | 6119 | unlock: |
6033 | mutex_unlock(&event->mmap_mutex); | 6120 | mutex_unlock(&event->mmap_mutex); |
diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 09097dd8116c..64568a699375 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h | |||
@@ -22,6 +22,9 @@ struct ring_buffer { | |||
22 | local_t lost; /* nr records lost */ | 22 | local_t lost; /* nr records lost */ |
23 | 23 | ||
24 | long watermark; /* wakeup watermark */ | 24 | long watermark; /* wakeup watermark */ |
25 | /* poll crap */ | ||
26 | spinlock_t event_lock; | ||
27 | struct list_head event_list; | ||
25 | 28 | ||
26 | struct perf_event_mmap_page *user_page; | 29 | struct perf_event_mmap_page *user_page; |
27 | void *data_pages[0]; | 30 | void *data_pages[0]; |
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index a2a29205cc0f..7f3011c6b57f 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c | |||
@@ -209,6 +209,9 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) | |||
209 | rb->writable = 1; | 209 | rb->writable = 1; |
210 | 210 | ||
211 | atomic_set(&rb->refcount, 1); | 211 | atomic_set(&rb->refcount, 1); |
212 | |||
213 | INIT_LIST_HEAD(&rb->event_list); | ||
214 | spin_lock_init(&rb->event_lock); | ||
212 | } | 215 | } |
213 | 216 | ||
214 | #ifndef CONFIG_PERF_USE_VMALLOC | 217 | #ifndef CONFIG_PERF_USE_VMALLOC |