-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c |   2
-rw-r--r--  include/linux/perf_event.h             |   3
-rw-r--r--  kernel/events/core.c                   | 233
-rw-r--r--  kernel/events/internal.h               |   4
-rw-r--r--  kernel/kprobes.c                       |  30
5 files changed, 187 insertions, 85 deletions
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index f60d41ff9a97..a9e22073bd56 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -165,13 +165,13 @@ static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
 	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1),
 	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
-	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
 	EVENT_EXTRA_END
 };
 
 static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
 	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
+	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
 	EVENT_EXTRA_END
 };
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f463a46424e2..c5b6dbf9c2fc 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -389,8 +389,7 @@ struct perf_event {
 	/* mmap bits */
 	struct mutex			mmap_mutex;
 	atomic_t			mmap_count;
-	int				mmap_locked;
-	struct user_struct		*mmap_user;
+
 	struct ring_buffer		*rb;
 	struct list_head		rb_entry;
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 9dc297faf7c0..b391907d5352 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -196,9 +196,6 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
 static void update_context_time(struct perf_event_context *ctx);
 static u64 perf_event_time(struct perf_event *event);
 
-static void ring_buffer_attach(struct perf_event *event,
-			       struct ring_buffer *rb);
-
 void __weak perf_event_print_debug(void) { }
 
 extern __weak const char *perf_pmu_name(void)
@@ -2918,6 +2915,7 @@ static void free_event_rcu(struct rcu_head *head)
 }
 
 static void ring_buffer_put(struct ring_buffer *rb);
+static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb);
 
 static void free_event(struct perf_event *event)
 {
@@ -2942,15 +2940,30 @@ static void free_event(struct perf_event *event)
 		if (has_branch_stack(event)) {
 			static_key_slow_dec_deferred(&perf_sched_events);
 			/* is system-wide event */
-			if (!(event->attach_state & PERF_ATTACH_TASK))
+			if (!(event->attach_state & PERF_ATTACH_TASK)) {
 				atomic_dec(&per_cpu(perf_branch_stack_events,
 						    event->cpu));
+			}
 		}
 	}
 
 	if (event->rb) {
-		ring_buffer_put(event->rb);
-		event->rb = NULL;
+		struct ring_buffer *rb;
+
+		/*
+		 * Can happen when we close an event with re-directed output.
+		 *
+		 * Since we have a 0 refcount, perf_mmap_close() will skip
+		 * over us; possibly making our ring_buffer_put() the last.
+		 */
+		mutex_lock(&event->mmap_mutex);
+		rb = event->rb;
+		if (rb) {
+			rcu_assign_pointer(event->rb, NULL);
+			ring_buffer_detach(event, rb);
+			ring_buffer_put(rb); /* could be last */
+		}
+		mutex_unlock(&event->mmap_mutex);
 	}
 
 	if (is_cgroup_event(event))
@@ -3188,30 +3201,13 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
 	unsigned int events = POLL_HUP;
 
 	/*
-	 * Race between perf_event_set_output() and perf_poll(): perf_poll()
-	 * grabs the rb reference but perf_event_set_output() overrides it.
-	 * Here is the timeline for two threads T1, T2:
-	 * t0: T1, rb = rcu_dereference(event->rb)
-	 * t1: T2, old_rb = event->rb
-	 * t2: T2, event->rb = new rb
-	 * t3: T2, ring_buffer_detach(old_rb)
-	 * t4: T1, ring_buffer_attach(rb1)
-	 * t5: T1, poll_wait(event->waitq)
-	 *
-	 * To avoid this problem, we grab mmap_mutex in perf_poll()
-	 * thereby ensuring that the assignment of the new ring buffer
-	 * and the detachment of the old buffer appear atomic to perf_poll()
+	 * Pin the event->rb by taking event->mmap_mutex; otherwise
+	 * perf_event_set_output() can swizzle our rb and make us miss wakeups.
 	 */
 	mutex_lock(&event->mmap_mutex);
-
-	rcu_read_lock();
-	rb = rcu_dereference(event->rb);
-	if (rb) {
-		ring_buffer_attach(event, rb);
+	rb = event->rb;
+	if (rb)
 		events = atomic_xchg(&rb->poll, 0);
-	}
-	rcu_read_unlock();
-
 	mutex_unlock(&event->mmap_mutex);
 
 	poll_wait(file, &event->waitq, wait);
@@ -3521,16 +3517,12 @@ static void ring_buffer_attach(struct perf_event *event,
 		return;
 
 	spin_lock_irqsave(&rb->event_lock, flags);
-	if (!list_empty(&event->rb_entry))
-		goto unlock;
-
-	list_add(&event->rb_entry, &rb->event_list);
-unlock:
+	if (list_empty(&event->rb_entry))
+		list_add(&event->rb_entry, &rb->event_list);
 	spin_unlock_irqrestore(&rb->event_lock, flags);
 }
 
-static void ring_buffer_detach(struct perf_event *event,
-			       struct ring_buffer *rb)
+static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb)
 {
 	unsigned long flags;
 
@@ -3549,13 +3541,10 @@ static void ring_buffer_wakeup(struct perf_event *event)
 
 	rcu_read_lock();
 	rb = rcu_dereference(event->rb);
-	if (!rb)
-		goto unlock;
-
-	list_for_each_entry_rcu(event, &rb->event_list, rb_entry)
-		wake_up_all(&event->waitq);
-
-unlock:
+	if (rb) {
+		list_for_each_entry_rcu(event, &rb->event_list, rb_entry)
+			wake_up_all(&event->waitq);
+	}
 	rcu_read_unlock();
 }
 
@@ -3584,18 +3573,10 @@ static struct ring_buffer *ring_buffer_get(struct perf_event *event)
 
 static void ring_buffer_put(struct ring_buffer *rb)
 {
-	struct perf_event *event, *n;
-	unsigned long flags;
-
 	if (!atomic_dec_and_test(&rb->refcount))
 		return;
 
-	spin_lock_irqsave(&rb->event_lock, flags);
-	list_for_each_entry_safe(event, n, &rb->event_list, rb_entry) {
-		list_del_init(&event->rb_entry);
-		wake_up_all(&event->waitq);
-	}
-	spin_unlock_irqrestore(&rb->event_lock, flags);
+	WARN_ON_ONCE(!list_empty(&rb->event_list));
 
 	call_rcu(&rb->rcu_head, rb_free_rcu);
 }
@@ -3605,26 +3586,100 @@ static void perf_mmap_open(struct vm_area_struct *vma)
 	struct perf_event *event = vma->vm_file->private_data;
 
 	atomic_inc(&event->mmap_count);
+	atomic_inc(&event->rb->mmap_count);
 }
 
+/*
+ * A buffer can be mmap()ed multiple times; either directly through the same
+ * event, or through other events by use of perf_event_set_output().
+ *
+ * In order to undo the VM accounting done by perf_mmap() we need to destroy
+ * the buffer here, where we still have a VM context. This means we need
+ * to detach all events redirecting to us.
+ */
 static void perf_mmap_close(struct vm_area_struct *vma)
 {
 	struct perf_event *event = vma->vm_file->private_data;
 
-	if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
-		unsigned long size = perf_data_size(event->rb);
-		struct user_struct *user = event->mmap_user;
-		struct ring_buffer *rb = event->rb;
+	struct ring_buffer *rb = event->rb;
+	struct user_struct *mmap_user = rb->mmap_user;
+	int mmap_locked = rb->mmap_locked;
+	unsigned long size = perf_data_size(rb);
+
+	atomic_dec(&rb->mmap_count);
+
+	if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
+		return;
 
-		atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
-		vma->vm_mm->pinned_vm -= event->mmap_locked;
-		rcu_assign_pointer(event->rb, NULL);
-		ring_buffer_detach(event, rb);
+	/* Detach current event from the buffer. */
+	rcu_assign_pointer(event->rb, NULL);
+	ring_buffer_detach(event, rb);
+	mutex_unlock(&event->mmap_mutex);
+
+	/* If there's still other mmap()s of this buffer, we're done. */
+	if (atomic_read(&rb->mmap_count)) {
+		ring_buffer_put(rb); /* can't be last */
+		return;
+	}
+
+	/*
+	 * No other mmap()s, detach from all other events that might redirect
+	 * into the now unreachable buffer. Somewhat complicated by the
+	 * fact that rb::event_lock otherwise nests inside mmap_mutex.
+	 */
+again:
+	rcu_read_lock();
+	list_for_each_entry_rcu(event, &rb->event_list, rb_entry) {
+		if (!atomic_long_inc_not_zero(&event->refcount)) {
+			/*
+			 * This event is en-route to free_event() which will
+			 * detach it and remove it from the list.
+			 */
+			continue;
+		}
+		rcu_read_unlock();
+
+		mutex_lock(&event->mmap_mutex);
+		/*
+		 * Check we didn't race with perf_event_set_output() which can
+		 * swizzle the rb from under us while we were waiting to
+		 * acquire mmap_mutex.
+		 *
+		 * If we find a different rb; ignore this event, a next
+		 * iteration will no longer find it on the list. We have to
+		 * still restart the iteration to make sure we're not now
+		 * iterating the wrong list.
+		 */
+		if (event->rb == rb) {
+			rcu_assign_pointer(event->rb, NULL);
+			ring_buffer_detach(event, rb);
+			ring_buffer_put(rb); /* can't be last, we still have one */
+		}
 		mutex_unlock(&event->mmap_mutex);
+		put_event(event);
 
-		ring_buffer_put(rb);
-		free_uid(user);
+		/*
+		 * Restart the iteration; either we're on the wrong list or
+		 * destroyed its integrity by doing a deletion.
+		 */
+		goto again;
 	}
+	rcu_read_unlock();
+
+	/*
+	 * It could be there's still a few 0-ref events on the list; they'll
+	 * get cleaned up by free_event() -- they'll also still have their
+	 * ref on the rb and will free it whenever they are done with it.
+	 *
+	 * Aside from that, this buffer is 'fully' detached and unmapped,
+	 * undo the VM accounting.
+	 */
+
+	atomic_long_sub((size >> PAGE_SHIFT) + 1, &mmap_user->locked_vm);
+	vma->vm_mm->pinned_vm -= mmap_locked;
+	free_uid(mmap_user);
+
+	ring_buffer_put(rb); /* could be last */
 }
 
 static const struct vm_operations_struct perf_mmap_vmops = {
@@ -3674,12 +3729,24 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 		return -EINVAL;
 
 	WARN_ON_ONCE(event->ctx->parent_ctx);
+again:
 	mutex_lock(&event->mmap_mutex);
 	if (event->rb) {
-		if (event->rb->nr_pages == nr_pages)
-			atomic_inc(&event->rb->refcount);
-		else
+		if (event->rb->nr_pages != nr_pages) {
 			ret = -EINVAL;
+			goto unlock;
+		}
+
+		if (!atomic_inc_not_zero(&event->rb->mmap_count)) {
+			/*
+			 * Raced against perf_mmap_close() through
+			 * perf_event_set_output(). Try again, hope for better
+			 * luck.
+			 */
+			mutex_unlock(&event->mmap_mutex);
+			goto again;
+		}
+
 		goto unlock;
 	}
 
@@ -3720,12 +3787,16 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 		ret = -ENOMEM;
 		goto unlock;
 	}
-	rcu_assign_pointer(event->rb, rb);
+
+	atomic_set(&rb->mmap_count, 1);
+	rb->mmap_locked = extra;
+	rb->mmap_user = get_current_user();
 
 	atomic_long_add(user_extra, &user->locked_vm);
-	event->mmap_locked = extra;
-	event->mmap_user = get_current_user();
-	vma->vm_mm->pinned_vm += event->mmap_locked;
+	vma->vm_mm->pinned_vm += extra;
+
+	ring_buffer_attach(event, rb);
+	rcu_assign_pointer(event->rb, rb);
 
 	perf_event_update_userpage(event);
 
@@ -3734,7 +3805,11 @@ unlock:
 		atomic_inc(&event->mmap_count);
 	mutex_unlock(&event->mmap_mutex);
 
-	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+	/*
+	 * Since pinned accounting is per vm we cannot allow fork() to copy our
+	 * vma.
+	 */
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_ops = &perf_mmap_vmops;
 
 	return ret;
@@ -6412,6 +6487,8 @@ set:
 	if (atomic_read(&event->mmap_count))
 		goto unlock;
 
+	old_rb = event->rb;
+
 	if (output_event) {
 		/* get the rb we want to redirect to */
 		rb = ring_buffer_get(output_event);
@@ -6419,16 +6496,28 @@ set:
 			goto unlock;
 	}
 
-	old_rb = event->rb;
-	rcu_assign_pointer(event->rb, rb);
 	if (old_rb)
 		ring_buffer_detach(event, old_rb);
+
+	if (rb)
+		ring_buffer_attach(event, rb);
+
+	rcu_assign_pointer(event->rb, rb);
+
+	if (old_rb) {
+		ring_buffer_put(old_rb);
+		/*
+		 * Since we detached before setting the new rb, so that we
+		 * could attach the new rb, we could have missed a wakeup.
+		 * Provide it now.
+		 */
+		wake_up_all(&event->waitq);
+	}
+
 	ret = 0;
 unlock:
 	mutex_unlock(&event->mmap_mutex);
 
-	if (old_rb)
-		ring_buffer_put(old_rb);
 out:
 	return ret;
 }
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index eb675c4d59df..ca6599723be5 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -31,6 +31,10 @@ struct ring_buffer {
 	spinlock_t			event_lock;
 	struct list_head		event_list;
 
+	atomic_t			mmap_count;
+	unsigned long			mmap_locked;
+	struct user_struct		*mmap_user;
+
 	struct perf_event_mmap_page	*user_page;
 	void				*data_pages[0];
 };
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 3fed7f0cbcdf..bddf3b201a48 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -467,6 +467,7 @@ static struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr)
 /* Optimization staging list, protected by kprobe_mutex */
 static LIST_HEAD(optimizing_list);
 static LIST_HEAD(unoptimizing_list);
+static LIST_HEAD(freeing_list);
 
 static void kprobe_optimizer(struct work_struct *work);
 static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
@@ -504,7 +505,7 @@ static __kprobes void do_optimize_kprobes(void)
  * Unoptimize (replace a jump with a breakpoint and remove the breakpoint
  * if need) kprobes listed on unoptimizing_list.
  */
-static __kprobes void do_unoptimize_kprobes(struct list_head *free_list)
+static __kprobes void do_unoptimize_kprobes(void)
 {
 	struct optimized_kprobe *op, *tmp;
 
@@ -515,9 +516,9 @@ static __kprobes void do_unoptimize_kprobes(struct list_head *free_list)
 	/* Ditto to do_optimize_kprobes */
 	get_online_cpus();
 	mutex_lock(&text_mutex);
-	arch_unoptimize_kprobes(&unoptimizing_list, free_list);
+	arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
 	/* Loop free_list for disarming */
-	list_for_each_entry_safe(op, tmp, free_list, list) {
+	list_for_each_entry_safe(op, tmp, &freeing_list, list) {
 		/* Disarm probes if marked disabled */
 		if (kprobe_disabled(&op->kp))
 			arch_disarm_kprobe(&op->kp);
@@ -536,11 +537,11 @@ static __kprobes void do_unoptimize_kprobes(struct list_head *free_list)
 }
 
 /* Reclaim all kprobes on the free_list */
-static __kprobes void do_free_cleaned_kprobes(struct list_head *free_list)
+static __kprobes void do_free_cleaned_kprobes(void)
 {
 	struct optimized_kprobe *op, *tmp;
 
-	list_for_each_entry_safe(op, tmp, free_list, list) {
+	list_for_each_entry_safe(op, tmp, &freeing_list, list) {
 		BUG_ON(!kprobe_unused(&op->kp));
 		list_del_init(&op->list);
 		free_aggr_kprobe(&op->kp);
@@ -556,8 +557,6 @@ static __kprobes void kick_kprobe_optimizer(void)
 /* Kprobe jump optimizer */
 static __kprobes void kprobe_optimizer(struct work_struct *work)
 {
-	LIST_HEAD(free_list);
-
 	mutex_lock(&kprobe_mutex);
 	/* Lock modules while optimizing kprobes */
 	mutex_lock(&module_mutex);
@@ -566,7 +565,7 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
 	 * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
 	 * kprobes before waiting for quiesence period.
 	 */
-	do_unoptimize_kprobes(&free_list);
+	do_unoptimize_kprobes();
 
 	/*
 	 * Step 2: Wait for quiesence period to ensure all running interrupts
@@ -581,7 +580,7 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
 	do_optimize_kprobes();
 
 	/* Step 4: Free cleaned kprobes after quiesence period */
-	do_free_cleaned_kprobes(&free_list);
+	do_free_cleaned_kprobes();
 
 	mutex_unlock(&module_mutex);
 	mutex_unlock(&kprobe_mutex);
@@ -723,8 +722,19 @@ static void __kprobes kill_optimized_kprobe(struct kprobe *p)
 	if (!list_empty(&op->list))
 		/* Dequeue from the (un)optimization queue */
 		list_del_init(&op->list);
-
 	op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
+
+	if (kprobe_unused(p)) {
+		/* Enqueue if it is unused */
+		list_add(&op->list, &freeing_list);
+		/*
+		 * Remove unused probes from the hash list. After waiting
+		 * for synchronization, this probe is reclaimed.
+		 * (reclaiming is done by do_free_cleaned_kprobes().)
+		 */
+		hlist_del_rcu(&op->kp.hlist);
+	}
+
 	/* Don't touch the code, because it is already freed. */
 	arch_remove_optimized_kprobe(op);
 }