diff options
Diffstat (limited to 'kernel/perf_counter.c')
-rw-r--r-- | kernel/perf_counter.c | 89 |
1 files changed, 79 insertions, 10 deletions
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 52e5a15321d8..db843f812a60 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c | |||
@@ -919,7 +919,8 @@ static int context_equiv(struct perf_counter_context *ctx1, | |||
919 | struct perf_counter_context *ctx2) | 919 | struct perf_counter_context *ctx2) |
920 | { | 920 | { |
921 | return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx | 921 | return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx |
922 | && ctx1->parent_gen == ctx2->parent_gen; | 922 | && ctx1->parent_gen == ctx2->parent_gen |
923 | && ctx1->parent_gen != ~0ull; | ||
923 | } | 924 | } |
924 | 925 | ||
925 | /* | 926 | /* |
@@ -1339,7 +1340,6 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu) | |||
1339 | put_ctx(parent_ctx); | 1340 | put_ctx(parent_ctx); |
1340 | ctx->parent_ctx = NULL; /* no longer a clone */ | 1341 | ctx->parent_ctx = NULL; /* no longer a clone */ |
1341 | } | 1342 | } |
1342 | ++ctx->generation; | ||
1343 | /* | 1343 | /* |
1344 | * Get an extra reference before dropping the lock so that | 1344 | * Get an extra reference before dropping the lock so that |
1345 | * this context won't get freed if the task exits. | 1345 | * this context won't get freed if the task exits. |
@@ -1414,6 +1414,7 @@ static int perf_release(struct inode *inode, struct file *file) | |||
1414 | 1414 | ||
1415 | file->private_data = NULL; | 1415 | file->private_data = NULL; |
1416 | 1416 | ||
1417 | WARN_ON_ONCE(ctx->parent_ctx); | ||
1417 | mutex_lock(&ctx->mutex); | 1418 | mutex_lock(&ctx->mutex); |
1418 | perf_counter_remove_from_context(counter); | 1419 | perf_counter_remove_from_context(counter); |
1419 | mutex_unlock(&ctx->mutex); | 1420 | mutex_unlock(&ctx->mutex); |
@@ -1445,6 +1446,7 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) | |||
1445 | if (counter->state == PERF_COUNTER_STATE_ERROR) | 1446 | if (counter->state == PERF_COUNTER_STATE_ERROR) |
1446 | return 0; | 1447 | return 0; |
1447 | 1448 | ||
1449 | WARN_ON_ONCE(counter->ctx->parent_ctx); | ||
1448 | mutex_lock(&counter->child_mutex); | 1450 | mutex_lock(&counter->child_mutex); |
1449 | values[0] = perf_counter_read(counter); | 1451 | values[0] = perf_counter_read(counter); |
1450 | n = 1; | 1452 | n = 1; |
@@ -1504,6 +1506,7 @@ static void perf_counter_for_each_sibling(struct perf_counter *counter, | |||
1504 | struct perf_counter_context *ctx = counter->ctx; | 1506 | struct perf_counter_context *ctx = counter->ctx; |
1505 | struct perf_counter *sibling; | 1507 | struct perf_counter *sibling; |
1506 | 1508 | ||
1509 | WARN_ON_ONCE(ctx->parent_ctx); | ||
1507 | mutex_lock(&ctx->mutex); | 1510 | mutex_lock(&ctx->mutex); |
1508 | counter = counter->group_leader; | 1511 | counter = counter->group_leader; |
1509 | 1512 | ||
@@ -1524,6 +1527,7 @@ static void perf_counter_for_each_child(struct perf_counter *counter, | |||
1524 | { | 1527 | { |
1525 | struct perf_counter *child; | 1528 | struct perf_counter *child; |
1526 | 1529 | ||
1530 | WARN_ON_ONCE(counter->ctx->parent_ctx); | ||
1527 | mutex_lock(&counter->child_mutex); | 1531 | mutex_lock(&counter->child_mutex); |
1528 | func(counter); | 1532 | func(counter); |
1529 | list_for_each_entry(child, &counter->child_list, child_list) | 1533 | list_for_each_entry(child, &counter->child_list, child_list) |
@@ -1536,6 +1540,7 @@ static void perf_counter_for_each(struct perf_counter *counter, | |||
1536 | { | 1540 | { |
1537 | struct perf_counter *child; | 1541 | struct perf_counter *child; |
1538 | 1542 | ||
1543 | WARN_ON_ONCE(counter->ctx->parent_ctx); | ||
1539 | mutex_lock(&counter->child_mutex); | 1544 | mutex_lock(&counter->child_mutex); |
1540 | perf_counter_for_each_sibling(counter, func); | 1545 | perf_counter_for_each_sibling(counter, func); |
1541 | list_for_each_entry(child, &counter->child_list, child_list) | 1546 | list_for_each_entry(child, &counter->child_list, child_list) |
@@ -1741,6 +1746,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) | |||
1741 | { | 1746 | { |
1742 | struct perf_counter *counter = vma->vm_file->private_data; | 1747 | struct perf_counter *counter = vma->vm_file->private_data; |
1743 | 1748 | ||
1749 | WARN_ON_ONCE(counter->ctx->parent_ctx); | ||
1744 | if (atomic_dec_and_mutex_lock(&counter->mmap_count, | 1750 | if (atomic_dec_and_mutex_lock(&counter->mmap_count, |
1745 | &counter->mmap_mutex)) { | 1751 | &counter->mmap_mutex)) { |
1746 | struct user_struct *user = current_user(); | 1752 | struct user_struct *user = current_user(); |
@@ -1788,6 +1794,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
1788 | if (vma->vm_pgoff != 0) | 1794 | if (vma->vm_pgoff != 0) |
1789 | return -EINVAL; | 1795 | return -EINVAL; |
1790 | 1796 | ||
1797 | WARN_ON_ONCE(counter->ctx->parent_ctx); | ||
1791 | mutex_lock(&counter->mmap_mutex); | 1798 | mutex_lock(&counter->mmap_mutex); |
1792 | if (atomic_inc_not_zero(&counter->mmap_count)) { | 1799 | if (atomic_inc_not_zero(&counter->mmap_count)) { |
1793 | if (nr_pages != counter->data->nr_pages) | 1800 | if (nr_pages != counter->data->nr_pages) |
@@ -3349,8 +3356,10 @@ SYSCALL_DEFINE5(perf_counter_open, | |||
3349 | goto err_free_put_context; | 3356 | goto err_free_put_context; |
3350 | 3357 | ||
3351 | counter->filp = counter_file; | 3358 | counter->filp = counter_file; |
3359 | WARN_ON_ONCE(ctx->parent_ctx); | ||
3352 | mutex_lock(&ctx->mutex); | 3360 | mutex_lock(&ctx->mutex); |
3353 | perf_install_in_context(ctx, counter, cpu); | 3361 | perf_install_in_context(ctx, counter, cpu); |
3362 | ++ctx->generation; | ||
3354 | mutex_unlock(&ctx->mutex); | 3363 | mutex_unlock(&ctx->mutex); |
3355 | 3364 | ||
3356 | counter->owner = current; | 3365 | counter->owner = current; |
@@ -3436,6 +3445,7 @@ inherit_counter(struct perf_counter *parent_counter, | |||
3436 | /* | 3445 | /* |
3437 | * Link this into the parent counter's child list | 3446 | * Link this into the parent counter's child list |
3438 | */ | 3447 | */ |
3448 | WARN_ON_ONCE(parent_counter->ctx->parent_ctx); | ||
3439 | mutex_lock(&parent_counter->child_mutex); | 3449 | mutex_lock(&parent_counter->child_mutex); |
3440 | list_add_tail(&child_counter->child_list, &parent_counter->child_list); | 3450 | list_add_tail(&child_counter->child_list, &parent_counter->child_list); |
3441 | mutex_unlock(&parent_counter->child_mutex); | 3451 | mutex_unlock(&parent_counter->child_mutex); |
@@ -3485,6 +3495,7 @@ static void sync_child_counter(struct perf_counter *child_counter, | |||
3485 | /* | 3495 | /* |
3486 | * Remove this counter from the parent's list | 3496 | * Remove this counter from the parent's list |
3487 | */ | 3497 | */ |
3498 | WARN_ON_ONCE(parent_counter->ctx->parent_ctx); | ||
3488 | mutex_lock(&parent_counter->child_mutex); | 3499 | mutex_lock(&parent_counter->child_mutex); |
3489 | list_del_init(&child_counter->child_list); | 3500 | list_del_init(&child_counter->child_list); |
3490 | mutex_unlock(&parent_counter->child_mutex); | 3501 | mutex_unlock(&parent_counter->child_mutex); |
@@ -3527,12 +3538,17 @@ void perf_counter_exit_task(struct task_struct *child) | |||
3527 | struct perf_counter_context *child_ctx; | 3538 | struct perf_counter_context *child_ctx; |
3528 | unsigned long flags; | 3539 | unsigned long flags; |
3529 | 3540 | ||
3530 | child_ctx = child->perf_counter_ctxp; | 3541 | if (likely(!child->perf_counter_ctxp)) |
3531 | |||
3532 | if (likely(!child_ctx)) | ||
3533 | return; | 3542 | return; |
3534 | 3543 | ||
3535 | local_irq_save(flags); | 3544 | local_irq_save(flags); |
3545 | /* | ||
3546 | * We can't reschedule here because interrupts are disabled, | ||
3547 | * and either child is current or it is a task that can't be | ||
3548 | * scheduled, so we are now safe from rescheduling changing | ||
3549 | * our context. | ||
3550 | */ | ||
3551 | child_ctx = child->perf_counter_ctxp; | ||
3536 | __perf_counter_task_sched_out(child_ctx); | 3552 | __perf_counter_task_sched_out(child_ctx); |
3537 | 3553 | ||
3538 | /* | 3554 | /* |
@@ -3542,6 +3558,15 @@ void perf_counter_exit_task(struct task_struct *child) | |||
3542 | */ | 3558 | */ |
3543 | spin_lock(&child_ctx->lock); | 3559 | spin_lock(&child_ctx->lock); |
3544 | child->perf_counter_ctxp = NULL; | 3560 | child->perf_counter_ctxp = NULL; |
3561 | if (child_ctx->parent_ctx) { | ||
3562 | /* | ||
3563 | * This context is a clone; unclone it so it can't get | ||
3564 | * swapped to another process while we're removing all | ||
3565 | * the counters from it. | ||
3566 | */ | ||
3567 | put_ctx(child_ctx->parent_ctx); | ||
3568 | child_ctx->parent_ctx = NULL; | ||
3569 | } | ||
3545 | spin_unlock(&child_ctx->lock); | 3570 | spin_unlock(&child_ctx->lock); |
3546 | local_irq_restore(flags); | 3571 | local_irq_restore(flags); |
3547 | 3572 | ||
@@ -3571,9 +3596,11 @@ again: | |||
3571 | int perf_counter_init_task(struct task_struct *child) | 3596 | int perf_counter_init_task(struct task_struct *child) |
3572 | { | 3597 | { |
3573 | struct perf_counter_context *child_ctx, *parent_ctx; | 3598 | struct perf_counter_context *child_ctx, *parent_ctx; |
3599 | struct perf_counter_context *cloned_ctx; | ||
3574 | struct perf_counter *counter; | 3600 | struct perf_counter *counter; |
3575 | struct task_struct *parent = current; | 3601 | struct task_struct *parent = current; |
3576 | int inherited_all = 1; | 3602 | int inherited_all = 1; |
3603 | u64 cloned_gen; | ||
3577 | int ret = 0; | 3604 | int ret = 0; |
3578 | 3605 | ||
3579 | child->perf_counter_ctxp = NULL; | 3606 | child->perf_counter_ctxp = NULL; |
@@ -3581,8 +3608,7 @@ int perf_counter_init_task(struct task_struct *child) | |||
3581 | mutex_init(&child->perf_counter_mutex); | 3608 | mutex_init(&child->perf_counter_mutex); |
3582 | INIT_LIST_HEAD(&child->perf_counter_list); | 3609 | INIT_LIST_HEAD(&child->perf_counter_list); |
3583 | 3610 | ||
3584 | parent_ctx = parent->perf_counter_ctxp; | 3611 | if (likely(!parent->perf_counter_ctxp)) |
3585 | if (likely(!parent_ctx || !parent_ctx->nr_counters)) | ||
3586 | return 0; | 3612 | return 0; |
3587 | 3613 | ||
3588 | /* | 3614 | /* |
@@ -3600,6 +3626,34 @@ int perf_counter_init_task(struct task_struct *child) | |||
3600 | get_task_struct(child); | 3626 | get_task_struct(child); |
3601 | 3627 | ||
3602 | /* | 3628 | /* |
3629 | * If the parent's context is a clone, temporarily set its | ||
3630 | * parent_gen to an impossible value (all 1s) so it won't get | ||
3631 | * swapped under us. The rcu_read_lock makes sure that | ||
3632 | * parent_ctx continues to exist even if it gets swapped to | ||
3633 | * another process and then freed while we are trying to get | ||
3634 | * its lock. | ||
3635 | */ | ||
3636 | rcu_read_lock(); | ||
3637 | retry: | ||
3638 | parent_ctx = rcu_dereference(parent->perf_counter_ctxp); | ||
3639 | /* | ||
3640 | * No need to check if parent_ctx != NULL here; since we saw | ||
3641 | * it non-NULL earlier, the only reason for it to become NULL | ||
3642 | * is if we exit, and since we're currently in the middle of | ||
3643 | * a fork we can't be exiting at the same time. | ||
3644 | */ | ||
3645 | spin_lock_irq(&parent_ctx->lock); | ||
3646 | if (parent_ctx != rcu_dereference(parent->perf_counter_ctxp)) { | ||
3647 | spin_unlock_irq(&parent_ctx->lock); | ||
3648 | goto retry; | ||
3649 | } | ||
3650 | cloned_gen = parent_ctx->parent_gen; | ||
3651 | if (parent_ctx->parent_ctx) | ||
3652 | parent_ctx->parent_gen = ~0ull; | ||
3653 | spin_unlock_irq(&parent_ctx->lock); | ||
3654 | rcu_read_unlock(); | ||
3655 | |||
3656 | /* | ||
3603 | * Lock the parent list. No need to lock the child - not PID | 3657 | * Lock the parent list. No need to lock the child - not PID |
3604 | * hashed yet and not running, so nobody can access it. | 3658 | * hashed yet and not running, so nobody can access it. |
3605 | */ | 3659 | */ |
@@ -3630,10 +3684,15 @@ int perf_counter_init_task(struct task_struct *child) | |||
3630 | /* | 3684 | /* |
3631 | * Mark the child context as a clone of the parent | 3685 | * Mark the child context as a clone of the parent |
3632 | * context, or of whatever the parent is a clone of. | 3686 | * context, or of whatever the parent is a clone of. |
3687 | * Note that if the parent is a clone, it could get | ||
3688 | * uncloned at any point, but that doesn't matter | ||
3689 | * because the list of counters and the generation | ||
3690 | * count can't have changed since we took the mutex. | ||
3633 | */ | 3691 | */ |
3634 | if (parent_ctx->parent_ctx) { | 3692 | cloned_ctx = rcu_dereference(parent_ctx->parent_ctx); |
3635 | child_ctx->parent_ctx = parent_ctx->parent_ctx; | 3693 | if (cloned_ctx) { |
3636 | child_ctx->parent_gen = parent_ctx->parent_gen; | 3694 | child_ctx->parent_ctx = cloned_ctx; |
3695 | child_ctx->parent_gen = cloned_gen; | ||
3637 | } else { | 3696 | } else { |
3638 | child_ctx->parent_ctx = parent_ctx; | 3697 | child_ctx->parent_ctx = parent_ctx; |
3639 | child_ctx->parent_gen = parent_ctx->generation; | 3698 | child_ctx->parent_gen = parent_ctx->generation; |
@@ -3643,6 +3702,16 @@ int perf_counter_init_task(struct task_struct *child) | |||
3643 | 3702 | ||
3644 | mutex_unlock(&parent_ctx->mutex); | 3703 | mutex_unlock(&parent_ctx->mutex); |
3645 | 3704 | ||
3705 | /* | ||
3706 | * Restore the clone status of the parent. | ||
3707 | */ | ||
3708 | if (parent_ctx->parent_ctx) { | ||
3709 | spin_lock_irq(&parent_ctx->lock); | ||
3710 | if (parent_ctx->parent_ctx) | ||
3711 | parent_ctx->parent_gen = cloned_gen; | ||
3712 | spin_unlock_irq(&parent_ctx->lock); | ||
3713 | } | ||
3714 | |||
3646 | return ret; | 3715 | return ret; |
3647 | } | 3716 | } |
3648 | 3717 | ||