path: root/kernel
author    Li Zefan <lizefan@huawei.com>    2013-02-18 05:56:14 -0500
committer Tejun Heo <tj@kernel.org>        2013-02-18 12:17:24 -0500
commit    810cbee4fad570ff167132d4ecf247d99c48f71d
tree      2806f56a033f43b17ad2c547b2153941c0e77f40 /kernel
parent    63f43f55c9bbc14f76b582644019b8a07dc8219a
cgroup: fix cgroup_rmdir() vs close(eventfd) race
commit 205a872bd6f9a9a09ef035ef1e90185a8245cc58 ("cgroup: fix lockdep warning for event_control") solved a deadlock by introducing a new bug.

Moving cgrp->event_list to a temporary list doesn't mean the list can be traversed locklessly, because cgroup_event_wake() can be called at the same time and remove the event from the list. The result of this race is disastrous.

We adopt the way the kvm irqfd code implements race-free event removal, which is now described in the comments in cgroup_event_wake().

v3:
- call eventfd_signal() no matter whether it's the eventfd close or the cgroup removal that removes the cgroup event.

Acked-by: Kirill A. Shutemov <kirill@shutemov.name>
Signed-off-by: Li Zefan <lizefan@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
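[Editorial note] A minimal userspace sketch of the removal idiom the patch adopts may help: whichever path still finds the event attached, checked under the list lock, owns scheduling the cleanup; the loser of the race returns without touching the event. All names here (try_detach(), schedule_remove(), on_list) are illustrative stand-ins, not cgroup.c code.

/* Userspace model of the race-free removal idiom (hypothetical names). */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t event_list_lock = PTHREAD_MUTEX_INITIALIZER;

struct event {
	bool on_list;	/* stands in for !list_empty(&event->list) */
};

/* Stands in for schedule_work(&event->remove): defers sleeping cleanup. */
static void schedule_remove(struct event *ev, const char *who)
{
	(void)ev;
	printf("cleanup scheduled once, by the %s path\n", who);
}

/*
 * Called from both racing paths: POLLHUP on the eventfd and cgroup
 * removal.  Only the caller that still sees the event on the list
 * schedules the cleanup; clearing on_list under the lock (the
 * list_del_init() analogue) makes the check stable.
 */
static void try_detach(struct event *ev, const char *who)
{
	pthread_mutex_lock(&event_list_lock);
	if (ev->on_list) {
		ev->on_list = false;
		schedule_remove(ev, who);
	}
	pthread_mutex_unlock(&event_list_lock);
}

static void *close_path(void *arg) { try_detach(arg, "close"); return NULL; }
static void *rmdir_path(void *arg) { try_detach(arg, "rmdir"); return NULL; }

int main(void)
{
	struct event ev = { .on_list = true };
	pthread_t a, b;

	pthread_create(&a, NULL, close_path, &ev);
	pthread_create(&b, NULL, rmdir_path, &ev);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}

Built with gcc -pthread, this prints the "cleanup scheduled" line exactly once no matter which thread wins the race.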
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/cgroup.c | 41
1 file changed, 25 insertions(+), 16 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 5d4c92ead691..feda81487be4 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3786,8 +3786,13 @@ static void cgroup_event_remove(struct work_struct *work)
 						  remove);
 	struct cgroup *cgrp = event->cgrp;
 
+	remove_wait_queue(event->wqh, &event->wait);
+
 	event->cft->unregister_event(cgrp, event->cft, event->eventfd);
 
+	/* Notify userspace the event is going away. */
+	eventfd_signal(event->eventfd, 1);
+
 	eventfd_ctx_put(event->eventfd);
 	kfree(event);
 	dput(cgrp->dentry);
@@ -3807,15 +3812,25 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
 	unsigned long flags = (unsigned long)key;
 
 	if (flags & POLLHUP) {
-		__remove_wait_queue(event->wqh, &event->wait);
-		spin_lock(&cgrp->event_list_lock);
-		list_del_init(&event->list);
-		spin_unlock(&cgrp->event_list_lock);
 		/*
-		 * We are in atomic context, but cgroup_event_remove() may
-		 * sleep, so we have to call it in workqueue.
+		 * If the event has been detached at cgroup removal, we
+		 * can simply return knowing the other side will cleanup
+		 * for us.
+		 *
+		 * We can't race against event freeing since the other
+		 * side will require wqh->lock via remove_wait_queue(),
+		 * which we hold.
 		 */
-		schedule_work(&event->remove);
+		spin_lock(&cgrp->event_list_lock);
+		if (!list_empty(&event->list)) {
+			list_del_init(&event->list);
+			/*
+			 * We are in atomic context, but cgroup_event_remove()
+			 * may sleep, so we have to call it in workqueue.
+			 */
+			schedule_work(&event->remove);
+		}
+		spin_unlock(&cgrp->event_list_lock);
 	}
 
 	return 0;
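[Editorial note] The new comment in the hunk above leans on a lifetime guarantee that a small model may make clearer: wakeup callbacks run with wqh->lock held, and the cleanup side must take that same lock inside remove_wait_queue() before it may free the event. A rough sketch with a plain mutex standing in for wqh->lock (hypothetical names, not kernel code):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t wqh_lock = PTHREAD_MUTEX_INITIALIZER;
static int detached;		/* flipped under wqh_lock, like queue removal */
static int *event_data;		/* freed by the cleanup side */

/* Wakeup side: runs its body under wqh_lock, as wait entries do. */
static void *wake(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&wqh_lock);
	if (!detached)		/* still queued: the free cannot have happened */
		printf("callback reads %d safely\n", *event_data);
	pthread_mutex_unlock(&wqh_lock);
	return NULL;
}

/* Cleanup side: detach under wqh_lock (remove_wait_queue), then free. */
static void *cleanup(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&wqh_lock);
	detached = 1;
	pthread_mutex_unlock(&wqh_lock);
	free(event_data);	/* safe: no callback can be mid-read */
	return NULL;
}

int main(void)
{
	pthread_t w, c;

	event_data = malloc(sizeof(*event_data));
	*event_data = 1;
	pthread_create(&w, NULL, wake, NULL);
	pthread_create(&c, NULL, cleanup, NULL);
	pthread_join(w, NULL);
	pthread_join(c, NULL);
	return 0;
}

Whichever thread takes the lock first, the callback either reads the data before the free or observes the detach and backs off; this is the same ordering that lets cgroup_event_remove() free the event after remove_wait_queue() returns.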
@@ -4375,20 +4390,14 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	/*
 	 * Unregister events and notify userspace.
 	 * Notify userspace about cgroup removing only after rmdir of cgroup
-	 * directory to avoid race between userspace and kernelspace. Use
-	 * a temporary list to avoid a deadlock with cgroup_event_wake(). Since
-	 * cgroup_event_wake() is called with the wait queue head locked,
-	 * remove_wait_queue() cannot be called while holding event_list_lock.
+	 * directory to avoid race between userspace and kernelspace.
 	 */
 	spin_lock(&cgrp->event_list_lock);
-	list_splice_init(&cgrp->event_list, &tmp_list);
-	spin_unlock(&cgrp->event_list_lock);
-	list_for_each_entry_safe(event, tmp, &tmp_list, list) {
+	list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
 		list_del_init(&event->list);
-		remove_wait_queue(event->wqh, &event->wait);
-		eventfd_signal(event->eventfd, 1);
 		schedule_work(&event->remove);
 	}
+	spin_unlock(&cgrp->event_list_lock);
 
 	return 0;
 }