aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Tejun Heo <tj@kernel.org> 2013-11-29 10:42:59 -0500
committer: Tejun Heo <tj@kernel.org> 2013-11-29 10:42:59 -0500
commit: 4bac00d16a8760eae7205e41d2c246477d42a210 (patch)
tree: 135e6ab97d2b9fbddd506298d01c43aa400acb3d
parent: 069df3b7aeb3f4e926c4da9630c92010909af512 (diff)
cgroup: load and release pidlists from seq_file start and stop respectively
Currently, pidlists are reference counted from file open and release methods. This means that holding onto an open file may waste memory and reads may return data which is very stale. Neither is critical because pidlists are keyed and shared per namespace and, well, the user isn't supposed to have a large delay between open and reads.

cgroup is planned to be converted to use kernfs and it'd be best if we can stick to just the seq_file operations - start, next, stop and show. This can be achieved by loading the pidlist on demand from start and releasing it with a time delay from stop, so that consecutive reads don't end up reloading the pidlist on each iteration. This would remove the need for hooking into open and release while also avoiding issues with holding onto a pidlist for too long.

The previous patches implemented delayed release and restructured pidlist handling so that pidlists can be loaded and released from seq_file start / stop. This patch actually moves pidlist load to start and release to stop.

This means that a pidlist is pinned only between start and stop and may go away between two consecutive read calls if the two calls are apart by more than CGROUP_PIDLIST_DESTROY_DELAY. cgroup_pidlist_start() thus can't re-use the stored cgroup_pidlist_open_file->pidlist directly. During start, it's only used as a hint indicating whether this is the first start after open or not, and the pidlist is always looked up or created.

pidlist_mutex locking and reference counting are moved out of pidlist_array_load() so that pidlist_array_load() can perform lookup and creation atomically. While this enlarges the area covered by pidlist_mutex, given how the lock is used, it's highly unlikely to be noticeable.

v2: Refreshed on top of the updated "cgroup: introduce struct cgroup_pidlist_open_file".

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
-rw-r--r-- kernel/cgroup.c 63
1 files changed, 34 insertions, 29 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index dc39e1774542..671cbde883e9 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3473,6 +3473,8 @@ struct cgroup_pidlist_open_file {
3473 struct cgroup_pidlist *pidlist; 3473 struct cgroup_pidlist *pidlist;
3474}; 3474};
3475 3475
3476static void cgroup_release_pid_array(struct cgroup_pidlist *l);
3477
3476/* 3478/*
3477 * The following two functions "fix" the issue where there are more pids 3479 * The following two functions "fix" the issue where there are more pids
3478 * than kmalloc will give memory for; in such cases, we use vmalloc/vfree. 3480 * than kmalloc will give memory for; in such cases, we use vmalloc/vfree.
@@ -3630,6 +3632,8 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
3630 struct task_struct *tsk; 3632 struct task_struct *tsk;
3631 struct cgroup_pidlist *l; 3633 struct cgroup_pidlist *l;
3632 3634
3635 lockdep_assert_held(&cgrp->pidlist_mutex);
3636
3633 /* 3637 /*
3634 * If cgroup gets more users after we read count, we won't have 3638 * If cgroup gets more users after we read count, we won't have
3635 * enough space - tough. This race is indistinguishable to the 3639 * enough space - tough. This race is indistinguishable to the
@@ -3660,8 +3664,6 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
3660 if (type == CGROUP_FILE_PROCS) 3664 if (type == CGROUP_FILE_PROCS)
3661 length = pidlist_uniq(array, length); 3665 length = pidlist_uniq(array, length);
3662 3666
3663 mutex_lock(&cgrp->pidlist_mutex);
3664
3665 l = cgroup_pidlist_find_create(cgrp, type); 3667 l = cgroup_pidlist_find_create(cgrp, type);
3666 if (!l) { 3668 if (!l) {
3667 mutex_unlock(&cgrp->pidlist_mutex); 3669 mutex_unlock(&cgrp->pidlist_mutex);
@@ -3673,10 +3675,6 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
3673 pidlist_free(l->list); 3675 pidlist_free(l->list);
3674 l->list = array; 3676 l->list = array;
3675 l->length = length; 3677 l->length = length;
3676 l->use_count++;
3677
3678 mutex_unlock(&cgrp->pidlist_mutex);
3679
3680 *lp = l; 3678 *lp = l;
3681 return 0; 3679 return 0;
3682} 3680}
@@ -3751,11 +3749,34 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
3751 * next pid to display, if any 3749 * next pid to display, if any
3752 */ 3750 */
3753 struct cgroup_pidlist_open_file *of = s->private; 3751 struct cgroup_pidlist_open_file *of = s->private;
3754 struct cgroup_pidlist *l = of->pidlist; 3752 struct cgroup *cgrp = of->cgrp;
3753 struct cgroup_pidlist *l;
3755 int index = 0, pid = *pos; 3754 int index = 0, pid = *pos;
3756 int *iter; 3755 int *iter, ret;
3756
3757 mutex_lock(&cgrp->pidlist_mutex);
3758
3759 /*
3760 * !NULL @of->pidlist indicates that this isn't the first start()
3761 * after open. If the matching pidlist is around, we can use that.
3762 * Look for it. Note that @of->pidlist can't be used directly. It
3763 * could already have been destroyed.
3764 */
3765 if (of->pidlist)
3766 of->pidlist = cgroup_pidlist_find(cgrp, of->type);
3767
3768 /*
3769 * Either this is the first start() after open or the matching
3770 * pidlist has been destroyed inbetween. Create a new one.
3771 */
3772 if (!of->pidlist) {
3773 ret = pidlist_array_load(of->cgrp, of->type, &of->pidlist);
3774 if (ret)
3775 return ERR_PTR(ret);
3776 }
3777 l = of->pidlist;
3778 l->use_count++;
3757 3779
3758 mutex_lock(&of->cgrp->pidlist_mutex);
3759 if (pid) { 3780 if (pid) {
3760 int end = l->length; 3781 int end = l->length;
3761 3782
@@ -3784,6 +3805,8 @@ static void cgroup_pidlist_stop(struct seq_file *s, void *v)
3784 struct cgroup_pidlist_open_file *of = s->private; 3805 struct cgroup_pidlist_open_file *of = s->private;
3785 3806
3786 mutex_unlock(&of->cgrp->pidlist_mutex); 3807 mutex_unlock(&of->cgrp->pidlist_mutex);
3808 if (of->pidlist)
3809 cgroup_release_pid_array(of->pidlist);
3787} 3810}
3788 3811
3789static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos) 3812static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
@@ -3832,20 +3855,11 @@ static void cgroup_release_pid_array(struct cgroup_pidlist *l)
3832 mutex_unlock(&l->owner->pidlist_mutex); 3855 mutex_unlock(&l->owner->pidlist_mutex);
3833} 3856}
3834 3857
3835static int cgroup_pidlist_release(struct inode *inode, struct file *file)
3836{
3837 struct cgroup_pidlist_open_file *of;
3838
3839 of = ((struct seq_file *)file->private_data)->private;
3840 cgroup_release_pid_array(of->pidlist);
3841 return seq_release_private(inode, file);
3842}
3843
3844static const struct file_operations cgroup_pidlist_operations = { 3858static const struct file_operations cgroup_pidlist_operations = {
3845 .read = seq_read, 3859 .read = seq_read,
3846 .llseek = seq_lseek, 3860 .llseek = seq_lseek,
3847 .write = cgroup_file_write, 3861 .write = cgroup_file_write,
3848 .release = cgroup_pidlist_release, 3862 .release = seq_release_private,
3849}; 3863};
3850 3864
3851/* 3865/*
@@ -3858,26 +3872,17 @@ static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type)
3858{ 3872{
3859 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); 3873 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
3860 struct cgroup_pidlist_open_file *of; 3874 struct cgroup_pidlist_open_file *of;
3861 struct cgroup_pidlist *l;
3862 int retval;
3863 3875
3864 /* have the array populated */
3865 retval = pidlist_array_load(cgrp, type, &l);
3866 if (retval)
3867 return retval;
3868 /* configure file information */ 3876 /* configure file information */
3869 file->f_op = &cgroup_pidlist_operations; 3877 file->f_op = &cgroup_pidlist_operations;
3870 3878
3871 of = __seq_open_private(file, &cgroup_pidlist_seq_operations, 3879 of = __seq_open_private(file, &cgroup_pidlist_seq_operations,
3872 sizeof(*of)); 3880 sizeof(*of));
3873 if (!of) { 3881 if (!of)
3874 cgroup_release_pid_array(l);
3875 return -ENOMEM; 3882 return -ENOMEM;
3876 }
3877 3883
3878 of->type = type; 3884 of->type = type;
3879 of->cgrp = cgrp; 3885 of->cgrp = cgrp;
3880 of->pidlist = l;
3881 return 0; 3886 return 0;
3882} 3887}
3883static int cgroup_tasks_open(struct inode *unused, struct file *file) 3888static int cgroup_tasks_open(struct inode *unused, struct file *file)