aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/cgroup.c
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2013-11-29 10:42:59 -0500
committerTejun Heo <tj@kernel.org>2013-11-29 10:42:59 -0500
commitafb2bc14e1c989cf0635bd04edb5ff55b8c1c7bd (patch)
treef7b3984bb26c3230d96a726f1af68bf882175c6a /kernel/cgroup.c
parent045023658ca1e30dc0bb1f148b42c95b740d3e02 (diff)
cgroup: don't guarantee cgroup.procs is sorted if sane_behavior
For some reason, tasks and cgroup.procs guarantee that the result is sorted. This is the only reason this whole pidlist logic is necessary instead of just iterating through sorted member tasks. We can't do anything about the existing interface but at least ensure that such expectation doesn't exist for the new interface so that pidlist logic may be removed in the distant future. This patch scrambles the sort order if sane_behavior so that the output is usually not sorted in the new interface. Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Li Zefan <lizefan@huawei.com>
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r--kernel/cgroup.c51
1 files changed, 46 insertions, 5 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a2458031d851..f9f5fe3526ac 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3564,11 +3564,49 @@ after:
3564 return dest; 3564 return dest;
3565} 3565}
3566 3566
3567/*
3568 * The two pid files - tasks and cgroup.procs - guaranteed that the result
3569 * is sorted, which forced this whole pidlist fiasco. As pid order is
3570 * different per namespace, each namespace needs differently sorted list,
3571 * making it impossible to use, for example, single rbtree of member tasks
3572 * sorted by task pointer. As pidlists can be fairly large, allocating one
3573 * per open file is dangerous, so cgroup had to implement shared pool of
3574 * pidlists keyed by cgroup and namespace.
3575 *
3576 * All this extra complexity was caused by the original implementation
3577 * committing to an entirely unnecessary property. In the long term, we
3578 * want to do away with it. Explicitly scramble sort order if
3579 * sane_behavior so that no such expectation exists in the new interface.
3580 *
3581 * Scrambling is done by swapping every two consecutive bits, which is a
3582 * non-identity one-to-one mapping that disturbs sort order sufficiently.
3583 */
/*
 * Swap each pair of adjacent bits in @pid: bits selected by 0x55555555
 * move up one position, bits selected by 0xAAAAAAAA move down one.
 * Applying the swap twice yields the original bit pattern.
 */
static pid_t pid_fry(pid_t pid)
{
	unsigned int bits = pid;

	return ((bits & 0x55555555) << 1) | ((bits & 0xAAAAAAAA) >> 1);
}
3591
/*
 * Return pid_fry(@pid) when cgroup_sane_behavior() is true for @cgrp;
 * otherwise return @pid unchanged.
 */
static pid_t cgroup_pid_fry(struct cgroup *cgrp, pid_t pid)
{
	return cgroup_sane_behavior(cgrp) ? pid_fry(pid) : pid;
}
3599
/*
 * Comparison callback for sorting an array of pid_t in ascending order.
 *
 * @a and @b each point to a pid_t.  Returns a negative value, zero or a
 * positive value when *@a is less than, equal to or greater than *@b.
 */
static int cmppid(const void *a, const void *b)
{
	const pid_t lhs = *(const pid_t *)a;
	const pid_t rhs = *(const pid_t *)b;

	/*
	 * Compare rather than subtract: the difference of two signed
	 * values can overflow and report the wrong sign.
	 */
	return (lhs > rhs) - (lhs < rhs);
}
3571 3604
/*
 * Comparison callback that orders two pid_t values by their pid_fry()
 * images instead of by the pids themselves.
 *
 * @a and @b each point to a pid_t.  Returns a negative value, zero or a
 * positive value when pid_fry(*@a) is less than, equal to or greater
 * than pid_fry(*@b).
 */
static int fried_cmppid(const void *a, const void *b)
{
	const pid_t lhs = pid_fry(*(const pid_t *)a);
	const pid_t rhs = pid_fry(*(const pid_t *)b);

	/*
	 * Compare rather than subtract: the difference of two signed
	 * values can overflow and report the wrong sign.
	 */
	return (lhs > rhs) - (lhs < rhs);
}
3609
3572static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp, 3610static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
3573 enum cgroup_filetype type) 3611 enum cgroup_filetype type)
3574{ 3612{
@@ -3656,7 +3694,10 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
3656 css_task_iter_end(&it); 3694 css_task_iter_end(&it);
3657 length = n; 3695 length = n;
3658 /* now sort & (if procs) strip out duplicates */ 3696 /* now sort & (if procs) strip out duplicates */
3659 sort(array, length, sizeof(pid_t), cmppid, NULL); 3697 if (cgroup_sane_behavior(cgrp))
3698 sort(array, length, sizeof(pid_t), fried_cmppid, NULL);
3699 else
3700 sort(array, length, sizeof(pid_t), cmppid, NULL);
3660 if (type == CGROUP_FILE_PROCS) 3701 if (type == CGROUP_FILE_PROCS)
3661 length = pidlist_uniq(array, length); 3702 length = pidlist_uniq(array, length);
3662 3703
@@ -3777,10 +3818,10 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
3777 3818
3778 while (index < end) { 3819 while (index < end) {
3779 int mid = (index + end) / 2; 3820 int mid = (index + end) / 2;
3780 if (l->list[mid] == pid) { 3821 if (cgroup_pid_fry(cgrp, l->list[mid]) == pid) {
3781 index = mid; 3822 index = mid;
3782 break; 3823 break;
3783 } else if (l->list[mid] <= pid) 3824 } else if (cgroup_pid_fry(cgrp, l->list[mid]) <= pid)
3784 index = mid + 1; 3825 index = mid + 1;
3785 else 3826 else
3786 end = mid; 3827 end = mid;
@@ -3791,7 +3832,7 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
3791 return NULL; 3832 return NULL;
3792 /* Update the abstract position to be the actual pid that we found */ 3833 /* Update the abstract position to be the actual pid that we found */
3793 iter = l->list + index; 3834 iter = l->list + index;
3794 *pos = *iter; 3835 *pos = cgroup_pid_fry(cgrp, *iter);
3795 return iter; 3836 return iter;
3796} 3837}
3797 3838
@@ -3820,7 +3861,7 @@ static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
3820 if (p >= end) { 3861 if (p >= end) {
3821 return NULL; 3862 return NULL;
3822 } else { 3863 } else {
3823 *pos = *p; 3864 *pos = cgroup_pid_fry(of->cgrp, *p);
3824 return p; 3865 return p;
3825 } 3866 }
3826} 3867}