diff options
-rw-r--r-- | include/linux/cgroup.h | 3 | ||||
-rw-r--r-- | kernel/cgroup.c | 51 |
2 files changed, 49 insertions, 5 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 5207c28c2402..50d8cc37498b 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h | |||
@@ -275,6 +275,9 @@ enum { | |||
275 | * - "tasks" is removed. Everything should be at process | 275 | * - "tasks" is removed. Everything should be at process |
276 | * granularity. Use "cgroup.procs" instead. | 276 | * granularity. Use "cgroup.procs" instead. |
277 | * | 277 | * |
278 | * - "cgroup.procs" is not sorted. pids will be unique unless they | ||
279 | * got recycled in between reads. | ||
280 | * | ||
278 | * - "release_agent" and "notify_on_release" are removed. | 281 | * - "release_agent" and "notify_on_release" are removed. |
279 | * Replacement notification mechanism will be implemented. | 282 | * Replacement notification mechanism will be implemented. |
280 | * | 283 | * |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index a2458031d851..f9f5fe3526ac 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -3564,11 +3564,49 @@ after: | |||
3564 | return dest; | 3564 | return dest; |
3565 | } | 3565 | } |
3566 | 3566 | ||
3567 | /* | ||
3568 | * The two pid files - tasks and cgroup.procs - guaranteed that the result | ||
3569 | * is sorted, which forced this whole pidlist fiasco. As pid order is | ||
3570 | * different per namespace, each namespace needs a differently sorted list, | ||
3571 | * making it impossible to use, for example, a single rbtree of member tasks | ||
3572 | * sorted by task pointer. As pidlists can be fairly large, allocating one | ||
3573 | * per open file is dangerous, so cgroup had to implement a shared pool of | ||
3574 | * pidlists keyed by cgroup and namespace. | ||
3575 | * | ||
3576 | * All this extra complexity was caused by the original implementation | ||
3577 | * committing to an entirely unnecessary property. In the long term, we | ||
3578 | * want to do away with it. Explicitly scramble sort order if | ||
3579 | * sane_behavior so that no such expectation exists in the new interface. | ||
3580 | * | ||
3581 | * Scrambling is done by swapping each even bit with its odd neighbour, which is | ||
3582 | * a non-identity one-to-one mapping that disturbs sort order sufficiently. | ||
3583 | */ | ||
3584 | static pid_t pid_fry(pid_t pid) | ||
3585 | { | ||
3586 | unsigned a = pid & 0x55555555; | ||
3587 | unsigned b = pid & 0xAAAAAAAA; | ||
3588 | |||
3589 | return (a << 1) | (b >> 1); | ||
3590 | } | ||
3591 | |||
3592 | static pid_t cgroup_pid_fry(struct cgroup *cgrp, pid_t pid) | ||
3593 | { | ||
3594 | if (cgroup_sane_behavior(cgrp)) | ||
3595 | return pid_fry(pid); | ||
3596 | else | ||
3597 | return pid; | ||
3598 | } | ||
3599 | |||
3567 | static int cmppid(const void *a, const void *b) | 3600 | static int cmppid(const void *a, const void *b) |
3568 | { | 3601 | { |
3569 | return *(pid_t *)a - *(pid_t *)b; | 3602 | return *(pid_t *)a - *(pid_t *)b; |
3570 | } | 3603 | } |
3571 | 3604 | ||
3605 | static int fried_cmppid(const void *a, const void *b) | ||
3606 | { | ||
3607 | return pid_fry(*(pid_t *)a) - pid_fry(*(pid_t *)b); | ||
3608 | } | ||
3609 | |||
3572 | static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp, | 3610 | static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp, |
3573 | enum cgroup_filetype type) | 3611 | enum cgroup_filetype type) |
3574 | { | 3612 | { |
@@ -3656,7 +3694,10 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type, | |||
3656 | css_task_iter_end(&it); | 3694 | css_task_iter_end(&it); |
3657 | length = n; | 3695 | length = n; |
3658 | /* now sort & (if procs) strip out duplicates */ | 3696 | /* now sort & (if procs) strip out duplicates */ |
3659 | sort(array, length, sizeof(pid_t), cmppid, NULL); | 3697 | if (cgroup_sane_behavior(cgrp)) |
3698 | sort(array, length, sizeof(pid_t), fried_cmppid, NULL); | ||
3699 | else | ||
3700 | sort(array, length, sizeof(pid_t), cmppid, NULL); | ||
3660 | if (type == CGROUP_FILE_PROCS) | 3701 | if (type == CGROUP_FILE_PROCS) |
3661 | length = pidlist_uniq(array, length); | 3702 | length = pidlist_uniq(array, length); |
3662 | 3703 | ||
@@ -3777,10 +3818,10 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos) | |||
3777 | 3818 | ||
3778 | while (index < end) { | 3819 | while (index < end) { |
3779 | int mid = (index + end) / 2; | 3820 | int mid = (index + end) / 2; |
3780 | if (l->list[mid] == pid) { | 3821 | if (cgroup_pid_fry(cgrp, l->list[mid]) == pid) { |
3781 | index = mid; | 3822 | index = mid; |
3782 | break; | 3823 | break; |
3783 | } else if (l->list[mid] <= pid) | 3824 | } else if (cgroup_pid_fry(cgrp, l->list[mid]) <= pid) |
3784 | index = mid + 1; | 3825 | index = mid + 1; |
3785 | else | 3826 | else |
3786 | end = mid; | 3827 | end = mid; |
@@ -3791,7 +3832,7 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos) | |||
3791 | return NULL; | 3832 | return NULL; |
3792 | /* Update the abstract position to be the actual pid that we found */ | 3833 | /* Update the abstract position to be the actual pid that we found */ |
3793 | iter = l->list + index; | 3834 | iter = l->list + index; |
3794 | *pos = *iter; | 3835 | *pos = cgroup_pid_fry(cgrp, *iter); |
3795 | return iter; | 3836 | return iter; |
3796 | } | 3837 | } |
3797 | 3838 | ||
@@ -3820,7 +3861,7 @@ static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos) | |||
3820 | if (p >= end) { | 3861 | if (p >= end) { |
3821 | return NULL; | 3862 | return NULL; |
3822 | } else { | 3863 | } else { |
3823 | *pos = *p; | 3864 | *pos = cgroup_pid_fry(of->cgrp, *p); |
3824 | return p; | 3865 | return p; |
3825 | } | 3866 | } |
3826 | } | 3867 | } |