diff options
author | Kirill A. Shutemov <kirill@shutemov.name> | 2010-05-26 17:42:47 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-05-27 12:12:44 -0400 |
commit | 2c488db27b614816024e7994117f599337de0f34 (patch) | |
tree | 1ccfe14f5b2bede321004427babdad111a3b0c8e /mm | |
parent | 907860ed381a31b0102f362df67c1c5cae6ef050 (diff) |
memcg: clean up memory thresholds
Introduce struct mem_cgroup_thresholds. It helps to reduce the number of
checks of the threshold type (memory or mem+swap).
[akpm@linux-foundation.org: repair comment]
Signed-off-by: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Phil Carmody <ext-phil.2.carmody@nokia.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Paul Menage <menage@google.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/memcontrol.c | 151 |
1 files changed, 66 insertions, 85 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a4172a861b30..c6ece0a57595 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -158,6 +158,18 @@ struct mem_cgroup_threshold_ary { | |||
158 | /* Array of thresholds */ | 158 | /* Array of thresholds */ |
159 | struct mem_cgroup_threshold entries[0]; | 159 | struct mem_cgroup_threshold entries[0]; |
160 | }; | 160 | }; |
161 | |||
162 | struct mem_cgroup_thresholds { | ||
163 | /* Primary thresholds array */ | ||
164 | struct mem_cgroup_threshold_ary *primary; | ||
165 | /* | ||
166 | * Spare threshold array. | ||
167 | * This is needed to make mem_cgroup_unregister_event() "never fail". | ||
168 | * It must be able to store at least primary->size - 1 entries. | ||
169 | */ | ||
170 | struct mem_cgroup_threshold_ary *spare; | ||
171 | }; | ||
172 | |||
161 | /* for OOM */ | 173 | /* for OOM */ |
162 | struct mem_cgroup_eventfd_list { | 174 | struct mem_cgroup_eventfd_list { |
163 | struct list_head list; | 175 | struct list_head list; |
@@ -224,20 +236,10 @@ struct mem_cgroup { | |||
224 | struct mutex thresholds_lock; | 236 | struct mutex thresholds_lock; |
225 | 237 | ||
226 | /* thresholds for memory usage. RCU-protected */ | 238 | /* thresholds for memory usage. RCU-protected */ |
227 | struct mem_cgroup_threshold_ary *thresholds; | 239 | struct mem_cgroup_thresholds thresholds; |
228 | |||
229 | /* | ||
230 | * Preallocated buffer to be used in mem_cgroup_unregister_event() | ||
231 | * to make it "never fail". | ||
232 | * It must be able to store at least thresholds->size - 1 entries. | ||
233 | */ | ||
234 | struct mem_cgroup_threshold_ary *__thresholds; | ||
235 | 240 | ||
236 | /* thresholds for mem+swap usage. RCU-protected */ | 241 | /* thresholds for mem+swap usage. RCU-protected */ |
237 | struct mem_cgroup_threshold_ary *memsw_thresholds; | 242 | struct mem_cgroup_thresholds memsw_thresholds; |
238 | |||
239 | /* the same as __thresholds, but for memsw_thresholds */ | ||
240 | struct mem_cgroup_threshold_ary *__memsw_thresholds; | ||
241 | 243 | ||
242 | /* For oom notifier event fd */ | 244 | /* For oom notifier event fd */ |
243 | struct list_head oom_notify; | 245 | struct list_head oom_notify; |
@@ -3467,9 +3469,9 @@ static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap) | |||
3467 | 3469 | ||
3468 | rcu_read_lock(); | 3470 | rcu_read_lock(); |
3469 | if (!swap) | 3471 | if (!swap) |
3470 | t = rcu_dereference(memcg->thresholds); | 3472 | t = rcu_dereference(memcg->thresholds.primary); |
3471 | else | 3473 | else |
3472 | t = rcu_dereference(memcg->memsw_thresholds); | 3474 | t = rcu_dereference(memcg->memsw_thresholds.primary); |
3473 | 3475 | ||
3474 | if (!t) | 3476 | if (!t) |
3475 | goto unlock; | 3477 | goto unlock; |
@@ -3543,91 +3545,78 @@ static int mem_cgroup_usage_register_event(struct cgroup *cgrp, | |||
3543 | struct cftype *cft, struct eventfd_ctx *eventfd, const char *args) | 3545 | struct cftype *cft, struct eventfd_ctx *eventfd, const char *args) |
3544 | { | 3546 | { |
3545 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); | 3547 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); |
3546 | struct mem_cgroup_threshold_ary *thresholds, *thresholds_new; | 3548 | struct mem_cgroup_thresholds *thresholds; |
3549 | struct mem_cgroup_threshold_ary *new; | ||
3547 | int type = MEMFILE_TYPE(cft->private); | 3550 | int type = MEMFILE_TYPE(cft->private); |
3548 | u64 threshold, usage; | 3551 | u64 threshold, usage; |
3549 | int size; | 3552 | int i, size, ret; |
3550 | int i, ret; | ||
3551 | 3553 | ||
3552 | ret = res_counter_memparse_write_strategy(args, &threshold); | 3554 | ret = res_counter_memparse_write_strategy(args, &threshold); |
3553 | if (ret) | 3555 | if (ret) |
3554 | return ret; | 3556 | return ret; |
3555 | 3557 | ||
3556 | mutex_lock(&memcg->thresholds_lock); | 3558 | mutex_lock(&memcg->thresholds_lock); |
3559 | |||
3557 | if (type == _MEM) | 3560 | if (type == _MEM) |
3558 | thresholds = memcg->thresholds; | 3561 | thresholds = &memcg->thresholds; |
3559 | else if (type == _MEMSWAP) | 3562 | else if (type == _MEMSWAP) |
3560 | thresholds = memcg->memsw_thresholds; | 3563 | thresholds = &memcg->memsw_thresholds; |
3561 | else | 3564 | else |
3562 | BUG(); | 3565 | BUG(); |
3563 | 3566 | ||
3564 | usage = mem_cgroup_usage(memcg, type == _MEMSWAP); | 3567 | usage = mem_cgroup_usage(memcg, type == _MEMSWAP); |
3565 | 3568 | ||
3566 | /* Check if a threshold crossed before adding a new one */ | 3569 | /* Check if a threshold crossed before adding a new one */ |
3567 | if (thresholds) | 3570 | if (thresholds->primary) |
3568 | __mem_cgroup_threshold(memcg, type == _MEMSWAP); | 3571 | __mem_cgroup_threshold(memcg, type == _MEMSWAP); |
3569 | 3572 | ||
3570 | if (thresholds) | 3573 | size = thresholds->primary ? thresholds->primary->size + 1 : 1; |
3571 | size = thresholds->size + 1; | ||
3572 | else | ||
3573 | size = 1; | ||
3574 | 3574 | ||
3575 | /* Allocate memory for new array of thresholds */ | 3575 | /* Allocate memory for new array of thresholds */ |
3576 | thresholds_new = kmalloc(sizeof(*thresholds_new) + | 3576 | new = kmalloc(sizeof(*new) + size * sizeof(struct mem_cgroup_threshold), |
3577 | size * sizeof(struct mem_cgroup_threshold), | ||
3578 | GFP_KERNEL); | 3577 | GFP_KERNEL); |
3579 | if (!thresholds_new) { | 3578 | if (!new) { |
3580 | ret = -ENOMEM; | 3579 | ret = -ENOMEM; |
3581 | goto unlock; | 3580 | goto unlock; |
3582 | } | 3581 | } |
3583 | thresholds_new->size = size; | 3582 | new->size = size; |
3584 | 3583 | ||
3585 | /* Copy thresholds (if any) to new array */ | 3584 | /* Copy thresholds (if any) to new array */ |
3586 | if (thresholds) | 3585 | if (thresholds->primary) { |
3587 | memcpy(thresholds_new->entries, thresholds->entries, | 3586 | memcpy(new->entries, thresholds->primary->entries, (size - 1) * |
3588 | thresholds->size * | ||
3589 | sizeof(struct mem_cgroup_threshold)); | 3587 | sizeof(struct mem_cgroup_threshold)); |
3588 | } | ||
3589 | |||
3590 | /* Add new threshold */ | 3590 | /* Add new threshold */ |
3591 | thresholds_new->entries[size - 1].eventfd = eventfd; | 3591 | new->entries[size - 1].eventfd = eventfd; |
3592 | thresholds_new->entries[size - 1].threshold = threshold; | 3592 | new->entries[size - 1].threshold = threshold; |
3593 | 3593 | ||
3594 | /* Sort thresholds. Registering of new threshold isn't time-critical */ | 3594 | /* Sort thresholds. Registering of new threshold isn't time-critical */ |
3595 | sort(thresholds_new->entries, size, | 3595 | sort(new->entries, size, sizeof(struct mem_cgroup_threshold), |
3596 | sizeof(struct mem_cgroup_threshold), | ||
3597 | compare_thresholds, NULL); | 3596 | compare_thresholds, NULL); |
3598 | 3597 | ||
3599 | /* Find current threshold */ | 3598 | /* Find current threshold */ |
3600 | thresholds_new->current_threshold = -1; | 3599 | new->current_threshold = -1; |
3601 | for (i = 0; i < size; i++) { | 3600 | for (i = 0; i < size; i++) { |
3602 | if (thresholds_new->entries[i].threshold < usage) { | 3601 | if (new->entries[i].threshold < usage) { |
3603 | /* | 3602 | /* |
3604 | * thresholds_new->current_threshold will not be used | 3603 | * new->current_threshold will not be used until |
3605 | * until rcu_assign_pointer(), so it's safe to increment | 3604 | * rcu_assign_pointer(), so it's safe to increment |
3606 | * it here. | 3605 | * it here. |
3607 | */ | 3606 | */ |
3608 | ++thresholds_new->current_threshold; | 3607 | ++new->current_threshold; |
3609 | } | 3608 | } |
3610 | } | 3609 | } |
3611 | 3610 | ||
3612 | if (type == _MEM) | 3611 | /* Free old spare buffer and save old primary buffer as spare */ |
3613 | rcu_assign_pointer(memcg->thresholds, thresholds_new); | 3612 | kfree(thresholds->spare); |
3614 | else | 3613 | thresholds->spare = thresholds->primary; |
3615 | rcu_assign_pointer(memcg->memsw_thresholds, thresholds_new); | 3614 | |
3615 | rcu_assign_pointer(thresholds->primary, new); | ||
3616 | 3616 | ||
3617 | /* To be sure that nobody uses thresholds */ | 3617 | /* To be sure that nobody uses thresholds */ |
3618 | synchronize_rcu(); | 3618 | synchronize_rcu(); |
3619 | 3619 | ||
3620 | /* | ||
3621 | * Free old preallocated buffer and use thresholds as new | ||
3622 | * preallocated buffer. | ||
3623 | */ | ||
3624 | if (type == _MEM) { | ||
3625 | kfree(memcg->__thresholds); | ||
3626 | memcg->__thresholds = thresholds; | ||
3627 | } else { | ||
3628 | kfree(memcg->__memsw_thresholds); | ||
3629 | memcg->__memsw_thresholds = thresholds; | ||
3630 | } | ||
3631 | unlock: | 3620 | unlock: |
3632 | mutex_unlock(&memcg->thresholds_lock); | 3621 | mutex_unlock(&memcg->thresholds_lock); |
3633 | 3622 | ||
@@ -3638,17 +3627,17 @@ static void mem_cgroup_usage_unregister_event(struct cgroup *cgrp, | |||
3638 | struct cftype *cft, struct eventfd_ctx *eventfd) | 3627 | struct cftype *cft, struct eventfd_ctx *eventfd) |
3639 | { | 3628 | { |
3640 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); | 3629 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); |
3641 | struct mem_cgroup_threshold_ary *thresholds, *thresholds_new; | 3630 | struct mem_cgroup_thresholds *thresholds; |
3631 | struct mem_cgroup_threshold_ary *new; | ||
3642 | int type = MEMFILE_TYPE(cft->private); | 3632 | int type = MEMFILE_TYPE(cft->private); |
3643 | u64 usage; | 3633 | u64 usage; |
3644 | int size = 0; | 3634 | int i, j, size; |
3645 | int i, j; | ||
3646 | 3635 | ||
3647 | mutex_lock(&memcg->thresholds_lock); | 3636 | mutex_lock(&memcg->thresholds_lock); |
3648 | if (type == _MEM) | 3637 | if (type == _MEM) |
3649 | thresholds = memcg->thresholds; | 3638 | thresholds = &memcg->thresholds; |
3650 | else if (type == _MEMSWAP) | 3639 | else if (type == _MEMSWAP) |
3651 | thresholds = memcg->memsw_thresholds; | 3640 | thresholds = &memcg->memsw_thresholds; |
3652 | else | 3641 | else |
3653 | BUG(); | 3642 | BUG(); |
3654 | 3643 | ||
@@ -3664,53 +3653,45 @@ static void mem_cgroup_usage_unregister_event(struct cgroup *cgrp, | |||
3664 | __mem_cgroup_threshold(memcg, type == _MEMSWAP); | 3653 | __mem_cgroup_threshold(memcg, type == _MEMSWAP); |
3665 | 3654 | ||
3666 | /* Calculate new number of threshold */ | 3655 | /* Calculate new number of threshold */ |
3667 | for (i = 0; i < thresholds->size; i++) { | 3656 | size = 0; |
3668 | if (thresholds->entries[i].eventfd != eventfd) | 3657 | for (i = 0; i < thresholds->primary->size; i++) { |
3658 | if (thresholds->primary->entries[i].eventfd != eventfd) | ||
3669 | size++; | 3659 | size++; |
3670 | } | 3660 | } |
3671 | 3661 | ||
3672 | /* Use preallocated buffer for new array of thresholds */ | 3662 | new = thresholds->spare; |
3673 | if (type == _MEM) | ||
3674 | thresholds_new = memcg->__thresholds; | ||
3675 | else | ||
3676 | thresholds_new = memcg->__memsw_thresholds; | ||
3677 | 3663 | ||
3678 | /* Set thresholds array to NULL if we don't have thresholds */ | 3664 | /* Set thresholds array to NULL if we don't have thresholds */ |
3679 | if (!size) { | 3665 | if (!size) { |
3680 | kfree(thresholds_new); | 3666 | kfree(new); |
3681 | thresholds_new = NULL; | 3667 | new = NULL; |
3682 | goto swap_buffers; | 3668 | goto swap_buffers; |
3683 | } | 3669 | } |
3684 | 3670 | ||
3685 | thresholds_new->size = size; | 3671 | new->size = size; |
3686 | 3672 | ||
3687 | /* Copy thresholds and find current threshold */ | 3673 | /* Copy thresholds and find current threshold */ |
3688 | thresholds_new->current_threshold = -1; | 3674 | new->current_threshold = -1; |
3689 | for (i = 0, j = 0; i < thresholds->size; i++) { | 3675 | for (i = 0, j = 0; i < thresholds->primary->size; i++) { |
3690 | if (thresholds->entries[i].eventfd == eventfd) | 3676 | if (thresholds->primary->entries[i].eventfd == eventfd) |
3691 | continue; | 3677 | continue; |
3692 | 3678 | ||
3693 | thresholds_new->entries[j] = thresholds->entries[i]; | 3679 | new->entries[j] = thresholds->primary->entries[i]; |
3694 | if (thresholds_new->entries[j].threshold < usage) { | 3680 | if (new->entries[j].threshold < usage) { |
3695 | /* | 3681 | /* |
3696 | * thresholds_new->current_threshold will not be used | 3682 | * new->current_threshold will not be used |
3697 | * until rcu_assign_pointer(), so it's safe to increment | 3683 | * until rcu_assign_pointer(), so it's safe to increment |
3698 | * it here. | 3684 | * it here. |
3699 | */ | 3685 | */ |
3700 | ++thresholds_new->current_threshold; | 3686 | ++new->current_threshold; |
3701 | } | 3687 | } |
3702 | j++; | 3688 | j++; |
3703 | } | 3689 | } |
3704 | 3690 | ||
3705 | swap_buffers: | 3691 | swap_buffers: |
3706 | /* Swap thresholds array and preallocated buffer */ | 3692 | /* Swap primary and spare array */ |
3707 | if (type == _MEM) { | 3693 | thresholds->spare = thresholds->primary; |
3708 | memcg->__thresholds = thresholds; | 3694 | rcu_assign_pointer(thresholds->primary, new); |
3709 | rcu_assign_pointer(memcg->thresholds, thresholds_new); | ||
3710 | } else { | ||
3711 | memcg->__memsw_thresholds = thresholds; | ||
3712 | rcu_assign_pointer(memcg->memsw_thresholds, thresholds_new); | ||
3713 | } | ||
3714 | 3695 | ||
3715 | /* To be sure that nobody uses thresholds */ | 3696 | /* To be sure that nobody uses thresholds */ |
3716 | synchronize_rcu(); | 3697 | synchronize_rcu(); |