diff options
author | Tejun Heo <tj@kernel.org> | 2012-11-05 12:21:51 -0500 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2012-11-05 12:21:51 -0500 |
commit | 1db1e31b1ee3ae126ef98f39083b5f213c7b41bf (patch) | |
tree | 98328124d3b8b08b6db894c79d0e79b3a42417bf /mm/memcontrol.c | |
parent | 5d8f72b55c275677865de670fa147ed318191d81 (diff) | |
parent | bcf6de1b9129531215d26dd9af8331e84973bc52 (diff) |
Merge branch 'cgroup-rmdir-updates' into cgroup/for-3.8
Pull rmdir updates into for-3.8 so that further callback updates can
be put on top. This pull created a trivial conflict between the
following two commits.
8c7f6edbda ("cgroup: mark subsystems with broken hierarchy support and whine if cgroups are nested for them")
ed95779340 ("cgroup: kill cgroup_subsys->__DEPRECATED_clear_css_refs")
The former added a field to cgroup_subsys and the latter removed one
from it. They happen to be colocated causing the conflict. Keeping
what's added and removing what's removed resolves the conflict.
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- | mm/memcontrol.c | 181 |
1 files changed, 91 insertions, 90 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7acf43bf04a2..08adaaae6fcc 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -2337,7 +2337,6 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, | |||
2337 | again: | 2337 | again: |
2338 | if (*ptr) { /* css should be a valid one */ | 2338 | if (*ptr) { /* css should be a valid one */ |
2339 | memcg = *ptr; | 2339 | memcg = *ptr; |
2340 | VM_BUG_ON(css_is_removed(&memcg->css)); | ||
2341 | if (mem_cgroup_is_root(memcg)) | 2340 | if (mem_cgroup_is_root(memcg)) |
2342 | goto done; | 2341 | goto done; |
2343 | if (nr_pages == 1 && consume_stock(memcg)) | 2342 | if (nr_pages == 1 && consume_stock(memcg)) |
@@ -2477,9 +2476,9 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg, | |||
2477 | 2476 | ||
2478 | /* | 2477 | /* |
2479 | * A helper function to get mem_cgroup from ID. must be called under | 2478 | * A helper function to get mem_cgroup from ID. must be called under |
2480 | * rcu_read_lock(). The caller must check css_is_removed() or some if | 2479 | * rcu_read_lock(). The caller is responsible for calling css_tryget if |
2481 | * it's concern. (dropping refcnt from swap can be called against removed | 2480 | * the mem_cgroup is used for charging. (dropping refcnt from swap can be |
2482 | * memcg.) | 2481 | * called against removed memcg.) |
2483 | */ | 2482 | */ |
2484 | static struct mem_cgroup *mem_cgroup_lookup(unsigned short id) | 2483 | static struct mem_cgroup *mem_cgroup_lookup(unsigned short id) |
2485 | { | 2484 | { |
@@ -2676,13 +2675,6 @@ static int mem_cgroup_move_account(struct page *page, | |||
2676 | /* caller should have done css_get */ | 2675 | /* caller should have done css_get */ |
2677 | pc->mem_cgroup = to; | 2676 | pc->mem_cgroup = to; |
2678 | mem_cgroup_charge_statistics(to, anon, nr_pages); | 2677 | mem_cgroup_charge_statistics(to, anon, nr_pages); |
2679 | /* | ||
2680 | * We charges against "to" which may not have any tasks. Then, "to" | ||
2681 | * can be under rmdir(). But in current implementation, caller of | ||
2682 | * this function is just force_empty() and move charge, so it's | ||
2683 | * guaranteed that "to" is never removed. So, we don't check rmdir | ||
2684 | * status here. | ||
2685 | */ | ||
2686 | move_unlock_mem_cgroup(from, &flags); | 2678 | move_unlock_mem_cgroup(from, &flags); |
2687 | ret = 0; | 2679 | ret = 0; |
2688 | unlock: | 2680 | unlock: |
@@ -2696,10 +2688,27 @@ out: | |||
2696 | return ret; | 2688 | return ret; |
2697 | } | 2689 | } |
2698 | 2690 | ||
2699 | /* | 2691 | /** |
2700 | * move charges to its parent. | 2692 | * mem_cgroup_move_parent - moves page to the parent group |
2693 | * @page: the page to move | ||
2694 | * @pc: page_cgroup of the page | ||
2695 | * @child: page's cgroup | ||
2696 | * | ||
2697 | * move charges to its parent or the root cgroup if the group has no | ||
2698 | * parent (aka use_hierarchy==0). | ||
2699 | * Although this might fail (get_page_unless_zero, isolate_lru_page or | ||
2700 | * mem_cgroup_move_account fails) the failure is always temporary and | ||
2701 | * it signals a race with a page removal/uncharge or migration. In the | ||
2702 | * first case the page is on the way out and it will vanish from the LRU | ||
2703 | * on the next attempt and the call should be retried later. | ||
2704 | * Isolation from the LRU fails only if page has been isolated from | ||
2705 | * the LRU since we looked at it and that usually means either global | ||
2706 | * reclaim or migration going on. The page will either get back to the | ||
2707 | * LRU or vanish. | ||
2708 | * Finally mem_cgroup_move_account fails only if the page got uncharged | ||
2709 | * (!PageCgroupUsed) or moved to a different group. The page will | ||
2710 | * disappear in the next attempt. | ||
2701 | */ | 2711 | */ |
2702 | |||
2703 | static int mem_cgroup_move_parent(struct page *page, | 2712 | static int mem_cgroup_move_parent(struct page *page, |
2704 | struct page_cgroup *pc, | 2713 | struct page_cgroup *pc, |
2705 | struct mem_cgroup *child) | 2714 | struct mem_cgroup *child) |
@@ -2709,9 +2718,7 @@ static int mem_cgroup_move_parent(struct page *page, | |||
2709 | unsigned long uninitialized_var(flags); | 2718 | unsigned long uninitialized_var(flags); |
2710 | int ret; | 2719 | int ret; |
2711 | 2720 | ||
2712 | /* Is ROOT ? */ | 2721 | VM_BUG_ON(mem_cgroup_is_root(child)); |
2713 | if (mem_cgroup_is_root(child)) | ||
2714 | return -EINVAL; | ||
2715 | 2722 | ||
2716 | ret = -EBUSY; | 2723 | ret = -EBUSY; |
2717 | if (!get_page_unless_zero(page)) | 2724 | if (!get_page_unless_zero(page)) |
@@ -2728,8 +2735,10 @@ static int mem_cgroup_move_parent(struct page *page, | |||
2728 | if (!parent) | 2735 | if (!parent) |
2729 | parent = root_mem_cgroup; | 2736 | parent = root_mem_cgroup; |
2730 | 2737 | ||
2731 | if (nr_pages > 1) | 2738 | if (nr_pages > 1) { |
2739 | VM_BUG_ON(!PageTransHuge(page)); | ||
2732 | flags = compound_lock_irqsave(page); | 2740 | flags = compound_lock_irqsave(page); |
2741 | } | ||
2733 | 2742 | ||
2734 | ret = mem_cgroup_move_account(page, nr_pages, | 2743 | ret = mem_cgroup_move_account(page, nr_pages, |
2735 | pc, child, parent); | 2744 | pc, child, parent); |
@@ -2871,7 +2880,6 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg, | |||
2871 | return; | 2880 | return; |
2872 | if (!memcg) | 2881 | if (!memcg) |
2873 | return; | 2882 | return; |
2874 | cgroup_exclude_rmdir(&memcg->css); | ||
2875 | 2883 | ||
2876 | __mem_cgroup_commit_charge(memcg, page, 1, ctype, true); | 2884 | __mem_cgroup_commit_charge(memcg, page, 1, ctype, true); |
2877 | /* | 2885 | /* |
@@ -2885,12 +2893,6 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg, | |||
2885 | swp_entry_t ent = {.val = page_private(page)}; | 2893 | swp_entry_t ent = {.val = page_private(page)}; |
2886 | mem_cgroup_uncharge_swap(ent); | 2894 | mem_cgroup_uncharge_swap(ent); |
2887 | } | 2895 | } |
2888 | /* | ||
2889 | * At swapin, we may charge account against cgroup which has no tasks. | ||
2890 | * So, rmdir()->pre_destroy() can be called while we do this charge. | ||
2891 | * In that case, we need to call pre_destroy() again. check it here. | ||
2892 | */ | ||
2893 | cgroup_release_and_wakeup_rmdir(&memcg->css); | ||
2894 | } | 2896 | } |
2895 | 2897 | ||
2896 | void mem_cgroup_commit_charge_swapin(struct page *page, | 2898 | void mem_cgroup_commit_charge_swapin(struct page *page, |
@@ -3338,8 +3340,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg, | |||
3338 | 3340 | ||
3339 | if (!memcg) | 3341 | if (!memcg) |
3340 | return; | 3342 | return; |
3341 | /* blocks rmdir() */ | 3343 | |
3342 | cgroup_exclude_rmdir(&memcg->css); | ||
3343 | if (!migration_ok) { | 3344 | if (!migration_ok) { |
3344 | used = oldpage; | 3345 | used = oldpage; |
3345 | unused = newpage; | 3346 | unused = newpage; |
@@ -3373,13 +3374,6 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg, | |||
3373 | */ | 3374 | */ |
3374 | if (anon) | 3375 | if (anon) |
3375 | mem_cgroup_uncharge_page(used); | 3376 | mem_cgroup_uncharge_page(used); |
3376 | /* | ||
3377 | * At migration, we may charge account against cgroup which has no | ||
3378 | * tasks. | ||
3379 | * So, rmdir()->pre_destroy() can be called while we do this charge. | ||
3380 | * In that case, we need to call pre_destroy() again. check it here. | ||
3381 | */ | ||
3382 | cgroup_release_and_wakeup_rmdir(&memcg->css); | ||
3383 | } | 3377 | } |
3384 | 3378 | ||
3385 | /* | 3379 | /* |
@@ -3679,17 +3673,22 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, | |||
3679 | return nr_reclaimed; | 3673 | return nr_reclaimed; |
3680 | } | 3674 | } |
3681 | 3675 | ||
3682 | /* | 3676 | /** |
3677 | * mem_cgroup_force_empty_list - clears LRU of a group | ||
3678 | * @memcg: group to clear | ||
3679 | * @node: NUMA node | ||
3680 | * @zid: zone id | ||
3681 | * @lru: lru to clear | ||
3682 | * | ||
3683 | * Traverse a specified page_cgroup list and try to drop them all. This doesn't | 3683 | * Traverse a specified page_cgroup list and try to drop them all. This doesn't |
3684 | * reclaim the pages themselves - it just removes the page_cgroups. | 3684 | * reclaim the pages themselves - pages are moved to the parent (or root) |
3685 | * Returns true if some page_cgroups were not freed, indicating that the caller | 3685 | * group. |
3686 | * must retry this operation. | ||
3687 | */ | 3686 | */ |
3688 | static bool mem_cgroup_force_empty_list(struct mem_cgroup *memcg, | 3687 | static void mem_cgroup_force_empty_list(struct mem_cgroup *memcg, |
3689 | int node, int zid, enum lru_list lru) | 3688 | int node, int zid, enum lru_list lru) |
3690 | { | 3689 | { |
3691 | struct mem_cgroup_per_zone *mz; | 3690 | struct mem_cgroup_per_zone *mz; |
3692 | unsigned long flags, loop; | 3691 | unsigned long flags; |
3693 | struct list_head *list; | 3692 | struct list_head *list; |
3694 | struct page *busy; | 3693 | struct page *busy; |
3695 | struct zone *zone; | 3694 | struct zone *zone; |
@@ -3698,11 +3697,8 @@ static bool mem_cgroup_force_empty_list(struct mem_cgroup *memcg, | |||
3698 | mz = mem_cgroup_zoneinfo(memcg, node, zid); | 3697 | mz = mem_cgroup_zoneinfo(memcg, node, zid); |
3699 | list = &mz->lruvec.lists[lru]; | 3698 | list = &mz->lruvec.lists[lru]; |
3700 | 3699 | ||
3701 | loop = mz->lru_size[lru]; | ||
3702 | /* give some margin against EBUSY etc...*/ | ||
3703 | loop += 256; | ||
3704 | busy = NULL; | 3700 | busy = NULL; |
3705 | while (loop--) { | 3701 | do { |
3706 | struct page_cgroup *pc; | 3702 | struct page_cgroup *pc; |
3707 | struct page *page; | 3703 | struct page *page; |
3708 | 3704 | ||
@@ -3728,76 +3724,72 @@ static bool mem_cgroup_force_empty_list(struct mem_cgroup *memcg, | |||
3728 | cond_resched(); | 3724 | cond_resched(); |
3729 | } else | 3725 | } else |
3730 | busy = NULL; | 3726 | busy = NULL; |
3731 | } | 3727 | } while (!list_empty(list)); |
3732 | return !list_empty(list); | ||
3733 | } | 3728 | } |
3734 | 3729 | ||
3735 | /* | 3730 | /* |
3736 | * make mem_cgroup's charge to be 0 if there is no task. | 3731 | * make mem_cgroup's charge to be 0 if there is no task by moving |
3732 | * all the charges and pages to the parent. | ||
3737 | * This enables deleting this mem_cgroup. | 3733 | * This enables deleting this mem_cgroup. |
3734 | * | ||
3735 | * Caller is responsible for holding css reference on the memcg. | ||
3738 | */ | 3736 | */ |
3739 | static int mem_cgroup_force_empty(struct mem_cgroup *memcg, bool free_all) | 3737 | static void mem_cgroup_reparent_charges(struct mem_cgroup *memcg) |
3740 | { | 3738 | { |
3741 | int ret; | 3739 | int node, zid; |
3742 | int node, zid, shrink; | ||
3743 | int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; | ||
3744 | struct cgroup *cgrp = memcg->css.cgroup; | ||
3745 | |||
3746 | css_get(&memcg->css); | ||
3747 | 3740 | ||
3748 | shrink = 0; | ||
3749 | /* should free all ? */ | ||
3750 | if (free_all) | ||
3751 | goto try_to_free; | ||
3752 | move_account: | ||
3753 | do { | 3741 | do { |
3754 | ret = -EBUSY; | ||
3755 | if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children)) | ||
3756 | goto out; | ||
3757 | /* This is for making all *used* pages to be on LRU. */ | 3742 | /* This is for making all *used* pages to be on LRU. */ |
3758 | lru_add_drain_all(); | 3743 | lru_add_drain_all(); |
3759 | drain_all_stock_sync(memcg); | 3744 | drain_all_stock_sync(memcg); |
3760 | ret = 0; | ||
3761 | mem_cgroup_start_move(memcg); | 3745 | mem_cgroup_start_move(memcg); |
3762 | for_each_node_state(node, N_HIGH_MEMORY) { | 3746 | for_each_node_state(node, N_HIGH_MEMORY) { |
3763 | for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) { | 3747 | for (zid = 0; zid < MAX_NR_ZONES; zid++) { |
3764 | enum lru_list lru; | 3748 | enum lru_list lru; |
3765 | for_each_lru(lru) { | 3749 | for_each_lru(lru) { |
3766 | ret = mem_cgroup_force_empty_list(memcg, | 3750 | mem_cgroup_force_empty_list(memcg, |
3767 | node, zid, lru); | 3751 | node, zid, lru); |
3768 | if (ret) | ||
3769 | break; | ||
3770 | } | 3752 | } |
3771 | } | 3753 | } |
3772 | if (ret) | ||
3773 | break; | ||
3774 | } | 3754 | } |
3775 | mem_cgroup_end_move(memcg); | 3755 | mem_cgroup_end_move(memcg); |
3776 | memcg_oom_recover(memcg); | 3756 | memcg_oom_recover(memcg); |
3777 | cond_resched(); | 3757 | cond_resched(); |
3778 | /* "ret" should also be checked to ensure all lists are empty. */ | ||
3779 | } while (res_counter_read_u64(&memcg->res, RES_USAGE) > 0 || ret); | ||
3780 | out: | ||
3781 | css_put(&memcg->css); | ||
3782 | return ret; | ||
3783 | 3758 | ||
3784 | try_to_free: | 3759 | /* |
3760 | * This is a safety check because mem_cgroup_force_empty_list | ||
3761 | * could have raced with mem_cgroup_replace_page_cache callers | ||
3762 | * so the lru seemed empty but the page could have been added | ||
3763 | * right after the check. RES_USAGE should be safe as we always | ||
3764 | * charge before adding to the LRU. | ||
3765 | */ | ||
3766 | } while (res_counter_read_u64(&memcg->res, RES_USAGE) > 0); | ||
3767 | } | ||
3768 | |||
3769 | /* | ||
3770 | * Reclaims as many pages from the given memcg as possible and moves | ||
3771 | * the rest to the parent. | ||
3772 | * | ||
3773 | * Caller is responsible for holding css reference for memcg. | ||
3774 | */ | ||
3775 | static int mem_cgroup_force_empty(struct mem_cgroup *memcg) | ||
3776 | { | ||
3777 | int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; | ||
3778 | struct cgroup *cgrp = memcg->css.cgroup; | ||
3779 | |||
3785 | /* returns EBUSY if there is a task or if we come here twice. */ | 3780 | /* returns EBUSY if there is a task or if we come here twice. */ |
3786 | if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children) || shrink) { | 3781 | if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children)) |
3787 | ret = -EBUSY; | 3782 | return -EBUSY; |
3788 | goto out; | 3783 | |
3789 | } | ||
3790 | /* we call try-to-free pages for make this cgroup empty */ | 3784 | /* we call try-to-free pages for make this cgroup empty */ |
3791 | lru_add_drain_all(); | 3785 | lru_add_drain_all(); |
3792 | /* try to free all pages in this cgroup */ | 3786 | /* try to free all pages in this cgroup */ |
3793 | shrink = 1; | ||
3794 | while (nr_retries && res_counter_read_u64(&memcg->res, RES_USAGE) > 0) { | 3787 | while (nr_retries && res_counter_read_u64(&memcg->res, RES_USAGE) > 0) { |
3795 | int progress; | 3788 | int progress; |
3796 | 3789 | ||
3797 | if (signal_pending(current)) { | 3790 | if (signal_pending(current)) |
3798 | ret = -EINTR; | 3791 | return -EINTR; |
3799 | goto out; | 3792 | |
3800 | } | ||
3801 | progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL, | 3793 | progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL, |
3802 | false); | 3794 | false); |
3803 | if (!progress) { | 3795 | if (!progress) { |
@@ -3808,13 +3800,23 @@ try_to_free: | |||
3808 | 3800 | ||
3809 | } | 3801 | } |
3810 | lru_add_drain(); | 3802 | lru_add_drain(); |
3811 | /* try move_account...there may be some *locked* pages. */ | 3803 | mem_cgroup_reparent_charges(memcg); |
3812 | goto move_account; | 3804 | |
3805 | return 0; | ||
3813 | } | 3806 | } |
3814 | 3807 | ||
3815 | static int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event) | 3808 | static int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event) |
3816 | { | 3809 | { |
3817 | return mem_cgroup_force_empty(mem_cgroup_from_cont(cont), true); | 3810 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); |
3811 | int ret; | ||
3812 | |||
3813 | if (mem_cgroup_is_root(memcg)) | ||
3814 | return -EINVAL; | ||
3815 | css_get(&memcg->css); | ||
3816 | ret = mem_cgroup_force_empty(memcg); | ||
3817 | css_put(&memcg->css); | ||
3818 | |||
3819 | return ret; | ||
3818 | } | 3820 | } |
3819 | 3821 | ||
3820 | 3822 | ||
@@ -5001,11 +5003,11 @@ free_out: | |||
5001 | return ERR_PTR(error); | 5003 | return ERR_PTR(error); |
5002 | } | 5004 | } |
5003 | 5005 | ||
5004 | static int mem_cgroup_pre_destroy(struct cgroup *cont) | 5006 | static void mem_cgroup_pre_destroy(struct cgroup *cont) |
5005 | { | 5007 | { |
5006 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); | 5008 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); |
5007 | 5009 | ||
5008 | return mem_cgroup_force_empty(memcg, false); | 5010 | mem_cgroup_reparent_charges(memcg); |
5009 | } | 5011 | } |
5010 | 5012 | ||
5011 | static void mem_cgroup_destroy(struct cgroup *cont) | 5013 | static void mem_cgroup_destroy(struct cgroup *cont) |
@@ -5607,7 +5609,6 @@ struct cgroup_subsys mem_cgroup_subsys = { | |||
5607 | .base_cftypes = mem_cgroup_files, | 5609 | .base_cftypes = mem_cgroup_files, |
5608 | .early_init = 0, | 5610 | .early_init = 0, |
5609 | .use_id = 1, | 5611 | .use_id = 1, |
5610 | .__DEPRECATED_clear_css_refs = true, | ||
5611 | }; | 5612 | }; |
5612 | 5613 | ||
5613 | #ifdef CONFIG_MEMCG_SWAP | 5614 | #ifdef CONFIG_MEMCG_SWAP |