aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorMichal Hocko <mhocko@suse.cz>2012-10-26 07:37:30 -0400
committerTejun Heo <tj@kernel.org>2012-10-29 19:22:21 -0400
commit2ef37d3fe474b218e170010a59066e19427c9847 (patch)
treee458c8c9a2c981923a8b73a5ac8c5f943ccc894b /mm
parentd842301181d9a4486aa24720ed4f96018b213292 (diff)
memcg: Simplify mem_cgroup_force_empty_list error handling
mem_cgroup_force_empty_list currently tries to remove all pages from the given LRU. To protect against temporary failures (EBUSY returned by mem_cgroup_move_parent) it uses a margin on top of the current number of LRU pages and returns true if there are still some pages left on the list. If we consider that mem_cgroup_move_parent fails only when it is racing with somebody else removing (uncharging) the page or when the page is being migrated, then it is obvious that all those failures are only temporary and so we can safely retry later. Let's get rid of the safety margin and make the loop really wait for the LRU to become empty. The caller should still make sure that all charges have been removed from the res_counter because mem_cgroup_replace_page_cache might add a page to the LRU after the list_empty check (it doesn't touch res_counter though). This catches most of the cases except for shmem, which might call mem_cgroup_replace_page_cache with a page which is not charged and not on the LRU yet, but this was the case also without this patch. In order to fix this we need a guarantee that try_get_mem_cgroup_from_page falls back to the current mm's cgroup, so it needs css_tryget to fail. This will be fixed up in a later patch because it needs help from the cgroup core (pre_destroy has to be called after css is cleared). Although mem_cgroup_pre_destroy can still fail (if a new task or a new sub-group appears) there is no reason to retry the pre_destroy callback from the cgroup core. This means that __DEPRECATED_clear_css_refs has lost its meaning and it can be removed. Changes since v2 - remove __DEPRECATED_clear_css_refs Changes since v1 - use kernel-doc - be more specific about mem_cgroup_move_parent possible failures Signed-off-by: Michal Hocko <mhocko@suse.cz> Reviewed-by: Tejun Heo <tj@kernel.org> Reviewed-by: Glauber Costa <glommer@parallels.com> Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/memcontrol.c76
1 files changed, 48 insertions, 28 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 916132a29b36..5a1d584ffed3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2702,10 +2702,27 @@ out:
2702 return ret; 2702 return ret;
2703} 2703}
2704 2704
2705/* 2705/**
2706 * move charges to its parent. 2706 * mem_cgroup_move_parent - moves page to the parent group
2707 * @page: the page to move
2708 * @pc: page_cgroup of the page
2709 * @child: page's cgroup
2710 *
2711 * move charges to its parent or the root cgroup if the group has no
2712 * parent (aka use_hierarchy==0).
2713 * Although this might fail (get_page_unless_zero, isolate_lru_page or
2714 * mem_cgroup_move_account fails) the failure is always temporary and
2715 * it signals a race with a page removal/uncharge or migration. In the
2716 * first case the page is on the way out and it will vanish from the LRU
2717 * on the next attempt and the call should be retried later.
2718 * Isolation from the LRU fails only if page has been isolated from
2719 * the LRU since we looked at it and that usually means either global
2720 * reclaim or migration going on. The page will either get back to the
2721 * LRU or vanish.
2722 * Finally mem_cgroup_move_account fails only if the page got uncharged
2723 * (!PageCgroupUsed) or moved to a different group. The page will
2724 * disappear in the next attempt.
2707 */ 2725 */
2708
2709static int mem_cgroup_move_parent(struct page *page, 2726static int mem_cgroup_move_parent(struct page *page,
2710 struct page_cgroup *pc, 2727 struct page_cgroup *pc,
2711 struct mem_cgroup *child) 2728 struct mem_cgroup *child)
@@ -2732,8 +2749,10 @@ static int mem_cgroup_move_parent(struct page *page,
2732 if (!parent) 2749 if (!parent)
2733 parent = root_mem_cgroup; 2750 parent = root_mem_cgroup;
2734 2751
2735 if (nr_pages > 1) 2752 if (nr_pages > 1) {
2753 VM_BUG_ON(!PageTransHuge(page));
2736 flags = compound_lock_irqsave(page); 2754 flags = compound_lock_irqsave(page);
2755 }
2737 2756
2738 ret = mem_cgroup_move_account(page, nr_pages, 2757 ret = mem_cgroup_move_account(page, nr_pages,
2739 pc, child, parent); 2758 pc, child, parent);
@@ -3683,17 +3702,22 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
3683 return nr_reclaimed; 3702 return nr_reclaimed;
3684} 3703}
3685 3704
3686/* 3705/**
3706 * mem_cgroup_force_empty_list - clears LRU of a group
3707 * @memcg: group to clear
3708 * @node: NUMA node
3709 * @zid: zone id
3710 * @lru: lru to clear
3711 *
3687 * Traverse a specified page_cgroup list and try to drop them all. This doesn't 3712 * Traverse a specified page_cgroup list and try to drop them all. This doesn't
3688 * reclaim the pages page themselves - it just removes the page_cgroups. 3713 * reclaim the pages page themselves - pages are moved to the parent (or root)
3689 * Returns true if some page_cgroups were not freed, indicating that the caller 3714 * group.
3690 * must retry this operation.
3691 */ 3715 */
3692static bool mem_cgroup_force_empty_list(struct mem_cgroup *memcg, 3716static void mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
3693 int node, int zid, enum lru_list lru) 3717 int node, int zid, enum lru_list lru)
3694{ 3718{
3695 struct mem_cgroup_per_zone *mz; 3719 struct mem_cgroup_per_zone *mz;
3696 unsigned long flags, loop; 3720 unsigned long flags;
3697 struct list_head *list; 3721 struct list_head *list;
3698 struct page *busy; 3722 struct page *busy;
3699 struct zone *zone; 3723 struct zone *zone;
@@ -3702,11 +3726,8 @@ static bool mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
3702 mz = mem_cgroup_zoneinfo(memcg, node, zid); 3726 mz = mem_cgroup_zoneinfo(memcg, node, zid);
3703 list = &mz->lruvec.lists[lru]; 3727 list = &mz->lruvec.lists[lru];
3704 3728
3705 loop = mz->lru_size[lru];
3706 /* give some margin against EBUSY etc...*/
3707 loop += 256;
3708 busy = NULL; 3729 busy = NULL;
3709 while (loop--) { 3730 do {
3710 struct page_cgroup *pc; 3731 struct page_cgroup *pc;
3711 struct page *page; 3732 struct page *page;
3712 3733
@@ -3732,8 +3753,7 @@ static bool mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
3732 cond_resched(); 3753 cond_resched();
3733 } else 3754 } else
3734 busy = NULL; 3755 busy = NULL;
3735 } 3756 } while (!list_empty(list));
3736 return !list_empty(list);
3737} 3757}
3738 3758
3739/* 3759/*
@@ -3747,7 +3767,6 @@ static int mem_cgroup_reparent_charges(struct mem_cgroup *memcg)
3747{ 3767{
3748 struct cgroup *cgrp = memcg->css.cgroup; 3768 struct cgroup *cgrp = memcg->css.cgroup;
3749 int node, zid; 3769 int node, zid;
3750 int ret;
3751 3770
3752 do { 3771 do {
3753 if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children)) 3772 if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children))
@@ -3755,28 +3774,30 @@ static int mem_cgroup_reparent_charges(struct mem_cgroup *memcg)
3755 /* This is for making all *used* pages to be on LRU. */ 3774 /* This is for making all *used* pages to be on LRU. */
3756 lru_add_drain_all(); 3775 lru_add_drain_all();
3757 drain_all_stock_sync(memcg); 3776 drain_all_stock_sync(memcg);
3758 ret = 0;
3759 mem_cgroup_start_move(memcg); 3777 mem_cgroup_start_move(memcg);
3760 for_each_node_state(node, N_HIGH_MEMORY) { 3778 for_each_node_state(node, N_HIGH_MEMORY) {
3761 for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) { 3779 for (zid = 0; zid < MAX_NR_ZONES; zid++) {
3762 enum lru_list lru; 3780 enum lru_list lru;
3763 for_each_lru(lru) { 3781 for_each_lru(lru) {
3764 ret = mem_cgroup_force_empty_list(memcg, 3782 mem_cgroup_force_empty_list(memcg,
3765 node, zid, lru); 3783 node, zid, lru);
3766 if (ret)
3767 break;
3768 } 3784 }
3769 } 3785 }
3770 if (ret)
3771 break;
3772 } 3786 }
3773 mem_cgroup_end_move(memcg); 3787 mem_cgroup_end_move(memcg);
3774 memcg_oom_recover(memcg); 3788 memcg_oom_recover(memcg);
3775 cond_resched(); 3789 cond_resched();
3776 /* "ret" should also be checked to ensure all lists are empty. */
3777 } while (res_counter_read_u64(&memcg->res, RES_USAGE) > 0 || ret);
3778 3790
3779 return ret; 3791 /*
3792 * This is a safety check because mem_cgroup_force_empty_list
3793 * could have raced with mem_cgroup_replace_page_cache callers
3794 * so the lru seemed empty but the page could have been added
3795 * right after the check. RES_USAGE should be safe as we always
3796 * charge before adding to the LRU.
3797 */
3798 } while (res_counter_read_u64(&memcg->res, RES_USAGE) > 0);
3799
3800 return 0;
3780} 3801}
3781 3802
3782/* 3803/*
@@ -5618,7 +5639,6 @@ struct cgroup_subsys mem_cgroup_subsys = {
5618 .base_cftypes = mem_cgroup_files, 5639 .base_cftypes = mem_cgroup_files,
5619 .early_init = 0, 5640 .early_init = 0,
5620 .use_id = 1, 5641 .use_id = 1,
5621 .__DEPRECATED_clear_css_refs = true,
5622}; 5642};
5623 5643
5624#ifdef CONFIG_MEMCG_SWAP 5644#ifdef CONFIG_MEMCG_SWAP