author     Tejun Heo <tj@kernel.org>  2012-11-05 12:21:51 -0500
committer  Tejun Heo <tj@kernel.org>  2012-11-05 12:21:51 -0500
commit     1db1e31b1ee3ae126ef98f39083b5f213c7b41bf (patch)
tree       98328124d3b8b08b6db894c79d0e79b3a42417bf /mm/memcontrol.c
parent     5d8f72b55c275677865de670fa147ed318191d81 (diff)
parent     bcf6de1b9129531215d26dd9af8331e84973bc52 (diff)
Merge branch 'cgroup-rmdir-updates' into cgroup/for-3.8
Pull rmdir updates into for-3.8 so that further callback updates can
be put on top.

This pull created a trivial conflict between the following two
commits.

  8c7f6edbda ("cgroup: mark subsystems with broken hierarchy support
              and whine if cgroups are nested for them")
  ed95779340 ("cgroup: kill cgroup_subsys->__DEPRECATED_clear_css_refs")

The former added a field to cgroup_subsys and the latter removed one
from it. They happen to be colocated, causing the conflict. Keeping
what's added and removing what's removed resolves the conflict.

Signed-off-by: Tejun Heo <tj@kernel.org>
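For context, the conflicting region of struct cgroup_subsys after resolution looks roughly like this (a sketch only; member names are taken from the two commits cited above, with unrelated neighbors elided):

	struct cgroup_subsys {
		...
		/* kept: added by 8c7f6edbda */
		bool broken_hierarchy;
		bool warned_broken_hierarchy;
		/* dropped here: __DEPRECATED_clear_css_refs,
		 * removed by ed95779340 */
		...
	};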
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--  mm/memcontrol.c | 181
1 file changed, 91 insertions(+), 90 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7acf43bf04a2..08adaaae6fcc 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2337,7 +2337,6 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 again:
 	if (*ptr) { /* css should be a valid one */
 		memcg = *ptr;
-		VM_BUG_ON(css_is_removed(&memcg->css));
 		if (mem_cgroup_is_root(memcg))
 			goto done;
 		if (nr_pages == 1 && consume_stock(memcg))
@@ -2477,9 +2476,9 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg,
 
 /*
  * A helper function to get mem_cgroup from ID. must be called under
- * rcu_read_lock(). The caller must check css_is_removed() or some if
- * it's concern. (dropping refcnt from swap can be called against removed
- * memcg.)
+ * rcu_read_lock(). The caller is responsible for calling css_tryget if
+ * the mem_cgroup is used for charging. (dropping refcnt from swap can be
+ * called against removed memcg.)
  */
 static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
 {
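The reworded comment reflects the lookup-then-tryget pattern used by the swap accounting paths in this file; the caller side reads roughly like this (a sketch, not a verbatim call site):

	rcu_read_lock();
	memcg = mem_cgroup_lookup(id);
	if (memcg && !css_tryget(&memcg->css))
		memcg = NULL;	/* cgroup is gone; treat the lookup as a miss */
	rcu_read_unlock();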
@@ -2676,13 +2675,6 @@ static int mem_cgroup_move_account(struct page *page,
 	/* caller should have done css_get */
 	pc->mem_cgroup = to;
 	mem_cgroup_charge_statistics(to, anon, nr_pages);
-	/*
-	 * We charges against "to" which may not have any tasks. Then, "to"
-	 * can be under rmdir(). But in current implementation, caller of
-	 * this function is just force_empty() and move charge, so it's
-	 * guaranteed that "to" is never removed. So, we don't check rmdir
-	 * status here.
-	 */
 	move_unlock_mem_cgroup(from, &flags);
 	ret = 0;
 unlock:
@@ -2696,10 +2688,27 @@ out:
 	return ret;
 }
 
-/*
- * move charges to its parent.
+/**
+ * mem_cgroup_move_parent - moves page to the parent group
+ * @page: the page to move
+ * @pc: page_cgroup of the page
+ * @child: page's cgroup
+ *
+ * move charges to its parent or the root cgroup if the group has no
+ * parent (aka use_hierarchy==0).
+ * Although this might fail (get_page_unless_zero, isolate_lru_page or
+ * mem_cgroup_move_account fails) the failure is always temporary and
+ * it signals a race with a page removal/uncharge or migration. In the
+ * first case the page is on the way out and it will vanish from the LRU
+ * on the next attempt and the call should be retried later.
+ * Isolation from the LRU fails only if page has been isolated from
+ * the LRU since we looked at it and that usually means either global
+ * reclaim or migration going on. The page will either get back to the
+ * LRU or vanish.
+ * Finally mem_cgroup_move_account fails only if the page got uncharged
+ * (!PageCgroupUsed) or moved to a different group. The page will
+ * disappear in the next attempt.
  */
-
 static int mem_cgroup_move_parent(struct page *page,
 				  struct page_cgroup *pc,
 				  struct mem_cgroup *child)
@@ -2709,9 +2718,7 @@ static int mem_cgroup_move_parent(struct page *page,
 	unsigned long uninitialized_var(flags);
 	int ret;
 
-	/* Is ROOT ? */
-	if (mem_cgroup_is_root(child))
-		return -EINVAL;
+	VM_BUG_ON(mem_cgroup_is_root(child));
 
 	ret = -EBUSY;
 	if (!get_page_unless_zero(page))
@@ -2728,8 +2735,10 @@ static int mem_cgroup_move_parent(struct page *page,
 	if (!parent)
 		parent = root_mem_cgroup;
 
-	if (nr_pages > 1)
+	if (nr_pages > 1) {
+		VM_BUG_ON(!PageTransHuge(page));
 		flags = compound_lock_irqsave(page);
+	}
 
 	ret = mem_cgroup_move_account(page, nr_pages,
 				pc, child, parent);
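nr_pages here comes from hpage_nr_pages(page) earlier in the function, so the new VM_BUG_ON documents the invariant that only a transparent huge page can have nr_pages > 1; the compound lock then keeps the THP stable while the charge is moved. Simplified (a sketch of the surrounding flow, not verbatim):

	nr_pages = hpage_nr_pages(page);	/* 1, or HPAGE_PMD_NR for a THP */
	...
	if (nr_pages > 1) {
		VM_BUG_ON(!PageTransHuge(page));
		/* keep the compound page from splitting under us */
		flags = compound_lock_irqsave(page);
	}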
@@ -2871,7 +2880,6 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,
 		return;
 	if (!memcg)
 		return;
-	cgroup_exclude_rmdir(&memcg->css);
 
 	__mem_cgroup_commit_charge(memcg, page, 1, ctype, true);
 	/*
@@ -2885,12 +2893,6 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,
 		swp_entry_t ent = {.val = page_private(page)};
 		mem_cgroup_uncharge_swap(ent);
 	}
-	/*
-	 * At swapin, we may charge account against cgroup which has no tasks.
-	 * So, rmdir()->pre_destroy() can be called while we do this charge.
-	 * In that case, we need to call pre_destroy() again. check it here.
-	 */
-	cgroup_release_and_wakeup_rmdir(&memcg->css);
 }
 
 void mem_cgroup_commit_charge_swapin(struct page *page,
@@ -3338,8 +3340,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 
 	if (!memcg)
 		return;
-	/* blocks rmdir() */
-	cgroup_exclude_rmdir(&memcg->css);
+
 	if (!migration_ok) {
 		used = oldpage;
 		unused = newpage;
@@ -3373,13 +3374,6 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 	 */
 	if (anon)
 		mem_cgroup_uncharge_page(used);
-	/*
-	 * At migration, we may charge account against cgroup which has no
-	 * tasks.
-	 * So, rmdir()->pre_destroy() can be called while we do this charge.
-	 * In that case, we need to call pre_destroy() again. check it here.
-	 */
-	cgroup_release_and_wakeup_rmdir(&memcg->css);
 }
 
 /*
@@ -3679,17 +3673,22 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 	return nr_reclaimed;
 }
 
-/*
+/**
+ * mem_cgroup_force_empty_list - clears LRU of a group
+ * @memcg: group to clear
+ * @node: NUMA node
+ * @zid: zone id
+ * @lru: lru to clear
+ *
  * Traverse a specified page_cgroup list and try to drop them all. This doesn't
- * reclaim the pages themselves - it just removes the page_cgroups.
- * Returns true if some page_cgroups were not freed, indicating that the caller
- * must retry this operation.
+ * reclaim the pages themselves - pages are moved to the parent (or root)
+ * group.
  */
-static bool mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
+static void mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
 				int node, int zid, enum lru_list lru)
 {
 	struct mem_cgroup_per_zone *mz;
-	unsigned long flags, loop;
+	unsigned long flags;
 	struct list_head *list;
 	struct page *busy;
 	struct zone *zone;
@@ -3698,11 +3697,8 @@ static bool mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
 	mz = mem_cgroup_zoneinfo(memcg, node, zid);
 	list = &mz->lruvec.lists[lru];
 
-	loop = mz->lru_size[lru];
-	/* give some margin against EBUSY etc...*/
-	loop += 256;
 	busy = NULL;
-	while (loop--) {
+	do {
 		struct page_cgroup *pc;
 		struct page *page;
 
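The loop body between these two hunks is unchanged by the patch; reconstructed from the surrounding context, it pops pages off the tail of the LRU and hands them to mem_cgroup_move_parent(), remembering a busy page so a repeated failure rotates the list instead of spinning on the same entry (illustrative only, not part of this diff):

	do {
		struct page_cgroup *pc;
		struct page *page;

		spin_lock_irqsave(&zone->lru_lock, flags);
		if (list_empty(list)) {
			spin_unlock_irqrestore(&zone->lru_lock, flags);
			break;
		}
		page = list_entry(list->prev, struct page, lru);
		if (busy == page) {
			/* second failure on the same page: rotate and retry later */
			list_move(&page->lru, list);
			busy = NULL;
			spin_unlock_irqrestore(&zone->lru_lock, flags);
			continue;
		}
		spin_unlock_irqrestore(&zone->lru_lock, flags);

		pc = lookup_page_cgroup(page);

		if (mem_cgroup_move_parent(page, pc, memcg)) {
			/* found lock contention -> skip this page */
			busy = page;
			cond_resched();
		} else
			busy = NULL;
	} while (!list_empty(list));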
@@ -3728,76 +3724,72 @@ static bool mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
 			cond_resched();
 		} else
 			busy = NULL;
-	}
-	return !list_empty(list);
+	} while (!list_empty(list));
 }
 
 /*
- * make mem_cgroup's charge to be 0 if there is no task.
+ * make mem_cgroup's charge to be 0 if there is no task by moving
+ * all the charges and pages to the parent.
  * This enables deleting this mem_cgroup.
+ *
+ * Caller is responsible for holding css reference on the memcg.
  */
-static int mem_cgroup_force_empty(struct mem_cgroup *memcg, bool free_all)
+static void mem_cgroup_reparent_charges(struct mem_cgroup *memcg)
 {
-	int ret;
-	int node, zid, shrink;
-	int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
-	struct cgroup *cgrp = memcg->css.cgroup;
-
-	css_get(&memcg->css);
+	int node, zid;
 
-	shrink = 0;
-	/* should free all ? */
-	if (free_all)
-		goto try_to_free;
-move_account:
 	do {
-		ret = -EBUSY;
-		if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children))
-			goto out;
 		/* This is for making all *used* pages to be on LRU. */
 		lru_add_drain_all();
 		drain_all_stock_sync(memcg);
-		ret = 0;
 		mem_cgroup_start_move(memcg);
 		for_each_node_state(node, N_HIGH_MEMORY) {
-			for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) {
+			for (zid = 0; zid < MAX_NR_ZONES; zid++) {
 				enum lru_list lru;
 				for_each_lru(lru) {
-					ret = mem_cgroup_force_empty_list(memcg,
+					mem_cgroup_force_empty_list(memcg,
 							node, zid, lru);
-					if (ret)
-						break;
 				}
 			}
-			if (ret)
-				break;
 		}
 		mem_cgroup_end_move(memcg);
 		memcg_oom_recover(memcg);
 		cond_resched();
-		/* "ret" should also be checked to ensure all lists are empty. */
-	} while (res_counter_read_u64(&memcg->res, RES_USAGE) > 0 || ret);
-out:
-	css_put(&memcg->css);
-	return ret;
 
-try_to_free:
+	/*
+	 * This is a safety check because mem_cgroup_force_empty_list
+	 * could have raced with mem_cgroup_replace_page_cache callers
+	 * so the lru seemed empty but the page could have been added
+	 * right after the check. RES_USAGE should be safe as we always
+	 * charge before adding to the LRU.
+	 */
+	} while (res_counter_read_u64(&memcg->res, RES_USAGE) > 0);
+}
+
+/*
+ * Reclaims as many pages from the given memcg as possible and moves
+ * the rest to the parent.
+ *
+ * Caller is responsible for holding css reference for memcg.
+ */
+static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
+{
+	int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
+	struct cgroup *cgrp = memcg->css.cgroup;
+
 	/* returns EBUSY if there is a task or if we come here twice. */
-	if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children) || shrink) {
-		ret = -EBUSY;
-		goto out;
-	}
+	if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children))
+		return -EBUSY;
+
 	/* we call try-to-free pages for make this cgroup empty */
 	lru_add_drain_all();
 	/* try to free all pages in this cgroup */
-	shrink = 1;
 	while (nr_retries && res_counter_read_u64(&memcg->res, RES_USAGE) > 0) {
 		int progress;
 
-		if (signal_pending(current)) {
-			ret = -EINTR;
-			goto out;
-		}
+		if (signal_pending(current))
+			return -EINTR;
+
 		progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL,
 						false);
 		if (!progress) {
@@ -3808,13 +3800,23 @@ try_to_free:
 
 	}
 	lru_add_drain();
-	/* try move_account...there may be some *locked* pages. */
-	goto move_account;
+	mem_cgroup_reparent_charges(memcg);
+
+	return 0;
 }
 
 static int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event)
 {
-	return mem_cgroup_force_empty(mem_cgroup_from_cont(cont), true);
+	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+	int ret;
+
+	if (mem_cgroup_is_root(memcg))
+		return -EINVAL;
+	css_get(&memcg->css);
+	ret = mem_cgroup_force_empty(memcg);
+	css_put(&memcg->css);
+
+	return ret;
 }
 
 
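mem_cgroup_force_empty_write() backs the memory.force_empty control file, and the new mem_cgroup_is_root() check makes writes to the root group's file fail with EINVAL. A minimal userspace sketch of triggering the handler (mount point and group name are hypothetical; assumes a v1 memcg hierarchy):

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/sys/fs/cgroup/memory/grp/memory.force_empty",
			      O_WRONLY);

		/* any write triggers the handler; EBUSY means tasks or
		 * children remain, EINTR means a pending signal aborted it */
		if (fd < 0 || write(fd, "0", 1) < 0)
			perror("force_empty");
		if (fd >= 0)
			close(fd);
		return 0;
	}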
@@ -5001,11 +5003,11 @@ free_out:
 	return ERR_PTR(error);
 }
 
-static int mem_cgroup_pre_destroy(struct cgroup *cont)
+static void mem_cgroup_pre_destroy(struct cgroup *cont)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
 
-	return mem_cgroup_force_empty(memcg, false);
+	mem_cgroup_reparent_charges(memcg);
 }
 
 static void mem_cgroup_destroy(struct cgroup *cont)
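With ->pre_destroy() returning void, rmdir can no longer be vetoed from this callback; the cgroup core simply invokes each subsystem's callback on the removal path, roughly like this (a sketch of the post-series core behavior, not verbatim kernel/cgroup.c):

	for_each_subsys(cgrp->root, ss)
		if (ss->pre_destroy)
			ss->pre_destroy(cgrp);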
@@ -5607,7 +5609,6 @@ struct cgroup_subsys mem_cgroup_subsys = {
 	.base_cftypes = mem_cgroup_files,
 	.early_init = 0,
 	.use_id = 1,
-	.__DEPRECATED_clear_css_refs = true,
 };
 
 #ifdef CONFIG_MEMCG_SWAP