diff options
-rw-r--r-- | mm/memcontrol.c | 76 |
1 files changed, 48 insertions, 28 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 916132a29b36..5a1d584ffed3 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -2702,10 +2702,27 @@ out: | |||
2702 | return ret; | 2702 | return ret; |
2703 | } | 2703 | } |
2704 | 2704 | ||
2705 | /* | 2705 | /** |
2706 | * move charges to its parent. | 2706 | * mem_cgroup_move_parent - moves page to the parent group |
2707 | * @page: the page to move | ||
2708 | * @pc: page_cgroup of the page | ||
2709 | * @child: page's cgroup | ||
2710 | * | ||
2711 | * move charges to its parent or the root cgroup if the group has no | ||
2712 | * parent (aka use_hierarchy==0). | ||
2713 | * Although this might fail (get_page_unless_zero, isolate_lru_page or | ||
2714 | * mem_cgroup_move_account fails) the failure is always temporary and | ||
2715 | * it signals a race with a page removal/uncharge or migration. In the | ||
2716 | * first case the page is on the way out and it will vanish from the LRU | ||
2717 | * on the next attempt and the call should be retried later. | ||
2718 | * Isolation from the LRU fails only if page has been isolated from | ||
2719 | * the LRU since we looked at it and that usually means either global | ||
2720 | * reclaim or migration going on. The page will either get back to the | ||
2721 | * LRU or vanish. | ||
2722 | * Finally mem_cgroup_move_account fails only if the page got uncharged | ||
2723 | * (!PageCgroupUsed) or moved to a different group. The page will | ||
2724 | * disappear in the next attempt. | ||
2707 | */ | 2725 | */ |
2708 | |||
2709 | static int mem_cgroup_move_parent(struct page *page, | 2726 | static int mem_cgroup_move_parent(struct page *page, |
2710 | struct page_cgroup *pc, | 2727 | struct page_cgroup *pc, |
2711 | struct mem_cgroup *child) | 2728 | struct mem_cgroup *child) |
@@ -2732,8 +2749,10 @@ static int mem_cgroup_move_parent(struct page *page, | |||
2732 | if (!parent) | 2749 | if (!parent) |
2733 | parent = root_mem_cgroup; | 2750 | parent = root_mem_cgroup; |
2734 | 2751 | ||
2735 | if (nr_pages > 1) | 2752 | if (nr_pages > 1) { |
2753 | VM_BUG_ON(!PageTransHuge(page)); | ||
2736 | flags = compound_lock_irqsave(page); | 2754 | flags = compound_lock_irqsave(page); |
2755 | } | ||
2737 | 2756 | ||
2738 | ret = mem_cgroup_move_account(page, nr_pages, | 2757 | ret = mem_cgroup_move_account(page, nr_pages, |
2739 | pc, child, parent); | 2758 | pc, child, parent); |
@@ -3683,17 +3702,22 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, | |||
3683 | return nr_reclaimed; | 3702 | return nr_reclaimed; |
3684 | } | 3703 | } |
3685 | 3704 | ||
3686 | /* | 3705 | /** |
3706 | * mem_cgroup_force_empty_list - clears LRU of a group | ||
3707 | * @memcg: group to clear | ||
3708 | * @node: NUMA node | ||
3709 | * @zid: zone id | ||
3710 | * @lru: lru to clear | ||
3711 | * | ||
3687 | * Traverse a specified page_cgroup list and try to drop them all. This doesn't | 3712 | * Traverse a specified page_cgroup list and try to drop them all. This doesn't |
3688 | * reclaim the pages page themselves - it just removes the page_cgroups. | 3713 | * reclaim the pages themselves - pages are moved to the parent (or root) |
3689 | * Returns true if some page_cgroups were not freed, indicating that the caller | 3714 | * group. |
3690 | * must retry this operation. | ||
3691 | */ | 3715 | */ |
3692 | static bool mem_cgroup_force_empty_list(struct mem_cgroup *memcg, | 3716 | static void mem_cgroup_force_empty_list(struct mem_cgroup *memcg, |
3693 | int node, int zid, enum lru_list lru) | 3717 | int node, int zid, enum lru_list lru) |
3694 | { | 3718 | { |
3695 | struct mem_cgroup_per_zone *mz; | 3719 | struct mem_cgroup_per_zone *mz; |
3696 | unsigned long flags, loop; | 3720 | unsigned long flags; |
3697 | struct list_head *list; | 3721 | struct list_head *list; |
3698 | struct page *busy; | 3722 | struct page *busy; |
3699 | struct zone *zone; | 3723 | struct zone *zone; |
@@ -3702,11 +3726,8 @@ static bool mem_cgroup_force_empty_list(struct mem_cgroup *memcg, | |||
3702 | mz = mem_cgroup_zoneinfo(memcg, node, zid); | 3726 | mz = mem_cgroup_zoneinfo(memcg, node, zid); |
3703 | list = &mz->lruvec.lists[lru]; | 3727 | list = &mz->lruvec.lists[lru]; |
3704 | 3728 | ||
3705 | loop = mz->lru_size[lru]; | ||
3706 | /* give some margin against EBUSY etc...*/ | ||
3707 | loop += 256; | ||
3708 | busy = NULL; | 3729 | busy = NULL; |
3709 | while (loop--) { | 3730 | do { |
3710 | struct page_cgroup *pc; | 3731 | struct page_cgroup *pc; |
3711 | struct page *page; | 3732 | struct page *page; |
3712 | 3733 | ||
@@ -3732,8 +3753,7 @@ static bool mem_cgroup_force_empty_list(struct mem_cgroup *memcg, | |||
3732 | cond_resched(); | 3753 | cond_resched(); |
3733 | } else | 3754 | } else |
3734 | busy = NULL; | 3755 | busy = NULL; |
3735 | } | 3756 | } while (!list_empty(list)); |
3736 | return !list_empty(list); | ||
3737 | } | 3757 | } |
3738 | 3758 | ||
3739 | /* | 3759 | /* |
@@ -3747,7 +3767,6 @@ static int mem_cgroup_reparent_charges(struct mem_cgroup *memcg) | |||
3747 | { | 3767 | { |
3748 | struct cgroup *cgrp = memcg->css.cgroup; | 3768 | struct cgroup *cgrp = memcg->css.cgroup; |
3749 | int node, zid; | 3769 | int node, zid; |
3750 | int ret; | ||
3751 | 3770 | ||
3752 | do { | 3771 | do { |
3753 | if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children)) | 3772 | if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children)) |
@@ -3755,28 +3774,30 @@ static int mem_cgroup_reparent_charges(struct mem_cgroup *memcg) | |||
3755 | /* This is for making all *used* pages to be on LRU. */ | 3774 | /* This is for making all *used* pages to be on LRU. */ |
3756 | lru_add_drain_all(); | 3775 | lru_add_drain_all(); |
3757 | drain_all_stock_sync(memcg); | 3776 | drain_all_stock_sync(memcg); |
3758 | ret = 0; | ||
3759 | mem_cgroup_start_move(memcg); | 3777 | mem_cgroup_start_move(memcg); |
3760 | for_each_node_state(node, N_HIGH_MEMORY) { | 3778 | for_each_node_state(node, N_HIGH_MEMORY) { |
3761 | for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) { | 3779 | for (zid = 0; zid < MAX_NR_ZONES; zid++) { |
3762 | enum lru_list lru; | 3780 | enum lru_list lru; |
3763 | for_each_lru(lru) { | 3781 | for_each_lru(lru) { |
3764 | ret = mem_cgroup_force_empty_list(memcg, | 3782 | mem_cgroup_force_empty_list(memcg, |
3765 | node, zid, lru); | 3783 | node, zid, lru); |
3766 | if (ret) | ||
3767 | break; | ||
3768 | } | 3784 | } |
3769 | } | 3785 | } |
3770 | if (ret) | ||
3771 | break; | ||
3772 | } | 3786 | } |
3773 | mem_cgroup_end_move(memcg); | 3787 | mem_cgroup_end_move(memcg); |
3774 | memcg_oom_recover(memcg); | 3788 | memcg_oom_recover(memcg); |
3775 | cond_resched(); | 3789 | cond_resched(); |
3776 | /* "ret" should also be checked to ensure all lists are empty. */ | ||
3777 | } while (res_counter_read_u64(&memcg->res, RES_USAGE) > 0 || ret); | ||
3778 | 3790 | ||
3779 | return ret; | 3791 | /* |
3792 | * This is a safety check because mem_cgroup_force_empty_list | ||
3793 | * could have raced with mem_cgroup_replace_page_cache callers | ||
3794 | * so the lru seemed empty but the page could have been added | ||
3795 | * right after the check. RES_USAGE should be safe as we always | ||
3796 | * charge before adding to the LRU. | ||
3797 | */ | ||
3798 | } while (res_counter_read_u64(&memcg->res, RES_USAGE) > 0); | ||
3799 | |||
3800 | return 0; | ||
3780 | } | 3801 | } |
3781 | 3802 | ||
3782 | /* | 3803 | /* |
@@ -5618,7 +5639,6 @@ struct cgroup_subsys mem_cgroup_subsys = { | |||
5618 | .base_cftypes = mem_cgroup_files, | 5639 | .base_cftypes = mem_cgroup_files, |
5619 | .early_init = 0, | 5640 | .early_init = 0, |
5620 | .use_id = 1, | 5641 | .use_id = 1, |
5621 | .__DEPRECATED_clear_css_refs = true, | ||
5622 | }; | 5642 | }; |
5623 | 5643 | ||
5624 | #ifdef CONFIG_MEMCG_SWAP | 5644 | #ifdef CONFIG_MEMCG_SWAP |