aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorKAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>2012-05-29 18:06:51 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-05-29 19:22:24 -0400
commit4b91355e9dc9ac1eb3d69e56de093899ff2677ef (patch)
treeb6d18618013eadd3b3e3144c90b16d0d5f07b3af /mm
parent181eb39425f2b9275afcb015eaa547d11f71a02f (diff)
memcg: fix/change behavior of shared anon at moving task
This patch changes memcg's behavior at task_move(). At task_move(), the kernel scans a task's page table and move the changes for mapped pages from source cgroup to target cgroup. There has been a bug at handling shared anonymous pages for a long time. Before patch: - The spec says 'shared anonymous pages are not moved.' - The implementation was 'shared anonymoys pages may be moved'. If page_mapcount <=2, shared anonymous pages's charge were moved. After patch: - The spec says 'all anonymous pages are moved'. - The implementation is 'all anonymous pages are moved'. Considering usage of memcg, this will not affect user's experience. 'shared anonymous' pages only exists between a tree of processes which don't do exec(). Moving one of process without exec() seems not sane. For example, libcgroup will not be affected by this change. (Anyway, no one noticed the implementation for a long time...) Below is a discussion log: - current spec/implementation are complex - Now, shared file caches are moved - It adds unclear check as page_mapcount(). To do correct check, we should check swap users, etc. - No one notice this implementation behavior. So, no one get benefit from the design. - In general, once task is moved to a cgroup for running, it will not be moved.... - Finally, we have control knob as memory.move_charge_at_immigrate. Here is a patch to allow moving shared pages, completely. This makes memcg simpler and fix current broken code. Suggested-by: Hugh Dickins <hughd@google.com> Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Acked-by: Michal Hocko <mhocko@suse.cz> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> Cc: Glauber Costa <glommer@parallels.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/memcontrol.c22
-rw-r--r--mm/swapfile.c31
2 files changed, 14 insertions, 39 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d7ce417cae7c..e7db70f3d2d6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5154,7 +5154,7 @@ static struct page *mc_handle_present_pte(struct vm_area_struct *vma,
5154 return NULL; 5154 return NULL;
5155 if (PageAnon(page)) { 5155 if (PageAnon(page)) {
5156 /* we don't move shared anon */ 5156 /* we don't move shared anon */
5157 if (!move_anon() || page_mapcount(page) > 2) 5157 if (!move_anon())
5158 return NULL; 5158 return NULL;
5159 } else if (!move_file()) 5159 } else if (!move_file())
5160 /* we ignore mapcount for file pages */ 5160 /* we ignore mapcount for file pages */
@@ -5165,26 +5165,32 @@ static struct page *mc_handle_present_pte(struct vm_area_struct *vma,
5165 return page; 5165 return page;
5166} 5166}
5167 5167
5168#ifdef CONFIG_SWAP
5168static struct page *mc_handle_swap_pte(struct vm_area_struct *vma, 5169static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
5169 unsigned long addr, pte_t ptent, swp_entry_t *entry) 5170 unsigned long addr, pte_t ptent, swp_entry_t *entry)
5170{ 5171{
5171 int usage_count;
5172 struct page *page = NULL; 5172 struct page *page = NULL;
5173 swp_entry_t ent = pte_to_swp_entry(ptent); 5173 swp_entry_t ent = pte_to_swp_entry(ptent);
5174 5174
5175 if (!move_anon() || non_swap_entry(ent)) 5175 if (!move_anon() || non_swap_entry(ent))
5176 return NULL; 5176 return NULL;
5177 usage_count = mem_cgroup_count_swap_user(ent, &page); 5177 /*
5178 if (usage_count > 1) { /* we don't move shared anon */ 5178 * Because lookup_swap_cache() updates some statistics counter,
5179 if (page) 5179 * we call find_get_page() with swapper_space directly.
5180 put_page(page); 5180 */
5181 return NULL; 5181 page = find_get_page(&swapper_space, ent.val);
5182 }
5183 if (do_swap_account) 5182 if (do_swap_account)
5184 entry->val = ent.val; 5183 entry->val = ent.val;
5185 5184
5186 return page; 5185 return page;
5187} 5186}
5187#else
5188static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
5189 unsigned long addr, pte_t ptent, swp_entry_t *entry)
5190{
5191 return NULL;
5192}
5193#endif
5188 5194
5189static struct page *mc_handle_file_pte(struct vm_area_struct *vma, 5195static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
5190 unsigned long addr, pte_t ptent, swp_entry_t *entry) 5196 unsigned long addr, pte_t ptent, swp_entry_t *entry)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index b0c86e92f42c..457b10baef59 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -717,37 +717,6 @@ int free_swap_and_cache(swp_entry_t entry)
717 return p != NULL; 717 return p != NULL;
718} 718}
719 719
720#ifdef CONFIG_CGROUP_MEM_RES_CTLR
721/**
722 * mem_cgroup_count_swap_user - count the user of a swap entry
723 * @ent: the swap entry to be checked
724 * @pagep: the pointer for the swap cache page of the entry to be stored
725 *
726 * Returns the number of the user of the swap entry. The number is valid only
727 * for swaps of anonymous pages.
728 * If the entry is found on swap cache, the page is stored to pagep with
729 * refcount of it being incremented.
730 */
731int mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep)
732{
733 struct page *page;
734 struct swap_info_struct *p;
735 int count = 0;
736
737 page = find_get_page(&swapper_space, ent.val);
738 if (page)
739 count += page_mapcount(page);
740 p = swap_info_get(ent);
741 if (p) {
742 count += swap_count(p->swap_map[swp_offset(ent)]);
743 spin_unlock(&swap_lock);
744 }
745
746 *pagep = page;
747 return count;
748}
749#endif
750
751#ifdef CONFIG_HIBERNATION 720#ifdef CONFIG_HIBERNATION
752/* 721/*
753 * Find the swap type that corresponds to given device (if any). 722 * Find the swap type that corresponds to given device (if any).