diff options
author | KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> | 2012-05-29 18:06:51 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-05-29 19:22:24 -0400 |
commit | 4b91355e9dc9ac1eb3d69e56de093899ff2677ef (patch) | |
tree | b6d18618013eadd3b3e3144c90b16d0d5f07b3af /mm | |
parent | 181eb39425f2b9275afcb015eaa547d11f71a02f (diff) |
memcg: fix/change behavior of shared anon at moving task
This patch changes memcg's behavior at task_move().
At task_move(), the kernel scans a task's page table and move the changes
for mapped pages from source cgroup to target cgroup. There has been a
bug at handling shared anonymous pages for a long time.
Before patch:
- The spec says 'shared anonymous pages are not moved.'
- The implementation was 'shared anonymoys pages may be moved'.
If page_mapcount <=2, shared anonymous pages's charge were moved.
After patch:
- The spec says 'all anonymous pages are moved'.
- The implementation is 'all anonymous pages are moved'.
Considering usage of memcg, this will not affect user's experience.
'shared anonymous' pages only exists between a tree of processes which
don't do exec(). Moving one of process without exec() seems not sane.
For example, libcgroup will not be affected by this change. (Anyway, no
one noticed the implementation for a long time...)
Below is a discussion log:
- current spec/implementation are complex
- Now, shared file caches are moved
- It adds unclear check as page_mapcount(). To do correct check,
we should check swap users, etc.
- No one notice this implementation behavior. So, no one get benefit
from the design.
- In general, once task is moved to a cgroup for running, it will not
be moved....
- Finally, we have control knob as memory.move_charge_at_immigrate.
Here is a patch to allow moving shared pages, completely. This makes
memcg simpler and fix current broken code.
Suggested-by: Hugh Dickins <hughd@google.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Glauber Costa <glommer@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/memcontrol.c | 22 | ||||
-rw-r--r-- | mm/swapfile.c | 31 |
2 files changed, 14 insertions, 39 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d7ce417cae7c..e7db70f3d2d6 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -5154,7 +5154,7 @@ static struct page *mc_handle_present_pte(struct vm_area_struct *vma, | |||
5154 | return NULL; | 5154 | return NULL; |
5155 | if (PageAnon(page)) { | 5155 | if (PageAnon(page)) { |
5156 | /* we don't move shared anon */ | 5156 | /* we don't move shared anon */ |
5157 | if (!move_anon() || page_mapcount(page) > 2) | 5157 | if (!move_anon()) |
5158 | return NULL; | 5158 | return NULL; |
5159 | } else if (!move_file()) | 5159 | } else if (!move_file()) |
5160 | /* we ignore mapcount for file pages */ | 5160 | /* we ignore mapcount for file pages */ |
@@ -5165,26 +5165,32 @@ static struct page *mc_handle_present_pte(struct vm_area_struct *vma, | |||
5165 | return page; | 5165 | return page; |
5166 | } | 5166 | } |
5167 | 5167 | ||
5168 | #ifdef CONFIG_SWAP | ||
5168 | static struct page *mc_handle_swap_pte(struct vm_area_struct *vma, | 5169 | static struct page *mc_handle_swap_pte(struct vm_area_struct *vma, |
5169 | unsigned long addr, pte_t ptent, swp_entry_t *entry) | 5170 | unsigned long addr, pte_t ptent, swp_entry_t *entry) |
5170 | { | 5171 | { |
5171 | int usage_count; | ||
5172 | struct page *page = NULL; | 5172 | struct page *page = NULL; |
5173 | swp_entry_t ent = pte_to_swp_entry(ptent); | 5173 | swp_entry_t ent = pte_to_swp_entry(ptent); |
5174 | 5174 | ||
5175 | if (!move_anon() || non_swap_entry(ent)) | 5175 | if (!move_anon() || non_swap_entry(ent)) |
5176 | return NULL; | 5176 | return NULL; |
5177 | usage_count = mem_cgroup_count_swap_user(ent, &page); | 5177 | /* |
5178 | if (usage_count > 1) { /* we don't move shared anon */ | 5178 | * Because lookup_swap_cache() updates some statistics counter, |
5179 | if (page) | 5179 | * we call find_get_page() with swapper_space directly. |
5180 | put_page(page); | 5180 | */ |
5181 | return NULL; | 5181 | page = find_get_page(&swapper_space, ent.val); |
5182 | } | ||
5183 | if (do_swap_account) | 5182 | if (do_swap_account) |
5184 | entry->val = ent.val; | 5183 | entry->val = ent.val; |
5185 | 5184 | ||
5186 | return page; | 5185 | return page; |
5187 | } | 5186 | } |
5187 | #else | ||
5188 | static struct page *mc_handle_swap_pte(struct vm_area_struct *vma, | ||
5189 | unsigned long addr, pte_t ptent, swp_entry_t *entry) | ||
5190 | { | ||
5191 | return NULL; | ||
5192 | } | ||
5193 | #endif | ||
5188 | 5194 | ||
5189 | static struct page *mc_handle_file_pte(struct vm_area_struct *vma, | 5195 | static struct page *mc_handle_file_pte(struct vm_area_struct *vma, |
5190 | unsigned long addr, pte_t ptent, swp_entry_t *entry) | 5196 | unsigned long addr, pte_t ptent, swp_entry_t *entry) |
diff --git a/mm/swapfile.c b/mm/swapfile.c index b0c86e92f42c..457b10baef59 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -717,37 +717,6 @@ int free_swap_and_cache(swp_entry_t entry) | |||
717 | return p != NULL; | 717 | return p != NULL; |
718 | } | 718 | } |
719 | 719 | ||
720 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR | ||
721 | /** | ||
722 | * mem_cgroup_count_swap_user - count the user of a swap entry | ||
723 | * @ent: the swap entry to be checked | ||
724 | * @pagep: the pointer for the swap cache page of the entry to be stored | ||
725 | * | ||
726 | * Returns the number of the user of the swap entry. The number is valid only | ||
727 | * for swaps of anonymous pages. | ||
728 | * If the entry is found on swap cache, the page is stored to pagep with | ||
729 | * refcount of it being incremented. | ||
730 | */ | ||
731 | int mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep) | ||
732 | { | ||
733 | struct page *page; | ||
734 | struct swap_info_struct *p; | ||
735 | int count = 0; | ||
736 | |||
737 | page = find_get_page(&swapper_space, ent.val); | ||
738 | if (page) | ||
739 | count += page_mapcount(page); | ||
740 | p = swap_info_get(ent); | ||
741 | if (p) { | ||
742 | count += swap_count(p->swap_map[swp_offset(ent)]); | ||
743 | spin_unlock(&swap_lock); | ||
744 | } | ||
745 | |||
746 | *pagep = page; | ||
747 | return count; | ||
748 | } | ||
749 | #endif | ||
750 | |||
751 | #ifdef CONFIG_HIBERNATION | 720 | #ifdef CONFIG_HIBERNATION |
752 | /* | 721 | /* |
753 | * Find the swap type that corresponds to given device (if any). | 722 | * Find the swap type that corresponds to given device (if any). |