path: root/mm/swapfile.c
author	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>	2009-01-07 21:07:48 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-01-08 11:31:04 -0500
commit	7a81b88cb53e335ff7d019e6398c95792c817d93 (patch)
tree	6ebca4d509a541ac707e10f9369916549e90c0ad /mm/swapfile.c
parent	0b82ac37b889ec881b645860da3775118effb3ca (diff)
memcg: introduce charge-commit-cancel style of functions
There is a small race in do_swap_page(). When the swapped-in page is charged, its mapcount can be greater than 0. But at the same time some other process that shares it may call unmap, drop the mapcount from 1 to 0, and uncharge the page:

	CPUA                             CPUB
	  mapcount == 1.
	  (1) charge if mapcount==0     zap_pte_range()
	                                (2) mapcount 1 => 0.
	                                (3) uncharge(). (success)
	  (4) set page's rmap()
	      mapcount 0 => 1

The account for this swap page is then leaked.

To fix this, a new interface is added:

  - charge
	account PAGE_SIZE to the res_counter and try to free pages if necessary.
  - commit
	register the page_cgroup and add it to the LRU if necessary.
  - cancel
	uncharge PAGE_SIZE because do_swap_page() failed.

	CPUA
	  (1) charge (always)
	  (2) set page's rmap (mapcount > 0)
	  (3) commit the charge (whether it turned out to be necessary or not) after set_pte().

This protocol uses the PCG_USED bit on page_cgroup to avoid over-accounting (see the usage sketch below). The usual mem_cgroup_charge_common() still does charge -> commit in one step.

This patch also adds the following functions to clarify all charges:

  - mem_cgroup_newpage_charge()
	replacement for mem_cgroup_charge(), called against newly allocated
	anon pages.
  - mem_cgroup_charge_migrate_fixup()
	called only from remove_migration_ptes(); we'll have to rewrite this
	later (this patch just keeps the old behavior). This function will be
	removed by an additional patch to make migration clearer.

Good for clarifying "what we do".

We then have the following 4 charge points:
  - newpage
  - swap-in
  - add-to-cache
  - migration

[akpm@linux-foundation.org: add missing inline directives to stubs]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
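A minimal sketch of the charge -> rmap -> commit ordering on the swap-in path, with cancel on the failure path, is shown below. The mem_cgroup_try_charge(), mem_cgroup_commit_charge_swapin(), mem_cgroup_cancel_charge_swapin() and page_add_anon_rmap() calls mirror the unuse_pte() hunks in the diff; the surrounding scaffolding (the function name swapin_sketch() and the pte_still_matches() re-check helper) is hypothetical and only illustrates the ordering, not any real kernel function.

	/*
	 * Sketch only: (1) charge, (2) rmap, (3) commit, with cancel on the
	 * failure path.  swapin_sketch() and pte_still_matches() are
	 * hypothetical names; the mem_cgroup_* calls follow the call sites
	 * in unuse_pte() below.
	 */
	static int swapin_sketch(struct mm_struct *mm, struct vm_area_struct *vma,
				 unsigned long addr, pte_t *pte, struct page *page)
	{
		struct mem_cgroup *ptr = NULL;

		/* (1) charge: account PAGE_SIZE, possibly reclaiming;
		 *     no page_cgroup is registered yet */
		if (mem_cgroup_try_charge(mm, GFP_KERNEL, &ptr))
			return -ENOMEM;

		if (!pte_still_matches(pte)) {
			/* raced with unmap or another fault: give the charge back */
			mem_cgroup_cancel_charge_swapin(ptr);
			return 0;
		}

		/* (2) map the page; mapcount goes 0 -> 1 */
		page_add_anon_rmap(page, vma, addr);

		/*
		 * (3) commit: mark the page_cgroup PCG_USED and put it on the
		 * memcg LRU; if the page was already accounted, the surplus
		 * charge is dropped rather than counted twice.
		 */
		mem_cgroup_commit_charge_swapin(page, ptr);
		return 0;
	}

Because the charge is taken unconditionally before the pte is installed and only committed or cancelled afterwards, the CPUA/CPUB interleaving in the diagram above can no longer leak an account.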
Diffstat (limited to 'mm/swapfile.c')
-rw-r--r--	mm/swapfile.c	6
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index eec5ca758a23..fb926efb5167 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -690,17 +690,18 @@ unsigned int count_swap_pages(int type, int free)
 static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, swp_entry_t entry, struct page *page)
 {
+	struct mem_cgroup *ptr = NULL;
 	spinlock_t *ptl;
 	pte_t *pte;
 	int ret = 1;
 
-	if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL))
+	if (mem_cgroup_try_charge(vma->vm_mm, GFP_KERNEL, &ptr))
 		ret = -ENOMEM;
 
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) {
 		if (ret > 0)
-			mem_cgroup_uncharge_page(page);
+			mem_cgroup_cancel_charge_swapin(ptr);
 		ret = 0;
 		goto out;
 	}
@@ -710,6 +711,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 	set_pte_at(vma->vm_mm, addr, pte,
 		   pte_mkold(mk_pte(page, vma->vm_page_prot)));
 	page_add_anon_rmap(page, vma, addr);
+	mem_cgroup_commit_charge_swapin(page, ptr);
 	swap_free(entry);
 	/*
 	 * Move the page to the active list so it is not
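For reference, the prototypes implied by the call sites in these hunks would be roughly the following. They are inferred from the diff alone; the real declarations live in include/linux/memcontrol.h, which is not part of this excerpt, so the exact return types (the void returns in particular) are an assumption.

	/* Prototypes inferred from the unuse_pte() call sites above (assumed) */
	int mem_cgroup_try_charge(struct mm_struct *mm, gfp_t gfp_mask,
				  struct mem_cgroup **ptr);
	void mem_cgroup_commit_charge_swapin(struct page *page,
					     struct mem_cgroup *ptr);
	void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr);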