aboutsummaryrefslogtreecommitdiffstats
path: root/mm/memory.c
diff options
context:
space:
mode:
author: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> 2009-01-07 21:07:48 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2009-01-08 11:31:04 -0500
commit: 7a81b88cb53e335ff7d019e6398c95792c817d93 (patch)
tree: 6ebca4d509a541ac707e10f9369916549e90c0ad /mm/memory.c
parent: 0b82ac37b889ec881b645860da3775118effb3ca (diff)
memcg: introduce charge-commit-cancel style of functions
There is a small race in do_swap_page(). When the swapped-in page is charged, its mapcount can be greater than 0. But, at the same time, some other process that shares the page may call unmap, making the mapcount go 1->0, and the page is uncharged.

    CPUA                              CPUB
        mapcount == 1.
    (1) charge if mapcount==0         zap_pte_range()
                                      (2) mapcount 1 => 0.
                                      (3) uncharge(). (success)
    (4) set page's rmap()
        mapcount 0=>1

Then, this swap page's account is leaked.

To fix this, I added a new interface.
  - charge
      account to res_counter by PAGE_SIZE and try to free pages if necessary.
  - commit
      register page_cgroup and add to LRU if necessary.
  - cancel
      uncharge PAGE_SIZE because of do_swap_page failure.

    CPUA
    (1) charge (always)
    (2) set page's rmap (mapcount > 0)
    (3) commit, deciding whether the charge was necessary or not, after set_pte().

This protocol uses the PCG_USED bit on page_cgroup to avoid over-accounting. The usual mem_cgroup_charge_common() does charge -> commit in one step.

This patch also adds the following functions to clarify all charges.

  - mem_cgroup_newpage_charge() .... replacement for mem_cgroup_charge(),
      called against newly allocated anon pages.

  - mem_cgroup_charge_migrate_fixup()
      called only from remove_migration_ptes().
      We'll have to rewrite this later. (This patch just keeps the old behavior.)
      This function will be removed by an additional patch to make migration clearer.

Good for clarifying "what we do".

Then, we have the following 4 charge points.
  - newpage
  - swap-in
  - add-to-cache
  - migration

[akpm@linux-foundation.org: add missing inline directives to stubs]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memory.c')
-rw-r--r--  mm/memory.c  12
1 files changed, 7 insertions, 5 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 3f8fa06b963..7f210f16099 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2000,7 +2000,7 @@ gotten:
2000 cow_user_page(new_page, old_page, address, vma); 2000 cow_user_page(new_page, old_page, address, vma);
2001 __SetPageUptodate(new_page); 2001 __SetPageUptodate(new_page);
2002 2002
2003 if (mem_cgroup_charge(new_page, mm, GFP_KERNEL)) 2003 if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))
2004 goto oom_free_new; 2004 goto oom_free_new;
2005 2005
2006 /* 2006 /*
@@ -2392,6 +2392,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2392 struct page *page; 2392 struct page *page;
2393 swp_entry_t entry; 2393 swp_entry_t entry;
2394 pte_t pte; 2394 pte_t pte;
2395 struct mem_cgroup *ptr = NULL;
2395 int ret = 0; 2396 int ret = 0;
2396 2397
2397 if (!pte_unmap_same(mm, pmd, page_table, orig_pte)) 2398 if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
@@ -2430,7 +2431,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2430 lock_page(page); 2431 lock_page(page);
2431 delayacct_clear_flag(DELAYACCT_PF_SWAPIN); 2432 delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
2432 2433
2433 if (mem_cgroup_charge(page, mm, GFP_KERNEL)) { 2434 if (mem_cgroup_try_charge(mm, GFP_KERNEL, &ptr) == -ENOMEM) {
2434 ret = VM_FAULT_OOM; 2435 ret = VM_FAULT_OOM;
2435 unlock_page(page); 2436 unlock_page(page);
2436 goto out; 2437 goto out;
@@ -2460,6 +2461,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2460 flush_icache_page(vma, page); 2461 flush_icache_page(vma, page);
2461 set_pte_at(mm, address, page_table, pte); 2462 set_pte_at(mm, address, page_table, pte);
2462 page_add_anon_rmap(page, vma, address); 2463 page_add_anon_rmap(page, vma, address);
2464 mem_cgroup_commit_charge_swapin(page, ptr);
2463 2465
2464 swap_free(entry); 2466 swap_free(entry);
2465 if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) 2467 if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
@@ -2480,7 +2482,7 @@ unlock:
2480out: 2482out:
2481 return ret; 2483 return ret;
2482out_nomap: 2484out_nomap:
2483 mem_cgroup_uncharge_page(page); 2485 mem_cgroup_cancel_charge_swapin(ptr);
2484 pte_unmap_unlock(page_table, ptl); 2486 pte_unmap_unlock(page_table, ptl);
2485 unlock_page(page); 2487 unlock_page(page);
2486 page_cache_release(page); 2488 page_cache_release(page);
@@ -2510,7 +2512,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
2510 goto oom; 2512 goto oom;
2511 __SetPageUptodate(page); 2513 __SetPageUptodate(page);
2512 2514
2513 if (mem_cgroup_charge(page, mm, GFP_KERNEL)) 2515 if (mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))
2514 goto oom_free_page; 2516 goto oom_free_page;
2515 2517
2516 entry = mk_pte(page, vma->vm_page_prot); 2518 entry = mk_pte(page, vma->vm_page_prot);
@@ -2601,7 +2603,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2601 ret = VM_FAULT_OOM; 2603 ret = VM_FAULT_OOM;
2602 goto out; 2604 goto out;
2603 } 2605 }
2604 if (mem_cgroup_charge(page, mm, GFP_KERNEL)) { 2606 if (mem_cgroup_newpage_charge(page, mm, GFP_KERNEL)) {
2605 ret = VM_FAULT_OOM; 2607 ret = VM_FAULT_OOM;
2606 page_cache_release(page); 2608 page_cache_release(page);
2607 goto out; 2609 goto out;