diff options
author | KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> | 2009-01-07 21:07:48 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-01-08 11:31:04 -0500 |
commit | 7a81b88cb53e335ff7d019e6398c95792c817d93 (patch) | |
tree | 6ebca4d509a541ac707e10f9369916549e90c0ad /mm/memory.c | |
parent | 0b82ac37b889ec881b645860da3775118effb3ca (diff) |
memcg: introduce charge-commit-cancel style of functions
There is a small race in do_swap_page(). When the page swapped-in is
charged, the mapcount can be greater than 0. But, at the same time some
process (shares it ) call unmap and make mapcount 1->0 and the page is
uncharged.
CPUA CPUB
mapcount == 1.
(1) charge if mapcount==0 zap_pte_range()
(2) mapcount 1 => 0.
(3) uncharge(). (success)
(4) set page's rmap()
mapcount 0=>1
Then, this swap page's account is leaked.
For fixing this, I added a new interface.
- charge
account to res_counter by PAGE_SIZE and try to free pages if necessary.
- commit
register page_cgroup and add to LRU if necessary.
- cancel
uncharge PAGE_SIZE because of do_swap_page failure.
CPUA
(1) charge (always)
(2) set page's rmap (mapcount > 0)
(3) commit charge was necessary or not after set_pte().
This protocol uses PCG_USED bit on page_cgroup for avoiding over accounting.
Usual mem_cgroup_charge_common() does charge -> commit at a time.
And this patch also adds following function to clarify all charges.
- mem_cgroup_newpage_charge() ....replacement for mem_cgroup_charge()
called against newly allocated anon pages.
- mem_cgroup_charge_migrate_fixup()
called only from remove_migration_ptes().
we'll have to rewrite this later.(this patch just keeps old behavior)
This function will be removed by additional patch to make migration
clearer.
Good for clarifying "what we do"
Then, we have 4 following charge points.
- newpage
- swap-in
- add-to-cache.
- migration.
[akpm@linux-foundation.org: add missing inline directives to stubs]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- | mm/memory.c | 12 |
1 files changed, 7 insertions, 5 deletions
diff --git a/mm/memory.c b/mm/memory.c index 3f8fa06b963..7f210f16099 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -2000,7 +2000,7 @@ gotten: | |||
2000 | cow_user_page(new_page, old_page, address, vma); | 2000 | cow_user_page(new_page, old_page, address, vma); |
2001 | __SetPageUptodate(new_page); | 2001 | __SetPageUptodate(new_page); |
2002 | 2002 | ||
2003 | if (mem_cgroup_charge(new_page, mm, GFP_KERNEL)) | 2003 | if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)) |
2004 | goto oom_free_new; | 2004 | goto oom_free_new; |
2005 | 2005 | ||
2006 | /* | 2006 | /* |
@@ -2392,6 +2392,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2392 | struct page *page; | 2392 | struct page *page; |
2393 | swp_entry_t entry; | 2393 | swp_entry_t entry; |
2394 | pte_t pte; | 2394 | pte_t pte; |
2395 | struct mem_cgroup *ptr = NULL; | ||
2395 | int ret = 0; | 2396 | int ret = 0; |
2396 | 2397 | ||
2397 | if (!pte_unmap_same(mm, pmd, page_table, orig_pte)) | 2398 | if (!pte_unmap_same(mm, pmd, page_table, orig_pte)) |
@@ -2430,7 +2431,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2430 | lock_page(page); | 2431 | lock_page(page); |
2431 | delayacct_clear_flag(DELAYACCT_PF_SWAPIN); | 2432 | delayacct_clear_flag(DELAYACCT_PF_SWAPIN); |
2432 | 2433 | ||
2433 | if (mem_cgroup_charge(page, mm, GFP_KERNEL)) { | 2434 | if (mem_cgroup_try_charge(mm, GFP_KERNEL, &ptr) == -ENOMEM) { |
2434 | ret = VM_FAULT_OOM; | 2435 | ret = VM_FAULT_OOM; |
2435 | unlock_page(page); | 2436 | unlock_page(page); |
2436 | goto out; | 2437 | goto out; |
@@ -2460,6 +2461,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2460 | flush_icache_page(vma, page); | 2461 | flush_icache_page(vma, page); |
2461 | set_pte_at(mm, address, page_table, pte); | 2462 | set_pte_at(mm, address, page_table, pte); |
2462 | page_add_anon_rmap(page, vma, address); | 2463 | page_add_anon_rmap(page, vma, address); |
2464 | mem_cgroup_commit_charge_swapin(page, ptr); | ||
2463 | 2465 | ||
2464 | swap_free(entry); | 2466 | swap_free(entry); |
2465 | if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) | 2467 | if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) |
@@ -2480,7 +2482,7 @@ unlock: | |||
2480 | out: | 2482 | out: |
2481 | return ret; | 2483 | return ret; |
2482 | out_nomap: | 2484 | out_nomap: |
2483 | mem_cgroup_uncharge_page(page); | 2485 | mem_cgroup_cancel_charge_swapin(ptr); |
2484 | pte_unmap_unlock(page_table, ptl); | 2486 | pte_unmap_unlock(page_table, ptl); |
2485 | unlock_page(page); | 2487 | unlock_page(page); |
2486 | page_cache_release(page); | 2488 | page_cache_release(page); |
@@ -2510,7 +2512,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2510 | goto oom; | 2512 | goto oom; |
2511 | __SetPageUptodate(page); | 2513 | __SetPageUptodate(page); |
2512 | 2514 | ||
2513 | if (mem_cgroup_charge(page, mm, GFP_KERNEL)) | 2515 | if (mem_cgroup_newpage_charge(page, mm, GFP_KERNEL)) |
2514 | goto oom_free_page; | 2516 | goto oom_free_page; |
2515 | 2517 | ||
2516 | entry = mk_pte(page, vma->vm_page_prot); | 2518 | entry = mk_pte(page, vma->vm_page_prot); |
@@ -2601,7 +2603,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2601 | ret = VM_FAULT_OOM; | 2603 | ret = VM_FAULT_OOM; |
2602 | goto out; | 2604 | goto out; |
2603 | } | 2605 | } |
2604 | if (mem_cgroup_charge(page, mm, GFP_KERNEL)) { | 2606 | if (mem_cgroup_newpage_charge(page, mm, GFP_KERNEL)) { |
2605 | ret = VM_FAULT_OOM; | 2607 | ret = VM_FAULT_OOM; |
2606 | page_cache_release(page); | 2608 | page_cache_release(page); |
2607 | goto out; | 2609 | goto out; |