path: root/mm/swapfile.c
author    KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>  2009-01-07 21:08:00 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>      2009-01-08 11:31:05 -0500
commit    8c7c6e34a1256a5082d38c8e9bd1474476912715 (patch)
tree      09f53c7c4bac5532a9ecbdadb4450702c744ea6f /mm/swapfile.c
parent    27a7faa0779dd13729196c1a818c294f44bbd1ee (diff)
memcg: mem+swap controller core
This patch implements a per-cgroup limit on the usage of memory+swap. Where
SwapCache is involved, double counting of swap-cache and swap-entry is
avoided.

The mem+swap controller works as follows.
  - memory usage is limited by memory.limit_in_bytes.
  - memory + swap usage is limited by memory.memsw_limit_in_bytes.

This has the following benefits.
  - A user can limit the total resource usage of mem+swap.
    Without this, because the memory resource controller does not take care
    of swap usage, a process can exhaust all of the swap (e.g. by a memory
    leak). This limit avoids that case.

    Swap is a shared resource, but it cannot be reclaimed (go back to
    memory) until it is used. This characteristic can be trouble when memory
    is divided into parts by cpuset or memcg. Assume group A and group B.
    After some applications have run, the system can end up as:

      Group A -- very large free memory space, but occupying 99% of swap.
      Group B -- under memory shortage, but unable to use swap... it is
                 nearly full.

    The ability to set an appropriate swap limit for each group is required.

Some may wonder, "why mem+swap rather than just swap?"
  - The global LRU (kswapd) can swap out arbitrary pages. Swap-out means
    moving an account from memory to swap... there is no change in the
    usage of mem+swap. In other words, when we want to limit swap usage
    without affecting the global LRU, a mem+swap limit is better than just
    limiting swap.

Accounting target information is stored in swap_cgroup, a per-swap-entry
record. Charging is done as follows.

  map
    - charge page and memsw.

  unmap
    - uncharge page/memsw if not SwapCache.

  swap-out (__delete_from_swap_cache)
    - uncharge page
    - record mem_cgroup information to swap_cgroup.

  swap-in (do_swap_page)
    - charged as page and memsw.
      The record in swap_cgroup is cleared.
      memsw accounting is decremented.

  swap-free (swap_free())
    - if the swap entry is freed, memsw is uncharged by PAGE_SIZE.

Some people work in never-swap environments and consider swap to be
something bad. For such people, this mem+swap controller extension is just
an overhead. The overhead can be avoided by a config or boot option
(see Kconfig; details are not in this patch).

TODO:
  - maybe more optimization can be done in the swap-in path (but it is not
    very safe). For now we just do simple accounting at this stage.

[nishimura@mxp.nes.nec.co.jp: make resize limit hold mutex]
[hugh@veritas.com: memswap controller core swapcache fixes]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
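As a concrete illustration of the charge/uncharge flow described above, here
is a minimal userspace sketch (not the kernel implementation): it models the
per-swap-entry record and shows how the page charge drops at swap-out while
mem+swap stays charged until the swap entry is freed. All names here
(demo_group, demo_swap_out, demo_swap_free, DEMO_*) are hypothetical.

/*
 * Userspace model of the mem+swap accounting idea, assuming a small
 * fixed-size swap area and a simplified "group" with two counters.
 */
#include <stdio.h>

#define DEMO_SWAP_ENTRIES 1024
#define DEMO_PAGE_SIZE    4096UL

struct demo_group {
	const char *name;
	unsigned long mem_usage;	/* analogous to memory usage accounting   */
	unsigned long memsw_usage;	/* analogous to mem+swap usage accounting */
};

/* one record per swap entry, analogous to the per-entry swap_cgroup record */
static struct demo_group *swap_record[DEMO_SWAP_ENTRIES];

/*
 * swap-out: the page charge is dropped, but the owning group is remembered
 * so that mem+swap stays charged until the swap entry itself is freed.
 */
static void demo_swap_out(struct demo_group *g, unsigned long offset)
{
	g->mem_usage -= DEMO_PAGE_SIZE;	/* uncharge page            */
	swap_record[offset] = g;	/* record owner information */
}

/* swap-free: once the swap entry is freed, mem+swap is finally uncharged. */
static void demo_swap_free(unsigned long offset)
{
	struct demo_group *g = swap_record[offset];

	if (g) {
		g->memsw_usage -= DEMO_PAGE_SIZE;
		swap_record[offset] = NULL;
	}
}

int main(void)
{
	/* one mapped page already charged to both counters ("map" step) */
	struct demo_group a = { "A", DEMO_PAGE_SIZE, DEMO_PAGE_SIZE };

	demo_swap_out(&a, 0);	/* mem drops, mem+swap unchanged */
	printf("%s: mem=%lu memsw=%lu\n", a.name, a.mem_usage, a.memsw_usage);
	demo_swap_free(0);	/* mem+swap finally uncharged    */
	printf("%s: mem=%lu memsw=%lu\n", a.name, a.mem_usage, a.memsw_usage);
	return 0;
}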
Diffstat (limited to 'mm/swapfile.c')
-rw-r--r--  mm/swapfile.c  11
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 1e7a715a3866..0579d9069b61 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -471,8 +471,9 @@ out:
 	return NULL;
 }
 
-static int swap_entry_free(struct swap_info_struct *p, unsigned long offset)
+static int swap_entry_free(struct swap_info_struct *p, swp_entry_t ent)
 {
+	unsigned long offset = swp_offset(ent);
 	int count = p->swap_map[offset];
 
 	if (count < SWAP_MAP_MAX) {
@@ -487,6 +488,7 @@ static int swap_entry_free(struct swap_info_struct *p, unsigned long offset)
 				swap_list.next = p - swap_info;
 			nr_swap_pages++;
 			p->inuse_pages--;
+			mem_cgroup_uncharge_swap(ent);
 		}
 	}
 	return count;
@@ -502,7 +504,7 @@ void swap_free(swp_entry_t entry)
 
 	p = swap_info_get(entry);
 	if (p) {
-		swap_entry_free(p, swp_offset(entry));
+		swap_entry_free(p, entry);
 		spin_unlock(&swap_lock);
 	}
 }
@@ -582,7 +584,7 @@ int free_swap_and_cache(swp_entry_t entry)
 
 	p = swap_info_get(entry);
 	if (p) {
-		if (swap_entry_free(p, swp_offset(entry)) == 1) {
+		if (swap_entry_free(p, entry) == 1) {
 			page = find_get_page(&swapper_space, entry.val);
 			if (page && !trylock_page(page)) {
 				page_cache_release(page);
@@ -696,7 +698,8 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 	pte_t *pte;
 	int ret = 1;
 
-	if (mem_cgroup_try_charge(vma->vm_mm, GFP_HIGHUSER_MOVABLE, &ptr))
+	if (mem_cgroup_try_charge_swapin(vma->vm_mm, page,
+						GFP_HIGHUSER_MOVABLE, &ptr))
 		ret = -ENOMEM;
 
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);