aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>2008-10-18 23:28:10 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-10-20 11:52:38 -0400
commit5b4e655e948d8b6e9b0d001616d4c9d7e7ffe924 (patch)
tree2822f944d1bdbc8414a65cd94b982c51b5032b50
parentb7abea9630bc8ffc663a751e46680db25c4cdf8d (diff)
memcg: avoid accounting special pages
There are not-on-LRU pages which can be mapped, and they are not worth accounting. (Because we can't shrink them, and special-case code would be needed to handle them.) We'd like to make use of the usual objrmap/radix-tree protocol and don't want to account pages that are outside the vm's control. When special_mapping_fault() is called, page->mapping tends to be NULL and the page is charged as an anonymous page. insert_page() also handles some special pages from drivers. This patch avoids accounting such special pages. Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Cc: Balbir Singh <balbir@linux.vnet.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--Documentation/controllers/memory.txt24
-rw-r--r--mm/memory.c25
-rw-r--r--mm/rmap.c4
3 files changed, 29 insertions, 24 deletions
diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt
index 9b53d5827361..1c07547d3f81 100644
--- a/Documentation/controllers/memory.txt
+++ b/Documentation/controllers/memory.txt
@@ -112,14 +112,22 @@ the per cgroup LRU.
112 112
1132.2.1 Accounting details 1132.2.1 Accounting details
114 114
115All mapped pages (RSS) and unmapped user pages (Page Cache) are accounted. 115All mapped anon pages (RSS) and cache pages (Page Cache) are accounted.
116RSS pages are accounted at the time of page_add_*_rmap() unless they've already 116(some pages which never be reclaimable and will not be on global LRU
117been accounted for earlier. A file page will be accounted for as Page Cache; 117 are not accounted. we just accounts pages under usual vm management.)
118it's mapped into the page tables of a process, duplicate accounting is carefully 118
119avoided. Page Cache pages are accounted at the time of add_to_page_cache(). 119RSS pages are accounted at page_fault unless they've already been accounted
120The corresponding routines that remove a page from the page tables or removes 120for earlier. A file page will be accounted for as Page Cache when it's
121a page from Page Cache is used to decrement the accounting counters of the 121inserted into inode (radix-tree). While it's mapped into the page tables of
122cgroup. 122processes, duplicate accounting is carefully avoided.
123
124A RSS page is unaccounted when it's fully unmapped. A PageCache page is
125unaccounted when it's removed from radix-tree.
126
127At page migration, accounting information is kept.
128
129Note: we just account pages-on-lru because our purpose is to control amount
130of used pages. not-on-lru pages are tend to be out-of-control from vm view.
123 131
1242.3 Shared Page Accounting 1322.3 Shared Page Accounting
125 133
diff --git a/mm/memory.c b/mm/memory.c
index 54cf20ee0a83..3a6c4a658325 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1323,18 +1323,14 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
1323 pte_t *pte; 1323 pte_t *pte;
1324 spinlock_t *ptl; 1324 spinlock_t *ptl;
1325 1325
1326 retval = mem_cgroup_charge(page, mm, GFP_KERNEL);
1327 if (retval)
1328 goto out;
1329
1330 retval = -EINVAL; 1326 retval = -EINVAL;
1331 if (PageAnon(page)) 1327 if (PageAnon(page))
1332 goto out_uncharge; 1328 goto out;
1333 retval = -ENOMEM; 1329 retval = -ENOMEM;
1334 flush_dcache_page(page); 1330 flush_dcache_page(page);
1335 pte = get_locked_pte(mm, addr, &ptl); 1331 pte = get_locked_pte(mm, addr, &ptl);
1336 if (!pte) 1332 if (!pte)
1337 goto out_uncharge; 1333 goto out;
1338 retval = -EBUSY; 1334 retval = -EBUSY;
1339 if (!pte_none(*pte)) 1335 if (!pte_none(*pte))
1340 goto out_unlock; 1336 goto out_unlock;
@@ -1350,8 +1346,6 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
1350 return retval; 1346 return retval;
1351out_unlock: 1347out_unlock:
1352 pte_unmap_unlock(pte, ptl); 1348 pte_unmap_unlock(pte, ptl);
1353out_uncharge:
1354 mem_cgroup_uncharge_page(page);
1355out: 1349out:
1356 return retval; 1350 return retval;
1357} 1351}
@@ -2463,6 +2457,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2463 struct page *page; 2457 struct page *page;
2464 pte_t entry; 2458 pte_t entry;
2465 int anon = 0; 2459 int anon = 0;
2460 int charged = 0;
2466 struct page *dirty_page = NULL; 2461 struct page *dirty_page = NULL;
2467 struct vm_fault vmf; 2462 struct vm_fault vmf;
2468 int ret; 2463 int ret;
@@ -2503,6 +2498,12 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2503 ret = VM_FAULT_OOM; 2498 ret = VM_FAULT_OOM;
2504 goto out; 2499 goto out;
2505 } 2500 }
2501 if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
2502 ret = VM_FAULT_OOM;
2503 page_cache_release(page);
2504 goto out;
2505 }
2506 charged = 1;
2506 /* 2507 /*
2507 * Don't let another task, with possibly unlocked vma, 2508 * Don't let another task, with possibly unlocked vma,
2508 * keep the mlocked page. 2509 * keep the mlocked page.
@@ -2543,11 +2544,6 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2543 2544
2544 } 2545 }
2545 2546
2546 if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
2547 ret = VM_FAULT_OOM;
2548 goto out;
2549 }
2550
2551 page_table = pte_offset_map_lock(mm, pmd, address, &ptl); 2547 page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
2552 2548
2553 /* 2549 /*
@@ -2585,7 +2581,8 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2585 /* no need to invalidate: a not-present page won't be cached */ 2581 /* no need to invalidate: a not-present page won't be cached */
2586 update_mmu_cache(vma, address, entry); 2582 update_mmu_cache(vma, address, entry);
2587 } else { 2583 } else {
2588 mem_cgroup_uncharge_page(page); 2584 if (charged)
2585 mem_cgroup_uncharge_page(page);
2589 if (anon) 2586 if (anon)
2590 page_cache_release(page); 2587 page_cache_release(page);
2591 else 2588 else
diff --git a/mm/rmap.c b/mm/rmap.c
index 7e90bebbeb6c..8701d5fce732 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -727,8 +727,8 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
727 page_clear_dirty(page); 727 page_clear_dirty(page);
728 set_page_dirty(page); 728 set_page_dirty(page);
729 } 729 }
730 730 if (PageAnon(page))
731 mem_cgroup_uncharge_page(page); 731 mem_cgroup_uncharge_page(page);
732 __dec_zone_page_state(page, 732 __dec_zone_page_state(page,
733 PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED); 733 PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED);
734 /* 734 /*