author    Rik van Riel <riel@redhat.com>  2008-10-18 23:26:32 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2008-10-20 11:50:25 -0400
commit    4f98a2fee8acdb4ac84545df98cccecfd130f8db
tree      035a2937f4c3e2f7b4269412041c073ac646937c  /mm/memcontrol.c
parent    b2e185384f534781fd22f5ce170b2ad26f97df70
vmscan: split LRU lists into anon & file sets
Split the LRU lists in two, one set for pages that are backed by real file
systems ("file") and one for pages that are backed by memory and swap
("anon").  The latter includes tmpfs.

The advantage of doing this is that the VM will not have to scan over lots
of anonymous pages (which we generally do not want to swap out), just to
find the page cache pages that it should evict.

This patch has the infrastructure and a basic policy to balance how much
we scan the anon lists and how much we scan the file lists.  The big
policy changes are in separate patches.

[lee.schermerhorn@hp.com: collect lru meminfo statistics from correct offset]
[kosaki.motohiro@jp.fujitsu.com: prevent incorrect oom under split_lru]
[kosaki.motohiro@jp.fujitsu.com: fix pagevec_move_tail() doesn't treat unevictable page]
[hugh@veritas.com: memcg swapbacked pages active]
[hugh@veritas.com: splitlru: BDI_CAP_SWAP_BACKED]
[akpm@linux-foundation.org: fix /proc/vmstat units]
[nishimura@mxp.nes.nec.co.jp: memcg: fix handling of shmem migration]
[kosaki.motohiro@jp.fujitsu.com: adjust Quicklists field of /proc/meminfo]
[kosaki.motohiro@jp.fujitsu.com: fix style issue of get_scan_ratio()]
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
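The memcontrol.c changes below all hinge on one indexing trick: a page's LRU
list is computed from two bits, "active" and "file", so the four lists
(inactive/active anon, inactive/active file) share one array. The following
standalone C program restates that arithmetic; the LRU_* offsets match the
values this series adds to include/linux/mmzone.h, but the program itself is
an illustrative sketch, not kernel code.

    #include <assert.h>
    #include <stdio.h>

    /* LRU index offsets as introduced by this patch series (mmzone.h). */
    #define LRU_BASE   0
    #define LRU_ACTIVE 1   /* bit 0 of the index: inactive vs. active    */
    #define LRU_FILE   2   /* bit 1 of the index: anon set vs. file set  */

    enum lru_list {
            LRU_INACTIVE_ANON = LRU_BASE,
            LRU_ACTIVE_ANON   = LRU_BASE + LRU_ACTIVE,
            LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE,
            LRU_ACTIVE_FILE   = LRU_BASE + LRU_FILE + LRU_ACTIVE,
            NR_LRU_LISTS
    };

    int main(void)
    {
            /* The closed form used throughout the hunks below:
             * LRU_FILE * !!file + !!active enumerates all four lists. */
            for (int file = 0; file <= 1; file++)
                    for (int active = 0; active <= 1; active++) {
                            int lru = LRU_FILE * !!file + !!active;
                            assert(lru >= 0 && lru < NR_LRU_LISTS);
                            printf("file=%d active=%d -> lru=%d\n",
                                   file, active, lru);
                    }
            return 0;
    }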
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--  mm/memcontrol.c | 88
1 file changed, 50 insertions(+), 38 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c0cbd7790c51..27e9e75f4eab 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -162,6 +162,7 @@ struct page_cgroup {
 };
 #define PAGE_CGROUP_FLAG_CACHE  (0x1)   /* charged as cache */
 #define PAGE_CGROUP_FLAG_ACTIVE (0x2)   /* page is active in this cgroup */
+#define PAGE_CGROUP_FLAG_FILE   (0x4)   /* page is file system backed */
 
 static int page_cgroup_nid(struct page_cgroup *pc)
 {
@@ -177,6 +178,7 @@ enum charge_type {
 	MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
 	MEM_CGROUP_CHARGE_TYPE_MAPPED,
 	MEM_CGROUP_CHARGE_TYPE_FORCE,	/* used by force_empty */
+	MEM_CGROUP_CHARGE_TYPE_SHMEM,	/* used by page migration of shmem */
 };
 
 /*
@@ -288,8 +290,12 @@ static void unlock_page_cgroup(struct page *page)
 static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
 			struct page_cgroup *pc)
 {
-	int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
-	int lru = !!from;
+	int lru = LRU_BASE;
+
+	if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE)
+		lru += LRU_ACTIVE;
+	if (pc->flags & PAGE_CGROUP_FLAG_FILE)
+		lru += LRU_FILE;
 
 	MEM_CGROUP_ZSTAT(mz, lru) -= 1;
 
@@ -300,10 +306,12 @@ static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
 static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
 				struct page_cgroup *pc)
 {
-	int lru = LRU_INACTIVE;
+	int lru = LRU_BASE;
 
 	if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE)
 		lru += LRU_ACTIVE;
+	if (pc->flags & PAGE_CGROUP_FLAG_FILE)
+		lru += LRU_FILE;
 
 	MEM_CGROUP_ZSTAT(mz, lru) += 1;
 	list_add(&pc->lru, &mz->lists[lru]);
@@ -314,10 +322,9 @@ static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
 static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
 {
 	struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc);
-	int lru = LRU_INACTIVE;
-
-	if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE)
-		lru += LRU_ACTIVE;
+	int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
+	int file = pc->flags & PAGE_CGROUP_FLAG_FILE;
+	int lru = LRU_FILE * !!file + !!from;
 
 	MEM_CGROUP_ZSTAT(mz, lru) -= 1;
 
@@ -326,7 +333,7 @@ static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
 	else
 		pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE;
 
-	lru = !!active;
+	lru = LRU_FILE * !!file + !!active;
 	MEM_CGROUP_ZSTAT(mz, lru) += 1;
 	list_move(&pc->lru, &mz->lists[lru]);
 }
@@ -391,21 +398,6 @@ int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem)
 }
 
 /*
- * This function is called from vmscan.c. In page reclaiming loop. balance
- * between active and inactive list is calculated. For memory controller
- * page reclaiming, we should use using mem_cgroup's imbalance rather than
- * zone's global lru imbalance.
- */
-long mem_cgroup_reclaim_imbalance(struct mem_cgroup *mem)
-{
-	unsigned long active, inactive;
-	/* active and inactive are the number of pages. 'long' is ok.*/
-	active = mem_cgroup_get_all_zonestat(mem, LRU_ACTIVE);
-	inactive = mem_cgroup_get_all_zonestat(mem, LRU_INACTIVE);
-	return (long) (active / (inactive + 1));
-}
-
-/*
  * prev_priority control...this will be used in memory reclaim path.
  */
 int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem)
@@ -450,7 +442,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					unsigned long *scanned, int order,
 					int mode, struct zone *z,
 					struct mem_cgroup *mem_cont,
-					int active)
+					int active, int file)
 {
 	unsigned long nr_taken = 0;
 	struct page *page;
@@ -461,7 +453,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 	int nid = z->zone_pgdat->node_id;
 	int zid = zone_idx(z);
 	struct mem_cgroup_per_zone *mz;
-	int lru = !!active;
+	int lru = LRU_FILE * !!file + !!active;
 
 	BUG_ON(!mem_cont);
 	mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
@@ -477,6 +469,9 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 		if (unlikely(!PageLRU(page)))
 			continue;
 
+		/*
+		 * TODO: play better with lumpy reclaim, grabbing anything.
+		 */
 		if (PageActive(page) && !active) {
 			__mem_cgroup_move_lists(pc, true);
 			continue;
@@ -489,7 +484,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 		scan++;
 		list_move(&pc->lru, &pc_list);
 
-		if (__isolate_lru_page(page, mode) == 0) {
+		if (__isolate_lru_page(page, mode, file) == 0) {
 			list_move(&page->lru, dst);
 			nr_taken++;
 		}
@@ -575,10 +570,16 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 	 * If a page is accounted as a page cache, insert to inactive list.
 	 * If anon, insert to active list.
 	 */
-	if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE)
+	if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE) {
 		pc->flags = PAGE_CGROUP_FLAG_CACHE;
-	else
+		if (page_is_file_cache(page))
+			pc->flags |= PAGE_CGROUP_FLAG_FILE;
+		else
+			pc->flags |= PAGE_CGROUP_FLAG_ACTIVE;
+	} else if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
 		pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
+	else /* MEM_CGROUP_CHARGE_TYPE_SHMEM */
+		pc->flags = PAGE_CGROUP_FLAG_CACHE | PAGE_CGROUP_FLAG_ACTIVE;
 
 	lock_page_cgroup(page);
 	if (unlikely(page_get_page_cgroup(page))) {
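For readability, here is a condensed restatement of the charge-time placement
policy the hunk above implements. The flag values mirror the
PAGE_CGROUP_FLAG_* definitions from the first hunk; the helper's name and
shape are hypothetical, not the kernel's.

    #define FLAG_CACHE  0x1  /* mirrors PAGE_CGROUP_FLAG_CACHE  */
    #define FLAG_ACTIVE 0x2  /* mirrors PAGE_CGROUP_FLAG_ACTIVE */
    #define FLAG_FILE   0x4  /* mirrors PAGE_CGROUP_FLAG_FILE   */

    enum charge_type { CHARGE_CACHE, CHARGE_MAPPED, CHARGE_SHMEM };

    /* Hypothetical helper: which flags does a freshly charged page get? */
    static int initial_flags(enum charge_type ctype, int is_file_cache)
    {
            if (ctype == CHARGE_CACHE) {
                    if (is_file_cache)          /* real file-backed page cache */
                            return FLAG_CACHE | FLAG_FILE;   /* inactive file */
                    /* swap-backed (tmpfs) cache starts out active */
                    return FLAG_CACHE | FLAG_ACTIVE;         /* active anon */
            }
            if (ctype == CHARGE_MAPPED)
                    return FLAG_ACTIVE;                      /* active anon */
            /* CHARGE_SHMEM: used by shmem page migration */
            return FLAG_CACHE | FLAG_ACTIVE;                 /* active anon */
    }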
@@ -737,8 +738,12 @@ int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
 	if (pc) {
 		mem = pc->mem_cgroup;
 		css_get(&mem->css);
-		if (pc->flags & PAGE_CGROUP_FLAG_CACHE)
-			ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+		if (pc->flags & PAGE_CGROUP_FLAG_CACHE) {
+			if (page_is_file_cache(page))
+				ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+			else
+				ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
+		}
 	}
 	unlock_page_cgroup(page);
 	if (mem) {
@@ -982,14 +987,21 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
 	}
 	/* showing # of active pages */
 	{
-		unsigned long active, inactive;
-
-		inactive = mem_cgroup_get_all_zonestat(mem_cont,
-						LRU_INACTIVE);
-		active = mem_cgroup_get_all_zonestat(mem_cont,
-						LRU_ACTIVE);
-		cb->fill(cb, "active", (active) * PAGE_SIZE);
-		cb->fill(cb, "inactive", (inactive) * PAGE_SIZE);
+		unsigned long active_anon, inactive_anon;
+		unsigned long active_file, inactive_file;
+
+		inactive_anon = mem_cgroup_get_all_zonestat(mem_cont,
+						LRU_INACTIVE_ANON);
+		active_anon = mem_cgroup_get_all_zonestat(mem_cont,
+						LRU_ACTIVE_ANON);
+		inactive_file = mem_cgroup_get_all_zonestat(mem_cont,
+						LRU_INACTIVE_FILE);
+		active_file = mem_cgroup_get_all_zonestat(mem_cont,
+						LRU_ACTIVE_FILE);
+		cb->fill(cb, "active_anon", (active_anon) * PAGE_SIZE);
+		cb->fill(cb, "inactive_anon", (inactive_anon) * PAGE_SIZE);
+		cb->fill(cb, "active_file", (active_file) * PAGE_SIZE);
+		cb->fill(cb, "inactive_file", (inactive_file) * PAGE_SIZE);
 	}
 	return 0;
 }
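After this last hunk, a memcg's memory.stat file reports the four LRU lists
separately (in bytes, via the cb->fill() calls above) instead of a single
active/inactive pair. A hypothetical excerpt, with made-up values that are
multiples of a 4 KiB page size:

    active_anon 4194304
    inactive_anon 1310720
    active_file 9437184
    inactive_file 26214400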