author		Rik van Riel <riel@redhat.com>	2008-10-18 23:26:32 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-10-20 11:50:25 -0400
commit		4f98a2fee8acdb4ac84545df98cccecfd130f8db (patch)
tree		035a2937f4c3e2f7b4269412041c073ac646937c /mm/memcontrol.c
parent		b2e185384f534781fd22f5ce170b2ad26f97df70 (diff)
vmscan: split LRU lists into anon & file sets
Split the LRU lists in two, one set for pages that are backed by real file
systems ("file") and one for pages that are backed by memory and swap
("anon"). The latter includes tmpfs.
The advantage of doing this is that the VM will not have to scan over lots
of anonymous pages (which we generally do not want to swap out), just to
find the page cache pages that it should evict.
This patch has the infrastructure and a basic policy to balance how much
we scan the anon lists and how much we scan the file lists. The big
policy changes are in separate patches.
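
For reference, each per-zone LRU set is addressed with a single array index built from two bits: one for active vs. inactive, one for file vs. anon. A minimal sketch of the indexing scheme, assuming the LRU_BASE/LRU_ACTIVE/LRU_FILE values used consistently throughout the diff below (the authoritative enum lives outside this file and is not part of this diff):

	/* Sketch of the split-LRU index layout; the values are assumptions
	 * consistent with the "lru = LRU_FILE * !!file + !!active"
	 * arithmetic used in the patch below. */
	#define LRU_BASE	0
	#define LRU_ACTIVE	1
	#define LRU_FILE	2

	enum lru_list {
		LRU_INACTIVE_ANON = LRU_BASE,				/* 0 */
		LRU_ACTIVE_ANON   = LRU_BASE + LRU_ACTIVE,		/* 1 */
		LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE,		/* 2 */
		LRU_ACTIVE_FILE   = LRU_BASE + LRU_FILE + LRU_ACTIVE,	/* 3 */
		NR_LRU_LISTS						/* 4 */
	};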
[lee.schermerhorn@hp.com: collect lru meminfo statistics from correct offset]
[kosaki.motohiro@jp.fujitsu.com: prevent incorrect oom under split_lru]
[kosaki.motohiro@jp.fujitsu.com: fix pagevec_move_tail() doesn't treat unevictable page]
[hugh@veritas.com: memcg swapbacked pages active]
[hugh@veritas.com: splitlru: BDI_CAP_SWAP_BACKED]
[akpm@linux-foundation.org: fix /proc/vmstat units]
[nishimura@mxp.nes.nec.co.jp: memcg: fix handling of shmem migration]
[kosaki.motohiro@jp.fujitsu.com: adjust Quicklists field of /proc/meminfo]
[kosaki.motohiro@jp.fujitsu.com: fix style issue of get_scan_ratio()]
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memcontrol.c')
 mm/memcontrol.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------------
 1 file changed, 50 insertions(+), 38 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c0cbd7790c51..27e9e75f4eab 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -162,6 +162,7 @@ struct page_cgroup {
 };
 #define PAGE_CGROUP_FLAG_CACHE	(0x1)	/* charged as cache */
 #define PAGE_CGROUP_FLAG_ACTIVE	(0x2)	/* page is active in this cgroup */
+#define PAGE_CGROUP_FLAG_FILE	(0x4)	/* page is file system backed */
 
 static int page_cgroup_nid(struct page_cgroup *pc)
 {
@@ -177,6 +178,7 @@ enum charge_type {
 	MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
 	MEM_CGROUP_CHARGE_TYPE_MAPPED,
 	MEM_CGROUP_CHARGE_TYPE_FORCE,	/* used by force_empty */
+	MEM_CGROUP_CHARGE_TYPE_SHMEM,	/* used by page migration of shmem */
 };
 
 /*
@@ -288,8 +290,12 @@ static void unlock_page_cgroup(struct page *page)
 static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
 			struct page_cgroup *pc)
 {
-	int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
-	int lru = !!from;
+	int lru = LRU_BASE;
+
+	if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE)
+		lru += LRU_ACTIVE;
+	if (pc->flags & PAGE_CGROUP_FLAG_FILE)
+		lru += LRU_FILE;
 
 	MEM_CGROUP_ZSTAT(mz, lru) -= 1;
 
@@ -300,10 +306,12 @@ static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
 static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
 				struct page_cgroup *pc)
 {
-	int lru = LRU_INACTIVE;
+	int lru = LRU_BASE;
 
 	if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE)
 		lru += LRU_ACTIVE;
+	if (pc->flags & PAGE_CGROUP_FLAG_FILE)
+		lru += LRU_FILE;
 
 	MEM_CGROUP_ZSTAT(mz, lru) += 1;
 	list_add(&pc->lru, &mz->lists[lru]);
@@ -314,10 +322,9 @@ static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
 static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
 {
 	struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc);
-	int lru = LRU_INACTIVE;
-
-	if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE)
-		lru += LRU_ACTIVE;
+	int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
+	int file = pc->flags & PAGE_CGROUP_FLAG_FILE;
+	int lru = LRU_FILE * !!file + !!from;
 
 	MEM_CGROUP_ZSTAT(mz, lru) -= 1;
 
@@ -326,7 +333,7 @@ static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
 	else
 		pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE;
 
-	lru = !!active;
+	lru = LRU_FILE * !!file + !!active;
 	MEM_CGROUP_ZSTAT(mz, lru) += 1;
 	list_move(&pc->lru, &mz->lists[lru]);
 }
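
The "LRU_FILE * !!file + !!active" expression seen here recurs throughout the patch: the double negation turns the masked flag bits into 0 or 1, so the pair (file, active) selects one of the four lists. A self-contained demonstration (the constant values are assumptions matching the sketch in the commit message above):

	#include <stdio.h>

	#define LRU_ACTIVE	1	/* assumed value */
	#define LRU_FILE	2	/* assumed value */

	int main(void)
	{
		int file, active;

		/* Enumerate all four (file, active) combinations and show
		 * which list index the patch's arithmetic selects. */
		for (file = 0; file <= 1; file++)
			for (active = 0; active <= 1; active++)
				printf("file=%d active=%d -> lru index %d\n",
				       file, active,
				       LRU_FILE * !!file + !!active);
		return 0;
	}

This prints indices 0 through 3: inactive anon, active anon, inactive file, active file, in that order.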
@@ -391,21 +398,6 @@ int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem)
 }
 
 /*
- * This function is called from vmscan.c. In page reclaiming loop. balance
- * between active and inactive list is calculated. For memory controller
- * page reclaiming, we should use using mem_cgroup's imbalance rather than
- * zone's global lru imbalance.
- */
-long mem_cgroup_reclaim_imbalance(struct mem_cgroup *mem)
-{
-	unsigned long active, inactive;
-	/* active and inactive are the number of pages. 'long' is ok.*/
-	active = mem_cgroup_get_all_zonestat(mem, LRU_ACTIVE);
-	inactive = mem_cgroup_get_all_zonestat(mem, LRU_INACTIVE);
-	return (long) (active / (inactive + 1));
-}
-
-/*
  * prev_priority control...this will be used in memory reclaim path.
  */
 int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem)
@@ -450,7 +442,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					unsigned long *scanned, int order,
 					int mode, struct zone *z,
 					struct mem_cgroup *mem_cont,
-					int active)
+					int active, int file)
 {
 	unsigned long nr_taken = 0;
 	struct page *page;
@@ -461,7 +453,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 	int nid = z->zone_pgdat->node_id;
 	int zid = zone_idx(z);
 	struct mem_cgroup_per_zone *mz;
-	int lru = !!active;
+	int lru = LRU_FILE * !!file + !!active;
 
 	BUG_ON(!mem_cont);
 	mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
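
Callers must now say which of the four lists they are scanning by passing both dimensions. A hypothetical call site (the local names and scan mode are illustrative, not taken from this diff):

	/* Hypothetical: isolate pages from this cgroup's inactive file
	 * list of the given zone. */
	nr_taken = mem_cgroup_isolate_pages(nr_to_scan, &page_list,
					    &nr_scanned, order,
					    ISOLATE_INACTIVE, zone,
					    mem_cont,
					    0 /* !active */, 1 /* file */);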
@@ -477,6 +469,9 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 		if (unlikely(!PageLRU(page)))
 			continue;
 
+		/*
+		 * TODO: play better with lumpy reclaim, grabbing anything.
+		 */
 		if (PageActive(page) && !active) {
 			__mem_cgroup_move_lists(pc, true);
 			continue;
@@ -489,7 +484,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 		scan++;
 		list_move(&pc->lru, &pc_list);
 
-		if (__isolate_lru_page(page, mode) == 0) {
+		if (__isolate_lru_page(page, mode, file) == 0) {
 			list_move(&page->lru, dst);
 			nr_taken++;
 		}
@@ -575,10 +570,16 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 	 * If a page is accounted as a page cache, insert to inactive list.
 	 * If anon, insert to active list.
 	 */
-	if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE)
+	if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE) {
 		pc->flags = PAGE_CGROUP_FLAG_CACHE;
-	else
+		if (page_is_file_cache(page))
+			pc->flags |= PAGE_CGROUP_FLAG_FILE;
+		else
+			pc->flags |= PAGE_CGROUP_FLAG_ACTIVE;
+	} else if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
 		pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
+	else /* MEM_CGROUP_CHARGE_TYPE_SHMEM */
+		pc->flags = PAGE_CGROUP_FLAG_CACHE | PAGE_CGROUP_FLAG_ACTIVE;
 
 	lock_page_cgroup(page);
 	if (unlikely(page_get_page_cgroup(page))) {
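
For clarity, the flag combinations the hunk above produces map onto the split lists as follows (a summary derived from the code, not text from the original patch):

	/*
	 * Resulting pc->flags per charge type:
	 *   CACHE, file-backed page : CACHE | FILE    -> inactive file list
	 *   CACHE, swap-backed page : CACHE | ACTIVE  -> active anon list
	 *   MAPPED (anonymous)      : ACTIVE          -> active anon list
	 *   SHMEM (page migration)  : CACHE | ACTIVE  -> active anon list
	 */

This matches the "[hugh@veritas.com: memcg swapbacked pages active]" fixup noted in the changelog: swap-backed cache starts on the active anon list rather than a file list.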
@@ -737,8 +738,12 @@ int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
 	if (pc) {
 		mem = pc->mem_cgroup;
 		css_get(&mem->css);
-		if (pc->flags & PAGE_CGROUP_FLAG_CACHE)
-			ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+		if (pc->flags & PAGE_CGROUP_FLAG_CACHE) {
+			if (page_is_file_cache(page))
+				ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+			else
+				ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
+		}
 	}
 	unlock_page_cgroup(page);
 	if (mem) {
@@ -982,14 +987,21 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
 	}
 	/* showing # of active pages */
 	{
-		unsigned long active, inactive;
-
-		inactive = mem_cgroup_get_all_zonestat(mem_cont,
-						LRU_INACTIVE);
-		active = mem_cgroup_get_all_zonestat(mem_cont,
-						LRU_ACTIVE);
-		cb->fill(cb, "active", (active) * PAGE_SIZE);
-		cb->fill(cb, "inactive", (inactive) * PAGE_SIZE);
+		unsigned long active_anon, inactive_anon;
+		unsigned long active_file, inactive_file;
+
+		inactive_anon = mem_cgroup_get_all_zonestat(mem_cont,
+						LRU_INACTIVE_ANON);
+		active_anon = mem_cgroup_get_all_zonestat(mem_cont,
+						LRU_ACTIVE_ANON);
+		inactive_file = mem_cgroup_get_all_zonestat(mem_cont,
+						LRU_INACTIVE_FILE);
+		active_file = mem_cgroup_get_all_zonestat(mem_cont,
+						LRU_ACTIVE_FILE);
+		cb->fill(cb, "active_anon", (active_anon) * PAGE_SIZE);
+		cb->fill(cb, "inactive_anon", (inactive_anon) * PAGE_SIZE);
+		cb->fill(cb, "active_file", (active_file) * PAGE_SIZE);
+		cb->fill(cb, "inactive_file", (inactive_file) * PAGE_SIZE);
 	}
 	return 0;
 }
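
After this change a cgroup's memory.stat file reports the four lists separately in place of the old "active"/"inactive" pair. An illustrative excerpt (hypothetical values; the path depends on where the memory controller hierarchy is mounted):

	$ cat /cgroups/0/memory.stat
	...
	active_anon 8388608
	inactive_anon 4096
	active_file 1048576
	inactive_file 12582912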