author	Lee Schermerhorn <Lee.Schermerhorn@hp.com>	2008-10-18 23:26:39 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-10-20 11:50:26 -0400
commit	894bc310419ac95f4fa4142dc364401a7e607f65 (patch)
tree	15d56a7333b41620016b845d2323dd06e822b621 /mm/vmscan.c
parent	8a7a8544a4f6554ec2d8048ac9f9672f442db5a2 (diff)
Unevictable LRU Infrastructure
When the system contains lots of mlocked or otherwise unevictable pages,
the pageout code (kswapd) can spend lots of time scanning over these
pages.  Worse still, the presence of lots of unevictable pages can confuse
kswapd into thinking that more aggressive pageout modes are required,
resulting in all kinds of bad behaviour.

Infrastructure to manage pages excluded from reclaim--i.e., hidden from
vmscan.  Based on a patch by Larry Woodman of Red Hat.  Reworked to
maintain "unevictable" pages on a separate per-zone LRU list, to "hide"
them from vmscan.

Kosaki Motohiro added the support for the memory controller unevictable
lru list.

Pages on the unevictable list have both PG_unevictable and PG_lru set.
Thus, PG_unevictable is analogous to and mutually exclusive with
PG_active--it specifies which LRU list the page is on.

The unevictable infrastructure is enabled by a new mm Kconfig option
[CONFIG_]UNEVICTABLE_LRU.

A new function 'page_evictable(page, vma)' in vmscan.c tests whether or
not a page may be evictable.  Subsequent patches will add the various
!evictable tests.  We'll want to keep these tests light-weight for use in
shrink_active_list() and, possibly, the fault path.

To avoid races between tasks putting pages [back] onto an LRU list and
tasks that might be moving the page from non-evictable to evictable state,
the new function 'putback_lru_page()' -- inverse to 'isolate_lru_page()'
-- tests the "evictability" of a page after placing it on the LRU, before
dropping the reference.  If the page has become unevictable,
putback_lru_page() will redo the 'putback', thus moving the page to the
unevictable list.  This way, we avoid "stranding" evictable pages on the
unevictable list.

[akpm@linux-foundation.org: fix fallout from out-of-order merge]
[riel@redhat.com: fix UNEVICTABLE_LRU and !PROC_PAGE_MONITOR build]
[nishimura@mxp.nes.nec.co.jp: remove redundant mapping check]
[kosaki.motohiro@jp.fujitsu.com: unevictable-lru-infrastructure: putback_lru_page()/unevictable page handling rework]
[kosaki.motohiro@jp.fujitsu.com: kill unnecessary lock_page() in vmscan.c]
[kosaki.motohiro@jp.fujitsu.com: revert migration change of unevictable lru infrastructure]
[kosaki.motohiro@jp.fujitsu.com: revert to unevictable-lru-infrastructure-kconfig-fix.patch]
[kosaki.motohiro@jp.fujitsu.com: restore patch failure of vmstat-unevictable-and-mlocked-pages-vm-events.patch]
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Debugged-by: Benjamin Kidwell <benjkidwell@yahoo.com>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
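The isolate/putback pairing described above is the interface most callers will see. A minimal caller-side sketch of the intended usage (not part of this patch; example_move_page() and the "operate on the page" step are placeholders):

	/* Illustrative sketch only -- not from this commit. */
	static int example_move_page(struct page *page)
	{
		int ret;

		ret = isolate_lru_page(page);	/* takes a reference, clears PageLRU */
		if (ret)
			return ret;		/* -EBUSY: page was not on an LRU list */

		/* ... operate on the now off-LRU page here (placeholder) ... */

		/*
		 * Put the page back on whichever LRU list is appropriate now;
		 * putback_lru_page() re-checks evictability and drops the
		 * reference taken by isolate_lru_page().
		 */
		putback_lru_page(page);
		return 0;
	}

Because putback_lru_page() drops the isolation reference, the caller must not touch the page afterwards unless it holds a reference of its own.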
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--	mm/vmscan.c	149
1 file changed, 136 insertions(+), 13 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a8347b677e7..154b9b608da 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -470,6 +470,79 @@ int remove_mapping(struct address_space *mapping, struct page *page)
 	return 0;
 }
 
+/**
+ * putback_lru_page - put previously isolated page onto appropriate LRU list
+ * @page: page to be put back to appropriate lru list
+ *
+ * Add previously isolated @page to appropriate LRU list.
+ * Page may still be unevictable for other reasons.
+ *
+ * lru_lock must not be held, interrupts must be enabled.
+ */
+#ifdef CONFIG_UNEVICTABLE_LRU
+void putback_lru_page(struct page *page)
+{
+	int lru;
+	int active = !!TestClearPageActive(page);
+
+	VM_BUG_ON(PageLRU(page));
+
+redo:
+	ClearPageUnevictable(page);
+
+	if (page_evictable(page, NULL)) {
+		/*
+		 * For evictable pages, we can use the cache.
+		 * In event of a race, worst case is we end up with an
+		 * unevictable page on [in]active list.
+		 * We know how to handle that.
+		 */
+		lru = active + page_is_file_cache(page);
+		lru_cache_add_lru(page, lru);
+	} else {
+		/*
+		 * Put unevictable pages directly on zone's unevictable
+		 * list.
+		 */
+		lru = LRU_UNEVICTABLE;
+		add_page_to_unevictable_list(page);
+	}
+	mem_cgroup_move_lists(page, lru);
+
+	/*
+	 * page's status can change while we move it among lru.  If an evictable
+	 * page is on the unevictable list, it will never be freed.  To avoid
+	 * that, check again after we have added it to the list.
+	 */
+	if (lru == LRU_UNEVICTABLE && page_evictable(page, NULL)) {
+		if (!isolate_lru_page(page)) {
+			put_page(page);
+			goto redo;
+		}
+		/* This means someone else dropped this page from the LRU,
+		 * so it will be freed or put back to the LRU again.  There is
+		 * nothing to do here.
+		 */
+	}
+
+	put_page(page);		/* drop ref from isolate */
+}
+
+#else /* CONFIG_UNEVICTABLE_LRU */
+
+void putback_lru_page(struct page *page)
+{
+	int lru;
+	VM_BUG_ON(PageLRU(page));
+
+	lru = !!TestClearPageActive(page) + page_is_file_cache(page);
+	lru_cache_add_lru(page, lru);
+	mem_cgroup_move_lists(page, lru);
+	put_page(page);
+}
+#endif /* CONFIG_UNEVICTABLE_LRU */
+
+
 /*
  * shrink_page_list() returns the number of reclaimed pages
  */
@@ -503,6 +576,12 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 
 		sc->nr_scanned++;
 
+		if (unlikely(!page_evictable(page, NULL))) {
+			unlock_page(page);
+			putback_lru_page(page);
+			continue;
+		}
+
 		if (!sc->may_swap && page_mapped(page))
 			goto keep_locked;
 
@@ -602,7 +681,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		 * possible for a page to have PageDirty set, but it is actually
 		 * clean (all its buffers are clean).  This happens if the
 		 * buffers were written out directly, with submit_bh(). ext3
-		 * will do this, as well as the blockdev mapping. 
+		 * will do this, as well as the blockdev mapping.
 		 * try_to_release_page() will discover that cleanness and will
 		 * drop the buffers and mark the page clean - it can be freed.
 		 *
@@ -650,6 +729,7 @@ activate_locked:
 		/* Not a candidate for swapping, so reclaim swap space. */
 		if (PageSwapCache(page) && vm_swap_full())
 			remove_exclusive_swap_page_ref(page);
+		VM_BUG_ON(PageActive(page));
 		SetPageActive(page);
 		pgactivate++;
 keep_locked:
@@ -699,6 +779,14 @@ int __isolate_lru_page(struct page *page, int mode, int file)
 	if (mode != ISOLATE_BOTH && (!page_is_file_cache(page) != !file))
 		return ret;
 
+	/*
+	 * When this function is being called for lumpy reclaim, we
+	 * initially look into all LRU pages, active, inactive and
+	 * unevictable; only give shrink_page_list evictable pages.
+	 */
+	if (PageUnevictable(page))
+		return ret;
+
 	ret = -EBUSY;
 	if (likely(get_page_unless_zero(page))) {
 		/*
@@ -810,7 +898,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 				/* else it is being freed elsewhere */
 				list_move(&cursor_page->lru, src);
 			default:
-				break;
+				break;	/* ! on LRU or wrong list */
 			}
 		}
 	}
@@ -870,8 +958,9 @@ static unsigned long clear_active_flags(struct list_head *page_list,
  * Returns -EBUSY if the page was not on an LRU list.
  *
  * The returned page will have PageLRU() cleared.  If it was found on
- * the active list, it will have PageActive set.  That flag may need
- * to be cleared by the caller before letting the page go.
+ * the active list, it will have PageActive set.  If it was found on
+ * the unevictable list, it will have the PageUnevictable bit set. That flag
+ * may need to be cleared by the caller before letting the page go.
  *
  * The vmstat statistic corresponding to the list on which the page was
  * found will be decremented.
@@ -892,11 +981,10 @@ int isolate_lru_page(struct page *page)
 
 	spin_lock_irq(&zone->lru_lock);
 	if (PageLRU(page) && get_page_unless_zero(page)) {
-		int lru = LRU_BASE;
+		int lru = page_lru(page);
 		ret = 0;
 		ClearPageLRU(page);
 
-		lru += page_is_file_cache(page) + !!PageActive(page);
 		del_page_from_lru_list(zone, page, lru);
 	}
 	spin_unlock_irq(&zone->lru_lock);
@@ -1008,11 +1096,20 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		 * Put back any unfreeable pages.
 		 */
 		while (!list_empty(&page_list)) {
+			int lru;
 			page = lru_to_page(&page_list);
 			VM_BUG_ON(PageLRU(page));
-			SetPageLRU(page);
 			list_del(&page->lru);
-			add_page_to_lru_list(zone, page, page_lru(page));
+			if (unlikely(!page_evictable(page, NULL))) {
+				spin_unlock_irq(&zone->lru_lock);
+				putback_lru_page(page);
+				spin_lock_irq(&zone->lru_lock);
+				continue;
+			}
+			SetPageLRU(page);
+			lru = page_lru(page);
+			add_page_to_lru_list(zone, page, lru);
+			mem_cgroup_move_lists(page, lru);
 			if (PageActive(page) && scan_global_lru(sc)) {
 				int file = !!page_is_file_cache(page);
 				zone->recent_rotated[file]++;
@@ -1107,6 +1204,11 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		page = lru_to_page(&l_hold);
 		list_del(&page->lru);
 
+		if (unlikely(!page_evictable(page, NULL))) {
+			putback_lru_page(page);
+			continue;
+		}
+
 		/* page_referenced clears PageReferenced */
 		if (page_mapping_inuse(page) &&
 		    page_referenced(page, 0, sc->mem_cgroup))
@@ -1140,7 +1242,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		ClearPageActive(page);
 
 		list_move(&page->lru, &zone->lru[lru].list);
-		mem_cgroup_move_lists(page, false);
+		mem_cgroup_move_lists(page, lru);
 		pgmoved++;
 		if (!pagevec_add(&pvec, page)) {
 			__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
@@ -1286,7 +1388,7 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
 
 	get_scan_ratio(zone, sc, percent);
 
-	for_each_lru(l) {
+	for_each_evictable_lru(l) {
 		if (scan_global_lru(sc)) {
 			int file = is_file_lru(l);
 			int scan;
@@ -1318,7 +1420,7 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
 
 	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
 					nr[LRU_INACTIVE_FILE]) {
-		for_each_lru(l) {
+		for_each_evictable_lru(l) {
 			if (nr[l]) {
 				nr_to_scan = min(nr[l],
 					(unsigned long)sc->swap_cluster_max);
@@ -1875,8 +1977,8 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
 		if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY)
 			continue;
 
-		for_each_lru(l) {
-			/* For pass = 0 we don't shrink the active list */
+		for_each_evictable_lru(l) {
+			/* For pass = 0, we don't shrink the active list */
 			if (pass == 0 &&
 			    (l == LRU_ACTIVE || l == LRU_ACTIVE_FILE))
 				continue;
@@ -2213,3 +2315,24 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	return ret;
 }
 #endif
+
+#ifdef CONFIG_UNEVICTABLE_LRU
+/*
+ * page_evictable - test whether a page is evictable
+ * @page: the page to test
+ * @vma: the VMA in which the page is or will be mapped, may be NULL
+ *
+ * Test whether page is evictable--i.e., should be placed on active/inactive
+ * lists vs unevictable list.
+ *
+ * Reasons page might not be evictable:
+ * TODO - later patches
+ */
+int page_evictable(struct page *page, struct vm_area_struct *vma)
+{
+
+	/* TODO: test page [!]evictable conditions */
+
+	return 1;
+}
+#endif
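
As the TODO above notes, this commit leaves page_evictable() returning 1 unconditionally; the real tests arrive in the follow-on patches of this series. A rough sketch of the shape those checks take (an assumption about the later patches, not part of this commit; mapping_unevictable() and PageMlocked() are introduced by those follow-ons):

	int page_evictable(struct page *page, struct vm_area_struct *vma)
	{
		/* e.g. whole mapping marked unevictable (ramfs, SHM_LOCKed shmem) */
		if (mapping_unevictable(page_mapping(page)))
			return 0;

		/* e.g. page mlocked into a VM_LOCKED vma */
		if (PageMlocked(page))
			return 0;

		return 1;	/* evictable: goes on the active/inactive lists */
	}

Keeping each test this cheap is what allows page_evictable() to be called from shrink_active_list() and, potentially, the fault path.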