diff options
author | Lee Schermerhorn <Lee.Schermerhorn@hp.com> | 2008-10-18 23:26:39 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-10-20 11:50:26 -0400 |
commit | 894bc310419ac95f4fa4142dc364401a7e607f65 (patch) | |
tree | 15d56a7333b41620016b845d2323dd06e822b621 /mm | |
parent | 8a7a8544a4f6554ec2d8048ac9f9672f442db5a2 (diff) |
Unevictable LRU Infrastructure
When the system contains lots of mlocked or otherwise unevictable pages,
the pageout code (kswapd) can spend lots of time scanning over these
pages. Worse still, the presence of lots of unevictable pages can confuse
kswapd into thinking that more aggressive pageout modes are required,
resulting in all kinds of bad behaviour.
This patch adds infrastructure to manage pages excluded from reclaim--i.e.,
hidden from vmscan. It is based on a patch by Larry Woodman of Red Hat,
reworked to maintain "unevictable" pages on a separate per-zone LRU list,
to "hide" them from vmscan.
Kosaki Motohiro added the support for the memory controller unevictable
lru list.
Pages on the unevictable list have both PG_unevictable and PG_lru set.
Thus, PG_unevictable is analogous to and mutually exclusive with
PG_active--it specifies which LRU list the page is on.
The unevictable infrastructure is enabled by a new mm Kconfig option
[CONFIG_]UNEVICTABLE_LRU.
A new function 'page_evictable(page, vma)' in vmscan.c tests whether or
not a page may be evictable. Subsequent patches will add the various
!evictable tests. We'll want to keep these tests light-weight for use in
shrink_active_list() and, possibly, the fault path.
To avoid races between tasks putting pages [back] onto an LRU list and
tasks that might be moving the page from non-evictable to evictable state,
the new function 'putback_lru_page()' -- inverse to 'isolate_lru_page()'
-- tests the "evictability" of a page after placing it on the LRU, before
dropping the reference. If the page was placed on the unevictable list but
has since become evictable, putback_lru_page() will re-isolate it and redo
the 'putback', thus moving the page to the appropriate evictable list.
This way, we avoid "stranding" evictable pages on the unevictable list.
[akpm@linux-foundation.org: fix fallout from out-of-order merge]
[riel@redhat.com: fix UNEVICTABLE_LRU and !PROC_PAGE_MONITOR build]
[nishimura@mxp.nes.nec.co.jp: remove redundant mapping check]
[kosaki.motohiro@jp.fujitsu.com: unevictable-lru-infrastructure: putback_lru_page()/unevictable page handling rework]
[kosaki.motohiro@jp.fujitsu.com: kill unnecessary lock_page() in vmscan.c]
[kosaki.motohiro@jp.fujitsu.com: revert migration change of unevictable lru infrastructure]
[kosaki.motohiro@jp.fujitsu.com: revert to unevictable-lru-infrastructure-kconfig-fix.patch]
[kosaki.motohiro@jp.fujitsu.com: restore patch failure of vmstat-unevictable-and-mlocked-pages-vm-events.patch]
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Debugged-by: Benjamin Kidwell <benjkidwell@yahoo.com>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 11 | ||||
-rw-r--r-- | mm/internal.h | 26 | ||||
-rw-r--r-- | mm/memcontrol.c | 73 | ||||
-rw-r--r-- | mm/mempolicy.c | 2 | ||||
-rw-r--r-- | mm/migrate.c | 31 | ||||
-rw-r--r-- | mm/swap.c | 42 | ||||
-rw-r--r-- | mm/vmscan.c | 149 |
7 files changed, 272 insertions, 62 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index 1a501a4de95c..5b5790f8a816 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
@@ -209,5 +209,16 @@ config VIRT_TO_BUS | |||
209 | def_bool y | 209 | def_bool y |
210 | depends on !ARCH_NO_VIRT_TO_BUS | 210 | depends on !ARCH_NO_VIRT_TO_BUS |
211 | 211 | ||
212 | config UNEVICTABLE_LRU | ||
213 | bool "Add LRU list to track non-evictable pages" | ||
214 | default y | ||
215 | depends on MMU | ||
216 | help | ||
217 | Keeps unevictable pages off of the active and inactive pageout | ||
218 | lists, so kswapd will not waste CPU time or have its balancing | ||
219 | algorithms thrown off by scanning these pages. Selecting this | ||
220 | will use one page flag and increase the code size a little, | ||
221 | say Y unless you know what you are doing. | ||
222 | |||
212 | config MMU_NOTIFIER | 223 | config MMU_NOTIFIER |
213 | bool | 224 | bool |
diff --git a/mm/internal.h b/mm/internal.h index 4e8e78b978b5..3db17b2a1ac6 100644 --- a/mm/internal.h +++ b/mm/internal.h | |||
@@ -39,8 +39,15 @@ static inline void __put_page(struct page *page) | |||
39 | atomic_dec(&page->_count); | 39 | atomic_dec(&page->_count); |
40 | } | 40 | } |
41 | 41 | ||
42 | /* | ||
43 | * in mm/vmscan.c: | ||
44 | */ | ||
42 | extern int isolate_lru_page(struct page *page); | 45 | extern int isolate_lru_page(struct page *page); |
46 | extern void putback_lru_page(struct page *page); | ||
43 | 47 | ||
48 | /* | ||
49 | * in mm/page_alloc.c | ||
50 | */ | ||
44 | extern void __free_pages_bootmem(struct page *page, unsigned int order); | 51 | extern void __free_pages_bootmem(struct page *page, unsigned int order); |
45 | 52 | ||
46 | /* | 53 | /* |
@@ -54,6 +61,25 @@ static inline unsigned long page_order(struct page *page) | |||
54 | return page_private(page); | 61 | return page_private(page); |
55 | } | 62 | } |
56 | 63 | ||
64 | #ifdef CONFIG_UNEVICTABLE_LRU | ||
65 | /* | ||
66 | * unevictable_migrate_page() called only from migrate_page_copy() to | ||
67 | * migrate unevictable flag to new page. | ||
68 | * Note that the old page has been isolated from the LRU lists at this | ||
69 | * point so we don't need to worry about LRU statistics. | ||
70 | */ | ||
71 | static inline void unevictable_migrate_page(struct page *new, struct page *old) | ||
72 | { | ||
73 | if (TestClearPageUnevictable(old)) | ||
74 | SetPageUnevictable(new); | ||
75 | } | ||
76 | #else | ||
77 | static inline void unevictable_migrate_page(struct page *new, struct page *old) | ||
78 | { | ||
79 | } | ||
80 | #endif | ||
81 | |||
82 | |||
57 | /* | 83 | /* |
58 | * FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node, | 84 | * FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node, |
59 | * so all functions starting at paging_init should be marked __init | 85 | * so all functions starting at paging_init should be marked __init |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 27e9e75f4eab..82c065e7551e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -160,9 +160,10 @@ struct page_cgroup { | |||
160 | struct mem_cgroup *mem_cgroup; | 160 | struct mem_cgroup *mem_cgroup; |
161 | int flags; | 161 | int flags; |
162 | }; | 162 | }; |
163 | #define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */ | 163 | #define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */ |
164 | #define PAGE_CGROUP_FLAG_ACTIVE (0x2) /* page is active in this cgroup */ | 164 | #define PAGE_CGROUP_FLAG_ACTIVE (0x2) /* page is active in this cgroup */ |
165 | #define PAGE_CGROUP_FLAG_FILE (0x4) /* page is file system backed */ | 165 | #define PAGE_CGROUP_FLAG_FILE (0x4) /* page is file system backed */ |
166 | #define PAGE_CGROUP_FLAG_UNEVICTABLE (0x8) /* page is unevictable */ | ||
166 | 167 | ||
167 | static int page_cgroup_nid(struct page_cgroup *pc) | 168 | static int page_cgroup_nid(struct page_cgroup *pc) |
168 | { | 169 | { |
@@ -292,10 +293,14 @@ static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz, | |||
292 | { | 293 | { |
293 | int lru = LRU_BASE; | 294 | int lru = LRU_BASE; |
294 | 295 | ||
295 | if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE) | 296 | if (pc->flags & PAGE_CGROUP_FLAG_UNEVICTABLE) |
296 | lru += LRU_ACTIVE; | 297 | lru = LRU_UNEVICTABLE; |
297 | if (pc->flags & PAGE_CGROUP_FLAG_FILE) | 298 | else { |
298 | lru += LRU_FILE; | 299 | if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE) |
300 | lru += LRU_ACTIVE; | ||
301 | if (pc->flags & PAGE_CGROUP_FLAG_FILE) | ||
302 | lru += LRU_FILE; | ||
303 | } | ||
299 | 304 | ||
300 | MEM_CGROUP_ZSTAT(mz, lru) -= 1; | 305 | MEM_CGROUP_ZSTAT(mz, lru) -= 1; |
301 | 306 | ||
@@ -308,10 +313,14 @@ static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz, | |||
308 | { | 313 | { |
309 | int lru = LRU_BASE; | 314 | int lru = LRU_BASE; |
310 | 315 | ||
311 | if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE) | 316 | if (pc->flags & PAGE_CGROUP_FLAG_UNEVICTABLE) |
312 | lru += LRU_ACTIVE; | 317 | lru = LRU_UNEVICTABLE; |
313 | if (pc->flags & PAGE_CGROUP_FLAG_FILE) | 318 | else { |
314 | lru += LRU_FILE; | 319 | if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE) |
320 | lru += LRU_ACTIVE; | ||
321 | if (pc->flags & PAGE_CGROUP_FLAG_FILE) | ||
322 | lru += LRU_FILE; | ||
323 | } | ||
315 | 324 | ||
316 | MEM_CGROUP_ZSTAT(mz, lru) += 1; | 325 | MEM_CGROUP_ZSTAT(mz, lru) += 1; |
317 | list_add(&pc->lru, &mz->lists[lru]); | 326 | list_add(&pc->lru, &mz->lists[lru]); |
@@ -319,21 +328,31 @@ static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz, | |||
319 | mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, true); | 328 | mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, true); |
320 | } | 329 | } |
321 | 330 | ||
322 | static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active) | 331 | static void __mem_cgroup_move_lists(struct page_cgroup *pc, enum lru_list lru) |
323 | { | 332 | { |
324 | struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc); | 333 | struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc); |
325 | int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE; | 334 | int active = pc->flags & PAGE_CGROUP_FLAG_ACTIVE; |
326 | int file = pc->flags & PAGE_CGROUP_FLAG_FILE; | 335 | int file = pc->flags & PAGE_CGROUP_FLAG_FILE; |
327 | int lru = LRU_FILE * !!file + !!from; | 336 | int unevictable = pc->flags & PAGE_CGROUP_FLAG_UNEVICTABLE; |
337 | enum lru_list from = unevictable ? LRU_UNEVICTABLE : | ||
338 | (LRU_FILE * !!file + !!active); | ||
328 | 339 | ||
329 | MEM_CGROUP_ZSTAT(mz, lru) -= 1; | 340 | if (lru == from) |
341 | return; | ||
330 | 342 | ||
331 | if (active) | 343 | MEM_CGROUP_ZSTAT(mz, from) -= 1; |
332 | pc->flags |= PAGE_CGROUP_FLAG_ACTIVE; | 344 | |
333 | else | 345 | if (is_unevictable_lru(lru)) { |
334 | pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE; | 346 | pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE; |
347 | pc->flags |= PAGE_CGROUP_FLAG_UNEVICTABLE; | ||
348 | } else { | ||
349 | if (is_active_lru(lru)) | ||
350 | pc->flags |= PAGE_CGROUP_FLAG_ACTIVE; | ||
351 | else | ||
352 | pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE; | ||
353 | pc->flags &= ~PAGE_CGROUP_FLAG_UNEVICTABLE; | ||
354 | } | ||
335 | 355 | ||
336 | lru = LRU_FILE * !!file + !!active; | ||
337 | MEM_CGROUP_ZSTAT(mz, lru) += 1; | 356 | MEM_CGROUP_ZSTAT(mz, lru) += 1; |
338 | list_move(&pc->lru, &mz->lists[lru]); | 357 | list_move(&pc->lru, &mz->lists[lru]); |
339 | } | 358 | } |
@@ -351,7 +370,7 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem) | |||
351 | /* | 370 | /* |
352 | * This routine assumes that the appropriate zone's lru lock is already held | 371 | * This routine assumes that the appropriate zone's lru lock is already held |
353 | */ | 372 | */ |
354 | void mem_cgroup_move_lists(struct page *page, bool active) | 373 | void mem_cgroup_move_lists(struct page *page, enum lru_list lru) |
355 | { | 374 | { |
356 | struct page_cgroup *pc; | 375 | struct page_cgroup *pc; |
357 | struct mem_cgroup_per_zone *mz; | 376 | struct mem_cgroup_per_zone *mz; |
@@ -374,7 +393,7 @@ void mem_cgroup_move_lists(struct page *page, bool active) | |||
374 | if (pc) { | 393 | if (pc) { |
375 | mz = page_cgroup_zoneinfo(pc); | 394 | mz = page_cgroup_zoneinfo(pc); |
376 | spin_lock_irqsave(&mz->lru_lock, flags); | 395 | spin_lock_irqsave(&mz->lru_lock, flags); |
377 | __mem_cgroup_move_lists(pc, active); | 396 | __mem_cgroup_move_lists(pc, lru); |
378 | spin_unlock_irqrestore(&mz->lru_lock, flags); | 397 | spin_unlock_irqrestore(&mz->lru_lock, flags); |
379 | } | 398 | } |
380 | unlock_page_cgroup(page); | 399 | unlock_page_cgroup(page); |
@@ -472,12 +491,10 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, | |||
472 | /* | 491 | /* |
473 | * TODO: play better with lumpy reclaim, grabbing anything. | 492 | * TODO: play better with lumpy reclaim, grabbing anything. |
474 | */ | 493 | */ |
475 | if (PageActive(page) && !active) { | 494 | if (PageUnevictable(page) || |
476 | __mem_cgroup_move_lists(pc, true); | 495 | (PageActive(page) && !active) || |
477 | continue; | 496 | (!PageActive(page) && active)) { |
478 | } | 497 | __mem_cgroup_move_lists(pc, page_lru(page)); |
479 | if (!PageActive(page) && active) { | ||
480 | __mem_cgroup_move_lists(pc, false); | ||
481 | continue; | 498 | continue; |
482 | } | 499 | } |
483 | 500 | ||
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 71b47491487d..36f42573a335 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -2202,7 +2202,7 @@ static void gather_stats(struct page *page, void *private, int pte_dirty) | |||
2202 | if (PageSwapCache(page)) | 2202 | if (PageSwapCache(page)) |
2203 | md->swapcache++; | 2203 | md->swapcache++; |
2204 | 2204 | ||
2205 | if (PageActive(page)) | 2205 | if (PageActive(page) || PageUnevictable(page)) |
2206 | md->active++; | 2206 | md->active++; |
2207 | 2207 | ||
2208 | if (PageWriteback(page)) | 2208 | if (PageWriteback(page)) |
diff --git a/mm/migrate.c b/mm/migrate.c index c07327487111..b10237d8b459 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
@@ -53,14 +53,9 @@ int migrate_prep(void) | |||
53 | return 0; | 53 | return 0; |
54 | } | 54 | } |
55 | 55 | ||
56 | static inline void move_to_lru(struct page *page) | ||
57 | { | ||
58 | lru_cache_add_lru(page, page_lru(page)); | ||
59 | put_page(page); | ||
60 | } | ||
61 | |||
62 | /* | 56 | /* |
63 | * Add isolated pages on the list back to the LRU. | 57 | * Add isolated pages on the list back to the LRU under page lock |
58 | * to avoid leaking evictable pages back onto unevictable list. | ||
64 | * | 59 | * |
65 | * returns the number of pages put back. | 60 | * returns the number of pages put back. |
66 | */ | 61 | */ |
@@ -72,7 +67,7 @@ int putback_lru_pages(struct list_head *l) | |||
72 | 67 | ||
73 | list_for_each_entry_safe(page, page2, l, lru) { | 68 | list_for_each_entry_safe(page, page2, l, lru) { |
74 | list_del(&page->lru); | 69 | list_del(&page->lru); |
75 | move_to_lru(page); | 70 | putback_lru_page(page); |
76 | count++; | 71 | count++; |
77 | } | 72 | } |
78 | return count; | 73 | return count; |
@@ -354,8 +349,11 @@ static void migrate_page_copy(struct page *newpage, struct page *page) | |||
354 | SetPageReferenced(newpage); | 349 | SetPageReferenced(newpage); |
355 | if (PageUptodate(page)) | 350 | if (PageUptodate(page)) |
356 | SetPageUptodate(newpage); | 351 | SetPageUptodate(newpage); |
357 | if (PageActive(page)) | 352 | if (TestClearPageActive(page)) { |
353 | VM_BUG_ON(PageUnevictable(page)); | ||
358 | SetPageActive(newpage); | 354 | SetPageActive(newpage); |
355 | } else | ||
356 | unevictable_migrate_page(newpage, page); | ||
359 | if (PageChecked(page)) | 357 | if (PageChecked(page)) |
360 | SetPageChecked(newpage); | 358 | SetPageChecked(newpage); |
361 | if (PageMappedToDisk(page)) | 359 | if (PageMappedToDisk(page)) |
@@ -376,7 +374,6 @@ static void migrate_page_copy(struct page *newpage, struct page *page) | |||
376 | #ifdef CONFIG_SWAP | 374 | #ifdef CONFIG_SWAP |
377 | ClearPageSwapCache(page); | 375 | ClearPageSwapCache(page); |
378 | #endif | 376 | #endif |
379 | ClearPageActive(page); | ||
380 | ClearPagePrivate(page); | 377 | ClearPagePrivate(page); |
381 | set_page_private(page, 0); | 378 | set_page_private(page, 0); |
382 | page->mapping = NULL; | 379 | page->mapping = NULL; |
@@ -555,6 +552,10 @@ static int fallback_migrate_page(struct address_space *mapping, | |||
555 | * | 552 | * |
556 | * The new page will have replaced the old page if this function | 553 | * The new page will have replaced the old page if this function |
557 | * is successful. | 554 | * is successful. |
555 | * | ||
556 | * Return value: | ||
557 | * < 0 - error code | ||
558 | * == 0 - success | ||
558 | */ | 559 | */ |
559 | static int move_to_new_page(struct page *newpage, struct page *page) | 560 | static int move_to_new_page(struct page *newpage, struct page *page) |
560 | { | 561 | { |
@@ -617,9 +618,10 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, | |||
617 | if (!newpage) | 618 | if (!newpage) |
618 | return -ENOMEM; | 619 | return -ENOMEM; |
619 | 620 | ||
620 | if (page_count(page) == 1) | 621 | if (page_count(page) == 1) { |
621 | /* page was freed from under us. So we are done. */ | 622 | /* page was freed from under us. So we are done. */ |
622 | goto move_newpage; | 623 | goto move_newpage; |
624 | } | ||
623 | 625 | ||
624 | charge = mem_cgroup_prepare_migration(page, newpage); | 626 | charge = mem_cgroup_prepare_migration(page, newpage); |
625 | if (charge == -ENOMEM) { | 627 | if (charge == -ENOMEM) { |
@@ -693,7 +695,6 @@ rcu_unlock: | |||
693 | rcu_read_unlock(); | 695 | rcu_read_unlock(); |
694 | 696 | ||
695 | unlock: | 697 | unlock: |
696 | |||
697 | unlock_page(page); | 698 | unlock_page(page); |
698 | 699 | ||
699 | if (rc != -EAGAIN) { | 700 | if (rc != -EAGAIN) { |
@@ -704,17 +705,19 @@ unlock: | |||
704 | * restored. | 705 | * restored. |
705 | */ | 706 | */ |
706 | list_del(&page->lru); | 707 | list_del(&page->lru); |
707 | move_to_lru(page); | 708 | putback_lru_page(page); |
708 | } | 709 | } |
709 | 710 | ||
710 | move_newpage: | 711 | move_newpage: |
711 | if (!charge) | 712 | if (!charge) |
712 | mem_cgroup_end_migration(newpage); | 713 | mem_cgroup_end_migration(newpage); |
714 | |||
713 | /* | 715 | /* |
714 | * Move the new page to the LRU. If migration was not successful | 716 | * Move the new page to the LRU. If migration was not successful |
715 | * then this will free the page. | 717 | * then this will free the page. |
716 | */ | 718 | */ |
717 | move_to_lru(newpage); | 719 | putback_lru_page(newpage); |
720 | |||
718 | if (result) { | 721 | if (result) { |
719 | if (rc) | 722 | if (rc) |
720 | *result = rc; | 723 | *result = rc; |
@@ -115,7 +115,7 @@ static void pagevec_move_tail(struct pagevec *pvec) | |||
115 | zone = pagezone; | 115 | zone = pagezone; |
116 | spin_lock(&zone->lru_lock); | 116 | spin_lock(&zone->lru_lock); |
117 | } | 117 | } |
118 | if (PageLRU(page) && !PageActive(page)) { | 118 | if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { |
119 | int lru = page_is_file_cache(page); | 119 | int lru = page_is_file_cache(page); |
120 | list_move_tail(&page->lru, &zone->lru[lru].list); | 120 | list_move_tail(&page->lru, &zone->lru[lru].list); |
121 | pgmoved++; | 121 | pgmoved++; |
@@ -136,7 +136,7 @@ static void pagevec_move_tail(struct pagevec *pvec) | |||
136 | void rotate_reclaimable_page(struct page *page) | 136 | void rotate_reclaimable_page(struct page *page) |
137 | { | 137 | { |
138 | if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) && | 138 | if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) && |
139 | PageLRU(page)) { | 139 | !PageUnevictable(page) && PageLRU(page)) { |
140 | struct pagevec *pvec; | 140 | struct pagevec *pvec; |
141 | unsigned long flags; | 141 | unsigned long flags; |
142 | 142 | ||
@@ -157,7 +157,7 @@ void activate_page(struct page *page) | |||
157 | struct zone *zone = page_zone(page); | 157 | struct zone *zone = page_zone(page); |
158 | 158 | ||
159 | spin_lock_irq(&zone->lru_lock); | 159 | spin_lock_irq(&zone->lru_lock); |
160 | if (PageLRU(page) && !PageActive(page)) { | 160 | if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { |
161 | int file = page_is_file_cache(page); | 161 | int file = page_is_file_cache(page); |
162 | int lru = LRU_BASE + file; | 162 | int lru = LRU_BASE + file; |
163 | del_page_from_lru_list(zone, page, lru); | 163 | del_page_from_lru_list(zone, page, lru); |
@@ -166,7 +166,7 @@ void activate_page(struct page *page) | |||
166 | lru += LRU_ACTIVE; | 166 | lru += LRU_ACTIVE; |
167 | add_page_to_lru_list(zone, page, lru); | 167 | add_page_to_lru_list(zone, page, lru); |
168 | __count_vm_event(PGACTIVATE); | 168 | __count_vm_event(PGACTIVATE); |
169 | mem_cgroup_move_lists(page, true); | 169 | mem_cgroup_move_lists(page, lru); |
170 | 170 | ||
171 | zone->recent_rotated[!!file]++; | 171 | zone->recent_rotated[!!file]++; |
172 | zone->recent_scanned[!!file]++; | 172 | zone->recent_scanned[!!file]++; |
@@ -183,7 +183,8 @@ void activate_page(struct page *page) | |||
183 | */ | 183 | */ |
184 | void mark_page_accessed(struct page *page) | 184 | void mark_page_accessed(struct page *page) |
185 | { | 185 | { |
186 | if (!PageActive(page) && PageReferenced(page) && PageLRU(page)) { | 186 | if (!PageActive(page) && !PageUnevictable(page) && |
187 | PageReferenced(page) && PageLRU(page)) { | ||
187 | activate_page(page); | 188 | activate_page(page); |
188 | ClearPageReferenced(page); | 189 | ClearPageReferenced(page); |
189 | } else if (!PageReferenced(page)) { | 190 | } else if (!PageReferenced(page)) { |
@@ -211,13 +212,38 @@ void __lru_cache_add(struct page *page, enum lru_list lru) | |||
211 | void lru_cache_add_lru(struct page *page, enum lru_list lru) | 212 | void lru_cache_add_lru(struct page *page, enum lru_list lru) |
212 | { | 213 | { |
213 | if (PageActive(page)) { | 214 | if (PageActive(page)) { |
215 | VM_BUG_ON(PageUnevictable(page)); | ||
214 | ClearPageActive(page); | 216 | ClearPageActive(page); |
217 | } else if (PageUnevictable(page)) { | ||
218 | VM_BUG_ON(PageActive(page)); | ||
219 | ClearPageUnevictable(page); | ||
215 | } | 220 | } |
216 | 221 | ||
217 | VM_BUG_ON(PageLRU(page) || PageActive(page)); | 222 | VM_BUG_ON(PageLRU(page) || PageActive(page) || PageUnevictable(page)); |
218 | __lru_cache_add(page, lru); | 223 | __lru_cache_add(page, lru); |
219 | } | 224 | } |
220 | 225 | ||
226 | /** | ||
227 | * add_page_to_unevictable_list - add a page to the unevictable list | ||
228 | * @page: the page to be added to the unevictable list | ||
229 | * | ||
230 | * Add page directly to its zone's unevictable list. To avoid races with | ||
231 | * tasks that might be making the page evictable, through eg. munlock, | ||
232 | * munmap or exit, while it's not on the lru, we want to add the page | ||
233 | * while it's locked or otherwise "invisible" to other tasks. This is | ||
234 | * difficult to do when using the pagevec cache, so bypass that. | ||
235 | */ | ||
236 | void add_page_to_unevictable_list(struct page *page) | ||
237 | { | ||
238 | struct zone *zone = page_zone(page); | ||
239 | |||
240 | spin_lock_irq(&zone->lru_lock); | ||
241 | SetPageUnevictable(page); | ||
242 | SetPageLRU(page); | ||
243 | add_page_to_lru_list(zone, page, LRU_UNEVICTABLE); | ||
244 | spin_unlock_irq(&zone->lru_lock); | ||
245 | } | ||
246 | |||
221 | /* | 247 | /* |
222 | * Drain pages out of the cpu's pagevecs. | 248 | * Drain pages out of the cpu's pagevecs. |
223 | * Either "cpu" is the current CPU, and preemption has already been | 249 | * Either "cpu" is the current CPU, and preemption has already been |
@@ -316,6 +342,7 @@ void release_pages(struct page **pages, int nr, int cold) | |||
316 | 342 | ||
317 | if (PageLRU(page)) { | 343 | if (PageLRU(page)) { |
318 | struct zone *pagezone = page_zone(page); | 344 | struct zone *pagezone = page_zone(page); |
345 | |||
319 | if (pagezone != zone) { | 346 | if (pagezone != zone) { |
320 | if (zone) | 347 | if (zone) |
321 | spin_unlock_irqrestore(&zone->lru_lock, | 348 | spin_unlock_irqrestore(&zone->lru_lock, |
@@ -392,6 +419,7 @@ void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru) | |||
392 | { | 419 | { |
393 | int i; | 420 | int i; |
394 | struct zone *zone = NULL; | 421 | struct zone *zone = NULL; |
422 | VM_BUG_ON(is_unevictable_lru(lru)); | ||
395 | 423 | ||
396 | for (i = 0; i < pagevec_count(pvec); i++) { | 424 | for (i = 0; i < pagevec_count(pvec); i++) { |
397 | struct page *page = pvec->pages[i]; | 425 | struct page *page = pvec->pages[i]; |
@@ -403,6 +431,8 @@ void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru) | |||
403 | zone = pagezone; | 431 | zone = pagezone; |
404 | spin_lock_irq(&zone->lru_lock); | 432 | spin_lock_irq(&zone->lru_lock); |
405 | } | 433 | } |
434 | VM_BUG_ON(PageActive(page)); | ||
435 | VM_BUG_ON(PageUnevictable(page)); | ||
406 | VM_BUG_ON(PageLRU(page)); | 436 | VM_BUG_ON(PageLRU(page)); |
407 | SetPageLRU(page); | 437 | SetPageLRU(page); |
408 | if (is_active_lru(lru)) | 438 | if (is_active_lru(lru)) |
diff --git a/mm/vmscan.c b/mm/vmscan.c index a8347b677e74..154b9b608da6 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -470,6 +470,79 @@ int remove_mapping(struct address_space *mapping, struct page *page) | |||
470 | return 0; | 470 | return 0; |
471 | } | 471 | } |
472 | 472 | ||
473 | /** | ||
474 | * putback_lru_page - put previously isolated page onto appropriate LRU list | ||
475 | * @page: page to be put back to appropriate lru list | ||
476 | * | ||
477 | * Add previously isolated @page to appropriate LRU list. | ||
478 | * Page may still be unevictable for other reasons. | ||
479 | * | ||
480 | * lru_lock must not be held, interrupts must be enabled. | ||
481 | */ | ||
482 | #ifdef CONFIG_UNEVICTABLE_LRU | ||
483 | void putback_lru_page(struct page *page) | ||
484 | { | ||
485 | int lru; | ||
486 | int active = !!TestClearPageActive(page); | ||
487 | |||
488 | VM_BUG_ON(PageLRU(page)); | ||
489 | |||
490 | redo: | ||
491 | ClearPageUnevictable(page); | ||
492 | |||
493 | if (page_evictable(page, NULL)) { | ||
494 | /* | ||
495 | * For evictable pages, we can use the cache. | ||
496 | * In event of a race, worst case is we end up with an | ||
497 | * unevictable page on [in]active list. | ||
498 | * We know how to handle that. | ||
499 | */ | ||
500 | lru = active + page_is_file_cache(page); | ||
501 | lru_cache_add_lru(page, lru); | ||
502 | } else { | ||
503 | /* | ||
504 | * Put unevictable pages directly on zone's unevictable | ||
505 | * list. | ||
506 | */ | ||
507 | lru = LRU_UNEVICTABLE; | ||
508 | add_page_to_unevictable_list(page); | ||
509 | } | ||
510 | mem_cgroup_move_lists(page, lru); | ||
511 | |||
512 | /* | ||
513 | * The page's status can change while we move it among the LRU lists. If an | ||
514 | * evictable page is left on the unevictable list, it will never be freed. | ||
515 | * To avoid that, check again after we have added it to the list. | ||
516 | */ | ||
517 | if (lru == LRU_UNEVICTABLE && page_evictable(page, NULL)) { | ||
518 | if (!isolate_lru_page(page)) { | ||
519 | put_page(page); | ||
520 | goto redo; | ||
521 | } | ||
522 | /* This means someone else dropped this page from the LRU, | ||
523 | * so it will be freed or put back on the LRU again. There is | ||
524 | * nothing to do here. | ||
525 | */ | ||
526 | } | ||
527 | |||
528 | put_page(page); /* drop ref from isolate */ | ||
529 | } | ||
530 | |||
531 | #else /* CONFIG_UNEVICTABLE_LRU */ | ||
532 | |||
533 | void putback_lru_page(struct page *page) | ||
534 | { | ||
535 | int lru; | ||
536 | VM_BUG_ON(PageLRU(page)); | ||
537 | |||
538 | lru = !!TestClearPageActive(page) + page_is_file_cache(page); | ||
539 | lru_cache_add_lru(page, lru); | ||
540 | mem_cgroup_move_lists(page, lru); | ||
541 | put_page(page); | ||
542 | } | ||
543 | #endif /* CONFIG_UNEVICTABLE_LRU */ | ||
544 | |||
545 | |||
473 | /* | 546 | /* |
474 | * shrink_page_list() returns the number of reclaimed pages | 547 | * shrink_page_list() returns the number of reclaimed pages |
475 | */ | 548 | */ |
@@ -503,6 +576,12 @@ static unsigned long shrink_page_list(struct list_head *page_list, | |||
503 | 576 | ||
504 | sc->nr_scanned++; | 577 | sc->nr_scanned++; |
505 | 578 | ||
579 | if (unlikely(!page_evictable(page, NULL))) { | ||
580 | unlock_page(page); | ||
581 | putback_lru_page(page); | ||
582 | continue; | ||
583 | } | ||
584 | |||
506 | if (!sc->may_swap && page_mapped(page)) | 585 | if (!sc->may_swap && page_mapped(page)) |
507 | goto keep_locked; | 586 | goto keep_locked; |
508 | 587 | ||
@@ -602,7 +681,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, | |||
602 | * possible for a page to have PageDirty set, but it is actually | 681 | * possible for a page to have PageDirty set, but it is actually |
603 | * clean (all its buffers are clean). This happens if the | 682 | * clean (all its buffers are clean). This happens if the |
604 | * buffers were written out directly, with submit_bh(). ext3 | 683 | * buffers were written out directly, with submit_bh(). ext3 |
605 | * will do this, as well as the blockdev mapping. | 684 | * will do this, as well as the blockdev mapping. |
606 | * try_to_release_page() will discover that cleanness and will | 685 | * try_to_release_page() will discover that cleanness and will |
607 | * drop the buffers and mark the page clean - it can be freed. | 686 | * drop the buffers and mark the page clean - it can be freed. |
608 | * | 687 | * |
@@ -650,6 +729,7 @@ activate_locked: | |||
650 | /* Not a candidate for swapping, so reclaim swap space. */ | 729 | /* Not a candidate for swapping, so reclaim swap space. */ |
651 | if (PageSwapCache(page) && vm_swap_full()) | 730 | if (PageSwapCache(page) && vm_swap_full()) |
652 | remove_exclusive_swap_page_ref(page); | 731 | remove_exclusive_swap_page_ref(page); |
732 | VM_BUG_ON(PageActive(page)); | ||
653 | SetPageActive(page); | 733 | SetPageActive(page); |
654 | pgactivate++; | 734 | pgactivate++; |
655 | keep_locked: | 735 | keep_locked: |
@@ -699,6 +779,14 @@ int __isolate_lru_page(struct page *page, int mode, int file) | |||
699 | if (mode != ISOLATE_BOTH && (!page_is_file_cache(page) != !file)) | 779 | if (mode != ISOLATE_BOTH && (!page_is_file_cache(page) != !file)) |
700 | return ret; | 780 | return ret; |
701 | 781 | ||
782 | /* | ||
783 | * When this function is being called for lumpy reclaim, we | ||
784 | * initially look into all LRU pages, active, inactive and | ||
785 | * unevictable; only give shrink_page_list evictable pages. | ||
786 | */ | ||
787 | if (PageUnevictable(page)) | ||
788 | return ret; | ||
789 | |||
702 | ret = -EBUSY; | 790 | ret = -EBUSY; |
703 | if (likely(get_page_unless_zero(page))) { | 791 | if (likely(get_page_unless_zero(page))) { |
704 | /* | 792 | /* |
@@ -810,7 +898,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, | |||
810 | /* else it is being freed elsewhere */ | 898 | /* else it is being freed elsewhere */ |
811 | list_move(&cursor_page->lru, src); | 899 | list_move(&cursor_page->lru, src); |
812 | default: | 900 | default: |
813 | break; | 901 | break; /* ! on LRU or wrong list */ |
814 | } | 902 | } |
815 | } | 903 | } |
816 | } | 904 | } |
@@ -870,8 +958,9 @@ static unsigned long clear_active_flags(struct list_head *page_list, | |||
870 | * Returns -EBUSY if the page was not on an LRU list. | 958 | * Returns -EBUSY if the page was not on an LRU list. |
871 | * | 959 | * |
872 | * The returned page will have PageLRU() cleared. If it was found on | 960 | * The returned page will have PageLRU() cleared. If it was found on |
873 | * the active list, it will have PageActive set. That flag may need | 961 | * the active list, it will have PageActive set. If it was found on |
874 | * to be cleared by the caller before letting the page go. | 962 | * the unevictable list, it will have the PageUnevictable bit set. That flag |
963 | * may need to be cleared by the caller before letting the page go. | ||
875 | * | 964 | * |
876 | * The vmstat statistic corresponding to the list on which the page was | 965 | * The vmstat statistic corresponding to the list on which the page was |
877 | * found will be decremented. | 966 | * found will be decremented. |
@@ -892,11 +981,10 @@ int isolate_lru_page(struct page *page) | |||
892 | 981 | ||
893 | spin_lock_irq(&zone->lru_lock); | 982 | spin_lock_irq(&zone->lru_lock); |
894 | if (PageLRU(page) && get_page_unless_zero(page)) { | 983 | if (PageLRU(page) && get_page_unless_zero(page)) { |
895 | int lru = LRU_BASE; | 984 | int lru = page_lru(page); |
896 | ret = 0; | 985 | ret = 0; |
897 | ClearPageLRU(page); | 986 | ClearPageLRU(page); |
898 | 987 | ||
899 | lru += page_is_file_cache(page) + !!PageActive(page); | ||
900 | del_page_from_lru_list(zone, page, lru); | 988 | del_page_from_lru_list(zone, page, lru); |
901 | } | 989 | } |
902 | spin_unlock_irq(&zone->lru_lock); | 990 | spin_unlock_irq(&zone->lru_lock); |
@@ -1008,11 +1096,20 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, | |||
1008 | * Put back any unfreeable pages. | 1096 | * Put back any unfreeable pages. |
1009 | */ | 1097 | */ |
1010 | while (!list_empty(&page_list)) { | 1098 | while (!list_empty(&page_list)) { |
1099 | int lru; | ||
1011 | page = lru_to_page(&page_list); | 1100 | page = lru_to_page(&page_list); |
1012 | VM_BUG_ON(PageLRU(page)); | 1101 | VM_BUG_ON(PageLRU(page)); |
1013 | SetPageLRU(page); | ||
1014 | list_del(&page->lru); | 1102 | list_del(&page->lru); |
1015 | add_page_to_lru_list(zone, page, page_lru(page)); | 1103 | if (unlikely(!page_evictable(page, NULL))) { |
1104 | spin_unlock_irq(&zone->lru_lock); | ||
1105 | putback_lru_page(page); | ||
1106 | spin_lock_irq(&zone->lru_lock); | ||
1107 | continue; | ||
1108 | } | ||
1109 | SetPageLRU(page); | ||
1110 | lru = page_lru(page); | ||
1111 | add_page_to_lru_list(zone, page, lru); | ||
1112 | mem_cgroup_move_lists(page, lru); | ||
1016 | if (PageActive(page) && scan_global_lru(sc)) { | 1113 | if (PageActive(page) && scan_global_lru(sc)) { |
1017 | int file = !!page_is_file_cache(page); | 1114 | int file = !!page_is_file_cache(page); |
1018 | zone->recent_rotated[file]++; | 1115 | zone->recent_rotated[file]++; |
@@ -1107,6 +1204,11 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, | |||
1107 | page = lru_to_page(&l_hold); | 1204 | page = lru_to_page(&l_hold); |
1108 | list_del(&page->lru); | 1205 | list_del(&page->lru); |
1109 | 1206 | ||
1207 | if (unlikely(!page_evictable(page, NULL))) { | ||
1208 | putback_lru_page(page); | ||
1209 | continue; | ||
1210 | } | ||
1211 | |||
1110 | /* page_referenced clears PageReferenced */ | 1212 | /* page_referenced clears PageReferenced */ |
1111 | if (page_mapping_inuse(page) && | 1213 | if (page_mapping_inuse(page) && |
1112 | page_referenced(page, 0, sc->mem_cgroup)) | 1214 | page_referenced(page, 0, sc->mem_cgroup)) |
@@ -1140,7 +1242,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, | |||
1140 | ClearPageActive(page); | 1242 | ClearPageActive(page); |
1141 | 1243 | ||
1142 | list_move(&page->lru, &zone->lru[lru].list); | 1244 | list_move(&page->lru, &zone->lru[lru].list); |
1143 | mem_cgroup_move_lists(page, false); | 1245 | mem_cgroup_move_lists(page, lru); |
1144 | pgmoved++; | 1246 | pgmoved++; |
1145 | if (!pagevec_add(&pvec, page)) { | 1247 | if (!pagevec_add(&pvec, page)) { |
1146 | __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved); | 1248 | __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved); |
@@ -1286,7 +1388,7 @@ static unsigned long shrink_zone(int priority, struct zone *zone, | |||
1286 | 1388 | ||
1287 | get_scan_ratio(zone, sc, percent); | 1389 | get_scan_ratio(zone, sc, percent); |
1288 | 1390 | ||
1289 | for_each_lru(l) { | 1391 | for_each_evictable_lru(l) { |
1290 | if (scan_global_lru(sc)) { | 1392 | if (scan_global_lru(sc)) { |
1291 | int file = is_file_lru(l); | 1393 | int file = is_file_lru(l); |
1292 | int scan; | 1394 | int scan; |
@@ -1318,7 +1420,7 @@ static unsigned long shrink_zone(int priority, struct zone *zone, | |||
1318 | 1420 | ||
1319 | while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || | 1421 | while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || |
1320 | nr[LRU_INACTIVE_FILE]) { | 1422 | nr[LRU_INACTIVE_FILE]) { |
1321 | for_each_lru(l) { | 1423 | for_each_evictable_lru(l) { |
1322 | if (nr[l]) { | 1424 | if (nr[l]) { |
1323 | nr_to_scan = min(nr[l], | 1425 | nr_to_scan = min(nr[l], |
1324 | (unsigned long)sc->swap_cluster_max); | 1426 | (unsigned long)sc->swap_cluster_max); |
@@ -1875,8 +1977,8 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio, | |||
1875 | if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY) | 1977 | if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY) |
1876 | continue; | 1978 | continue; |
1877 | 1979 | ||
1878 | for_each_lru(l) { | 1980 | for_each_evictable_lru(l) { |
1879 | /* For pass = 0 we don't shrink the active list */ | 1981 | /* For pass = 0, we don't shrink the active list */ |
1880 | if (pass == 0 && | 1982 | if (pass == 0 && |
1881 | (l == LRU_ACTIVE || l == LRU_ACTIVE_FILE)) | 1983 | (l == LRU_ACTIVE || l == LRU_ACTIVE_FILE)) |
1882 | continue; | 1984 | continue; |
@@ -2213,3 +2315,24 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) | |||
2213 | return ret; | 2315 | return ret; |
2214 | } | 2316 | } |
2215 | #endif | 2317 | #endif |
2318 | |||
2319 | #ifdef CONFIG_UNEVICTABLE_LRU | ||
2320 | /* | ||
2321 | * page_evictable - test whether a page is evictable | ||
2322 | * @page: the page to test | ||
2323 | * @vma: the VMA in which the page is or will be mapped, may be NULL | ||
2324 | * | ||
2325 | * Test whether page is evictable--i.e., should be placed on active/inactive | ||
2326 | * lists vs unevictable list. | ||
2327 | * | ||
2328 | * Reasons page might not be evictable: | ||
2329 | * TODO - later patches | ||
2330 | */ | ||
2331 | int page_evictable(struct page *page, struct vm_area_struct *vma) | ||
2332 | { | ||
2333 | |||
2334 | /* TODO: test page [!]evictable conditions */ | ||
2335 | |||
2336 | return 1; | ||
2337 | } | ||
2338 | #endif | ||