author     Lee Schermerhorn <Lee.Schermerhorn@hp.com>    2008-10-18 23:26:39 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>    2008-10-20 11:50:26 -0400
commit     894bc310419ac95f4fa4142dc364401a7e607f65
tree       15d56a7333b41620016b845d2323dd06e822b621  /mm/swap.c
parent     8a7a8544a4f6554ec2d8048ac9f9672f442db5a2
Unevictable LRU Infrastructure
When the system contains lots of mlocked or otherwise unevictable pages, the pageout code (kswapd) can spend lots of time scanning over these pages. Worse still, the presence of lots of unevictable pages can confuse kswapd into thinking that more aggressive pageout modes are required, resulting in all kinds of bad behaviour.

Infrastructure to manage pages excluded from reclaim--i.e., hidden from vmscan. Based on a patch by Larry Woodman of Red Hat. Reworked to maintain "unevictable" pages on a separate per-zone LRU list, to "hide" them from vmscan.

Kosaki Motohiro added the support for the memory controller unevictable lru list.

Pages on the unevictable list have both PG_unevictable and PG_lru set. Thus, PG_unevictable is analogous to and mutually exclusive with PG_active--it specifies which LRU list the page is on.

The unevictable infrastructure is enabled by a new mm Kconfig option [CONFIG_]UNEVICTABLE_LRU.

A new function 'page_evictable(page, vma)' in vmscan.c tests whether or not a page may be evictable. Subsequent patches will add the various !evictable tests. We'll want to keep these tests light-weight for use in shrink_active_list() and, possibly, the fault path.

To avoid races between tasks putting pages [back] onto an LRU list and tasks that might be moving the page from non-evictable to evictable state, the new function 'putback_lru_page()' -- inverse to 'isolate_lru_page()' -- tests the "evictability" of a page after placing it on the LRU, before dropping the reference. If the page has become unevictable, putback_lru_page() will redo the 'putback', thus moving the page to the unevictable list. This way, we avoid "stranding" evictable pages on the unevictable list.

[akpm@linux-foundation.org: fix fallout from out-of-order merge]
[riel@redhat.com: fix UNEVICTABLE_LRU and !PROC_PAGE_MONITOR build]
[nishimura@mxp.nes.nec.co.jp: remove redundant mapping check]
[kosaki.motohiro@jp.fujitsu.com: unevictable-lru-infrastructure: putback_lru_page()/unevictable page handling rework]
[kosaki.motohiro@jp.fujitsu.com: kill unnecessary lock_page() in vmscan.c]
[kosaki.motohiro@jp.fujitsu.com: revert migration change of unevictable lru infrastructure]
[kosaki.motohiro@jp.fujitsu.com: revert to unevictable-lru-infrastructure-kconfig-fix.patch]
[kosaki.motohiro@jp.fujitsu.com: restore patch failure of vmstat-unevictable-and-mlocked-pages-vm-events.patch]
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Debugged-by: Benjamin Kidwell <benjkidwell@yahoo.com>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
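The putback_lru_page() recheck described above is the core of the race avoidance. As a rough sketch only (the real implementation added to mm/vmscan.c by this series differs in detail; all identifiers used below are ones that exist in or are introduced by this commit), the logic looks roughly like this:

/*
 * Simplified sketch of the putback_lru_page() idea described in the
 * changelog above -- not the exact mm/vmscan.c code from this series.
 * The page is placed on the LRU list suggested by page_evictable();
 * if its evictability changed while it was off the LRU, the putback
 * is redone so the page cannot be stranded on the wrong list.
 */
void putback_lru_page(struct page *page)
{
	int lru;

	VM_BUG_ON(PageLRU(page));	/* the caller isolated the page */

redo:
	ClearPageUnevictable(page);

	if (page_evictable(page, NULL)) {
		/* evictable: the regular pagevec path is fine */
		lru = LRU_BASE + page_is_file_cache(page);
		lru_cache_add_lru(page, lru);
	} else {
		/* unevictable: add directly, bypassing the pagevec cache */
		lru = LRU_UNEVICTABLE;
		add_page_to_unevictable_list(page);
	}

	/*
	 * The page's state can change while it is being moved: a racing
	 * munlock may have made it evictable after the test above.  If
	 * so, isolate it again and redo the putback so an evictable page
	 * is never stranded on the unevictable list.
	 */
	if (lru == LRU_UNEVICTABLE && page_evictable(page, NULL)) {
		if (!isolate_lru_page(page)) {
			put_page(page);
			goto redo;
		}
	}

	put_page(page);		/* drop the reference taken at isolation */
}

The important point is the second page_evictable() test after the page is already back on a list: only by re-testing after the insertion, while still holding a reference, can the code guarantee that a page made evictable by a concurrent task does not stay hidden from vmscan.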
Diffstat (limited to 'mm/swap.c')
-rw-r--r--    mm/swap.c    42
1 files changed, 36 insertions, 6 deletions
diff --git a/mm/swap.c b/mm/swap.c
index 0b1974a08974..fee6b973f143 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -115,7 +115,7 @@ static void pagevec_move_tail(struct pagevec *pvec)
 			zone = pagezone;
 			spin_lock(&zone->lru_lock);
 		}
-		if (PageLRU(page) && !PageActive(page)) {
+		if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
 			int lru = page_is_file_cache(page);
 			list_move_tail(&page->lru, &zone->lru[lru].list);
 			pgmoved++;
@@ -136,7 +136,7 @@ static void pagevec_move_tail(struct pagevec *pvec)
 void rotate_reclaimable_page(struct page *page)
 {
 	if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) &&
-	    PageLRU(page)) {
+	    !PageUnevictable(page) && PageLRU(page)) {
 		struct pagevec *pvec;
 		unsigned long flags;
 
@@ -157,7 +157,7 @@ void activate_page(struct page *page)
 	struct zone *zone = page_zone(page);
 
 	spin_lock_irq(&zone->lru_lock);
-	if (PageLRU(page) && !PageActive(page)) {
+	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
 		int file = page_is_file_cache(page);
 		int lru = LRU_BASE + file;
 		del_page_from_lru_list(zone, page, lru);
@@ -166,7 +166,7 @@ void activate_page(struct page *page)
 		lru += LRU_ACTIVE;
 		add_page_to_lru_list(zone, page, lru);
 		__count_vm_event(PGACTIVATE);
-		mem_cgroup_move_lists(page, true);
+		mem_cgroup_move_lists(page, lru);
 
 		zone->recent_rotated[!!file]++;
 		zone->recent_scanned[!!file]++;
@@ -183,7 +183,8 @@ void activate_page(struct page *page)
  */
 void mark_page_accessed(struct page *page)
 {
-	if (!PageActive(page) && PageReferenced(page) && PageLRU(page)) {
+	if (!PageActive(page) && !PageUnevictable(page) &&
+			PageReferenced(page) && PageLRU(page)) {
 		activate_page(page);
 		ClearPageReferenced(page);
 	} else if (!PageReferenced(page)) {
@@ -211,13 +212,38 @@ void __lru_cache_add(struct page *page, enum lru_list lru)
 void lru_cache_add_lru(struct page *page, enum lru_list lru)
 {
 	if (PageActive(page)) {
+		VM_BUG_ON(PageUnevictable(page));
 		ClearPageActive(page);
+	} else if (PageUnevictable(page)) {
+		VM_BUG_ON(PageActive(page));
+		ClearPageUnevictable(page);
 	}
 
-	VM_BUG_ON(PageLRU(page) || PageActive(page));
+	VM_BUG_ON(PageLRU(page) || PageActive(page) || PageUnevictable(page));
 	__lru_cache_add(page, lru);
 }
 
+/**
+ * add_page_to_unevictable_list - add a page to the unevictable list
+ * @page:  the page to be added to the unevictable list
+ *
+ * Add page directly to its zone's unevictable list.  To avoid races with
+ * tasks that might be making the page evictable, through eg. munlock,
+ * munmap or exit, while it's not on the lru, we want to add the page
+ * while it's locked or otherwise "invisible" to other tasks.  This is
+ * difficult to do when using the pagevec cache, so bypass that.
+ */
+void add_page_to_unevictable_list(struct page *page)
+{
+	struct zone *zone = page_zone(page);
+
+	spin_lock_irq(&zone->lru_lock);
+	SetPageUnevictable(page);
+	SetPageLRU(page);
+	add_page_to_lru_list(zone, page, LRU_UNEVICTABLE);
+	spin_unlock_irq(&zone->lru_lock);
+}
+
 /*
  * Drain pages out of the cpu's pagevecs.
  * Either "cpu" is the current CPU, and preemption has already been
@@ -316,6 +342,7 @@ void release_pages(struct page **pages, int nr, int cold)
 
 		if (PageLRU(page)) {
 			struct zone *pagezone = page_zone(page);
+
 			if (pagezone != zone) {
 				if (zone)
 					spin_unlock_irqrestore(&zone->lru_lock,
@@ -392,6 +419,7 @@ void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
 {
 	int i;
 	struct zone *zone = NULL;
+	VM_BUG_ON(is_unevictable_lru(lru));
 
 	for (i = 0; i < pagevec_count(pvec); i++) {
 		struct page *page = pvec->pages[i];
@@ -403,6 +431,8 @@ void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
 			zone = pagezone;
 			spin_lock_irq(&zone->lru_lock);
 		}
+		VM_BUG_ON(PageActive(page));
+		VM_BUG_ON(PageUnevictable(page));
 		VM_BUG_ON(PageLRU(page));
 		SetPageLRU(page);
 		if (is_active_lru(lru))
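
As a usage note, the kernel-doc comment on add_page_to_unevictable_list() asks callers to add the page while it is locked or otherwise invisible to other tasks. A purely hypothetical caller (example_make_page_unevictable and its PageLRU check are illustrative only; the real callers, such as the mlock path, arrive in later patches of this series) might look like:

/*
 * Hypothetical, illustrative caller -- not part of this patch.  The
 * page is held locked across the insertion, as the kernel-doc above
 * requires, so no other task can be moving it between LRU lists at
 * the same time.
 */
static void example_make_page_unevictable(struct page *page)
{
	lock_page(page);
	if (!PageLRU(page))	/* only if it is not already on an LRU */
		add_page_to_unevictable_list(page);
	unlock_page(page);
}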