diff options
author | Lee Schermerhorn <Lee.Schermerhorn@hp.com> | 2008-10-18 23:26:39 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-10-20 11:50:26 -0400 |
commit | 894bc310419ac95f4fa4142dc364401a7e607f65 (patch) | |
tree | 15d56a7333b41620016b845d2323dd06e822b621 /mm/memcontrol.c | |
parent | 8a7a8544a4f6554ec2d8048ac9f9672f442db5a2 (diff) |
Unevictable LRU Infrastructure
When the system contains lots of mlocked or otherwise unevictable pages,
the pageout code (kswapd) can spend lots of time scanning over these
pages. Worse still, the presence of lots of unevictable pages can confuse
kswapd into thinking that more aggressive pageout modes are required,
resulting in all kinds of bad behaviour.
This patch provides infrastructure to manage pages excluded from reclaim --
i.e., hidden from vmscan. It is based on a patch by Larry Woodman of Red
Hat, reworked to maintain "unevictable" pages on a separate per-zone LRU
list, to "hide" them from vmscan.
Kosaki Motohiro added support for the memory controller's unevictable
LRU list.
Pages on the unevictable list have both PG_unevictable and PG_lru set.
Thus, PG_unevictable is analogous to and mutually exclusive with
PG_active--it specifies which LRU list the page is on.
The unevictable infrastructure is enabled by a new mm Kconfig option
[CONFIG_]UNEVICTABLE_LRU.
A new function 'page_evictable(page, vma)' in vmscan.c tests whether or
not a page may be evictable. Subsequent patches will add the various
!evictable tests. We'll want to keep these tests light-weight for use in
shrink_active_list() and, possibly, the fault path.
To avoid races between tasks putting pages [back] onto an LRU list and
tasks that might be moving the page from non-evictable to evictable state,
the new function 'putback_lru_page()' -- inverse to 'isolate_lru_page()'
-- tests the "evictability" of a page after placing it on the LRU, before
dropping the reference. If the page has become unevictable,
putback_lru_page() will redo the 'putback', thus moving the page to the
unevictable list. This way, we avoid "stranding" evictable pages on the
unevictable list.
[akpm@linux-foundation.org: fix fallout from out-of-order merge]
[riel@redhat.com: fix UNEVICTABLE_LRU and !PROC_PAGE_MONITOR build]
[nishimura@mxp.nes.nec.co.jp: remove redundant mapping check]
[kosaki.motohiro@jp.fujitsu.com: unevictable-lru-infrastructure: putback_lru_page()/unevictable page handling rework]
[kosaki.motohiro@jp.fujitsu.com: kill unnecessary lock_page() in vmscan.c]
[kosaki.motohiro@jp.fujitsu.com: revert migration change of unevictable lru infrastructure]
[kosaki.motohiro@jp.fujitsu.com: revert to unevictable-lru-infrastructure-kconfig-fix.patch]
[kosaki.motohiro@jp.fujitsu.com: restore patch failure of vmstat-unevictable-and-mlocked-pages-vm-events.patch]
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Debugged-by: Benjamin Kidwell <benjkidwell@yahoo.com>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- | mm/memcontrol.c | 73 |
1 files changed, 45 insertions, 28 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 27e9e75f4eab..82c065e7551e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -160,9 +160,10 @@ struct page_cgroup { | |||
160 | struct mem_cgroup *mem_cgroup; | 160 | struct mem_cgroup *mem_cgroup; |
161 | int flags; | 161 | int flags; |
162 | }; | 162 | }; |
163 | #define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */ | 163 | #define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */ |
164 | #define PAGE_CGROUP_FLAG_ACTIVE (0x2) /* page is active in this cgroup */ | 164 | #define PAGE_CGROUP_FLAG_ACTIVE (0x2) /* page is active in this cgroup */ |
165 | #define PAGE_CGROUP_FLAG_FILE (0x4) /* page is file system backed */ | 165 | #define PAGE_CGROUP_FLAG_FILE (0x4) /* page is file system backed */ |
166 | #define PAGE_CGROUP_FLAG_UNEVICTABLE (0x8) /* page is unevictableable */ | ||
166 | 167 | ||
167 | static int page_cgroup_nid(struct page_cgroup *pc) | 168 | static int page_cgroup_nid(struct page_cgroup *pc) |
168 | { | 169 | { |
@@ -292,10 +293,14 @@ static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz, | |||
292 | { | 293 | { |
293 | int lru = LRU_BASE; | 294 | int lru = LRU_BASE; |
294 | 295 | ||
295 | if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE) | 296 | if (pc->flags & PAGE_CGROUP_FLAG_UNEVICTABLE) |
296 | lru += LRU_ACTIVE; | 297 | lru = LRU_UNEVICTABLE; |
297 | if (pc->flags & PAGE_CGROUP_FLAG_FILE) | 298 | else { |
298 | lru += LRU_FILE; | 299 | if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE) |
300 | lru += LRU_ACTIVE; | ||
301 | if (pc->flags & PAGE_CGROUP_FLAG_FILE) | ||
302 | lru += LRU_FILE; | ||
303 | } | ||
299 | 304 | ||
300 | MEM_CGROUP_ZSTAT(mz, lru) -= 1; | 305 | MEM_CGROUP_ZSTAT(mz, lru) -= 1; |
301 | 306 | ||
@@ -308,10 +313,14 @@ static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz, | |||
308 | { | 313 | { |
309 | int lru = LRU_BASE; | 314 | int lru = LRU_BASE; |
310 | 315 | ||
311 | if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE) | 316 | if (pc->flags & PAGE_CGROUP_FLAG_UNEVICTABLE) |
312 | lru += LRU_ACTIVE; | 317 | lru = LRU_UNEVICTABLE; |
313 | if (pc->flags & PAGE_CGROUP_FLAG_FILE) | 318 | else { |
314 | lru += LRU_FILE; | 319 | if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE) |
320 | lru += LRU_ACTIVE; | ||
321 | if (pc->flags & PAGE_CGROUP_FLAG_FILE) | ||
322 | lru += LRU_FILE; | ||
323 | } | ||
315 | 324 | ||
316 | MEM_CGROUP_ZSTAT(mz, lru) += 1; | 325 | MEM_CGROUP_ZSTAT(mz, lru) += 1; |
317 | list_add(&pc->lru, &mz->lists[lru]); | 326 | list_add(&pc->lru, &mz->lists[lru]); |
@@ -319,21 +328,31 @@ static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz, | |||
319 | mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, true); | 328 | mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, true); |
320 | } | 329 | } |
321 | 330 | ||
322 | static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active) | 331 | static void __mem_cgroup_move_lists(struct page_cgroup *pc, enum lru_list lru) |
323 | { | 332 | { |
324 | struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc); | 333 | struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc); |
325 | int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE; | 334 | int active = pc->flags & PAGE_CGROUP_FLAG_ACTIVE; |
326 | int file = pc->flags & PAGE_CGROUP_FLAG_FILE; | 335 | int file = pc->flags & PAGE_CGROUP_FLAG_FILE; |
327 | int lru = LRU_FILE * !!file + !!from; | 336 | int unevictable = pc->flags & PAGE_CGROUP_FLAG_UNEVICTABLE; |
337 | enum lru_list from = unevictable ? LRU_UNEVICTABLE : | ||
338 | (LRU_FILE * !!file + !!active); | ||
328 | 339 | ||
329 | MEM_CGROUP_ZSTAT(mz, lru) -= 1; | 340 | if (lru == from) |
341 | return; | ||
330 | 342 | ||
331 | if (active) | 343 | MEM_CGROUP_ZSTAT(mz, from) -= 1; |
332 | pc->flags |= PAGE_CGROUP_FLAG_ACTIVE; | 344 | |
333 | else | 345 | if (is_unevictable_lru(lru)) { |
334 | pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE; | 346 | pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE; |
347 | pc->flags |= PAGE_CGROUP_FLAG_UNEVICTABLE; | ||
348 | } else { | ||
349 | if (is_active_lru(lru)) | ||
350 | pc->flags |= PAGE_CGROUP_FLAG_ACTIVE; | ||
351 | else | ||
352 | pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE; | ||
353 | pc->flags &= ~PAGE_CGROUP_FLAG_UNEVICTABLE; | ||
354 | } | ||
335 | 355 | ||
336 | lru = LRU_FILE * !!file + !!active; | ||
337 | MEM_CGROUP_ZSTAT(mz, lru) += 1; | 356 | MEM_CGROUP_ZSTAT(mz, lru) += 1; |
338 | list_move(&pc->lru, &mz->lists[lru]); | 357 | list_move(&pc->lru, &mz->lists[lru]); |
339 | } | 358 | } |
@@ -351,7 +370,7 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem) | |||
351 | /* | 370 | /* |
352 | * This routine assumes that the appropriate zone's lru lock is already held | 371 | * This routine assumes that the appropriate zone's lru lock is already held |
353 | */ | 372 | */ |
354 | void mem_cgroup_move_lists(struct page *page, bool active) | 373 | void mem_cgroup_move_lists(struct page *page, enum lru_list lru) |
355 | { | 374 | { |
356 | struct page_cgroup *pc; | 375 | struct page_cgroup *pc; |
357 | struct mem_cgroup_per_zone *mz; | 376 | struct mem_cgroup_per_zone *mz; |
@@ -374,7 +393,7 @@ void mem_cgroup_move_lists(struct page *page, bool active) | |||
374 | if (pc) { | 393 | if (pc) { |
375 | mz = page_cgroup_zoneinfo(pc); | 394 | mz = page_cgroup_zoneinfo(pc); |
376 | spin_lock_irqsave(&mz->lru_lock, flags); | 395 | spin_lock_irqsave(&mz->lru_lock, flags); |
377 | __mem_cgroup_move_lists(pc, active); | 396 | __mem_cgroup_move_lists(pc, lru); |
378 | spin_unlock_irqrestore(&mz->lru_lock, flags); | 397 | spin_unlock_irqrestore(&mz->lru_lock, flags); |
379 | } | 398 | } |
380 | unlock_page_cgroup(page); | 399 | unlock_page_cgroup(page); |
@@ -472,12 +491,10 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, | |||
472 | /* | 491 | /* |
473 | * TODO: play better with lumpy reclaim, grabbing anything. | 492 | * TODO: play better with lumpy reclaim, grabbing anything. |
474 | */ | 493 | */ |
475 | if (PageActive(page) && !active) { | 494 | if (PageUnevictable(page) || |
476 | __mem_cgroup_move_lists(pc, true); | 495 | (PageActive(page) && !active) || |
477 | continue; | 496 | (!PageActive(page) && active)) { |
478 | } | 497 | __mem_cgroup_move_lists(pc, page_lru(page)); |
479 | if (!PageActive(page) && active) { | ||
480 | __mem_cgroup_move_lists(pc, false); | ||
481 | continue; | 498 | continue; |
482 | } | 499 | } |
483 | 500 | ||