aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
author: Lee Schermerhorn <Lee.Schermerhorn@hp.com> 2008-10-18 23:26:39 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2008-10-20 11:50:26 -0400
commit: 894bc310419ac95f4fa4142dc364401a7e607f65 (patch)
tree: 15d56a7333b41620016b845d2323dd06e822b621 /include/linux
parent: 8a7a8544a4f6554ec2d8048ac9f9672f442db5a2 (diff)
Unevictable LRU Infrastructure
When the system contains lots of mlocked or otherwise unevictable pages, the pageout code (kswapd) can spend lots of time scanning over these pages. Worse still, the presence of lots of unevictable pages can confuse kswapd into thinking that more aggressive pageout modes are required, resulting in all kinds of bad behaviour.

This patch adds infrastructure to manage pages excluded from reclaim--i.e., hidden from vmscan. It is based on a patch by Larry Woodman of Red Hat, reworked to maintain "unevictable" pages on a separate per-zone LRU list, to "hide" them from vmscan.

Kosaki Motohiro added the support for the memory controller unevictable lru list.

Pages on the unevictable list have both PG_unevictable and PG_lru set. Thus, PG_unevictable is analogous to and mutually exclusive with PG_active--it specifies which LRU list the page is on.

The unevictable infrastructure is enabled by a new mm Kconfig option [CONFIG_]UNEVICTABLE_LRU.

A new function 'page_evictable(page, vma)' in vmscan.c tests whether or not a page may be evictable. Subsequent patches will add the various !evictable tests. We'll want to keep these tests light-weight for use in shrink_active_list() and, possibly, the fault path.

To avoid races between tasks putting pages [back] onto an LRU list and tasks that might be moving the page from non-evictable to evictable state, the new function 'putback_lru_page()' -- inverse to 'isolate_lru_page()' -- tests the "evictability" of a page after placing it on the LRU, before dropping the reference. If the page has become unevictable, putback_lru_page() will redo the 'putback', thus moving the page to the unevictable list. This way, we avoid "stranding" evictable pages on the unevictable list.
[akpm@linux-foundation.org: fix fallout from out-of-order merge]
[riel@redhat.com: fix UNEVICTABLE_LRU and !PROC_PAGE_MONITOR build]
[nishimura@mxp.nes.nec.co.jp: remove redundant mapping check]
[kosaki.motohiro@jp.fujitsu.com: unevictable-lru-infrastructure: putback_lru_page()/unevictable page handling rework]
[kosaki.motohiro@jp.fujitsu.com: kill unnecessary lock_page() in vmscan.c]
[kosaki.motohiro@jp.fujitsu.com: revert migration change of unevictable lru infrastructure]
[kosaki.motohiro@jp.fujitsu.com: revert to unevictable-lru-infrastructure-kconfig-fix.patch]
[kosaki.motohiro@jp.fujitsu.com: restore patch failure of vmstat-unevictable-and-mlocked-pages-vm-events.patch]
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Debugged-by: Benjamin Kidwell <benjkidwell@yahoo.com>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include/linux')
-rw-r--r-- include/linux/memcontrol.h 2
-rw-r--r-- include/linux/mm_inline.h 23
-rw-r--r-- include/linux/mmzone.h 24
-rw-r--r-- include/linux/page-flags.h 22
-rw-r--r-- include/linux/pagevec.h 1
-rw-r--r-- include/linux/swap.h 12
6 files changed, 73 insertions, 11 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 8d8f05c1515a..ee1b2fcb4410 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -34,9 +34,9 @@ extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
34 gfp_t gfp_mask); 34 gfp_t gfp_mask);
35extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, 35extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
36 gfp_t gfp_mask); 36 gfp_t gfp_mask);
37extern void mem_cgroup_move_lists(struct page *page, enum lru_list lru);
37extern void mem_cgroup_uncharge_page(struct page *page); 38extern void mem_cgroup_uncharge_page(struct page *page);
38extern void mem_cgroup_uncharge_cache_page(struct page *page); 39extern void mem_cgroup_uncharge_cache_page(struct page *page);
39extern void mem_cgroup_move_lists(struct page *page, bool active);
40extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask); 40extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask);
41 41
42extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, 42extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index f451fedd1e75..67d7697fd019 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -91,11 +91,16 @@ del_page_from_lru(struct zone *zone, struct page *page)
91 enum lru_list l = LRU_BASE; 91 enum lru_list l = LRU_BASE;
92 92
93 list_del(&page->lru); 93 list_del(&page->lru);
94 if (PageActive(page)) { 94 if (PageUnevictable(page)) {
95 __ClearPageActive(page); 95 __ClearPageUnevictable(page);
96 l += LRU_ACTIVE; 96 l = LRU_UNEVICTABLE;
97 } else {
98 if (PageActive(page)) {
99 __ClearPageActive(page);
100 l += LRU_ACTIVE;
101 }
102 l += page_is_file_cache(page);
97 } 103 }
98 l += page_is_file_cache(page);
99 __dec_zone_state(zone, NR_LRU_BASE + l); 104 __dec_zone_state(zone, NR_LRU_BASE + l);
100} 105}
101 106
@@ -110,9 +115,13 @@ static inline enum lru_list page_lru(struct page *page)
110{ 115{
111 enum lru_list lru = LRU_BASE; 116 enum lru_list lru = LRU_BASE;
112 117
113 if (PageActive(page)) 118 if (PageUnevictable(page))
114 lru += LRU_ACTIVE; 119 lru = LRU_UNEVICTABLE;
115 lru += page_is_file_cache(page); 120 else {
121 if (PageActive(page))
122 lru += LRU_ACTIVE;
123 lru += page_is_file_cache(page);
124 }
116 125
117 return lru; 126 return lru;
118} 127}
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 9c5111f49a32..d1f60d5fe2ea 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -86,6 +86,11 @@ enum zone_stat_item {
86 NR_ACTIVE_ANON, /* " " " " " */ 86 NR_ACTIVE_ANON, /* " " " " " */
87 NR_INACTIVE_FILE, /* " " " " " */ 87 NR_INACTIVE_FILE, /* " " " " " */
88 NR_ACTIVE_FILE, /* " " " " " */ 88 NR_ACTIVE_FILE, /* " " " " " */
89#ifdef CONFIG_UNEVICTABLE_LRU
90 NR_UNEVICTABLE, /* " " " " " */
91#else
92 NR_UNEVICTABLE = NR_ACTIVE_FILE, /* avoid compiler errors in dead code */
93#endif
89 NR_ANON_PAGES, /* Mapped anonymous pages */ 94 NR_ANON_PAGES, /* Mapped anonymous pages */
90 NR_FILE_MAPPED, /* pagecache pages mapped into pagetables. 95 NR_FILE_MAPPED, /* pagecache pages mapped into pagetables.
91 only modified from process context */ 96 only modified from process context */
@@ -128,10 +133,18 @@ enum lru_list {
128 LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE, 133 LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE,
129 LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE, 134 LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE,
130 LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE, 135 LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE,
131 NR_LRU_LISTS }; 136#ifdef CONFIG_UNEVICTABLE_LRU
137 LRU_UNEVICTABLE,
138#else
139 LRU_UNEVICTABLE = LRU_ACTIVE_FILE, /* avoid compiler errors in dead code */
140#endif
141 NR_LRU_LISTS
142};
132 143
133#define for_each_lru(l) for (l = 0; l < NR_LRU_LISTS; l++) 144#define for_each_lru(l) for (l = 0; l < NR_LRU_LISTS; l++)
134 145
146#define for_each_evictable_lru(l) for (l = 0; l <= LRU_ACTIVE_FILE; l++)
147
135static inline int is_file_lru(enum lru_list l) 148static inline int is_file_lru(enum lru_list l)
136{ 149{
137 return (l == LRU_INACTIVE_FILE || l == LRU_ACTIVE_FILE); 150 return (l == LRU_INACTIVE_FILE || l == LRU_ACTIVE_FILE);
@@ -142,6 +155,15 @@ static inline int is_active_lru(enum lru_list l)
142 return (l == LRU_ACTIVE_ANON || l == LRU_ACTIVE_FILE); 155 return (l == LRU_ACTIVE_ANON || l == LRU_ACTIVE_FILE);
143} 156}
144 157
158static inline int is_unevictable_lru(enum lru_list l)
159{
160#ifdef CONFIG_UNEVICTABLE_LRU
161 return (l == LRU_UNEVICTABLE);
162#else
163 return 0;
164#endif
165}
166
145struct per_cpu_pages { 167struct per_cpu_pages {
146 int count; /* number of pages in the list */ 168 int count; /* number of pages in the list */
147 int high; /* high watermark, emptying needed */ 169 int high; /* high watermark, emptying needed */
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 3d31616dcd23..ec1a1baad348 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -94,6 +94,9 @@ enum pageflags {
94 PG_reclaim, /* To be reclaimed asap */ 94 PG_reclaim, /* To be reclaimed asap */
95 PG_buddy, /* Page is free, on buddy lists */ 95 PG_buddy, /* Page is free, on buddy lists */
96 PG_swapbacked, /* Page is backed by RAM/swap */ 96 PG_swapbacked, /* Page is backed by RAM/swap */
97#ifdef CONFIG_UNEVICTABLE_LRU
98 PG_unevictable, /* Page is "unevictable" */
99#endif
97#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR 100#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
98 PG_uncached, /* Page has been mapped as uncached */ 101 PG_uncached, /* Page has been mapped as uncached */
99#endif 102#endif
@@ -182,6 +185,7 @@ PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced)
182PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty) 185PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty)
183PAGEFLAG(LRU, lru) __CLEARPAGEFLAG(LRU, lru) 186PAGEFLAG(LRU, lru) __CLEARPAGEFLAG(LRU, lru)
184PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active) 187PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active)
188 TESTCLEARFLAG(Active, active)
185__PAGEFLAG(Slab, slab) 189__PAGEFLAG(Slab, slab)
186PAGEFLAG(Checked, checked) /* Used by some filesystems */ 190PAGEFLAG(Checked, checked) /* Used by some filesystems */
187PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned) /* Xen */ 191PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned) /* Xen */
@@ -225,6 +229,15 @@ PAGEFLAG(SwapCache, swapcache)
225PAGEFLAG_FALSE(SwapCache) 229PAGEFLAG_FALSE(SwapCache)
226#endif 230#endif
227 231
232#ifdef CONFIG_UNEVICTABLE_LRU
233PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable)
234 TESTCLEARFLAG(Unevictable, unevictable)
235#else
236PAGEFLAG_FALSE(Unevictable) TESTCLEARFLAG_FALSE(Unevictable)
237 SETPAGEFLAG_NOOP(Unevictable) CLEARPAGEFLAG_NOOP(Unevictable)
238 __CLEARPAGEFLAG_NOOP(Unevictable)
239#endif
240
228#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR 241#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
229PAGEFLAG(Uncached, uncached) 242PAGEFLAG(Uncached, uncached)
230#else 243#else
@@ -340,9 +353,16 @@ static inline void __ClearPageTail(struct page *page)
340 353
341#endif /* !PAGEFLAGS_EXTENDED */ 354#endif /* !PAGEFLAGS_EXTENDED */
342 355
356#ifdef CONFIG_UNEVICTABLE_LRU
357#define __PG_UNEVICTABLE (1 << PG_unevictable)
358#else
359#define __PG_UNEVICTABLE 0
360#endif
361
343#define PAGE_FLAGS (1 << PG_lru | 1 << PG_private | 1 << PG_locked | \ 362#define PAGE_FLAGS (1 << PG_lru | 1 << PG_private | 1 << PG_locked | \
344 1 << PG_buddy | 1 << PG_writeback | \ 363 1 << PG_buddy | 1 << PG_writeback | \
345 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active) 364 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \
365 __PG_UNEVICTABLE)
346 366
347/* 367/*
348 * Flags checked in bad_page(). Pages on the free list should not have 368 * Flags checked in bad_page(). Pages on the free list should not have
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index 5fc96a4e760f..e90a2cb02915 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -101,7 +101,6 @@ static inline void __pagevec_lru_add_active_file(struct pagevec *pvec)
101 ____pagevec_lru_add(pvec, LRU_ACTIVE_FILE); 101 ____pagevec_lru_add(pvec, LRU_ACTIVE_FILE);
102} 102}
103 103
104
105static inline void pagevec_lru_add_file(struct pagevec *pvec) 104static inline void pagevec_lru_add_file(struct pagevec *pvec)
106{ 105{
107 if (pagevec_count(pvec)) 106 if (pagevec_count(pvec))
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 7d09d79997a4..a2113044d20a 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -180,6 +180,8 @@ extern int lru_add_drain_all(void);
180extern void rotate_reclaimable_page(struct page *page); 180extern void rotate_reclaimable_page(struct page *page);
181extern void swap_setup(void); 181extern void swap_setup(void);
182 182
183extern void add_page_to_unevictable_list(struct page *page);
184
183/** 185/**
184 * lru_cache_add: add a page to the page lists 186 * lru_cache_add: add a page to the page lists
185 * @page: the page to add 187 * @page: the page to add
@@ -228,6 +230,16 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
228} 230}
229#endif 231#endif
230 232
233#ifdef CONFIG_UNEVICTABLE_LRU
234extern int page_evictable(struct page *page, struct vm_area_struct *vma);
235#else
236static inline int page_evictable(struct page *page,
237 struct vm_area_struct *vma)
238{
239 return 1;
240}
241#endif
242
231extern int kswapd_run(int nid); 243extern int kswapd_run(int nid);
232 244
233#ifdef CONFIG_MMU 245#ifdef CONFIG_MMU