aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
authorRik van Riel <riel@redhat.com>2008-10-18 23:26:32 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-10-20 11:50:25 -0400
commit4f98a2fee8acdb4ac84545df98cccecfd130f8db (patch)
tree035a2937f4c3e2f7b4269412041c073ac646937c /include/linux
parentb2e185384f534781fd22f5ce170b2ad26f97df70 (diff)
vmscan: split LRU lists into anon & file sets
Split the LRU lists in two, one set for pages that are backed by real file systems ("file") and one for pages that are backed by memory and swap ("anon"). The latter includes tmpfs.

The advantage of doing this is that the VM will not have to scan over lots of anonymous pages (which we generally do not want to swap out), just to find the page cache pages that it should evict.

This patch has the infrastructure and a basic policy to balance how much we scan the anon lists and how much we scan the file lists. The big policy changes are in separate patches.

[lee.schermerhorn@hp.com: collect lru meminfo statistics from correct offset]
[kosaki.motohiro@jp.fujitsu.com: prevent incorrect oom under split_lru]
[kosaki.motohiro@jp.fujitsu.com: fix pagevec_move_tail() doesn't treat unevictable page]
[hugh@veritas.com: memcg swapbacked pages active]
[hugh@veritas.com: splitlru: BDI_CAP_SWAP_BACKED]
[akpm@linux-foundation.org: fix /proc/vmstat units]
[nishimura@mxp.nes.nec.co.jp: memcg: fix handling of shmem migration]
[kosaki.motohiro@jp.fujitsu.com: adjust Quicklists field of /proc/meminfo]
[kosaki.motohiro@jp.fujitsu.com: fix style issue of get_scan_ratio()]

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/backing-dev.h13
-rw-r--r--include/linux/memcontrol.h2
-rw-r--r--include/linux/mm_inline.h50
-rw-r--r--include/linux/mmzone.h47
-rw-r--r--include/linux/pagevec.h29
-rw-r--r--include/linux/swap.h20
-rw-r--r--include/linux/vmstat.h10
7 files changed, 140 insertions, 31 deletions
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 0a24d5550eb3..bee52abb8a4d 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -175,6 +175,8 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
175 * BDI_CAP_READ_MAP: Can be mapped for reading 175 * BDI_CAP_READ_MAP: Can be mapped for reading
176 * BDI_CAP_WRITE_MAP: Can be mapped for writing 176 * BDI_CAP_WRITE_MAP: Can be mapped for writing
177 * BDI_CAP_EXEC_MAP: Can be mapped for execution 177 * BDI_CAP_EXEC_MAP: Can be mapped for execution
178 *
179 * BDI_CAP_SWAP_BACKED: Count shmem/tmpfs objects as swap-backed.
178 */ 180 */
179#define BDI_CAP_NO_ACCT_DIRTY 0x00000001 181#define BDI_CAP_NO_ACCT_DIRTY 0x00000001
180#define BDI_CAP_NO_WRITEBACK 0x00000002 182#define BDI_CAP_NO_WRITEBACK 0x00000002
@@ -184,6 +186,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
184#define BDI_CAP_WRITE_MAP 0x00000020 186#define BDI_CAP_WRITE_MAP 0x00000020
185#define BDI_CAP_EXEC_MAP 0x00000040 187#define BDI_CAP_EXEC_MAP 0x00000040
186#define BDI_CAP_NO_ACCT_WB 0x00000080 188#define BDI_CAP_NO_ACCT_WB 0x00000080
189#define BDI_CAP_SWAP_BACKED 0x00000100
187 190
188#define BDI_CAP_VMFLAGS \ 191#define BDI_CAP_VMFLAGS \
189 (BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP) 192 (BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP)
@@ -248,6 +251,11 @@ static inline bool bdi_cap_account_writeback(struct backing_dev_info *bdi)
248 BDI_CAP_NO_WRITEBACK)); 251 BDI_CAP_NO_WRITEBACK));
249} 252}
250 253
254static inline bool bdi_cap_swap_backed(struct backing_dev_info *bdi)
255{
256 return bdi->capabilities & BDI_CAP_SWAP_BACKED;
257}
258
251static inline bool mapping_cap_writeback_dirty(struct address_space *mapping) 259static inline bool mapping_cap_writeback_dirty(struct address_space *mapping)
252{ 260{
253 return bdi_cap_writeback_dirty(mapping->backing_dev_info); 261 return bdi_cap_writeback_dirty(mapping->backing_dev_info);
@@ -258,4 +266,9 @@ static inline bool mapping_cap_account_dirty(struct address_space *mapping)
258 return bdi_cap_account_dirty(mapping->backing_dev_info); 266 return bdi_cap_account_dirty(mapping->backing_dev_info);
259} 267}
260 268
269static inline bool mapping_cap_swap_backed(struct address_space *mapping)
270{
271 return bdi_cap_swap_backed(mapping->backing_dev_info);
272}
273
261#endif /* _LINUX_BACKING_DEV_H */ 274#endif /* _LINUX_BACKING_DEV_H */
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index a6ac0d491fe6..8d8f05c1515a 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -44,7 +44,7 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
44 unsigned long *scanned, int order, 44 unsigned long *scanned, int order,
45 int mode, struct zone *z, 45 int mode, struct zone *z,
46 struct mem_cgroup *mem_cont, 46 struct mem_cgroup *mem_cont,
47 int active); 47 int active, int file);
48extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask); 48extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
49int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); 49int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);
50 50
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 96e970485b6c..2eb599465d56 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -5,7 +5,7 @@
5 * page_is_file_cache - should the page be on a file LRU or anon LRU? 5 * page_is_file_cache - should the page be on a file LRU or anon LRU?
6 * @page: the page to test 6 * @page: the page to test
7 * 7 *
8 * Returns !0 if @page is page cache page backed by a regular filesystem, 8 * Returns LRU_FILE if @page is page cache page backed by a regular filesystem,
9 * or 0 if @page is anonymous, tmpfs or otherwise ram or swap backed. 9 * or 0 if @page is anonymous, tmpfs or otherwise ram or swap backed.
10 * Used by functions that manipulate the LRU lists, to sort a page 10 * Used by functions that manipulate the LRU lists, to sort a page
11 * onto the right LRU list. 11 * onto the right LRU list.
@@ -20,7 +20,7 @@ static inline int page_is_file_cache(struct page *page)
20 return 0; 20 return 0;
21 21
22 /* The page is page cache backed by a normal filesystem. */ 22 /* The page is page cache backed by a normal filesystem. */
23 return 1; 23 return LRU_FILE;
24} 24}
25 25
26static inline void 26static inline void
@@ -38,39 +38,64 @@ del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
38} 38}
39 39
40static inline void 40static inline void
41add_page_to_active_list(struct zone *zone, struct page *page) 41add_page_to_inactive_anon_list(struct zone *zone, struct page *page)
42{ 42{
43 add_page_to_lru_list(zone, page, LRU_ACTIVE); 43 add_page_to_lru_list(zone, page, LRU_INACTIVE_ANON);
44} 44}
45 45
46static inline void 46static inline void
47add_page_to_inactive_list(struct zone *zone, struct page *page) 47add_page_to_active_anon_list(struct zone *zone, struct page *page)
48{ 48{
49 add_page_to_lru_list(zone, page, LRU_INACTIVE); 49 add_page_to_lru_list(zone, page, LRU_ACTIVE_ANON);
50} 50}
51 51
52static inline void 52static inline void
53del_page_from_active_list(struct zone *zone, struct page *page) 53add_page_to_inactive_file_list(struct zone *zone, struct page *page)
54{ 54{
55 del_page_from_lru_list(zone, page, LRU_ACTIVE); 55 add_page_to_lru_list(zone, page, LRU_INACTIVE_FILE);
56} 56}
57 57
58static inline void 58static inline void
59del_page_from_inactive_list(struct zone *zone, struct page *page) 59add_page_to_active_file_list(struct zone *zone, struct page *page)
60{ 60{
61 del_page_from_lru_list(zone, page, LRU_INACTIVE); 61 add_page_to_lru_list(zone, page, LRU_ACTIVE_FILE);
62}
63
64static inline void
65del_page_from_inactive_anon_list(struct zone *zone, struct page *page)
66{
67 del_page_from_lru_list(zone, page, LRU_INACTIVE_ANON);
68}
69
70static inline void
71del_page_from_active_anon_list(struct zone *zone, struct page *page)
72{
73 del_page_from_lru_list(zone, page, LRU_ACTIVE_ANON);
74}
75
76static inline void
77del_page_from_inactive_file_list(struct zone *zone, struct page *page)
78{
79 del_page_from_lru_list(zone, page, LRU_INACTIVE_FILE);
80}
81
82static inline void
83del_page_from_active_file_list(struct zone *zone, struct page *page)
84{
85 del_page_from_lru_list(zone, page, LRU_ACTIVE_FILE);
62} 86}
63 87
64static inline void 88static inline void
65del_page_from_lru(struct zone *zone, struct page *page) 89del_page_from_lru(struct zone *zone, struct page *page)
66{ 90{
67 enum lru_list l = LRU_INACTIVE; 91 enum lru_list l = LRU_BASE;
68 92
69 list_del(&page->lru); 93 list_del(&page->lru);
70 if (PageActive(page)) { 94 if (PageActive(page)) {
71 __ClearPageActive(page); 95 __ClearPageActive(page);
72 l = LRU_ACTIVE; 96 l += LRU_ACTIVE;
73 } 97 }
98 l += page_is_file_cache(page);
74 __dec_zone_state(zone, NR_LRU_BASE + l); 99 __dec_zone_state(zone, NR_LRU_BASE + l);
75} 100}
76 101
@@ -87,6 +112,7 @@ static inline enum lru_list page_lru(struct page *page)
87 112
88 if (PageActive(page)) 113 if (PageActive(page))
89 lru += LRU_ACTIVE; 114 lru += LRU_ACTIVE;
115 lru += page_is_file_cache(page);
90 116
91 return lru; 117 return lru;
92} 118}
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 156e18f3919b..59a4c8fd6ebd 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -82,21 +82,23 @@ enum zone_stat_item {
82 /* First 128 byte cacheline (assuming 64 bit words) */ 82 /* First 128 byte cacheline (assuming 64 bit words) */
83 NR_FREE_PAGES, 83 NR_FREE_PAGES,
84 NR_LRU_BASE, 84 NR_LRU_BASE,
85 NR_INACTIVE = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */ 85 NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
86 NR_ACTIVE, /* " " " " " */ 86 NR_ACTIVE_ANON, /* " " " " " */
87 NR_INACTIVE_FILE, /* " " " " " */
88 NR_ACTIVE_FILE, /* " " " " " */
87 NR_ANON_PAGES, /* Mapped anonymous pages */ 89 NR_ANON_PAGES, /* Mapped anonymous pages */
88 NR_FILE_MAPPED, /* pagecache pages mapped into pagetables. 90 NR_FILE_MAPPED, /* pagecache pages mapped into pagetables.
89 only modified from process context */ 91 only modified from process context */
90 NR_FILE_PAGES, 92 NR_FILE_PAGES,
91 NR_FILE_DIRTY, 93 NR_FILE_DIRTY,
92 NR_WRITEBACK, 94 NR_WRITEBACK,
93 /* Second 128 byte cacheline */
94 NR_SLAB_RECLAIMABLE, 95 NR_SLAB_RECLAIMABLE,
95 NR_SLAB_UNRECLAIMABLE, 96 NR_SLAB_UNRECLAIMABLE,
96 NR_PAGETABLE, /* used for pagetables */ 97 NR_PAGETABLE, /* used for pagetables */
97 NR_UNSTABLE_NFS, /* NFS unstable pages */ 98 NR_UNSTABLE_NFS, /* NFS unstable pages */
98 NR_BOUNCE, 99 NR_BOUNCE,
99 NR_VMSCAN_WRITE, 100 NR_VMSCAN_WRITE,
101 /* Second 128 byte cacheline */
100 NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */ 102 NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */
101#ifdef CONFIG_NUMA 103#ifdef CONFIG_NUMA
102 NUMA_HIT, /* allocated in intended node */ 104 NUMA_HIT, /* allocated in intended node */
@@ -108,17 +110,36 @@ enum zone_stat_item {
108#endif 110#endif
109 NR_VM_ZONE_STAT_ITEMS }; 111 NR_VM_ZONE_STAT_ITEMS };
110 112
113/*
114 * We do arithmetic on the LRU lists in various places in the code,
115 * so it is important to keep the active lists LRU_ACTIVE higher in
116 * the array than the corresponding inactive lists, and to keep
117 * the *_FILE lists LRU_FILE higher than the corresponding _ANON lists.
118 *
119 * This has to be kept in sync with the statistics in zone_stat_item
120 * above and the descriptions in vmstat_text in mm/vmstat.c
121 */
122#define LRU_BASE 0
123#define LRU_ACTIVE 1
124#define LRU_FILE 2
125
111enum lru_list { 126enum lru_list {
112 LRU_BASE, 127 LRU_INACTIVE_ANON = LRU_BASE,
113 LRU_INACTIVE=LRU_BASE, /* must match order of NR_[IN]ACTIVE */ 128 LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE,
114 LRU_ACTIVE, /* " " " " " */ 129 LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE,
130 LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE,
115 NR_LRU_LISTS }; 131 NR_LRU_LISTS };
116 132
117#define for_each_lru(l) for (l = 0; l < NR_LRU_LISTS; l++) 133#define for_each_lru(l) for (l = 0; l < NR_LRU_LISTS; l++)
118 134
135static inline int is_file_lru(enum lru_list l)
136{
137 return (l == LRU_INACTIVE_FILE || l == LRU_ACTIVE_FILE);
138}
139
119static inline int is_active_lru(enum lru_list l) 140static inline int is_active_lru(enum lru_list l)
120{ 141{
121 return (l == LRU_ACTIVE); 142 return (l == LRU_ACTIVE_ANON || l == LRU_ACTIVE_FILE);
122} 143}
123 144
124struct per_cpu_pages { 145struct per_cpu_pages {
@@ -269,6 +290,18 @@ struct zone {
269 struct list_head list; 290 struct list_head list;
270 unsigned long nr_scan; 291 unsigned long nr_scan;
271 } lru[NR_LRU_LISTS]; 292 } lru[NR_LRU_LISTS];
293
294 /*
295 * The pageout code in vmscan.c keeps track of how many of the
296 * mem/swap backed and file backed pages are referenced.
297 * The higher the rotated/scanned ratio, the more valuable
298 * that cache is.
299 *
300 * The anon LRU stats live in [0], file LRU stats in [1]
301 */
302 unsigned long recent_rotated[2];
303 unsigned long recent_scanned[2];
304
272 unsigned long pages_scanned; /* since last reclaim */ 305 unsigned long pages_scanned; /* since last reclaim */
273 unsigned long flags; /* zone flags, see below */ 306 unsigned long flags; /* zone flags, see below */
274 307
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index fea3a982ee55..5fc96a4e760f 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -81,20 +81,37 @@ static inline void pagevec_free(struct pagevec *pvec)
81 __pagevec_free(pvec); 81 __pagevec_free(pvec);
82} 82}
83 83
84static inline void __pagevec_lru_add(struct pagevec *pvec) 84static inline void __pagevec_lru_add_anon(struct pagevec *pvec)
85{ 85{
86 ____pagevec_lru_add(pvec, LRU_INACTIVE); 86 ____pagevec_lru_add(pvec, LRU_INACTIVE_ANON);
87} 87}
88 88
89static inline void __pagevec_lru_add_active(struct pagevec *pvec) 89static inline void __pagevec_lru_add_active_anon(struct pagevec *pvec)
90{ 90{
91 ____pagevec_lru_add(pvec, LRU_ACTIVE); 91 ____pagevec_lru_add(pvec, LRU_ACTIVE_ANON);
92} 92}
93 93
94static inline void pagevec_lru_add(struct pagevec *pvec) 94static inline void __pagevec_lru_add_file(struct pagevec *pvec)
95{
96 ____pagevec_lru_add(pvec, LRU_INACTIVE_FILE);
97}
98
99static inline void __pagevec_lru_add_active_file(struct pagevec *pvec)
100{
101 ____pagevec_lru_add(pvec, LRU_ACTIVE_FILE);
102}
103
104
105static inline void pagevec_lru_add_file(struct pagevec *pvec)
106{
107 if (pagevec_count(pvec))
108 __pagevec_lru_add_file(pvec);
109}
110
111static inline void pagevec_lru_add_anon(struct pagevec *pvec)
95{ 112{
96 if (pagevec_count(pvec)) 113 if (pagevec_count(pvec))
97 __pagevec_lru_add(pvec); 114 __pagevec_lru_add_anon(pvec);
98} 115}
99 116
100#endif /* _LINUX_PAGEVEC_H */ 117#endif /* _LINUX_PAGEVEC_H */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 833be56ad835..7d09d79997a4 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -184,14 +184,24 @@ extern void swap_setup(void);
184 * lru_cache_add: add a page to the page lists 184 * lru_cache_add: add a page to the page lists
185 * @page: the page to add 185 * @page: the page to add
186 */ 186 */
187static inline void lru_cache_add(struct page *page) 187static inline void lru_cache_add_anon(struct page *page)
188{ 188{
189 __lru_cache_add(page, LRU_INACTIVE); 189 __lru_cache_add(page, LRU_INACTIVE_ANON);
190} 190}
191 191
192static inline void lru_cache_add_active(struct page *page) 192static inline void lru_cache_add_active_anon(struct page *page)
193{ 193{
194 __lru_cache_add(page, LRU_ACTIVE); 194 __lru_cache_add(page, LRU_ACTIVE_ANON);
195}
196
197static inline void lru_cache_add_file(struct page *page)
198{
199 __lru_cache_add(page, LRU_INACTIVE_FILE);
200}
201
202static inline void lru_cache_add_active_file(struct page *page)
203{
204 __lru_cache_add(page, LRU_ACTIVE_FILE);
195} 205}
196 206
197/* linux/mm/vmscan.c */ 207/* linux/mm/vmscan.c */
@@ -199,7 +209,7 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
199 gfp_t gfp_mask); 209 gfp_t gfp_mask);
200extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, 210extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
201 gfp_t gfp_mask); 211 gfp_t gfp_mask);
202extern int __isolate_lru_page(struct page *page, int mode); 212extern int __isolate_lru_page(struct page *page, int mode, int file);
203extern unsigned long shrink_all_memory(unsigned long nr_pages); 213extern unsigned long shrink_all_memory(unsigned long nr_pages);
204extern int vm_swappiness; 214extern int vm_swappiness;
205extern int remove_mapping(struct address_space *mapping, struct page *page); 215extern int remove_mapping(struct address_space *mapping, struct page *page);
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 58334d439516..ff5179f2b153 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -159,6 +159,16 @@ static inline unsigned long zone_page_state(struct zone *zone,
159 return x; 159 return x;
160} 160}
161 161
162extern unsigned long global_lru_pages(void);
163
164static inline unsigned long zone_lru_pages(struct zone *zone)
165{
166 return (zone_page_state(zone, NR_ACTIVE_ANON)
167 + zone_page_state(zone, NR_ACTIVE_FILE)
168 + zone_page_state(zone, NR_INACTIVE_ANON)
169 + zone_page_state(zone, NR_INACTIVE_FILE));
170}
171
162#ifdef CONFIG_NUMA 172#ifdef CONFIG_NUMA
163/* 173/*
164 * Determine the per node value of a stat item. This function 174 * Determine the per node value of a stat item. This function