author     Rik van Riel <riel@redhat.com>                  2008-10-18 23:26:32 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2008-10-20 11:50:25 -0400
commit     4f98a2fee8acdb4ac84545df98cccecfd130f8db (patch)
tree       035a2937f4c3e2f7b4269412041c073ac646937c
parent     b2e185384f534781fd22f5ce170b2ad26f97df70 (diff)
vmscan: split LRU lists into anon & file sets
Split the LRU lists in two, one set for pages that are backed by real file
systems ("file") and one for pages that are backed by memory and swap
("anon").  The latter includes tmpfs.

The advantage of doing this is that the VM will not have to scan over lots
of anonymous pages (which we generally do not want to swap out), just to
find the page cache pages that it should evict.

This patch has the infrastructure and a basic policy to balance how much
we scan the anon lists and how much we scan the file lists.  The big
policy changes are in separate patches.

[lee.schermerhorn@hp.com: collect lru meminfo statistics from correct offset]
[kosaki.motohiro@jp.fujitsu.com: prevent incorrect oom under split_lru]
[kosaki.motohiro@jp.fujitsu.com: fix pagevec_move_tail() doesn't treat unevictable page]
[hugh@veritas.com: memcg swapbacked pages active]
[hugh@veritas.com: splitlru: BDI_CAP_SWAP_BACKED]
[akpm@linux-foundation.org: fix /proc/vmstat units]
[nishimura@mxp.nes.nec.co.jp: memcg: fix handling of shmem migration]
[kosaki.motohiro@jp.fujitsu.com: adjust Quicklists field of /proc/meminfo]
[kosaki.motohiro@jp.fujitsu.com: fix style issue of get_scan_ratio()]
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
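[Editor's note: the following is a minimal user-space sketch, not kernel code,
illustrating the index arithmetic the patch introduces in mmzone.h and
mm_inline.h: an LRU index is LRU_BASE plus LRU_ACTIVE if the page is active
plus LRU_FILE if the page is file backed, giving the four split lists.  The
page_lru() stand-in below takes plain ints in place of PageActive() and
page_is_file_cache() on a real struct page.]

	#include <stdio.h>

	#define LRU_BASE   0
	#define LRU_ACTIVE 1
	#define LRU_FILE   2

	enum lru_list {
		LRU_INACTIVE_ANON = LRU_BASE,
		LRU_ACTIVE_ANON   = LRU_BASE + LRU_ACTIVE,
		LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE,
		LRU_ACTIVE_FILE   = LRU_BASE + LRU_FILE + LRU_ACTIVE,
		NR_LRU_LISTS
	};

	/* stand-in for page_lru(): active/file would come from page flags */
	static int page_lru(int active, int file)
	{
		return LRU_BASE + (active ? LRU_ACTIVE : 0)
				+ (file ? LRU_FILE : 0);
	}

	int main(void)
	{
		printf("inactive anon -> %d\n", page_lru(0, 0)); /* 0 */
		printf("active   anon -> %d\n", page_lru(1, 0)); /* 1 */
		printf("inactive file -> %d\n", page_lru(0, 1)); /* 2 */
		printf("active   file -> %d\n", page_lru(1, 1)); /* 3 */
		return 0;
	}

The same offsets are reused for the per-zone statistics (NR_LRU_BASE + lru),
which is why the patch insists the zone_stat_item order match enum lru_list.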
-rw-r--r--  drivers/base/node.c           |  56
-rw-r--r--  fs/cifs/file.c                |   4
-rw-r--r--  fs/nfs/dir.c                  |   2
-rw-r--r--  fs/ntfs/file.c                |   4
-rw-r--r--  fs/proc/proc_misc.c           |  77
-rw-r--r--  fs/ramfs/file-nommu.c         |   4
-rw-r--r--  include/linux/backing-dev.h   |  13
-rw-r--r--  include/linux/memcontrol.h    |   2
-rw-r--r--  include/linux/mm_inline.h     |  50
-rw-r--r--  include/linux/mmzone.h        |  47
-rw-r--r--  include/linux/pagevec.h       |  29
-rw-r--r--  include/linux/swap.h          |  20
-rw-r--r--  include/linux/vmstat.h        |  10
-rw-r--r--  mm/filemap.c                  |  22
-rw-r--r--  mm/hugetlb.c                  |  10
-rw-r--r--  mm/memcontrol.c               |  88
-rw-r--r--  mm/memory.c                   |   6
-rw-r--r--  mm/page-writeback.c           |   8
-rw-r--r--  mm/page_alloc.c               |  25
-rw-r--r--  mm/readahead.c                |   2
-rw-r--r--  mm/shmem.c                    |   2
-rw-r--r--  mm/swap.c                     |  14
-rw-r--r--  mm/swap_state.c               |   4
-rw-r--r--  mm/vmscan.c                   | 416
-rw-r--r--  mm/vmstat.c                   |  14
25 files changed, 562 insertions(+), 367 deletions(-)
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 5116b78c6325..fc7e9bf0cdbc 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -61,34 +61,44 @@ static ssize_t node_read_meminfo(struct sys_device * dev,
61 si_meminfo_node(&i, nid); 61 si_meminfo_node(&i, nid);
62 62
63 n = sprintf(buf, "\n" 63 n = sprintf(buf, "\n"
64 "Node %d MemTotal: %8lu kB\n" 64 "Node %d MemTotal: %8lu kB\n"
65 "Node %d MemFree: %8lu kB\n" 65 "Node %d MemFree: %8lu kB\n"
66 "Node %d MemUsed: %8lu kB\n" 66 "Node %d MemUsed: %8lu kB\n"
67 "Node %d Active: %8lu kB\n" 67 "Node %d Active: %8lu kB\n"
68 "Node %d Inactive: %8lu kB\n" 68 "Node %d Inactive: %8lu kB\n"
69 "Node %d Active(anon): %8lu kB\n"
70 "Node %d Inactive(anon): %8lu kB\n"
71 "Node %d Active(file): %8lu kB\n"
72 "Node %d Inactive(file): %8lu kB\n"
69#ifdef CONFIG_HIGHMEM 73#ifdef CONFIG_HIGHMEM
70 "Node %d HighTotal: %8lu kB\n" 74 "Node %d HighTotal: %8lu kB\n"
71 "Node %d HighFree: %8lu kB\n" 75 "Node %d HighFree: %8lu kB\n"
72 "Node %d LowTotal: %8lu kB\n" 76 "Node %d LowTotal: %8lu kB\n"
73 "Node %d LowFree: %8lu kB\n" 77 "Node %d LowFree: %8lu kB\n"
74#endif 78#endif
75 "Node %d Dirty: %8lu kB\n" 79 "Node %d Dirty: %8lu kB\n"
76 "Node %d Writeback: %8lu kB\n" 80 "Node %d Writeback: %8lu kB\n"
77 "Node %d FilePages: %8lu kB\n" 81 "Node %d FilePages: %8lu kB\n"
78 "Node %d Mapped: %8lu kB\n" 82 "Node %d Mapped: %8lu kB\n"
79 "Node %d AnonPages: %8lu kB\n" 83 "Node %d AnonPages: %8lu kB\n"
80 "Node %d PageTables: %8lu kB\n" 84 "Node %d PageTables: %8lu kB\n"
81 "Node %d NFS_Unstable: %8lu kB\n" 85 "Node %d NFS_Unstable: %8lu kB\n"
82 "Node %d Bounce: %8lu kB\n" 86 "Node %d Bounce: %8lu kB\n"
83 "Node %d WritebackTmp: %8lu kB\n" 87 "Node %d WritebackTmp: %8lu kB\n"
84 "Node %d Slab: %8lu kB\n" 88 "Node %d Slab: %8lu kB\n"
85 "Node %d SReclaimable: %8lu kB\n" 89 "Node %d SReclaimable: %8lu kB\n"
86 "Node %d SUnreclaim: %8lu kB\n", 90 "Node %d SUnreclaim: %8lu kB\n",
87 nid, K(i.totalram), 91 nid, K(i.totalram),
88 nid, K(i.freeram), 92 nid, K(i.freeram),
89 nid, K(i.totalram - i.freeram), 93 nid, K(i.totalram - i.freeram),
90 nid, K(node_page_state(nid, NR_ACTIVE)), 94 nid, K(node_page_state(nid, NR_ACTIVE_ANON) +
91 nid, K(node_page_state(nid, NR_INACTIVE)), 95 node_page_state(nid, NR_ACTIVE_FILE)),
96 nid, K(node_page_state(nid, NR_INACTIVE_ANON) +
97 node_page_state(nid, NR_INACTIVE_FILE)),
98 nid, K(node_page_state(nid, NR_ACTIVE_ANON)),
99 nid, K(node_page_state(nid, NR_INACTIVE_ANON)),
100 nid, K(node_page_state(nid, NR_ACTIVE_FILE)),
101 nid, K(node_page_state(nid, NR_INACTIVE_FILE)),
92#ifdef CONFIG_HIGHMEM 102#ifdef CONFIG_HIGHMEM
93 nid, K(i.totalhigh), 103 nid, K(i.totalhigh),
94 nid, K(i.freehigh), 104 nid, K(i.freehigh),
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index c4a8a0605125..62d8bd8f14c0 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1791,7 +1791,7 @@ static void cifs_copy_cache_pages(struct address_space *mapping,
1791 SetPageUptodate(page); 1791 SetPageUptodate(page);
1792 unlock_page(page); 1792 unlock_page(page);
1793 if (!pagevec_add(plru_pvec, page)) 1793 if (!pagevec_add(plru_pvec, page))
1794 __pagevec_lru_add(plru_pvec); 1794 __pagevec_lru_add_file(plru_pvec);
1795 data += PAGE_CACHE_SIZE; 1795 data += PAGE_CACHE_SIZE;
1796 } 1796 }
1797 return; 1797 return;
@@ -1925,7 +1925,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
1925 bytes_read = 0; 1925 bytes_read = 0;
1926 } 1926 }
1927 1927
1928 pagevec_lru_add(&lru_pvec); 1928 pagevec_lru_add_file(&lru_pvec);
1929 1929
1930/* need to free smb_read_data buf before exit */ 1930/* need to free smb_read_data buf before exit */
1931 if (smb_read_data) { 1931 if (smb_read_data) {
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2ab70d46ecbc..efdba2e802d7 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1517,7 +1517,7 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
1517 if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0, 1517 if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0,
1518 GFP_KERNEL)) { 1518 GFP_KERNEL)) {
1519 pagevec_add(&lru_pvec, page); 1519 pagevec_add(&lru_pvec, page);
1520 pagevec_lru_add(&lru_pvec); 1520 pagevec_lru_add_file(&lru_pvec);
1521 SetPageUptodate(page); 1521 SetPageUptodate(page);
1522 unlock_page(page); 1522 unlock_page(page);
1523 } else 1523 } else
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index d020866d4232..3140a4429af1 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -439,7 +439,7 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping,
439 pages[nr] = *cached_page; 439 pages[nr] = *cached_page;
440 page_cache_get(*cached_page); 440 page_cache_get(*cached_page);
441 if (unlikely(!pagevec_add(lru_pvec, *cached_page))) 441 if (unlikely(!pagevec_add(lru_pvec, *cached_page)))
442 __pagevec_lru_add(lru_pvec); 442 __pagevec_lru_add_file(lru_pvec);
443 *cached_page = NULL; 443 *cached_page = NULL;
444 } 444 }
445 index++; 445 index++;
@@ -2084,7 +2084,7 @@ err_out:
2084 OSYNC_METADATA|OSYNC_DATA); 2084 OSYNC_METADATA|OSYNC_DATA);
2085 } 2085 }
2086 } 2086 }
2087 pagevec_lru_add(&lru_pvec); 2087 pagevec_lru_add_file(&lru_pvec);
2088 ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", 2088 ntfs_debug("Done. Returning %s (written 0x%lx, status %li).",
2089 written ? "written" : "status", (unsigned long)written, 2089 written ? "written" : "status", (unsigned long)written,
2090 (long)status); 2090 (long)status);
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 59ea42e1ef03..b8edb2860557 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -136,6 +136,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
136 unsigned long allowed; 136 unsigned long allowed;
137 struct vmalloc_info vmi; 137 struct vmalloc_info vmi;
138 long cached; 138 long cached;
139 unsigned long pages[NR_LRU_LISTS];
140 int lru;
139 141
140/* 142/*
141 * display in kilobytes. 143 * display in kilobytes.
@@ -154,51 +156,62 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
154 156
155 get_vmalloc_info(&vmi); 157 get_vmalloc_info(&vmi);
156 158
159 for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
160 pages[lru] = global_page_state(NR_LRU_BASE + lru);
161
157 /* 162 /*
158 * Tagged format, for easy grepping and expansion. 163 * Tagged format, for easy grepping and expansion.
159 */ 164 */
160 len = sprintf(page, 165 len = sprintf(page,
161 "MemTotal: %8lu kB\n" 166 "MemTotal: %8lu kB\n"
162 "MemFree: %8lu kB\n" 167 "MemFree: %8lu kB\n"
163 "Buffers: %8lu kB\n" 168 "Buffers: %8lu kB\n"
164 "Cached: %8lu kB\n" 169 "Cached: %8lu kB\n"
165 "SwapCached: %8lu kB\n" 170 "SwapCached: %8lu kB\n"
166 "Active: %8lu kB\n" 171 "Active: %8lu kB\n"
167 "Inactive: %8lu kB\n" 172 "Inactive: %8lu kB\n"
173 "Active(anon): %8lu kB\n"
174 "Inactive(anon): %8lu kB\n"
175 "Active(file): %8lu kB\n"
176 "Inactive(file): %8lu kB\n"
168#ifdef CONFIG_HIGHMEM 177#ifdef CONFIG_HIGHMEM
169 "HighTotal: %8lu kB\n" 178 "HighTotal: %8lu kB\n"
170 "HighFree: %8lu kB\n" 179 "HighFree: %8lu kB\n"
171 "LowTotal: %8lu kB\n" 180 "LowTotal: %8lu kB\n"
172 "LowFree: %8lu kB\n" 181 "LowFree: %8lu kB\n"
173#endif 182#endif
174 "SwapTotal: %8lu kB\n" 183 "SwapTotal: %8lu kB\n"
175 "SwapFree: %8lu kB\n" 184 "SwapFree: %8lu kB\n"
176 "Dirty: %8lu kB\n" 185 "Dirty: %8lu kB\n"
177 "Writeback: %8lu kB\n" 186 "Writeback: %8lu kB\n"
178 "AnonPages: %8lu kB\n" 187 "AnonPages: %8lu kB\n"
179 "Mapped: %8lu kB\n" 188 "Mapped: %8lu kB\n"
180 "Slab: %8lu kB\n" 189 "Slab: %8lu kB\n"
181 "SReclaimable: %8lu kB\n" 190 "SReclaimable: %8lu kB\n"
182 "SUnreclaim: %8lu kB\n" 191 "SUnreclaim: %8lu kB\n"
183 "PageTables: %8lu kB\n" 192 "PageTables: %8lu kB\n"
184#ifdef CONFIG_QUICKLIST 193#ifdef CONFIG_QUICKLIST
185 "Quicklists: %8lu kB\n" 194 "Quicklists: %8lu kB\n"
186#endif 195#endif
187 "NFS_Unstable: %8lu kB\n" 196 "NFS_Unstable: %8lu kB\n"
188 "Bounce: %8lu kB\n" 197 "Bounce: %8lu kB\n"
189 "WritebackTmp: %8lu kB\n" 198 "WritebackTmp: %8lu kB\n"
190 "CommitLimit: %8lu kB\n" 199 "CommitLimit: %8lu kB\n"
191 "Committed_AS: %8lu kB\n" 200 "Committed_AS: %8lu kB\n"
192 "VmallocTotal: %8lu kB\n" 201 "VmallocTotal: %8lu kB\n"
193 "VmallocUsed: %8lu kB\n" 202 "VmallocUsed: %8lu kB\n"
194 "VmallocChunk: %8lu kB\n", 203 "VmallocChunk: %8lu kB\n",
195 K(i.totalram), 204 K(i.totalram),
196 K(i.freeram), 205 K(i.freeram),
197 K(i.bufferram), 206 K(i.bufferram),
198 K(cached), 207 K(cached),
199 K(total_swapcache_pages), 208 K(total_swapcache_pages),
200 K(global_page_state(NR_ACTIVE)), 209 K(pages[LRU_ACTIVE_ANON] + pages[LRU_ACTIVE_FILE]),
201 K(global_page_state(NR_INACTIVE)), 210 K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]),
211 K(pages[LRU_ACTIVE_ANON]),
212 K(pages[LRU_INACTIVE_ANON]),
213 K(pages[LRU_ACTIVE_FILE]),
214 K(pages[LRU_INACTIVE_FILE]),
202#ifdef CONFIG_HIGHMEM 215#ifdef CONFIG_HIGHMEM
203 K(i.totalhigh), 216 K(i.totalhigh),
204 K(i.freehigh), 217 K(i.freehigh),
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 5145cb9125af..76acdbc34611 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -112,12 +112,12 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
112 goto add_error; 112 goto add_error;
113 113
114 if (!pagevec_add(&lru_pvec, page)) 114 if (!pagevec_add(&lru_pvec, page))
115 __pagevec_lru_add(&lru_pvec); 115 __pagevec_lru_add_file(&lru_pvec);
116 116
117 unlock_page(page); 117 unlock_page(page);
118 } 118 }
119 119
120 pagevec_lru_add(&lru_pvec); 120 pagevec_lru_add_file(&lru_pvec);
121 return 0; 121 return 0;
122 122
123 fsize_exceeded: 123 fsize_exceeded:
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 0a24d5550eb3..bee52abb8a4d 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -175,6 +175,8 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
175 * BDI_CAP_READ_MAP: Can be mapped for reading 175 * BDI_CAP_READ_MAP: Can be mapped for reading
176 * BDI_CAP_WRITE_MAP: Can be mapped for writing 176 * BDI_CAP_WRITE_MAP: Can be mapped for writing
177 * BDI_CAP_EXEC_MAP: Can be mapped for execution 177 * BDI_CAP_EXEC_MAP: Can be mapped for execution
178 *
179 * BDI_CAP_SWAP_BACKED: Count shmem/tmpfs objects as swap-backed.
178 */ 180 */
179#define BDI_CAP_NO_ACCT_DIRTY 0x00000001 181#define BDI_CAP_NO_ACCT_DIRTY 0x00000001
180#define BDI_CAP_NO_WRITEBACK 0x00000002 182#define BDI_CAP_NO_WRITEBACK 0x00000002
@@ -184,6 +186,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
184#define BDI_CAP_WRITE_MAP 0x00000020 186#define BDI_CAP_WRITE_MAP 0x00000020
185#define BDI_CAP_EXEC_MAP 0x00000040 187#define BDI_CAP_EXEC_MAP 0x00000040
186#define BDI_CAP_NO_ACCT_WB 0x00000080 188#define BDI_CAP_NO_ACCT_WB 0x00000080
189#define BDI_CAP_SWAP_BACKED 0x00000100
187 190
188#define BDI_CAP_VMFLAGS \ 191#define BDI_CAP_VMFLAGS \
189 (BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP) 192 (BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP)
@@ -248,6 +251,11 @@ static inline bool bdi_cap_account_writeback(struct backing_dev_info *bdi)
248 BDI_CAP_NO_WRITEBACK)); 251 BDI_CAP_NO_WRITEBACK));
249} 252}
250 253
254static inline bool bdi_cap_swap_backed(struct backing_dev_info *bdi)
255{
256 return bdi->capabilities & BDI_CAP_SWAP_BACKED;
257}
258
251static inline bool mapping_cap_writeback_dirty(struct address_space *mapping) 259static inline bool mapping_cap_writeback_dirty(struct address_space *mapping)
252{ 260{
253 return bdi_cap_writeback_dirty(mapping->backing_dev_info); 261 return bdi_cap_writeback_dirty(mapping->backing_dev_info);
@@ -258,4 +266,9 @@ static inline bool mapping_cap_account_dirty(struct address_space *mapping)
258 return bdi_cap_account_dirty(mapping->backing_dev_info); 266 return bdi_cap_account_dirty(mapping->backing_dev_info);
259} 267}
260 268
269static inline bool mapping_cap_swap_backed(struct address_space *mapping)
270{
271 return bdi_cap_swap_backed(mapping->backing_dev_info);
272}
273
261#endif /* _LINUX_BACKING_DEV_H */ 274#endif /* _LINUX_BACKING_DEV_H */
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index a6ac0d491fe6..8d8f05c1515a 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -44,7 +44,7 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
44 unsigned long *scanned, int order, 44 unsigned long *scanned, int order,
45 int mode, struct zone *z, 45 int mode, struct zone *z,
46 struct mem_cgroup *mem_cont, 46 struct mem_cgroup *mem_cont,
47 int active); 47 int active, int file);
48extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask); 48extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
49int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); 49int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);
50 50
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 96e970485b6c..2eb599465d56 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -5,7 +5,7 @@
5 * page_is_file_cache - should the page be on a file LRU or anon LRU? 5 * page_is_file_cache - should the page be on a file LRU or anon LRU?
6 * @page: the page to test 6 * @page: the page to test
7 * 7 *
8 * Returns !0 if @page is page cache page backed by a regular filesystem, 8 * Returns LRU_FILE if @page is page cache page backed by a regular filesystem,
9 * or 0 if @page is anonymous, tmpfs or otherwise ram or swap backed. 9 * or 0 if @page is anonymous, tmpfs or otherwise ram or swap backed.
10 * Used by functions that manipulate the LRU lists, to sort a page 10 * Used by functions that manipulate the LRU lists, to sort a page
11 * onto the right LRU list. 11 * onto the right LRU list.
@@ -20,7 +20,7 @@ static inline int page_is_file_cache(struct page *page)
20 return 0; 20 return 0;
21 21
22 /* The page is page cache backed by a normal filesystem. */ 22 /* The page is page cache backed by a normal filesystem. */
23 return 1; 23 return LRU_FILE;
24} 24}
25 25
26static inline void 26static inline void
@@ -38,39 +38,64 @@ del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
38} 38}
39 39
40static inline void 40static inline void
41add_page_to_active_list(struct zone *zone, struct page *page) 41add_page_to_inactive_anon_list(struct zone *zone, struct page *page)
42{ 42{
43 add_page_to_lru_list(zone, page, LRU_ACTIVE); 43 add_page_to_lru_list(zone, page, LRU_INACTIVE_ANON);
44} 44}
45 45
46static inline void 46static inline void
47add_page_to_inactive_list(struct zone *zone, struct page *page) 47add_page_to_active_anon_list(struct zone *zone, struct page *page)
48{ 48{
49 add_page_to_lru_list(zone, page, LRU_INACTIVE); 49 add_page_to_lru_list(zone, page, LRU_ACTIVE_ANON);
50} 50}
51 51
52static inline void 52static inline void
53del_page_from_active_list(struct zone *zone, struct page *page) 53add_page_to_inactive_file_list(struct zone *zone, struct page *page)
54{ 54{
55 del_page_from_lru_list(zone, page, LRU_ACTIVE); 55 add_page_to_lru_list(zone, page, LRU_INACTIVE_FILE);
56} 56}
57 57
58static inline void 58static inline void
59del_page_from_inactive_list(struct zone *zone, struct page *page) 59add_page_to_active_file_list(struct zone *zone, struct page *page)
60{ 60{
61 del_page_from_lru_list(zone, page, LRU_INACTIVE); 61 add_page_to_lru_list(zone, page, LRU_ACTIVE_FILE);
62}
63
64static inline void
65del_page_from_inactive_anon_list(struct zone *zone, struct page *page)
66{
67 del_page_from_lru_list(zone, page, LRU_INACTIVE_ANON);
68}
69
70static inline void
71del_page_from_active_anon_list(struct zone *zone, struct page *page)
72{
73 del_page_from_lru_list(zone, page, LRU_ACTIVE_ANON);
74}
75
76static inline void
77del_page_from_inactive_file_list(struct zone *zone, struct page *page)
78{
79 del_page_from_lru_list(zone, page, LRU_INACTIVE_FILE);
80}
81
82static inline void
83del_page_from_active_file_list(struct zone *zone, struct page *page)
84{
85 del_page_from_lru_list(zone, page, LRU_INACTIVE_FILE);
62} 86}
63 87
64static inline void 88static inline void
65del_page_from_lru(struct zone *zone, struct page *page) 89del_page_from_lru(struct zone *zone, struct page *page)
66{ 90{
67 enum lru_list l = LRU_INACTIVE; 91 enum lru_list l = LRU_BASE;
68 92
69 list_del(&page->lru); 93 list_del(&page->lru);
70 if (PageActive(page)) { 94 if (PageActive(page)) {
71 __ClearPageActive(page); 95 __ClearPageActive(page);
72 l = LRU_ACTIVE; 96 l += LRU_ACTIVE;
73 } 97 }
98 l += page_is_file_cache(page);
74 __dec_zone_state(zone, NR_LRU_BASE + l); 99 __dec_zone_state(zone, NR_LRU_BASE + l);
75} 100}
76 101
@@ -87,6 +112,7 @@ static inline enum lru_list page_lru(struct page *page)
87 112
88 if (PageActive(page)) 113 if (PageActive(page))
89 lru += LRU_ACTIVE; 114 lru += LRU_ACTIVE;
115 lru += page_is_file_cache(page);
90 116
91 return lru; 117 return lru;
92} 118}
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 156e18f3919b..59a4c8fd6ebd 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -82,21 +82,23 @@ enum zone_stat_item {
82 /* First 128 byte cacheline (assuming 64 bit words) */ 82 /* First 128 byte cacheline (assuming 64 bit words) */
83 NR_FREE_PAGES, 83 NR_FREE_PAGES,
84 NR_LRU_BASE, 84 NR_LRU_BASE,
85 NR_INACTIVE = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */ 85 NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
86 NR_ACTIVE, /* " " " " " */ 86 NR_ACTIVE_ANON, /* " " " " " */
87 NR_INACTIVE_FILE, /* " " " " " */
88 NR_ACTIVE_FILE, /* " " " " " */
87 NR_ANON_PAGES, /* Mapped anonymous pages */ 89 NR_ANON_PAGES, /* Mapped anonymous pages */
88 NR_FILE_MAPPED, /* pagecache pages mapped into pagetables. 90 NR_FILE_MAPPED, /* pagecache pages mapped into pagetables.
89 only modified from process context */ 91 only modified from process context */
90 NR_FILE_PAGES, 92 NR_FILE_PAGES,
91 NR_FILE_DIRTY, 93 NR_FILE_DIRTY,
92 NR_WRITEBACK, 94 NR_WRITEBACK,
93 /* Second 128 byte cacheline */
94 NR_SLAB_RECLAIMABLE, 95 NR_SLAB_RECLAIMABLE,
95 NR_SLAB_UNRECLAIMABLE, 96 NR_SLAB_UNRECLAIMABLE,
96 NR_PAGETABLE, /* used for pagetables */ 97 NR_PAGETABLE, /* used for pagetables */
97 NR_UNSTABLE_NFS, /* NFS unstable pages */ 98 NR_UNSTABLE_NFS, /* NFS unstable pages */
98 NR_BOUNCE, 99 NR_BOUNCE,
99 NR_VMSCAN_WRITE, 100 NR_VMSCAN_WRITE,
101 /* Second 128 byte cacheline */
100 NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */ 102 NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */
101#ifdef CONFIG_NUMA 103#ifdef CONFIG_NUMA
102 NUMA_HIT, /* allocated in intended node */ 104 NUMA_HIT, /* allocated in intended node */
@@ -108,17 +110,36 @@ enum zone_stat_item {
108#endif 110#endif
109 NR_VM_ZONE_STAT_ITEMS }; 111 NR_VM_ZONE_STAT_ITEMS };
110 112
113/*
114 * We do arithmetic on the LRU lists in various places in the code,
115 * so it is important to keep the active lists LRU_ACTIVE higher in
116 * the array than the corresponding inactive lists, and to keep
117 * the *_FILE lists LRU_FILE higher than the corresponding _ANON lists.
118 *
119 * This has to be kept in sync with the statistics in zone_stat_item
120 * above and the descriptions in vmstat_text in mm/vmstat.c
121 */
122#define LRU_BASE 0
123#define LRU_ACTIVE 1
124#define LRU_FILE 2
125
111enum lru_list { 126enum lru_list {
112 LRU_BASE, 127 LRU_INACTIVE_ANON = LRU_BASE,
113 LRU_INACTIVE=LRU_BASE, /* must match order of NR_[IN]ACTIVE */ 128 LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE,
114 LRU_ACTIVE, /* " " " " " */ 129 LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE,
130 LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE,
115 NR_LRU_LISTS }; 131 NR_LRU_LISTS };
116 132
117#define for_each_lru(l) for (l = 0; l < NR_LRU_LISTS; l++) 133#define for_each_lru(l) for (l = 0; l < NR_LRU_LISTS; l++)
118 134
135static inline int is_file_lru(enum lru_list l)
136{
137 return (l == LRU_INACTIVE_FILE || l == LRU_ACTIVE_FILE);
138}
139
119static inline int is_active_lru(enum lru_list l) 140static inline int is_active_lru(enum lru_list l)
120{ 141{
121 return (l == LRU_ACTIVE); 142 return (l == LRU_ACTIVE_ANON || l == LRU_ACTIVE_FILE);
122} 143}
123 144
124struct per_cpu_pages { 145struct per_cpu_pages {
@@ -269,6 +290,18 @@ struct zone {
269 struct list_head list; 290 struct list_head list;
270 unsigned long nr_scan; 291 unsigned long nr_scan;
271 } lru[NR_LRU_LISTS]; 292 } lru[NR_LRU_LISTS];
293
294 /*
295 * The pageout code in vmscan.c keeps track of how many of the
296 * mem/swap backed and file backed pages are referenced.
297 * The higher the rotated/scanned ratio, the more valuable
298 * that cache is.
299 *
300 * The anon LRU stats live in [0], file LRU stats in [1]
301 */
302 unsigned long recent_rotated[2];
303 unsigned long recent_scanned[2];
304
272 unsigned long pages_scanned; /* since last reclaim */ 305 unsigned long pages_scanned; /* since last reclaim */
273 unsigned long flags; /* zone flags, see below */ 306 unsigned long flags; /* zone flags, see below */
274 307
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index fea3a982ee55..5fc96a4e760f 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -81,20 +81,37 @@ static inline void pagevec_free(struct pagevec *pvec)
81 __pagevec_free(pvec); 81 __pagevec_free(pvec);
82} 82}
83 83
84static inline void __pagevec_lru_add(struct pagevec *pvec) 84static inline void __pagevec_lru_add_anon(struct pagevec *pvec)
85{ 85{
86 ____pagevec_lru_add(pvec, LRU_INACTIVE); 86 ____pagevec_lru_add(pvec, LRU_INACTIVE_ANON);
87} 87}
88 88
89static inline void __pagevec_lru_add_active(struct pagevec *pvec) 89static inline void __pagevec_lru_add_active_anon(struct pagevec *pvec)
90{ 90{
91 ____pagevec_lru_add(pvec, LRU_ACTIVE); 91 ____pagevec_lru_add(pvec, LRU_ACTIVE_ANON);
92} 92}
93 93
94static inline void pagevec_lru_add(struct pagevec *pvec) 94static inline void __pagevec_lru_add_file(struct pagevec *pvec)
95{
96 ____pagevec_lru_add(pvec, LRU_INACTIVE_FILE);
97}
98
99static inline void __pagevec_lru_add_active_file(struct pagevec *pvec)
100{
101 ____pagevec_lru_add(pvec, LRU_ACTIVE_FILE);
102}
103
104
105static inline void pagevec_lru_add_file(struct pagevec *pvec)
106{
107 if (pagevec_count(pvec))
108 __pagevec_lru_add_file(pvec);
109}
110
111static inline void pagevec_lru_add_anon(struct pagevec *pvec)
95{ 112{
96 if (pagevec_count(pvec)) 113 if (pagevec_count(pvec))
97 __pagevec_lru_add(pvec); 114 __pagevec_lru_add_anon(pvec);
98} 115}
99 116
100#endif /* _LINUX_PAGEVEC_H */ 117#endif /* _LINUX_PAGEVEC_H */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 833be56ad835..7d09d79997a4 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -184,14 +184,24 @@ extern void swap_setup(void);
184 * lru_cache_add: add a page to the page lists 184 * lru_cache_add: add a page to the page lists
185 * @page: the page to add 185 * @page: the page to add
186 */ 186 */
187static inline void lru_cache_add(struct page *page) 187static inline void lru_cache_add_anon(struct page *page)
188{ 188{
189 __lru_cache_add(page, LRU_INACTIVE); 189 __lru_cache_add(page, LRU_INACTIVE_ANON);
190} 190}
191 191
192static inline void lru_cache_add_active(struct page *page) 192static inline void lru_cache_add_active_anon(struct page *page)
193{ 193{
194 __lru_cache_add(page, LRU_ACTIVE); 194 __lru_cache_add(page, LRU_ACTIVE_ANON);
195}
196
197static inline void lru_cache_add_file(struct page *page)
198{
199 __lru_cache_add(page, LRU_INACTIVE_FILE);
200}
201
202static inline void lru_cache_add_active_file(struct page *page)
203{
204 __lru_cache_add(page, LRU_ACTIVE_FILE);
195} 205}
196 206
197/* linux/mm/vmscan.c */ 207/* linux/mm/vmscan.c */
@@ -199,7 +209,7 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
199 gfp_t gfp_mask); 209 gfp_t gfp_mask);
200extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, 210extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
201 gfp_t gfp_mask); 211 gfp_t gfp_mask);
202extern int __isolate_lru_page(struct page *page, int mode); 212extern int __isolate_lru_page(struct page *page, int mode, int file);
203extern unsigned long shrink_all_memory(unsigned long nr_pages); 213extern unsigned long shrink_all_memory(unsigned long nr_pages);
204extern int vm_swappiness; 214extern int vm_swappiness;
205extern int remove_mapping(struct address_space *mapping, struct page *page); 215extern int remove_mapping(struct address_space *mapping, struct page *page);
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 58334d439516..ff5179f2b153 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -159,6 +159,16 @@ static inline unsigned long zone_page_state(struct zone *zone,
159 return x; 159 return x;
160} 160}
161 161
162extern unsigned long global_lru_pages(void);
163
164static inline unsigned long zone_lru_pages(struct zone *zone)
165{
166 return (zone_page_state(zone, NR_ACTIVE_ANON)
167 + zone_page_state(zone, NR_ACTIVE_FILE)
168 + zone_page_state(zone, NR_INACTIVE_ANON)
169 + zone_page_state(zone, NR_INACTIVE_FILE));
170}
171
162#ifdef CONFIG_NUMA 172#ifdef CONFIG_NUMA
163/* 173/*
164 * Determine the per node value of a stat item. This function 174 * Determine the per node value of a stat item. This function
diff --git a/mm/filemap.c b/mm/filemap.c
index 903bf316912a..a1ddd2557af2 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -33,6 +33,7 @@
33#include <linux/cpuset.h> 33#include <linux/cpuset.h>
34#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */ 34#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
35#include <linux/memcontrol.h> 35#include <linux/memcontrol.h>
36#include <linux/mm_inline.h> /* for page_is_file_cache() */
36#include "internal.h" 37#include "internal.h"
37 38
38/* 39/*
@@ -492,9 +493,24 @@ EXPORT_SYMBOL(add_to_page_cache_locked);
492int add_to_page_cache_lru(struct page *page, struct address_space *mapping, 493int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
493 pgoff_t offset, gfp_t gfp_mask) 494 pgoff_t offset, gfp_t gfp_mask)
494{ 495{
495 int ret = add_to_page_cache(page, mapping, offset, gfp_mask); 496 int ret;
496 if (ret == 0) 497
497 lru_cache_add(page); 498 /*
499 * Splice_read and readahead add shmem/tmpfs pages into the page cache
500 * before shmem_readpage has a chance to mark them as SwapBacked: they
501 * need to go on the active_anon lru below, and mem_cgroup_cache_charge
502 * (called in add_to_page_cache) needs to know where they're going too.
503 */
504 if (mapping_cap_swap_backed(mapping))
505 SetPageSwapBacked(page);
506
507 ret = add_to_page_cache(page, mapping, offset, gfp_mask);
508 if (ret == 0) {
509 if (page_is_file_cache(page))
510 lru_cache_add_file(page);
511 else
512 lru_cache_add_active_anon(page);
513 }
498 return ret; 514 return ret;
499} 515}
500 516
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 38633864a93e..2fc7fddd9b1f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1459,11 +1459,11 @@ int hugetlb_report_meminfo(char *buf)
1459{ 1459{
1460 struct hstate *h = &default_hstate; 1460 struct hstate *h = &default_hstate;
1461 return sprintf(buf, 1461 return sprintf(buf,
1462 "HugePages_Total: %5lu\n" 1462 "HugePages_Total: %5lu\n"
1463 "HugePages_Free: %5lu\n" 1463 "HugePages_Free: %5lu\n"
1464 "HugePages_Rsvd: %5lu\n" 1464 "HugePages_Rsvd: %5lu\n"
1465 "HugePages_Surp: %5lu\n" 1465 "HugePages_Surp: %5lu\n"
1466 "Hugepagesize: %5lu kB\n", 1466 "Hugepagesize: %8lu kB\n",
1467 h->nr_huge_pages, 1467 h->nr_huge_pages,
1468 h->free_huge_pages, 1468 h->free_huge_pages,
1469 h->resv_huge_pages, 1469 h->resv_huge_pages,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c0cbd7790c51..27e9e75f4eab 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -162,6 +162,7 @@ struct page_cgroup {
162}; 162};
163#define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */ 163#define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */
164#define PAGE_CGROUP_FLAG_ACTIVE (0x2) /* page is active in this cgroup */ 164#define PAGE_CGROUP_FLAG_ACTIVE (0x2) /* page is active in this cgroup */
165#define PAGE_CGROUP_FLAG_FILE (0x4) /* page is file system backed */
165 166
166static int page_cgroup_nid(struct page_cgroup *pc) 167static int page_cgroup_nid(struct page_cgroup *pc)
167{ 168{
@@ -177,6 +178,7 @@ enum charge_type {
177 MEM_CGROUP_CHARGE_TYPE_CACHE = 0, 178 MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
178 MEM_CGROUP_CHARGE_TYPE_MAPPED, 179 MEM_CGROUP_CHARGE_TYPE_MAPPED,
179 MEM_CGROUP_CHARGE_TYPE_FORCE, /* used by force_empty */ 180 MEM_CGROUP_CHARGE_TYPE_FORCE, /* used by force_empty */
181 MEM_CGROUP_CHARGE_TYPE_SHMEM, /* used by page migration of shmem */
180}; 182};
181 183
182/* 184/*
@@ -288,8 +290,12 @@ static void unlock_page_cgroup(struct page *page)
288static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz, 290static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
289 struct page_cgroup *pc) 291 struct page_cgroup *pc)
290{ 292{
291 int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE; 293 int lru = LRU_BASE;
292 int lru = !!from; 294
295 if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE)
296 lru += LRU_ACTIVE;
297 if (pc->flags & PAGE_CGROUP_FLAG_FILE)
298 lru += LRU_FILE;
293 299
294 MEM_CGROUP_ZSTAT(mz, lru) -= 1; 300 MEM_CGROUP_ZSTAT(mz, lru) -= 1;
295 301
@@ -300,10 +306,12 @@ static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
300static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz, 306static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
301 struct page_cgroup *pc) 307 struct page_cgroup *pc)
302{ 308{
303 int lru = LRU_INACTIVE; 309 int lru = LRU_BASE;
304 310
305 if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE) 311 if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE)
306 lru += LRU_ACTIVE; 312 lru += LRU_ACTIVE;
313 if (pc->flags & PAGE_CGROUP_FLAG_FILE)
314 lru += LRU_FILE;
307 315
308 MEM_CGROUP_ZSTAT(mz, lru) += 1; 316 MEM_CGROUP_ZSTAT(mz, lru) += 1;
309 list_add(&pc->lru, &mz->lists[lru]); 317 list_add(&pc->lru, &mz->lists[lru]);
@@ -314,10 +322,9 @@ static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
314static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active) 322static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
315{ 323{
316 struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc); 324 struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc);
317 int lru = LRU_INACTIVE; 325 int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
318 326 int file = pc->flags & PAGE_CGROUP_FLAG_FILE;
319 if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE) 327 int lru = LRU_FILE * !!file + !!from;
320 lru += LRU_ACTIVE;
321 328
322 MEM_CGROUP_ZSTAT(mz, lru) -= 1; 329 MEM_CGROUP_ZSTAT(mz, lru) -= 1;
323 330
@@ -326,7 +333,7 @@ static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
326 else 333 else
327 pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE; 334 pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE;
328 335
329 lru = !!active; 336 lru = LRU_FILE * !!file + !!active;
330 MEM_CGROUP_ZSTAT(mz, lru) += 1; 337 MEM_CGROUP_ZSTAT(mz, lru) += 1;
331 list_move(&pc->lru, &mz->lists[lru]); 338 list_move(&pc->lru, &mz->lists[lru]);
332} 339}
@@ -391,21 +398,6 @@ int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem)
391} 398}
392 399
393/* 400/*
394 * This function is called from vmscan.c. In page reclaiming loop. balance
395 * between active and inactive list is calculated. For memory controller
396 * page reclaiming, we should use using mem_cgroup's imbalance rather than
397 * zone's global lru imbalance.
398 */
399long mem_cgroup_reclaim_imbalance(struct mem_cgroup *mem)
400{
401 unsigned long active, inactive;
402 /* active and inactive are the number of pages. 'long' is ok.*/
403 active = mem_cgroup_get_all_zonestat(mem, LRU_ACTIVE);
404 inactive = mem_cgroup_get_all_zonestat(mem, LRU_INACTIVE);
405 return (long) (active / (inactive + 1));
406}
407
408/*
409 * prev_priority control...this will be used in memory reclaim path. 401 * prev_priority control...this will be used in memory reclaim path.
410 */ 402 */
411int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem) 403int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem)
@@ -450,7 +442,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
450 unsigned long *scanned, int order, 442 unsigned long *scanned, int order,
451 int mode, struct zone *z, 443 int mode, struct zone *z,
452 struct mem_cgroup *mem_cont, 444 struct mem_cgroup *mem_cont,
453 int active) 445 int active, int file)
454{ 446{
455 unsigned long nr_taken = 0; 447 unsigned long nr_taken = 0;
456 struct page *page; 448 struct page *page;
@@ -461,7 +453,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
461 int nid = z->zone_pgdat->node_id; 453 int nid = z->zone_pgdat->node_id;
462 int zid = zone_idx(z); 454 int zid = zone_idx(z);
463 struct mem_cgroup_per_zone *mz; 455 struct mem_cgroup_per_zone *mz;
464 int lru = !!active; 456 int lru = LRU_FILE * !!file + !!active;
465 457
466 BUG_ON(!mem_cont); 458 BUG_ON(!mem_cont);
467 mz = mem_cgroup_zoneinfo(mem_cont, nid, zid); 459 mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
@@ -477,6 +469,9 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
477 if (unlikely(!PageLRU(page))) 469 if (unlikely(!PageLRU(page)))
478 continue; 470 continue;
479 471
472 /*
473 * TODO: play better with lumpy reclaim, grabbing anything.
474 */
480 if (PageActive(page) && !active) { 475 if (PageActive(page) && !active) {
481 __mem_cgroup_move_lists(pc, true); 476 __mem_cgroup_move_lists(pc, true);
482 continue; 477 continue;
@@ -489,7 +484,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
489 scan++; 484 scan++;
490 list_move(&pc->lru, &pc_list); 485 list_move(&pc->lru, &pc_list);
491 486
492 if (__isolate_lru_page(page, mode) == 0) { 487 if (__isolate_lru_page(page, mode, file) == 0) {
493 list_move(&page->lru, dst); 488 list_move(&page->lru, dst);
494 nr_taken++; 489 nr_taken++;
495 } 490 }
@@ -575,10 +570,16 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
575 * If a page is accounted as a page cache, insert to inactive list. 570 * If a page is accounted as a page cache, insert to inactive list.
576 * If anon, insert to active list. 571 * If anon, insert to active list.
577 */ 572 */
578 if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE) 573 if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE) {
579 pc->flags = PAGE_CGROUP_FLAG_CACHE; 574 pc->flags = PAGE_CGROUP_FLAG_CACHE;
580 else 575 if (page_is_file_cache(page))
576 pc->flags |= PAGE_CGROUP_FLAG_FILE;
577 else
578 pc->flags |= PAGE_CGROUP_FLAG_ACTIVE;
579 } else if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
581 pc->flags = PAGE_CGROUP_FLAG_ACTIVE; 580 pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
581 else /* MEM_CGROUP_CHARGE_TYPE_SHMEM */
582 pc->flags = PAGE_CGROUP_FLAG_CACHE | PAGE_CGROUP_FLAG_ACTIVE;
582 583
583 lock_page_cgroup(page); 584 lock_page_cgroup(page);
584 if (unlikely(page_get_page_cgroup(page))) { 585 if (unlikely(page_get_page_cgroup(page))) {
@@ -737,8 +738,12 @@ int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
737 if (pc) { 738 if (pc) {
738 mem = pc->mem_cgroup; 739 mem = pc->mem_cgroup;
739 css_get(&mem->css); 740 css_get(&mem->css);
740 if (pc->flags & PAGE_CGROUP_FLAG_CACHE) 741 if (pc->flags & PAGE_CGROUP_FLAG_CACHE) {
741 ctype = MEM_CGROUP_CHARGE_TYPE_CACHE; 742 if (page_is_file_cache(page))
743 ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
744 else
745 ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
746 }
742 } 747 }
743 unlock_page_cgroup(page); 748 unlock_page_cgroup(page);
744 if (mem) { 749 if (mem) {
@@ -982,14 +987,21 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
982 } 987 }
983 /* showing # of active pages */ 988 /* showing # of active pages */
984 { 989 {
985 unsigned long active, inactive; 990 unsigned long active_anon, inactive_anon;
986 991 unsigned long active_file, inactive_file;
987 inactive = mem_cgroup_get_all_zonestat(mem_cont, 992
988 LRU_INACTIVE); 993 inactive_anon = mem_cgroup_get_all_zonestat(mem_cont,
989 active = mem_cgroup_get_all_zonestat(mem_cont, 994 LRU_INACTIVE_ANON);
990 LRU_ACTIVE); 995 active_anon = mem_cgroup_get_all_zonestat(mem_cont,
991 cb->fill(cb, "active", (active) * PAGE_SIZE); 996 LRU_ACTIVE_ANON);
992 cb->fill(cb, "inactive", (inactive) * PAGE_SIZE); 997 inactive_file = mem_cgroup_get_all_zonestat(mem_cont,
998 LRU_INACTIVE_FILE);
999 active_file = mem_cgroup_get_all_zonestat(mem_cont,
1000 LRU_ACTIVE_FILE);
1001 cb->fill(cb, "active_anon", (active_anon) * PAGE_SIZE);
1002 cb->fill(cb, "inactive_anon", (inactive_anon) * PAGE_SIZE);
1003 cb->fill(cb, "active_file", (active_file) * PAGE_SIZE);
1004 cb->fill(cb, "inactive_file", (inactive_file) * PAGE_SIZE);
993 } 1005 }
994 return 0; 1006 return 0;
995} 1007}
diff --git a/mm/memory.c b/mm/memory.c
index 7512933dcc10..71cdefd1ef14 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1889,7 +1889,7 @@ gotten:
1889 set_pte_at(mm, address, page_table, entry); 1889 set_pte_at(mm, address, page_table, entry);
1890 update_mmu_cache(vma, address, entry); 1890 update_mmu_cache(vma, address, entry);
1891 SetPageSwapBacked(new_page); 1891 SetPageSwapBacked(new_page);
1892 lru_cache_add_active(new_page); 1892 lru_cache_add_active_anon(new_page);
1893 page_add_new_anon_rmap(new_page, vma, address); 1893 page_add_new_anon_rmap(new_page, vma, address);
1894 1894
1895 if (old_page) { 1895 if (old_page) {
@@ -2384,7 +2384,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
2384 goto release; 2384 goto release;
2385 inc_mm_counter(mm, anon_rss); 2385 inc_mm_counter(mm, anon_rss);
2386 SetPageSwapBacked(page); 2386 SetPageSwapBacked(page);
2387 lru_cache_add_active(page); 2387 lru_cache_add_active_anon(page);
2388 page_add_new_anon_rmap(page, vma, address); 2388 page_add_new_anon_rmap(page, vma, address);
2389 set_pte_at(mm, address, page_table, entry); 2389 set_pte_at(mm, address, page_table, entry);
2390 2390
@@ -2526,7 +2526,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2526 if (anon) { 2526 if (anon) {
2527 inc_mm_counter(mm, anon_rss); 2527 inc_mm_counter(mm, anon_rss);
2528 SetPageSwapBacked(page); 2528 SetPageSwapBacked(page);
2529 lru_cache_add_active(page); 2529 lru_cache_add_active_anon(page);
2530 page_add_new_anon_rmap(page, vma, address); 2530 page_add_new_anon_rmap(page, vma, address);
2531 } else { 2531 } else {
2532 inc_mm_counter(mm, file_rss); 2532 inc_mm_counter(mm, file_rss);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index b40f6d5f8fe9..2970e35fd03f 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -329,9 +329,7 @@ static unsigned long highmem_dirtyable_memory(unsigned long total)
329 struct zone *z = 329 struct zone *z =
330 &NODE_DATA(node)->node_zones[ZONE_HIGHMEM]; 330 &NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
331 331
332 x += zone_page_state(z, NR_FREE_PAGES) 332 x += zone_page_state(z, NR_FREE_PAGES) + zone_lru_pages(z);
333 + zone_page_state(z, NR_INACTIVE)
334 + zone_page_state(z, NR_ACTIVE);
335 } 333 }
336 /* 334 /*
337 * Make sure that the number of highmem pages is never larger 335 * Make sure that the number of highmem pages is never larger
@@ -355,9 +353,7 @@ unsigned long determine_dirtyable_memory(void)
355{ 353{
356 unsigned long x; 354 unsigned long x;
357 355
358 x = global_page_state(NR_FREE_PAGES) 356 x = global_page_state(NR_FREE_PAGES) + global_lru_pages();
359 + global_page_state(NR_INACTIVE)
360 + global_page_state(NR_ACTIVE);
361 357
362 if (!vm_highmem_is_dirtyable) 358 if (!vm_highmem_is_dirtyable)
363 x -= highmem_dirtyable_memory(x); 359 x -= highmem_dirtyable_memory(x);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2099904d6cc4..740a16a32c22 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1864,10 +1864,13 @@ void show_free_areas(void)
1864 } 1864 }
1865 } 1865 }
1866 1866
1867 printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu\n" 1867 printk("Active_anon:%lu active_file:%lu inactive_anon%lu\n"
1868 " inactive_file:%lu dirty:%lu writeback:%lu unstable:%lu\n"
1868 " free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n", 1869 " free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n",
1869 global_page_state(NR_ACTIVE), 1870 global_page_state(NR_ACTIVE_ANON),
1870 global_page_state(NR_INACTIVE), 1871 global_page_state(NR_ACTIVE_FILE),
1872 global_page_state(NR_INACTIVE_ANON),
1873 global_page_state(NR_INACTIVE_FILE),
1871 global_page_state(NR_FILE_DIRTY), 1874 global_page_state(NR_FILE_DIRTY),
1872 global_page_state(NR_WRITEBACK), 1875 global_page_state(NR_WRITEBACK),
1873 global_page_state(NR_UNSTABLE_NFS), 1876 global_page_state(NR_UNSTABLE_NFS),
@@ -1890,8 +1893,10 @@ void show_free_areas(void)
1890 " min:%lukB" 1893 " min:%lukB"
1891 " low:%lukB" 1894 " low:%lukB"
1892 " high:%lukB" 1895 " high:%lukB"
1893 " active:%lukB" 1896 " active_anon:%lukB"
1894 " inactive:%lukB" 1897 " inactive_anon:%lukB"
1898 " active_file:%lukB"
1899 " inactive_file:%lukB"
1895 " present:%lukB" 1900 " present:%lukB"
1896 " pages_scanned:%lu" 1901 " pages_scanned:%lu"
1897 " all_unreclaimable? %s" 1902 " all_unreclaimable? %s"
@@ -1901,8 +1906,10 @@ void show_free_areas(void)
1901 K(zone->pages_min), 1906 K(zone->pages_min),
1902 K(zone->pages_low), 1907 K(zone->pages_low),
1903 K(zone->pages_high), 1908 K(zone->pages_high),
1904 K(zone_page_state(zone, NR_ACTIVE)), 1909 K(zone_page_state(zone, NR_ACTIVE_ANON)),
1905 K(zone_page_state(zone, NR_INACTIVE)), 1910 K(zone_page_state(zone, NR_INACTIVE_ANON)),
1911 K(zone_page_state(zone, NR_ACTIVE_FILE)),
1912 K(zone_page_state(zone, NR_INACTIVE_FILE)),
1906 K(zone->present_pages), 1913 K(zone->present_pages),
1907 zone->pages_scanned, 1914 zone->pages_scanned,
1908 (zone_is_all_unreclaimable(zone) ? "yes" : "no") 1915 (zone_is_all_unreclaimable(zone) ? "yes" : "no")
@@ -3472,6 +3479,10 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
3472 INIT_LIST_HEAD(&zone->lru[l].list); 3479 INIT_LIST_HEAD(&zone->lru[l].list);
3473 zone->lru[l].nr_scan = 0; 3480 zone->lru[l].nr_scan = 0;
3474 } 3481 }
3482 zone->recent_rotated[0] = 0;
3483 zone->recent_rotated[1] = 0;
3484 zone->recent_scanned[0] = 0;
3485 zone->recent_scanned[1] = 0;
3475 zap_zone_vm_stats(zone); 3486 zap_zone_vm_stats(zone);
3476 zone->flags = 0; 3487 zone->flags = 0;
3477 if (!size) 3488 if (!size)
diff --git a/mm/readahead.c b/mm/readahead.c
index 6cbd9a72fde2..bec83c15a78f 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -229,7 +229,7 @@ int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
229 */ 229 */
230unsigned long max_sane_readahead(unsigned long nr) 230unsigned long max_sane_readahead(unsigned long nr)
231{ 231{
232 return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE) 232 return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE_FILE)
233 + node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2); 233 + node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2);
234} 234}
235 235
diff --git a/mm/shmem.c b/mm/shmem.c
index fd421ed703ed..fc2ccf79a776 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -199,7 +199,7 @@ static struct vm_operations_struct shmem_vm_ops;
199 199
200static struct backing_dev_info shmem_backing_dev_info __read_mostly = { 200static struct backing_dev_info shmem_backing_dev_info __read_mostly = {
201 .ra_pages = 0, /* No readahead */ 201 .ra_pages = 0, /* No readahead */
202 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, 202 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
203 .unplug_io_fn = default_unplug_io_fn, 203 .unplug_io_fn = default_unplug_io_fn,
204}; 204};
205 205
diff --git a/mm/swap.c b/mm/swap.c
index 88a394872677..0b1974a08974 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -116,7 +116,8 @@ static void pagevec_move_tail(struct pagevec *pvec)
116 spin_lock(&zone->lru_lock); 116 spin_lock(&zone->lru_lock);
117 } 117 }
118 if (PageLRU(page) && !PageActive(page)) { 118 if (PageLRU(page) && !PageActive(page)) {
119 list_move_tail(&page->lru, &zone->lru[LRU_INACTIVE].list); 119 int lru = page_is_file_cache(page);
120 list_move_tail(&page->lru, &zone->lru[lru].list);
120 pgmoved++; 121 pgmoved++;
121 } 122 }
122 } 123 }
@@ -157,11 +158,18 @@ void activate_page(struct page *page)
157 158
158 spin_lock_irq(&zone->lru_lock); 159 spin_lock_irq(&zone->lru_lock);
159 if (PageLRU(page) && !PageActive(page)) { 160 if (PageLRU(page) && !PageActive(page)) {
160 del_page_from_inactive_list(zone, page); 161 int file = page_is_file_cache(page);
162 int lru = LRU_BASE + file;
163 del_page_from_lru_list(zone, page, lru);
164
161 SetPageActive(page); 165 SetPageActive(page);
162 add_page_to_active_list(zone, page); 166 lru += LRU_ACTIVE;
167 add_page_to_lru_list(zone, page, lru);
163 __count_vm_event(PGACTIVATE); 168 __count_vm_event(PGACTIVATE);
164 mem_cgroup_move_lists(page, true); 169 mem_cgroup_move_lists(page, true);
170
171 zone->recent_rotated[!!file]++;
172 zone->recent_scanned[!!file]++;
165 } 173 }
166 spin_unlock_irq(&zone->lru_lock); 174 spin_unlock_irq(&zone->lru_lock);
167} 175}
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 7a3ece0b5a3b..ea62084ed402 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -33,7 +33,7 @@ static const struct address_space_operations swap_aops = {
33}; 33};
34 34
35static struct backing_dev_info swap_backing_dev_info = { 35static struct backing_dev_info swap_backing_dev_info = {
36 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, 36 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
37 .unplug_io_fn = swap_unplug_io_fn, 37 .unplug_io_fn = swap_unplug_io_fn,
38}; 38};
39 39
@@ -310,7 +310,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
310 /* 310 /*
311 * Initiate read into locked page and return. 311 * Initiate read into locked page and return.
312 */ 312 */
313 lru_cache_add_active(new_page); 313 lru_cache_add_active_anon(new_page);
314 swap_readpage(NULL, new_page); 314 swap_readpage(NULL, new_page);
315 return new_page; 315 return new_page;
316 } 316 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e656035d3406..d10d2f9a33f3 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -78,7 +78,7 @@ struct scan_control {
78 unsigned long (*isolate_pages)(unsigned long nr, struct list_head *dst, 78 unsigned long (*isolate_pages)(unsigned long nr, struct list_head *dst,
79 unsigned long *scanned, int order, int mode, 79 unsigned long *scanned, int order, int mode,
80 struct zone *z, struct mem_cgroup *mem_cont, 80 struct zone *z, struct mem_cgroup *mem_cont,
81 int active); 81 int active, int file);
82}; 82};
83 83
84#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) 84#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
@@ -680,7 +680,7 @@ keep:
680 * 680 *
681 * returns 0 on success, -ve errno on failure. 681 * returns 0 on success, -ve errno on failure.
682 */ 682 */
683int __isolate_lru_page(struct page *page, int mode) 683int __isolate_lru_page(struct page *page, int mode, int file)
684{ 684{
685 int ret = -EINVAL; 685 int ret = -EINVAL;
686 686
@@ -696,6 +696,9 @@ int __isolate_lru_page(struct page *page, int mode)
696 if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode)) 696 if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode))
697 return ret; 697 return ret;
698 698
699 if (mode != ISOLATE_BOTH && (!page_is_file_cache(page) != !file))
700 return ret;
701
699 ret = -EBUSY; 702 ret = -EBUSY;
700 if (likely(get_page_unless_zero(page))) { 703 if (likely(get_page_unless_zero(page))) {
701 /* 704 /*
@@ -726,12 +729,13 @@ int __isolate_lru_page(struct page *page, int mode)
726 * @scanned: The number of pages that were scanned. 729 * @scanned: The number of pages that were scanned.
727 * @order: The caller's attempted allocation order 730 * @order: The caller's attempted allocation order
728 * @mode: One of the LRU isolation modes 731 * @mode: One of the LRU isolation modes
732 * @file: True [1] if isolating file [!anon] pages
729 * 733 *
730 * returns how many pages were moved onto *@dst. 734 * returns how many pages were moved onto *@dst.
731 */ 735 */
732static unsigned long isolate_lru_pages(unsigned long nr_to_scan, 736static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
733 struct list_head *src, struct list_head *dst, 737 struct list_head *src, struct list_head *dst,
734 unsigned long *scanned, int order, int mode) 738 unsigned long *scanned, int order, int mode, int file)
735{ 739{
736 unsigned long nr_taken = 0; 740 unsigned long nr_taken = 0;
737 unsigned long scan; 741 unsigned long scan;
@@ -748,7 +752,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
748 752
749 VM_BUG_ON(!PageLRU(page)); 753 VM_BUG_ON(!PageLRU(page));
750 754
751 switch (__isolate_lru_page(page, mode)) { 755 switch (__isolate_lru_page(page, mode, file)) {
752 case 0: 756 case 0:
753 list_move(&page->lru, dst); 757 list_move(&page->lru, dst);
754 nr_taken++; 758 nr_taken++;
@@ -791,10 +795,11 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
791 break; 795 break;
792 796
793 cursor_page = pfn_to_page(pfn); 797 cursor_page = pfn_to_page(pfn);
798
794 /* Check that we have not crossed a zone boundary. */ 799 /* Check that we have not crossed a zone boundary. */
795 if (unlikely(page_zone_id(cursor_page) != zone_id)) 800 if (unlikely(page_zone_id(cursor_page) != zone_id))
796 continue; 801 continue;
797 switch (__isolate_lru_page(cursor_page, mode)) { 802 switch (__isolate_lru_page(cursor_page, mode, file)) {
798 case 0: 803 case 0:
799 list_move(&cursor_page->lru, dst); 804 list_move(&cursor_page->lru, dst);
800 nr_taken++; 805 nr_taken++;
@@ -819,30 +824,37 @@ static unsigned long isolate_pages_global(unsigned long nr,
819 unsigned long *scanned, int order, 824 unsigned long *scanned, int order,
820 int mode, struct zone *z, 825 int mode, struct zone *z,
821 struct mem_cgroup *mem_cont, 826 struct mem_cgroup *mem_cont,
822 int active) 827 int active, int file)
823{ 828{
829 int lru = LRU_BASE;
824 if (active) 830 if (active)
825 return isolate_lru_pages(nr, &z->lru[LRU_ACTIVE].list, dst, 831 lru += LRU_ACTIVE;
826 scanned, order, mode); 832 if (file)
827 else 833 lru += LRU_FILE;
828 return isolate_lru_pages(nr, &z->lru[LRU_INACTIVE].list, dst, 834 return isolate_lru_pages(nr, &z->lru[lru].list, dst, scanned, order,
829 scanned, order, mode); 835 mode, !!file);
830} 836}
831 837
832/* 838/*
833 * clear_active_flags() is a helper for shrink_active_list(), clearing 839 * clear_active_flags() is a helper for shrink_active_list(), clearing
834 * any active bits from the pages in the list. 840 * any active bits from the pages in the list.
835 */ 841 */
836static unsigned long clear_active_flags(struct list_head *page_list) 842static unsigned long clear_active_flags(struct list_head *page_list,
843 unsigned int *count)
837{ 844{
838 int nr_active = 0; 845 int nr_active = 0;
846 int lru;
839 struct page *page; 847 struct page *page;
840 848
841 list_for_each_entry(page, page_list, lru) 849 list_for_each_entry(page, page_list, lru) {
850 lru = page_is_file_cache(page);
842 if (PageActive(page)) { 851 if (PageActive(page)) {
852 lru += LRU_ACTIVE;
843 ClearPageActive(page); 853 ClearPageActive(page);
844 nr_active++; 854 nr_active++;
845 } 855 }
856 count[lru]++;
857 }
846 858
847 return nr_active; 859 return nr_active;
848} 860}
@@ -880,12 +892,12 @@ int isolate_lru_page(struct page *page)
880 892
881 spin_lock_irq(&zone->lru_lock); 893 spin_lock_irq(&zone->lru_lock);
882 if (PageLRU(page) && get_page_unless_zero(page)) { 894 if (PageLRU(page) && get_page_unless_zero(page)) {
895 int lru = LRU_BASE;
883 ret = 0; 896 ret = 0;
884 ClearPageLRU(page); 897 ClearPageLRU(page);
885 if (PageActive(page)) 898
886 del_page_from_active_list(zone, page); 899 lru += page_is_file_cache(page) + !!PageActive(page);
887 else 900 del_page_from_lru_list(zone, page, lru);
888 del_page_from_inactive_list(zone, page);
889 } 901 }
890 spin_unlock_irq(&zone->lru_lock); 902 spin_unlock_irq(&zone->lru_lock);
891 } 903 }
@@ -897,7 +909,7 @@ int isolate_lru_page(struct page *page)
897 * of reclaimed pages 909 * of reclaimed pages
898 */ 910 */
899static unsigned long shrink_inactive_list(unsigned long max_scan, 911static unsigned long shrink_inactive_list(unsigned long max_scan,
900 struct zone *zone, struct scan_control *sc) 912 struct zone *zone, struct scan_control *sc, int file)
901{ 913{
902 LIST_HEAD(page_list); 914 LIST_HEAD(page_list);
903 struct pagevec pvec; 915 struct pagevec pvec;
@@ -914,20 +926,32 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
914 unsigned long nr_scan; 926 unsigned long nr_scan;
915 unsigned long nr_freed; 927 unsigned long nr_freed;
916 unsigned long nr_active; 928 unsigned long nr_active;
929 unsigned int count[NR_LRU_LISTS] = { 0, };
930 int mode = (sc->order > PAGE_ALLOC_COSTLY_ORDER) ?
931 ISOLATE_BOTH : ISOLATE_INACTIVE;
917 932
918 nr_taken = sc->isolate_pages(sc->swap_cluster_max, 933 nr_taken = sc->isolate_pages(sc->swap_cluster_max,
919 &page_list, &nr_scan, sc->order, 934 &page_list, &nr_scan, sc->order, mode,
920 (sc->order > PAGE_ALLOC_COSTLY_ORDER)? 935 zone, sc->mem_cgroup, 0, file);
921 ISOLATE_BOTH : ISOLATE_INACTIVE, 936 nr_active = clear_active_flags(&page_list, count);
922 zone, sc->mem_cgroup, 0);
923 nr_active = clear_active_flags(&page_list);
924 __count_vm_events(PGDEACTIVATE, nr_active); 937 __count_vm_events(PGDEACTIVATE, nr_active);
925 938
926 __mod_zone_page_state(zone, NR_ACTIVE, -nr_active); 939 __mod_zone_page_state(zone, NR_ACTIVE_FILE,
927 __mod_zone_page_state(zone, NR_INACTIVE, 940 -count[LRU_ACTIVE_FILE]);
928 -(nr_taken - nr_active)); 941 __mod_zone_page_state(zone, NR_INACTIVE_FILE,
929 if (scan_global_lru(sc)) 942 -count[LRU_INACTIVE_FILE]);
943 __mod_zone_page_state(zone, NR_ACTIVE_ANON,
944 -count[LRU_ACTIVE_ANON]);
945 __mod_zone_page_state(zone, NR_INACTIVE_ANON,
946 -count[LRU_INACTIVE_ANON]);
947
948 if (scan_global_lru(sc)) {
930 zone->pages_scanned += nr_scan; 949 zone->pages_scanned += nr_scan;
950 zone->recent_scanned[0] += count[LRU_INACTIVE_ANON];
951 zone->recent_scanned[0] += count[LRU_ACTIVE_ANON];
952 zone->recent_scanned[1] += count[LRU_INACTIVE_FILE];
953 zone->recent_scanned[1] += count[LRU_ACTIVE_FILE];
954 }
931 spin_unlock_irq(&zone->lru_lock); 955 spin_unlock_irq(&zone->lru_lock);
932 956
933 nr_scanned += nr_scan; 957 nr_scanned += nr_scan;
@@ -947,7 +971,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
947 * The attempt at page out may have made some 971 * The attempt at page out may have made some
948 * of the pages active, mark them inactive again. 972 * of the pages active, mark them inactive again.
949 */ 973 */
950 nr_active = clear_active_flags(&page_list); 974 nr_active = clear_active_flags(&page_list, count);
951 count_vm_events(PGDEACTIVATE, nr_active); 975 count_vm_events(PGDEACTIVATE, nr_active);
952 976
953 nr_freed += shrink_page_list(&page_list, sc, 977 nr_freed += shrink_page_list(&page_list, sc,
@@ -977,6 +1001,10 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
977 SetPageLRU(page); 1001 SetPageLRU(page);
978 list_del(&page->lru); 1002 list_del(&page->lru);
979 add_page_to_lru_list(zone, page, page_lru(page)); 1003 add_page_to_lru_list(zone, page, page_lru(page));
1004 if (PageActive(page) && scan_global_lru(sc)) {
1005 int file = !!page_is_file_cache(page);
1006 zone->recent_rotated[file]++;
1007 }
980 if (!pagevec_add(&pvec, page)) { 1008 if (!pagevec_add(&pvec, page)) {
981 spin_unlock_irq(&zone->lru_lock); 1009 spin_unlock_irq(&zone->lru_lock);
982 __pagevec_release(&pvec); 1010 __pagevec_release(&pvec);
@@ -1007,115 +1035,7 @@ static inline void note_zone_scanning_priority(struct zone *zone, int priority)
1007 1035
1008static inline int zone_is_near_oom(struct zone *zone) 1036static inline int zone_is_near_oom(struct zone *zone)
1009{ 1037{
1010 return zone->pages_scanned >= (zone_page_state(zone, NR_ACTIVE) 1038 return zone->pages_scanned >= (zone_lru_pages(zone) * 3);
1011 + zone_page_state(zone, NR_INACTIVE))*3;
1012}
1013
1014/*
1015 * Determine we should try to reclaim mapped pages.
1016 * This is called only when sc->mem_cgroup is NULL.
1017 */
1018static int calc_reclaim_mapped(struct scan_control *sc, struct zone *zone,
1019 int priority)
1020{
1021 long mapped_ratio;
1022 long distress;
1023 long swap_tendency;
1024 long imbalance;
1025 int reclaim_mapped = 0;
1026 int prev_priority;
1027
1028 if (scan_global_lru(sc) && zone_is_near_oom(zone))
1029 return 1;
1030 /*
1031 * `distress' is a measure of how much trouble we're having
1032 * reclaiming pages. 0 -> no problems. 100 -> great trouble.
1033 */
1034 if (scan_global_lru(sc))
1035 prev_priority = zone->prev_priority;
1036 else
1037 prev_priority = mem_cgroup_get_reclaim_priority(sc->mem_cgroup);
1038
1039 distress = 100 >> min(prev_priority, priority);
1040
1041 /*
1042 * The point of this algorithm is to decide when to start
1043 * reclaiming mapped memory instead of just pagecache. Work out
1044 * how much memory
1045 * is mapped.
1046 */
1047 if (scan_global_lru(sc))
1048 mapped_ratio = ((global_page_state(NR_FILE_MAPPED) +
1049 global_page_state(NR_ANON_PAGES)) * 100) /
1050 vm_total_pages;
1051 else
1052 mapped_ratio = mem_cgroup_calc_mapped_ratio(sc->mem_cgroup);
1053
1054 /*
1055 * Now decide how much we really want to unmap some pages. The
1056 * mapped ratio is downgraded - just because there's a lot of
1057 * mapped memory doesn't necessarily mean that page reclaim
1058 * isn't succeeding.
1059 *
1060 * The distress ratio is important - we don't want to start
1061 * going oom.
1062 *
1063 * A 100% value of vm_swappiness overrides this algorithm
1064 * altogether.
1065 */
1066 swap_tendency = mapped_ratio / 2 + distress + sc->swappiness;
1067
1068 /*
1069 * If there's huge imbalance between active and inactive
1070 * (think active 100 times larger than inactive) we should
1071 * become more permissive, or the system will take too much
1072 * cpu before it start swapping during memory pressure.
1073 * Distress is about avoiding early-oom, this is about
1074 * making swappiness graceful despite setting it to low
1075 * values.
1076 *
1077 * Avoid div by zero with nr_inactive+1, and max resulting
1078 * value is vm_total_pages.
1079 */
1080 if (scan_global_lru(sc)) {
1081 imbalance = zone_page_state(zone, NR_ACTIVE);
1082 imbalance /= zone_page_state(zone, NR_INACTIVE) + 1;
1083 } else
1084 imbalance = mem_cgroup_reclaim_imbalance(sc->mem_cgroup);
1085
1086 /*
1087 * Reduce the effect of imbalance if swappiness is low,
1088 * this means for a swappiness very low, the imbalance
1089 * must be much higher than 100 for this logic to make
1090 * the difference.
1091 *
1092 * Max temporary value is vm_total_pages*100.
1093 */
1094 imbalance *= (vm_swappiness + 1);
1095 imbalance /= 100;
1096
1097 /*
1098 * If not much of the ram is mapped, makes the imbalance
1099 * less relevant, it's high priority we refill the inactive
1100 * list with mapped pages only in presence of high ratio of
1101 * mapped pages.
1102 *
1103 * Max temporary value is vm_total_pages*100.
1104 */
1105 imbalance *= mapped_ratio;
1106 imbalance /= 100;
1107
1108 /* apply imbalance feedback to swap_tendency */
1109 swap_tendency += imbalance;
1110
1111 /*
1112 * Now use this metric to decide whether to start moving mapped
1113 * memory onto the inactive list.
1114 */
1115 if (swap_tendency >= 100)
1116 reclaim_mapped = 1;
1117
1118 return reclaim_mapped;
1119} 1039}
1120 1040
1121/* 1041/*
@@ -1138,7 +1058,7 @@ static int calc_reclaim_mapped(struct scan_control *sc, struct zone *zone,
1138 1058
1139 1059
1140static void shrink_active_list(unsigned long nr_pages, struct zone *zone, 1060static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1141 struct scan_control *sc, int priority) 1061 struct scan_control *sc, int priority, int file)
1142{ 1062{
1143 unsigned long pgmoved; 1063 unsigned long pgmoved;
1144 int pgdeactivate = 0; 1064 int pgdeactivate = 0;
@@ -1148,43 +1068,42 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1148 LIST_HEAD(l_inactive); 1068 LIST_HEAD(l_inactive);
1149 struct page *page; 1069 struct page *page;
1150 struct pagevec pvec; 1070 struct pagevec pvec;
1151 int reclaim_mapped = 0; 1071 enum lru_list lru;
1152
1153 if (sc->may_swap)
1154 reclaim_mapped = calc_reclaim_mapped(sc, zone, priority);
1155 1072
1156 lru_add_drain(); 1073 lru_add_drain();
1157 spin_lock_irq(&zone->lru_lock); 1074 spin_lock_irq(&zone->lru_lock);
1158 pgmoved = sc->isolate_pages(nr_pages, &l_hold, &pgscanned, sc->order, 1075 pgmoved = sc->isolate_pages(nr_pages, &l_hold, &pgscanned, sc->order,
1159 ISOLATE_ACTIVE, zone, 1076 ISOLATE_ACTIVE, zone,
1160 sc->mem_cgroup, 1); 1077 sc->mem_cgroup, 1, file);
1161 /* 1078 /*
1162 * zone->pages_scanned is used for detect zone's oom 1079 * zone->pages_scanned is used for detect zone's oom
1163 * mem_cgroup remembers nr_scan by itself. 1080 * mem_cgroup remembers nr_scan by itself.
1164 */ 1081 */
1165 if (scan_global_lru(sc)) 1082 if (scan_global_lru(sc)) {
1166 zone->pages_scanned += pgscanned; 1083 zone->pages_scanned += pgscanned;
1084 zone->recent_scanned[!!file] += pgmoved;
1085 }
1167 1086
1168 __mod_zone_page_state(zone, NR_ACTIVE, -pgmoved); 1087 if (file)
1088 __mod_zone_page_state(zone, NR_ACTIVE_FILE, -pgmoved);
1089 else
1090 __mod_zone_page_state(zone, NR_ACTIVE_ANON, -pgmoved);
1169 spin_unlock_irq(&zone->lru_lock); 1091 spin_unlock_irq(&zone->lru_lock);
1170 1092
1171 while (!list_empty(&l_hold)) { 1093 while (!list_empty(&l_hold)) {
1172 cond_resched(); 1094 cond_resched();
1173 page = lru_to_page(&l_hold); 1095 page = lru_to_page(&l_hold);
1174 list_del(&page->lru); 1096 list_del(&page->lru);
1175 if (page_mapped(page)) {
1176 if (!reclaim_mapped ||
1177 (total_swap_pages == 0 && PageAnon(page)) ||
1178 page_referenced(page, 0, sc->mem_cgroup)) {
1179 list_add(&page->lru, &l_active);
1180 continue;
1181 }
1182 }
1183 list_add(&page->lru, &l_inactive); 1097 list_add(&page->lru, &l_inactive);
1184 } 1098 }
1185 1099
1100 /*
1101 * Now put the pages back on the appropriate [file or anon] inactive
1102 * and active lists.
1103 */
1186 pagevec_init(&pvec, 1); 1104 pagevec_init(&pvec, 1);
1187 pgmoved = 0; 1105 pgmoved = 0;
1106 lru = LRU_BASE + file * LRU_FILE;
1188 spin_lock_irq(&zone->lru_lock); 1107 spin_lock_irq(&zone->lru_lock);
1189 while (!list_empty(&l_inactive)) { 1108 while (!list_empty(&l_inactive)) {
1190 page = lru_to_page(&l_inactive); 1109 page = lru_to_page(&l_inactive);
@@ -1194,11 +1113,11 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1194 VM_BUG_ON(!PageActive(page)); 1113 VM_BUG_ON(!PageActive(page));
1195 ClearPageActive(page); 1114 ClearPageActive(page);
1196 1115
1197 list_move(&page->lru, &zone->lru[LRU_INACTIVE].list); 1116 list_move(&page->lru, &zone->lru[lru].list);
1198 mem_cgroup_move_lists(page, false); 1117 mem_cgroup_move_lists(page, false);
1199 pgmoved++; 1118 pgmoved++;
1200 if (!pagevec_add(&pvec, page)) { 1119 if (!pagevec_add(&pvec, page)) {
1201 __mod_zone_page_state(zone, NR_INACTIVE, pgmoved); 1120 __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
1202 spin_unlock_irq(&zone->lru_lock); 1121 spin_unlock_irq(&zone->lru_lock);
1203 pgdeactivate += pgmoved; 1122 pgdeactivate += pgmoved;
1204 pgmoved = 0; 1123 pgmoved = 0;
@@ -1208,7 +1127,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1208 spin_lock_irq(&zone->lru_lock); 1127 spin_lock_irq(&zone->lru_lock);
1209 } 1128 }
1210 } 1129 }
1211 __mod_zone_page_state(zone, NR_INACTIVE, pgmoved); 1130 __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
1212 pgdeactivate += pgmoved; 1131 pgdeactivate += pgmoved;
1213 if (buffer_heads_over_limit) { 1132 if (buffer_heads_over_limit) {
1214 spin_unlock_irq(&zone->lru_lock); 1133 spin_unlock_irq(&zone->lru_lock);
@@ -1217,6 +1136,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1217 } 1136 }
1218 1137
1219 pgmoved = 0; 1138 pgmoved = 0;
1139 lru = LRU_ACTIVE + file * LRU_FILE;
1220 while (!list_empty(&l_active)) { 1140 while (!list_empty(&l_active)) {
1221 page = lru_to_page(&l_active); 1141 page = lru_to_page(&l_active);
1222 prefetchw_prev_lru_page(page, &l_active, flags); 1142 prefetchw_prev_lru_page(page, &l_active, flags);
@@ -1224,11 +1144,11 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1224 SetPageLRU(page); 1144 SetPageLRU(page);
1225 VM_BUG_ON(!PageActive(page)); 1145 VM_BUG_ON(!PageActive(page));
1226 1146
1227 list_move(&page->lru, &zone->lru[LRU_ACTIVE].list); 1147 list_move(&page->lru, &zone->lru[lru].list);
1228 mem_cgroup_move_lists(page, true); 1148 mem_cgroup_move_lists(page, true);
1229 pgmoved++; 1149 pgmoved++;
1230 if (!pagevec_add(&pvec, page)) { 1150 if (!pagevec_add(&pvec, page)) {
1231 __mod_zone_page_state(zone, NR_ACTIVE, pgmoved); 1151 __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
1232 pgmoved = 0; 1152 pgmoved = 0;
1233 spin_unlock_irq(&zone->lru_lock); 1153 spin_unlock_irq(&zone->lru_lock);
1234 if (vm_swap_full()) 1154 if (vm_swap_full())
@@ -1237,7 +1157,8 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1237 spin_lock_irq(&zone->lru_lock); 1157 spin_lock_irq(&zone->lru_lock);
1238 } 1158 }
1239 } 1159 }
1240 __mod_zone_page_state(zone, NR_ACTIVE, pgmoved); 1160 __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
1161 zone->recent_rotated[!!file] += pgmoved;
1241 1162
1242 __count_zone_vm_events(PGREFILL, zone, pgscanned); 1163 __count_zone_vm_events(PGREFILL, zone, pgscanned);
1243 __count_vm_events(PGDEACTIVATE, pgdeactivate); 1164 __count_vm_events(PGDEACTIVATE, pgdeactivate);
@@ -1248,16 +1169,103 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1248 pagevec_release(&pvec); 1169 pagevec_release(&pvec);
1249} 1170}
1250 1171
1251static unsigned long shrink_list(enum lru_list l, unsigned long nr_to_scan, 1172static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
1252 struct zone *zone, struct scan_control *sc, int priority) 1173 struct zone *zone, struct scan_control *sc, int priority)
1253{ 1174{
1254 if (l == LRU_ACTIVE) { 1175 int file = is_file_lru(lru);
1255 shrink_active_list(nr_to_scan, zone, sc, priority); 1176
1177 if (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE) {
1178 shrink_active_list(nr_to_scan, zone, sc, priority, file);
1256 return 0; 1179 return 0;
1257 } 1180 }
1258 return shrink_inactive_list(nr_to_scan, zone, sc); 1181 return shrink_inactive_list(nr_to_scan, zone, sc, file);
1182}
1183
1184/*
1185 * Determine how aggressively the anon and file LRU lists should be
1186 * scanned. The relative value of each set of LRU lists is determined
1187 * by looking at the fraction of the pages scanned we did rotate back
1188 * onto the active list instead of evict.
1189 *
1190 * percent[0] specifies how much pressure to put on ram/swap backed
1191 * memory, while percent[1] determines pressure on the file LRUs.
1192 */
1193static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
1194 unsigned long *percent)
1195{
1196 unsigned long anon, file, free;
1197 unsigned long anon_prio, file_prio;
1198 unsigned long ap, fp;
1199
1200 anon = zone_page_state(zone, NR_ACTIVE_ANON) +
1201 zone_page_state(zone, NR_INACTIVE_ANON);
1202 file = zone_page_state(zone, NR_ACTIVE_FILE) +
1203 zone_page_state(zone, NR_INACTIVE_FILE);
1204 free = zone_page_state(zone, NR_FREE_PAGES);
1205
1206 /* If we have no swap space, do not bother scanning anon pages. */
1207 if (nr_swap_pages <= 0) {
1208 percent[0] = 0;
1209 percent[1] = 100;
1210 return;
1211 }
1212
1213 /* If we have very few page cache pages, force-scan anon pages. */
1214 if (unlikely(file + free <= zone->pages_high)) {
1215 percent[0] = 100;
1216 percent[1] = 0;
1217 return;
1218 }
1219
1220 /*
1221 * OK, so we have swap space and a fair amount of page cache
1222 * pages. We use the recently rotated / recently scanned
1223 * ratios to determine how valuable each cache is.
1224 *
1225 * Because workloads change over time (and to avoid overflow)
1226 * we keep these statistics as a floating average, which ends
1227 * up weighing recent references more than old ones.
1228 *
1229 * anon in [0], file in [1]
1230 */
1231 if (unlikely(zone->recent_scanned[0] > anon / 4)) {
1232 spin_lock_irq(&zone->lru_lock);
1233 zone->recent_scanned[0] /= 2;
1234 zone->recent_rotated[0] /= 2;
1235 spin_unlock_irq(&zone->lru_lock);
1236 }
1237
1238 if (unlikely(zone->recent_scanned[1] > file / 4)) {
1239 spin_lock_irq(&zone->lru_lock);
1240 zone->recent_scanned[1] /= 2;
1241 zone->recent_rotated[1] /= 2;
1242 spin_unlock_irq(&zone->lru_lock);
1243 }
1244
1245 /*
1246 * With swappiness at 100, anonymous and file have the same priority.
1247 * This scanning priority is essentially the inverse of IO cost.
1248 */
1249 anon_prio = sc->swappiness;
1250 file_prio = 200 - sc->swappiness;
1251
1252 /*
1253 *                  anon       recent_rotated[0]
1254 * %anon = 100 * ----------- / ----------------- * IO cost
1255 *               anon + file      rotate_sum
1256 */
1257 ap = (anon_prio + 1) * (zone->recent_scanned[0] + 1);
1258 ap /= zone->recent_rotated[0] + 1;
1259
1260 fp = (file_prio + 1) * (zone->recent_scanned[1] + 1);
1261 fp /= zone->recent_rotated[1] + 1;
1262
1263 /* Normalize to percentages */
1264 percent[0] = 100 * ap / (ap + fp + 1);
1265 percent[1] = 100 - percent[0];
1259} 1266}
1260 1267
1268
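
get_scan_ratio() above reduces the balancing decision to integer arithmetic: swappiness sets the relative priority of the two sets of lists, while the recent_scanned/recent_rotated counters estimate how often scanned pages bounced back onto the active list, a proxy for how expensive evicting from that list would be. Below is a minimal stand-alone sketch of the same arithmetic run in user space with invented counter values, not kernel code, just the formula, to show how the percentages come out when anon pages rotate often and file pages do not.

#include <stdio.h>

/* Same formula as get_scan_ratio(), fed with made-up statistics. */
static void scan_ratio(unsigned long swappiness,
		       unsigned long scanned_anon, unsigned long rotated_anon,
		       unsigned long scanned_file, unsigned long rotated_file,
		       unsigned long *percent)
{
	unsigned long anon_prio = swappiness;		/* 0..100 */
	unsigned long file_prio = 200 - swappiness;
	unsigned long ap, fp;

	ap = (anon_prio + 1) * (scanned_anon + 1);
	ap /= rotated_anon + 1;

	fp = (file_prio + 1) * (scanned_file + 1);
	fp /= rotated_file + 1;

	percent[0] = 100 * ap / (ap + fp + 1);	/* anon */
	percent[1] = 100 - percent[0];		/* file */
}

int main(void)
{
	unsigned long percent[2];

	/* Anon pages mostly rotate back; file pages are rarely re-referenced. */
	scan_ratio(60, 1000, 800, 1000, 50, percent);
	printf("swappiness  60: anon %lu%%  file %lu%%\n", percent[0], percent[1]);

	/* With swappiness at 100 both sets start from the same priority. */
	scan_ratio(100, 1000, 800, 1000, 50, percent);
	printf("swappiness 100: anon %lu%%  file %lu%%\n", percent[0], percent[1]);
	return 0;
}

With these invented numbers nearly all of the pressure lands on the file lists, because file pages are being evicted without getting re-referenced while anon pages keep rotating back, which is exactly the behaviour the commit is after.
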
1261/* 1269/*
1262 * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. 1270 * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
1263 */ 1271 */
@@ -1267,36 +1275,43 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
1267 unsigned long nr[NR_LRU_LISTS]; 1275 unsigned long nr[NR_LRU_LISTS];
1268 unsigned long nr_to_scan; 1276 unsigned long nr_to_scan;
1269 unsigned long nr_reclaimed = 0; 1277 unsigned long nr_reclaimed = 0;
1278 unsigned long percent[2]; /* anon @ 0; file @ 1 */
1270 enum lru_list l; 1279 enum lru_list l;
1271 1280
1272 if (scan_global_lru(sc)) { 1281 get_scan_ratio(zone, sc, percent);
1273 /* 1282
1274 * Add one to nr_to_scan just to make sure that the kernel 1283 for_each_lru(l) {
1275 * will slowly sift through the active list. 1284 if (scan_global_lru(sc)) {
1276 */ 1285 int file = is_file_lru(l);
1277 for_each_lru(l) { 1286 int scan;
1278 zone->lru[l].nr_scan += (zone_page_state(zone, 1287 /*
1279 NR_LRU_BASE + l) >> priority) + 1; 1288 * Add one to nr_to_scan just to make sure that the
1289 * kernel will slowly sift through each list.
1290 */
1291 scan = zone_page_state(zone, NR_LRU_BASE + l);
1292 if (priority) {
1293 scan >>= priority;
1294 scan = (scan * percent[file]) / 100;
1295 }
1296 zone->lru[l].nr_scan += scan + 1;
1280 nr[l] = zone->lru[l].nr_scan; 1297 nr[l] = zone->lru[l].nr_scan;
1281 if (nr[l] >= sc->swap_cluster_max) 1298 if (nr[l] >= sc->swap_cluster_max)
1282 zone->lru[l].nr_scan = 0; 1299 zone->lru[l].nr_scan = 0;
1283 else 1300 else
1284 nr[l] = 0; 1301 nr[l] = 0;
1302 } else {
1303 /*
1304 * This reclaim occurs not because zone memory shortage
1305 * but because memory controller hits its limit.
1306 * Don't modify zone reclaim related data.
1307 */
1308 nr[l] = mem_cgroup_calc_reclaim(sc->mem_cgroup, zone,
1309 priority, l);
1285 } 1310 }
1286 } else {
1287 /*
1288 * This reclaim occurs not because zone memory shortage but
1289 * because memory controller hits its limit.
1290 * Then, don't modify zone reclaim related data.
1291 */
1292 nr[LRU_ACTIVE] = mem_cgroup_calc_reclaim(sc->mem_cgroup,
1293 zone, priority, LRU_ACTIVE);
1294
1295 nr[LRU_INACTIVE] = mem_cgroup_calc_reclaim(sc->mem_cgroup,
1296 zone, priority, LRU_INACTIVE);
1297 } 1311 }
1298 1312
1299 while (nr[LRU_ACTIVE] || nr[LRU_INACTIVE]) { 1313 while (nr[LRU_ACTIVE_ANON] || nr[LRU_INACTIVE_ANON] ||
1314 nr[LRU_ACTIVE_FILE] || nr[LRU_INACTIVE_FILE]) {
1300 for_each_lru(l) { 1315 for_each_lru(l) {
1301 if (nr[l]) { 1316 if (nr[l]) {
1302 nr_to_scan = min(nr[l], 1317 nr_to_scan = min(nr[l],
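
In shrink_zone() above, each list's scan target is derived from the list size shifted right by the reclaim priority and then weighted by the get_scan_ratio() percentage; the "+ 1" guarantees every list is sifted at least a little, and at priority 0 the weighting is skipped so the whole list becomes eligible. The short stand-alone sketch below reruns that arithmetic with an invented list size and percentage, only to show how the target grows as priority drops.

#include <stdio.h>

int main(void)
{
	unsigned long lru_size = 1UL << 20;	/* pretend the list holds 1M pages */
	unsigned long percent  = 35;		/* share handed back by get_scan_ratio() */
	int priority;

	for (priority = 12; priority >= 0; priority -= 4) {
		unsigned long scan = lru_size;

		if (priority) {
			scan >>= priority;
			scan = (scan * percent) / 100;
		}
		/* shrink_zone() accumulates scan + 1 into zone->lru[l].nr_scan */
		printf("priority %2d -> scan target %lu pages\n",
		       priority, scan + 1);
	}
	return 0;
}
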
@@ -1369,7 +1384,7 @@ static unsigned long shrink_zones(int priority, struct zonelist *zonelist,
1369 1384
1370 return nr_reclaimed; 1385 return nr_reclaimed;
1371} 1386}
1372 1387
1373/* 1388/*
1374 * This is the main entry point to direct page reclaim. 1389 * This is the main entry point to direct page reclaim.
1375 * 1390 *
@@ -1412,8 +1427,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
1412 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) 1427 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
1413 continue; 1428 continue;
1414 1429
1415 lru_pages += zone_page_state(zone, NR_ACTIVE) 1430 lru_pages += zone_lru_pages(zone);
1416 + zone_page_state(zone, NR_INACTIVE);
1417 } 1431 }
1418 } 1432 }
1419 1433
@@ -1615,8 +1629,7 @@ loop_again:
1615 for (i = 0; i <= end_zone; i++) { 1629 for (i = 0; i <= end_zone; i++) {
1616 struct zone *zone = pgdat->node_zones + i; 1630 struct zone *zone = pgdat->node_zones + i;
1617 1631
1618 lru_pages += zone_page_state(zone, NR_ACTIVE) 1632 lru_pages += zone_lru_pages(zone);
1619 + zone_page_state(zone, NR_INACTIVE);
1620 } 1633 }
1621 1634
1622 /* 1635 /*
@@ -1660,8 +1673,7 @@ loop_again:
1660 if (zone_is_all_unreclaimable(zone)) 1673 if (zone_is_all_unreclaimable(zone))
1661 continue; 1674 continue;
1662 if (nr_slab == 0 && zone->pages_scanned >= 1675 if (nr_slab == 0 && zone->pages_scanned >=
1663 (zone_page_state(zone, NR_ACTIVE) 1676 (zone_lru_pages(zone) * 6))
1664 + zone_page_state(zone, NR_INACTIVE)) * 6)
1665 zone_set_flag(zone, 1677 zone_set_flag(zone,
1666 ZONE_ALL_UNRECLAIMABLE); 1678 ZONE_ALL_UNRECLAIMABLE);
1667 /* 1679 /*
@@ -1715,7 +1727,7 @@ out:
1715 1727
1716/* 1728/*
1717 * The background pageout daemon, started as a kernel thread 1729 * The background pageout daemon, started as a kernel thread
1718 * from the init process. 1730 * from the init process.
1719 * 1731 *
1720 * This basically trickles out pages so that we have _some_ 1732 * This basically trickles out pages so that we have _some_
1721 * free memory available even if there is no other activity 1733 * free memory available even if there is no other activity
@@ -1809,6 +1821,14 @@ void wakeup_kswapd(struct zone *zone, int order)
1809 wake_up_interruptible(&pgdat->kswapd_wait); 1821 wake_up_interruptible(&pgdat->kswapd_wait);
1810} 1822}
1811 1823
1824unsigned long global_lru_pages(void)
1825{
1826 return global_page_state(NR_ACTIVE_ANON)
1827 + global_page_state(NR_ACTIVE_FILE)
1828 + global_page_state(NR_INACTIVE_ANON)
1829 + global_page_state(NR_INACTIVE_FILE);
1830}
1831
1812#ifdef CONFIG_PM 1832#ifdef CONFIG_PM
1813/* 1833/*
1814 * Helper function for shrink_all_memory(). Tries to reclaim 'nr_pages' pages 1834 * Helper function for shrink_all_memory(). Tries to reclaim 'nr_pages' pages
@@ -1834,7 +1854,8 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
1834 1854
1835 for_each_lru(l) { 1855 for_each_lru(l) {
1836 /* For pass = 0 we don't shrink the active list */ 1856 /* For pass = 0 we don't shrink the active list */
1837 if (pass == 0 && l == LRU_ACTIVE) 1857 if (pass == 0 &&
1858 (l == LRU_ACTIVE || l == LRU_ACTIVE_FILE))
1838 continue; 1859 continue;
1839 1860
1840 zone->lru[l].nr_scan += 1861 zone->lru[l].nr_scan +=
@@ -1856,11 +1877,6 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
1856 return ret; 1877 return ret;
1857} 1878}
1858 1879
1859static unsigned long count_lru_pages(void)
1860{
1861 return global_page_state(NR_ACTIVE) + global_page_state(NR_INACTIVE);
1862}
1863
1864/* 1880/*
1865 * Try to free `nr_pages' of memory, system-wide, and return the number of 1881 * Try to free `nr_pages' of memory, system-wide, and return the number of
1866 * freed pages. 1882 * freed pages.
@@ -1886,7 +1902,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
1886 1902
1887 current->reclaim_state = &reclaim_state; 1903 current->reclaim_state = &reclaim_state;
1888 1904
1889 lru_pages = count_lru_pages(); 1905 lru_pages = global_lru_pages();
1890 nr_slab = global_page_state(NR_SLAB_RECLAIMABLE); 1906 nr_slab = global_page_state(NR_SLAB_RECLAIMABLE);
1891 /* If slab caches are huge, it's better to hit them first */ 1907 /* If slab caches are huge, it's better to hit them first */
1892 while (nr_slab >= lru_pages) { 1908 while (nr_slab >= lru_pages) {
@@ -1929,7 +1945,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
1929 1945
1930 reclaim_state.reclaimed_slab = 0; 1946 reclaim_state.reclaimed_slab = 0;
1931 shrink_slab(sc.nr_scanned, sc.gfp_mask, 1947 shrink_slab(sc.nr_scanned, sc.gfp_mask,
1932 count_lru_pages()); 1948 global_lru_pages());
1933 ret += reclaim_state.reclaimed_slab; 1949 ret += reclaim_state.reclaimed_slab;
1934 if (ret >= nr_pages) 1950 if (ret >= nr_pages)
1935 goto out; 1951 goto out;
@@ -1946,7 +1962,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
1946 if (!ret) { 1962 if (!ret) {
1947 do { 1963 do {
1948 reclaim_state.reclaimed_slab = 0; 1964 reclaim_state.reclaimed_slab = 0;
1949 shrink_slab(nr_pages, sc.gfp_mask, count_lru_pages()); 1965 shrink_slab(nr_pages, sc.gfp_mask, global_lru_pages());
1950 ret += reclaim_state.reclaimed_slab; 1966 ret += reclaim_state.reclaimed_slab;
1951 } while (ret < nr_pages && reclaim_state.reclaimed_slab > 0); 1967 } while (ret < nr_pages && reclaim_state.reclaimed_slab > 0);
1952 } 1968 }
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 52c0335c1b71..27400b7da7c4 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -619,8 +619,10 @@ const struct seq_operations pagetypeinfo_op = {
619static const char * const vmstat_text[] = { 619static const char * const vmstat_text[] = {
620 /* Zoned VM counters */ 620 /* Zoned VM counters */
621 "nr_free_pages", 621 "nr_free_pages",
622 "nr_inactive", 622 "nr_inactive_anon",
623 "nr_active", 623 "nr_active_anon",
624 "nr_inactive_file",
625 "nr_active_file",
624 "nr_anon_pages", 626 "nr_anon_pages",
625 "nr_mapped", 627 "nr_mapped",
626 "nr_file_pages", 628 "nr_file_pages",
@@ -688,7 +690,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
688 "\n min %lu" 690 "\n min %lu"
689 "\n low %lu" 691 "\n low %lu"
690 "\n high %lu" 692 "\n high %lu"
691 "\n scanned %lu (a: %lu i: %lu)" 693 "\n scanned %lu (aa: %lu ia: %lu af: %lu if: %lu)"
692 "\n spanned %lu" 694 "\n spanned %lu"
693 "\n present %lu", 695 "\n present %lu",
694 zone_page_state(zone, NR_FREE_PAGES), 696 zone_page_state(zone, NR_FREE_PAGES),
@@ -696,8 +698,10 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
696 zone->pages_low, 698 zone->pages_low,
697 zone->pages_high, 699 zone->pages_high,
698 zone->pages_scanned, 700 zone->pages_scanned,
699 zone->lru[LRU_ACTIVE].nr_scan, 701 zone->lru[LRU_ACTIVE_ANON].nr_scan,
700 zone->lru[LRU_INACTIVE].nr_scan, 702 zone->lru[LRU_INACTIVE_ANON].nr_scan,
703 zone->lru[LRU_ACTIVE_FILE].nr_scan,
704 zone->lru[LRU_INACTIVE_FILE].nr_scan,
701 zone->spanned_pages, 705 zone->spanned_pages,
702 zone->present_pages); 706 zone->present_pages);
703 707