aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-05-26 13:50:56 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2011-05-26 13:50:56 -0400
commitf8d613e2a665bf1be9628a3c3f9bafe7599b32c0 (patch)
tree98d4da8d0e1a5fb1d9064626b4b96d95ccf26375 /mm
parent8a0599dd2471f2a2e409498c08a0ab339057ad06 (diff)
parent5bc20fc59706214d9591c11e1938a629d3538c12 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/djm/tmem
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/djm/tmem: xen: cleancache shim to Xen Transcendent Memory ocfs2: add cleancache support ext4: add cleancache support btrfs: add cleancache support ext3: add cleancache support mm/fs: add hooks to support cleancache mm: cleancache core ops functions and config fs: add field to superblock to support cleancache mm/fs: cleancache documentation Fix up trivial conflict in fs/btrfs/extent_io.c due to includes
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig23
-rw-r--r--mm/Makefile1
-rw-r--r--mm/cleancache.c244
-rw-r--r--mm/filemap.c11
-rw-r--r--mm/truncate.c6
5 files changed, 285 insertions, 0 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index e9c0c61f2ddd..8ca47a5ee9c8 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -347,3 +347,26 @@ config NEED_PER_CPU_KM
347 depends on !SMP 347 depends on !SMP
348 bool 348 bool
349 default y 349 default y
350
351config CLEANCACHE
352 bool "Enable cleancache driver to cache clean pages if tmem is present"
353 default n
354 help
355 Cleancache can be thought of as a page-granularity victim cache
356 for clean pages that the kernel's pageframe replacement algorithm
357 (PFRA) would like to keep around, but can't since there isn't enough
358 memory. So when the PFRA "evicts" a page, it first attempts to use
359 cleancacne code to put the data contained in that page into
360 "transcendent memory", memory that is not directly accessible or
361 addressable by the kernel and is of unknown and possibly
362 time-varying size. And when a cleancache-enabled
363 filesystem wishes to access a page in a file on disk, it first
364 checks cleancache to see if it already contains it; if it does,
365 the page is copied into the kernel and a disk access is avoided.
366 When a transcendent memory driver is available (such as zcache or
367 Xen transcendent memory), a significant I/O reduction
368 may be achieved. When none is available, all cleancache calls
369 are reduced to a single pointer-compare-against-NULL resulting
370 in a negligible performance hit.
371
372 If unsure, say Y to enable cleancache
diff --git a/mm/Makefile b/mm/Makefile
index 42a8326c3e3d..836e4163c1bf 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -49,3 +49,4 @@ obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
49obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o 49obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
50obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o 50obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
51obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o 51obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
52obj-$(CONFIG_CLEANCACHE) += cleancache.o
diff --git a/mm/cleancache.c b/mm/cleancache.c
new file mode 100644
index 000000000000..bcaae4c2a770
--- /dev/null
+++ b/mm/cleancache.c
@@ -0,0 +1,244 @@
1/*
2 * Cleancache frontend
3 *
4 * This code provides the generic "frontend" layer to call a matching
5 * "backend" driver implementation of cleancache. See
6 * Documentation/vm/cleancache.txt for more information.
7 *
8 * Copyright (C) 2009-2010 Oracle Corp. All rights reserved.
9 * Author: Dan Magenheimer
10 *
11 * This work is licensed under the terms of the GNU GPL, version 2.
12 */
13
14#include <linux/module.h>
15#include <linux/fs.h>
16#include <linux/exportfs.h>
17#include <linux/mm.h>
18#include <linux/cleancache.h>
19
20/*
21 * This global enablement flag may be read thousands of times per second
22 * by cleancache_get/put/flush even on systems where cleancache_ops
23 * is not claimed (e.g. cleancache is config'ed on but remains
24 * disabled), so is preferred to the slower alternative: a function
25 * call that checks a non-global.
26 */
27int cleancache_enabled;
28EXPORT_SYMBOL(cleancache_enabled);
29
30/*
31 * cleancache_ops is set by cleancache_ops_register to contain the pointers
32 * to the cleancache "backend" implementation functions.
33 */
34static struct cleancache_ops cleancache_ops;
35
36/* useful stats available in /sys/kernel/mm/cleancache */
37static unsigned long cleancache_succ_gets;
38static unsigned long cleancache_failed_gets;
39static unsigned long cleancache_puts;
40static unsigned long cleancache_flushes;
41
42/*
43 * register operations for cleancache, returning previous thus allowing
44 * detection of multiple backends and possible nesting
45 */
46struct cleancache_ops cleancache_register_ops(struct cleancache_ops *ops)
47{
48 struct cleancache_ops old = cleancache_ops;
49
50 cleancache_ops = *ops;
51 cleancache_enabled = 1;
52 return old;
53}
54EXPORT_SYMBOL(cleancache_register_ops);
55
56/* Called by a cleancache-enabled filesystem at time of mount */
57void __cleancache_init_fs(struct super_block *sb)
58{
59 sb->cleancache_poolid = (*cleancache_ops.init_fs)(PAGE_SIZE);
60}
61EXPORT_SYMBOL(__cleancache_init_fs);
62
63/* Called by a cleancache-enabled clustered filesystem at time of mount */
64void __cleancache_init_shared_fs(char *uuid, struct super_block *sb)
65{
66 sb->cleancache_poolid =
67 (*cleancache_ops.init_shared_fs)(uuid, PAGE_SIZE);
68}
69EXPORT_SYMBOL(__cleancache_init_shared_fs);
70
71/*
72 * If the filesystem uses exportable filehandles, use the filehandle as
73 * the key, else use the inode number.
74 */
75static int cleancache_get_key(struct inode *inode,
76 struct cleancache_filekey *key)
77{
78 int (*fhfn)(struct dentry *, __u32 *fh, int *, int);
79 int len = 0, maxlen = CLEANCACHE_KEY_MAX;
80 struct super_block *sb = inode->i_sb;
81
82 key->u.ino = inode->i_ino;
83 if (sb->s_export_op != NULL) {
84 fhfn = sb->s_export_op->encode_fh;
85 if (fhfn) {
86 struct dentry d;
87 d.d_inode = inode;
88 len = (*fhfn)(&d, &key->u.fh[0], &maxlen, 0);
89 if (len <= 0 || len == 255)
90 return -1;
91 if (maxlen > CLEANCACHE_KEY_MAX)
92 return -1;
93 }
94 }
95 return 0;
96}
97
98/*
99 * "Get" data from cleancache associated with the poolid/inode/index
100 * that were specified when the data was put to cleanache and, if
101 * successful, use it to fill the specified page with data and return 0.
102 * The pageframe is unchanged and returns -1 if the get fails.
103 * Page must be locked by caller.
104 */
105int __cleancache_get_page(struct page *page)
106{
107 int ret = -1;
108 int pool_id;
109 struct cleancache_filekey key = { .u.key = { 0 } };
110
111 VM_BUG_ON(!PageLocked(page));
112 pool_id = page->mapping->host->i_sb->cleancache_poolid;
113 if (pool_id < 0)
114 goto out;
115
116 if (cleancache_get_key(page->mapping->host, &key) < 0)
117 goto out;
118
119 ret = (*cleancache_ops.get_page)(pool_id, key, page->index, page);
120 if (ret == 0)
121 cleancache_succ_gets++;
122 else
123 cleancache_failed_gets++;
124out:
125 return ret;
126}
127EXPORT_SYMBOL(__cleancache_get_page);
128
129/*
130 * "Put" data from a page to cleancache and associate it with the
131 * (previously-obtained per-filesystem) poolid and the page's,
132 * inode and page index. Page must be locked. Note that a put_page
133 * always "succeeds", though a subsequent get_page may succeed or fail.
134 */
135void __cleancache_put_page(struct page *page)
136{
137 int pool_id;
138 struct cleancache_filekey key = { .u.key = { 0 } };
139
140 VM_BUG_ON(!PageLocked(page));
141 pool_id = page->mapping->host->i_sb->cleancache_poolid;
142 if (pool_id >= 0 &&
143 cleancache_get_key(page->mapping->host, &key) >= 0) {
144 (*cleancache_ops.put_page)(pool_id, key, page->index, page);
145 cleancache_puts++;
146 }
147}
148EXPORT_SYMBOL(__cleancache_put_page);
149
150/*
151 * Flush any data from cleancache associated with the poolid and the
152 * page's inode and page index so that a subsequent "get" will fail.
153 */
154void __cleancache_flush_page(struct address_space *mapping, struct page *page)
155{
156 /* careful... page->mapping is NULL sometimes when this is called */
157 int pool_id = mapping->host->i_sb->cleancache_poolid;
158 struct cleancache_filekey key = { .u.key = { 0 } };
159
160 if (pool_id >= 0) {
161 VM_BUG_ON(!PageLocked(page));
162 if (cleancache_get_key(mapping->host, &key) >= 0) {
163 (*cleancache_ops.flush_page)(pool_id, key, page->index);
164 cleancache_flushes++;
165 }
166 }
167}
168EXPORT_SYMBOL(__cleancache_flush_page);
169
170/*
171 * Flush all data from cleancache associated with the poolid and the
172 * mappings's inode so that all subsequent gets to this poolid/inode
173 * will fail.
174 */
175void __cleancache_flush_inode(struct address_space *mapping)
176{
177 int pool_id = mapping->host->i_sb->cleancache_poolid;
178 struct cleancache_filekey key = { .u.key = { 0 } };
179
180 if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0)
181 (*cleancache_ops.flush_inode)(pool_id, key);
182}
183EXPORT_SYMBOL(__cleancache_flush_inode);
184
185/*
186 * Called by any cleancache-enabled filesystem at time of unmount;
187 * note that pool_id is surrendered and may be reutrned by a subsequent
188 * cleancache_init_fs or cleancache_init_shared_fs
189 */
190void __cleancache_flush_fs(struct super_block *sb)
191{
192 if (sb->cleancache_poolid >= 0) {
193 int old_poolid = sb->cleancache_poolid;
194 sb->cleancache_poolid = -1;
195 (*cleancache_ops.flush_fs)(old_poolid);
196 }
197}
198EXPORT_SYMBOL(__cleancache_flush_fs);
199
200#ifdef CONFIG_SYSFS
201
202/* see Documentation/ABI/xxx/sysfs-kernel-mm-cleancache */
203
204#define CLEANCACHE_SYSFS_RO(_name) \
205 static ssize_t cleancache_##_name##_show(struct kobject *kobj, \
206 struct kobj_attribute *attr, char *buf) \
207 { \
208 return sprintf(buf, "%lu\n", cleancache_##_name); \
209 } \
210 static struct kobj_attribute cleancache_##_name##_attr = { \
211 .attr = { .name = __stringify(_name), .mode = 0444 }, \
212 .show = cleancache_##_name##_show, \
213 }
214
215CLEANCACHE_SYSFS_RO(succ_gets);
216CLEANCACHE_SYSFS_RO(failed_gets);
217CLEANCACHE_SYSFS_RO(puts);
218CLEANCACHE_SYSFS_RO(flushes);
219
220static struct attribute *cleancache_attrs[] = {
221 &cleancache_succ_gets_attr.attr,
222 &cleancache_failed_gets_attr.attr,
223 &cleancache_puts_attr.attr,
224 &cleancache_flushes_attr.attr,
225 NULL,
226};
227
228static struct attribute_group cleancache_attr_group = {
229 .attrs = cleancache_attrs,
230 .name = "cleancache",
231};
232
233#endif /* CONFIG_SYSFS */
234
235static int __init init_cleancache(void)
236{
237#ifdef CONFIG_SYSFS
238 int err;
239
240 err = sysfs_create_group(mm_kobj, &cleancache_attr_group);
241#endif /* CONFIG_SYSFS */
242 return 0;
243}
244module_init(init_cleancache)
diff --git a/mm/filemap.c b/mm/filemap.c
index 68e782b3d3de..7455ccd8bda8 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -34,6 +34,7 @@
34#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */ 34#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
35#include <linux/memcontrol.h> 35#include <linux/memcontrol.h>
36#include <linux/mm_inline.h> /* for page_is_file_cache() */ 36#include <linux/mm_inline.h> /* for page_is_file_cache() */
37#include <linux/cleancache.h>
37#include "internal.h" 38#include "internal.h"
38 39
39/* 40/*
@@ -118,6 +119,16 @@ void __delete_from_page_cache(struct page *page)
118{ 119{
119 struct address_space *mapping = page->mapping; 120 struct address_space *mapping = page->mapping;
120 121
122 /*
123 * if we're uptodate, flush out into the cleancache, otherwise
124 * invalidate any existing cleancache entries. We can't leave
125 * stale data around in the cleancache once our page is gone
126 */
127 if (PageUptodate(page) && PageMappedToDisk(page))
128 cleancache_put_page(page);
129 else
130 cleancache_flush_page(mapping, page);
131
121 radix_tree_delete(&mapping->page_tree, page->index); 132 radix_tree_delete(&mapping->page_tree, page->index);
122 page->mapping = NULL; 133 page->mapping = NULL;
123 mapping->nrpages--; 134 mapping->nrpages--;
diff --git a/mm/truncate.c b/mm/truncate.c
index a95667529135..3a29a6180212 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -19,6 +19,7 @@
19#include <linux/task_io_accounting_ops.h> 19#include <linux/task_io_accounting_ops.h>
20#include <linux/buffer_head.h> /* grr. try_to_release_page, 20#include <linux/buffer_head.h> /* grr. try_to_release_page,
21 do_invalidatepage */ 21 do_invalidatepage */
22#include <linux/cleancache.h>
22#include "internal.h" 23#include "internal.h"
23 24
24 25
@@ -51,6 +52,7 @@ void do_invalidatepage(struct page *page, unsigned long offset)
51static inline void truncate_partial_page(struct page *page, unsigned partial) 52static inline void truncate_partial_page(struct page *page, unsigned partial)
52{ 53{
53 zero_user_segment(page, partial, PAGE_CACHE_SIZE); 54 zero_user_segment(page, partial, PAGE_CACHE_SIZE);
55 cleancache_flush_page(page->mapping, page);
54 if (page_has_private(page)) 56 if (page_has_private(page))
55 do_invalidatepage(page, partial); 57 do_invalidatepage(page, partial);
56} 58}
@@ -214,6 +216,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
214 pgoff_t next; 216 pgoff_t next;
215 int i; 217 int i;
216 218
219 cleancache_flush_inode(mapping);
217 if (mapping->nrpages == 0) 220 if (mapping->nrpages == 0)
218 return; 221 return;
219 222
@@ -291,6 +294,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
291 pagevec_release(&pvec); 294 pagevec_release(&pvec);
292 mem_cgroup_uncharge_end(); 295 mem_cgroup_uncharge_end();
293 } 296 }
297 cleancache_flush_inode(mapping);
294} 298}
295EXPORT_SYMBOL(truncate_inode_pages_range); 299EXPORT_SYMBOL(truncate_inode_pages_range);
296 300
@@ -440,6 +444,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
440 int did_range_unmap = 0; 444 int did_range_unmap = 0;
441 int wrapped = 0; 445 int wrapped = 0;
442 446
447 cleancache_flush_inode(mapping);
443 pagevec_init(&pvec, 0); 448 pagevec_init(&pvec, 0);
444 next = start; 449 next = start;
445 while (next <= end && !wrapped && 450 while (next <= end && !wrapped &&
@@ -498,6 +503,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
498 mem_cgroup_uncharge_end(); 503 mem_cgroup_uncharge_end();
499 cond_resched(); 504 cond_resched();
500 } 505 }
506 cleancache_flush_inode(mapping);
501 return ret; 507 return ret;
502} 508}
503EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range); 509EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range);