diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-26 13:50:56 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-26 13:50:56 -0400 |
commit | f8d613e2a665bf1be9628a3c3f9bafe7599b32c0 (patch) | |
tree | 98d4da8d0e1a5fb1d9064626b4b96d95ccf26375 /mm | |
parent | 8a0599dd2471f2a2e409498c08a0ab339057ad06 (diff) | |
parent | 5bc20fc59706214d9591c11e1938a629d3538c12 (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/djm/tmem
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/djm/tmem:
xen: cleancache shim to Xen Transcendent Memory
ocfs2: add cleancache support
ext4: add cleancache support
btrfs: add cleancache support
ext3: add cleancache support
mm/fs: add hooks to support cleancache
mm: cleancache core ops functions and config
fs: add field to superblock to support cleancache
mm/fs: cleancache documentation
Fix up trivial conflict in fs/btrfs/extent_io.c due to includes
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 23 | ||||
-rw-r--r-- | mm/Makefile | 1 | ||||
-rw-r--r-- | mm/cleancache.c | 244 | ||||
-rw-r--r-- | mm/filemap.c | 11 | ||||
-rw-r--r-- | mm/truncate.c | 6 |
5 files changed, 285 insertions, 0 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index e9c0c61f2ddd..8ca47a5ee9c8 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
@@ -347,3 +347,26 @@ config NEED_PER_CPU_KM | |||
347 | depends on !SMP | 347 | depends on !SMP |
348 | bool | 348 | bool |
349 | default y | 349 | default y |
350 | |||
351 | config CLEANCACHE | ||
352 | bool "Enable cleancache driver to cache clean pages if tmem is present" | ||
353 | default n | ||
354 | help | ||
355 | Cleancache can be thought of as a page-granularity victim cache | ||
356 | for clean pages that the kernel's pageframe replacement algorithm | ||
357 | (PFRA) would like to keep around, but can't since there isn't enough | ||
358 | memory. So when the PFRA "evicts" a page, it first attempts to use | ||
359 | cleancache code to put the data contained in that page into | ||
360 | "transcendent memory", memory that is not directly accessible or | ||
361 | addressable by the kernel and is of unknown and possibly | ||
362 | time-varying size. And when a cleancache-enabled | ||
363 | filesystem wishes to access a page in a file on disk, it first | ||
364 | checks cleancache to see if it already contains it; if it does, | ||
365 | the page is copied into the kernel and a disk access is avoided. | ||
366 | When a transcendent memory driver is available (such as zcache or | ||
367 | Xen transcendent memory), a significant I/O reduction | ||
368 | may be achieved. When none is available, all cleancache calls | ||
369 | are reduced to a single pointer-compare-against-NULL resulting | ||
370 | in a negligible performance hit. | ||
371 | |||
372 | If unsure, say Y to enable cleancache | ||
diff --git a/mm/Makefile b/mm/Makefile index 42a8326c3e3d..836e4163c1bf 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
@@ -49,3 +49,4 @@ obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o | |||
49 | obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o | 49 | obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o |
50 | obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o | 50 | obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o |
51 | obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o | 51 | obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o |
52 | obj-$(CONFIG_CLEANCACHE) += cleancache.o | ||
diff --git a/mm/cleancache.c b/mm/cleancache.c new file mode 100644 index 000000000000..bcaae4c2a770 --- /dev/null +++ b/mm/cleancache.c | |||
@@ -0,0 +1,244 @@ | |||
1 | /* | ||
2 | * Cleancache frontend | ||
3 | * | ||
4 | * This code provides the generic "frontend" layer to call a matching | ||
5 | * "backend" driver implementation of cleancache. See | ||
6 | * Documentation/vm/cleancache.txt for more information. | ||
7 | * | ||
8 | * Copyright (C) 2009-2010 Oracle Corp. All rights reserved. | ||
9 | * Author: Dan Magenheimer | ||
10 | * | ||
11 | * This work is licensed under the terms of the GNU GPL, version 2. | ||
12 | */ | ||
13 | |||
14 | #include <linux/module.h> | ||
15 | #include <linux/fs.h> | ||
16 | #include <linux/exportfs.h> | ||
17 | #include <linux/mm.h> | ||
18 | #include <linux/cleancache.h> | ||
19 | |||
20 | /* | ||
21 | * This global enablement flag may be read thousands of times per second | ||
22 | * by cleancache_get/put/flush even on systems where cleancache_ops | ||
23 | * is not claimed (e.g. cleancache is config'ed on but remains | ||
24 | * disabled), so is preferred to the slower alternative: a function | ||
25 | * call that checks a non-global. | ||
26 | */ | ||
27 | int cleancache_enabled; | ||
28 | EXPORT_SYMBOL(cleancache_enabled); | ||
29 | |||
30 | /* | ||
31 | * cleancache_ops is set by cleancache_register_ops to contain the pointers | ||
32 | * to the cleancache "backend" implementation functions. | ||
33 | */ | ||
34 | static struct cleancache_ops cleancache_ops; | ||
35 | |||
36 | /* useful stats available in /sys/kernel/mm/cleancache */ | ||
37 | static unsigned long cleancache_succ_gets; | ||
38 | static unsigned long cleancache_failed_gets; | ||
39 | static unsigned long cleancache_puts; | ||
40 | static unsigned long cleancache_flushes; | ||
41 | |||
42 | /* | ||
43 | * register operations for cleancache, returning previous thus allowing | ||
44 | * detection of multiple backends and possible nesting | ||
45 | */ | ||
46 | struct cleancache_ops cleancache_register_ops(struct cleancache_ops *ops) | ||
47 | { | ||
48 | struct cleancache_ops old = cleancache_ops; | ||
49 | |||
50 | cleancache_ops = *ops; | ||
51 | cleancache_enabled = 1; | ||
52 | return old; | ||
53 | } | ||
54 | EXPORT_SYMBOL(cleancache_register_ops); | ||
55 | |||
56 | /* Called by a cleancache-enabled filesystem at time of mount */ | ||
57 | void __cleancache_init_fs(struct super_block *sb) | ||
58 | { | ||
59 | sb->cleancache_poolid = (*cleancache_ops.init_fs)(PAGE_SIZE); | ||
60 | } | ||
61 | EXPORT_SYMBOL(__cleancache_init_fs); | ||
62 | |||
63 | /* Called by a cleancache-enabled clustered filesystem at time of mount */ | ||
64 | void __cleancache_init_shared_fs(char *uuid, struct super_block *sb) | ||
65 | { | ||
66 | sb->cleancache_poolid = | ||
67 | (*cleancache_ops.init_shared_fs)(uuid, PAGE_SIZE); | ||
68 | } | ||
69 | EXPORT_SYMBOL(__cleancache_init_shared_fs); | ||
70 | |||
71 | /* | ||
72 | * If the filesystem uses exportable filehandles, use the filehandle as | ||
73 | * the key, else use the inode number. | ||
74 | */ | ||
75 | static int cleancache_get_key(struct inode *inode, | ||
76 | struct cleancache_filekey *key) | ||
77 | { | ||
78 | int (*fhfn)(struct dentry *, __u32 *fh, int *, int); | ||
79 | int len = 0, maxlen = CLEANCACHE_KEY_MAX; | ||
80 | struct super_block *sb = inode->i_sb; | ||
81 | |||
82 | key->u.ino = inode->i_ino; | ||
83 | if (sb->s_export_op != NULL) { | ||
84 | fhfn = sb->s_export_op->encode_fh; | ||
85 | if (fhfn) { | ||
86 | struct dentry d; | ||
87 | d.d_inode = inode; | ||
88 | len = (*fhfn)(&d, &key->u.fh[0], &maxlen, 0); | ||
89 | if (len <= 0 || len == 255) | ||
90 | return -1; | ||
91 | if (maxlen > CLEANCACHE_KEY_MAX) | ||
92 | return -1; | ||
93 | } | ||
94 | } | ||
95 | return 0; | ||
96 | } | ||
97 | |||
98 | /* | ||
99 | * "Get" data from cleancache associated with the poolid/inode/index | ||
100 | * that were specified when the data was put to cleanache and, if | ||
101 | * successful, use it to fill the specified page with data and return 0. | ||
102 | * The pageframe is unchanged and returns -1 if the get fails. | ||
103 | * Page must be locked by caller. | ||
104 | */ | ||
105 | int __cleancache_get_page(struct page *page) | ||
106 | { | ||
107 | int ret = -1; | ||
108 | int pool_id; | ||
109 | struct cleancache_filekey key = { .u.key = { 0 } }; | ||
110 | |||
111 | VM_BUG_ON(!PageLocked(page)); | ||
112 | pool_id = page->mapping->host->i_sb->cleancache_poolid; | ||
113 | if (pool_id < 0) | ||
114 | goto out; | ||
115 | |||
116 | if (cleancache_get_key(page->mapping->host, &key) < 0) | ||
117 | goto out; | ||
118 | |||
119 | ret = (*cleancache_ops.get_page)(pool_id, key, page->index, page); | ||
120 | if (ret == 0) | ||
121 | cleancache_succ_gets++; | ||
122 | else | ||
123 | cleancache_failed_gets++; | ||
124 | out: | ||
125 | return ret; | ||
126 | } | ||
127 | EXPORT_SYMBOL(__cleancache_get_page); | ||
128 | |||
129 | /* | ||
130 | * "Put" data from a page to cleancache and associate it with the | ||
131 | * (previously-obtained per-filesystem) poolid and the page's, | ||
132 | * inode and page index. Page must be locked. Note that a put_page | ||
133 | * always "succeeds", though a subsequent get_page may succeed or fail. | ||
134 | */ | ||
135 | void __cleancache_put_page(struct page *page) | ||
136 | { | ||
137 | int pool_id; | ||
138 | struct cleancache_filekey key = { .u.key = { 0 } }; | ||
139 | |||
140 | VM_BUG_ON(!PageLocked(page)); | ||
141 | pool_id = page->mapping->host->i_sb->cleancache_poolid; | ||
142 | if (pool_id >= 0 && | ||
143 | cleancache_get_key(page->mapping->host, &key) >= 0) { | ||
144 | (*cleancache_ops.put_page)(pool_id, key, page->index, page); | ||
145 | cleancache_puts++; | ||
146 | } | ||
147 | } | ||
148 | EXPORT_SYMBOL(__cleancache_put_page); | ||
149 | |||
150 | /* | ||
151 | * Flush any data from cleancache associated with the poolid and the | ||
152 | * page's inode and page index so that a subsequent "get" will fail. | ||
153 | */ | ||
154 | void __cleancache_flush_page(struct address_space *mapping, struct page *page) | ||
155 | { | ||
156 | /* careful... page->mapping is NULL sometimes when this is called */ | ||
157 | int pool_id = mapping->host->i_sb->cleancache_poolid; | ||
158 | struct cleancache_filekey key = { .u.key = { 0 } }; | ||
159 | |||
160 | if (pool_id >= 0) { | ||
161 | VM_BUG_ON(!PageLocked(page)); | ||
162 | if (cleancache_get_key(mapping->host, &key) >= 0) { | ||
163 | (*cleancache_ops.flush_page)(pool_id, key, page->index); | ||
164 | cleancache_flushes++; | ||
165 | } | ||
166 | } | ||
167 | } | ||
168 | EXPORT_SYMBOL(__cleancache_flush_page); | ||
169 | |||
170 | /* | ||
171 | * Flush all data from cleancache associated with the poolid and the | ||
172 | * mappings's inode so that all subsequent gets to this poolid/inode | ||
173 | * will fail. | ||
174 | */ | ||
175 | void __cleancache_flush_inode(struct address_space *mapping) | ||
176 | { | ||
177 | int pool_id = mapping->host->i_sb->cleancache_poolid; | ||
178 | struct cleancache_filekey key = { .u.key = { 0 } }; | ||
179 | |||
180 | if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0) | ||
181 | (*cleancache_ops.flush_inode)(pool_id, key); | ||
182 | } | ||
183 | EXPORT_SYMBOL(__cleancache_flush_inode); | ||
184 | |||
185 | /* | ||
186 | * Called by any cleancache-enabled filesystem at time of unmount; | ||
187 | * note that pool_id is surrendered and may be reutrned by a subsequent | ||
188 | * cleancache_init_fs or cleancache_init_shared_fs | ||
189 | */ | ||
190 | void __cleancache_flush_fs(struct super_block *sb) | ||
191 | { | ||
192 | if (sb->cleancache_poolid >= 0) { | ||
193 | int old_poolid = sb->cleancache_poolid; | ||
194 | sb->cleancache_poolid = -1; | ||
195 | (*cleancache_ops.flush_fs)(old_poolid); | ||
196 | } | ||
197 | } | ||
198 | EXPORT_SYMBOL(__cleancache_flush_fs); | ||
199 | |||
#ifdef CONFIG_SYSFS

/* see Documentation/ABI/testing/sysfs-kernel-mm-cleancache */

/*
 * Define a read-only (mode 0444) kobject attribute named <_name> whose
 * show routine prints the cleancache_<_name> counter as "%lu\n".
 * Expands to a show function plus a cleancache_<_name>_attr object.
 */
#define CLEANCACHE_SYSFS_RO(_name) \
	static ssize_t cleancache_##_name##_show(struct kobject *kobj, \
				struct kobj_attribute *attr, char *buf) \
	{ \
		return sprintf(buf, "%lu\n", cleancache_##_name); \
	} \
	static struct kobj_attribute cleancache_##_name##_attr = \
		__ATTR(_name, 0444, cleancache_##_name##_show, NULL)

CLEANCACHE_SYSFS_RO(succ_gets);
CLEANCACHE_SYSFS_RO(failed_gets);
CLEANCACHE_SYSFS_RO(puts);
CLEANCACHE_SYSFS_RO(flushes);

/* NULL-terminated list of the statistics attributes defined above. */
static struct attribute *cleancache_attrs[] = {
	&cleancache_succ_gets_attr.attr,
	&cleancache_failed_gets_attr.attr,
	&cleancache_puts_attr.attr,
	&cleancache_flushes_attr.attr,
	NULL,
};

/* Groups the attributes in a "cleancache" subdirectory of the parent kobject. */
static struct attribute_group cleancache_attr_group = {
	.name = "cleancache",
	.attrs = cleancache_attrs,
};

#endif /* CONFIG_SYSFS */
234 | |||
235 | static int __init init_cleancache(void) | ||
236 | { | ||
237 | #ifdef CONFIG_SYSFS | ||
238 | int err; | ||
239 | |||
240 | err = sysfs_create_group(mm_kobj, &cleancache_attr_group); | ||
241 | #endif /* CONFIG_SYSFS */ | ||
242 | return 0; | ||
243 | } | ||
244 | module_init(init_cleancache) | ||
diff --git a/mm/filemap.c b/mm/filemap.c index 68e782b3d3de..7455ccd8bda8 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */ | 34 | #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */ |
35 | #include <linux/memcontrol.h> | 35 | #include <linux/memcontrol.h> |
36 | #include <linux/mm_inline.h> /* for page_is_file_cache() */ | 36 | #include <linux/mm_inline.h> /* for page_is_file_cache() */ |
37 | #include <linux/cleancache.h> | ||
37 | #include "internal.h" | 38 | #include "internal.h" |
38 | 39 | ||
39 | /* | 40 | /* |
@@ -118,6 +119,16 @@ void __delete_from_page_cache(struct page *page) | |||
118 | { | 119 | { |
119 | struct address_space *mapping = page->mapping; | 120 | struct address_space *mapping = page->mapping; |
120 | 121 | ||
122 | /* | ||
123 | * if we're uptodate, flush out into the cleancache, otherwise | ||
124 | * invalidate any existing cleancache entries. We can't leave | ||
125 | * stale data around in the cleancache once our page is gone | ||
126 | */ | ||
127 | if (PageUptodate(page) && PageMappedToDisk(page)) | ||
128 | cleancache_put_page(page); | ||
129 | else | ||
130 | cleancache_flush_page(mapping, page); | ||
131 | |||
121 | radix_tree_delete(&mapping->page_tree, page->index); | 132 | radix_tree_delete(&mapping->page_tree, page->index); |
122 | page->mapping = NULL; | 133 | page->mapping = NULL; |
123 | mapping->nrpages--; | 134 | mapping->nrpages--; |
diff --git a/mm/truncate.c b/mm/truncate.c index a95667529135..3a29a6180212 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/task_io_accounting_ops.h> | 19 | #include <linux/task_io_accounting_ops.h> |
20 | #include <linux/buffer_head.h> /* grr. try_to_release_page, | 20 | #include <linux/buffer_head.h> /* grr. try_to_release_page, |
21 | do_invalidatepage */ | 21 | do_invalidatepage */ |
22 | #include <linux/cleancache.h> | ||
22 | #include "internal.h" | 23 | #include "internal.h" |
23 | 24 | ||
24 | 25 | ||
@@ -51,6 +52,7 @@ void do_invalidatepage(struct page *page, unsigned long offset) | |||
51 | static inline void truncate_partial_page(struct page *page, unsigned partial) | 52 | static inline void truncate_partial_page(struct page *page, unsigned partial) |
52 | { | 53 | { |
53 | zero_user_segment(page, partial, PAGE_CACHE_SIZE); | 54 | zero_user_segment(page, partial, PAGE_CACHE_SIZE); |
55 | cleancache_flush_page(page->mapping, page); | ||
54 | if (page_has_private(page)) | 56 | if (page_has_private(page)) |
55 | do_invalidatepage(page, partial); | 57 | do_invalidatepage(page, partial); |
56 | } | 58 | } |
@@ -214,6 +216,7 @@ void truncate_inode_pages_range(struct address_space *mapping, | |||
214 | pgoff_t next; | 216 | pgoff_t next; |
215 | int i; | 217 | int i; |
216 | 218 | ||
219 | cleancache_flush_inode(mapping); | ||
217 | if (mapping->nrpages == 0) | 220 | if (mapping->nrpages == 0) |
218 | return; | 221 | return; |
219 | 222 | ||
@@ -291,6 +294,7 @@ void truncate_inode_pages_range(struct address_space *mapping, | |||
291 | pagevec_release(&pvec); | 294 | pagevec_release(&pvec); |
292 | mem_cgroup_uncharge_end(); | 295 | mem_cgroup_uncharge_end(); |
293 | } | 296 | } |
297 | cleancache_flush_inode(mapping); | ||
294 | } | 298 | } |
295 | EXPORT_SYMBOL(truncate_inode_pages_range); | 299 | EXPORT_SYMBOL(truncate_inode_pages_range); |
296 | 300 | ||
@@ -440,6 +444,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping, | |||
440 | int did_range_unmap = 0; | 444 | int did_range_unmap = 0; |
441 | int wrapped = 0; | 445 | int wrapped = 0; |
442 | 446 | ||
447 | cleancache_flush_inode(mapping); | ||
443 | pagevec_init(&pvec, 0); | 448 | pagevec_init(&pvec, 0); |
444 | next = start; | 449 | next = start; |
445 | while (next <= end && !wrapped && | 450 | while (next <= end && !wrapped && |
@@ -498,6 +503,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping, | |||
498 | mem_cgroup_uncharge_end(); | 503 | mem_cgroup_uncharge_end(); |
499 | cond_resched(); | 504 | cond_resched(); |
500 | } | 505 | } |
506 | cleancache_flush_inode(mapping); | ||
501 | return ret; | 507 | return ret; |
502 | } | 508 | } |
503 | EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range); | 509 | EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range); |