diff options
-rw-r--r-- | include/linux/cleancache.h | 122 | ||||
-rw-r--r-- | mm/Kconfig | 23 | ||||
-rw-r--r-- | mm/Makefile | 1 | ||||
-rw-r--r-- | mm/cleancache.c | 244 |
4 files changed, 390 insertions, 0 deletions
diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h new file mode 100644 index 000000000000..04ffb2e6c9d0 --- /dev/null +++ b/include/linux/cleancache.h | |||
@@ -0,0 +1,122 @@ | |||
1 | #ifndef _LINUX_CLEANCACHE_H | ||
2 | #define _LINUX_CLEANCACHE_H | ||
3 | |||
4 | #include <linux/fs.h> | ||
5 | #include <linux/exportfs.h> | ||
6 | #include <linux/mm.h> | ||
7 | |||
8 | #define CLEANCACHE_KEY_MAX 6 | ||
9 | |||
10 | /* | ||
11 | * cleancache requires every file with a page in cleancache to have a | ||
12 | * unique key unless/until the file is removed/truncated. For some | ||
13 | * filesystems, the inode number is unique, but for "modern" filesystems | ||
14 | * an exportable filehandle is required (see exportfs.h) | ||
15 | */ | ||
16 | struct cleancache_filekey { | ||
17 | union { | ||
18 | ino_t ino; | ||
19 | __u32 fh[CLEANCACHE_KEY_MAX]; | ||
20 | u32 key[CLEANCACHE_KEY_MAX]; | ||
21 | } u; | ||
22 | }; | ||
23 | |||
24 | struct cleancache_ops { | ||
25 | int (*init_fs)(size_t); | ||
26 | int (*init_shared_fs)(char *uuid, size_t); | ||
27 | int (*get_page)(int, struct cleancache_filekey, | ||
28 | pgoff_t, struct page *); | ||
29 | void (*put_page)(int, struct cleancache_filekey, | ||
30 | pgoff_t, struct page *); | ||
31 | void (*flush_page)(int, struct cleancache_filekey, pgoff_t); | ||
32 | void (*flush_inode)(int, struct cleancache_filekey); | ||
33 | void (*flush_fs)(int); | ||
34 | }; | ||
35 | |||
/* Backend registration; hands back the previously registered ops by value. */
extern struct cleancache_ops
	cleancache_register_ops(struct cleancache_ops *ops);

/* Out-of-line implementations called by the inline hooks in this header. */
extern void __cleancache_init_fs(struct super_block *sb);
extern void __cleancache_init_shared_fs(char *uuid, struct super_block *sb);
extern int __cleancache_get_page(struct page *page);
extern void __cleancache_put_page(struct page *page);
extern void __cleancache_flush_page(struct address_space *mapping,
				    struct page *page);
extern void __cleancache_flush_inode(struct address_space *mapping);
extern void __cleancache_flush_fs(struct super_block *sb);

/* Global on/off flag; set to 1 once a backend has registered its ops. */
extern int cleancache_enabled;
46 | |||
#ifdef CONFIG_CLEANCACHE
/*
 * A filesystem takes part in cleancache when its superblock carries a
 * non-negative pool id.
 */
static inline bool cleancache_fs_enabled_mapping(struct address_space *mapping)
{
	struct super_block *sb = mapping->host->i_sb;

	return sb->cleancache_poolid >= 0;
}

/* Same test, reached through the page's own mapping. */
static inline bool cleancache_fs_enabled(struct page *page)
{
	return cleancache_fs_enabled_mapping(page->mapping);
}
#else
/* Constant-false stubs so every hook compiles away to nothing. */
#define cleancache_enabled (0)
#define cleancache_fs_enabled(_page) (0)
#define cleancache_fs_enabled_mapping(_mapping) (0)
#endif
61 | |||
62 | /* | ||
63 | * The shim layer provided by these inline functions allows the compiler | ||
64 | * to reduce all cleancache hooks to nothingness if CONFIG_CLEANCACHE | ||
65 | * is disabled, to a single global variable check if CONFIG_CLEANCACHE | ||
66 | * is enabled but no cleancache "backend" has dynamically enabled it, | ||
67 | * and, for the most frequent cleancache ops, to a single global variable | ||
68 | * check plus a superblock element comparison if CONFIG_CLEANCACHE is enabled | ||
69 | * and a cleancache backend has dynamically enabled cleancache, but the | ||
70 | * filesystem referenced by that cleancache op has not enabled cleancache. | ||
71 | * As a result, CONFIG_CLEANCACHE can be enabled by default with essentially | ||
72 | * no measurable performance impact. | ||
73 | */ | ||
74 | |||
75 | static inline void cleancache_init_fs(struct super_block *sb) | ||
76 | { | ||
77 | if (cleancache_enabled) | ||
78 | __cleancache_init_fs(sb); | ||
79 | } | ||
80 | |||
81 | static inline void cleancache_init_shared_fs(char *uuid, struct super_block *sb) | ||
82 | { | ||
83 | if (cleancache_enabled) | ||
84 | __cleancache_init_shared_fs(uuid, sb); | ||
85 | } | ||
86 | |||
87 | static inline int cleancache_get_page(struct page *page) | ||
88 | { | ||
89 | int ret = -1; | ||
90 | |||
91 | if (cleancache_enabled && cleancache_fs_enabled(page)) | ||
92 | ret = __cleancache_get_page(page); | ||
93 | return ret; | ||
94 | } | ||
95 | |||
96 | static inline void cleancache_put_page(struct page *page) | ||
97 | { | ||
98 | if (cleancache_enabled && cleancache_fs_enabled(page)) | ||
99 | __cleancache_put_page(page); | ||
100 | } | ||
101 | |||
102 | static inline void cleancache_flush_page(struct address_space *mapping, | ||
103 | struct page *page) | ||
104 | { | ||
105 | /* careful... page->mapping is NULL sometimes when this is called */ | ||
106 | if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping)) | ||
107 | __cleancache_flush_page(mapping, page); | ||
108 | } | ||
109 | |||
110 | static inline void cleancache_flush_inode(struct address_space *mapping) | ||
111 | { | ||
112 | if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping)) | ||
113 | __cleancache_flush_inode(mapping); | ||
114 | } | ||
115 | |||
116 | static inline void cleancache_flush_fs(struct super_block *sb) | ||
117 | { | ||
118 | if (cleancache_enabled) | ||
119 | __cleancache_flush_fs(sb); | ||
120 | } | ||
121 | |||
122 | #endif /* _LINUX_CLEANCACHE_H */ | ||
diff --git a/mm/Kconfig b/mm/Kconfig index e9c0c61f2ddd..8ca47a5ee9c8 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
@@ -347,3 +347,26 @@ config NEED_PER_CPU_KM | |||
347 | depends on !SMP | 347 | depends on !SMP |
348 | bool | 348 | bool |
349 | default y | 349 | default y |
350 | |||
351 | config CLEANCACHE | ||
352 | bool "Enable cleancache driver to cache clean pages if tmem is present" | ||
353 | default n | ||
354 | help | ||
355 | Cleancache can be thought of as a page-granularity victim cache | ||
356 | for clean pages that the kernel's pageframe replacement algorithm | ||
357 | (PFRA) would like to keep around, but can't since there isn't enough | ||
358 | memory. So when the PFRA "evicts" a page, it first attempts to use | ||
359 | cleancache code to put the data contained in that page into | ||
360 | "transcendent memory", memory that is not directly accessible or | ||
361 | addressable by the kernel and is of unknown and possibly | ||
362 | time-varying size. And when a cleancache-enabled | ||
363 | filesystem wishes to access a page in a file on disk, it first | ||
364 | checks cleancache to see if it already contains it; if it does, | ||
365 | the page is copied into the kernel and a disk access is avoided. | ||
366 | When a transcendent memory driver is available (such as zcache or | ||
367 | Xen transcendent memory), a significant I/O reduction | ||
368 | may be achieved. When none is available, all cleancache calls | ||
369 | are reduced to a single check of a global enable flag, resulting | ||
370 | in a negligible performance hit. | ||
371 | |||
372 | If unsure, say Y to enable cleancache | ||
diff --git a/mm/Makefile b/mm/Makefile index 42a8326c3e3d..836e4163c1bf 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
@@ -49,3 +49,4 @@ obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o | |||
49 | obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o | 49 | obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o |
50 | obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o | 50 | obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o |
51 | obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o | 51 | obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o |
52 | obj-$(CONFIG_CLEANCACHE) += cleancache.o | ||
diff --git a/mm/cleancache.c b/mm/cleancache.c new file mode 100644 index 000000000000..bcaae4c2a770 --- /dev/null +++ b/mm/cleancache.c | |||
@@ -0,0 +1,244 @@ | |||
1 | /* | ||
2 | * Cleancache frontend | ||
3 | * | ||
4 | * This code provides the generic "frontend" layer to call a matching | ||
5 | * "backend" driver implementation of cleancache. See | ||
6 | * Documentation/vm/cleancache.txt for more information. | ||
7 | * | ||
8 | * Copyright (C) 2009-2010 Oracle Corp. All rights reserved. | ||
9 | * Author: Dan Magenheimer | ||
10 | * | ||
11 | * This work is licensed under the terms of the GNU GPL, version 2. | ||
12 | */ | ||
13 | |||
14 | #include <linux/module.h> | ||
15 | #include <linux/fs.h> | ||
16 | #include <linux/exportfs.h> | ||
17 | #include <linux/mm.h> | ||
18 | #include <linux/cleancache.h> | ||
19 | |||
20 | /* | ||
21 | * This global enablement flag may be read thousands of times per second | ||
22 | * by cleancache_get/put/flush even on systems where cleancache_ops | ||
23 | * is not claimed (e.g. cleancache is config'ed on but remains | ||
24 | * disabled), so is preferred to the slower alternative: a function | ||
25 | * call that checks a non-global. | ||
26 | */ | ||
27 | int cleancache_enabled; | ||
28 | EXPORT_SYMBOL(cleancache_enabled); | ||
29 | |||
30 | /* | ||
31 | * cleancache_ops is set by cleancache_register_ops to contain the pointers | ||
32 | * to the cleancache "backend" implementation functions. | ||
33 | */ | ||
34 | static struct cleancache_ops cleancache_ops; | ||
35 | |||
36 | /* useful stats available in /sys/kernel/mm/cleancache */ | ||
37 | static unsigned long cleancache_succ_gets; | ||
38 | static unsigned long cleancache_failed_gets; | ||
39 | static unsigned long cleancache_puts; | ||
40 | static unsigned long cleancache_flushes; | ||
41 | |||
42 | /* | ||
43 | * register operations for cleancache, returning previous thus allowing | ||
44 | * detection of multiple backends and possible nesting | ||
45 | */ | ||
46 | struct cleancache_ops cleancache_register_ops(struct cleancache_ops *ops) | ||
47 | { | ||
48 | struct cleancache_ops old = cleancache_ops; | ||
49 | |||
50 | cleancache_ops = *ops; | ||
51 | cleancache_enabled = 1; | ||
52 | return old; | ||
53 | } | ||
54 | EXPORT_SYMBOL(cleancache_register_ops); | ||
55 | |||
56 | /* Called by a cleancache-enabled filesystem at time of mount */ | ||
57 | void __cleancache_init_fs(struct super_block *sb) | ||
58 | { | ||
59 | sb->cleancache_poolid = (*cleancache_ops.init_fs)(PAGE_SIZE); | ||
60 | } | ||
61 | EXPORT_SYMBOL(__cleancache_init_fs); | ||
62 | |||
63 | /* Called by a cleancache-enabled clustered filesystem at time of mount */ | ||
64 | void __cleancache_init_shared_fs(char *uuid, struct super_block *sb) | ||
65 | { | ||
66 | sb->cleancache_poolid = | ||
67 | (*cleancache_ops.init_shared_fs)(uuid, PAGE_SIZE); | ||
68 | } | ||
69 | EXPORT_SYMBOL(__cleancache_init_shared_fs); | ||
70 | |||
71 | /* | ||
72 | * If the filesystem uses exportable filehandles, use the filehandle as | ||
73 | * the key, else use the inode number. | ||
74 | */ | ||
75 | static int cleancache_get_key(struct inode *inode, | ||
76 | struct cleancache_filekey *key) | ||
77 | { | ||
78 | int (*fhfn)(struct dentry *, __u32 *fh, int *, int); | ||
79 | int len = 0, maxlen = CLEANCACHE_KEY_MAX; | ||
80 | struct super_block *sb = inode->i_sb; | ||
81 | |||
82 | key->u.ino = inode->i_ino; | ||
83 | if (sb->s_export_op != NULL) { | ||
84 | fhfn = sb->s_export_op->encode_fh; | ||
85 | if (fhfn) { | ||
86 | struct dentry d; | ||
87 | d.d_inode = inode; | ||
88 | len = (*fhfn)(&d, &key->u.fh[0], &maxlen, 0); | ||
89 | if (len <= 0 || len == 255) | ||
90 | return -1; | ||
91 | if (maxlen > CLEANCACHE_KEY_MAX) | ||
92 | return -1; | ||
93 | } | ||
94 | } | ||
95 | return 0; | ||
96 | } | ||
97 | |||
98 | /* | ||
99 | * "Get" data from cleancache associated with the poolid/inode/index | ||
100 | * that were specified when the data was put to cleanache and, if | ||
101 | * successful, use it to fill the specified page with data and return 0. | ||
102 | * The pageframe is unchanged and returns -1 if the get fails. | ||
103 | * Page must be locked by caller. | ||
104 | */ | ||
105 | int __cleancache_get_page(struct page *page) | ||
106 | { | ||
107 | int ret = -1; | ||
108 | int pool_id; | ||
109 | struct cleancache_filekey key = { .u.key = { 0 } }; | ||
110 | |||
111 | VM_BUG_ON(!PageLocked(page)); | ||
112 | pool_id = page->mapping->host->i_sb->cleancache_poolid; | ||
113 | if (pool_id < 0) | ||
114 | goto out; | ||
115 | |||
116 | if (cleancache_get_key(page->mapping->host, &key) < 0) | ||
117 | goto out; | ||
118 | |||
119 | ret = (*cleancache_ops.get_page)(pool_id, key, page->index, page); | ||
120 | if (ret == 0) | ||
121 | cleancache_succ_gets++; | ||
122 | else | ||
123 | cleancache_failed_gets++; | ||
124 | out: | ||
125 | return ret; | ||
126 | } | ||
127 | EXPORT_SYMBOL(__cleancache_get_page); | ||
128 | |||
129 | /* | ||
130 | * "Put" data from a page to cleancache and associate it with the | ||
131 | * (previously-obtained per-filesystem) poolid and the page's, | ||
132 | * inode and page index. Page must be locked. Note that a put_page | ||
133 | * always "succeeds", though a subsequent get_page may succeed or fail. | ||
134 | */ | ||
135 | void __cleancache_put_page(struct page *page) | ||
136 | { | ||
137 | int pool_id; | ||
138 | struct cleancache_filekey key = { .u.key = { 0 } }; | ||
139 | |||
140 | VM_BUG_ON(!PageLocked(page)); | ||
141 | pool_id = page->mapping->host->i_sb->cleancache_poolid; | ||
142 | if (pool_id >= 0 && | ||
143 | cleancache_get_key(page->mapping->host, &key) >= 0) { | ||
144 | (*cleancache_ops.put_page)(pool_id, key, page->index, page); | ||
145 | cleancache_puts++; | ||
146 | } | ||
147 | } | ||
148 | EXPORT_SYMBOL(__cleancache_put_page); | ||
149 | |||
150 | /* | ||
151 | * Flush any data from cleancache associated with the poolid and the | ||
152 | * page's inode and page index so that a subsequent "get" will fail. | ||
153 | */ | ||
154 | void __cleancache_flush_page(struct address_space *mapping, struct page *page) | ||
155 | { | ||
156 | /* careful... page->mapping is NULL sometimes when this is called */ | ||
157 | int pool_id = mapping->host->i_sb->cleancache_poolid; | ||
158 | struct cleancache_filekey key = { .u.key = { 0 } }; | ||
159 | |||
160 | if (pool_id >= 0) { | ||
161 | VM_BUG_ON(!PageLocked(page)); | ||
162 | if (cleancache_get_key(mapping->host, &key) >= 0) { | ||
163 | (*cleancache_ops.flush_page)(pool_id, key, page->index); | ||
164 | cleancache_flushes++; | ||
165 | } | ||
166 | } | ||
167 | } | ||
168 | EXPORT_SYMBOL(__cleancache_flush_page); | ||
169 | |||
170 | /* | ||
171 | * Flush all data from cleancache associated with the poolid and the | ||
172 | * mappings's inode so that all subsequent gets to this poolid/inode | ||
173 | * will fail. | ||
174 | */ | ||
175 | void __cleancache_flush_inode(struct address_space *mapping) | ||
176 | { | ||
177 | int pool_id = mapping->host->i_sb->cleancache_poolid; | ||
178 | struct cleancache_filekey key = { .u.key = { 0 } }; | ||
179 | |||
180 | if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0) | ||
181 | (*cleancache_ops.flush_inode)(pool_id, key); | ||
182 | } | ||
183 | EXPORT_SYMBOL(__cleancache_flush_inode); | ||
184 | |||
185 | /* | ||
186 | * Called by any cleancache-enabled filesystem at time of unmount; | ||
187 | * note that pool_id is surrendered and may be reutrned by a subsequent | ||
188 | * cleancache_init_fs or cleancache_init_shared_fs | ||
189 | */ | ||
190 | void __cleancache_flush_fs(struct super_block *sb) | ||
191 | { | ||
192 | if (sb->cleancache_poolid >= 0) { | ||
193 | int old_poolid = sb->cleancache_poolid; | ||
194 | sb->cleancache_poolid = -1; | ||
195 | (*cleancache_ops.flush_fs)(old_poolid); | ||
196 | } | ||
197 | } | ||
198 | EXPORT_SYMBOL(__cleancache_flush_fs); | ||
199 | |||
#ifdef CONFIG_SYSFS

/* see Documentation/ABI/xxx/sysfs-kernel-mm-cleancache */

/*
 * CLEANCACHE_SYSFS_RO(x) generates, for the counter cleancache_x:
 *   - cleancache_x_show(), which prints the counter as "%lu\n", and
 *   - cleancache_x_attr, a read-only (0444) kobj_attribute named "x".
 */
#define CLEANCACHE_SYSFS_RO(_name) \
	static ssize_t cleancache_##_name##_show(struct kobject *kobj, \
				struct kobj_attribute *attr, char *buf) \
	{ \
		return sprintf(buf, "%lu\n", cleancache_##_name); \
	} \
	static struct kobj_attribute cleancache_##_name##_attr = { \
		.attr = { .name = __stringify(_name), .mode = 0444 }, \
		.show = cleancache_##_name##_show, \
	}

/* One read-only attribute per statistics counter. */
CLEANCACHE_SYSFS_RO(succ_gets);
CLEANCACHE_SYSFS_RO(failed_gets);
CLEANCACHE_SYSFS_RO(puts);
CLEANCACHE_SYSFS_RO(flushes);

/* NULL-terminated list of the attributes that make up the group. */
static struct attribute *cleancache_attrs[] = {
	&cleancache_succ_gets_attr.attr,
	&cleancache_failed_gets_attr.attr,
	&cleancache_puts_attr.attr,
	&cleancache_flushes_attr.attr,
	NULL,
};

/* The attributes are published together in a group named "cleancache". */
static struct attribute_group cleancache_attr_group = {
	.name = "cleancache",
	.attrs = cleancache_attrs,
};

#endif /* CONFIG_SYSFS */
234 | |||
235 | static int __init init_cleancache(void) | ||
236 | { | ||
237 | #ifdef CONFIG_SYSFS | ||
238 | int err; | ||
239 | |||
240 | err = sysfs_create_group(mm_kobj, &cleancache_attr_group); | ||
241 | #endif /* CONFIG_SYSFS */ | ||
242 | return 0; | ||
243 | } | ||
244 | module_init(init_cleancache) | ||