aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDan Magenheimer <dan.magenheimer@oracle.com>2011-05-26 12:01:36 -0400
committerDan Magenheimer <dan.magenheimer@oracle.com>2011-05-26 12:01:36 -0400
commit077b1f83a69d94f2918630a882d74939baca0bce (patch)
tree96c47594b48df7dd588e3d8827f0ac151c350bc0
parent9fdfdcf17151e8326c4d18cc133abc6e58f47568 (diff)
mm: cleancache core ops functions and config
This third patch of eight in this cleancache series provides the core code for cleancache that interfaces between the hooks in VFS and individual filesystems and a cleancache backend. It also includes build and config patches. Two new files are added: mm/cleancache.c and include/linux/cleancache.h. Note that CONFIG_CLEANCACHE can default to on; in systems that do not provide a cleancache backend, all hooks devolve to a simple check of a global enable flag, so performance impact should be negligible but can be reduced to zero impact if config'ed off. However for this first commit, it defaults to off. Details and a FAQ can be found in Documentation/vm/cleancache.txt Credits: Cleancache_ops design derived from Jeremy Fitzhardinge design for tmem [v8: dan.magenheimer@oracle.com: fix exportfs call affecting btrfs] [v8: akpm@linux-foundation.org: use static inline function, not macro] [v7: dan.magenheimer@oracle.com: cleanup sysfs and remove cleancache prefix] [v6: JBeulich@novell.com: robustly handle buggy fs encode_fh actor definition] [v5: jeremy@goop.org: clean up global usage and static var names] [v5: jeremy@goop.org: simplify init hook and any future fs init changes] [v5: hch@infradead.org: cleaner non-global interface for ops registration] [v4: adilger@sun.com: interface must support exportfs FS's] [v4: hch@infradead.org: interface must support 64-bit FS on 32-bit kernel] [v3: akpm@linux-foundation.org: use one ops struct to avoid pointer hops] [v3: akpm@linux-foundation.org: document and ensure PageLocked reqts are met] [v3: ngupta@vflare.org: fix success/fail codes, change funcs to void] [v2: viro@ZenIV.linux.org.uk: use sane types] Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com> Reviewed-by: Jeremy Fitzhardinge <jeremy@goop.org> Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Acked-by: Al Viro <viro@ZenIV.linux.org.uk> Acked-by: Andrew Morton <akpm@linux-foundation.org> Acked-by: Nitin Gupta <ngupta@vflare.org> Acked-by: Minchan Kim <minchan.kim@gmail.com> Acked-by: Andreas Dilger <adilger@sun.com> Acked-by: Jan Beulich <JBeulich@novell.com> Cc: Matthew Wilcox <matthew@wil.cx> Cc: Nick Piggin <npiggin@kernel.dk> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Rik Van Riel <riel@redhat.com> Cc: Chris Mason <chris.mason@oracle.com> Cc: Ted Ts'o <tytso@mit.edu> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <joel.becker@oracle.com>
-rw-r--r--include/linux/cleancache.h122
-rw-r--r--mm/Kconfig23
-rw-r--r--mm/Makefile1
-rw-r--r--mm/cleancache.c244
4 files changed, 390 insertions, 0 deletions
diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h
new file mode 100644
index 000000000000..04ffb2e6c9d0
--- /dev/null
+++ b/include/linux/cleancache.h
@@ -0,0 +1,122 @@
1#ifndef _LINUX_CLEANCACHE_H
2#define _LINUX_CLEANCACHE_H
3
4#include <linux/fs.h>
5#include <linux/exportfs.h>
6#include <linux/mm.h>
7
8#define CLEANCACHE_KEY_MAX 6
9
10/*
11 * cleancache requires every file with a page in cleancache to have a
12 * unique key unless/until the file is removed/truncated. For some
13 * filesystems, the inode number is unique, but for "modern" filesystems
14 * an exportable filehandle is required (see exportfs.h)
15 */
16struct cleancache_filekey {
17 union {
18 ino_t ino;
19 __u32 fh[CLEANCACHE_KEY_MAX];
20 u32 key[CLEANCACHE_KEY_MAX];
21 } u;
22};
23
24struct cleancache_ops {
25 int (*init_fs)(size_t);
26 int (*init_shared_fs)(char *uuid, size_t);
27 int (*get_page)(int, struct cleancache_filekey,
28 pgoff_t, struct page *);
29 void (*put_page)(int, struct cleancache_filekey,
30 pgoff_t, struct page *);
31 void (*flush_page)(int, struct cleancache_filekey, pgoff_t);
32 void (*flush_inode)(int, struct cleancache_filekey);
33 void (*flush_fs)(int);
34};
35
36extern struct cleancache_ops
37 cleancache_register_ops(struct cleancache_ops *ops);
38extern void __cleancache_init_fs(struct super_block *);
39extern void __cleancache_init_shared_fs(char *, struct super_block *);
40extern int __cleancache_get_page(struct page *);
41extern void __cleancache_put_page(struct page *);
42extern void __cleancache_flush_page(struct address_space *, struct page *);
43extern void __cleancache_flush_inode(struct address_space *);
44extern void __cleancache_flush_fs(struct super_block *);
45extern int cleancache_enabled;
46
47#ifdef CONFIG_CLEANCACHE
48static inline bool cleancache_fs_enabled(struct page *page)
49{
50 return page->mapping->host->i_sb->cleancache_poolid >= 0;
51}
52static inline bool cleancache_fs_enabled_mapping(struct address_space *mapping)
53{
54 return mapping->host->i_sb->cleancache_poolid >= 0;
55}
56#else
57#define cleancache_enabled (0)
58#define cleancache_fs_enabled(_page) (0)
59#define cleancache_fs_enabled_mapping(_page) (0)
60#endif
61
62/*
63 * The shim layer provided by these inline functions allows the compiler
64 * to reduce all cleancache hooks to nothingness if CONFIG_CLEANCACHE
65 * is disabled, to a single global variable check if CONFIG_CLEANCACHE
66 * is enabled but no cleancache "backend" has dynamically enabled it,
67 * and, for the most frequent cleancache ops, to a single global variable
68 * check plus a superblock element comparison if CONFIG_CLEANCACHE is enabled
69 * and a cleancache backend has dynamically enabled cleancache, but the
70 * filesystem referenced by that cleancache op has not enabled cleancache.
71 * As a result, CONFIG_CLEANCACHE can be enabled by default with essentially
72 * no measurable performance impact.
73 */
74
75static inline void cleancache_init_fs(struct super_block *sb)
76{
77 if (cleancache_enabled)
78 __cleancache_init_fs(sb);
79}
80
81static inline void cleancache_init_shared_fs(char *uuid, struct super_block *sb)
82{
83 if (cleancache_enabled)
84 __cleancache_init_shared_fs(uuid, sb);
85}
86
87static inline int cleancache_get_page(struct page *page)
88{
89 int ret = -1;
90
91 if (cleancache_enabled && cleancache_fs_enabled(page))
92 ret = __cleancache_get_page(page);
93 return ret;
94}
95
96static inline void cleancache_put_page(struct page *page)
97{
98 if (cleancache_enabled && cleancache_fs_enabled(page))
99 __cleancache_put_page(page);
100}
101
102static inline void cleancache_flush_page(struct address_space *mapping,
103 struct page *page)
104{
105 /* careful... page->mapping is NULL sometimes when this is called */
106 if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping))
107 __cleancache_flush_page(mapping, page);
108}
109
110static inline void cleancache_flush_inode(struct address_space *mapping)
111{
112 if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping))
113 __cleancache_flush_inode(mapping);
114}
115
116static inline void cleancache_flush_fs(struct super_block *sb)
117{
118 if (cleancache_enabled)
119 __cleancache_flush_fs(sb);
120}
121
122#endif /* _LINUX_CLEANCACHE_H */
diff --git a/mm/Kconfig b/mm/Kconfig
index e9c0c61f2ddd..8ca47a5ee9c8 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -347,3 +347,26 @@ config NEED_PER_CPU_KM
347 depends on !SMP 347 depends on !SMP
348 bool 348 bool
349 default y 349 default y
350
351config CLEANCACHE
352 bool "Enable cleancache driver to cache clean pages if tmem is present"
353 default n
354 help
355 Cleancache can be thought of as a page-granularity victim cache
356 for clean pages that the kernel's pageframe replacement algorithm
357 (PFRA) would like to keep around, but can't since there isn't enough
358 memory. So when the PFRA "evicts" a page, it first attempts to use
359 cleancacne code to put the data contained in that page into
360 "transcendent memory", memory that is not directly accessible or
361 addressable by the kernel and is of unknown and possibly
362 time-varying size. And when a cleancache-enabled
363 filesystem wishes to access a page in a file on disk, it first
364 checks cleancache to see if it already contains it; if it does,
365 the page is copied into the kernel and a disk access is avoided.
366 When a transcendent memory driver is available (such as zcache or
367 Xen transcendent memory), a significant I/O reduction
368 may be achieved. When none is available, all cleancache calls
369 are reduced to a single pointer-compare-against-NULL resulting
370 in a negligible performance hit.
371
372 If unsure, say Y to enable cleancache
diff --git a/mm/Makefile b/mm/Makefile
index 42a8326c3e3d..836e4163c1bf 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -49,3 +49,4 @@ obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
49obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o 49obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
50obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o 50obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
51obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o 51obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
52obj-$(CONFIG_CLEANCACHE) += cleancache.o
diff --git a/mm/cleancache.c b/mm/cleancache.c
new file mode 100644
index 000000000000..bcaae4c2a770
--- /dev/null
+++ b/mm/cleancache.c
@@ -0,0 +1,244 @@
1/*
2 * Cleancache frontend
3 *
4 * This code provides the generic "frontend" layer to call a matching
5 * "backend" driver implementation of cleancache. See
6 * Documentation/vm/cleancache.txt for more information.
7 *
8 * Copyright (C) 2009-2010 Oracle Corp. All rights reserved.
9 * Author: Dan Magenheimer
10 *
11 * This work is licensed under the terms of the GNU GPL, version 2.
12 */
13
14#include <linux/module.h>
15#include <linux/fs.h>
16#include <linux/exportfs.h>
17#include <linux/mm.h>
18#include <linux/cleancache.h>
19
20/*
21 * This global enablement flag may be read thousands of times per second
22 * by cleancache_get/put/flush even on systems where cleancache_ops
23 * is not claimed (e.g. cleancache is config'ed on but remains
24 * disabled), so is preferred to the slower alternative: a function
25 * call that checks a non-global.
26 */
27int cleancache_enabled;
28EXPORT_SYMBOL(cleancache_enabled);
29
30/*
31 * cleancache_ops is set by cleancache_ops_register to contain the pointers
32 * to the cleancache "backend" implementation functions.
33 */
34static struct cleancache_ops cleancache_ops;
35
36/* useful stats available in /sys/kernel/mm/cleancache */
37static unsigned long cleancache_succ_gets;
38static unsigned long cleancache_failed_gets;
39static unsigned long cleancache_puts;
40static unsigned long cleancache_flushes;
41
42/*
43 * register operations for cleancache, returning previous thus allowing
44 * detection of multiple backends and possible nesting
45 */
46struct cleancache_ops cleancache_register_ops(struct cleancache_ops *ops)
47{
48 struct cleancache_ops old = cleancache_ops;
49
50 cleancache_ops = *ops;
51 cleancache_enabled = 1;
52 return old;
53}
54EXPORT_SYMBOL(cleancache_register_ops);
55
56/* Called by a cleancache-enabled filesystem at time of mount */
57void __cleancache_init_fs(struct super_block *sb)
58{
59 sb->cleancache_poolid = (*cleancache_ops.init_fs)(PAGE_SIZE);
60}
61EXPORT_SYMBOL(__cleancache_init_fs);
62
63/* Called by a cleancache-enabled clustered filesystem at time of mount */
64void __cleancache_init_shared_fs(char *uuid, struct super_block *sb)
65{
66 sb->cleancache_poolid =
67 (*cleancache_ops.init_shared_fs)(uuid, PAGE_SIZE);
68}
69EXPORT_SYMBOL(__cleancache_init_shared_fs);
70
71/*
72 * If the filesystem uses exportable filehandles, use the filehandle as
73 * the key, else use the inode number.
74 */
75static int cleancache_get_key(struct inode *inode,
76 struct cleancache_filekey *key)
77{
78 int (*fhfn)(struct dentry *, __u32 *fh, int *, int);
79 int len = 0, maxlen = CLEANCACHE_KEY_MAX;
80 struct super_block *sb = inode->i_sb;
81
82 key->u.ino = inode->i_ino;
83 if (sb->s_export_op != NULL) {
84 fhfn = sb->s_export_op->encode_fh;
85 if (fhfn) {
86 struct dentry d;
87 d.d_inode = inode;
88 len = (*fhfn)(&d, &key->u.fh[0], &maxlen, 0);
89 if (len <= 0 || len == 255)
90 return -1;
91 if (maxlen > CLEANCACHE_KEY_MAX)
92 return -1;
93 }
94 }
95 return 0;
96}
97
98/*
99 * "Get" data from cleancache associated with the poolid/inode/index
100 * that were specified when the data was put to cleanache and, if
101 * successful, use it to fill the specified page with data and return 0.
102 * The pageframe is unchanged and returns -1 if the get fails.
103 * Page must be locked by caller.
104 */
105int __cleancache_get_page(struct page *page)
106{
107 int ret = -1;
108 int pool_id;
109 struct cleancache_filekey key = { .u.key = { 0 } };
110
111 VM_BUG_ON(!PageLocked(page));
112 pool_id = page->mapping->host->i_sb->cleancache_poolid;
113 if (pool_id < 0)
114 goto out;
115
116 if (cleancache_get_key(page->mapping->host, &key) < 0)
117 goto out;
118
119 ret = (*cleancache_ops.get_page)(pool_id, key, page->index, page);
120 if (ret == 0)
121 cleancache_succ_gets++;
122 else
123 cleancache_failed_gets++;
124out:
125 return ret;
126}
127EXPORT_SYMBOL(__cleancache_get_page);
128
129/*
130 * "Put" data from a page to cleancache and associate it with the
131 * (previously-obtained per-filesystem) poolid and the page's,
132 * inode and page index. Page must be locked. Note that a put_page
133 * always "succeeds", though a subsequent get_page may succeed or fail.
134 */
135void __cleancache_put_page(struct page *page)
136{
137 int pool_id;
138 struct cleancache_filekey key = { .u.key = { 0 } };
139
140 VM_BUG_ON(!PageLocked(page));
141 pool_id = page->mapping->host->i_sb->cleancache_poolid;
142 if (pool_id >= 0 &&
143 cleancache_get_key(page->mapping->host, &key) >= 0) {
144 (*cleancache_ops.put_page)(pool_id, key, page->index, page);
145 cleancache_puts++;
146 }
147}
148EXPORT_SYMBOL(__cleancache_put_page);
149
150/*
151 * Flush any data from cleancache associated with the poolid and the
152 * page's inode and page index so that a subsequent "get" will fail.
153 */
154void __cleancache_flush_page(struct address_space *mapping, struct page *page)
155{
156 /* careful... page->mapping is NULL sometimes when this is called */
157 int pool_id = mapping->host->i_sb->cleancache_poolid;
158 struct cleancache_filekey key = { .u.key = { 0 } };
159
160 if (pool_id >= 0) {
161 VM_BUG_ON(!PageLocked(page));
162 if (cleancache_get_key(mapping->host, &key) >= 0) {
163 (*cleancache_ops.flush_page)(pool_id, key, page->index);
164 cleancache_flushes++;
165 }
166 }
167}
168EXPORT_SYMBOL(__cleancache_flush_page);
169
170/*
171 * Flush all data from cleancache associated with the poolid and the
172 * mappings's inode so that all subsequent gets to this poolid/inode
173 * will fail.
174 */
175void __cleancache_flush_inode(struct address_space *mapping)
176{
177 int pool_id = mapping->host->i_sb->cleancache_poolid;
178 struct cleancache_filekey key = { .u.key = { 0 } };
179
180 if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0)
181 (*cleancache_ops.flush_inode)(pool_id, key);
182}
183EXPORT_SYMBOL(__cleancache_flush_inode);
184
185/*
186 * Called by any cleancache-enabled filesystem at time of unmount;
187 * note that pool_id is surrendered and may be reutrned by a subsequent
188 * cleancache_init_fs or cleancache_init_shared_fs
189 */
190void __cleancache_flush_fs(struct super_block *sb)
191{
192 if (sb->cleancache_poolid >= 0) {
193 int old_poolid = sb->cleancache_poolid;
194 sb->cleancache_poolid = -1;
195 (*cleancache_ops.flush_fs)(old_poolid);
196 }
197}
198EXPORT_SYMBOL(__cleancache_flush_fs);
199
200#ifdef CONFIG_SYSFS
201
202/* see Documentation/ABI/xxx/sysfs-kernel-mm-cleancache */
203
204#define CLEANCACHE_SYSFS_RO(_name) \
205 static ssize_t cleancache_##_name##_show(struct kobject *kobj, \
206 struct kobj_attribute *attr, char *buf) \
207 { \
208 return sprintf(buf, "%lu\n", cleancache_##_name); \
209 } \
210 static struct kobj_attribute cleancache_##_name##_attr = { \
211 .attr = { .name = __stringify(_name), .mode = 0444 }, \
212 .show = cleancache_##_name##_show, \
213 }
214
215CLEANCACHE_SYSFS_RO(succ_gets);
216CLEANCACHE_SYSFS_RO(failed_gets);
217CLEANCACHE_SYSFS_RO(puts);
218CLEANCACHE_SYSFS_RO(flushes);
219
220static struct attribute *cleancache_attrs[] = {
221 &cleancache_succ_gets_attr.attr,
222 &cleancache_failed_gets_attr.attr,
223 &cleancache_puts_attr.attr,
224 &cleancache_flushes_attr.attr,
225 NULL,
226};
227
228static struct attribute_group cleancache_attr_group = {
229 .attrs = cleancache_attrs,
230 .name = "cleancache",
231};
232
233#endif /* CONFIG_SYSFS */
234
235static int __init init_cleancache(void)
236{
237#ifdef CONFIG_SYSFS
238 int err;
239
240 err = sysfs_create_group(mm_kobj, &cleancache_attr_group);
241#endif /* CONFIG_SYSFS */
242 return 0;
243}
244module_init(init_cleancache)