diff options
Diffstat (limited to 'mm')
| -rw-r--r-- | mm/Kconfig | 23 | ||||
| -rw-r--r-- | mm/Makefile | 1 | ||||
| -rw-r--r-- | mm/cleancache.c | 244 | ||||
| -rw-r--r-- | mm/filemap.c | 11 | ||||
| -rw-r--r-- | mm/truncate.c | 6 |
5 files changed, 285 insertions, 0 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index e9c0c61f2dd..8ca47a5ee9c 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
| @@ -347,3 +347,26 @@ config NEED_PER_CPU_KM | |||
| 347 | depends on !SMP | 347 | depends on !SMP |
| 348 | bool | 348 | bool |
| 349 | default y | 349 | default y |
| 350 | |||
| 351 | config CLEANCACHE | ||
| 352 | bool "Enable cleancache driver to cache clean pages if tmem is present" | ||
| 353 | default n | ||
| 354 | help | ||
| 355 | Cleancache can be thought of as a page-granularity victim cache | ||
| 356 | for clean pages that the kernel's pageframe replacement algorithm | ||
| 357 | (PFRA) would like to keep around, but can't since there isn't enough | ||
| 358 | memory. So when the PFRA "evicts" a page, it first attempts to use | ||
| 359 | cleancache code to put the data contained in that page into | ||
| 360 | "transcendent memory", memory that is not directly accessible or | ||
| 361 | addressable by the kernel and is of unknown and possibly | ||
| 362 | time-varying size. And when a cleancache-enabled | ||
| 363 | filesystem wishes to access a page in a file on disk, it first | ||
| 364 | checks cleancache to see if it already contains it; if it does, | ||
| 365 | the page is copied into the kernel and a disk access is avoided. | ||
| 366 | When a transcendent memory driver is available (such as zcache or | ||
| 367 | Xen transcendent memory), a significant I/O reduction | ||
| 368 | may be achieved. When none is available, all cleancache calls | ||
| 369 | are reduced to a single pointer-compare-against-NULL resulting | ||
| 370 | in a negligible performance hit. | ||
| 371 | |||
| 372 | If unsure, say Y to enable cleancache | ||
diff --git a/mm/Makefile b/mm/Makefile index 42a8326c3e3..836e4163c1b 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
| @@ -49,3 +49,4 @@ obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o | |||
| 49 | obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o | 49 | obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o |
| 50 | obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o | 50 | obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o |
| 51 | obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o | 51 | obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o |
| 52 | obj-$(CONFIG_CLEANCACHE) += cleancache.o | ||
diff --git a/mm/cleancache.c b/mm/cleancache.c new file mode 100644 index 00000000000..bcaae4c2a77 --- /dev/null +++ b/mm/cleancache.c | |||
| @@ -0,0 +1,244 @@ | |||
| 1 | /* | ||
| 2 | * Cleancache frontend | ||
| 3 | * | ||
| 4 | * This code provides the generic "frontend" layer to call a matching | ||
| 5 | * "backend" driver implementation of cleancache. See | ||
| 6 | * Documentation/vm/cleancache.txt for more information. | ||
| 7 | * | ||
| 8 | * Copyright (C) 2009-2010 Oracle Corp. All rights reserved. | ||
| 9 | * Author: Dan Magenheimer | ||
| 10 | * | ||
| 11 | * This work is licensed under the terms of the GNU GPL, version 2. | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include <linux/module.h> | ||
| 15 | #include <linux/fs.h> | ||
| 16 | #include <linux/exportfs.h> | ||
| 17 | #include <linux/mm.h> | ||
| 18 | #include <linux/cleancache.h> | ||
| 19 | |||
| 20 | /* | ||
| 21 | * Global enablement flag, set to 1 by cleancache_register_ops(). It may | ||
| 22 | * be read thousands of times per second by cleancache_get/put/flush even | ||
| 23 | * on systems where cleancache_ops is never claimed (e.g. cleancache is | ||
| 24 | * config'ed on but remains disabled), so a plain global is preferred to | ||
| 25 | * the slower alternative: a function call that checks a non-global. | ||
| 26 | */ | ||
| 27 | int cleancache_enabled; | ||
| 28 | EXPORT_SYMBOL(cleancache_enabled); | ||
| 29 | |||
| 30 | /* | ||
| 31 | * cleancache_ops is filled in by cleancache_register_ops() with a copy of | ||
| 32 | * the pointers to the cleancache "backend" implementation functions. | ||
| 33 | */ | ||
| 34 | static struct cleancache_ops cleancache_ops; | ||
| 35 | |||
| 36 | /* | ||
| | * Useful stats available in /sys/kernel/mm/cleancache (only exported | ||
| | * when CONFIG_SYSFS is set). They are plain counters bumped with ++ at | ||
| | * the call sites in this file: | ||
| | *   cleancache_succ_gets   - backend get_page calls that returned 0 | ||
| | *   cleancache_failed_gets - backend get_page calls that returned non-zero | ||
| | *   cleancache_puts        - backend put_page calls issued | ||
| | *   cleancache_flushes     - backend flush_page calls issued (inode and | ||
| | *                            filesystem flushes are not counted) | ||
| | */ | ||
| 37 | static unsigned long cleancache_succ_gets; | ||
| 38 | static unsigned long cleancache_failed_gets; | ||
| 39 | static unsigned long cleancache_puts; | ||
| 40 | static unsigned long cleancache_flushes; | ||
| 41 | |||
| 42 | /* | ||
| 43 | * register operations for cleancache, returning previous thus allowing | ||
| 44 | * detection of multiple backends and possible nesting | ||
| 45 | */ | ||
| 46 | struct cleancache_ops cleancache_register_ops(struct cleancache_ops *ops) | ||
| 47 | { | ||
| 48 | struct cleancache_ops old = cleancache_ops; | ||
| 49 | |||
| 50 | cleancache_ops = *ops; | ||
| 51 | cleancache_enabled = 1; | ||
| 52 | return old; | ||
| 53 | } | ||
| 54 | EXPORT_SYMBOL(cleancache_register_ops); | ||
| 55 | |||
| 56 | /* Called by a cleancache-enabled filesystem at time of mount */ | ||
| 57 | void __cleancache_init_fs(struct super_block *sb) | ||
| 58 | { | ||
| 59 | sb->cleancache_poolid = (*cleancache_ops.init_fs)(PAGE_SIZE); | ||
| 60 | } | ||
| 61 | EXPORT_SYMBOL(__cleancache_init_fs); | ||
| 62 | |||
| 63 | /* Called by a cleancache-enabled clustered filesystem at time of mount */ | ||
| 64 | void __cleancache_init_shared_fs(char *uuid, struct super_block *sb) | ||
| 65 | { | ||
| 66 | sb->cleancache_poolid = | ||
| 67 | (*cleancache_ops.init_shared_fs)(uuid, PAGE_SIZE); | ||
| 68 | } | ||
| 69 | EXPORT_SYMBOL(__cleancache_init_shared_fs); | ||
| 70 | |||
| 71 | /* | ||
| 72 | * If the filesystem uses exportable filehandles, use the filehandle as | ||
| 73 | * the key, else use the inode number. | ||
| 74 | */ | ||
| 75 | static int cleancache_get_key(struct inode *inode, | ||
| 76 | struct cleancache_filekey *key) | ||
| 77 | { | ||
| 78 | int (*fhfn)(struct dentry *, __u32 *fh, int *, int); | ||
| 79 | int len = 0, maxlen = CLEANCACHE_KEY_MAX; | ||
| 80 | struct super_block *sb = inode->i_sb; | ||
| 81 | |||
| 82 | key->u.ino = inode->i_ino; | ||
| 83 | if (sb->s_export_op != NULL) { | ||
| 84 | fhfn = sb->s_export_op->encode_fh; | ||
| 85 | if (fhfn) { | ||
| 86 | struct dentry d; | ||
| 87 | d.d_inode = inode; | ||
| 88 | len = (*fhfn)(&d, &key->u.fh[0], &maxlen, 0); | ||
| 89 | if (len <= 0 || len == 255) | ||
| 90 | return -1; | ||
| 91 | if (maxlen > CLEANCACHE_KEY_MAX) | ||
| 92 | return -1; | ||
| 93 | } | ||
| 94 | } | ||
| 95 | return 0; | ||
| 96 | } | ||
| 97 | |||
| 98 | /* | ||
| 99 | * "Get" data from cleancache associated with the poolid/inode/index | ||
| 100 | * that were specified when the data was put to cleanache and, if | ||
| 101 | * successful, use it to fill the specified page with data and return 0. | ||
| 102 | * The pageframe is unchanged and returns -1 if the get fails. | ||
| 103 | * Page must be locked by caller. | ||
| 104 | */ | ||
| 105 | int __cleancache_get_page(struct page *page) | ||
| 106 | { | ||
| 107 | int ret = -1; | ||
| 108 | int pool_id; | ||
| 109 | struct cleancache_filekey key = { .u.key = { 0 } }; | ||
| 110 | |||
| 111 | VM_BUG_ON(!PageLocked(page)); | ||
| 112 | pool_id = page->mapping->host->i_sb->cleancache_poolid; | ||
| 113 | if (pool_id < 0) | ||
| 114 | goto out; | ||
| 115 | |||
| 116 | if (cleancache_get_key(page->mapping->host, &key) < 0) | ||
| 117 | goto out; | ||
| 118 | |||
| 119 | ret = (*cleancache_ops.get_page)(pool_id, key, page->index, page); | ||
| 120 | if (ret == 0) | ||
| 121 | cleancache_succ_gets++; | ||
| 122 | else | ||
| 123 | cleancache_failed_gets++; | ||
| 124 | out: | ||
| 125 | return ret; | ||
| 126 | } | ||
| 127 | EXPORT_SYMBOL(__cleancache_get_page); | ||
| 128 | |||
| 129 | /* | ||
| 130 | * "Put" data from a page to cleancache and associate it with the | ||
| 131 | * (previously-obtained per-filesystem) poolid and the page's, | ||
| 132 | * inode and page index. Page must be locked. Note that a put_page | ||
| 133 | * always "succeeds", though a subsequent get_page may succeed or fail. | ||
| 134 | */ | ||
| 135 | void __cleancache_put_page(struct page *page) | ||
| 136 | { | ||
| 137 | int pool_id; | ||
| 138 | struct cleancache_filekey key = { .u.key = { 0 } }; | ||
| 139 | |||
| 140 | VM_BUG_ON(!PageLocked(page)); | ||
| 141 | pool_id = page->mapping->host->i_sb->cleancache_poolid; | ||
| 142 | if (pool_id >= 0 && | ||
| 143 | cleancache_get_key(page->mapping->host, &key) >= 0) { | ||
| 144 | (*cleancache_ops.put_page)(pool_id, key, page->index, page); | ||
| 145 | cleancache_puts++; | ||
| 146 | } | ||
| 147 | } | ||
| 148 | EXPORT_SYMBOL(__cleancache_put_page); | ||
| 149 | |||
| 150 | /* | ||
| 151 | * Flush any data from cleancache associated with the poolid and the | ||
| 152 | * page's inode and page index so that a subsequent "get" will fail. | ||
| 153 | */ | ||
| 154 | void __cleancache_flush_page(struct address_space *mapping, struct page *page) | ||
| 155 | { | ||
| 156 | /* careful... page->mapping is NULL sometimes when this is called */ | ||
| 157 | int pool_id = mapping->host->i_sb->cleancache_poolid; | ||
| 158 | struct cleancache_filekey key = { .u.key = { 0 } }; | ||
| 159 | |||
| 160 | if (pool_id >= 0) { | ||
| 161 | VM_BUG_ON(!PageLocked(page)); | ||
| 162 | if (cleancache_get_key(mapping->host, &key) >= 0) { | ||
| 163 | (*cleancache_ops.flush_page)(pool_id, key, page->index); | ||
| 164 | cleancache_flushes++; | ||
| 165 | } | ||
| 166 | } | ||
| 167 | } | ||
| 168 | EXPORT_SYMBOL(__cleancache_flush_page); | ||
| 169 | |||
| 170 | /* | ||
| 171 | * Flush all data from cleancache associated with the poolid and the | ||
| 172 | * mappings's inode so that all subsequent gets to this poolid/inode | ||
| 173 | * will fail. | ||
| 174 | */ | ||
| 175 | void __cleancache_flush_inode(struct address_space *mapping) | ||
| 176 | { | ||
| 177 | int pool_id = mapping->host->i_sb->cleancache_poolid; | ||
| 178 | struct cleancache_filekey key = { .u.key = { 0 } }; | ||
| 179 | |||
| 180 | if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0) | ||
| 181 | (*cleancache_ops.flush_inode)(pool_id, key); | ||
| 182 | } | ||
| 183 | EXPORT_SYMBOL(__cleancache_flush_inode); | ||
| 184 | |||
| 185 | /* | ||
| 186 | * Called by any cleancache-enabled filesystem at time of unmount; | ||
| 187 | * note that pool_id is surrendered and may be reutrned by a subsequent | ||
| 188 | * cleancache_init_fs or cleancache_init_shared_fs | ||
| 189 | */ | ||
| 190 | void __cleancache_flush_fs(struct super_block *sb) | ||
| 191 | { | ||
| 192 | if (sb->cleancache_poolid >= 0) { | ||
| 193 | int old_poolid = sb->cleancache_poolid; | ||
| 194 | sb->cleancache_poolid = -1; | ||
| 195 | (*cleancache_ops.flush_fs)(old_poolid); | ||
| 196 | } | ||
| 197 | } | ||
| 198 | EXPORT_SYMBOL(__cleancache_flush_fs); | ||
| 199 | |||
| 200 | #ifdef CONFIG_SYSFS | ||
| 201 | |||
| 202 | /* | ||
| | * Read-only sysfs view of the cleancache counters; | ||
| | * see Documentation/ABI/xxx/sysfs-kernel-mm-cleancache | ||
| | * (NOTE(review): "xxx" looks like a placeholder directory - confirm). | ||
| | * | ||
| | * CLEANCACHE_SYSFS_RO(name) generates, for the counter cleancache_<name>: | ||
| | *   - cleancache_<name>_show(), which prints the counter as "%lu\n"; | ||
| | *   - cleancache_<name>_attr, a kobj_attribute called "<name>" with | ||
| | *     mode 0444 and no store method. | ||
| | * The four attributes are collected in cleancache_attrs[] (NULL | ||
| | * terminated) and published as the attribute group named "cleancache", | ||
| | * which init_cleancache() creates on mm_kobj. | ||
| | */ | ||
| 203 | |||
| 204 | #define CLEANCACHE_SYSFS_RO(_name) \ | ||
| 205 | static ssize_t cleancache_##_name##_show(struct kobject *kobj, \ | ||
| 206 | struct kobj_attribute *attr, char *buf) \ | ||
| 207 | { \ | ||
| 208 | return sprintf(buf, "%lu\n", cleancache_##_name); \ | ||
| 209 | } \ | ||
| 210 | static struct kobj_attribute cleancache_##_name##_attr = { \ | ||
| 211 | .attr = { .name = __stringify(_name), .mode = 0444 }, \ | ||
| 212 | .show = cleancache_##_name##_show, \ | ||
| 213 | } | ||
| 214 | |||
| 215 | CLEANCACHE_SYSFS_RO(succ_gets); | ||
| 216 | CLEANCACHE_SYSFS_RO(failed_gets); | ||
| 217 | CLEANCACHE_SYSFS_RO(puts); | ||
| 218 | CLEANCACHE_SYSFS_RO(flushes); | ||
| 219 | |||
| 220 | static struct attribute *cleancache_attrs[] = { | ||
| 221 | &cleancache_succ_gets_attr.attr, | ||
| 222 | &cleancache_failed_gets_attr.attr, | ||
| 223 | &cleancache_puts_attr.attr, | ||
| 224 | &cleancache_flushes_attr.attr, | ||
| 225 | NULL, | ||
| 226 | }; | ||
| 227 | |||
| 228 | static struct attribute_group cleancache_attr_group = { | ||
| 229 | .attrs = cleancache_attrs, | ||
| 230 | .name = "cleancache", | ||
| 231 | }; | ||
| 232 | |||
| 233 | #endif /* CONFIG_SYSFS */ | ||
| 234 | |||
| 235 | static int __init init_cleancache(void) | ||
| 236 | { | ||
| 237 | #ifdef CONFIG_SYSFS | ||
| 238 | int err; | ||
| 239 | |||
| 240 | err = sysfs_create_group(mm_kobj, &cleancache_attr_group); | ||
| 241 | #endif /* CONFIG_SYSFS */ | ||
| 242 | return 0; | ||
| 243 | } | ||
| 244 | module_init(init_cleancache) | ||
diff --git a/mm/filemap.c b/mm/filemap.c index 68e782b3d3d..7455ccd8bda 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
| @@ -34,6 +34,7 @@ | |||
| 34 | #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */ | 34 | #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */ |
| 35 | #include <linux/memcontrol.h> | 35 | #include <linux/memcontrol.h> |
| 36 | #include <linux/mm_inline.h> /* for page_is_file_cache() */ | 36 | #include <linux/mm_inline.h> /* for page_is_file_cache() */ |
| 37 | #include <linux/cleancache.h> | ||
| 37 | #include "internal.h" | 38 | #include "internal.h" |
| 38 | 39 | ||
| 39 | /* | 40 | /* |
| @@ -118,6 +119,16 @@ void __delete_from_page_cache(struct page *page) | |||
| 118 | { | 119 | { |
| 119 | struct address_space *mapping = page->mapping; | 120 | struct address_space *mapping = page->mapping; |
| 120 | 121 | ||
| 122 | /* | ||
| 123 | * if we're uptodate, flush out into the cleancache, otherwise | ||
| 124 | * invalidate any existing cleancache entries. We can't leave | ||
| 125 | * stale data around in the cleancache once our page is gone | ||
| 126 | */ | ||
| 127 | if (PageUptodate(page) && PageMappedToDisk(page)) | ||
| 128 | cleancache_put_page(page); | ||
| 129 | else | ||
| 130 | cleancache_flush_page(mapping, page); | ||
| 131 | |||
| 121 | radix_tree_delete(&mapping->page_tree, page->index); | 132 | radix_tree_delete(&mapping->page_tree, page->index); |
| 122 | page->mapping = NULL; | 133 | page->mapping = NULL; |
| 123 | mapping->nrpages--; | 134 | mapping->nrpages--; |
diff --git a/mm/truncate.c b/mm/truncate.c index a9566752913..3a29a618021 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | #include <linux/task_io_accounting_ops.h> | 19 | #include <linux/task_io_accounting_ops.h> |
| 20 | #include <linux/buffer_head.h> /* grr. try_to_release_page, | 20 | #include <linux/buffer_head.h> /* grr. try_to_release_page, |
| 21 | do_invalidatepage */ | 21 | do_invalidatepage */ |
| 22 | #include <linux/cleancache.h> | ||
| 22 | #include "internal.h" | 23 | #include "internal.h" |
| 23 | 24 | ||
| 24 | 25 | ||
| @@ -51,6 +52,7 @@ void do_invalidatepage(struct page *page, unsigned long offset) | |||
| 51 | static inline void truncate_partial_page(struct page *page, unsigned partial) | 52 | static inline void truncate_partial_page(struct page *page, unsigned partial) |
| 52 | { | 53 | { |
| 53 | zero_user_segment(page, partial, PAGE_CACHE_SIZE); | 54 | zero_user_segment(page, partial, PAGE_CACHE_SIZE); |
| 55 | cleancache_flush_page(page->mapping, page); | ||
| 54 | if (page_has_private(page)) | 56 | if (page_has_private(page)) |
| 55 | do_invalidatepage(page, partial); | 57 | do_invalidatepage(page, partial); |
| 56 | } | 58 | } |
| @@ -214,6 +216,7 @@ void truncate_inode_pages_range(struct address_space *mapping, | |||
| 214 | pgoff_t next; | 216 | pgoff_t next; |
| 215 | int i; | 217 | int i; |
| 216 | 218 | ||
| 219 | cleancache_flush_inode(mapping); | ||
| 217 | if (mapping->nrpages == 0) | 220 | if (mapping->nrpages == 0) |
| 218 | return; | 221 | return; |
| 219 | 222 | ||
| @@ -291,6 +294,7 @@ void truncate_inode_pages_range(struct address_space *mapping, | |||
| 291 | pagevec_release(&pvec); | 294 | pagevec_release(&pvec); |
| 292 | mem_cgroup_uncharge_end(); | 295 | mem_cgroup_uncharge_end(); |
| 293 | } | 296 | } |
| 297 | cleancache_flush_inode(mapping); | ||
| 294 | } | 298 | } |
| 295 | EXPORT_SYMBOL(truncate_inode_pages_range); | 299 | EXPORT_SYMBOL(truncate_inode_pages_range); |
| 296 | 300 | ||
| @@ -440,6 +444,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping, | |||
| 440 | int did_range_unmap = 0; | 444 | int did_range_unmap = 0; |
| 441 | int wrapped = 0; | 445 | int wrapped = 0; |
| 442 | 446 | ||
| 447 | cleancache_flush_inode(mapping); | ||
| 443 | pagevec_init(&pvec, 0); | 448 | pagevec_init(&pvec, 0); |
| 444 | next = start; | 449 | next = start; |
| 445 | while (next <= end && !wrapped && | 450 | while (next <= end && !wrapped && |
| @@ -498,6 +503,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping, | |||
| 498 | mem_cgroup_uncharge_end(); | 503 | mem_cgroup_uncharge_end(); |
| 499 | cond_resched(); | 504 | cond_resched(); |
| 500 | } | 505 | } |
| 506 | cleancache_flush_inode(mapping); | ||
| 501 | return ret; | 507 | return ret; |
| 502 | } | 508 | } |
| 503 | EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range); | 509 | EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range); |
