diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-26 13:50:56 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-26 13:50:56 -0400 |
commit | f8d613e2a665bf1be9628a3c3f9bafe7599b32c0 (patch) | |
tree | 98d4da8d0e1a5fb1d9064626b4b96d95ccf26375 | |
parent | 8a0599dd2471f2a2e409498c08a0ab339057ad06 (diff) | |
parent | 5bc20fc59706214d9591c11e1938a629d3538c12 (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/djm/tmem
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/djm/tmem:
xen: cleancache shim to Xen Transcendent Memory
ocfs2: add cleancache support
ext4: add cleancache support
btrfs: add cleancache support
ext3: add cleancache support
mm/fs: add hooks to support cleancache
mm: cleancache core ops functions and config
fs: add field to superblock to support cleancache
mm/fs: cleancache documentation
Fix up trivial conflict in fs/btrfs/extent_io.c due to includes
-rw-r--r-- | Documentation/ABI/testing/sysfs-kernel-mm-cleancache | 11 | ||||
-rw-r--r-- | Documentation/vm/cleancache.txt | 278 | ||||
-rw-r--r-- | arch/x86/include/asm/xen/hypercall.h | 7 | ||||
-rw-r--r-- | drivers/xen/Makefile | 1 | ||||
-rw-r--r-- | drivers/xen/tmem.c | 264 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 9 | ||||
-rw-r--r-- | fs/btrfs/super.c | 2 | ||||
-rw-r--r-- | fs/buffer.c | 5 | ||||
-rw-r--r-- | fs/ext3/super.c | 2 | ||||
-rw-r--r-- | fs/ext4/super.c | 2 | ||||
-rw-r--r-- | fs/mpage.c | 7 | ||||
-rw-r--r-- | fs/ocfs2/super.c | 2 | ||||
-rw-r--r-- | fs/super.c | 3 | ||||
-rw-r--r-- | include/linux/cleancache.h | 122 | ||||
-rw-r--r-- | include/linux/fs.h | 5 | ||||
-rw-r--r-- | include/xen/interface/xen.h | 22 | ||||
-rw-r--r-- | mm/Kconfig | 23 | ||||
-rw-r--r-- | mm/Makefile | 1 | ||||
-rw-r--r-- | mm/cleancache.c | 244 | ||||
-rw-r--r-- | mm/filemap.c | 11 | ||||
-rw-r--r-- | mm/truncate.c | 6 |
21 files changed, 1027 insertions, 0 deletions
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-cleancache b/Documentation/ABI/testing/sysfs-kernel-mm-cleancache new file mode 100644 index 000000000000..662ae646ea12 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-mm-cleancache | |||
@@ -0,0 +1,11 @@ | |||
1 | What: /sys/kernel/mm/cleancache/ | ||
2 | Date: April 2011 | ||
3 | Contact: Dan Magenheimer <dan.magenheimer@oracle.com> | ||
4 | Description: | ||
5 | /sys/kernel/mm/cleancache/ contains a number of files which | ||
6 | record a count of various cleancache operations | ||
7 | (sum across all filesystems): | ||
8 | succ_gets | ||
9 | failed_gets | ||
10 | puts | ||
11 | flushes | ||
diff --git a/Documentation/vm/cleancache.txt b/Documentation/vm/cleancache.txt new file mode 100644 index 000000000000..36c367c73084 --- /dev/null +++ b/Documentation/vm/cleancache.txt | |||
@@ -0,0 +1,278 @@ | |||
1 | MOTIVATION | ||
2 | |||
3 | Cleancache is a new optional feature provided by the VFS layer that | ||
4 | potentially dramatically increases page cache effectiveness for | ||
5 | many workloads in many environments at a negligible cost. | ||
6 | |||
7 | Cleancache can be thought of as a page-granularity victim cache for clean | ||
8 | pages that the kernel's pageframe replacement algorithm (PFRA) would like | ||
9 | to keep around, but can't since there isn't enough memory. So when the | ||
10 | PFRA "evicts" a page, it first attempts to use cleancache code to | ||
11 | put the data contained in that page into "transcendent memory", memory | ||
12 | that is not directly accessible or addressable by the kernel and is | ||
13 | of unknown and possibly time-varying size. | ||
14 | |||
15 | Later, when a cleancache-enabled filesystem wishes to access a page | ||
16 | in a file on disk, it first checks cleancache to see if it already | ||
17 | contains it; if it does, the page of data is copied into the kernel | ||
18 | and a disk access is avoided. | ||
19 | |||
20 | Transcendent memory "drivers" for cleancache are currently implemented | ||
21 | in Xen (using hypervisor memory) and zcache (using in-kernel compressed | ||
22 | memory) and other implementations are in development. | ||
23 | |||
24 | FAQs are included below. | ||
25 | |||
26 | IMPLEMENTATION OVERVIEW | ||
27 | |||
28 | A cleancache "backend" that provides transcendent memory registers itself | ||
29 | to the kernel's cleancache "frontend" by calling cleancache_register_ops, | ||
30 | passing a pointer to a cleancache_ops structure with funcs set appropriately. | ||
31 | Note that cleancache_register_ops returns the previous settings so that | ||
32 | chaining can be performed if desired. The functions provided must conform to | ||
33 | certain semantics as follows: | ||
34 | |||
35 | Most important, cleancache is "ephemeral". Pages which are copied into | ||
36 | cleancache have an indefinite lifetime which is completely unknowable | ||
37 | by the kernel and so may or may not still be in cleancache at any later time. | ||
38 | Thus, as its name implies, cleancache is not suitable for dirty pages. | ||
39 | Cleancache has complete discretion over what pages to preserve and what | ||
40 | pages to discard and when. | ||
41 | |||
42 | Mounting a cleancache-enabled filesystem should call "init_fs" to obtain a | ||
43 | pool id which, if non-negative, must be saved in the filesystem's superblock; | ||
44 | a negative return value indicates failure. A "put_page" will copy a | ||
45 | (presumably about-to-be-evicted) page into cleancache and associate it with | ||
46 | the pool id, a file key, and a page index into the file. (The combination | ||
47 | of a pool id, a file key, and an index is sometimes called a "handle".) | ||
48 | A "get_page" will copy the page, if found, from cleancache into kernel memory. | ||
49 | A "flush_page" will ensure the page no longer is present in cleancache; | ||
50 | a "flush_inode" will flush all pages associated with the specified file; | ||
51 | and, when a filesystem is unmounted, a "flush_fs" will flush all pages in | ||
52 | all files specified by the given pool id and also surrender the pool id. | ||
53 | |||
54 | An "init_shared_fs", like init_fs, obtains a pool id but tells cleancache | ||
55 | to treat the pool as shared using a 128-bit UUID as a key. On systems | ||
56 | that may run multiple kernels (such as hard partitioned or virtualized | ||
57 | systems) that may share a clustered filesystem, and where cleancache | ||
58 | may be shared among those kernels, calls to init_shared_fs that specify the | ||
59 | same UUID will receive the same pool id, thus allowing the pages to | ||
60 | be shared. Note that any security requirements must be imposed outside | ||
61 | of the kernel (e.g. by "tools" that control cleancache). Or a | ||
62 | cleancache implementation can simply disable init_shared_fs by always | ||
63 | returning a negative value. | ||
64 | |||
65 | If a get_page is successful on a non-shared pool, the page is flushed (thus | ||
66 | making cleancache an "exclusive" cache). On a shared pool, the page | ||
67 | is NOT flushed on a successful get_page so that it remains accessible to | ||
68 | other sharers. The kernel is responsible for ensuring coherency between | ||
69 | cleancache (shared or not), the page cache, and the filesystem, using | ||
70 | cleancache flush operations as required. | ||
71 | |||
72 | Note that cleancache must enforce put-put-get coherency and get-get | ||
73 | coherency. For the former, if two puts are made to the same handle but | ||
74 | with different data, say AAA by the first put and BBB by the second, a | ||
75 | subsequent get can never return the stale data (AAA). For get-get coherency, | ||
76 | if a get for a given handle fails, subsequent gets for that handle will | ||
77 | never succeed unless preceded by a successful put with that handle. | ||
78 | |||
79 | Last, cleancache provides no SMP serialization guarantees; if two | ||
80 | different Linux threads are simultaneously putting and flushing a page | ||
81 | with the same handle, the results are indeterminate. Callers must | ||
82 | lock the page to ensure serial behavior. | ||
83 | |||
84 | CLEANCACHE PERFORMANCE METRICS | ||
85 | |||
86 | Cleancache monitoring is done by sysfs files in the | ||
87 | /sys/kernel/mm/cleancache directory. The effectiveness of cleancache | ||
88 | can be measured (across all filesystems) with: | ||
89 | |||
90 | succ_gets - number of gets that were successful | ||
91 | failed_gets - number of gets that failed | ||
92 | puts - number of puts attempted (all "succeed") | ||
93 | flushes - number of flushes attempted | ||
94 | |||
95 | A backend implementation may provide additional metrics. | ||
96 | |||
97 | FAQ | ||
98 | |||
99 | 1) Where's the value? (Andrew Morton) | ||
100 | |||
101 | Cleancache provides a significant performance benefit to many workloads | ||
102 | in many environments with negligible overhead by improving the | ||
103 | effectiveness of the pagecache. Clean pagecache pages are | ||
104 | saved in transcendent memory (RAM that is otherwise not directly | ||
105 | addressable to the kernel); fetching those pages later avoids "refaults" | ||
106 | and thus disk reads. | ||
107 | |||
108 | Cleancache (and its sister code "frontswap") provide interfaces for | ||
109 | this transcendent memory (aka "tmem"), which conceptually lies between | ||
110 | fast kernel-directly-addressable RAM and slower DMA/asynchronous devices. | ||
111 | Disallowing direct kernel or userland reads/writes to tmem | ||
112 | is ideal when data is transformed to a different form and size (such | ||
113 | as with compression) or secretly moved (as might be useful for write- | ||
114 | balancing for some RAM-like devices). Evicted page-cache pages (and | ||
115 | swap pages) are a great use for this kind of slower-than-RAM-but-much- | ||
116 | faster-than-disk transcendent memory, and the cleancache (and frontswap) | ||
117 | "page-object-oriented" specification provides a nice way to read and | ||
118 | write -- and indirectly "name" -- the pages. | ||
119 | |||
120 | In the virtual case, the whole point of virtualization is to statistically | ||
121 | multiplex physical resources across the varying demands of multiple | ||
122 | virtual machines. This is really hard to do with RAM and efforts to | ||
123 | do it well with no kernel change have essentially failed (except in some | ||
124 | well-publicized special-case workloads). Cleancache -- and frontswap -- | ||
125 | with a fairly small impact on the kernel, provide a huge amount | ||
126 | of flexibility for more dynamic, flexible RAM multiplexing. | ||
127 | Specifically, the Xen Transcendent Memory backend allows otherwise | ||
128 | "fallow" hypervisor-owned RAM to not only be "time-shared" between multiple | ||
129 | virtual machines, but the pages can be compressed and deduplicated to | ||
130 | optimize RAM utilization. And when guest OS's are induced to surrender | ||
131 | underutilized RAM (e.g. with "self-ballooning"), page cache pages | ||
132 | are the first to go, and cleancache allows those pages to be | ||
133 | saved and reclaimed if overall host system memory conditions allow. | ||
134 | |||
135 | And the identical interface used for cleancache can be used in | ||
136 | physical systems as well. The zcache driver acts as a memory-hungry | ||
137 | device that stores pages of data in a compressed state. And | ||
138 | the proposed "RAMster" driver shares RAM across multiple physical | ||
139 | systems. | ||
140 | |||
141 | 2) Why does cleancache have its sticky fingers so deep inside the | ||
142 | filesystems and VFS? (Andrew Morton and Christoph Hellwig) | ||
143 | |||
144 | The core hooks for cleancache in VFS are in most cases a single line | ||
145 | and the minimum set are placed precisely where needed to maintain | ||
146 | coherency (via cleancache_flush operations) between cleancache, | ||
147 | the page cache, and disk. All hooks compile into nothingness if | ||
148 | cleancache is config'ed off and turn into a function-pointer- | ||
149 | compare-to-NULL if config'ed on but no backend claims the ops | ||
150 | functions, or to a compare-struct-element-to-negative if a | ||
151 | backend claims the ops functions but a filesystem doesn't enable | ||
152 | cleancache. | ||
153 | |||
154 | Some filesystems are built entirely on top of VFS and the hooks | ||
155 | in VFS are sufficient, so don't require an "init_fs" hook; the | ||
156 | initial implementation of cleancache didn't provide this hook. | ||
157 | But for some filesystems (such as btrfs), the VFS hooks are | ||
158 | incomplete and one or more hooks in fs-specific code are required. | ||
159 | And for some other filesystems, such as tmpfs, cleancache may | ||
160 | be counterproductive. So it seemed prudent to require a filesystem | ||
161 | to "opt in" to use cleancache, which requires adding a hook in | ||
162 | each filesystem. Some filesystems are not yet supported by cleancache | ||
163 | only because they haven't been tested. The existing set should | ||
164 | be sufficient to validate the concept, the opt-in approach means | ||
165 | that untested filesystems are not affected, and the hooks in the | ||
166 | existing filesystems should make it very easy to add more | ||
167 | filesystems in the future. | ||
168 | |||
169 | The total impact of the hooks to existing fs and mm files is only | ||
170 | about 40 lines added (not counting comments and blank lines). | ||
171 | |||
172 | 3) Why not make cleancache asynchronous and batched so it can | ||
173 | more easily interface with real devices with DMA instead | ||
174 | of copying each individual page? (Minchan Kim) | ||
175 | |||
176 | The one-page-at-a-time copy semantics simplifies the implementation | ||
177 | on both the frontend and backend and also allows the backend to | ||
178 | do fancy things on-the-fly like page compression and | ||
179 | page deduplication. And since the data is "gone" (copied into/out | ||
180 | of the pageframe) before the cleancache get/put call returns, | ||
181 | a great deal of race conditions and potential coherency issues | ||
182 | are avoided. While the interface seems odd for a "real device" | ||
183 | or for real kernel-addressable RAM, it makes perfect sense for | ||
184 | transcendent memory. | ||
185 | |||
186 | 4) Why is non-shared cleancache "exclusive"? And where is the | ||
187 | page "flushed" after a "get"? (Minchan Kim) | ||
188 | |||
189 | The main reason is to free up space in transcendent memory and | ||
190 | to avoid unnecessary cleancache_flush calls. If you want inclusive, | ||
191 | the page can be "put" immediately following the "get". If | ||
192 | put-after-get for inclusive becomes common, the interface could | ||
193 | be easily extended to add a "get_no_flush" call. | ||
194 | |||
195 | The flush is done by the cleancache backend implementation. | ||
196 | |||
197 | 5) What's the performance impact? | ||
198 | |||
199 | Performance analysis has been presented at OLS'09 and LCA'10. | ||
200 | Briefly, performance gains can be significant on most workloads, | ||
201 | especially when memory pressure is high (e.g. when RAM is | ||
202 | overcommitted in a virtual workload); and because the hooks are | ||
203 | invoked primarily in place of or in addition to a disk read/write, | ||
204 | overhead is negligible even in worst case workloads. Basically | ||
205 | cleancache replaces I/O with memory-copy-CPU-overhead; on older | ||
206 | single-core systems with slow memory-copy speeds, cleancache | ||
207 | has little value, but in newer multicore machines, especially | ||
208 | consolidated/virtualized machines, it has great value. | ||
209 | |||
210 | 6) How do I add cleancache support for filesystem X? (Boaz Harrosh) | ||
211 | |||
212 | Filesystems that are well-behaved and conform to certain | ||
213 | restrictions can utilize cleancache simply by making a call to | ||
214 | cleancache_init_fs at mount time. Unusual, misbehaving, or | ||
215 | poorly layered filesystems must either add additional hooks | ||
216 | and/or undergo extensive additional testing... or should just | ||
217 | not enable the optional cleancache. | ||
218 | |||
219 | Some points for a filesystem to consider: | ||
220 | |||
221 | - The FS should be block-device-based (e.g. a ram-based FS such | ||
222 | as tmpfs should not enable cleancache) | ||
223 | - To ensure coherency/correctness, the FS must ensure that all | ||
224 | file removal or truncation operations either go through VFS or | ||
225 | add hooks to do the equivalent cleancache "flush" operations | ||
226 | - To ensure coherency/correctness, either inode numbers must | ||
227 | be unique across the lifetime of the on-disk file OR the | ||
228 | FS must provide an "encode_fh" function. | ||
229 | - The FS must call the VFS superblock alloc and deactivate routines | ||
230 | or add hooks to do the equivalent cleancache calls done there. | ||
231 | - To maximize performance, all pages fetched from the FS should | ||
232 | go through the do_mpage_readpage routine or the FS should add | ||
233 | hooks to do the equivalent (cf. btrfs) | ||
234 | - Currently, the FS blocksize must be the same as PAGESIZE. This | ||
235 | is not an architectural restriction, but no backends currently | ||
236 | support anything different. | ||
237 | - A clustered FS should invoke the "init_shared_fs" cleancache | ||
238 | hook to get best performance for some backends. | ||
239 | |||
240 | 7) Why not use the KVA of the inode as the key? (Christoph Hellwig) | ||
241 | |||
242 | If cleancache would use the inode virtual address instead of | ||
243 | inode/filehandle, the pool id could be eliminated. But, this | ||
244 | won't work because cleancache retains pagecache data pages | ||
245 | persistently even when the inode has been pruned from the | ||
246 | inode unused list, and only flushes the data page if the file | ||
247 | gets removed/truncated. So if cleancache used the inode kva, | ||
248 | there would be potential coherency issues if/when the inode | ||
249 | kva is reused for a different file. Alternately, if cleancache | ||
250 | flushed the pages when the inode kva was freed, much of the value | ||
251 | of cleancache would be lost because the cache of pages in cleancache | ||
252 | is potentially much larger than the kernel pagecache and is most | ||
253 | useful if the pages survive inode cache removal. | ||
254 | |||
255 | 8) Why is a global variable required? | ||
256 | |||
257 | The cleancache_enabled flag is checked in all of the frequently-used | ||
258 | cleancache hooks. The alternative is a function call to check a static | ||
259 | variable. Since cleancache is enabled dynamically at runtime, systems | ||
260 | that don't enable cleancache would suffer thousands (possibly | ||
261 | tens-of-thousands) of unnecessary function calls per second. So the | ||
262 | global variable allows cleancache to be enabled by default at compile | ||
263 | time, but have insignificant performance impact when cleancache remains | ||
264 | disabled at runtime. | ||
265 | |||
266 | 9) Does cleancache work with KVM? | ||
267 | |||
268 | The memory model of KVM is sufficiently different that a cleancache | ||
269 | backend may have less value for KVM. This remains to be tested, | ||
270 | especially in an overcommitted system. | ||
271 | |||
272 | 10) Does cleancache work in userspace? It sounds useful for | ||
273 | memory hungry caches like web browsers. (Jamie Lokier) | ||
274 | |||
275 | No plans yet, though we agree it sounds useful, at least for | ||
276 | apps that bypass the page cache (e.g. O_DIRECT). | ||
277 | |||
278 | Last updated: Dan Magenheimer, April 13 2011 | ||
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 8508bfe52296..d240ea950519 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h | |||
@@ -447,6 +447,13 @@ HYPERVISOR_hvm_op(int op, void *arg) | |||
447 | return _hypercall2(unsigned long, hvm_op, op, arg); | 447 | return _hypercall2(unsigned long, hvm_op, op, arg); |
448 | } | 448 | } |
449 | 449 | ||
450 | static inline int | ||
451 | HYPERVISOR_tmem_op( | ||
452 | struct tmem_op *op) | ||
453 | { | ||
454 | return _hypercall1(int, tmem_op, op); | ||
455 | } | ||
456 | |||
450 | static inline void | 457 | static inline void |
451 | MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) | 458 | MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) |
452 | { | 459 | { |
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 4781f806701d..bbc18258ecc5 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile | |||
@@ -1,5 +1,6 @@ | |||
1 | obj-y += grant-table.o features.o events.o manage.o balloon.o | 1 | obj-y += grant-table.o features.o events.o manage.o balloon.o |
2 | obj-y += xenbus/ | 2 | obj-y += xenbus/ |
3 | obj-y += tmem.o | ||
3 | 4 | ||
4 | nostackp := $(call cc-option, -fno-stack-protector) | 5 | nostackp := $(call cc-option, -fno-stack-protector) |
5 | CFLAGS_features.o := $(nostackp) | 6 | CFLAGS_features.o := $(nostackp) |
diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c new file mode 100644 index 000000000000..816a44959ef0 --- /dev/null +++ b/drivers/xen/tmem.c | |||
@@ -0,0 +1,264 @@ | |||
1 | /* | ||
2 | * Xen implementation for transcendent memory (tmem) | ||
3 | * | ||
4 | * Copyright (C) 2009-2010 Oracle Corp. All rights reserved. | ||
5 | * Author: Dan Magenheimer | ||
6 | */ | ||
7 | |||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/types.h> | ||
10 | #include <linux/init.h> | ||
11 | #include <linux/pagemap.h> | ||
12 | #include <linux/cleancache.h> | ||
13 | |||
14 | #include <xen/xen.h> | ||
15 | #include <xen/interface/xen.h> | ||
16 | #include <asm/xen/hypercall.h> | ||
17 | #include <asm/xen/page.h> | ||
18 | #include <asm/xen/hypervisor.h> | ||
19 | |||
20 | #define TMEM_CONTROL 0 | ||
21 | #define TMEM_NEW_POOL 1 | ||
22 | #define TMEM_DESTROY_POOL 2 | ||
23 | #define TMEM_NEW_PAGE 3 | ||
24 | #define TMEM_PUT_PAGE 4 | ||
25 | #define TMEM_GET_PAGE 5 | ||
26 | #define TMEM_FLUSH_PAGE 6 | ||
27 | #define TMEM_FLUSH_OBJECT 7 | ||
28 | #define TMEM_READ 8 | ||
29 | #define TMEM_WRITE 9 | ||
30 | #define TMEM_XCHG 10 | ||
31 | |||
32 | /* Bits for HYPERVISOR_tmem_op(TMEM_NEW_POOL) */ | ||
33 | #define TMEM_POOL_PERSIST 1 | ||
34 | #define TMEM_POOL_SHARED 2 | ||
35 | #define TMEM_POOL_PAGESIZE_SHIFT 4 | ||
36 | #define TMEM_VERSION_SHIFT 24 | ||
37 | |||
38 | |||
39 | struct tmem_pool_uuid { | ||
40 | u64 uuid_lo; | ||
41 | u64 uuid_hi; | ||
42 | }; | ||
43 | |||
44 | struct tmem_oid { | ||
45 | u64 oid[3]; | ||
46 | }; | ||
47 | |||
48 | #define TMEM_POOL_PRIVATE_UUID { 0, 0 } | ||
49 | |||
50 | /* flags for tmem_ops.new_pool */ | ||
51 | #define TMEM_POOL_PERSIST 1 | ||
52 | #define TMEM_POOL_SHARED 2 | ||
53 | |||
54 | /* xen tmem foundation ops/hypercalls */ | ||
55 | |||
56 | static inline int xen_tmem_op(u32 tmem_cmd, u32 tmem_pool, struct tmem_oid oid, | ||
57 | u32 index, unsigned long gmfn, u32 tmem_offset, u32 pfn_offset, u32 len) | ||
58 | { | ||
59 | struct tmem_op op; | ||
60 | int rc = 0; | ||
61 | |||
62 | op.cmd = tmem_cmd; | ||
63 | op.pool_id = tmem_pool; | ||
64 | op.u.gen.oid[0] = oid.oid[0]; | ||
65 | op.u.gen.oid[1] = oid.oid[1]; | ||
66 | op.u.gen.oid[2] = oid.oid[2]; | ||
67 | op.u.gen.index = index; | ||
68 | op.u.gen.tmem_offset = tmem_offset; | ||
69 | op.u.gen.pfn_offset = pfn_offset; | ||
70 | op.u.gen.len = len; | ||
71 | set_xen_guest_handle(op.u.gen.gmfn, (void *)gmfn); | ||
72 | rc = HYPERVISOR_tmem_op(&op); | ||
73 | return rc; | ||
74 | } | ||
75 | |||
76 | static int xen_tmem_new_pool(struct tmem_pool_uuid uuid, | ||
77 | u32 flags, unsigned long pagesize) | ||
78 | { | ||
79 | struct tmem_op op; | ||
80 | int rc = 0, pageshift; | ||
81 | |||
82 | for (pageshift = 0; pagesize != 1; pageshift++) | ||
83 | pagesize >>= 1; | ||
84 | flags |= (pageshift - 12) << TMEM_POOL_PAGESIZE_SHIFT; | ||
85 | flags |= TMEM_SPEC_VERSION << TMEM_VERSION_SHIFT; | ||
86 | op.cmd = TMEM_NEW_POOL; | ||
87 | op.u.new.uuid[0] = uuid.uuid_lo; | ||
88 | op.u.new.uuid[1] = uuid.uuid_hi; | ||
89 | op.u.new.flags = flags; | ||
90 | rc = HYPERVISOR_tmem_op(&op); | ||
91 | return rc; | ||
92 | } | ||
93 | |||
94 | /* xen generic tmem ops */ | ||
95 | |||
96 | static int xen_tmem_put_page(u32 pool_id, struct tmem_oid oid, | ||
97 | u32 index, unsigned long pfn) | ||
98 | { | ||
99 | unsigned long gmfn = xen_pv_domain() ? pfn_to_mfn(pfn) : pfn; | ||
100 | |||
101 | return xen_tmem_op(TMEM_PUT_PAGE, pool_id, oid, index, | ||
102 | gmfn, 0, 0, 0); | ||
103 | } | ||
104 | |||
105 | static int xen_tmem_get_page(u32 pool_id, struct tmem_oid oid, | ||
106 | u32 index, unsigned long pfn) | ||
107 | { | ||
108 | unsigned long gmfn = xen_pv_domain() ? pfn_to_mfn(pfn) : pfn; | ||
109 | |||
110 | return xen_tmem_op(TMEM_GET_PAGE, pool_id, oid, index, | ||
111 | gmfn, 0, 0, 0); | ||
112 | } | ||
113 | |||
114 | static int xen_tmem_flush_page(u32 pool_id, struct tmem_oid oid, u32 index) | ||
115 | { | ||
116 | return xen_tmem_op(TMEM_FLUSH_PAGE, pool_id, oid, index, | ||
117 | 0, 0, 0, 0); | ||
118 | } | ||
119 | |||
120 | static int xen_tmem_flush_object(u32 pool_id, struct tmem_oid oid) | ||
121 | { | ||
122 | return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0); | ||
123 | } | ||
124 | |||
125 | static int xen_tmem_destroy_pool(u32 pool_id) | ||
126 | { | ||
127 | struct tmem_oid oid = { { 0 } }; | ||
128 | |||
129 | return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0); | ||
130 | } | ||
131 | |||
132 | int tmem_enabled; | ||
133 | |||
134 | static int __init enable_tmem(char *s) | ||
135 | { | ||
136 | tmem_enabled = 1; | ||
137 | return 1; | ||
138 | } | ||
139 | |||
140 | __setup("tmem", enable_tmem); | ||
141 | |||
142 | /* cleancache ops */ | ||
143 | |||
144 | static void tmem_cleancache_put_page(int pool, struct cleancache_filekey key, | ||
145 | pgoff_t index, struct page *page) | ||
146 | { | ||
147 | u32 ind = (u32) index; | ||
148 | struct tmem_oid oid = *(struct tmem_oid *)&key; | ||
149 | unsigned long pfn = page_to_pfn(page); | ||
150 | |||
151 | if (pool < 0) | ||
152 | return; | ||
153 | if (ind != index) | ||
154 | return; | ||
155 | mb(); /* ensure page is quiescent; tmem may address it with an alias */ | ||
156 | (void)xen_tmem_put_page((u32)pool, oid, ind, pfn); | ||
157 | } | ||
158 | |||
159 | static int tmem_cleancache_get_page(int pool, struct cleancache_filekey key, | ||
160 | pgoff_t index, struct page *page) | ||
161 | { | ||
162 | u32 ind = (u32) index; | ||
163 | struct tmem_oid oid = *(struct tmem_oid *)&key; | ||
164 | unsigned long pfn = page_to_pfn(page); | ||
165 | int ret; | ||
166 | |||
167 | /* translate return values to linux semantics */ | ||
168 | if (pool < 0) | ||
169 | return -1; | ||
170 | if (ind != index) | ||
171 | return -1; | ||
172 | ret = xen_tmem_get_page((u32)pool, oid, ind, pfn); | ||
173 | if (ret == 1) | ||
174 | return 0; | ||
175 | else | ||
176 | return -1; | ||
177 | } | ||
178 | |||
179 | static void tmem_cleancache_flush_page(int pool, struct cleancache_filekey key, | ||
180 | pgoff_t index) | ||
181 | { | ||
182 | u32 ind = (u32) index; | ||
183 | struct tmem_oid oid = *(struct tmem_oid *)&key; | ||
184 | |||
185 | if (pool < 0) | ||
186 | return; | ||
187 | if (ind != index) | ||
188 | return; | ||
189 | (void)xen_tmem_flush_page((u32)pool, oid, ind); | ||
190 | } | ||
191 | |||
192 | static void tmem_cleancache_flush_inode(int pool, struct cleancache_filekey key) | ||
193 | { | ||
194 | struct tmem_oid oid = *(struct tmem_oid *)&key; | ||
195 | |||
196 | if (pool < 0) | ||
197 | return; | ||
198 | (void)xen_tmem_flush_object((u32)pool, oid); | ||
199 | } | ||
200 | |||
201 | static void tmem_cleancache_flush_fs(int pool) | ||
202 | { | ||
203 | if (pool < 0) | ||
204 | return; | ||
205 | (void)xen_tmem_destroy_pool((u32)pool); | ||
206 | } | ||
207 | |||
208 | static int tmem_cleancache_init_fs(size_t pagesize) | ||
209 | { | ||
210 | struct tmem_pool_uuid uuid_private = TMEM_POOL_PRIVATE_UUID; | ||
211 | |||
212 | return xen_tmem_new_pool(uuid_private, 0, pagesize); | ||
213 | } | ||
214 | |||
215 | static int tmem_cleancache_init_shared_fs(char *uuid, size_t pagesize) | ||
216 | { | ||
217 | struct tmem_pool_uuid shared_uuid; | ||
218 | |||
219 | shared_uuid.uuid_lo = *(u64 *)uuid; | ||
220 | shared_uuid.uuid_hi = *(u64 *)(&uuid[8]); | ||
221 | return xen_tmem_new_pool(shared_uuid, TMEM_POOL_SHARED, pagesize); | ||
222 | } | ||
223 | |||
224 | static int use_cleancache = 1; | ||
225 | |||
226 | static int __init no_cleancache(char *s) | ||
227 | { | ||
228 | use_cleancache = 0; | ||
229 | return 1; | ||
230 | } | ||
231 | |||
232 | __setup("nocleancache", no_cleancache); | ||
233 | |||
234 | static struct cleancache_ops tmem_cleancache_ops = { | ||
235 | .put_page = tmem_cleancache_put_page, | ||
236 | .get_page = tmem_cleancache_get_page, | ||
237 | .flush_page = tmem_cleancache_flush_page, | ||
238 | .flush_inode = tmem_cleancache_flush_inode, | ||
239 | .flush_fs = tmem_cleancache_flush_fs, | ||
240 | .init_shared_fs = tmem_cleancache_init_shared_fs, | ||
241 | .init_fs = tmem_cleancache_init_fs | ||
242 | }; | ||
243 | |||
244 | static int __init xen_tmem_init(void) | ||
245 | { | ||
246 | struct cleancache_ops old_ops; | ||
247 | |||
248 | if (!xen_domain()) | ||
249 | return 0; | ||
250 | #ifdef CONFIG_CLEANCACHE | ||
251 | BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid)); | ||
252 | if (tmem_enabled && use_cleancache) { | ||
253 | char *s = ""; | ||
254 | old_ops = cleancache_register_ops(&tmem_cleancache_ops); | ||
255 | if (old_ops.init_fs != NULL) | ||
256 | s = " (WARNING: cleancache_ops overridden)"; | ||
257 | printk(KERN_INFO "cleancache enabled, RAM provided by " | ||
258 | "Xen Transcendent Memory%s\n", s); | ||
259 | } | ||
260 | #endif | ||
261 | return 0; | ||
262 | } | ||
263 | |||
264 | module_init(xen_tmem_init) | ||
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 96fcfa522dab..4f9893243dae 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/writeback.h> | 11 | #include <linux/writeback.h> |
12 | #include <linux/pagevec.h> | 12 | #include <linux/pagevec.h> |
13 | #include <linux/prefetch.h> | 13 | #include <linux/prefetch.h> |
14 | #include <linux/cleancache.h> | ||
14 | #include "extent_io.h" | 15 | #include "extent_io.h" |
15 | #include "extent_map.h" | 16 | #include "extent_map.h" |
16 | #include "compat.h" | 17 | #include "compat.h" |
@@ -2016,6 +2017,13 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2016 | 2017 | ||
2017 | set_page_extent_mapped(page); | 2018 | set_page_extent_mapped(page); |
2018 | 2019 | ||
2020 | if (!PageUptodate(page)) { | ||
2021 | if (cleancache_get_page(page) == 0) { | ||
2022 | BUG_ON(blocksize != PAGE_SIZE); | ||
2023 | goto out; | ||
2024 | } | ||
2025 | } | ||
2026 | |||
2019 | end = page_end; | 2027 | end = page_end; |
2020 | while (1) { | 2028 | while (1) { |
2021 | lock_extent(tree, start, end, GFP_NOFS); | 2029 | lock_extent(tree, start, end, GFP_NOFS); |
@@ -2149,6 +2157,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2149 | cur = cur + iosize; | 2157 | cur = cur + iosize; |
2150 | page_offset += iosize; | 2158 | page_offset += iosize; |
2151 | } | 2159 | } |
2160 | out: | ||
2152 | if (!nr) { | 2161 | if (!nr) { |
2153 | if (!PageError(page)) | 2162 | if (!PageError(page)) |
2154 | SetPageUptodate(page); | 2163 | SetPageUptodate(page); |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 0ac712efcdf2..be4ffa12f3ef 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include <linux/miscdevice.h> | 39 | #include <linux/miscdevice.h> |
40 | #include <linux/magic.h> | 40 | #include <linux/magic.h> |
41 | #include <linux/slab.h> | 41 | #include <linux/slab.h> |
42 | #include <linux/cleancache.h> | ||
42 | #include "compat.h" | 43 | #include "compat.h" |
43 | #include "ctree.h" | 44 | #include "ctree.h" |
44 | #include "disk-io.h" | 45 | #include "disk-io.h" |
@@ -624,6 +625,7 @@ static int btrfs_fill_super(struct super_block *sb, | |||
624 | sb->s_root = root_dentry; | 625 | sb->s_root = root_dentry; |
625 | 626 | ||
626 | save_mount_options(sb, data); | 627 | save_mount_options(sb, data); |
628 | cleancache_init_fs(sb); | ||
627 | return 0; | 629 | return 0; |
628 | 630 | ||
629 | fail_close: | 631 | fail_close: |
diff --git a/fs/buffer.c b/fs/buffer.c index b0675bfe8207..698c6b2cc462 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <linux/bitops.h> | 41 | #include <linux/bitops.h> |
42 | #include <linux/mpage.h> | 42 | #include <linux/mpage.h> |
43 | #include <linux/bit_spinlock.h> | 43 | #include <linux/bit_spinlock.h> |
44 | #include <linux/cleancache.h> | ||
44 | 45 | ||
45 | static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); | 46 | static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); |
46 | 47 | ||
@@ -269,6 +270,10 @@ void invalidate_bdev(struct block_device *bdev) | |||
269 | invalidate_bh_lrus(); | 270 | invalidate_bh_lrus(); |
270 | lru_add_drain_all(); /* make sure all lru add caches are flushed */ | 271 | lru_add_drain_all(); /* make sure all lru add caches are flushed */ |
271 | invalidate_mapping_pages(mapping, 0, -1); | 272 | invalidate_mapping_pages(mapping, 0, -1); |
273 | /* 99% of the time, we don't need to flush the cleancache on the bdev. | ||
274 | * But, for the strange corners, let's be cautious | ||
275 | */ | ||
276 | cleancache_flush_inode(mapping); | ||
272 | } | 277 | } |
273 | EXPORT_SYMBOL(invalidate_bdev); | 278 | EXPORT_SYMBOL(invalidate_bdev); |
274 | 279 | ||
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 3c6a9e0eadc1..aad153ef6b78 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/quotaops.h> | 36 | #include <linux/quotaops.h> |
37 | #include <linux/seq_file.h> | 37 | #include <linux/seq_file.h> |
38 | #include <linux/log2.h> | 38 | #include <linux/log2.h> |
39 | #include <linux/cleancache.h> | ||
39 | 40 | ||
40 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> |
41 | 42 | ||
@@ -1367,6 +1368,7 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es, | |||
1367 | } else { | 1368 | } else { |
1368 | ext3_msg(sb, KERN_INFO, "using internal journal"); | 1369 | ext3_msg(sb, KERN_INFO, "using internal journal"); |
1369 | } | 1370 | } |
1371 | cleancache_init_fs(sb); | ||
1370 | return res; | 1372 | return res; |
1371 | } | 1373 | } |
1372 | 1374 | ||
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d9937df7f5cf..cc5c157aa11d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include <linux/ctype.h> | 38 | #include <linux/ctype.h> |
39 | #include <linux/log2.h> | 39 | #include <linux/log2.h> |
40 | #include <linux/crc16.h> | 40 | #include <linux/crc16.h> |
41 | #include <linux/cleancache.h> | ||
41 | #include <asm/uaccess.h> | 42 | #include <asm/uaccess.h> |
42 | 43 | ||
43 | #include <linux/kthread.h> | 44 | #include <linux/kthread.h> |
@@ -1948,6 +1949,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, | |||
1948 | EXT4_INODES_PER_GROUP(sb), | 1949 | EXT4_INODES_PER_GROUP(sb), |
1949 | sbi->s_mount_opt, sbi->s_mount_opt2); | 1950 | sbi->s_mount_opt, sbi->s_mount_opt2); |
1950 | 1951 | ||
1952 | cleancache_init_fs(sb); | ||
1951 | return res; | 1953 | return res; |
1952 | } | 1954 | } |
1953 | 1955 | ||
diff --git a/fs/mpage.c b/fs/mpage.c index 0afc809e46e0..fdfae9fa98cd 100644 --- a/fs/mpage.c +++ b/fs/mpage.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/writeback.h> | 27 | #include <linux/writeback.h> |
28 | #include <linux/backing-dev.h> | 28 | #include <linux/backing-dev.h> |
29 | #include <linux/pagevec.h> | 29 | #include <linux/pagevec.h> |
30 | #include <linux/cleancache.h> | ||
30 | 31 | ||
31 | /* | 32 | /* |
32 | * I/O completion handler for multipage BIOs. | 33 | * I/O completion handler for multipage BIOs. |
@@ -271,6 +272,12 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, | |||
271 | SetPageMappedToDisk(page); | 272 | SetPageMappedToDisk(page); |
272 | } | 273 | } |
273 | 274 | ||
275 | if (fully_mapped && blocks_per_page == 1 && !PageUptodate(page) && | ||
276 | cleancache_get_page(page) == 0) { | ||
277 | SetPageUptodate(page); | ||
278 | goto confused; | ||
279 | } | ||
280 | |||
274 | /* | 281 | /* |
275 | * This page will go to BIO. Do we need to send this BIO off first? | 282 | * This page will go to BIO. Do we need to send this BIO off first? |
276 | */ | 283 | */ |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 5a521c748859..4129fb671d71 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <linux/mount.h> | 41 | #include <linux/mount.h> |
42 | #include <linux/seq_file.h> | 42 | #include <linux/seq_file.h> |
43 | #include <linux/quotaops.h> | 43 | #include <linux/quotaops.h> |
44 | #include <linux/cleancache.h> | ||
44 | 45 | ||
45 | #define CREATE_TRACE_POINTS | 46 | #define CREATE_TRACE_POINTS |
46 | #include "ocfs2_trace.h" | 47 | #include "ocfs2_trace.h" |
@@ -2352,6 +2353,7 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
2352 | mlog_errno(status); | 2353 | mlog_errno(status); |
2353 | goto bail; | 2354 | goto bail; |
2354 | } | 2355 | } |
2356 | cleancache_init_shared_fs((char *)&uuid_net_key, sb); | ||
2355 | 2357 | ||
2356 | bail: | 2358 | bail: |
2357 | return status; | 2359 | return status; |
diff --git a/fs/super.c b/fs/super.c index c04f7e0b7ed2..c75593953c52 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/mutex.h> | 31 | #include <linux/mutex.h> |
32 | #include <linux/backing-dev.h> | 32 | #include <linux/backing-dev.h> |
33 | #include <linux/rculist_bl.h> | 33 | #include <linux/rculist_bl.h> |
34 | #include <linux/cleancache.h> | ||
34 | #include "internal.h" | 35 | #include "internal.h" |
35 | 36 | ||
36 | 37 | ||
@@ -112,6 +113,7 @@ static struct super_block *alloc_super(struct file_system_type *type) | |||
112 | s->s_maxbytes = MAX_NON_LFS; | 113 | s->s_maxbytes = MAX_NON_LFS; |
113 | s->s_op = &default_op; | 114 | s->s_op = &default_op; |
114 | s->s_time_gran = 1000000000; | 115 | s->s_time_gran = 1000000000; |
116 | s->cleancache_poolid = -1; | ||
115 | } | 117 | } |
116 | out: | 118 | out: |
117 | return s; | 119 | return s; |
@@ -177,6 +179,7 @@ void deactivate_locked_super(struct super_block *s) | |||
177 | { | 179 | { |
178 | struct file_system_type *fs = s->s_type; | 180 | struct file_system_type *fs = s->s_type; |
179 | if (atomic_dec_and_test(&s->s_active)) { | 181 | if (atomic_dec_and_test(&s->s_active)) { |
182 | cleancache_flush_fs(s); | ||
180 | fs->kill_sb(s); | 183 | fs->kill_sb(s); |
181 | /* | 184 | /* |
182 | * We need to call rcu_barrier so all the delayed rcu free | 185 | * We need to call rcu_barrier so all the delayed rcu free |
diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h new file mode 100644 index 000000000000..04ffb2e6c9d0 --- /dev/null +++ b/include/linux/cleancache.h | |||
@@ -0,0 +1,122 @@ | |||
1 | #ifndef _LINUX_CLEANCACHE_H | ||
2 | #define _LINUX_CLEANCACHE_H | ||
3 | |||
4 | #include <linux/fs.h> | ||
5 | #include <linux/exportfs.h> | ||
6 | #include <linux/mm.h> | ||
7 | |||
8 | #define CLEANCACHE_KEY_MAX 6 | ||
9 | |||
10 | /* | ||
11 | * cleancache requires every file with a page in cleancache to have a | ||
12 | * unique key unless/until the file is removed/truncated. For some | ||
13 | * filesystems, the inode number is unique, but for "modern" filesystems | ||
14 | * an exportable filehandle is required (see exportfs.h) | ||
15 | */ | ||
16 | struct cleancache_filekey { | ||
17 | union { | ||
18 | ino_t ino; | ||
19 | __u32 fh[CLEANCACHE_KEY_MAX]; | ||
20 | u32 key[CLEANCACHE_KEY_MAX]; | ||
21 | } u; | ||
22 | }; | ||
23 | |||
24 | struct cleancache_ops { | ||
25 | int (*init_fs)(size_t); | ||
26 | int (*init_shared_fs)(char *uuid, size_t); | ||
27 | int (*get_page)(int, struct cleancache_filekey, | ||
28 | pgoff_t, struct page *); | ||
29 | void (*put_page)(int, struct cleancache_filekey, | ||
30 | pgoff_t, struct page *); | ||
31 | void (*flush_page)(int, struct cleancache_filekey, pgoff_t); | ||
32 | void (*flush_inode)(int, struct cleancache_filekey); | ||
33 | void (*flush_fs)(int); | ||
34 | }; | ||
35 | |||
36 | extern struct cleancache_ops | ||
37 | cleancache_register_ops(struct cleancache_ops *ops); | ||
38 | extern void __cleancache_init_fs(struct super_block *); | ||
39 | extern void __cleancache_init_shared_fs(char *, struct super_block *); | ||
40 | extern int __cleancache_get_page(struct page *); | ||
41 | extern void __cleancache_put_page(struct page *); | ||
42 | extern void __cleancache_flush_page(struct address_space *, struct page *); | ||
43 | extern void __cleancache_flush_inode(struct address_space *); | ||
44 | extern void __cleancache_flush_fs(struct super_block *); | ||
45 | extern int cleancache_enabled; | ||
46 | |||
47 | #ifdef CONFIG_CLEANCACHE | ||
48 | static inline bool cleancache_fs_enabled(struct page *page) | ||
49 | { | ||
50 | return page->mapping->host->i_sb->cleancache_poolid >= 0; | ||
51 | } | ||
52 | static inline bool cleancache_fs_enabled_mapping(struct address_space *mapping) | ||
53 | { | ||
54 | return mapping->host->i_sb->cleancache_poolid >= 0; | ||
55 | } | ||
56 | #else | ||
57 | #define cleancache_enabled (0) | ||
58 | #define cleancache_fs_enabled(_page) (0) | ||
59 | #define cleancache_fs_enabled_mapping(_page) (0) | ||
60 | #endif | ||
61 | |||
62 | /* | ||
63 | * The shim layer provided by these inline functions allows the compiler | ||
64 | * to reduce all cleancache hooks to nothingness if CONFIG_CLEANCACHE | ||
65 | * is disabled, to a single global variable check if CONFIG_CLEANCACHE | ||
66 | * is enabled but no cleancache "backend" has dynamically enabled it, | ||
67 | * and, for the most frequent cleancache ops, to a single global variable | ||
68 | * check plus a superblock element comparison if CONFIG_CLEANCACHE is enabled | ||
69 | * and a cleancache backend has dynamically enabled cleancache, but the | ||
70 | * filesystem referenced by that cleancache op has not enabled cleancache. | ||
71 | * As a result, CONFIG_CLEANCACHE can be enabled by default with essentially | ||
72 | * no measurable performance impact. | ||
73 | */ | ||
74 | |||
75 | static inline void cleancache_init_fs(struct super_block *sb) | ||
76 | { | ||
77 | if (cleancache_enabled) | ||
78 | __cleancache_init_fs(sb); | ||
79 | } | ||
80 | |||
81 | static inline void cleancache_init_shared_fs(char *uuid, struct super_block *sb) | ||
82 | { | ||
83 | if (cleancache_enabled) | ||
84 | __cleancache_init_shared_fs(uuid, sb); | ||
85 | } | ||
86 | |||
87 | static inline int cleancache_get_page(struct page *page) | ||
88 | { | ||
89 | int ret = -1; | ||
90 | |||
91 | if (cleancache_enabled && cleancache_fs_enabled(page)) | ||
92 | ret = __cleancache_get_page(page); | ||
93 | return ret; | ||
94 | } | ||
95 | |||
96 | static inline void cleancache_put_page(struct page *page) | ||
97 | { | ||
98 | if (cleancache_enabled && cleancache_fs_enabled(page)) | ||
99 | __cleancache_put_page(page); | ||
100 | } | ||
101 | |||
102 | static inline void cleancache_flush_page(struct address_space *mapping, | ||
103 | struct page *page) | ||
104 | { | ||
105 | /* careful... page->mapping is NULL sometimes when this is called */ | ||
106 | if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping)) | ||
107 | __cleancache_flush_page(mapping, page); | ||
108 | } | ||
109 | |||
110 | static inline void cleancache_flush_inode(struct address_space *mapping) | ||
111 | { | ||
112 | if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping)) | ||
113 | __cleancache_flush_inode(mapping); | ||
114 | } | ||
115 | |||
116 | static inline void cleancache_flush_fs(struct super_block *sb) | ||
117 | { | ||
118 | if (cleancache_enabled) | ||
119 | __cleancache_flush_fs(sb); | ||
120 | } | ||
121 | |||
122 | #endif /* _LINUX_CLEANCACHE_H */ | ||
diff --git a/include/linux/fs.h b/include/linux/fs.h index 3f9d3251790d..241609346dfb 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -1428,6 +1428,11 @@ struct super_block { | |||
1428 | */ | 1428 | */ |
1429 | char __rcu *s_options; | 1429 | char __rcu *s_options; |
1430 | const struct dentry_operations *s_d_op; /* default d_op for dentries */ | 1430 | const struct dentry_operations *s_d_op; /* default d_op for dentries */ |
1431 | |||
1432 | /* | ||
1433 | * Saved pool identifier for cleancache (-1 means none) | ||
1434 | */ | ||
1435 | int cleancache_poolid; | ||
1431 | }; | 1436 | }; |
1432 | 1437 | ||
1433 | extern struct timespec current_fs_time(struct super_block *sb); | 1438 | extern struct timespec current_fs_time(struct super_block *sb); |
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index b33257bc7e83..70213b4515eb 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h | |||
@@ -58,6 +58,7 @@ | |||
58 | #define __HYPERVISOR_event_channel_op 32 | 58 | #define __HYPERVISOR_event_channel_op 32 |
59 | #define __HYPERVISOR_physdev_op 33 | 59 | #define __HYPERVISOR_physdev_op 33 |
60 | #define __HYPERVISOR_hvm_op 34 | 60 | #define __HYPERVISOR_hvm_op 34 |
61 | #define __HYPERVISOR_tmem_op 38 | ||
61 | 62 | ||
62 | /* Architecture-specific hypercall definitions. */ | 63 | /* Architecture-specific hypercall definitions. */ |
63 | #define __HYPERVISOR_arch_0 48 | 64 | #define __HYPERVISOR_arch_0 48 |
@@ -461,6 +462,27 @@ typedef uint8_t xen_domain_handle_t[16]; | |||
461 | #define __mk_unsigned_long(x) x ## UL | 462 | #define __mk_unsigned_long(x) x ## UL |
462 | #define mk_unsigned_long(x) __mk_unsigned_long(x) | 463 | #define mk_unsigned_long(x) __mk_unsigned_long(x) |
463 | 464 | ||
465 | #define TMEM_SPEC_VERSION 1 | ||
466 | |||
467 | struct tmem_op { | ||
468 | uint32_t cmd; | ||
469 | int32_t pool_id; | ||
470 | union { | ||
471 | struct { /* for cmd == TMEM_NEW_POOL */ | ||
472 | uint64_t uuid[2]; | ||
473 | uint32_t flags; | ||
474 | } new; | ||
475 | struct { | ||
476 | uint64_t oid[3]; | ||
477 | uint32_t index; | ||
478 | uint32_t tmem_offset; | ||
479 | uint32_t pfn_offset; | ||
480 | uint32_t len; | ||
481 | GUEST_HANDLE(void) gmfn; /* guest machine page frame */ | ||
482 | } gen; | ||
483 | } u; | ||
484 | }; | ||
485 | |||
464 | #else /* __ASSEMBLY__ */ | 486 | #else /* __ASSEMBLY__ */ |
465 | 487 | ||
466 | /* In assembly code we cannot use C numeric constant suffixes. */ | 488 | /* In assembly code we cannot use C numeric constant suffixes. */ |
diff --git a/mm/Kconfig b/mm/Kconfig index e9c0c61f2ddd..8ca47a5ee9c8 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
@@ -347,3 +347,26 @@ config NEED_PER_CPU_KM | |||
347 | depends on !SMP | 347 | depends on !SMP |
348 | bool | 348 | bool |
349 | default y | 349 | default y |
350 | |||
351 | config CLEANCACHE | ||
352 | bool "Enable cleancache driver to cache clean pages if tmem is present" | ||
353 | default n | ||
354 | help | ||
355 | Cleancache can be thought of as a page-granularity victim cache | ||
356 | for clean pages that the kernel's pageframe replacement algorithm | ||
357 | (PFRA) would like to keep around, but can't since there isn't enough | ||
358 | memory. So when the PFRA "evicts" a page, it first attempts to use | ||
359 | cleancache code to put the data contained in that page into | ||
360 | "transcendent memory", memory that is not directly accessible or | ||
361 | addressable by the kernel and is of unknown and possibly | ||
362 | time-varying size. And when a cleancache-enabled | ||
363 | filesystem wishes to access a page in a file on disk, it first | ||
364 | checks cleancache to see if it already contains it; if it does, | ||
365 | the page is copied into the kernel and a disk access is avoided. | ||
366 | When a transcendent memory driver is available (such as zcache or | ||
367 | Xen transcendent memory), a significant I/O reduction | ||
368 | may be achieved. When none is available, all cleancache calls | ||
369 | are reduced to a single pointer-compare-against-NULL resulting | ||
370 | in a negligible performance hit. | ||
371 | |||
372 | If unsure, say Y to enable cleancache | ||
diff --git a/mm/Makefile b/mm/Makefile index 42a8326c3e3d..836e4163c1bf 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
@@ -49,3 +49,4 @@ obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o | |||
49 | obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o | 49 | obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o |
50 | obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o | 50 | obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o |
51 | obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o | 51 | obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o |
52 | obj-$(CONFIG_CLEANCACHE) += cleancache.o | ||
diff --git a/mm/cleancache.c b/mm/cleancache.c new file mode 100644 index 000000000000..bcaae4c2a770 --- /dev/null +++ b/mm/cleancache.c | |||
@@ -0,0 +1,244 @@ | |||
1 | /* | ||
2 | * Cleancache frontend | ||
3 | * | ||
4 | * This code provides the generic "frontend" layer to call a matching | ||
5 | * "backend" driver implementation of cleancache. See | ||
6 | * Documentation/vm/cleancache.txt for more information. | ||
7 | * | ||
8 | * Copyright (C) 2009-2010 Oracle Corp. All rights reserved. | ||
9 | * Author: Dan Magenheimer | ||
10 | * | ||
11 | * This work is licensed under the terms of the GNU GPL, version 2. | ||
12 | */ | ||
13 | |||
14 | #include <linux/module.h> | ||
15 | #include <linux/fs.h> | ||
16 | #include <linux/exportfs.h> | ||
17 | #include <linux/mm.h> | ||
18 | #include <linux/cleancache.h> | ||
19 | |||
20 | /* | ||
21 | * This global enablement flag may be read thousands of times per second | ||
22 | * by cleancache_get/put/flush even on systems where cleancache_ops | ||
23 | * is not claimed (e.g. cleancache is config'ed on but remains | ||
24 | * disabled), so is preferred to the slower alternative: a function | ||
25 | * call that checks a non-global. | ||
26 | */ | ||
27 | int cleancache_enabled; | ||
28 | EXPORT_SYMBOL(cleancache_enabled); | ||
29 | |||
30 | /* | ||
31 | * cleancache_ops is set by cleancache_register_ops to contain the pointers | ||
32 | * to the cleancache "backend" implementation functions. | ||
33 | */ | ||
34 | static struct cleancache_ops cleancache_ops; | ||
35 | |||
36 | /* useful stats available in /sys/kernel/mm/cleancache */ | ||
37 | static unsigned long cleancache_succ_gets; | ||
38 | static unsigned long cleancache_failed_gets; | ||
39 | static unsigned long cleancache_puts; | ||
40 | static unsigned long cleancache_flushes; | ||
41 | |||
42 | /* | ||
43 | * register operations for cleancache, returning previous thus allowing | ||
44 | * detection of multiple backends and possible nesting | ||
45 | */ | ||
46 | struct cleancache_ops cleancache_register_ops(struct cleancache_ops *ops) | ||
47 | { | ||
48 | struct cleancache_ops old = cleancache_ops; | ||
49 | |||
50 | cleancache_ops = *ops; | ||
51 | cleancache_enabled = 1; | ||
52 | return old; | ||
53 | } | ||
54 | EXPORT_SYMBOL(cleancache_register_ops); | ||
55 | |||
56 | /* Called by a cleancache-enabled filesystem at time of mount */ | ||
57 | void __cleancache_init_fs(struct super_block *sb) | ||
58 | { | ||
59 | sb->cleancache_poolid = (*cleancache_ops.init_fs)(PAGE_SIZE); | ||
60 | } | ||
61 | EXPORT_SYMBOL(__cleancache_init_fs); | ||
62 | |||
63 | /* Called by a cleancache-enabled clustered filesystem at time of mount */ | ||
64 | void __cleancache_init_shared_fs(char *uuid, struct super_block *sb) | ||
65 | { | ||
66 | sb->cleancache_poolid = | ||
67 | (*cleancache_ops.init_shared_fs)(uuid, PAGE_SIZE); | ||
68 | } | ||
69 | EXPORT_SYMBOL(__cleancache_init_shared_fs); | ||
70 | |||
71 | /* | ||
72 | * If the filesystem uses exportable filehandles, use the filehandle as | ||
73 | * the key, else use the inode number. | ||
74 | */ | ||
75 | static int cleancache_get_key(struct inode *inode, | ||
76 | struct cleancache_filekey *key) | ||
77 | { | ||
78 | int (*fhfn)(struct dentry *, __u32 *fh, int *, int); | ||
79 | int len = 0, maxlen = CLEANCACHE_KEY_MAX; | ||
80 | struct super_block *sb = inode->i_sb; | ||
81 | |||
82 | key->u.ino = inode->i_ino; | ||
83 | if (sb->s_export_op != NULL) { | ||
84 | fhfn = sb->s_export_op->encode_fh; | ||
85 | if (fhfn) { | ||
86 | struct dentry d; | ||
87 | d.d_inode = inode; | ||
88 | len = (*fhfn)(&d, &key->u.fh[0], &maxlen, 0); | ||
89 | if (len <= 0 || len == 255) | ||
90 | return -1; | ||
91 | if (maxlen > CLEANCACHE_KEY_MAX) | ||
92 | return -1; | ||
93 | } | ||
94 | } | ||
95 | return 0; | ||
96 | } | ||
97 | |||
98 | /* | ||
99 | * "Get" data from cleancache associated with the poolid/inode/index | ||
100 | * that were specified when the data was put to cleancache and, if | ||
101 | * successful, use it to fill the specified page with data and return 0. | ||
102 | * If the get fails, the pageframe is left unchanged and -1 is returned. | ||
103 | * Page must be locked by caller. | ||
104 | */ | ||
105 | int __cleancache_get_page(struct page *page) | ||
106 | { | ||
107 | int ret = -1; | ||
108 | int pool_id; | ||
109 | struct cleancache_filekey key = { .u.key = { 0 } }; | ||
110 | |||
111 | VM_BUG_ON(!PageLocked(page)); | ||
112 | pool_id = page->mapping->host->i_sb->cleancache_poolid; | ||
113 | if (pool_id < 0) | ||
114 | goto out; | ||
115 | |||
116 | if (cleancache_get_key(page->mapping->host, &key) < 0) | ||
117 | goto out; | ||
118 | |||
119 | ret = (*cleancache_ops.get_page)(pool_id, key, page->index, page); | ||
120 | if (ret == 0) | ||
121 | cleancache_succ_gets++; | ||
122 | else | ||
123 | cleancache_failed_gets++; | ||
124 | out: | ||
125 | return ret; | ||
126 | } | ||
127 | EXPORT_SYMBOL(__cleancache_get_page); | ||
128 | |||
129 | /* | ||
130 | * "Put" data from a page to cleancache and associate it with the | ||
131 | * (previously-obtained per-filesystem) poolid and the page's | ||
132 | * inode and page index. Page must be locked. Note that a put_page | ||
133 | * always "succeeds", though a subsequent get_page may succeed or fail. | ||
134 | */ | ||
135 | void __cleancache_put_page(struct page *page) | ||
136 | { | ||
137 | int pool_id; | ||
138 | struct cleancache_filekey key = { .u.key = { 0 } }; | ||
139 | |||
140 | VM_BUG_ON(!PageLocked(page)); | ||
141 | pool_id = page->mapping->host->i_sb->cleancache_poolid; | ||
142 | if (pool_id >= 0 && | ||
143 | cleancache_get_key(page->mapping->host, &key) >= 0) { | ||
144 | (*cleancache_ops.put_page)(pool_id, key, page->index, page); | ||
145 | cleancache_puts++; | ||
146 | } | ||
147 | } | ||
148 | EXPORT_SYMBOL(__cleancache_put_page); | ||
149 | |||
150 | /* | ||
151 | * Flush any data from cleancache associated with the poolid and the | ||
152 | * page's inode and page index so that a subsequent "get" will fail. | ||
153 | */ | ||
154 | void __cleancache_flush_page(struct address_space *mapping, struct page *page) | ||
155 | { | ||
156 | /* careful... page->mapping is NULL sometimes when this is called */ | ||
157 | int pool_id = mapping->host->i_sb->cleancache_poolid; | ||
158 | struct cleancache_filekey key = { .u.key = { 0 } }; | ||
159 | |||
160 | if (pool_id >= 0) { | ||
161 | VM_BUG_ON(!PageLocked(page)); | ||
162 | if (cleancache_get_key(mapping->host, &key) >= 0) { | ||
163 | (*cleancache_ops.flush_page)(pool_id, key, page->index); | ||
164 | cleancache_flushes++; | ||
165 | } | ||
166 | } | ||
167 | } | ||
168 | EXPORT_SYMBOL(__cleancache_flush_page); | ||
169 | |||
170 | /* | ||
171 | * Flush all data from cleancache associated with the poolid and the | ||
172 | * mapping's inode so that all subsequent gets to this poolid/inode | ||
173 | * will fail. | ||
174 | */ | ||
175 | void __cleancache_flush_inode(struct address_space *mapping) | ||
176 | { | ||
177 | int pool_id = mapping->host->i_sb->cleancache_poolid; | ||
178 | struct cleancache_filekey key = { .u.key = { 0 } }; | ||
179 | |||
180 | if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0) | ||
181 | (*cleancache_ops.flush_inode)(pool_id, key); | ||
182 | } | ||
183 | EXPORT_SYMBOL(__cleancache_flush_inode); | ||
184 | |||
185 | /* | ||
186 | * Called by any cleancache-enabled filesystem at time of unmount; | ||
187 | * note that pool_id is surrendered and may be returned by a subsequent | ||
188 | * cleancache_init_fs or cleancache_init_shared_fs | ||
189 | */ | ||
190 | void __cleancache_flush_fs(struct super_block *sb) | ||
191 | { | ||
192 | if (sb->cleancache_poolid >= 0) { | ||
193 | int old_poolid = sb->cleancache_poolid; | ||
194 | sb->cleancache_poolid = -1; | ||
195 | (*cleancache_ops.flush_fs)(old_poolid); | ||
196 | } | ||
197 | } | ||
198 | EXPORT_SYMBOL(__cleancache_flush_fs); | ||
199 | |||
200 | #ifdef CONFIG_SYSFS | ||
201 | |||
202 | /* see Documentation/ABI/testing/sysfs-kernel-mm-cleancache */ | ||
203 | |||
204 | #define CLEANCACHE_SYSFS_RO(_name) \ | ||
205 | static ssize_t cleancache_##_name##_show(struct kobject *kobj, \ | ||
206 | struct kobj_attribute *attr, char *buf) \ | ||
207 | { \ | ||
208 | return sprintf(buf, "%lu\n", cleancache_##_name); \ | ||
209 | } \ | ||
210 | static struct kobj_attribute cleancache_##_name##_attr = { \ | ||
211 | .attr = { .name = __stringify(_name), .mode = 0444 }, \ | ||
212 | .show = cleancache_##_name##_show, \ | ||
213 | } | ||
214 | |||
215 | CLEANCACHE_SYSFS_RO(succ_gets); | ||
216 | CLEANCACHE_SYSFS_RO(failed_gets); | ||
217 | CLEANCACHE_SYSFS_RO(puts); | ||
218 | CLEANCACHE_SYSFS_RO(flushes); | ||
219 | |||
220 | static struct attribute *cleancache_attrs[] = { | ||
221 | &cleancache_succ_gets_attr.attr, | ||
222 | &cleancache_failed_gets_attr.attr, | ||
223 | &cleancache_puts_attr.attr, | ||
224 | &cleancache_flushes_attr.attr, | ||
225 | NULL, | ||
226 | }; | ||
227 | |||
228 | static struct attribute_group cleancache_attr_group = { | ||
229 | .attrs = cleancache_attrs, | ||
230 | .name = "cleancache", | ||
231 | }; | ||
232 | |||
233 | #endif /* CONFIG_SYSFS */ | ||
234 | |||
235 | static int __init init_cleancache(void) | ||
236 | { | ||
237 | #ifdef CONFIG_SYSFS | ||
238 | int err; | ||
239 | |||
240 | err = sysfs_create_group(mm_kobj, &cleancache_attr_group); | ||
241 | #endif /* CONFIG_SYSFS */ | ||
242 | return 0; | ||
243 | } | ||
244 | module_init(init_cleancache) | ||
diff --git a/mm/filemap.c b/mm/filemap.c index 68e782b3d3de..7455ccd8bda8 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */ | 34 | #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */ |
35 | #include <linux/memcontrol.h> | 35 | #include <linux/memcontrol.h> |
36 | #include <linux/mm_inline.h> /* for page_is_file_cache() */ | 36 | #include <linux/mm_inline.h> /* for page_is_file_cache() */ |
37 | #include <linux/cleancache.h> | ||
37 | #include "internal.h" | 38 | #include "internal.h" |
38 | 39 | ||
39 | /* | 40 | /* |
@@ -118,6 +119,16 @@ void __delete_from_page_cache(struct page *page) | |||
118 | { | 119 | { |
119 | struct address_space *mapping = page->mapping; | 120 | struct address_space *mapping = page->mapping; |
120 | 121 | ||
122 | /* | ||
123 | * if we're uptodate, flush out into the cleancache, otherwise | ||
124 | * invalidate any existing cleancache entries. We can't leave | ||
125 | * stale data around in the cleancache once our page is gone | ||
126 | */ | ||
127 | if (PageUptodate(page) && PageMappedToDisk(page)) | ||
128 | cleancache_put_page(page); | ||
129 | else | ||
130 | cleancache_flush_page(mapping, page); | ||
131 | |||
121 | radix_tree_delete(&mapping->page_tree, page->index); | 132 | radix_tree_delete(&mapping->page_tree, page->index); |
122 | page->mapping = NULL; | 133 | page->mapping = NULL; |
123 | mapping->nrpages--; | 134 | mapping->nrpages--; |
diff --git a/mm/truncate.c b/mm/truncate.c index a95667529135..3a29a6180212 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/task_io_accounting_ops.h> | 19 | #include <linux/task_io_accounting_ops.h> |
20 | #include <linux/buffer_head.h> /* grr. try_to_release_page, | 20 | #include <linux/buffer_head.h> /* grr. try_to_release_page, |
21 | do_invalidatepage */ | 21 | do_invalidatepage */ |
22 | #include <linux/cleancache.h> | ||
22 | #include "internal.h" | 23 | #include "internal.h" |
23 | 24 | ||
24 | 25 | ||
@@ -51,6 +52,7 @@ void do_invalidatepage(struct page *page, unsigned long offset) | |||
51 | static inline void truncate_partial_page(struct page *page, unsigned partial) | 52 | static inline void truncate_partial_page(struct page *page, unsigned partial) |
52 | { | 53 | { |
53 | zero_user_segment(page, partial, PAGE_CACHE_SIZE); | 54 | zero_user_segment(page, partial, PAGE_CACHE_SIZE); |
55 | cleancache_flush_page(page->mapping, page); | ||
54 | if (page_has_private(page)) | 56 | if (page_has_private(page)) |
55 | do_invalidatepage(page, partial); | 57 | do_invalidatepage(page, partial); |
56 | } | 58 | } |
@@ -214,6 +216,7 @@ void truncate_inode_pages_range(struct address_space *mapping, | |||
214 | pgoff_t next; | 216 | pgoff_t next; |
215 | int i; | 217 | int i; |
216 | 218 | ||
219 | cleancache_flush_inode(mapping); | ||
217 | if (mapping->nrpages == 0) | 220 | if (mapping->nrpages == 0) |
218 | return; | 221 | return; |
219 | 222 | ||
@@ -291,6 +294,7 @@ void truncate_inode_pages_range(struct address_space *mapping, | |||
291 | pagevec_release(&pvec); | 294 | pagevec_release(&pvec); |
292 | mem_cgroup_uncharge_end(); | 295 | mem_cgroup_uncharge_end(); |
293 | } | 296 | } |
297 | cleancache_flush_inode(mapping); | ||
294 | } | 298 | } |
295 | EXPORT_SYMBOL(truncate_inode_pages_range); | 299 | EXPORT_SYMBOL(truncate_inode_pages_range); |
296 | 300 | ||
@@ -440,6 +444,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping, | |||
440 | int did_range_unmap = 0; | 444 | int did_range_unmap = 0; |
441 | int wrapped = 0; | 445 | int wrapped = 0; |
442 | 446 | ||
447 | cleancache_flush_inode(mapping); | ||
443 | pagevec_init(&pvec, 0); | 448 | pagevec_init(&pvec, 0); |
444 | next = start; | 449 | next = start; |
445 | while (next <= end && !wrapped && | 450 | while (next <= end && !wrapped && |
@@ -498,6 +503,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping, | |||
498 | mem_cgroup_uncharge_end(); | 503 | mem_cgroup_uncharge_end(); |
499 | cond_resched(); | 504 | cond_resched(); |
500 | } | 505 | } |
506 | cleancache_flush_inode(mapping); | ||
501 | return ret; | 507 | return ret; |
502 | } | 508 | } |
503 | EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range); | 509 | EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range); |