aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ceph
diff options
context:
space:
mode:
authorMilosz Tanski <milosz@adfin.com>2013-08-21 17:29:54 -0400
committerMilosz Tanski <milosz@adfin.com>2013-09-06 12:50:11 -0400
commit99ccbd229cf7453206bc858e795ec1f0345ff258 (patch)
tree343ab082d154d7c970d90bf44c9887fdb0251cdb /fs/ceph
parentcd0a2df681ec2af45f50c555c2a39dc92a4dff71 (diff)
ceph: use fscache as a local persistent cache
Adding support for fscache to the Ceph filesystem. This brings it on par with some of the other network filesystems in Linux (like NFS, AFS, etc.). In order to mount the filesystem with fscache, the 'fsc' mount option must be passed. Signed-off-by: Milosz Tanski <milosz@adfin.com> Signed-off-by: Sage Weil <sage@inktank.com>
Diffstat (limited to 'fs/ceph')
-rw-r--r--fs/ceph/Kconfig9
-rw-r--r--fs/ceph/Makefile1
-rw-r--r--fs/ceph/addr.c37
-rw-r--r--fs/ceph/cache.c393
-rw-r--r--fs/ceph/cache.h138
-rw-r--r--fs/ceph/caps.c19
-rw-r--r--fs/ceph/file.c17
-rw-r--r--fs/ceph/inode.c14
-rw-r--r--fs/ceph/super.c35
-rw-r--r--fs/ceph/super.h16
10 files changed, 666 insertions, 13 deletions
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
index 49bc78243db9..ac9a2ef5bb9b 100644
--- a/fs/ceph/Kconfig
+++ b/fs/ceph/Kconfig
@@ -16,3 +16,12 @@ config CEPH_FS
16 16
17 If unsure, say N. 17 If unsure, say N.
18 18
19if CEPH_FS
20config CEPH_FSCACHE
21 bool "Enable Ceph client caching support"
22 depends on CEPH_FS=m && FSCACHE || CEPH_FS=y && FSCACHE=y
23 help
24 Choose Y here to enable persistent, read-only local
25 caching support for Ceph clients using FS-Cache
26
27endif
diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile
index bd352125e829..32e30106a2f0 100644
--- a/fs/ceph/Makefile
+++ b/fs/ceph/Makefile
@@ -9,3 +9,4 @@ ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
9 mds_client.o mdsmap.o strings.o ceph_frag.o \ 9 mds_client.o mdsmap.o strings.o ceph_frag.o \
10 debugfs.o 10 debugfs.o
11 11
12ceph-$(CONFIG_CEPH_FSCACHE) += cache.o
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 3bed7da38326..3a21a7cbc21c 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -11,6 +11,7 @@
11 11
12#include "super.h" 12#include "super.h"
13#include "mds_client.h" 13#include "mds_client.h"
14#include "cache.h"
14#include <linux/ceph/osd_client.h> 15#include <linux/ceph/osd_client.h>
15 16
16/* 17/*
@@ -144,6 +145,11 @@ static void ceph_invalidatepage(struct page *page, unsigned int offset,
144 return; 145 return;
145 } 146 }
146 147
148 ceph_invalidate_fscache_page(inode, page);
149
150 if (!PagePrivate(page))
151 return;
152
147 /* 153 /*
148 * We can get non-dirty pages here due to races between 154 * We can get non-dirty pages here due to races between
149 * set_page_dirty and truncate_complete_page; just spit out a 155 * set_page_dirty and truncate_complete_page; just spit out a
@@ -163,14 +169,17 @@ static void ceph_invalidatepage(struct page *page, unsigned int offset,
163 ClearPagePrivate(page); 169 ClearPagePrivate(page);
164} 170}
165 171
166/* just a sanity check */
167static int ceph_releasepage(struct page *page, gfp_t g) 172static int ceph_releasepage(struct page *page, gfp_t g)
168{ 173{
169 struct inode *inode = page->mapping ? page->mapping->host : NULL; 174 struct inode *inode = page->mapping ? page->mapping->host : NULL;
170 dout("%p releasepage %p idx %lu\n", inode, page, page->index); 175 dout("%p releasepage %p idx %lu\n", inode, page, page->index);
171 WARN_ON(PageDirty(page)); 176 WARN_ON(PageDirty(page));
172 WARN_ON(PagePrivate(page)); 177
173 return 0; 178 /* Can we release the page from the cache? */
179 if (!ceph_release_fscache_page(page, g))
180 return 0;
181
182 return !PagePrivate(page);
174} 183}
175 184
176/* 185/*
@@ -180,11 +189,16 @@ static int readpage_nounlock(struct file *filp, struct page *page)
180{ 189{
181 struct inode *inode = file_inode(filp); 190 struct inode *inode = file_inode(filp);
182 struct ceph_inode_info *ci = ceph_inode(inode); 191 struct ceph_inode_info *ci = ceph_inode(inode);
183 struct ceph_osd_client *osdc = 192 struct ceph_osd_client *osdc =
184 &ceph_inode_to_client(inode)->client->osdc; 193 &ceph_inode_to_client(inode)->client->osdc;
185 int err = 0; 194 int err = 0;
186 u64 len = PAGE_CACHE_SIZE; 195 u64 len = PAGE_CACHE_SIZE;
187 196
197 err = ceph_readpage_from_fscache(inode, page);
198
199 if (err == 0)
200 goto out;
201
188 dout("readpage inode %p file %p page %p index %lu\n", 202 dout("readpage inode %p file %p page %p index %lu\n",
189 inode, filp, page, page->index); 203 inode, filp, page, page->index);
190 err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, 204 err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
@@ -202,6 +216,9 @@ static int readpage_nounlock(struct file *filp, struct page *page)
202 } 216 }
203 SetPageUptodate(page); 217 SetPageUptodate(page);
204 218
219 if (err == 0)
220 ceph_readpage_to_fscache(inode, page);
221
205out: 222out:
206 return err < 0 ? err : 0; 223 return err < 0 ? err : 0;
207} 224}
@@ -244,6 +261,7 @@ static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg)
244 page->index); 261 page->index);
245 flush_dcache_page(page); 262 flush_dcache_page(page);
246 SetPageUptodate(page); 263 SetPageUptodate(page);
264 ceph_readpage_to_fscache(inode, page);
247 unlock_page(page); 265 unlock_page(page);
248 page_cache_release(page); 266 page_cache_release(page);
249 bytes -= PAGE_CACHE_SIZE; 267 bytes -= PAGE_CACHE_SIZE;
@@ -313,7 +331,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
313 page = list_entry(page_list->prev, struct page, lru); 331 page = list_entry(page_list->prev, struct page, lru);
314 BUG_ON(PageLocked(page)); 332 BUG_ON(PageLocked(page));
315 list_del(&page->lru); 333 list_del(&page->lru);
316 334
317 dout("start_read %p adding %p idx %lu\n", inode, page, 335 dout("start_read %p adding %p idx %lu\n", inode, page,
318 page->index); 336 page->index);
319 if (add_to_page_cache_lru(page, &inode->i_data, page->index, 337 if (add_to_page_cache_lru(page, &inode->i_data, page->index,
@@ -360,6 +378,12 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
360 int rc = 0; 378 int rc = 0;
361 int max = 0; 379 int max = 0;
362 380
381 rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list,
382 &nr_pages);
383
384 if (rc == 0)
385 goto out;
386
363 if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE) 387 if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE)
364 max = (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1) 388 max = (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1)
365 >> PAGE_SHIFT; 389 >> PAGE_SHIFT;
@@ -479,6 +503,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
479 CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb)) 503 CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
480 set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC); 504 set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC);
481 505
506 ceph_readpage_to_fscache(inode, page);
507
482 set_page_writeback(page); 508 set_page_writeback(page);
483 err = ceph_osdc_writepages(osdc, ceph_vino(inode), 509 err = ceph_osdc_writepages(osdc, ceph_vino(inode),
484 &ci->i_layout, snapc, 510 &ci->i_layout, snapc,
@@ -534,7 +560,6 @@ static void ceph_release_pages(struct page **pages, int num)
534 pagevec_release(&pvec); 560 pagevec_release(&pvec);
535} 561}
536 562
537
538/* 563/*
539 * async writeback completion handler. 564 * async writeback completion handler.
540 * 565 *
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
new file mode 100644
index 000000000000..5c413ecf1f15
--- /dev/null
+++ b/fs/ceph/cache.c
@@ -0,0 +1,393 @@
1/*
2 * Ceph cache definitions.
3 *
4 * Copyright (C) 2013 by Adfin Solutions, Inc. All Rights Reserved.
5 * Written by Milosz Tanski (milosz@adfin.com)
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2
9 * as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to:
18 * Free Software Foundation
19 * 51 Franklin Street, Fifth Floor
20 * Boston, MA 02111-1301 USA
21 *
22 */
23
24#include <linux/fscache.h>
25
26#include "super.h"
27#include "cache.h"
28
29struct ceph_aux_inode {
30 struct timespec mtime;
31 loff_t size;
32};
33
34struct fscache_netfs ceph_cache_netfs = {
35 .name = "ceph",
36 .version = 0,
37};
38
39static uint16_t ceph_fscache_session_get_key(const void *cookie_netfs_data,
40 void *buffer, uint16_t maxbuf)
41{
42 const struct ceph_fs_client* fsc = cookie_netfs_data;
43 uint16_t klen;
44
45 klen = sizeof(fsc->client->fsid);
46 if (klen > maxbuf)
47 return 0;
48
49 memcpy(buffer, &fsc->client->fsid, klen);
50 return klen;
51}
52
53static const struct fscache_cookie_def ceph_fscache_fsid_object_def = {
54 .name = "CEPH.fsid",
55 .type = FSCACHE_COOKIE_TYPE_INDEX,
56 .get_key = ceph_fscache_session_get_key,
57};
58
59int ceph_fscache_register()
60{
61 return fscache_register_netfs(&ceph_cache_netfs);
62}
63
64void ceph_fscache_unregister()
65{
66 fscache_unregister_netfs(&ceph_cache_netfs);
67}
68
69int ceph_fscache_register_fs(struct ceph_fs_client* fsc)
70{
71 fsc->fscache = fscache_acquire_cookie(ceph_cache_netfs.primary_index,
72 &ceph_fscache_fsid_object_def,
73 fsc);
74
75 if (fsc->fscache == NULL) {
76 pr_err("Unable to resgister fsid: %p fscache cookie", fsc);
77 return 0;
78 }
79
80 fsc->revalidate_wq = alloc_workqueue("ceph-revalidate", 0, 1);
81 if (fsc->revalidate_wq == NULL)
82 return -ENOMEM;
83
84 return 0;
85}
86
87static uint16_t ceph_fscache_inode_get_key(const void *cookie_netfs_data,
88 void *buffer, uint16_t maxbuf)
89{
90 const struct ceph_inode_info* ci = cookie_netfs_data;
91 uint16_t klen;
92
93 /* use ceph virtual inode (id + snaphot) */
94 klen = sizeof(ci->i_vino);
95 if (klen > maxbuf)
96 return 0;
97
98 memcpy(buffer, &ci->i_vino, klen);
99 return klen;
100}
101
102static uint16_t ceph_fscache_inode_get_aux(const void *cookie_netfs_data,
103 void *buffer, uint16_t bufmax)
104{
105 struct ceph_aux_inode aux;
106 const struct ceph_inode_info* ci = cookie_netfs_data;
107 const struct inode* inode = &ci->vfs_inode;
108
109 memset(&aux, 0, sizeof(aux));
110 aux.mtime = inode->i_mtime;
111 aux.size = inode->i_size;
112
113 memcpy(buffer, &aux, sizeof(aux));
114
115 return sizeof(aux);
116}
117
118static void ceph_fscache_inode_get_attr(const void *cookie_netfs_data,
119 uint64_t *size)
120{
121 const struct ceph_inode_info* ci = cookie_netfs_data;
122 const struct inode* inode = &ci->vfs_inode;
123
124 *size = inode->i_size;
125}
126
127static enum fscache_checkaux ceph_fscache_inode_check_aux(
128 void *cookie_netfs_data, const void *data, uint16_t dlen)
129{
130 struct ceph_aux_inode aux;
131 struct ceph_inode_info* ci = cookie_netfs_data;
132 struct inode* inode = &ci->vfs_inode;
133
134 if (dlen != sizeof(aux))
135 return FSCACHE_CHECKAUX_OBSOLETE;
136
137 memset(&aux, 0, sizeof(aux));
138 aux.mtime = inode->i_mtime;
139 aux.size = inode->i_size;
140
141 if (memcmp(data, &aux, sizeof(aux)) != 0)
142 return FSCACHE_CHECKAUX_OBSOLETE;
143
144 dout("ceph inode 0x%p cached okay", ci);
145 return FSCACHE_CHECKAUX_OKAY;
146}
147
148static void ceph_fscache_inode_now_uncached(void* cookie_netfs_data)
149{
150 struct ceph_inode_info* ci = cookie_netfs_data;
151 struct pagevec pvec;
152 pgoff_t first;
153 int loop, nr_pages;
154
155 pagevec_init(&pvec, 0);
156 first = 0;
157
158 dout("ceph inode 0x%p now uncached", ci);
159
160 while (1) {
161 nr_pages = pagevec_lookup(&pvec, ci->vfs_inode.i_mapping, first,
162 PAGEVEC_SIZE - pagevec_count(&pvec));
163
164 if (!nr_pages)
165 break;
166
167 for (loop = 0; loop < nr_pages; loop++)
168 ClearPageFsCache(pvec.pages[loop]);
169
170 first = pvec.pages[nr_pages - 1]->index + 1;
171
172 pvec.nr = nr_pages;
173 pagevec_release(&pvec);
174 cond_resched();
175 }
176}
177
178static const struct fscache_cookie_def ceph_fscache_inode_object_def = {
179 .name = "CEPH.inode",
180 .type = FSCACHE_COOKIE_TYPE_DATAFILE,
181 .get_key = ceph_fscache_inode_get_key,
182 .get_attr = ceph_fscache_inode_get_attr,
183 .get_aux = ceph_fscache_inode_get_aux,
184 .check_aux = ceph_fscache_inode_check_aux,
185 .now_uncached = ceph_fscache_inode_now_uncached,
186};
187
188void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc,
189 struct ceph_inode_info* ci)
190{
191 struct inode* inode = &ci->vfs_inode;
192
193 /* No caching for filesystem */
194 if (fsc->fscache == NULL)
195 return;
196
197 /* Only cache for regular files that are read only */
198 if ((ci->vfs_inode.i_mode & S_IFREG) == 0)
199 return;
200
201 /* Avoid multiple racing open requests */
202 mutex_lock(&inode->i_mutex);
203
204 if (ci->fscache)
205 goto done;
206
207 ci->fscache = fscache_acquire_cookie(fsc->fscache,
208 &ceph_fscache_inode_object_def,
209 ci);
210done:
211 mutex_unlock(&inode->i_mutex);
212
213}
214
215void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci)
216{
217 struct fscache_cookie* cookie;
218
219 if ((cookie = ci->fscache) == NULL)
220 return;
221
222 ci->fscache = NULL;
223
224 fscache_uncache_all_inode_pages(cookie, &ci->vfs_inode);
225 fscache_relinquish_cookie(cookie, 0);
226}
227
/*
 * Completion callback for single-page cache reads: mark @page up to date
 * when the read succeeded.  The page is deliberately left locked.
 */
static void ceph_vfs_readpage_complete(struct page *page, void *data, int error)
{
	if (error)
		return;

	SetPageUptodate(page);
}
233
/*
 * Completion callback for multi-page cache reads: mark @page up to date on
 * success, and unlock it in every case.
 */
static void ceph_vfs_readpage_complete_unlock(struct page *page, void *data, int error)
{
	if (error == 0)
		SetPageUptodate(page);

	unlock_page(page);
}
241
242static inline int cache_valid(struct ceph_inode_info *ci)
243{
244 return ((ceph_caps_issued(ci) & CEPH_CAP_FILE_CACHE) &&
245 (ci->i_fscache_gen == ci->i_rdcache_gen));
246}
247
248
249/* Atempt to read from the fscache,
250 *
251 * This function is called from the readpage_nounlock context. DO NOT attempt to
252 * unlock the page here (or in the callback).
253 */
254int ceph_readpage_from_fscache(struct inode *inode, struct page *page)
255{
256 struct ceph_inode_info *ci = ceph_inode(inode);
257 int ret;
258
259 if (!cache_valid(ci))
260 return -ENOBUFS;
261
262 ret = fscache_read_or_alloc_page(ci->fscache, page,
263 ceph_vfs_readpage_complete, NULL,
264 GFP_KERNEL);
265
266 switch (ret) {
267 case 0: /* Page found */
268 dout("page read submitted\n");
269 return 0;
270 case -ENOBUFS: /* Pages were not found, and can't be */
271 case -ENODATA: /* Pages were not found */
272 dout("page/inode not in cache\n");
273 return ret;
274 default:
275 dout("%s: unknown error ret = %i\n", __func__, ret);
276 return ret;
277 }
278}
279
280int ceph_readpages_from_fscache(struct inode *inode,
281 struct address_space *mapping,
282 struct list_head *pages,
283 unsigned *nr_pages)
284{
285 struct ceph_inode_info *ci = ceph_inode(inode);
286 int ret;
287
288 if (!cache_valid(ci))
289 return -ENOBUFS;
290
291 ret = fscache_read_or_alloc_pages(ci->fscache, mapping, pages, nr_pages,
292 ceph_vfs_readpage_complete_unlock,
293 NULL, mapping_gfp_mask(mapping));
294
295 switch (ret) {
296 case 0: /* All pages found */
297 dout("all-page read submitted\n");
298 return 0;
299 case -ENOBUFS: /* Some pages were not found, and can't be */
300 case -ENODATA: /* some pages were not found */
301 dout("page/inode not in cache\n");
302 return ret;
303 default:
304 dout("%s: unknown error ret = %i\n", __func__, ret);
305 return ret;
306 }
307}
308
309void ceph_readpage_to_fscache(struct inode *inode, struct page *page)
310{
311 struct ceph_inode_info *ci = ceph_inode(inode);
312 int ret;
313
314 if (!cache_valid(ci))
315 return;
316
317 ret = fscache_write_page(ci->fscache, page, GFP_KERNEL);
318 if (ret)
319 fscache_uncache_page(ci->fscache, page);
320}
321
322void ceph_invalidate_fscache_page(struct inode* inode, struct page *page)
323{
324 struct ceph_inode_info *ci = ceph_inode(inode);
325
326 fscache_wait_on_page_write(ci->fscache, page);
327 fscache_uncache_page(ci->fscache, page);
328}
329
330void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc)
331{
332 if (fsc->revalidate_wq)
333 destroy_workqueue(fsc->revalidate_wq);
334
335 fscache_relinquish_cookie(fsc->fscache, 0);
336 fsc->fscache = NULL;
337}
338
339static void ceph_revalidate_work(struct work_struct *work)
340{
341 int issued;
342 u32 orig_gen;
343 struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
344 i_revalidate_work);
345 struct inode *inode = &ci->vfs_inode;
346
347 spin_lock(&ci->i_ceph_lock);
348 issued = __ceph_caps_issued(ci, NULL);
349 orig_gen = ci->i_rdcache_gen;
350 spin_unlock(&ci->i_ceph_lock);
351
352 if (!(issued & CEPH_CAP_FILE_CACHE)) {
353 dout("revalidate_work lost cache before validation %p\n",
354 inode);
355 goto out;
356 }
357
358 if (!fscache_check_consistency(ci->fscache))
359 fscache_invalidate(ci->fscache);
360
361 spin_lock(&ci->i_ceph_lock);
362 /* Update the new valid generation (backwards sanity check too) */
363 if (orig_gen > ci->i_fscache_gen) {
364 ci->i_fscache_gen = orig_gen;
365 }
366 spin_unlock(&ci->i_ceph_lock);
367
368out:
369 iput(&ci->vfs_inode);
370}
371
372void ceph_queue_revalidate(struct inode *inode)
373{
374 struct ceph_inode_info *ci = ceph_inode(inode);
375
376 ihold(inode);
377
378 if (queue_work(ceph_sb_to_client(inode->i_sb)->revalidate_wq,
379 &ci->i_revalidate_work)) {
380 dout("ceph_queue_revalidate %p\n", inode);
381 } else {
382 dout("ceph_queue_revalidate %p failed\n)", inode);
383 iput(inode);
384 }
385}
386
387void ceph_fscache_inode_init(struct ceph_inode_info *ci)
388{
389 ci->fscache = NULL;
390 /* The first load is verifed cookie open time */
391 ci->i_fscache_gen = 1;
392 INIT_WORK(&ci->i_revalidate_work, ceph_revalidate_work);
393}
diff --git a/fs/ceph/cache.h b/fs/ceph/cache.h
new file mode 100644
index 000000000000..0ea95cb7f389
--- /dev/null
+++ b/fs/ceph/cache.h
@@ -0,0 +1,138 @@
1/*
2 * Ceph cache definitions.
3 *
4 * Copyright (C) 2013 by Adfin Solutions, Inc. All Rights Reserved.
5 * Written by Milosz Tanski (milosz@adfin.com)
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2
9 * as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to:
18 * Free Software Foundation
19 * 51 Franklin Street, Fifth Floor
20 * Boston, MA 02111-1301 USA
21 *
22 */
23
24#ifndef _CEPH_CACHE_H
25#define _CEPH_CACHE_H
26
27#ifdef CONFIG_CEPH_FSCACHE
28
29int ceph_fscache_register(void);
30void ceph_fscache_unregister(void);
31
32int ceph_fscache_register_fs(struct ceph_fs_client* fsc);
33void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc);
34
35void ceph_fscache_inode_init(struct ceph_inode_info *ci);
36void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc,
37 struct ceph_inode_info* ci);
38void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci);
39
40int ceph_readpage_from_fscache(struct inode *inode, struct page *page);
41int ceph_readpages_from_fscache(struct inode *inode,
42 struct address_space *mapping,
43 struct list_head *pages,
44 unsigned *nr_pages);
45void ceph_readpage_to_fscache(struct inode *inode, struct page *page);
46void ceph_invalidate_fscache_page(struct inode* inode, struct page *page);
47void ceph_queue_revalidate(struct inode *inode);
48
49static inline void ceph_fscache_invalidate(struct inode *inode)
50{
51 fscache_invalidate(ceph_inode(inode)->fscache);
52}
53
54static inline int ceph_release_fscache_page(struct page *page, gfp_t gfp)
55{
56 struct inode* inode = page->mapping->host;
57 struct ceph_inode_info *ci = ceph_inode(inode);
58 return fscache_maybe_release_page(ci->fscache, page, gfp);
59}
60
61#else
62
63static inline int ceph_fscache_register(void)
64{
65 return 0;
66}
67
68static inline void ceph_fscache_unregister(void)
69{
70}
71
72static inline int ceph_fscache_register_fs(struct ceph_fs_client* fsc)
73{
74 return 0;
75}
76
77static inline void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc)
78{
79}
80
81static inline void ceph_fscache_inode_init(struct ceph_inode_info *ci)
82{
83}
84
85static inline void ceph_fscache_register_inode_cookie(struct ceph_fs_client* parent_fsc,
86 struct ceph_inode_info* ci)
87{
88}
89
90static inline void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci)
91{
92}
93
94static inline int ceph_readpage_from_fscache(struct inode* inode,
95 struct page *page)
96{
97 return -ENOBUFS;
98}
99
100static inline int ceph_readpages_from_fscache(struct inode *inode,
101 struct address_space *mapping,
102 struct list_head *pages,
103 unsigned *nr_pages)
104{
105 return -ENOBUFS;
106}
107
108static inline void ceph_readpage_to_fscache(struct inode *inode,
109 struct page *page)
110{
111}
112
113static inline void ceph_fscache_invalidate(struct inode *inode)
114{
115}
116
117static inline void ceph_invalidate_fscache_page(struct inode *inode,
118 struct page *page)
119{
120}
121
122static inline int ceph_release_fscache_page(struct page *page, gfp_t gfp)
123{
124 return 1;
125}
126
127static inline void ceph_fscache_readpages_cancel(struct inode *inode,
128 struct list_head *pages)
129{
130}
131
132static inline void ceph_queue_revalidate(struct inode *inode)
133{
134}
135
136#endif
137
138#endif
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 5a26bc1dd799..7b451eb7d123 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -10,6 +10,7 @@
10 10
11#include "super.h" 11#include "super.h"
12#include "mds_client.h" 12#include "mds_client.h"
13#include "cache.h"
13#include <linux/ceph/decode.h> 14#include <linux/ceph/decode.h>
14#include <linux/ceph/messenger.h> 15#include <linux/ceph/messenger.h>
15 16
@@ -479,8 +480,9 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
479 * i_rdcache_gen. 480 * i_rdcache_gen.
480 */ 481 */
481 if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) && 482 if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
482 (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0) 483 (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0) {
483 ci->i_rdcache_gen++; 484 ci->i_rdcache_gen++;
485 }
484 486
485 /* 487 /*
486 * if we are newly issued FILE_SHARED, mark dir not complete; we 488 * if we are newly issued FILE_SHARED, mark dir not complete; we
@@ -2395,6 +2397,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2395 int writeback = 0; 2397 int writeback = 0;
2396 int queue_invalidate = 0; 2398 int queue_invalidate = 0;
2397 int deleted_inode = 0; 2399 int deleted_inode = 0;
2400 int queue_revalidate = 0;
2398 2401
2399 dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", 2402 dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
2400 inode, cap, mds, seq, ceph_cap_string(newcaps)); 2403 inode, cap, mds, seq, ceph_cap_string(newcaps));
@@ -2417,6 +2420,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2417 ci->i_rdcache_revoking = ci->i_rdcache_gen; 2420 ci->i_rdcache_revoking = ci->i_rdcache_gen;
2418 } 2421 }
2419 } 2422 }
2423
2424 ceph_fscache_invalidate(inode);
2420 } 2425 }
2421 2426
2422 /* side effects now are allowed */ 2427 /* side effects now are allowed */
@@ -2458,6 +2463,11 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2458 } 2463 }
2459 } 2464 }
2460 2465
2466 /* Do we need to revalidate our fscache cookie? Don't bother on the
2467 * first cache cap as we already validate at cookie creation time. */
2468 if ((issued & CEPH_CAP_FILE_CACHE) && ci->i_rdcache_gen > 1)
2469 queue_revalidate = 1;
2470
2461 /* size/ctime/mtime/atime? */ 2471 /* size/ctime/mtime/atime? */
2462 ceph_fill_file_size(inode, issued, 2472 ceph_fill_file_size(inode, issued,
2463 le32_to_cpu(grant->truncate_seq), 2473 le32_to_cpu(grant->truncate_seq),
@@ -2542,6 +2552,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2542 BUG_ON(cap->issued & ~cap->implemented); 2552 BUG_ON(cap->issued & ~cap->implemented);
2543 2553
2544 spin_unlock(&ci->i_ceph_lock); 2554 spin_unlock(&ci->i_ceph_lock);
2555
2545 if (writeback) 2556 if (writeback)
2546 /* 2557 /*
2547 * queue inode for writeback: we can't actually call 2558 * queue inode for writeback: we can't actually call
@@ -2553,6 +2564,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2553 ceph_queue_invalidate(inode); 2564 ceph_queue_invalidate(inode);
2554 if (deleted_inode) 2565 if (deleted_inode)
2555 invalidate_aliases(inode); 2566 invalidate_aliases(inode);
2567 if (queue_revalidate)
2568 ceph_queue_revalidate(inode);
2556 if (wake) 2569 if (wake)
2557 wake_up_all(&ci->i_cap_wq); 2570 wake_up_all(&ci->i_cap_wq);
2558 2571
@@ -2709,8 +2722,10 @@ static void handle_cap_trunc(struct inode *inode,
2709 truncate_seq, truncate_size, size); 2722 truncate_seq, truncate_size, size);
2710 spin_unlock(&ci->i_ceph_lock); 2723 spin_unlock(&ci->i_ceph_lock);
2711 2724
2712 if (queue_trunc) 2725 if (queue_trunc) {
2713 ceph_queue_vmtruncate(inode); 2726 ceph_queue_vmtruncate(inode);
2727 ceph_fscache_invalidate(inode);
2728 }
2714} 2729}
2715 2730
2716/* 2731/*
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 20d0222c2e76..3de89829e2a1 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -12,6 +12,7 @@
12 12
13#include "super.h" 13#include "super.h"
14#include "mds_client.h" 14#include "mds_client.h"
15#include "cache.h"
15 16
16/* 17/*
17 * Ceph file operations 18 * Ceph file operations
@@ -69,9 +70,23 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
69{ 70{
70 struct ceph_file_info *cf; 71 struct ceph_file_info *cf;
71 int ret = 0; 72 int ret = 0;
73 struct ceph_inode_info *ci = ceph_inode(inode);
74 struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
75 struct ceph_mds_client *mdsc = fsc->mdsc;
72 76
73 switch (inode->i_mode & S_IFMT) { 77 switch (inode->i_mode & S_IFMT) {
74 case S_IFREG: 78 case S_IFREG:
79 /* First file open request creates the cookie, we want to keep
80 * this cookie around for the lifetime of the inode so as not to
81 * have to worry about fscache register / revoke / operation
82 * races.
83 *
84 * Also, if we know the operation is going to invalidate data
85 * (non readonly) just nuke the cache right away.
86 */
87 ceph_fscache_register_inode_cookie(mdsc->fsc, ci);
88 if ((fmode & CEPH_FILE_MODE_WR))
89 ceph_fscache_invalidate(inode);
75 case S_IFDIR: 90 case S_IFDIR:
76 dout("init_file %p %p 0%o (regular)\n", inode, file, 91 dout("init_file %p %p 0%o (regular)\n", inode, file,
77 inode->i_mode); 92 inode->i_mode);
@@ -182,6 +197,7 @@ int ceph_open(struct inode *inode, struct file *file)
182 spin_unlock(&ci->i_ceph_lock); 197 spin_unlock(&ci->i_ceph_lock);
183 return ceph_init_file(inode, file, fmode); 198 return ceph_init_file(inode, file, fmode);
184 } 199 }
200
185 spin_unlock(&ci->i_ceph_lock); 201 spin_unlock(&ci->i_ceph_lock);
186 202
187 dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted)); 203 dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted));
@@ -192,6 +208,7 @@ int ceph_open(struct inode *inode, struct file *file)
192 } 208 }
193 req->r_inode = inode; 209 req->r_inode = inode;
194 ihold(inode); 210 ihold(inode);
211
195 req->r_num_caps = 1; 212 req->r_num_caps = 1;
196 if (flags & (O_CREAT|O_TRUNC)) 213 if (flags & (O_CREAT|O_TRUNC))
197 parent_inode = ceph_get_dentry_parent_inode(file->f_dentry); 214 parent_inode = ceph_get_dentry_parent_inode(file->f_dentry);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 602ccd8e06b7..eae41cd73276 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -12,6 +12,7 @@
12 12
13#include "super.h" 13#include "super.h"
14#include "mds_client.h" 14#include "mds_client.h"
15#include "cache.h"
15#include <linux/ceph/decode.h> 16#include <linux/ceph/decode.h>
16 17
17/* 18/*
@@ -386,6 +387,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
386 387
387 INIT_WORK(&ci->i_vmtruncate_work, ceph_vmtruncate_work); 388 INIT_WORK(&ci->i_vmtruncate_work, ceph_vmtruncate_work);
388 389
390 ceph_fscache_inode_init(ci);
391
389 return &ci->vfs_inode; 392 return &ci->vfs_inode;
390} 393}
391 394
@@ -405,6 +408,8 @@ void ceph_destroy_inode(struct inode *inode)
405 408
406 dout("destroy_inode %p ino %llx.%llx\n", inode, ceph_vinop(inode)); 409 dout("destroy_inode %p ino %llx.%llx\n", inode, ceph_vinop(inode));
407 410
411 ceph_fscache_unregister_inode_cookie(ci);
412
408 ceph_queue_caps_release(inode); 413 ceph_queue_caps_release(inode);
409 414
410 /* 415 /*
@@ -439,7 +444,6 @@ void ceph_destroy_inode(struct inode *inode)
439 call_rcu(&inode->i_rcu, ceph_i_callback); 444 call_rcu(&inode->i_rcu, ceph_i_callback);
440} 445}
441 446
442
443/* 447/*
444 * Helpers to fill in size, ctime, mtime, and atime. We have to be 448 * Helpers to fill in size, ctime, mtime, and atime. We have to be
445 * careful because either the client or MDS may have more up to date 449 * careful because either the client or MDS may have more up to date
@@ -491,6 +495,10 @@ int ceph_fill_file_size(struct inode *inode, int issued,
491 truncate_size); 495 truncate_size);
492 ci->i_truncate_size = truncate_size; 496 ci->i_truncate_size = truncate_size;
493 } 497 }
498
499 if (queue_trunc)
500 ceph_fscache_invalidate(inode);
501
494 return queue_trunc; 502 return queue_trunc;
495} 503}
496 504
@@ -1079,7 +1087,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1079 * complete. 1087 * complete.
1080 */ 1088 */
1081 ceph_set_dentry_offset(req->r_old_dentry); 1089 ceph_set_dentry_offset(req->r_old_dentry);
1082 dout("dn %p gets new offset %lld\n", req->r_old_dentry, 1090 dout("dn %p gets new offset %lld\n", req->r_old_dentry,
1083 ceph_dentry(req->r_old_dentry)->offset); 1091 ceph_dentry(req->r_old_dentry)->offset);
1084 1092
1085 dn = req->r_old_dentry; /* use old_dentry */ 1093 dn = req->r_old_dentry; /* use old_dentry */
@@ -1494,6 +1502,7 @@ void ceph_queue_vmtruncate(struct inode *inode)
1494 struct ceph_inode_info *ci = ceph_inode(inode); 1502 struct ceph_inode_info *ci = ceph_inode(inode);
1495 1503
1496 ihold(inode); 1504 ihold(inode);
1505
1497 if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq, 1506 if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq,
1498 &ci->i_vmtruncate_work)) { 1507 &ci->i_vmtruncate_work)) {
1499 dout("ceph_queue_vmtruncate %p\n", inode); 1508 dout("ceph_queue_vmtruncate %p\n", inode);
@@ -1565,7 +1574,6 @@ retry:
1565 wake_up_all(&ci->i_cap_wq); 1574 wake_up_all(&ci->i_cap_wq);
1566} 1575}
1567 1576
1568
1569/* 1577/*
1570 * symlinks 1578 * symlinks
1571 */ 1579 */
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 6627b26a800c..6a0951e43044 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -17,6 +17,7 @@
17 17
18#include "super.h" 18#include "super.h"
19#include "mds_client.h" 19#include "mds_client.h"
20#include "cache.h"
20 21
21#include <linux/ceph/ceph_features.h> 22#include <linux/ceph/ceph_features.h>
22#include <linux/ceph/decode.h> 23#include <linux/ceph/decode.h>
@@ -142,6 +143,8 @@ enum {
142 Opt_nodcache, 143 Opt_nodcache,
143 Opt_ino32, 144 Opt_ino32,
144 Opt_noino32, 145 Opt_noino32,
146 Opt_fscache,
147 Opt_nofscache
145}; 148};
146 149
147static match_table_t fsopt_tokens = { 150static match_table_t fsopt_tokens = {
@@ -167,6 +170,8 @@ static match_table_t fsopt_tokens = {
167 {Opt_nodcache, "nodcache"}, 170 {Opt_nodcache, "nodcache"},
168 {Opt_ino32, "ino32"}, 171 {Opt_ino32, "ino32"},
169 {Opt_noino32, "noino32"}, 172 {Opt_noino32, "noino32"},
173 {Opt_fscache, "fsc"},
174 {Opt_nofscache, "nofsc"},
170 {-1, NULL} 175 {-1, NULL}
171}; 176};
172 177
@@ -260,6 +265,12 @@ static int parse_fsopt_token(char *c, void *private)
260 case Opt_noino32: 265 case Opt_noino32:
261 fsopt->flags &= ~CEPH_MOUNT_OPT_INO32; 266 fsopt->flags &= ~CEPH_MOUNT_OPT_INO32;
262 break; 267 break;
268 case Opt_fscache:
269 fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE;
270 break;
271 case Opt_nofscache:
272 fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE;
273 break;
263 default: 274 default:
264 BUG_ON(token); 275 BUG_ON(token);
265 } 276 }
@@ -422,6 +433,10 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
422 seq_puts(m, ",dcache"); 433 seq_puts(m, ",dcache");
423 else 434 else
424 seq_puts(m, ",nodcache"); 435 seq_puts(m, ",nodcache");
436 if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE)
437 seq_puts(m, ",fsc");
438 else
439 seq_puts(m, ",nofsc");
425 440
426 if (fsopt->wsize) 441 if (fsopt->wsize)
427 seq_printf(m, ",wsize=%d", fsopt->wsize); 442 seq_printf(m, ",wsize=%d", fsopt->wsize);
@@ -530,11 +545,18 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
530 if (!fsc->wb_pagevec_pool) 545 if (!fsc->wb_pagevec_pool)
531 goto fail_trunc_wq; 546 goto fail_trunc_wq;
532 547
548 /* setup fscache */
549 if ((fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) &&
550 (ceph_fscache_register_fs(fsc) != 0))
551 goto fail_fscache;
552
533 /* caps */ 553 /* caps */
534 fsc->min_caps = fsopt->max_readdir; 554 fsc->min_caps = fsopt->max_readdir;
535 555
536 return fsc; 556 return fsc;
537 557
558fail_fscache:
559 ceph_fscache_unregister_fs(fsc);
538fail_trunc_wq: 560fail_trunc_wq:
539 destroy_workqueue(fsc->trunc_wq); 561 destroy_workqueue(fsc->trunc_wq);
540fail_pg_inv_wq: 562fail_pg_inv_wq:
@@ -554,6 +576,8 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
554{ 576{
555 dout("destroy_fs_client %p\n", fsc); 577 dout("destroy_fs_client %p\n", fsc);
556 578
579 ceph_fscache_unregister_fs(fsc);
580
557 destroy_workqueue(fsc->wb_wq); 581 destroy_workqueue(fsc->wb_wq);
558 destroy_workqueue(fsc->pg_inv_wq); 582 destroy_workqueue(fsc->pg_inv_wq);
559 destroy_workqueue(fsc->trunc_wq); 583 destroy_workqueue(fsc->trunc_wq);
@@ -588,6 +612,8 @@ static void ceph_inode_init_once(void *foo)
588 612
589static int __init init_caches(void) 613static int __init init_caches(void)
590{ 614{
615 int error = -ENOMEM;
616
591 ceph_inode_cachep = kmem_cache_create("ceph_inode_info", 617 ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
592 sizeof(struct ceph_inode_info), 618 sizeof(struct ceph_inode_info),
593 __alignof__(struct ceph_inode_info), 619 __alignof__(struct ceph_inode_info),
@@ -611,15 +637,17 @@ static int __init init_caches(void)
611 if (ceph_file_cachep == NULL) 637 if (ceph_file_cachep == NULL)
612 goto bad_file; 638 goto bad_file;
613 639
614 return 0; 640 if ((error = ceph_fscache_register()))
641 goto bad_file;
615 642
643 return 0;
616bad_file: 644bad_file:
617 kmem_cache_destroy(ceph_dentry_cachep); 645 kmem_cache_destroy(ceph_dentry_cachep);
618bad_dentry: 646bad_dentry:
619 kmem_cache_destroy(ceph_cap_cachep); 647 kmem_cache_destroy(ceph_cap_cachep);
620bad_cap: 648bad_cap:
621 kmem_cache_destroy(ceph_inode_cachep); 649 kmem_cache_destroy(ceph_inode_cachep);
622 return -ENOMEM; 650 return error;
623} 651}
624 652
625static void destroy_caches(void) 653static void destroy_caches(void)
@@ -629,10 +657,13 @@ static void destroy_caches(void)
629 * destroy cache. 657 * destroy cache.
630 */ 658 */
631 rcu_barrier(); 659 rcu_barrier();
660
632 kmem_cache_destroy(ceph_inode_cachep); 661 kmem_cache_destroy(ceph_inode_cachep);
633 kmem_cache_destroy(ceph_cap_cachep); 662 kmem_cache_destroy(ceph_cap_cachep);
634 kmem_cache_destroy(ceph_dentry_cachep); 663 kmem_cache_destroy(ceph_dentry_cachep);
635 kmem_cache_destroy(ceph_file_cachep); 664 kmem_cache_destroy(ceph_file_cachep);
665
666 ceph_fscache_unregister();
636} 667}
637 668
638 669
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index f1e4e4766ea2..bb23ef636177 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -16,6 +16,10 @@
16 16
17#include <linux/ceph/libceph.h> 17#include <linux/ceph/libceph.h>
18 18
19#ifdef CONFIG_CEPH_FSCACHE
20#include <linux/fscache.h>
21#endif
22
19/* f_type in struct statfs */ 23/* f_type in struct statfs */
20#define CEPH_SUPER_MAGIC 0x00c36400 24#define CEPH_SUPER_MAGIC 0x00c36400
21 25
@@ -29,6 +33,7 @@
29#define CEPH_MOUNT_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */ 33#define CEPH_MOUNT_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */
30#define CEPH_MOUNT_OPT_INO32 (1<<8) /* 32 bit inos */ 34#define CEPH_MOUNT_OPT_INO32 (1<<8) /* 32 bit inos */
31#define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */ 35#define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */
36#define CEPH_MOUNT_OPT_FSCACHE (1<<10) /* use fscache */
32 37
33#define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES) 38#define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES)
34 39
@@ -90,6 +95,11 @@ struct ceph_fs_client {
90 struct dentry *debugfs_bdi; 95 struct dentry *debugfs_bdi;
91 struct dentry *debugfs_mdsc, *debugfs_mdsmap; 96 struct dentry *debugfs_mdsc, *debugfs_mdsmap;
92#endif 97#endif
98
99#ifdef CONFIG_CEPH_FSCACHE
100 struct fscache_cookie *fscache;
101 struct workqueue_struct *revalidate_wq;
102#endif
93}; 103};
94 104
95 105
@@ -320,6 +330,12 @@ struct ceph_inode_info {
320 330
321 struct work_struct i_vmtruncate_work; 331 struct work_struct i_vmtruncate_work;
322 332
333#ifdef CONFIG_CEPH_FSCACHE
334 struct fscache_cookie *fscache;
335 u32 i_fscache_gen; /* sequence, for delayed fscache validate */
336 struct work_struct i_revalidate_work;
337#endif
338
323 struct inode vfs_inode; /* at end */ 339 struct inode vfs_inode; /* at end */
324}; 340};
325 341