about | summary | refs | log | tree | commit | diff | stats
path: root/fs
diff options
context:
space:
mode:
author: Yan, Zheng <zyan@redhat.com> 2016-10-24 22:51:55 -0400
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2017-05-08 01:47:55 -0400
commit: e99b0ea39354cf308423588df5daf69c42dbd735 (patch)
tree: 49bfc2c494d1e74f864dd58831e28891a81475b8 /fs
parent: 1bf9bc48133851121d92f0cc908bb5a1446aeba5 (diff)
ceph: try getting buffer capability for readahead/fadvise
commit 2b1ac852eb67a6e95595e576371d23519105559f upstream.

For readahead/fadvise cases, caller of ceph_readpages does not hold buffer capability. Pages can be added to page cache while there is no buffer capability. This can cause data integrity issue.

Signed-off-by: Yan, Zheng <zyan@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'fs')
-rw-r--r--  fs/ceph/addr.c   58
-rw-r--r--  fs/ceph/caps.c   21
-rw-r--r--  fs/ceph/file.c    3
-rw-r--r--  fs/ceph/super.h   2
4 files changed, 73 insertions, 11 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 1e643c718917..18dc18f8af2c 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -315,7 +315,32 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
315 struct page **pages; 315 struct page **pages;
316 pgoff_t next_index; 316 pgoff_t next_index;
317 int nr_pages = 0; 317 int nr_pages = 0;
318 int ret; 318 int got = 0;
319 int ret = 0;
320
321 if (!current->journal_info) {
322 /* caller of readpages does not hold buffer and read caps
323 * (fadvise, madvise and readahead cases) */
324 int want = CEPH_CAP_FILE_CACHE;
325 ret = ceph_try_get_caps(ci, CEPH_CAP_FILE_RD, want, &got);
326 if (ret < 0) {
327 dout("start_read %p, error getting cap\n", inode);
328 } else if (!(got & want)) {
329 dout("start_read %p, no cache cap\n", inode);
330 ret = 0;
331 }
332 if (ret <= 0) {
333 if (got)
334 ceph_put_cap_refs(ci, got);
335 while (!list_empty(page_list)) {
336 page = list_entry(page_list->prev,
337 struct page, lru);
338 list_del(&page->lru);
339 put_page(page);
340 }
341 return ret;
342 }
343 }
319 344
320 off = (u64) page_offset(page); 345 off = (u64) page_offset(page);
321 346
@@ -338,15 +363,18 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
338 CEPH_OSD_FLAG_READ, NULL, 363 CEPH_OSD_FLAG_READ, NULL,
339 ci->i_truncate_seq, ci->i_truncate_size, 364 ci->i_truncate_seq, ci->i_truncate_size,
340 false); 365 false);
341 if (IS_ERR(req)) 366 if (IS_ERR(req)) {
342 return PTR_ERR(req); 367 ret = PTR_ERR(req);
368 goto out;
369 }
343 370
344 /* build page vector */ 371 /* build page vector */
345 nr_pages = calc_pages_for(0, len); 372 nr_pages = calc_pages_for(0, len);
346 pages = kmalloc(sizeof(*pages) * nr_pages, GFP_KERNEL); 373 pages = kmalloc(sizeof(*pages) * nr_pages, GFP_KERNEL);
347 ret = -ENOMEM; 374 if (!pages) {
348 if (!pages) 375 ret = -ENOMEM;
349 goto out; 376 goto out_put;
377 }
350 for (i = 0; i < nr_pages; ++i) { 378 for (i = 0; i < nr_pages; ++i) {
351 page = list_entry(page_list->prev, struct page, lru); 379 page = list_entry(page_list->prev, struct page, lru);
352 BUG_ON(PageLocked(page)); 380 BUG_ON(PageLocked(page));
@@ -379,6 +407,12 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
379 if (ret < 0) 407 if (ret < 0)
380 goto out_pages; 408 goto out_pages;
381 ceph_osdc_put_request(req); 409 ceph_osdc_put_request(req);
410
411 /* After adding locked pages to page cache, the inode holds cache cap.
412 * So we can drop our cap refs. */
413 if (got)
414 ceph_put_cap_refs(ci, got);
415
382 return nr_pages; 416 return nr_pages;
383 417
384out_pages: 418out_pages:
@@ -387,8 +421,11 @@ out_pages:
387 unlock_page(pages[i]); 421 unlock_page(pages[i]);
388 } 422 }
389 ceph_put_page_vector(pages, nr_pages, false); 423 ceph_put_page_vector(pages, nr_pages, false);
390out: 424out_put:
391 ceph_osdc_put_request(req); 425 ceph_osdc_put_request(req);
426out:
427 if (got)
428 ceph_put_cap_refs(ci, got);
392 return ret; 429 return ret;
393} 430}
394 431
@@ -425,7 +462,6 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
425 rc = start_read(inode, page_list, max); 462 rc = start_read(inode, page_list, max);
426 if (rc < 0) 463 if (rc < 0)
427 goto out; 464 goto out;
428 BUG_ON(rc == 0);
429 } 465 }
430out: 466out:
431 ceph_fscache_readpages_cancel(inode, page_list); 467 ceph_fscache_readpages_cancel(inode, page_list);
@@ -1372,9 +1408,11 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1372 inode, off, (size_t)PAGE_SIZE, ceph_cap_string(got)); 1408 inode, off, (size_t)PAGE_SIZE, ceph_cap_string(got));
1373 1409
1374 if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) || 1410 if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) ||
1375 ci->i_inline_version == CEPH_INLINE_NONE) 1411 ci->i_inline_version == CEPH_INLINE_NONE) {
1412 current->journal_info = vma->vm_file;
1376 ret = filemap_fault(vma, vmf); 1413 ret = filemap_fault(vma, vmf);
1377 else 1414 current->journal_info = NULL;
1415 } else
1378 ret = -EAGAIN; 1416 ret = -EAGAIN;
1379 1417
1380 dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n", 1418 dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n",
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index f3f21105b860..03951f90ecf7 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2479,6 +2479,27 @@ static void check_max_size(struct inode *inode, loff_t endoff)
2479 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); 2479 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
2480} 2480}
2481 2481
2482int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want, int *got)
2483{
2484 int ret, err = 0;
2485
2486 BUG_ON(need & ~CEPH_CAP_FILE_RD);
2487 BUG_ON(want & ~(CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO));
2488 ret = ceph_pool_perm_check(ci, need);
2489 if (ret < 0)
2490 return ret;
2491
2492 ret = try_get_cap_refs(ci, need, want, 0, true, got, &err);
2493 if (ret) {
2494 if (err == -EAGAIN) {
2495 ret = 0;
2496 } else if (err < 0) {
2497 ret = err;
2498 }
2499 }
2500 return ret;
2501}
2502
2482/* 2503/*
2483 * Wait for caps, and take cap references. If we can't get a WR cap 2504 * Wait for caps, and take cap references. If we can't get a WR cap
2484 * due to a small max_size, make sure we check_max_size (and possibly 2505 * due to a small max_size, make sure we check_max_size (and possibly
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index f995e3528a33..ca3f630db90f 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1249,8 +1249,9 @@ again:
1249 dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", 1249 dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n",
1250 inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, 1250 inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
1251 ceph_cap_string(got)); 1251 ceph_cap_string(got));
1252 1252 current->journal_info = filp;
1253 ret = generic_file_read_iter(iocb, to); 1253 ret = generic_file_read_iter(iocb, to);
1254 current->journal_info = NULL;
1254 } 1255 }
1255 dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", 1256 dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
1256 inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); 1257 inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 3e3fa9163059..622d5dd9f616 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -905,6 +905,8 @@ extern int ceph_encode_dentry_release(void **p, struct dentry *dn,
905 905
906extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, 906extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
907 loff_t endoff, int *got, struct page **pinned_page); 907 loff_t endoff, int *got, struct page **pinned_page);
908extern int ceph_try_get_caps(struct ceph_inode_info *ci,
909 int need, int want, int *got);
908 910
909/* for counting open files by mode */ 911/* for counting open files by mode */
910extern void __ceph_get_fmode(struct ceph_inode_info *ci, int mode); 912extern void __ceph_get_fmode(struct ceph_inode_info *ci, int mode);