diff options
author | Yan, Zheng <zyan@redhat.com> | 2016-10-24 22:51:55 -0400 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2017-05-08 01:47:55 -0400 |
commit | e99b0ea39354cf308423588df5daf69c42dbd735 (patch) | |
tree | 49bfc2c494d1e74f864dd58831e28891a81475b8 /fs | |
parent | 1bf9bc48133851121d92f0cc908bb5a1446aeba5 (diff) |
ceph: try getting buffer capability for readahead/fadvise
commit 2b1ac852eb67a6e95595e576371d23519105559f upstream.
For readahead/fadvise cases, the caller of ceph_readpages does not
hold the buffer capability. Pages can be added to the page cache while
there is no buffer capability. This can cause data integrity
issues.
Signed-off-by: Yan, Zheng <zyan@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ceph/addr.c | 58 | ||||
-rw-r--r-- | fs/ceph/caps.c | 21 | ||||
-rw-r--r-- | fs/ceph/file.c | 3 | ||||
-rw-r--r-- | fs/ceph/super.h | 2 |
4 files changed, 73 insertions, 11 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 1e643c718917..18dc18f8af2c 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -315,7 +315,32 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) | |||
315 | struct page **pages; | 315 | struct page **pages; |
316 | pgoff_t next_index; | 316 | pgoff_t next_index; |
317 | int nr_pages = 0; | 317 | int nr_pages = 0; |
318 | int ret; | 318 | int got = 0; |
319 | int ret = 0; | ||
320 | |||
321 | if (!current->journal_info) { | ||
322 | /* caller of readpages does not hold buffer and read caps | ||
323 | * (fadvise, madvise and readahead cases) */ | ||
324 | int want = CEPH_CAP_FILE_CACHE; | ||
325 | ret = ceph_try_get_caps(ci, CEPH_CAP_FILE_RD, want, &got); | ||
326 | if (ret < 0) { | ||
327 | dout("start_read %p, error getting cap\n", inode); | ||
328 | } else if (!(got & want)) { | ||
329 | dout("start_read %p, no cache cap\n", inode); | ||
330 | ret = 0; | ||
331 | } | ||
332 | if (ret <= 0) { | ||
333 | if (got) | ||
334 | ceph_put_cap_refs(ci, got); | ||
335 | while (!list_empty(page_list)) { | ||
336 | page = list_entry(page_list->prev, | ||
337 | struct page, lru); | ||
338 | list_del(&page->lru); | ||
339 | put_page(page); | ||
340 | } | ||
341 | return ret; | ||
342 | } | ||
343 | } | ||
319 | 344 | ||
320 | off = (u64) page_offset(page); | 345 | off = (u64) page_offset(page); |
321 | 346 | ||
@@ -338,15 +363,18 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) | |||
338 | CEPH_OSD_FLAG_READ, NULL, | 363 | CEPH_OSD_FLAG_READ, NULL, |
339 | ci->i_truncate_seq, ci->i_truncate_size, | 364 | ci->i_truncate_seq, ci->i_truncate_size, |
340 | false); | 365 | false); |
341 | if (IS_ERR(req)) | 366 | if (IS_ERR(req)) { |
342 | return PTR_ERR(req); | 367 | ret = PTR_ERR(req); |
368 | goto out; | ||
369 | } | ||
343 | 370 | ||
344 | /* build page vector */ | 371 | /* build page vector */ |
345 | nr_pages = calc_pages_for(0, len); | 372 | nr_pages = calc_pages_for(0, len); |
346 | pages = kmalloc(sizeof(*pages) * nr_pages, GFP_KERNEL); | 373 | pages = kmalloc(sizeof(*pages) * nr_pages, GFP_KERNEL); |
347 | ret = -ENOMEM; | 374 | if (!pages) { |
348 | if (!pages) | 375 | ret = -ENOMEM; |
349 | goto out; | 376 | goto out_put; |
377 | } | ||
350 | for (i = 0; i < nr_pages; ++i) { | 378 | for (i = 0; i < nr_pages; ++i) { |
351 | page = list_entry(page_list->prev, struct page, lru); | 379 | page = list_entry(page_list->prev, struct page, lru); |
352 | BUG_ON(PageLocked(page)); | 380 | BUG_ON(PageLocked(page)); |
@@ -379,6 +407,12 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) | |||
379 | if (ret < 0) | 407 | if (ret < 0) |
380 | goto out_pages; | 408 | goto out_pages; |
381 | ceph_osdc_put_request(req); | 409 | ceph_osdc_put_request(req); |
410 | |||
411 | /* After adding locked pages to page cache, the inode holds cache cap. | ||
412 | * So we can drop our cap refs. */ | ||
413 | if (got) | ||
414 | ceph_put_cap_refs(ci, got); | ||
415 | |||
382 | return nr_pages; | 416 | return nr_pages; |
383 | 417 | ||
384 | out_pages: | 418 | out_pages: |
@@ -387,8 +421,11 @@ out_pages: | |||
387 | unlock_page(pages[i]); | 421 | unlock_page(pages[i]); |
388 | } | 422 | } |
389 | ceph_put_page_vector(pages, nr_pages, false); | 423 | ceph_put_page_vector(pages, nr_pages, false); |
390 | out: | 424 | out_put: |
391 | ceph_osdc_put_request(req); | 425 | ceph_osdc_put_request(req); |
426 | out: | ||
427 | if (got) | ||
428 | ceph_put_cap_refs(ci, got); | ||
392 | return ret; | 429 | return ret; |
393 | } | 430 | } |
394 | 431 | ||
@@ -425,7 +462,6 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, | |||
425 | rc = start_read(inode, page_list, max); | 462 | rc = start_read(inode, page_list, max); |
426 | if (rc < 0) | 463 | if (rc < 0) |
427 | goto out; | 464 | goto out; |
428 | BUG_ON(rc == 0); | ||
429 | } | 465 | } |
430 | out: | 466 | out: |
431 | ceph_fscache_readpages_cancel(inode, page_list); | 467 | ceph_fscache_readpages_cancel(inode, page_list); |
@@ -1372,9 +1408,11 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1372 | inode, off, (size_t)PAGE_SIZE, ceph_cap_string(got)); | 1408 | inode, off, (size_t)PAGE_SIZE, ceph_cap_string(got)); |
1373 | 1409 | ||
1374 | if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) || | 1410 | if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) || |
1375 | ci->i_inline_version == CEPH_INLINE_NONE) | 1411 | ci->i_inline_version == CEPH_INLINE_NONE) { |
1412 | current->journal_info = vma->vm_file; | ||
1376 | ret = filemap_fault(vma, vmf); | 1413 | ret = filemap_fault(vma, vmf); |
1377 | else | 1414 | current->journal_info = NULL; |
1415 | } else | ||
1378 | ret = -EAGAIN; | 1416 | ret = -EAGAIN; |
1379 | 1417 | ||
1380 | dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n", | 1418 | dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n", |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index f3f21105b860..03951f90ecf7 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -2479,6 +2479,27 @@ static void check_max_size(struct inode *inode, loff_t endoff) | |||
2479 | ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); | 2479 | ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); |
2480 | } | 2480 | } |
2481 | 2481 | ||
2482 | int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want, int *got) | ||
2483 | { | ||
2484 | int ret, err = 0; | ||
2485 | |||
2486 | BUG_ON(need & ~CEPH_CAP_FILE_RD); | ||
2487 | BUG_ON(want & ~(CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)); | ||
2488 | ret = ceph_pool_perm_check(ci, need); | ||
2489 | if (ret < 0) | ||
2490 | return ret; | ||
2491 | |||
2492 | ret = try_get_cap_refs(ci, need, want, 0, true, got, &err); | ||
2493 | if (ret) { | ||
2494 | if (err == -EAGAIN) { | ||
2495 | ret = 0; | ||
2496 | } else if (err < 0) { | ||
2497 | ret = err; | ||
2498 | } | ||
2499 | } | ||
2500 | return ret; | ||
2501 | } | ||
2502 | |||
2482 | /* | 2503 | /* |
2483 | * Wait for caps, and take cap references. If we can't get a WR cap | 2504 | * Wait for caps, and take cap references. If we can't get a WR cap |
2484 | * due to a small max_size, make sure we check_max_size (and possibly | 2505 | * due to a small max_size, make sure we check_max_size (and possibly |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index f995e3528a33..ca3f630db90f 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -1249,8 +1249,9 @@ again: | |||
1249 | dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", | 1249 | dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", |
1250 | inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, | 1250 | inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, |
1251 | ceph_cap_string(got)); | 1251 | ceph_cap_string(got)); |
1252 | 1252 | current->journal_info = filp; | |
1253 | ret = generic_file_read_iter(iocb, to); | 1253 | ret = generic_file_read_iter(iocb, to); |
1254 | current->journal_info = NULL; | ||
1254 | } | 1255 | } |
1255 | dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", | 1256 | dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", |
1256 | inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); | 1257 | inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 3e3fa9163059..622d5dd9f616 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -905,6 +905,8 @@ extern int ceph_encode_dentry_release(void **p, struct dentry *dn, | |||
905 | 905 | ||
906 | extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, | 906 | extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, |
907 | loff_t endoff, int *got, struct page **pinned_page); | 907 | loff_t endoff, int *got, struct page **pinned_page); |
908 | extern int ceph_try_get_caps(struct ceph_inode_info *ci, | ||
909 | int need, int want, int *got); | ||
908 | 910 | ||
909 | /* for counting open files by mode */ | 911 | /* for counting open files by mode */ |
910 | extern void __ceph_get_fmode(struct ceph_inode_info *ci, int mode); | 912 | extern void __ceph_get_fmode(struct ceph_inode_info *ci, int mode); |