diff options
| author | Yan, Zheng <zyan@redhat.com> | 2016-10-24 22:51:55 -0400 |
|---|---|---|
| committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2017-05-08 01:47:55 -0400 |
| commit | e99b0ea39354cf308423588df5daf69c42dbd735 (patch) | |
| tree | 49bfc2c494d1e74f864dd58831e28891a81475b8 /fs | |
| parent | 1bf9bc48133851121d92f0cc908bb5a1446aeba5 (diff) | |
ceph: try getting buffer capability for readahead/fadvise
commit 2b1ac852eb67a6e95595e576371d23519105559f upstream.
For readahead/fadvise cases, the caller of ceph_readpages does not
hold the buffer capability. Pages can therefore be added to the page
cache while no buffer capability is held. This can cause data
integrity issues.
Signed-off-by: Yan, Zheng <zyan@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'fs')
| -rw-r--r-- | fs/ceph/addr.c | 58 | ||||
| -rw-r--r-- | fs/ceph/caps.c | 21 | ||||
| -rw-r--r-- | fs/ceph/file.c | 3 | ||||
| -rw-r--r-- | fs/ceph/super.h | 2 |
4 files changed, 73 insertions, 11 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 1e643c718917..18dc18f8af2c 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
| @@ -315,7 +315,32 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) | |||
| 315 | struct page **pages; | 315 | struct page **pages; |
| 316 | pgoff_t next_index; | 316 | pgoff_t next_index; |
| 317 | int nr_pages = 0; | 317 | int nr_pages = 0; |
| 318 | int ret; | 318 | int got = 0; |
| 319 | int ret = 0; | ||
| 320 | |||
| 321 | if (!current->journal_info) { | ||
| 322 | /* caller of readpages does not hold buffer and read caps | ||
| 323 | * (fadvise, madvise and readahead cases) */ | ||
| 324 | int want = CEPH_CAP_FILE_CACHE; | ||
| 325 | ret = ceph_try_get_caps(ci, CEPH_CAP_FILE_RD, want, &got); | ||
| 326 | if (ret < 0) { | ||
| 327 | dout("start_read %p, error getting cap\n", inode); | ||
| 328 | } else if (!(got & want)) { | ||
| 329 | dout("start_read %p, no cache cap\n", inode); | ||
| 330 | ret = 0; | ||
| 331 | } | ||
| 332 | if (ret <= 0) { | ||
| 333 | if (got) | ||
| 334 | ceph_put_cap_refs(ci, got); | ||
| 335 | while (!list_empty(page_list)) { | ||
| 336 | page = list_entry(page_list->prev, | ||
| 337 | struct page, lru); | ||
| 338 | list_del(&page->lru); | ||
| 339 | put_page(page); | ||
| 340 | } | ||
| 341 | return ret; | ||
| 342 | } | ||
| 343 | } | ||
| 319 | 344 | ||
| 320 | off = (u64) page_offset(page); | 345 | off = (u64) page_offset(page); |
| 321 | 346 | ||
| @@ -338,15 +363,18 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) | |||
| 338 | CEPH_OSD_FLAG_READ, NULL, | 363 | CEPH_OSD_FLAG_READ, NULL, |
| 339 | ci->i_truncate_seq, ci->i_truncate_size, | 364 | ci->i_truncate_seq, ci->i_truncate_size, |
| 340 | false); | 365 | false); |
| 341 | if (IS_ERR(req)) | 366 | if (IS_ERR(req)) { |
| 342 | return PTR_ERR(req); | 367 | ret = PTR_ERR(req); |
| 368 | goto out; | ||
| 369 | } | ||
| 343 | 370 | ||
| 344 | /* build page vector */ | 371 | /* build page vector */ |
| 345 | nr_pages = calc_pages_for(0, len); | 372 | nr_pages = calc_pages_for(0, len); |
| 346 | pages = kmalloc(sizeof(*pages) * nr_pages, GFP_KERNEL); | 373 | pages = kmalloc(sizeof(*pages) * nr_pages, GFP_KERNEL); |
| 347 | ret = -ENOMEM; | 374 | if (!pages) { |
| 348 | if (!pages) | 375 | ret = -ENOMEM; |
| 349 | goto out; | 376 | goto out_put; |
| 377 | } | ||
| 350 | for (i = 0; i < nr_pages; ++i) { | 378 | for (i = 0; i < nr_pages; ++i) { |
| 351 | page = list_entry(page_list->prev, struct page, lru); | 379 | page = list_entry(page_list->prev, struct page, lru); |
| 352 | BUG_ON(PageLocked(page)); | 380 | BUG_ON(PageLocked(page)); |
| @@ -379,6 +407,12 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) | |||
| 379 | if (ret < 0) | 407 | if (ret < 0) |
| 380 | goto out_pages; | 408 | goto out_pages; |
| 381 | ceph_osdc_put_request(req); | 409 | ceph_osdc_put_request(req); |
| 410 | |||
| 411 | /* After adding locked pages to page cache, the inode holds cache cap. | ||
| 412 | * So we can drop our cap refs. */ | ||
| 413 | if (got) | ||
| 414 | ceph_put_cap_refs(ci, got); | ||
| 415 | |||
| 382 | return nr_pages; | 416 | return nr_pages; |
| 383 | 417 | ||
| 384 | out_pages: | 418 | out_pages: |
| @@ -387,8 +421,11 @@ out_pages: | |||
| 387 | unlock_page(pages[i]); | 421 | unlock_page(pages[i]); |
| 388 | } | 422 | } |
| 389 | ceph_put_page_vector(pages, nr_pages, false); | 423 | ceph_put_page_vector(pages, nr_pages, false); |
| 390 | out: | 424 | out_put: |
| 391 | ceph_osdc_put_request(req); | 425 | ceph_osdc_put_request(req); |
| 426 | out: | ||
| 427 | if (got) | ||
| 428 | ceph_put_cap_refs(ci, got); | ||
| 392 | return ret; | 429 | return ret; |
| 393 | } | 430 | } |
| 394 | 431 | ||
| @@ -425,7 +462,6 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, | |||
| 425 | rc = start_read(inode, page_list, max); | 462 | rc = start_read(inode, page_list, max); |
| 426 | if (rc < 0) | 463 | if (rc < 0) |
| 427 | goto out; | 464 | goto out; |
| 428 | BUG_ON(rc == 0); | ||
| 429 | } | 465 | } |
| 430 | out: | 466 | out: |
| 431 | ceph_fscache_readpages_cancel(inode, page_list); | 467 | ceph_fscache_readpages_cancel(inode, page_list); |
| @@ -1372,9 +1408,11 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
| 1372 | inode, off, (size_t)PAGE_SIZE, ceph_cap_string(got)); | 1408 | inode, off, (size_t)PAGE_SIZE, ceph_cap_string(got)); |
| 1373 | 1409 | ||
| 1374 | if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) || | 1410 | if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) || |
| 1375 | ci->i_inline_version == CEPH_INLINE_NONE) | 1411 | ci->i_inline_version == CEPH_INLINE_NONE) { |
| 1412 | current->journal_info = vma->vm_file; | ||
| 1376 | ret = filemap_fault(vma, vmf); | 1413 | ret = filemap_fault(vma, vmf); |
| 1377 | else | 1414 | current->journal_info = NULL; |
| 1415 | } else | ||
| 1378 | ret = -EAGAIN; | 1416 | ret = -EAGAIN; |
| 1379 | 1417 | ||
| 1380 | dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n", | 1418 | dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n", |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index f3f21105b860..03951f90ecf7 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
| @@ -2479,6 +2479,27 @@ static void check_max_size(struct inode *inode, loff_t endoff) | |||
| 2479 | ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); | 2479 | ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); |
| 2480 | } | 2480 | } |
| 2481 | 2481 | ||
| 2482 | int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want, int *got) | ||
| 2483 | { | ||
| 2484 | int ret, err = 0; | ||
| 2485 | |||
| 2486 | BUG_ON(need & ~CEPH_CAP_FILE_RD); | ||
| 2487 | BUG_ON(want & ~(CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)); | ||
| 2488 | ret = ceph_pool_perm_check(ci, need); | ||
| 2489 | if (ret < 0) | ||
| 2490 | return ret; | ||
| 2491 | |||
| 2492 | ret = try_get_cap_refs(ci, need, want, 0, true, got, &err); | ||
| 2493 | if (ret) { | ||
| 2494 | if (err == -EAGAIN) { | ||
| 2495 | ret = 0; | ||
| 2496 | } else if (err < 0) { | ||
| 2497 | ret = err; | ||
| 2498 | } | ||
| 2499 | } | ||
| 2500 | return ret; | ||
| 2501 | } | ||
| 2502 | |||
| 2482 | /* | 2503 | /* |
| 2483 | * Wait for caps, and take cap references. If we can't get a WR cap | 2504 | * Wait for caps, and take cap references. If we can't get a WR cap |
| 2484 | * due to a small max_size, make sure we check_max_size (and possibly | 2505 | * due to a small max_size, make sure we check_max_size (and possibly |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index f995e3528a33..ca3f630db90f 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
| @@ -1249,8 +1249,9 @@ again: | |||
| 1249 | dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", | 1249 | dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", |
| 1250 | inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, | 1250 | inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, |
| 1251 | ceph_cap_string(got)); | 1251 | ceph_cap_string(got)); |
| 1252 | 1252 | current->journal_info = filp; | |
| 1253 | ret = generic_file_read_iter(iocb, to); | 1253 | ret = generic_file_read_iter(iocb, to); |
| 1254 | current->journal_info = NULL; | ||
| 1254 | } | 1255 | } |
| 1255 | dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", | 1256 | dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", |
| 1256 | inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); | 1257 | inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 3e3fa9163059..622d5dd9f616 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
| @@ -905,6 +905,8 @@ extern int ceph_encode_dentry_release(void **p, struct dentry *dn, | |||
| 905 | 905 | ||
| 906 | extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, | 906 | extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, |
| 907 | loff_t endoff, int *got, struct page **pinned_page); | 907 | loff_t endoff, int *got, struct page **pinned_page); |
| 908 | extern int ceph_try_get_caps(struct ceph_inode_info *ci, | ||
| 909 | int need, int want, int *got); | ||
| 908 | 910 | ||
| 909 | /* for counting open files by mode */ | 911 | /* for counting open files by mode */ |
| 910 | extern void __ceph_get_fmode(struct ceph_inode_info *ci, int mode); | 912 | extern void __ceph_get_fmode(struct ceph_inode_info *ci, int mode); |
