diff options
Diffstat (limited to 'fs/ceph/file.c')
| -rw-r--r-- | fs/ceph/file.c | 52 |
1 files changed, 34 insertions, 18 deletions
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index e77c28cf3690..8d79b8912e31 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
| @@ -154,11 +154,13 @@ int ceph_open(struct inode *inode, struct file *file) | |||
| 154 | } | 154 | } |
| 155 | 155 | ||
| 156 | /* | 156 | /* |
| 157 | * No need to block if we have any caps. Update wanted set | 157 | * No need to block if we have caps on the auth MDS (for |
| 158 | * write) or any MDS (for read). Update wanted set | ||
| 158 | * asynchronously. | 159 | * asynchronously. |
| 159 | */ | 160 | */ |
| 160 | spin_lock(&inode->i_lock); | 161 | spin_lock(&inode->i_lock); |
| 161 | if (__ceph_is_any_real_caps(ci)) { | 162 | if (__ceph_is_any_real_caps(ci) && |
| 163 | (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) { | ||
| 162 | int mds_wanted = __ceph_caps_mds_wanted(ci); | 164 | int mds_wanted = __ceph_caps_mds_wanted(ci); |
| 163 | int issued = __ceph_caps_issued(ci, NULL); | 165 | int issued = __ceph_caps_issued(ci, NULL); |
| 164 | 166 | ||
| @@ -280,11 +282,12 @@ int ceph_release(struct inode *inode, struct file *file) | |||
| 280 | static int striped_read(struct inode *inode, | 282 | static int striped_read(struct inode *inode, |
| 281 | u64 off, u64 len, | 283 | u64 off, u64 len, |
| 282 | struct page **pages, int num_pages, | 284 | struct page **pages, int num_pages, |
| 283 | int *checkeof) | 285 | int *checkeof, bool align_to_pages) |
| 284 | { | 286 | { |
| 285 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); | 287 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
| 286 | struct ceph_inode_info *ci = ceph_inode(inode); | 288 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 287 | u64 pos, this_len; | 289 | u64 pos, this_len; |
| 290 | int io_align, page_align; | ||
| 288 | int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ | 291 | int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ |
| 289 | int left, pages_left; | 292 | int left, pages_left; |
| 290 | int read; | 293 | int read; |
| @@ -300,14 +303,19 @@ static int striped_read(struct inode *inode, | |||
| 300 | page_pos = pages; | 303 | page_pos = pages; |
| 301 | pages_left = num_pages; | 304 | pages_left = num_pages; |
| 302 | read = 0; | 305 | read = 0; |
| 306 | io_align = off & ~PAGE_MASK; | ||
| 303 | 307 | ||
| 304 | more: | 308 | more: |
| 309 | if (align_to_pages) | ||
| 310 | page_align = (pos - io_align) & ~PAGE_MASK; | ||
| 311 | else | ||
| 312 | page_align = pos & ~PAGE_MASK; | ||
| 305 | this_len = left; | 313 | this_len = left; |
| 306 | ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), | 314 | ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), |
| 307 | &ci->i_layout, pos, &this_len, | 315 | &ci->i_layout, pos, &this_len, |
| 308 | ci->i_truncate_seq, | 316 | ci->i_truncate_seq, |
| 309 | ci->i_truncate_size, | 317 | ci->i_truncate_size, |
| 310 | page_pos, pages_left); | 318 | page_pos, pages_left, page_align); |
| 311 | hit_stripe = this_len < left; | 319 | hit_stripe = this_len < left; |
| 312 | was_short = ret >= 0 && ret < this_len; | 320 | was_short = ret >= 0 && ret < this_len; |
| 313 | if (ret == -ENOENT) | 321 | if (ret == -ENOENT) |
| @@ -374,26 +382,25 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, | |||
| 374 | dout("sync_read on file %p %llu~%u %s\n", file, off, len, | 382 | dout("sync_read on file %p %llu~%u %s\n", file, off, len, |
| 375 | (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); | 383 | (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); |
| 376 | 384 | ||
| 377 | if (file->f_flags & O_DIRECT) { | 385 | if (file->f_flags & O_DIRECT) |
| 378 | pages = ceph_get_direct_page_vector(data, num_pages, off, len); | 386 | pages = ceph_get_direct_page_vector(data, num_pages); |
| 379 | 387 | else | |
| 380 | /* | ||
| 381 | * flush any page cache pages in this range. this | ||
| 382 | * will make concurrent normal and O_DIRECT io slow, | ||
| 383 | * but it will at least behave sensibly when they are | ||
| 384 | * in sequence. | ||
| 385 | */ | ||
| 386 | } else { | ||
| 387 | pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); | 388 | pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); |
| 388 | } | ||
| 389 | if (IS_ERR(pages)) | 389 | if (IS_ERR(pages)) |
| 390 | return PTR_ERR(pages); | 390 | return PTR_ERR(pages); |
| 391 | 391 | ||
| 392 | /* | ||
| 393 | * flush any page cache pages in this range. this | ||
| 394 | * will make concurrent normal and sync io slow, | ||
| 395 | * but it will at least behave sensibly when they are | ||
| 396 | * in sequence. | ||
| 397 | */ | ||
| 392 | ret = filemap_write_and_wait(inode->i_mapping); | 398 | ret = filemap_write_and_wait(inode->i_mapping); |
| 393 | if (ret < 0) | 399 | if (ret < 0) |
| 394 | goto done; | 400 | goto done; |
| 395 | 401 | ||
| 396 | ret = striped_read(inode, off, len, pages, num_pages, checkeof); | 402 | ret = striped_read(inode, off, len, pages, num_pages, checkeof, |
| 403 | file->f_flags & O_DIRECT); | ||
| 397 | 404 | ||
| 398 | if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) | 405 | if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) |
| 399 | ret = ceph_copy_page_vector_to_user(pages, data, off, ret); | 406 | ret = ceph_copy_page_vector_to_user(pages, data, off, ret); |
| @@ -448,6 +455,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
| 448 | int flags; | 455 | int flags; |
| 449 | int do_sync = 0; | 456 | int do_sync = 0; |
| 450 | int check_caps = 0; | 457 | int check_caps = 0; |
| 458 | int page_align, io_align; | ||
| 451 | int ret; | 459 | int ret; |
| 452 | struct timespec mtime = CURRENT_TIME; | 460 | struct timespec mtime = CURRENT_TIME; |
| 453 | 461 | ||
| @@ -462,6 +470,8 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
| 462 | else | 470 | else |
| 463 | pos = *offset; | 471 | pos = *offset; |
| 464 | 472 | ||
| 473 | io_align = pos & ~PAGE_MASK; | ||
| 474 | |||
| 465 | ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left); | 475 | ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left); |
| 466 | if (ret < 0) | 476 | if (ret < 0) |
| 467 | return ret; | 477 | return ret; |
| @@ -486,20 +496,26 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
| 486 | */ | 496 | */ |
| 487 | more: | 497 | more: |
| 488 | len = left; | 498 | len = left; |
| 499 | if (file->f_flags & O_DIRECT) | ||
| 500 | /* write from beginning of first page, regardless of | ||
| 501 | io alignment */ | ||
| 502 | page_align = (pos - io_align) & ~PAGE_MASK; | ||
| 503 | else | ||
| 504 | page_align = pos & ~PAGE_MASK; | ||
| 489 | req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, | 505 | req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, |
| 490 | ceph_vino(inode), pos, &len, | 506 | ceph_vino(inode), pos, &len, |
| 491 | CEPH_OSD_OP_WRITE, flags, | 507 | CEPH_OSD_OP_WRITE, flags, |
| 492 | ci->i_snap_realm->cached_context, | 508 | ci->i_snap_realm->cached_context, |
| 493 | do_sync, | 509 | do_sync, |
| 494 | ci->i_truncate_seq, ci->i_truncate_size, | 510 | ci->i_truncate_seq, ci->i_truncate_size, |
| 495 | &mtime, false, 2); | 511 | &mtime, false, 2, page_align); |
| 496 | if (!req) | 512 | if (!req) |
| 497 | return -ENOMEM; | 513 | return -ENOMEM; |
| 498 | 514 | ||
| 499 | num_pages = calc_pages_for(pos, len); | 515 | num_pages = calc_pages_for(pos, len); |
| 500 | 516 | ||
| 501 | if (file->f_flags & O_DIRECT) { | 517 | if (file->f_flags & O_DIRECT) { |
| 502 | pages = ceph_get_direct_page_vector(data, num_pages, pos, len); | 518 | pages = ceph_get_direct_page_vector(data, num_pages); |
| 503 | if (IS_ERR(pages)) { | 519 | if (IS_ERR(pages)) { |
| 504 | ret = PTR_ERR(pages); | 520 | ret = PTR_ERR(pages); |
| 505 | goto out; | 521 | goto out; |
