diff options
Diffstat (limited to 'fs/ceph/file.c')
-rw-r--r-- | fs/ceph/file.c | 65 |
1 files changed, 44 insertions, 21 deletions
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index e77c28cf3690..7d0e4a82d898 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -154,11 +154,13 @@ int ceph_open(struct inode *inode, struct file *file) | |||
154 | } | 154 | } |
155 | 155 | ||
156 | /* | 156 | /* |
157 | * No need to block if we have any caps. Update wanted set | 157 | * No need to block if we have caps on the auth MDS (for |
158 | * write) or any MDS (for read). Update wanted set | ||
158 | * asynchronously. | 159 | * asynchronously. |
159 | */ | 160 | */ |
160 | spin_lock(&inode->i_lock); | 161 | spin_lock(&inode->i_lock); |
161 | if (__ceph_is_any_real_caps(ci)) { | 162 | if (__ceph_is_any_real_caps(ci) && |
163 | (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) { | ||
162 | int mds_wanted = __ceph_caps_mds_wanted(ci); | 164 | int mds_wanted = __ceph_caps_mds_wanted(ci); |
163 | int issued = __ceph_caps_issued(ci, NULL); | 165 | int issued = __ceph_caps_issued(ci, NULL); |
164 | 166 | ||
@@ -280,11 +282,13 @@ int ceph_release(struct inode *inode, struct file *file) | |||
280 | static int striped_read(struct inode *inode, | 282 | static int striped_read(struct inode *inode, |
281 | u64 off, u64 len, | 283 | u64 off, u64 len, |
282 | struct page **pages, int num_pages, | 284 | struct page **pages, int num_pages, |
283 | int *checkeof) | 285 | int *checkeof, bool align_to_pages, |
286 | unsigned long buf_align) | ||
284 | { | 287 | { |
285 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); | 288 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
286 | struct ceph_inode_info *ci = ceph_inode(inode); | 289 | struct ceph_inode_info *ci = ceph_inode(inode); |
287 | u64 pos, this_len; | 290 | u64 pos, this_len; |
291 | int io_align, page_align; | ||
288 | int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ | 292 | int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ |
289 | int left, pages_left; | 293 | int left, pages_left; |
290 | int read; | 294 | int read; |
@@ -300,14 +304,19 @@ static int striped_read(struct inode *inode, | |||
300 | page_pos = pages; | 304 | page_pos = pages; |
301 | pages_left = num_pages; | 305 | pages_left = num_pages; |
302 | read = 0; | 306 | read = 0; |
307 | io_align = off & ~PAGE_MASK; | ||
303 | 308 | ||
304 | more: | 309 | more: |
310 | if (align_to_pages) | ||
311 | page_align = (pos - io_align + buf_align) & ~PAGE_MASK; | ||
312 | else | ||
313 | page_align = pos & ~PAGE_MASK; | ||
305 | this_len = left; | 314 | this_len = left; |
306 | ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), | 315 | ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), |
307 | &ci->i_layout, pos, &this_len, | 316 | &ci->i_layout, pos, &this_len, |
308 | ci->i_truncate_seq, | 317 | ci->i_truncate_seq, |
309 | ci->i_truncate_size, | 318 | ci->i_truncate_size, |
310 | page_pos, pages_left); | 319 | page_pos, pages_left, page_align); |
311 | hit_stripe = this_len < left; | 320 | hit_stripe = this_len < left; |
312 | was_short = ret >= 0 && ret < this_len; | 321 | was_short = ret >= 0 && ret < this_len; |
313 | if (ret == -ENOENT) | 322 | if (ret == -ENOENT) |
@@ -368,32 +377,34 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, | |||
368 | struct inode *inode = file->f_dentry->d_inode; | 377 | struct inode *inode = file->f_dentry->d_inode; |
369 | struct page **pages; | 378 | struct page **pages; |
370 | u64 off = *poff; | 379 | u64 off = *poff; |
371 | int num_pages = calc_pages_for(off, len); | 380 | int num_pages, ret; |
372 | int ret; | ||
373 | 381 | ||
374 | dout("sync_read on file %p %llu~%u %s\n", file, off, len, | 382 | dout("sync_read on file %p %llu~%u %s\n", file, off, len, |
375 | (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); | 383 | (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); |
376 | 384 | ||
377 | if (file->f_flags & O_DIRECT) { | 385 | if (file->f_flags & O_DIRECT) { |
378 | pages = ceph_get_direct_page_vector(data, num_pages, off, len); | 386 | num_pages = calc_pages_for((unsigned long)data, len); |
379 | 387 | pages = ceph_get_direct_page_vector(data, num_pages, true); | |
380 | /* | ||
381 | * flush any page cache pages in this range. this | ||
382 | * will make concurrent normal and O_DIRECT io slow, | ||
383 | * but it will at least behave sensibly when they are | ||
384 | * in sequence. | ||
385 | */ | ||
386 | } else { | 388 | } else { |
389 | num_pages = calc_pages_for(off, len); | ||
387 | pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); | 390 | pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); |
388 | } | 391 | } |
389 | if (IS_ERR(pages)) | 392 | if (IS_ERR(pages)) |
390 | return PTR_ERR(pages); | 393 | return PTR_ERR(pages); |
391 | 394 | ||
395 | /* | ||
396 | * flush any page cache pages in this range. this | ||
397 | * will make concurrent normal and sync io slow, | ||
398 | * but it will at least behave sensibly when they are | ||
399 | * in sequence. | ||
400 | */ | ||
392 | ret = filemap_write_and_wait(inode->i_mapping); | 401 | ret = filemap_write_and_wait(inode->i_mapping); |
393 | if (ret < 0) | 402 | if (ret < 0) |
394 | goto done; | 403 | goto done; |
395 | 404 | ||
396 | ret = striped_read(inode, off, len, pages, num_pages, checkeof); | 405 | ret = striped_read(inode, off, len, pages, num_pages, checkeof, |
406 | file->f_flags & O_DIRECT, | ||
407 | (unsigned long)data & ~PAGE_MASK); | ||
397 | 408 | ||
398 | if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) | 409 | if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) |
399 | ret = ceph_copy_page_vector_to_user(pages, data, off, ret); | 410 | ret = ceph_copy_page_vector_to_user(pages, data, off, ret); |
@@ -402,7 +413,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, | |||
402 | 413 | ||
403 | done: | 414 | done: |
404 | if (file->f_flags & O_DIRECT) | 415 | if (file->f_flags & O_DIRECT) |
405 | ceph_put_page_vector(pages, num_pages); | 416 | ceph_put_page_vector(pages, num_pages, true); |
406 | else | 417 | else |
407 | ceph_release_page_vector(pages, num_pages); | 418 | ceph_release_page_vector(pages, num_pages); |
408 | dout("sync_read result %d\n", ret); | 419 | dout("sync_read result %d\n", ret); |
@@ -448,6 +459,8 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
448 | int flags; | 459 | int flags; |
449 | int do_sync = 0; | 460 | int do_sync = 0; |
450 | int check_caps = 0; | 461 | int check_caps = 0; |
462 | int page_align, io_align; | ||
463 | unsigned long buf_align; | ||
451 | int ret; | 464 | int ret; |
452 | struct timespec mtime = CURRENT_TIME; | 465 | struct timespec mtime = CURRENT_TIME; |
453 | 466 | ||
@@ -462,6 +475,9 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
462 | else | 475 | else |
463 | pos = *offset; | 476 | pos = *offset; |
464 | 477 | ||
478 | io_align = pos & ~PAGE_MASK; | ||
479 | buf_align = (unsigned long)data & ~PAGE_MASK; | ||
480 | |||
465 | ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left); | 481 | ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left); |
466 | if (ret < 0) | 482 | if (ret < 0) |
467 | return ret; | 483 | return ret; |
@@ -486,20 +502,27 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
486 | */ | 502 | */ |
487 | more: | 503 | more: |
488 | len = left; | 504 | len = left; |
505 | if (file->f_flags & O_DIRECT) { | ||
506 | /* write from beginning of first page, regardless of | ||
507 | io alignment */ | ||
508 | page_align = (pos - io_align + buf_align) & ~PAGE_MASK; | ||
509 | num_pages = calc_pages_for((unsigned long)data, len); | ||
510 | } else { | ||
511 | page_align = pos & ~PAGE_MASK; | ||
512 | num_pages = calc_pages_for(pos, len); | ||
513 | } | ||
489 | req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, | 514 | req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, |
490 | ceph_vino(inode), pos, &len, | 515 | ceph_vino(inode), pos, &len, |
491 | CEPH_OSD_OP_WRITE, flags, | 516 | CEPH_OSD_OP_WRITE, flags, |
492 | ci->i_snap_realm->cached_context, | 517 | ci->i_snap_realm->cached_context, |
493 | do_sync, | 518 | do_sync, |
494 | ci->i_truncate_seq, ci->i_truncate_size, | 519 | ci->i_truncate_seq, ci->i_truncate_size, |
495 | &mtime, false, 2); | 520 | &mtime, false, 2, page_align); |
496 | if (!req) | 521 | if (!req) |
497 | return -ENOMEM; | 522 | return -ENOMEM; |
498 | 523 | ||
499 | num_pages = calc_pages_for(pos, len); | ||
500 | |||
501 | if (file->f_flags & O_DIRECT) { | 524 | if (file->f_flags & O_DIRECT) { |
502 | pages = ceph_get_direct_page_vector(data, num_pages, pos, len); | 525 | pages = ceph_get_direct_page_vector(data, num_pages, false); |
503 | if (IS_ERR(pages)) { | 526 | if (IS_ERR(pages)) { |
504 | ret = PTR_ERR(pages); | 527 | ret = PTR_ERR(pages); |
505 | goto out; | 528 | goto out; |
@@ -549,7 +572,7 @@ more: | |||
549 | } | 572 | } |
550 | 573 | ||
551 | if (file->f_flags & O_DIRECT) | 574 | if (file->f_flags & O_DIRECT) |
552 | ceph_put_page_vector(pages, num_pages); | 575 | ceph_put_page_vector(pages, num_pages, false); |
553 | else if (file->f_flags & O_SYNC) | 576 | else if (file->f_flags & O_SYNC) |
554 | ceph_release_page_vector(pages, num_pages); | 577 | ceph_release_page_vector(pages, num_pages); |
555 | 578 | ||