diff options
author | Yan, Zheng <zheng.z.yan@intel.com> | 2013-04-12 04:11:13 -0400 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2013-05-02 00:18:54 -0400 |
commit | 03d254edebe51949a569c38df6b4b05b7f3c50f9 (patch) | |
tree | 61ea077054bff82f2493ddfbf8d3751da1652f2d /fs/ceph | |
parent | 37505d5768b9389a5ef7bc1efa465d4484a5462e (diff) |
ceph: apply write checks in ceph_aio_write
Copy the write checks from __generic_file_aio_write() into ceph_aio_write(),
so that these checks also cover the sync write path.
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Reviewed-by: Alex Elder <elder@inktank.com>
Diffstat (limited to 'fs/ceph')
-rw-r--r-- | fs/ceph/file.c | 94 |
1 files changed, 59 insertions, 35 deletions
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index dd44f3593992..c639d9279fdd 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -486,7 +486,7 @@ static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe) | |||
486 | * objects, rollback on failure, etc.) | 486 | * objects, rollback on failure, etc.) |
487 | */ | 487 | */ |
488 | static ssize_t ceph_sync_write(struct file *file, const char __user *data, | 488 | static ssize_t ceph_sync_write(struct file *file, const char __user *data, |
489 | size_t left, loff_t *offset) | 489 | size_t left, loff_t pos, loff_t *ppos) |
490 | { | 490 | { |
491 | struct inode *inode = file_inode(file); | 491 | struct inode *inode = file_inode(file); |
492 | struct ceph_inode_info *ci = ceph_inode(inode); | 492 | struct ceph_inode_info *ci = ceph_inode(inode); |
@@ -497,7 +497,6 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
497 | int num_ops = 1; | 497 | int num_ops = 1; |
498 | struct page **pages; | 498 | struct page **pages; |
499 | int num_pages; | 499 | int num_pages; |
500 | long long unsigned pos; | ||
501 | u64 len; | 500 | u64 len; |
502 | int written = 0; | 501 | int written = 0; |
503 | int flags; | 502 | int flags; |
@@ -511,14 +510,9 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
511 | if (ceph_snap(file_inode(file)) != CEPH_NOSNAP) | 510 | if (ceph_snap(file_inode(file)) != CEPH_NOSNAP) |
512 | return -EROFS; | 511 | return -EROFS; |
513 | 512 | ||
514 | dout("sync_write on file %p %lld~%u %s\n", file, *offset, | 513 | dout("sync_write on file %p %lld~%u %s\n", file, pos, |
515 | (unsigned)left, (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); | 514 | (unsigned)left, (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); |
516 | 515 | ||
517 | if (file->f_flags & O_APPEND) | ||
518 | pos = i_size_read(inode); | ||
519 | else | ||
520 | pos = *offset; | ||
521 | |||
522 | ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left); | 516 | ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left); |
523 | if (ret < 0) | 517 | if (ret < 0) |
524 | return ret; | 518 | return ret; |
@@ -617,7 +611,7 @@ out: | |||
617 | goto more; | 611 | goto more; |
618 | 612 | ||
619 | ret = written; | 613 | ret = written; |
620 | *offset = pos; | 614 | *ppos = pos; |
621 | if (pos > i_size_read(inode)) | 615 | if (pos > i_size_read(inode)) |
622 | check_caps = ceph_inode_set_size(inode, pos); | 616 | check_caps = ceph_inode_set_size(inode, pos); |
623 | if (check_caps) | 617 | if (check_caps) |
@@ -714,51 +708,75 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
714 | struct ceph_inode_info *ci = ceph_inode(inode); | 708 | struct ceph_inode_info *ci = ceph_inode(inode); |
715 | struct ceph_osd_client *osdc = | 709 | struct ceph_osd_client *osdc = |
716 | &ceph_sb_to_client(inode->i_sb)->client->osdc; | 710 | &ceph_sb_to_client(inode->i_sb)->client->osdc; |
717 | loff_t endoff = pos + iov->iov_len; | 711 | ssize_t count, written = 0; |
718 | int want, got = 0; | 712 | int err, want, got; |
719 | int ret, err; | 713 | bool hold_mutex; |
720 | 714 | ||
721 | if (ceph_snap(inode) != CEPH_NOSNAP) | 715 | if (ceph_snap(inode) != CEPH_NOSNAP) |
722 | return -EROFS; | 716 | return -EROFS; |
723 | 717 | ||
724 | sb_start_write(inode->i_sb); | 718 | sb_start_write(inode->i_sb); |
719 | mutex_lock(&inode->i_mutex); | ||
720 | hold_mutex = true; | ||
721 | |||
722 | err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ); | ||
723 | if (err) | ||
724 | goto out; | ||
725 | |||
726 | /* We can write back this queue in page reclaim */ | ||
727 | current->backing_dev_info = file->f_mapping->backing_dev_info; | ||
728 | |||
729 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); | ||
730 | if (err) | ||
731 | goto out; | ||
732 | |||
733 | if (count == 0) | ||
734 | goto out; | ||
735 | |||
736 | err = file_remove_suid(file); | ||
737 | if (err) | ||
738 | goto out; | ||
739 | |||
740 | err = file_update_time(file); | ||
741 | if (err) | ||
742 | goto out; | ||
743 | |||
725 | retry_snap: | 744 | retry_snap: |
726 | if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) { | 745 | if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) { |
727 | ret = -ENOSPC; | 746 | err = -ENOSPC; |
728 | goto out; | 747 | goto out; |
729 | } | 748 | } |
730 | mutex_lock(&inode->i_mutex); | 749 | |
731 | dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n", | 750 | dout("aio_write %p %llx.%llx %llu~%ld getting caps. i_size %llu\n", |
732 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, | 751 | inode, ceph_vinop(inode), pos, count, inode->i_size); |
733 | inode->i_size); | ||
734 | if (fi->fmode & CEPH_FILE_MODE_LAZY) | 752 | if (fi->fmode & CEPH_FILE_MODE_LAZY) |
735 | want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO; | 753 | want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO; |
736 | else | 754 | else |
737 | want = CEPH_CAP_FILE_BUFFER; | 755 | want = CEPH_CAP_FILE_BUFFER; |
738 | ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff); | 756 | got = 0; |
739 | if (ret < 0) { | 757 | err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, pos + count); |
740 | mutex_unlock(&inode->i_mutex); | 758 | if (err < 0) |
741 | goto out; | 759 | goto out; |
742 | } | ||
743 | 760 | ||
744 | dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n", | 761 | dout("aio_write %p %llx.%llx %llu~%ld got cap refs on %s\n", |
745 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, | 762 | inode, ceph_vinop(inode), pos, count, ceph_cap_string(got)); |
746 | ceph_cap_string(got)); | ||
747 | 763 | ||
748 | if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || | 764 | if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || |
749 | (iocb->ki_filp->f_flags & O_DIRECT) || | 765 | (iocb->ki_filp->f_flags & O_DIRECT) || |
750 | (inode->i_sb->s_flags & MS_SYNCHRONOUS) || | 766 | (inode->i_sb->s_flags & MS_SYNCHRONOUS) || |
751 | (fi->flags & CEPH_F_SYNC)) { | 767 | (fi->flags & CEPH_F_SYNC)) { |
752 | mutex_unlock(&inode->i_mutex); | 768 | mutex_unlock(&inode->i_mutex); |
753 | ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, | 769 | written = ceph_sync_write(file, iov->iov_base, count, |
754 | &iocb->ki_pos); | 770 | pos, &iocb->ki_pos); |
755 | } else { | 771 | } else { |
756 | ret = __generic_file_aio_write(iocb, iov, nr_segs, | 772 | written = generic_file_buffered_write(iocb, iov, nr_segs, |
757 | &iocb->ki_pos); | 773 | pos, &iocb->ki_pos, |
774 | count, 0); | ||
758 | mutex_unlock(&inode->i_mutex); | 775 | mutex_unlock(&inode->i_mutex); |
759 | } | 776 | } |
777 | hold_mutex = false; | ||
760 | 778 | ||
761 | if (ret >= 0) { | 779 | if (written >= 0) { |
762 | int dirty; | 780 | int dirty; |
763 | spin_lock(&ci->i_ceph_lock); | 781 | spin_lock(&ci->i_ceph_lock); |
764 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); | 782 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); |
@@ -772,22 +790,28 @@ retry_snap: | |||
772 | ceph_cap_string(got)); | 790 | ceph_cap_string(got)); |
773 | ceph_put_cap_refs(ci, got); | 791 | ceph_put_cap_refs(ci, got); |
774 | 792 | ||
775 | if (ret >= 0 && | 793 | if (written >= 0 && |
776 | ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) || | 794 | ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) || |
777 | ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) { | 795 | ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) { |
778 | err = vfs_fsync_range(file, pos, pos + ret - 1, 1); | 796 | err = vfs_fsync_range(file, pos, pos + written - 1, 1); |
779 | if (err < 0) | 797 | if (err < 0) |
780 | ret = err; | 798 | written = err; |
781 | } | 799 | } |
782 | out: | 800 | |
783 | if (ret == -EOLDSNAPC) { | 801 | if (written == -EOLDSNAPC) { |
784 | dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n", | 802 | dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n", |
785 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len); | 803 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len); |
804 | mutex_lock(&inode->i_mutex); | ||
805 | hold_mutex = true; | ||
786 | goto retry_snap; | 806 | goto retry_snap; |
787 | } | 807 | } |
808 | out: | ||
809 | if (hold_mutex) | ||
810 | mutex_unlock(&inode->i_mutex); | ||
788 | sb_end_write(inode->i_sb); | 811 | sb_end_write(inode->i_sb); |
812 | current->backing_dev_info = NULL; | ||
789 | 813 | ||
790 | return ret; | 814 | return written ? written : err; |
791 | } | 815 | } |
792 | 816 | ||
793 | /* | 817 | /* |