diff options
author | Sage Weil <sage@newdream.net> | 2011-07-26 14:27:34 -0400 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2011-07-26 14:27:34 -0400 |
commit | d8de9ab63a57326d21154c13c365f949f53ce8e1 (patch) | |
tree | 283aac6bf2cc91a772d9cc04c97f46f60267e4c5 /fs/ceph/file.c | |
parent | 4cf9d544631c92809cb94ea680c71df56e9437aa (diff) |
ceph: avoid carrying Fw cap during write into page cache
The generic_file_aio_write call may block on balance_dirty_pages while we
flush data to the OSDs. If we hold a reference to the FILE_WR cap during
that interval, revocation by the MDS (e.g., to do a stat(2)) may be very
slow.
Reviewed-by: Yehuda Sadeh <yehuda@hq.newdream.net>
Signed-off-by: Sage Weil <sage@newdream.net>
Diffstat (limited to 'fs/ceph/file.c')
-rw-r--r-- | fs/ceph/file.c | 22 |
1 files changed, 19 insertions, 3 deletions
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 44e4fe9fba02..6c90cf090601 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -713,7 +713,7 @@ retry_snap: | |||
713 | want = CEPH_CAP_FILE_BUFFER; | 713 | want = CEPH_CAP_FILE_BUFFER; |
714 | ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff); | 714 | ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff); |
715 | if (ret < 0) | 715 | if (ret < 0) |
716 | goto out; | 716 | goto out_put; |
717 | 717 | ||
718 | dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n", | 718 | dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n", |
719 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, | 719 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, |
@@ -726,8 +726,18 @@ retry_snap: | |||
726 | ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, | 726 | ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, |
727 | &iocb->ki_pos); | 727 | &iocb->ki_pos); |
728 | } else { | 728 | } else { |
729 | ret = generic_file_aio_write(iocb, iov, nr_segs, pos); | 729 | /* |
730 | * buffered write; drop Fw early to avoid slow | ||
731 | * revocation if we get stuck on balance_dirty_pages | ||
732 | */ | ||
733 | int dirty; | ||
734 | |||
735 | spin_lock(&inode->i_lock); | ||
736 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); | ||
737 | spin_unlock(&inode->i_lock); | ||
738 | ceph_put_cap_refs(ci, got); | ||
730 | 739 | ||
740 | ret = generic_file_aio_write(iocb, iov, nr_segs, pos); | ||
731 | if ((ret >= 0 || ret == -EIOCBQUEUED) && | 741 | if ((ret >= 0 || ret == -EIOCBQUEUED) && |
732 | ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) | 742 | ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) |
733 | || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) { | 743 | || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) { |
@@ -735,7 +745,12 @@ retry_snap: | |||
735 | if (err < 0) | 745 | if (err < 0) |
736 | ret = err; | 746 | ret = err; |
737 | } | 747 | } |
748 | |||
749 | if (dirty) | ||
750 | __mark_inode_dirty(inode, dirty); | ||
751 | goto out; | ||
738 | } | 752 | } |
753 | |||
739 | if (ret >= 0) { | 754 | if (ret >= 0) { |
740 | int dirty; | 755 | int dirty; |
741 | spin_lock(&inode->i_lock); | 756 | spin_lock(&inode->i_lock); |
@@ -745,12 +760,13 @@ retry_snap: | |||
745 | __mark_inode_dirty(inode, dirty); | 760 | __mark_inode_dirty(inode, dirty); |
746 | } | 761 | } |
747 | 762 | ||
748 | out: | 763 | out_put: |
749 | dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", | 764 | dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", |
750 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, | 765 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, |
751 | ceph_cap_string(got)); | 766 | ceph_cap_string(got)); |
752 | ceph_put_cap_refs(ci, got); | 767 | ceph_put_cap_refs(ci, got); |
753 | 768 | ||
769 | out: | ||
754 | if (ret == -EOLDSNAPC) { | 770 | if (ret == -EOLDSNAPC) { |
755 | dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n", | 771 | dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n", |
756 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len); | 772 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len); |