about | summary | refs | log | tree | commit | diff | stats
path: root/fs/ceph
diff options
context:
space:
mode:
author: Sage Weil <sage@inktank.com> 2013-05-02 00:15:58 -0400
committer: Sage Weil <sage@inktank.com> 2013-05-02 00:15:58 -0400
commit: 7971bd92baf729fcebe04d7330ac22dc668d0261 (patch)
tree: 78a8ea3b4e072e52840dac968dfacfff737765bd /fs/ceph
parent: a8673d61ad77ddf2118599507bd40cc345e95368 (diff)
ceph: revert commit 22cddde104
Commit 22cddde104 breaks the atomicity of write operations; it also introduces a deadlock between write and truncate.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Reviewed-by: Greg Farnum <greg@inktank.com>

Conflicts:
	fs/ceph/addr.c
Diffstat (limited to 'fs/ceph')
-rw-r--r-- fs/ceph/addr.c 51
-rw-r--r-- fs/ceph/file.c 73
-rw-r--r-- fs/ceph/mds_client.c 1
3 files changed, 48 insertions, 77 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index a60ea977af6f..2a571fb4803b 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1067,51 +1067,23 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
1067 struct page **pagep, void **fsdata) 1067 struct page **pagep, void **fsdata)
1068{ 1068{
1069 struct inode *inode = file_inode(file); 1069 struct inode *inode = file_inode(file);
1070 struct ceph_inode_info *ci = ceph_inode(inode);
1071 struct ceph_file_info *fi = file->private_data;
1072 struct page *page; 1070 struct page *page;
1073 pgoff_t index = pos >> PAGE_CACHE_SHIFT; 1071 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
1074 int r, want, got = 0; 1072 int r;
1075
1076 if (fi->fmode & CEPH_FILE_MODE_LAZY)
1077 want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
1078 else
1079 want = CEPH_CAP_FILE_BUFFER;
1080
1081 dout("write_begin %p %llx.%llx %llu~%u getting caps. i_size %llu\n",
1082 inode, ceph_vinop(inode), pos, len, inode->i_size);
1083 r = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, pos+len);
1084 if (r < 0)
1085 return r;
1086 dout("write_begin %p %llx.%llx %llu~%u got cap refs on %s\n",
1087 inode, ceph_vinop(inode), pos, len, ceph_cap_string(got));
1088 if (!(got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO))) {
1089 ceph_put_cap_refs(ci, got);
1090 return -EAGAIN;
1091 }
1092 1073
1093 do { 1074 do {
1094 /* get a page */ 1075 /* get a page */
1095 page = grab_cache_page_write_begin(mapping, index, 0); 1076 page = grab_cache_page_write_begin(mapping, index, 0);
1096 if (!page) { 1077 if (!page)
1097 r = -ENOMEM; 1078 return -ENOMEM;
1098 break; 1079 *pagep = page;
1099 }
1100 1080
1101 dout("write_begin file %p inode %p page %p %d~%d\n", file, 1081 dout("write_begin file %p inode %p page %p %d~%d\n", file,
1102 inode, page, (int)pos, (int)len); 1082 inode, page, (int)pos, (int)len);
1103 1083
1104 r = ceph_update_writeable_page(file, pos, len, page); 1084 r = ceph_update_writeable_page(file, pos, len, page);
1105 if (r)
1106 page_cache_release(page);
1107 } while (r == -EAGAIN); 1085 } while (r == -EAGAIN);
1108 1086
1109 if (r) {
1110 ceph_put_cap_refs(ci, got);
1111 } else {
1112 *pagep = page;
1113 *(int *)fsdata = got;
1114 }
1115 return r; 1087 return r;
1116} 1088}
1117 1089
@@ -1125,12 +1097,10 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
1125 struct page *page, void *fsdata) 1097 struct page *page, void *fsdata)
1126{ 1098{
1127 struct inode *inode = file_inode(file); 1099 struct inode *inode = file_inode(file);
1128 struct ceph_inode_info *ci = ceph_inode(inode);
1129 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 1100 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
1130 struct ceph_mds_client *mdsc = fsc->mdsc; 1101 struct ceph_mds_client *mdsc = fsc->mdsc;
1131 unsigned from = pos & (PAGE_CACHE_SIZE - 1); 1102 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
1132 int check_cap = 0; 1103 int check_cap = 0;
1133 int got = (unsigned long)fsdata;
1134 1104
1135 dout("write_end file %p inode %p page %p %d~%d (%d)\n", file, 1105 dout("write_end file %p inode %p page %p %d~%d (%d)\n", file,
1136 inode, page, (int)pos, (int)copied, (int)len); 1106 inode, page, (int)pos, (int)copied, (int)len);
@@ -1153,19 +1123,6 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
1153 up_read(&mdsc->snap_rwsem); 1123 up_read(&mdsc->snap_rwsem);
1154 page_cache_release(page); 1124 page_cache_release(page);
1155 1125
1156 if (copied > 0) {
1157 int dirty;
1158 spin_lock(&ci->i_ceph_lock);
1159 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
1160 spin_unlock(&ci->i_ceph_lock);
1161 if (dirty)
1162 __mark_inode_dirty(inode, dirty);
1163 }
1164
1165 dout("write_end %p %llx.%llx %llu~%u dropping cap refs on %s\n",
1166 inode, ceph_vinop(inode), pos, len, ceph_cap_string(got));
1167 ceph_put_cap_refs(ci, got);
1168
1169 if (check_cap) 1126 if (check_cap)
1170 ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY, NULL); 1127 ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY, NULL);
1171 1128
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index bf338d9b67e3..b86d2a0eb145 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -718,53 +718,63 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
718 struct ceph_osd_client *osdc = 718 struct ceph_osd_client *osdc =
719 &ceph_sb_to_client(inode->i_sb)->client->osdc; 719 &ceph_sb_to_client(inode->i_sb)->client->osdc;
720 loff_t endoff = pos + iov->iov_len; 720 loff_t endoff = pos + iov->iov_len;
721 int got = 0; 721 int want, got = 0;
722 int ret, err, written; 722 int ret, err;
723 723
724 if (ceph_snap(inode) != CEPH_NOSNAP) 724 if (ceph_snap(inode) != CEPH_NOSNAP)
725 return -EROFS; 725 return -EROFS;
726 726
727retry_snap: 727retry_snap:
728 written = 0;
729 if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) 728 if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL))
730 return -ENOSPC; 729 return -ENOSPC;
731 __ceph_do_pending_vmtruncate(inode); 730 __ceph_do_pending_vmtruncate(inode);
731 dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n",
732 inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
733 inode->i_size);
734 if (fi->fmode & CEPH_FILE_MODE_LAZY)
735 want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
736 else
737 want = CEPH_CAP_FILE_BUFFER;
738 ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff);
739 if (ret < 0)
740 goto out_put;
732 741
733 /* 742 dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n",
734 * try to do a buffered write. if we don't have sufficient 743 inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
735 * caps, we'll get -EAGAIN from generic_file_aio_write, or a 744 ceph_cap_string(got));
736 * short write if we only get caps for some pages. 745
737 */ 746 if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
738 if (!(iocb->ki_filp->f_flags & O_DIRECT) && 747 (iocb->ki_filp->f_flags & O_DIRECT) ||
739 !(inode->i_sb->s_flags & MS_SYNCHRONOUS) && 748 (inode->i_sb->s_flags & MS_SYNCHRONOUS) ||
740 !(fi->flags & CEPH_F_SYNC)) { 749 (fi->flags & CEPH_F_SYNC)) {
741 ret = generic_file_aio_write(iocb, iov, nr_segs, pos); 750 ret = ceph_sync_write(file, iov->iov_base, iov->iov_len,
742 if (ret >= 0) 751 &iocb->ki_pos);
743 written = ret; 752 } else {
753 /*
754 * buffered write; drop Fw early to avoid slow
755 * revocation if we get stuck on balance_dirty_pages
756 */
757 int dirty;
744 758
759 spin_lock(&ci->i_ceph_lock);
760 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
761 spin_unlock(&ci->i_ceph_lock);
762 ceph_put_cap_refs(ci, got);
763
764 ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
745 if ((ret >= 0 || ret == -EIOCBQUEUED) && 765 if ((ret >= 0 || ret == -EIOCBQUEUED) &&
746 ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) 766 ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host)
747 || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) { 767 || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) {
748 err = vfs_fsync_range(file, pos, pos + written - 1, 1); 768 err = vfs_fsync_range(file, pos, pos + ret - 1, 1);
749 if (err < 0) 769 if (err < 0)
750 ret = err; 770 ret = err;
751 } 771 }
752 if ((ret < 0 && ret != -EAGAIN) || pos + written >= endoff)
753 goto out;
754 }
755 772
756 dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n", 773 if (dirty)
757 inode, ceph_vinop(inode), pos + written, 774 __mark_inode_dirty(inode, dirty);
758 (unsigned)iov->iov_len - written, inode->i_size);
759 ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, 0, &got, endoff);
760 if (ret < 0)
761 goto out; 775 goto out;
776 }
762 777
763 dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n",
764 inode, ceph_vinop(inode), pos + written,
765 (unsigned)iov->iov_len - written, ceph_cap_string(got));
766 ret = ceph_sync_write(file, iov->iov_base + written,
767 iov->iov_len - written, &iocb->ki_pos);
768 if (ret >= 0) { 778 if (ret >= 0) {
769 int dirty; 779 int dirty;
770 spin_lock(&ci->i_ceph_lock); 780 spin_lock(&ci->i_ceph_lock);
@@ -773,10 +783,13 @@ retry_snap:
773 if (dirty) 783 if (dirty)
774 __mark_inode_dirty(inode, dirty); 784 __mark_inode_dirty(inode, dirty);
775 } 785 }
786
787out_put:
776 dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", 788 dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n",
777 inode, ceph_vinop(inode), pos + written, 789 inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
778 (unsigned)iov->iov_len - written, ceph_cap_string(got)); 790 ceph_cap_string(got));
779 ceph_put_cap_refs(ci, got); 791 ceph_put_cap_refs(ci, got);
792
780out: 793out:
781 if (ret == -EOLDSNAPC) { 794 if (ret == -EOLDSNAPC) {
782 dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n", 795 dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n",
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 56da380878c5..9811caae7be4 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1916,6 +1916,7 @@ static void __wake_requests(struct ceph_mds_client *mdsc,
1916 req = list_entry(tmp_list.next, 1916 req = list_entry(tmp_list.next,
1917 struct ceph_mds_request, r_wait); 1917 struct ceph_mds_request, r_wait);
1918 list_del_init(&req->r_wait); 1918 list_del_init(&req->r_wait);
1919 dout(" wake request %p tid %llu\n", req, req->r_tid);
1919 __do_request(mdsc, req); 1920 __do_request(mdsc, req);
1920 } 1921 }
1921} 1922}