diff options
author | Sage Weil <sage@inktank.com> | 2012-11-05 14:07:23 -0500 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2012-11-05 14:07:23 -0500 |
commit | 22cddde104d715600a4c218bf9224923208afe90 (patch) | |
tree | 82fc93d9c89ef41145e52ace63484047e600f866 /fs/ceph/addr.c | |
parent | 4d1d0534f53863108fdea496288cb3310f88118d (diff) |
ceph: Fix i_size update race
ceph_aio_write() has an optimization that marks cap CEPH_CAP_FILE_WR
dirty before data is copied to page cache and inode size is updated.
If ceph_check_caps() flushes the dirty cap before the inode size is
updated, MDS can miss the new inode size. The fix is to move
ceph_{get,put}_cap_refs() into ceph_write_{begin,end}() and call
__ceph_mark_dirty_caps() after inode size is updated.
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: Sage Weil <sage@inktank.com>
Diffstat (limited to 'fs/ceph/addr.c')
-rw-r--r-- | fs/ceph/addr.c | 51 |
1 files changed, 47 insertions, 4 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 22b6e4583faa..21a07187df05 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -1078,23 +1078,51 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, | |||
1078 | struct page **pagep, void **fsdata) | 1078 | struct page **pagep, void **fsdata) |
1079 | { | 1079 | { |
1080 | struct inode *inode = file->f_dentry->d_inode; | 1080 | struct inode *inode = file->f_dentry->d_inode; |
1081 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
1082 | struct ceph_file_info *fi = file->private_data; | ||
1081 | struct page *page; | 1083 | struct page *page; |
1082 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; | 1084 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; |
1083 | int r; | 1085 | int r, want, got = 0; |
1086 | |||
1087 | if (fi->fmode & CEPH_FILE_MODE_LAZY) | ||
1088 | want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO; | ||
1089 | else | ||
1090 | want = CEPH_CAP_FILE_BUFFER; | ||
1091 | |||
1092 | dout("write_begin %p %llx.%llx %llu~%u getting caps. i_size %llu\n", | ||
1093 | inode, ceph_vinop(inode), pos, len, inode->i_size); | ||
1094 | r = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, pos+len); | ||
1095 | if (r < 0) | ||
1096 | return r; | ||
1097 | dout("write_begin %p %llx.%llx %llu~%u got cap refs on %s\n", | ||
1098 | inode, ceph_vinop(inode), pos, len, ceph_cap_string(got)); | ||
1099 | if (!(got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO))) { | ||
1100 | ceph_put_cap_refs(ci, got); | ||
1101 | return -EAGAIN; | ||
1102 | } | ||
1084 | 1103 | ||
1085 | do { | 1104 | do { |
1086 | /* get a page */ | 1105 | /* get a page */ |
1087 | page = grab_cache_page_write_begin(mapping, index, 0); | 1106 | page = grab_cache_page_write_begin(mapping, index, 0); |
1088 | if (!page) | 1107 | if (!page) { |
1089 | return -ENOMEM; | 1108 | r = -ENOMEM; |
1090 | *pagep = page; | 1109 | break; |
1110 | } | ||
1091 | 1111 | ||
1092 | dout("write_begin file %p inode %p page %p %d~%d\n", file, | 1112 | dout("write_begin file %p inode %p page %p %d~%d\n", file, |
1093 | inode, page, (int)pos, (int)len); | 1113 | inode, page, (int)pos, (int)len); |
1094 | 1114 | ||
1095 | r = ceph_update_writeable_page(file, pos, len, page); | 1115 | r = ceph_update_writeable_page(file, pos, len, page); |
1116 | if (r) | ||
1117 | page_cache_release(page); | ||
1096 | } while (r == -EAGAIN); | 1118 | } while (r == -EAGAIN); |
1097 | 1119 | ||
1120 | if (r) { | ||
1121 | ceph_put_cap_refs(ci, got); | ||
1122 | } else { | ||
1123 | *pagep = page; | ||
1124 | *(int *)fsdata = got; | ||
1125 | } | ||
1098 | return r; | 1126 | return r; |
1099 | } | 1127 | } |
1100 | 1128 | ||
@@ -1108,10 +1136,12 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, | |||
1108 | struct page *page, void *fsdata) | 1136 | struct page *page, void *fsdata) |
1109 | { | 1137 | { |
1110 | struct inode *inode = file->f_dentry->d_inode; | 1138 | struct inode *inode = file->f_dentry->d_inode; |
1139 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
1111 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); | 1140 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
1112 | struct ceph_mds_client *mdsc = fsc->mdsc; | 1141 | struct ceph_mds_client *mdsc = fsc->mdsc; |
1113 | unsigned from = pos & (PAGE_CACHE_SIZE - 1); | 1142 | unsigned from = pos & (PAGE_CACHE_SIZE - 1); |
1114 | int check_cap = 0; | 1143 | int check_cap = 0; |
1144 | int got = (unsigned long)fsdata; | ||
1115 | 1145 | ||
1116 | dout("write_end file %p inode %p page %p %d~%d (%d)\n", file, | 1146 | dout("write_end file %p inode %p page %p %d~%d (%d)\n", file, |
1117 | inode, page, (int)pos, (int)copied, (int)len); | 1147 | inode, page, (int)pos, (int)copied, (int)len); |
@@ -1134,6 +1164,19 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, | |||
1134 | up_read(&mdsc->snap_rwsem); | 1164 | up_read(&mdsc->snap_rwsem); |
1135 | page_cache_release(page); | 1165 | page_cache_release(page); |
1136 | 1166 | ||
1167 | if (copied > 0) { | ||
1168 | int dirty; | ||
1169 | spin_lock(&ci->i_ceph_lock); | ||
1170 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); | ||
1171 | spin_unlock(&ci->i_ceph_lock); | ||
1172 | if (dirty) | ||
1173 | __mark_inode_dirty(inode, dirty); | ||
1174 | } | ||
1175 | |||
1176 | dout("write_end %p %llx.%llx %llu~%u dropping cap refs on %s\n", | ||
1177 | inode, ceph_vinop(inode), pos, len, ceph_cap_string(got)); | ||
1178 | ceph_put_cap_refs(ci, got); | ||
1179 | |||
1137 | if (check_cap) | 1180 | if (check_cap) |
1138 | ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY, NULL); | 1181 | ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY, NULL); |
1139 | 1182 | ||