aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ceph/addr.c
diff options
context:
space:
mode:
author: Yan, Zheng <zheng.z.yan@intel.com> 2013-11-28 01:28:14 -0500
committer: Ilya Dryomov <ilya.dryomov@inktank.com> 2013-12-31 13:32:00 -0500
commit: 61f68816211ee4b884dc0dda8dd4d977548f4865 (patch)
tree: 8363f3938546bf044dac413bff490dd2e9e12310 /fs/ceph/addr.c
parent: 9b60e70b3b6a8e4bc2d1b6d9f858a30e1cec496b (diff)
ceph: check caps in filemap_fault and page_mkwrite
Add a cap check to the page fault handler. The check prevents the page fault handler from adding new pages to the page cache while Fcb caps are being revoked. This fixes a hang during Fc revocation in mmap I/O workloads with multiple clients.
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Reviewed-by: Sage Weil <sage@inktank.com>
Diffstat (limited to 'fs/ceph/addr.c')
-rw-r--r-- fs/ceph/addr.c 89
1 files changed, 77 insertions, 12 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index c346b8479f99..ebda329611be 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1210,6 +1210,41 @@ const struct address_space_operations ceph_aops = {
1210/* 1210/*
1211 * vm ops 1211 * vm ops
1212 */ 1212 */
1213static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1214{
1215 struct inode *inode = file_inode(vma->vm_file);
1216 struct ceph_inode_info *ci = ceph_inode(inode);
1217 struct ceph_file_info *fi = vma->vm_file->private_data;
1218 loff_t off = vmf->pgoff << PAGE_CACHE_SHIFT;
1219 int want, got, ret;
1220
1221 dout("filemap_fault %p %llx.%llx %llu~%zd trying to get caps\n",
1222 inode, ceph_vinop(inode), off, PAGE_CACHE_SIZE);
1223 if (fi->fmode & CEPH_FILE_MODE_LAZY)
1224 want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
1225 else
1226 want = CEPH_CAP_FILE_CACHE;
1227 while (1) {
1228 got = 0;
1229 ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1);
1230 if (ret == 0)
1231 break;
1232 if (ret != -ERESTARTSYS) {
1233 WARN_ON(1);
1234 return VM_FAULT_SIGBUS;
1235 }
1236 }
1237 dout("filemap_fault %p %llu~%zd got cap refs on %s\n",
1238 inode, off, PAGE_CACHE_SIZE, ceph_cap_string(got));
1239
1240 ret = filemap_fault(vma, vmf);
1241
1242 dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n",
1243 inode, off, PAGE_CACHE_SIZE, ceph_cap_string(got), ret);
1244 ceph_put_cap_refs(ci, got);
1245
1246 return ret;
1247}
1213 1248
1214/* 1249/*
1215 * Reuse write_begin here for simplicity. 1250 * Reuse write_begin here for simplicity.
@@ -1217,23 +1252,41 @@ const struct address_space_operations ceph_aops = {
1217static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) 1252static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1218{ 1253{
1219 struct inode *inode = file_inode(vma->vm_file); 1254 struct inode *inode = file_inode(vma->vm_file);
1220 struct page *page = vmf->page; 1255 struct ceph_inode_info *ci = ceph_inode(inode);
1256 struct ceph_file_info *fi = vma->vm_file->private_data;
1221 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; 1257 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
1258 struct page *page = vmf->page;
1222 loff_t off = page_offset(page); 1259 loff_t off = page_offset(page);
1223 loff_t size, len; 1260 loff_t size = i_size_read(inode);
1224 int ret; 1261 size_t len;
1225 1262 int want, got, ret;
1226 /* Update time before taking page lock */
1227 file_update_time(vma->vm_file);
1228 1263
1229 size = i_size_read(inode);
1230 if (off + PAGE_CACHE_SIZE <= size) 1264 if (off + PAGE_CACHE_SIZE <= size)
1231 len = PAGE_CACHE_SIZE; 1265 len = PAGE_CACHE_SIZE;
1232 else 1266 else
1233 len = size & ~PAGE_CACHE_MASK; 1267 len = size & ~PAGE_CACHE_MASK;
1234 1268
1235 dout("page_mkwrite %p %llu~%llu page %p idx %lu\n", inode, 1269 dout("page_mkwrite %p %llx.%llx %llu~%zd getting caps i_size %llu\n",
1236 off, len, page, page->index); 1270 inode, ceph_vinop(inode), off, len, size);
1271 if (fi->fmode & CEPH_FILE_MODE_LAZY)
1272 want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
1273 else
1274 want = CEPH_CAP_FILE_BUFFER;
1275 while (1) {
1276 got = 0;
1277 ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, off + len);
1278 if (ret == 0)
1279 break;
1280 if (ret != -ERESTARTSYS) {
1281 WARN_ON(1);
1282 return VM_FAULT_SIGBUS;
1283 }
1284 }
1285 dout("page_mkwrite %p %llu~%zd got cap refs on %s\n",
1286 inode, off, len, ceph_cap_string(got));
1287
1288 /* Update time before taking page lock */
1289 file_update_time(vma->vm_file);
1237 1290
1238 lock_page(page); 1291 lock_page(page);
1239 1292
@@ -1255,14 +1308,26 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1255 ret = VM_FAULT_SIGBUS; 1308 ret = VM_FAULT_SIGBUS;
1256 } 1309 }
1257out: 1310out:
1258 dout("page_mkwrite %p %llu~%llu = %d\n", inode, off, len, ret); 1311 if (ret != VM_FAULT_LOCKED) {
1259 if (ret != VM_FAULT_LOCKED)
1260 unlock_page(page); 1312 unlock_page(page);
1313 } else {
1314 int dirty;
1315 spin_lock(&ci->i_ceph_lock);
1316 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
1317 spin_unlock(&ci->i_ceph_lock);
1318 if (dirty)
1319 __mark_inode_dirty(inode, dirty);
1320 }
1321
1322 dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %d\n",
1323 inode, off, len, ceph_cap_string(got), ret);
1324 ceph_put_cap_refs(ci, got);
1325
1261 return ret; 1326 return ret;
1262} 1327}
1263 1328
1264static struct vm_operations_struct ceph_vmops = { 1329static struct vm_operations_struct ceph_vmops = {
1265 .fault = filemap_fault, 1330 .fault = ceph_filemap_fault,
1266 .page_mkwrite = ceph_page_mkwrite, 1331 .page_mkwrite = ceph_page_mkwrite,
1267 .remap_pages = generic_file_remap_pages, 1332 .remap_pages = generic_file_remap_pages,
1268}; 1333};