about | summary | refs | log | tree | commit | diff | stats
path: root/fs/ceph
diff options
context:
space:
mode:
author: Yan, Zheng <zyan@redhat.com> 2015-06-10 05:26:13 -0400
committer: Ilya Dryomov <idryomov@gmail.com> 2015-06-25 04:49:31 -0400
commit: f66fd9f0952187d274c13c136b74548f792c1925 (patch)
tree: c021f04f69b116f2673fdfb2354a99871f8f03a4 /fs/ceph
parent: e548e9b93d3e565e42b938a99804114565be1f81 (diff)
ceph: pre-allocate data structure that tracks caps flushing
Signed-off-by: Yan, Zheng <zyan@redhat.com>
Diffstat (limited to 'fs/ceph')
-rw-r--r-- fs/ceph/addr.c 19
-rw-r--r-- fs/ceph/caps.c 26
-rw-r--r-- fs/ceph/file.c 18
-rw-r--r-- fs/ceph/inode.c 15
-rw-r--r-- fs/ceph/mds_client.c 6
-rw-r--r-- fs/ceph/super.c 8
-rw-r--r-- fs/ceph/super.h 6
-rw-r--r-- fs/ceph/xattr.c 20
8 files changed, 102 insertions, 16 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 5f53ac0d9d7c..7edf3c49e661 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1308,12 +1308,17 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1308 struct inode *inode = file_inode(vma->vm_file); 1308 struct inode *inode = file_inode(vma->vm_file);
1309 struct ceph_inode_info *ci = ceph_inode(inode); 1309 struct ceph_inode_info *ci = ceph_inode(inode);
1310 struct ceph_file_info *fi = vma->vm_file->private_data; 1310 struct ceph_file_info *fi = vma->vm_file->private_data;
1311 struct ceph_cap_flush *prealloc_cf;
1311 struct page *page = vmf->page; 1312 struct page *page = vmf->page;
1312 loff_t off = page_offset(page); 1313 loff_t off = page_offset(page);
1313 loff_t size = i_size_read(inode); 1314 loff_t size = i_size_read(inode);
1314 size_t len; 1315 size_t len;
1315 int want, got, ret; 1316 int want, got, ret;
1316 1317
1318 prealloc_cf = ceph_alloc_cap_flush();
1319 if (!prealloc_cf)
1320 return VM_FAULT_SIGBUS;
1321
1317 if (ci->i_inline_version != CEPH_INLINE_NONE) { 1322 if (ci->i_inline_version != CEPH_INLINE_NONE) {
1318 struct page *locked_page = NULL; 1323 struct page *locked_page = NULL;
1319 if (off == 0) { 1324 if (off == 0) {
@@ -1323,8 +1328,10 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1323 ret = ceph_uninline_data(vma->vm_file, locked_page); 1328 ret = ceph_uninline_data(vma->vm_file, locked_page);
1324 if (locked_page) 1329 if (locked_page)
1325 unlock_page(locked_page); 1330 unlock_page(locked_page);
1326 if (ret < 0) 1331 if (ret < 0) {
1327 return VM_FAULT_SIGBUS; 1332 ret = VM_FAULT_SIGBUS;
1333 goto out_free;
1334 }
1328 } 1335 }
1329 1336
1330 if (off + PAGE_CACHE_SIZE <= size) 1337 if (off + PAGE_CACHE_SIZE <= size)
@@ -1346,7 +1353,8 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1346 break; 1353 break;
1347 if (ret != -ERESTARTSYS) { 1354 if (ret != -ERESTARTSYS) {
1348 WARN_ON(1); 1355 WARN_ON(1);
1349 return VM_FAULT_SIGBUS; 1356 ret = VM_FAULT_SIGBUS;
1357 goto out_free;
1350 } 1358 }
1351 } 1359 }
1352 dout("page_mkwrite %p %llu~%zd got cap refs on %s\n", 1360 dout("page_mkwrite %p %llu~%zd got cap refs on %s\n",
@@ -1381,7 +1389,8 @@ out:
1381 int dirty; 1389 int dirty;
1382 spin_lock(&ci->i_ceph_lock); 1390 spin_lock(&ci->i_ceph_lock);
1383 ci->i_inline_version = CEPH_INLINE_NONE; 1391 ci->i_inline_version = CEPH_INLINE_NONE;
1384 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); 1392 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
1393 &prealloc_cf);
1385 spin_unlock(&ci->i_ceph_lock); 1394 spin_unlock(&ci->i_ceph_lock);
1386 if (dirty) 1395 if (dirty)
1387 __mark_inode_dirty(inode, dirty); 1396 __mark_inode_dirty(inode, dirty);
@@ -1390,6 +1399,8 @@ out:
1390 dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %d\n", 1399 dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %d\n",
1391 inode, off, len, ceph_cap_string(got), ret); 1400 inode, off, len, ceph_cap_string(got), ret);
1392 ceph_put_cap_refs(ci, got); 1401 ceph_put_cap_refs(ci, got);
1402out_free:
1403 ceph_free_cap_flush(prealloc_cf);
1393 1404
1394 return ret; 1405 return ret;
1395} 1406}
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 69a16044ec41..dd7b20adf1d4 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1356,7 +1356,8 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci)
1356 * Caller is then responsible for calling __mark_inode_dirty with the 1356 * Caller is then responsible for calling __mark_inode_dirty with the
1357 * returned flags value. 1357 * returned flags value.
1358 */ 1358 */
1359int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) 1359int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
1360 struct ceph_cap_flush **pcf)
1360{ 1361{
1361 struct ceph_mds_client *mdsc = 1362 struct ceph_mds_client *mdsc =
1362 ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; 1363 ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
@@ -1376,6 +1377,9 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
1376 ceph_cap_string(was | mask)); 1377 ceph_cap_string(was | mask));
1377 ci->i_dirty_caps |= mask; 1378 ci->i_dirty_caps |= mask;
1378 if (was == 0) { 1379 if (was == 0) {
1380 WARN_ON_ONCE(ci->i_prealloc_cap_flush);
1381 swap(ci->i_prealloc_cap_flush, *pcf);
1382
1379 if (!ci->i_head_snapc) { 1383 if (!ci->i_head_snapc) {
1380 WARN_ON_ONCE(!rwsem_is_locked(&mdsc->snap_rwsem)); 1384 WARN_ON_ONCE(!rwsem_is_locked(&mdsc->snap_rwsem));
1381 ci->i_head_snapc = ceph_get_snap_context( 1385 ci->i_head_snapc = ceph_get_snap_context(
@@ -1391,6 +1395,8 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
1391 ihold(inode); 1395 ihold(inode);
1392 dirty |= I_DIRTY_SYNC; 1396 dirty |= I_DIRTY_SYNC;
1393 } 1397 }
1398 } else {
1399 WARN_ON_ONCE(!ci->i_prealloc_cap_flush);
1394 } 1400 }
1395 BUG_ON(list_empty(&ci->i_dirty_item)); 1401 BUG_ON(list_empty(&ci->i_dirty_item));
1396 if (((was | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) && 1402 if (((was | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) &&
@@ -1446,6 +1452,17 @@ static void __add_cap_flushing_to_mdsc(struct ceph_mds_client *mdsc,
1446 rb_insert_color(&cf->g_node, &mdsc->cap_flush_tree); 1452 rb_insert_color(&cf->g_node, &mdsc->cap_flush_tree);
1447} 1453}
1448 1454
1455struct ceph_cap_flush *ceph_alloc_cap_flush(void)
1456{
1457 return kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
1458}
1459
1460void ceph_free_cap_flush(struct ceph_cap_flush *cf)
1461{
1462 if (cf)
1463 kmem_cache_free(ceph_cap_flush_cachep, cf);
1464}
1465
1449static u64 __get_oldest_flush_tid(struct ceph_mds_client *mdsc) 1466static u64 __get_oldest_flush_tid(struct ceph_mds_client *mdsc)
1450{ 1467{
1451 struct rb_node *n = rb_first(&mdsc->cap_flush_tree); 1468 struct rb_node *n = rb_first(&mdsc->cap_flush_tree);
@@ -1469,11 +1486,12 @@ static int __mark_caps_flushing(struct inode *inode,
1469{ 1486{
1470 struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; 1487 struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
1471 struct ceph_inode_info *ci = ceph_inode(inode); 1488 struct ceph_inode_info *ci = ceph_inode(inode);
1472 struct ceph_cap_flush *cf; 1489 struct ceph_cap_flush *cf = NULL;
1473 int flushing; 1490 int flushing;
1474 1491
1475 BUG_ON(ci->i_dirty_caps == 0); 1492 BUG_ON(ci->i_dirty_caps == 0);
1476 BUG_ON(list_empty(&ci->i_dirty_item)); 1493 BUG_ON(list_empty(&ci->i_dirty_item));
1494 BUG_ON(!ci->i_prealloc_cap_flush);
1477 1495
1478 flushing = ci->i_dirty_caps; 1496 flushing = ci->i_dirty_caps;
1479 dout("__mark_caps_flushing flushing %s, flushing_caps %s -> %s\n", 1497 dout("__mark_caps_flushing flushing %s, flushing_caps %s -> %s\n",
@@ -1484,7 +1502,7 @@ static int __mark_caps_flushing(struct inode *inode,
1484 ci->i_dirty_caps = 0; 1502 ci->i_dirty_caps = 0;
1485 dout(" inode %p now !dirty\n", inode); 1503 dout(" inode %p now !dirty\n", inode);
1486 1504
1487 cf = kmalloc(sizeof(*cf), GFP_ATOMIC); 1505 swap(cf, ci->i_prealloc_cap_flush);
1488 cf->caps = flushing; 1506 cf->caps = flushing;
1489 cf->kick = false; 1507 cf->kick = false;
1490 1508
@@ -3075,7 +3093,7 @@ out:
3075 cf = list_first_entry(&to_remove, 3093 cf = list_first_entry(&to_remove,
3076 struct ceph_cap_flush, list); 3094 struct ceph_cap_flush, list);
3077 list_del(&cf->list); 3095 list_del(&cf->list);
3078 kfree(cf); 3096 ceph_free_cap_flush(cf);
3079 } 3097 }
3080 if (drop) 3098 if (drop)
3081 iput(inode); 3099 iput(inode);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 0a76a370d798..8a4eb4d21d3c 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -939,6 +939,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
939 struct ceph_inode_info *ci = ceph_inode(inode); 939 struct ceph_inode_info *ci = ceph_inode(inode);
940 struct ceph_osd_client *osdc = 940 struct ceph_osd_client *osdc =
941 &ceph_sb_to_client(inode->i_sb)->client->osdc; 941 &ceph_sb_to_client(inode->i_sb)->client->osdc;
942 struct ceph_cap_flush *prealloc_cf;
942 ssize_t count, written = 0; 943 ssize_t count, written = 0;
943 int err, want, got; 944 int err, want, got;
944 loff_t pos; 945 loff_t pos;
@@ -946,6 +947,10 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
946 if (ceph_snap(inode) != CEPH_NOSNAP) 947 if (ceph_snap(inode) != CEPH_NOSNAP)
947 return -EROFS; 948 return -EROFS;
948 949
950 prealloc_cf = ceph_alloc_cap_flush();
951 if (!prealloc_cf)
952 return -ENOMEM;
953
949 mutex_lock(&inode->i_mutex); 954 mutex_lock(&inode->i_mutex);
950 955
951 /* We can write back this queue in page reclaim */ 956 /* We can write back this queue in page reclaim */
@@ -1050,7 +1055,8 @@ retry_snap:
1050 int dirty; 1055 int dirty;
1051 spin_lock(&ci->i_ceph_lock); 1056 spin_lock(&ci->i_ceph_lock);
1052 ci->i_inline_version = CEPH_INLINE_NONE; 1057 ci->i_inline_version = CEPH_INLINE_NONE;
1053 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); 1058 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
1059 &prealloc_cf);
1054 spin_unlock(&ci->i_ceph_lock); 1060 spin_unlock(&ci->i_ceph_lock);
1055 if (dirty) 1061 if (dirty)
1056 __mark_inode_dirty(inode, dirty); 1062 __mark_inode_dirty(inode, dirty);
@@ -1074,6 +1080,7 @@ retry_snap:
1074out: 1080out:
1075 mutex_unlock(&inode->i_mutex); 1081 mutex_unlock(&inode->i_mutex);
1076out_unlocked: 1082out_unlocked:
1083 ceph_free_cap_flush(prealloc_cf);
1077 current->backing_dev_info = NULL; 1084 current->backing_dev_info = NULL;
1078 return written ? written : err; 1085 return written ? written : err;
1079} 1086}
@@ -1270,6 +1277,7 @@ static long ceph_fallocate(struct file *file, int mode,
1270 struct ceph_inode_info *ci = ceph_inode(inode); 1277 struct ceph_inode_info *ci = ceph_inode(inode);
1271 struct ceph_osd_client *osdc = 1278 struct ceph_osd_client *osdc =
1272 &ceph_inode_to_client(inode)->client->osdc; 1279 &ceph_inode_to_client(inode)->client->osdc;
1280 struct ceph_cap_flush *prealloc_cf;
1273 int want, got = 0; 1281 int want, got = 0;
1274 int dirty; 1282 int dirty;
1275 int ret = 0; 1283 int ret = 0;
@@ -1282,6 +1290,10 @@ static long ceph_fallocate(struct file *file, int mode,
1282 if (!S_ISREG(inode->i_mode)) 1290 if (!S_ISREG(inode->i_mode))
1283 return -EOPNOTSUPP; 1291 return -EOPNOTSUPP;
1284 1292
1293 prealloc_cf = ceph_alloc_cap_flush();
1294 if (!prealloc_cf)
1295 return -ENOMEM;
1296
1285 mutex_lock(&inode->i_mutex); 1297 mutex_lock(&inode->i_mutex);
1286 1298
1287 if (ceph_snap(inode) != CEPH_NOSNAP) { 1299 if (ceph_snap(inode) != CEPH_NOSNAP) {
@@ -1328,7 +1340,8 @@ static long ceph_fallocate(struct file *file, int mode,
1328 if (!ret) { 1340 if (!ret) {
1329 spin_lock(&ci->i_ceph_lock); 1341 spin_lock(&ci->i_ceph_lock);
1330 ci->i_inline_version = CEPH_INLINE_NONE; 1342 ci->i_inline_version = CEPH_INLINE_NONE;
1331 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); 1343 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
1344 &prealloc_cf);
1332 spin_unlock(&ci->i_ceph_lock); 1345 spin_unlock(&ci->i_ceph_lock);
1333 if (dirty) 1346 if (dirty)
1334 __mark_inode_dirty(inode, dirty); 1347 __mark_inode_dirty(inode, dirty);
@@ -1337,6 +1350,7 @@ static long ceph_fallocate(struct file *file, int mode,
1337 ceph_put_cap_refs(ci, got); 1350 ceph_put_cap_refs(ci, got);
1338unlock: 1351unlock:
1339 mutex_unlock(&inode->i_mutex); 1352 mutex_unlock(&inode->i_mutex);
1353 ceph_free_cap_flush(prealloc_cf);
1340 return ret; 1354 return ret;
1341} 1355}
1342 1356
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 3326302f5884..e86d1a4efc46 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -416,6 +416,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
416 ci->i_flushing_caps = 0; 416 ci->i_flushing_caps = 0;
417 INIT_LIST_HEAD(&ci->i_dirty_item); 417 INIT_LIST_HEAD(&ci->i_dirty_item);
418 INIT_LIST_HEAD(&ci->i_flushing_item); 418 INIT_LIST_HEAD(&ci->i_flushing_item);
419 ci->i_prealloc_cap_flush = NULL;
419 ci->i_cap_flush_tree = RB_ROOT; 420 ci->i_cap_flush_tree = RB_ROOT;
420 init_waitqueue_head(&ci->i_cap_wq); 421 init_waitqueue_head(&ci->i_cap_wq);
421 ci->i_hold_caps_min = 0; 422 ci->i_hold_caps_min = 0;
@@ -1720,6 +1721,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1720 const unsigned int ia_valid = attr->ia_valid; 1721 const unsigned int ia_valid = attr->ia_valid;
1721 struct ceph_mds_request *req; 1722 struct ceph_mds_request *req;
1722 struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; 1723 struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc;
1724 struct ceph_cap_flush *prealloc_cf;
1723 int issued; 1725 int issued;
1724 int release = 0, dirtied = 0; 1726 int release = 0, dirtied = 0;
1725 int mask = 0; 1727 int mask = 0;
@@ -1734,10 +1736,16 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1734 if (err != 0) 1736 if (err != 0)
1735 return err; 1737 return err;
1736 1738
1739 prealloc_cf = ceph_alloc_cap_flush();
1740 if (!prealloc_cf)
1741 return -ENOMEM;
1742
1737 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETATTR, 1743 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETATTR,
1738 USE_AUTH_MDS); 1744 USE_AUTH_MDS);
1739 if (IS_ERR(req)) 1745 if (IS_ERR(req)) {
1746 ceph_free_cap_flush(prealloc_cf);
1740 return PTR_ERR(req); 1747 return PTR_ERR(req);
1748 }
1741 1749
1742 spin_lock(&ci->i_ceph_lock); 1750 spin_lock(&ci->i_ceph_lock);
1743 issued = __ceph_caps_issued(ci, NULL); 1751 issued = __ceph_caps_issued(ci, NULL);
@@ -1895,7 +1903,8 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1895 dout("setattr %p ATTR_FILE ... hrm!\n", inode); 1903 dout("setattr %p ATTR_FILE ... hrm!\n", inode);
1896 1904
1897 if (dirtied) { 1905 if (dirtied) {
1898 inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied); 1906 inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied,
1907 &prealloc_cf);
1899 inode->i_ctime = CURRENT_TIME; 1908 inode->i_ctime = CURRENT_TIME;
1900 } 1909 }
1901 1910
@@ -1927,9 +1936,11 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1927 ceph_mdsc_put_request(req); 1936 ceph_mdsc_put_request(req);
1928 if (mask & CEPH_SETATTR_SIZE) 1937 if (mask & CEPH_SETATTR_SIZE)
1929 __ceph_do_pending_vmtruncate(inode); 1938 __ceph_do_pending_vmtruncate(inode);
1939 ceph_free_cap_flush(prealloc_cf);
1930 return err; 1940 return err;
1931out_put: 1941out_put:
1932 ceph_mdsc_put_request(req); 1942 ceph_mdsc_put_request(req);
1943 ceph_free_cap_flush(prealloc_cf);
1933 return err; 1944 return err;
1934} 1945}
1935 1946
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 89e4305a94d4..8d73fe9d488b 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1189,6 +1189,10 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
1189 } 1189 }
1190 spin_unlock(&mdsc->cap_dirty_lock); 1190 spin_unlock(&mdsc->cap_dirty_lock);
1191 1191
1192 if (!ci->i_dirty_caps && ci->i_prealloc_cap_flush) {
1193 list_add(&ci->i_prealloc_cap_flush->list, &to_remove);
1194 ci->i_prealloc_cap_flush = NULL;
1195 }
1192 } 1196 }
1193 spin_unlock(&ci->i_ceph_lock); 1197 spin_unlock(&ci->i_ceph_lock);
1194 while (!list_empty(&to_remove)) { 1198 while (!list_empty(&to_remove)) {
@@ -1196,7 +1200,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
1196 cf = list_first_entry(&to_remove, 1200 cf = list_first_entry(&to_remove,
1197 struct ceph_cap_flush, list); 1201 struct ceph_cap_flush, list);
1198 list_del(&cf->list); 1202 list_del(&cf->list);
1199 kfree(cf); 1203 ceph_free_cap_flush(cf);
1200 } 1204 }
1201 while (drop--) 1205 while (drop--)
1202 iput(inode); 1206 iput(inode);
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index edeb83c43112..d1c833c321b9 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -622,6 +622,7 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
622 */ 622 */
623struct kmem_cache *ceph_inode_cachep; 623struct kmem_cache *ceph_inode_cachep;
624struct kmem_cache *ceph_cap_cachep; 624struct kmem_cache *ceph_cap_cachep;
625struct kmem_cache *ceph_cap_flush_cachep;
625struct kmem_cache *ceph_dentry_cachep; 626struct kmem_cache *ceph_dentry_cachep;
626struct kmem_cache *ceph_file_cachep; 627struct kmem_cache *ceph_file_cachep;
627 628
@@ -647,6 +648,10 @@ static int __init init_caches(void)
647 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 648 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
648 if (ceph_cap_cachep == NULL) 649 if (ceph_cap_cachep == NULL)
649 goto bad_cap; 650 goto bad_cap;
651 ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush,
652 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
653 if (ceph_cap_flush_cachep == NULL)
654 goto bad_cap_flush;
650 655
651 ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, 656 ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
652 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 657 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
@@ -665,6 +670,8 @@ static int __init init_caches(void)
665bad_file: 670bad_file:
666 kmem_cache_destroy(ceph_dentry_cachep); 671 kmem_cache_destroy(ceph_dentry_cachep);
667bad_dentry: 672bad_dentry:
673 kmem_cache_destroy(ceph_cap_flush_cachep);
674bad_cap_flush:
668 kmem_cache_destroy(ceph_cap_cachep); 675 kmem_cache_destroy(ceph_cap_cachep);
669bad_cap: 676bad_cap:
670 kmem_cache_destroy(ceph_inode_cachep); 677 kmem_cache_destroy(ceph_inode_cachep);
@@ -681,6 +688,7 @@ static void destroy_caches(void)
681 688
682 kmem_cache_destroy(ceph_inode_cachep); 689 kmem_cache_destroy(ceph_inode_cachep);
683 kmem_cache_destroy(ceph_cap_cachep); 690 kmem_cache_destroy(ceph_cap_cachep);
691 kmem_cache_destroy(ceph_cap_flush_cachep);
684 kmem_cache_destroy(ceph_dentry_cachep); 692 kmem_cache_destroy(ceph_dentry_cachep);
685 kmem_cache_destroy(ceph_file_cachep); 693 kmem_cache_destroy(ceph_file_cachep);
686 694
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index e7f13f742357..4415e977d72b 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -309,6 +309,7 @@ struct ceph_inode_info {
309 /* we need to track cap writeback on a per-cap-bit basis, to allow 309 /* we need to track cap writeback on a per-cap-bit basis, to allow
310 * overlapping, pipelined cap flushes to the mds. we can probably 310 * overlapping, pipelined cap flushes to the mds. we can probably
311 * reduce the tid to 8 bits if we're concerned about inode size. */ 311 * reduce the tid to 8 bits if we're concerned about inode size. */
312 struct ceph_cap_flush *i_prealloc_cap_flush;
312 struct rb_root i_cap_flush_tree; 313 struct rb_root i_cap_flush_tree;
313 wait_queue_head_t i_cap_wq; /* threads waiting on a capability */ 314 wait_queue_head_t i_cap_wq; /* threads waiting on a capability */
314 unsigned long i_hold_caps_min; /* jiffies */ 315 unsigned long i_hold_caps_min; /* jiffies */
@@ -578,7 +579,10 @@ static inline int __ceph_caps_dirty(struct ceph_inode_info *ci)
578{ 579{
579 return ci->i_dirty_caps | ci->i_flushing_caps; 580 return ci->i_dirty_caps | ci->i_flushing_caps;
580} 581}
581extern int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask); 582extern struct ceph_cap_flush *ceph_alloc_cap_flush(void);
583extern void ceph_free_cap_flush(struct ceph_cap_flush *cf);
584extern int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
585 struct ceph_cap_flush **pcf);
582 586
583extern int __ceph_caps_revoking_other(struct ceph_inode_info *ci, 587extern int __ceph_caps_revoking_other(struct ceph_inode_info *ci,
584 struct ceph_cap *ocap, int mask); 588 struct ceph_cap *ocap, int mask);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index c6f7d9b82085..819163d8313b 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -912,6 +912,7 @@ int __ceph_setxattr(struct dentry *dentry, const char *name,
912 struct ceph_vxattr *vxattr; 912 struct ceph_vxattr *vxattr;
913 struct ceph_inode_info *ci = ceph_inode(inode); 913 struct ceph_inode_info *ci = ceph_inode(inode);
914 struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; 914 struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc;
915 struct ceph_cap_flush *prealloc_cf = NULL;
915 int issued; 916 int issued;
916 int err; 917 int err;
917 int dirty = 0; 918 int dirty = 0;
@@ -950,6 +951,10 @@ int __ceph_setxattr(struct dentry *dentry, const char *name,
950 if (!xattr) 951 if (!xattr)
951 goto out; 952 goto out;
952 953
954 prealloc_cf = ceph_alloc_cap_flush();
955 if (!prealloc_cf)
956 goto out;
957
953 spin_lock(&ci->i_ceph_lock); 958 spin_lock(&ci->i_ceph_lock);
954retry: 959retry:
955 issued = __ceph_caps_issued(ci, NULL); 960 issued = __ceph_caps_issued(ci, NULL);
@@ -991,7 +996,8 @@ retry:
991 flags, value ? 1 : -1, &xattr); 996 flags, value ? 1 : -1, &xattr);
992 997
993 if (!err) { 998 if (!err) {
994 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); 999 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL,
1000 &prealloc_cf);
995 ci->i_xattrs.dirty = true; 1001 ci->i_xattrs.dirty = true;
996 inode->i_ctime = CURRENT_TIME; 1002 inode->i_ctime = CURRENT_TIME;
997 } 1003 }
@@ -1001,6 +1007,7 @@ retry:
1001 up_read(&mdsc->snap_rwsem); 1007 up_read(&mdsc->snap_rwsem);
1002 if (dirty) 1008 if (dirty)
1003 __mark_inode_dirty(inode, dirty); 1009 __mark_inode_dirty(inode, dirty);
1010 ceph_free_cap_flush(prealloc_cf);
1004 return err; 1011 return err;
1005 1012
1006do_sync: 1013do_sync:
@@ -1010,6 +1017,7 @@ do_sync_unlocked:
1010 up_read(&mdsc->snap_rwsem); 1017 up_read(&mdsc->snap_rwsem);
1011 err = ceph_sync_setxattr(dentry, name, value, size, flags); 1018 err = ceph_sync_setxattr(dentry, name, value, size, flags);
1012out: 1019out:
1020 ceph_free_cap_flush(prealloc_cf);
1013 kfree(newname); 1021 kfree(newname);
1014 kfree(newval); 1022 kfree(newval);
1015 kfree(xattr); 1023 kfree(xattr);
@@ -1062,6 +1070,7 @@ int __ceph_removexattr(struct dentry *dentry, const char *name)
1062 struct ceph_vxattr *vxattr; 1070 struct ceph_vxattr *vxattr;
1063 struct ceph_inode_info *ci = ceph_inode(inode); 1071 struct ceph_inode_info *ci = ceph_inode(inode);
1064 struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; 1072 struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc;
1073 struct ceph_cap_flush *prealloc_cf = NULL;
1065 int issued; 1074 int issued;
1066 int err; 1075 int err;
1067 int required_blob_size; 1076 int required_blob_size;
@@ -1079,6 +1088,10 @@ int __ceph_removexattr(struct dentry *dentry, const char *name)
1079 if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN)) 1088 if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
1080 goto do_sync_unlocked; 1089 goto do_sync_unlocked;
1081 1090
1091 prealloc_cf = ceph_alloc_cap_flush();
1092 if (!prealloc_cf)
1093 return -ENOMEM;
1094
1082 err = -ENOMEM; 1095 err = -ENOMEM;
1083 spin_lock(&ci->i_ceph_lock); 1096 spin_lock(&ci->i_ceph_lock);
1084retry: 1097retry:
@@ -1120,7 +1133,8 @@ retry:
1120 1133
1121 err = __remove_xattr_by_name(ceph_inode(inode), name); 1134 err = __remove_xattr_by_name(ceph_inode(inode), name);
1122 1135
1123 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); 1136 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL,
1137 &prealloc_cf);
1124 ci->i_xattrs.dirty = true; 1138 ci->i_xattrs.dirty = true;
1125 inode->i_ctime = CURRENT_TIME; 1139 inode->i_ctime = CURRENT_TIME;
1126 spin_unlock(&ci->i_ceph_lock); 1140 spin_unlock(&ci->i_ceph_lock);
@@ -1128,12 +1142,14 @@ retry:
1128 up_read(&mdsc->snap_rwsem); 1142 up_read(&mdsc->snap_rwsem);
1129 if (dirty) 1143 if (dirty)
1130 __mark_inode_dirty(inode, dirty); 1144 __mark_inode_dirty(inode, dirty);
1145 ceph_free_cap_flush(prealloc_cf);
1131 return err; 1146 return err;
1132do_sync: 1147do_sync:
1133 spin_unlock(&ci->i_ceph_lock); 1148 spin_unlock(&ci->i_ceph_lock);
1134do_sync_unlocked: 1149do_sync_unlocked:
1135 if (lock_snap_rwsem) 1150 if (lock_snap_rwsem)
1136 up_read(&mdsc->snap_rwsem); 1151 up_read(&mdsc->snap_rwsem);
1152 ceph_free_cap_flush(prealloc_cf);
1137 err = ceph_send_removexattr(dentry, name); 1153 err = ceph_send_removexattr(dentry, name);
1138 return err; 1154 return err;
1139} 1155}