diff options
author | Yan, Zheng <zyan@redhat.com> | 2015-06-10 05:26:13 -0400 |
---|---|---|
committer | Ilya Dryomov <idryomov@gmail.com> | 2015-06-25 04:49:31 -0400 |
commit | f66fd9f0952187d274c13c136b74548f792c1925 (patch) | |
tree | c021f04f69b116f2673fdfb2354a99871f8f03a4 /fs/ceph | |
parent | e548e9b93d3e565e42b938a99804114565be1f81 (diff) |
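ceph: pre-allocate data structure that tracks caps flushing
Previously __mark_caps_flushing() allocated its struct ceph_cap_flush with kmalloc(GFP_ATOMIC) and dereferenced the result without checking for NULL. With this change, every caller that dirties caps allocates the structure up front with GFP_KERNEL from a dedicated slab cache (ceph_alloc_cap_flush()) and passes it to __ceph_mark_dirty_caps(), which stores it in ci->i_prealloc_cap_flush when the inode first becomes dirty; __mark_caps_flushing() then consumes that pre-allocated structure instead of allocating one itself.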
Signed-off-by: Yan, Zheng <zyan@redhat.com>
Diffstat (limited to 'fs/ceph')
-rw-r--r-- | fs/ceph/addr.c | 19 | ||||
-rw-r--r-- | fs/ceph/caps.c | 26 | ||||
-rw-r--r-- | fs/ceph/file.c | 18 | ||||
-rw-r--r-- | fs/ceph/inode.c | 15 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 6 | ||||
-rw-r--r-- | fs/ceph/super.c | 8 | ||||
-rw-r--r-- | fs/ceph/super.h | 6 | ||||
-rw-r--r-- | fs/ceph/xattr.c | 20 |
8 files changed, 102 insertions, 16 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 5f53ac0d9d7c..7edf3c49e661 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -1308,12 +1308,17 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1308 | struct inode *inode = file_inode(vma->vm_file); | 1308 | struct inode *inode = file_inode(vma->vm_file); |
1309 | struct ceph_inode_info *ci = ceph_inode(inode); | 1309 | struct ceph_inode_info *ci = ceph_inode(inode); |
1310 | struct ceph_file_info *fi = vma->vm_file->private_data; | 1310 | struct ceph_file_info *fi = vma->vm_file->private_data; |
1311 | struct ceph_cap_flush *prealloc_cf; | ||
1311 | struct page *page = vmf->page; | 1312 | struct page *page = vmf->page; |
1312 | loff_t off = page_offset(page); | 1313 | loff_t off = page_offset(page); |
1313 | loff_t size = i_size_read(inode); | 1314 | loff_t size = i_size_read(inode); |
1314 | size_t len; | 1315 | size_t len; |
1315 | int want, got, ret; | 1316 | int want, got, ret; |
1316 | 1317 | ||
1318 | prealloc_cf = ceph_alloc_cap_flush(); | ||
1319 | if (!prealloc_cf) | ||
1320 | return VM_FAULT_SIGBUS; | ||
1321 | |||
1317 | if (ci->i_inline_version != CEPH_INLINE_NONE) { | 1322 | if (ci->i_inline_version != CEPH_INLINE_NONE) { |
1318 | struct page *locked_page = NULL; | 1323 | struct page *locked_page = NULL; |
1319 | if (off == 0) { | 1324 | if (off == 0) { |
@@ -1323,8 +1328,10 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1323 | ret = ceph_uninline_data(vma->vm_file, locked_page); | 1328 | ret = ceph_uninline_data(vma->vm_file, locked_page); |
1324 | if (locked_page) | 1329 | if (locked_page) |
1325 | unlock_page(locked_page); | 1330 | unlock_page(locked_page); |
1326 | if (ret < 0) | 1331 | if (ret < 0) { |
1327 | return VM_FAULT_SIGBUS; | 1332 | ret = VM_FAULT_SIGBUS; |
1333 | goto out_free; | ||
1334 | } | ||
1328 | } | 1335 | } |
1329 | 1336 | ||
1330 | if (off + PAGE_CACHE_SIZE <= size) | 1337 | if (off + PAGE_CACHE_SIZE <= size) |
@@ -1346,7 +1353,8 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1346 | break; | 1353 | break; |
1347 | if (ret != -ERESTARTSYS) { | 1354 | if (ret != -ERESTARTSYS) { |
1348 | WARN_ON(1); | 1355 | WARN_ON(1); |
1349 | return VM_FAULT_SIGBUS; | 1356 | ret = VM_FAULT_SIGBUS; |
1357 | goto out_free; | ||
1350 | } | 1358 | } |
1351 | } | 1359 | } |
1352 | dout("page_mkwrite %p %llu~%zd got cap refs on %s\n", | 1360 | dout("page_mkwrite %p %llu~%zd got cap refs on %s\n", |
@@ -1381,7 +1389,8 @@ out: | |||
1381 | int dirty; | 1389 | int dirty; |
1382 | spin_lock(&ci->i_ceph_lock); | 1390 | spin_lock(&ci->i_ceph_lock); |
1383 | ci->i_inline_version = CEPH_INLINE_NONE; | 1391 | ci->i_inline_version = CEPH_INLINE_NONE; |
1384 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); | 1392 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, |
1393 | &prealloc_cf); | ||
1385 | spin_unlock(&ci->i_ceph_lock); | 1394 | spin_unlock(&ci->i_ceph_lock); |
1386 | if (dirty) | 1395 | if (dirty) |
1387 | __mark_inode_dirty(inode, dirty); | 1396 | __mark_inode_dirty(inode, dirty); |
@@ -1390,6 +1399,8 @@ out: | |||
1390 | dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %d\n", | 1399 | dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %d\n", |
1391 | inode, off, len, ceph_cap_string(got), ret); | 1400 | inode, off, len, ceph_cap_string(got), ret); |
1392 | ceph_put_cap_refs(ci, got); | 1401 | ceph_put_cap_refs(ci, got); |
1402 | out_free: | ||
1403 | ceph_free_cap_flush(prealloc_cf); | ||
1393 | 1404 | ||
1394 | return ret; | 1405 | return ret; |
1395 | } | 1406 | } |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 69a16044ec41..dd7b20adf1d4 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -1356,7 +1356,8 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci) | |||
1356 | * Caller is then responsible for calling __mark_inode_dirty with the | 1356 | * Caller is then responsible for calling __mark_inode_dirty with the |
1357 | * returned flags value. | 1357 | * returned flags value. |
1358 | */ | 1358 | */ |
1359 | int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) | 1359 | int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask, |
1360 | struct ceph_cap_flush **pcf) | ||
1360 | { | 1361 | { |
1361 | struct ceph_mds_client *mdsc = | 1362 | struct ceph_mds_client *mdsc = |
1362 | ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; | 1363 | ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; |
@@ -1376,6 +1377,9 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) | |||
1376 | ceph_cap_string(was | mask)); | 1377 | ceph_cap_string(was | mask)); |
1377 | ci->i_dirty_caps |= mask; | 1378 | ci->i_dirty_caps |= mask; |
1378 | if (was == 0) { | 1379 | if (was == 0) { |
1380 | WARN_ON_ONCE(ci->i_prealloc_cap_flush); | ||
1381 | swap(ci->i_prealloc_cap_flush, *pcf); | ||
1382 | |||
1379 | if (!ci->i_head_snapc) { | 1383 | if (!ci->i_head_snapc) { |
1380 | WARN_ON_ONCE(!rwsem_is_locked(&mdsc->snap_rwsem)); | 1384 | WARN_ON_ONCE(!rwsem_is_locked(&mdsc->snap_rwsem)); |
1381 | ci->i_head_snapc = ceph_get_snap_context( | 1385 | ci->i_head_snapc = ceph_get_snap_context( |
@@ -1391,6 +1395,8 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) | |||
1391 | ihold(inode); | 1395 | ihold(inode); |
1392 | dirty |= I_DIRTY_SYNC; | 1396 | dirty |= I_DIRTY_SYNC; |
1393 | } | 1397 | } |
1398 | } else { | ||
1399 | WARN_ON_ONCE(!ci->i_prealloc_cap_flush); | ||
1394 | } | 1400 | } |
1395 | BUG_ON(list_empty(&ci->i_dirty_item)); | 1401 | BUG_ON(list_empty(&ci->i_dirty_item)); |
1396 | if (((was | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) && | 1402 | if (((was | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) && |
@@ -1446,6 +1452,17 @@ static void __add_cap_flushing_to_mdsc(struct ceph_mds_client *mdsc, | |||
1446 | rb_insert_color(&cf->g_node, &mdsc->cap_flush_tree); | 1452 | rb_insert_color(&cf->g_node, &mdsc->cap_flush_tree); |
1447 | } | 1453 | } |
1448 | 1454 | ||
1455 | struct ceph_cap_flush *ceph_alloc_cap_flush(void) | ||
1456 | { | ||
1457 | return kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL); | ||
1458 | } | ||
1459 | |||
1460 | void ceph_free_cap_flush(struct ceph_cap_flush *cf) | ||
1461 | { | ||
1462 | if (cf) | ||
1463 | kmem_cache_free(ceph_cap_flush_cachep, cf); | ||
1464 | } | ||
1465 | |||
1449 | static u64 __get_oldest_flush_tid(struct ceph_mds_client *mdsc) | 1466 | static u64 __get_oldest_flush_tid(struct ceph_mds_client *mdsc) |
1450 | { | 1467 | { |
1451 | struct rb_node *n = rb_first(&mdsc->cap_flush_tree); | 1468 | struct rb_node *n = rb_first(&mdsc->cap_flush_tree); |
@@ -1469,11 +1486,12 @@ static int __mark_caps_flushing(struct inode *inode, | |||
1469 | { | 1486 | { |
1470 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; | 1487 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
1471 | struct ceph_inode_info *ci = ceph_inode(inode); | 1488 | struct ceph_inode_info *ci = ceph_inode(inode); |
1472 | struct ceph_cap_flush *cf; | 1489 | struct ceph_cap_flush *cf = NULL; |
1473 | int flushing; | 1490 | int flushing; |
1474 | 1491 | ||
1475 | BUG_ON(ci->i_dirty_caps == 0); | 1492 | BUG_ON(ci->i_dirty_caps == 0); |
1476 | BUG_ON(list_empty(&ci->i_dirty_item)); | 1493 | BUG_ON(list_empty(&ci->i_dirty_item)); |
1494 | BUG_ON(!ci->i_prealloc_cap_flush); | ||
1477 | 1495 | ||
1478 | flushing = ci->i_dirty_caps; | 1496 | flushing = ci->i_dirty_caps; |
1479 | dout("__mark_caps_flushing flushing %s, flushing_caps %s -> %s\n", | 1497 | dout("__mark_caps_flushing flushing %s, flushing_caps %s -> %s\n", |
@@ -1484,7 +1502,7 @@ static int __mark_caps_flushing(struct inode *inode, | |||
1484 | ci->i_dirty_caps = 0; | 1502 | ci->i_dirty_caps = 0; |
1485 | dout(" inode %p now !dirty\n", inode); | 1503 | dout(" inode %p now !dirty\n", inode); |
1486 | 1504 | ||
1487 | cf = kmalloc(sizeof(*cf), GFP_ATOMIC); | 1505 | swap(cf, ci->i_prealloc_cap_flush); |
1488 | cf->caps = flushing; | 1506 | cf->caps = flushing; |
1489 | cf->kick = false; | 1507 | cf->kick = false; |
1490 | 1508 | ||
@@ -3075,7 +3093,7 @@ out: | |||
3075 | cf = list_first_entry(&to_remove, | 3093 | cf = list_first_entry(&to_remove, |
3076 | struct ceph_cap_flush, list); | 3094 | struct ceph_cap_flush, list); |
3077 | list_del(&cf->list); | 3095 | list_del(&cf->list); |
3078 | kfree(cf); | 3096 | ceph_free_cap_flush(cf); |
3079 | } | 3097 | } |
3080 | if (drop) | 3098 | if (drop) |
3081 | iput(inode); | 3099 | iput(inode); |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 0a76a370d798..8a4eb4d21d3c 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -939,6 +939,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) | |||
939 | struct ceph_inode_info *ci = ceph_inode(inode); | 939 | struct ceph_inode_info *ci = ceph_inode(inode); |
940 | struct ceph_osd_client *osdc = | 940 | struct ceph_osd_client *osdc = |
941 | &ceph_sb_to_client(inode->i_sb)->client->osdc; | 941 | &ceph_sb_to_client(inode->i_sb)->client->osdc; |
942 | struct ceph_cap_flush *prealloc_cf; | ||
942 | ssize_t count, written = 0; | 943 | ssize_t count, written = 0; |
943 | int err, want, got; | 944 | int err, want, got; |
944 | loff_t pos; | 945 | loff_t pos; |
@@ -946,6 +947,10 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) | |||
946 | if (ceph_snap(inode) != CEPH_NOSNAP) | 947 | if (ceph_snap(inode) != CEPH_NOSNAP) |
947 | return -EROFS; | 948 | return -EROFS; |
948 | 949 | ||
950 | prealloc_cf = ceph_alloc_cap_flush(); | ||
951 | if (!prealloc_cf) | ||
952 | return -ENOMEM; | ||
953 | |||
949 | mutex_lock(&inode->i_mutex); | 954 | mutex_lock(&inode->i_mutex); |
950 | 955 | ||
951 | /* We can write back this queue in page reclaim */ | 956 | /* We can write back this queue in page reclaim */ |
@@ -1050,7 +1055,8 @@ retry_snap: | |||
1050 | int dirty; | 1055 | int dirty; |
1051 | spin_lock(&ci->i_ceph_lock); | 1056 | spin_lock(&ci->i_ceph_lock); |
1052 | ci->i_inline_version = CEPH_INLINE_NONE; | 1057 | ci->i_inline_version = CEPH_INLINE_NONE; |
1053 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); | 1058 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, |
1059 | &prealloc_cf); | ||
1054 | spin_unlock(&ci->i_ceph_lock); | 1060 | spin_unlock(&ci->i_ceph_lock); |
1055 | if (dirty) | 1061 | if (dirty) |
1056 | __mark_inode_dirty(inode, dirty); | 1062 | __mark_inode_dirty(inode, dirty); |
@@ -1074,6 +1080,7 @@ retry_snap: | |||
1074 | out: | 1080 | out: |
1075 | mutex_unlock(&inode->i_mutex); | 1081 | mutex_unlock(&inode->i_mutex); |
1076 | out_unlocked: | 1082 | out_unlocked: |
1083 | ceph_free_cap_flush(prealloc_cf); | ||
1077 | current->backing_dev_info = NULL; | 1084 | current->backing_dev_info = NULL; |
1078 | return written ? written : err; | 1085 | return written ? written : err; |
1079 | } | 1086 | } |
@@ -1270,6 +1277,7 @@ static long ceph_fallocate(struct file *file, int mode, | |||
1270 | struct ceph_inode_info *ci = ceph_inode(inode); | 1277 | struct ceph_inode_info *ci = ceph_inode(inode); |
1271 | struct ceph_osd_client *osdc = | 1278 | struct ceph_osd_client *osdc = |
1272 | &ceph_inode_to_client(inode)->client->osdc; | 1279 | &ceph_inode_to_client(inode)->client->osdc; |
1280 | struct ceph_cap_flush *prealloc_cf; | ||
1273 | int want, got = 0; | 1281 | int want, got = 0; |
1274 | int dirty; | 1282 | int dirty; |
1275 | int ret = 0; | 1283 | int ret = 0; |
@@ -1282,6 +1290,10 @@ static long ceph_fallocate(struct file *file, int mode, | |||
1282 | if (!S_ISREG(inode->i_mode)) | 1290 | if (!S_ISREG(inode->i_mode)) |
1283 | return -EOPNOTSUPP; | 1291 | return -EOPNOTSUPP; |
1284 | 1292 | ||
1293 | prealloc_cf = ceph_alloc_cap_flush(); | ||
1294 | if (!prealloc_cf) | ||
1295 | return -ENOMEM; | ||
1296 | |||
1285 | mutex_lock(&inode->i_mutex); | 1297 | mutex_lock(&inode->i_mutex); |
1286 | 1298 | ||
1287 | if (ceph_snap(inode) != CEPH_NOSNAP) { | 1299 | if (ceph_snap(inode) != CEPH_NOSNAP) { |
@@ -1328,7 +1340,8 @@ static long ceph_fallocate(struct file *file, int mode, | |||
1328 | if (!ret) { | 1340 | if (!ret) { |
1329 | spin_lock(&ci->i_ceph_lock); | 1341 | spin_lock(&ci->i_ceph_lock); |
1330 | ci->i_inline_version = CEPH_INLINE_NONE; | 1342 | ci->i_inline_version = CEPH_INLINE_NONE; |
1331 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); | 1343 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, |
1344 | &prealloc_cf); | ||
1332 | spin_unlock(&ci->i_ceph_lock); | 1345 | spin_unlock(&ci->i_ceph_lock); |
1333 | if (dirty) | 1346 | if (dirty) |
1334 | __mark_inode_dirty(inode, dirty); | 1347 | __mark_inode_dirty(inode, dirty); |
@@ -1337,6 +1350,7 @@ static long ceph_fallocate(struct file *file, int mode, | |||
1337 | ceph_put_cap_refs(ci, got); | 1350 | ceph_put_cap_refs(ci, got); |
1338 | unlock: | 1351 | unlock: |
1339 | mutex_unlock(&inode->i_mutex); | 1352 | mutex_unlock(&inode->i_mutex); |
1353 | ceph_free_cap_flush(prealloc_cf); | ||
1340 | return ret; | 1354 | return ret; |
1341 | } | 1355 | } |
1342 | 1356 | ||
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 3326302f5884..e86d1a4efc46 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -416,6 +416,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb) | |||
416 | ci->i_flushing_caps = 0; | 416 | ci->i_flushing_caps = 0; |
417 | INIT_LIST_HEAD(&ci->i_dirty_item); | 417 | INIT_LIST_HEAD(&ci->i_dirty_item); |
418 | INIT_LIST_HEAD(&ci->i_flushing_item); | 418 | INIT_LIST_HEAD(&ci->i_flushing_item); |
419 | ci->i_prealloc_cap_flush = NULL; | ||
419 | ci->i_cap_flush_tree = RB_ROOT; | 420 | ci->i_cap_flush_tree = RB_ROOT; |
420 | init_waitqueue_head(&ci->i_cap_wq); | 421 | init_waitqueue_head(&ci->i_cap_wq); |
421 | ci->i_hold_caps_min = 0; | 422 | ci->i_hold_caps_min = 0; |
@@ -1720,6 +1721,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) | |||
1720 | const unsigned int ia_valid = attr->ia_valid; | 1721 | const unsigned int ia_valid = attr->ia_valid; |
1721 | struct ceph_mds_request *req; | 1722 | struct ceph_mds_request *req; |
1722 | struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; | 1723 | struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; |
1724 | struct ceph_cap_flush *prealloc_cf; | ||
1723 | int issued; | 1725 | int issued; |
1724 | int release = 0, dirtied = 0; | 1726 | int release = 0, dirtied = 0; |
1725 | int mask = 0; | 1727 | int mask = 0; |
@@ -1734,10 +1736,16 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) | |||
1734 | if (err != 0) | 1736 | if (err != 0) |
1735 | return err; | 1737 | return err; |
1736 | 1738 | ||
1739 | prealloc_cf = ceph_alloc_cap_flush(); | ||
1740 | if (!prealloc_cf) | ||
1741 | return -ENOMEM; | ||
1742 | |||
1737 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETATTR, | 1743 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETATTR, |
1738 | USE_AUTH_MDS); | 1744 | USE_AUTH_MDS); |
1739 | if (IS_ERR(req)) | 1745 | if (IS_ERR(req)) { |
1746 | ceph_free_cap_flush(prealloc_cf); | ||
1740 | return PTR_ERR(req); | 1747 | return PTR_ERR(req); |
1748 | } | ||
1741 | 1749 | ||
1742 | spin_lock(&ci->i_ceph_lock); | 1750 | spin_lock(&ci->i_ceph_lock); |
1743 | issued = __ceph_caps_issued(ci, NULL); | 1751 | issued = __ceph_caps_issued(ci, NULL); |
@@ -1895,7 +1903,8 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) | |||
1895 | dout("setattr %p ATTR_FILE ... hrm!\n", inode); | 1903 | dout("setattr %p ATTR_FILE ... hrm!\n", inode); |
1896 | 1904 | ||
1897 | if (dirtied) { | 1905 | if (dirtied) { |
1898 | inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied); | 1906 | inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied, |
1907 | &prealloc_cf); | ||
1899 | inode->i_ctime = CURRENT_TIME; | 1908 | inode->i_ctime = CURRENT_TIME; |
1900 | } | 1909 | } |
1901 | 1910 | ||
@@ -1927,9 +1936,11 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) | |||
1927 | ceph_mdsc_put_request(req); | 1936 | ceph_mdsc_put_request(req); |
1928 | if (mask & CEPH_SETATTR_SIZE) | 1937 | if (mask & CEPH_SETATTR_SIZE) |
1929 | __ceph_do_pending_vmtruncate(inode); | 1938 | __ceph_do_pending_vmtruncate(inode); |
1939 | ceph_free_cap_flush(prealloc_cf); | ||
1930 | return err; | 1940 | return err; |
1931 | out_put: | 1941 | out_put: |
1932 | ceph_mdsc_put_request(req); | 1942 | ceph_mdsc_put_request(req); |
1943 | ceph_free_cap_flush(prealloc_cf); | ||
1933 | return err; | 1944 | return err; |
1934 | } | 1945 | } |
1935 | 1946 | ||
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 89e4305a94d4..8d73fe9d488b 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -1189,6 +1189,10 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
1189 | } | 1189 | } |
1190 | spin_unlock(&mdsc->cap_dirty_lock); | 1190 | spin_unlock(&mdsc->cap_dirty_lock); |
1191 | 1191 | ||
1192 | if (!ci->i_dirty_caps && ci->i_prealloc_cap_flush) { | ||
1193 | list_add(&ci->i_prealloc_cap_flush->list, &to_remove); | ||
1194 | ci->i_prealloc_cap_flush = NULL; | ||
1195 | } | ||
1192 | } | 1196 | } |
1193 | spin_unlock(&ci->i_ceph_lock); | 1197 | spin_unlock(&ci->i_ceph_lock); |
1194 | while (!list_empty(&to_remove)) { | 1198 | while (!list_empty(&to_remove)) { |
@@ -1196,7 +1200,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
1196 | cf = list_first_entry(&to_remove, | 1200 | cf = list_first_entry(&to_remove, |
1197 | struct ceph_cap_flush, list); | 1201 | struct ceph_cap_flush, list); |
1198 | list_del(&cf->list); | 1202 | list_del(&cf->list); |
1199 | kfree(cf); | 1203 | ceph_free_cap_flush(cf); |
1200 | } | 1204 | } |
1201 | while (drop--) | 1205 | while (drop--) |
1202 | iput(inode); | 1206 | iput(inode); |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index edeb83c43112..d1c833c321b9 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
@@ -622,6 +622,7 @@ static void destroy_fs_client(struct ceph_fs_client *fsc) | |||
622 | */ | 622 | */ |
623 | struct kmem_cache *ceph_inode_cachep; | 623 | struct kmem_cache *ceph_inode_cachep; |
624 | struct kmem_cache *ceph_cap_cachep; | 624 | struct kmem_cache *ceph_cap_cachep; |
625 | struct kmem_cache *ceph_cap_flush_cachep; | ||
625 | struct kmem_cache *ceph_dentry_cachep; | 626 | struct kmem_cache *ceph_dentry_cachep; |
626 | struct kmem_cache *ceph_file_cachep; | 627 | struct kmem_cache *ceph_file_cachep; |
627 | 628 | ||
@@ -647,6 +648,10 @@ static int __init init_caches(void) | |||
647 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); | 648 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); |
648 | if (ceph_cap_cachep == NULL) | 649 | if (ceph_cap_cachep == NULL) |
649 | goto bad_cap; | 650 | goto bad_cap; |
651 | ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush, | ||
652 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); | ||
653 | if (ceph_cap_flush_cachep == NULL) | ||
654 | goto bad_cap_flush; | ||
650 | 655 | ||
651 | ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, | 656 | ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, |
652 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); | 657 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); |
@@ -665,6 +670,8 @@ static int __init init_caches(void) | |||
665 | bad_file: | 670 | bad_file: |
666 | kmem_cache_destroy(ceph_dentry_cachep); | 671 | kmem_cache_destroy(ceph_dentry_cachep); |
667 | bad_dentry: | 672 | bad_dentry: |
673 | kmem_cache_destroy(ceph_cap_flush_cachep); | ||
674 | bad_cap_flush: | ||
668 | kmem_cache_destroy(ceph_cap_cachep); | 675 | kmem_cache_destroy(ceph_cap_cachep); |
669 | bad_cap: | 676 | bad_cap: |
670 | kmem_cache_destroy(ceph_inode_cachep); | 677 | kmem_cache_destroy(ceph_inode_cachep); |
@@ -681,6 +688,7 @@ static void destroy_caches(void) | |||
681 | 688 | ||
682 | kmem_cache_destroy(ceph_inode_cachep); | 689 | kmem_cache_destroy(ceph_inode_cachep); |
683 | kmem_cache_destroy(ceph_cap_cachep); | 690 | kmem_cache_destroy(ceph_cap_cachep); |
691 | kmem_cache_destroy(ceph_cap_flush_cachep); | ||
684 | kmem_cache_destroy(ceph_dentry_cachep); | 692 | kmem_cache_destroy(ceph_dentry_cachep); |
685 | kmem_cache_destroy(ceph_file_cachep); | 693 | kmem_cache_destroy(ceph_file_cachep); |
686 | 694 | ||
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index e7f13f742357..4415e977d72b 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -309,6 +309,7 @@ struct ceph_inode_info { | |||
309 | /* we need to track cap writeback on a per-cap-bit basis, to allow | 309 | /* we need to track cap writeback on a per-cap-bit basis, to allow |
310 | * overlapping, pipelined cap flushes to the mds. we can probably | 310 | * overlapping, pipelined cap flushes to the mds. we can probably |
311 | * reduce the tid to 8 bits if we're concerned about inode size. */ | 311 | * reduce the tid to 8 bits if we're concerned about inode size. */ |
312 | struct ceph_cap_flush *i_prealloc_cap_flush; | ||
312 | struct rb_root i_cap_flush_tree; | 313 | struct rb_root i_cap_flush_tree; |
313 | wait_queue_head_t i_cap_wq; /* threads waiting on a capability */ | 314 | wait_queue_head_t i_cap_wq; /* threads waiting on a capability */ |
314 | unsigned long i_hold_caps_min; /* jiffies */ | 315 | unsigned long i_hold_caps_min; /* jiffies */ |
@@ -578,7 +579,10 @@ static inline int __ceph_caps_dirty(struct ceph_inode_info *ci) | |||
578 | { | 579 | { |
579 | return ci->i_dirty_caps | ci->i_flushing_caps; | 580 | return ci->i_dirty_caps | ci->i_flushing_caps; |
580 | } | 581 | } |
581 | extern int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask); | 582 | extern struct ceph_cap_flush *ceph_alloc_cap_flush(void); |
583 | extern void ceph_free_cap_flush(struct ceph_cap_flush *cf); | ||
584 | extern int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask, | ||
585 | struct ceph_cap_flush **pcf); | ||
582 | 586 | ||
583 | extern int __ceph_caps_revoking_other(struct ceph_inode_info *ci, | 587 | extern int __ceph_caps_revoking_other(struct ceph_inode_info *ci, |
584 | struct ceph_cap *ocap, int mask); | 588 | struct ceph_cap *ocap, int mask); |
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index c6f7d9b82085..819163d8313b 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c | |||
@@ -912,6 +912,7 @@ int __ceph_setxattr(struct dentry *dentry, const char *name, | |||
912 | struct ceph_vxattr *vxattr; | 912 | struct ceph_vxattr *vxattr; |
913 | struct ceph_inode_info *ci = ceph_inode(inode); | 913 | struct ceph_inode_info *ci = ceph_inode(inode); |
914 | struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; | 914 | struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; |
915 | struct ceph_cap_flush *prealloc_cf = NULL; | ||
915 | int issued; | 916 | int issued; |
916 | int err; | 917 | int err; |
917 | int dirty = 0; | 918 | int dirty = 0; |
@@ -950,6 +951,10 @@ int __ceph_setxattr(struct dentry *dentry, const char *name, | |||
950 | if (!xattr) | 951 | if (!xattr) |
951 | goto out; | 952 | goto out; |
952 | 953 | ||
954 | prealloc_cf = ceph_alloc_cap_flush(); | ||
955 | if (!prealloc_cf) | ||
956 | goto out; | ||
957 | |||
953 | spin_lock(&ci->i_ceph_lock); | 958 | spin_lock(&ci->i_ceph_lock); |
954 | retry: | 959 | retry: |
955 | issued = __ceph_caps_issued(ci, NULL); | 960 | issued = __ceph_caps_issued(ci, NULL); |
@@ -991,7 +996,8 @@ retry: | |||
991 | flags, value ? 1 : -1, &xattr); | 996 | flags, value ? 1 : -1, &xattr); |
992 | 997 | ||
993 | if (!err) { | 998 | if (!err) { |
994 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); | 999 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL, |
1000 | &prealloc_cf); | ||
995 | ci->i_xattrs.dirty = true; | 1001 | ci->i_xattrs.dirty = true; |
996 | inode->i_ctime = CURRENT_TIME; | 1002 | inode->i_ctime = CURRENT_TIME; |
997 | } | 1003 | } |
@@ -1001,6 +1007,7 @@ retry: | |||
1001 | up_read(&mdsc->snap_rwsem); | 1007 | up_read(&mdsc->snap_rwsem); |
1002 | if (dirty) | 1008 | if (dirty) |
1003 | __mark_inode_dirty(inode, dirty); | 1009 | __mark_inode_dirty(inode, dirty); |
1010 | ceph_free_cap_flush(prealloc_cf); | ||
1004 | return err; | 1011 | return err; |
1005 | 1012 | ||
1006 | do_sync: | 1013 | do_sync: |
@@ -1010,6 +1017,7 @@ do_sync_unlocked: | |||
1010 | up_read(&mdsc->snap_rwsem); | 1017 | up_read(&mdsc->snap_rwsem); |
1011 | err = ceph_sync_setxattr(dentry, name, value, size, flags); | 1018 | err = ceph_sync_setxattr(dentry, name, value, size, flags); |
1012 | out: | 1019 | out: |
1020 | ceph_free_cap_flush(prealloc_cf); | ||
1013 | kfree(newname); | 1021 | kfree(newname); |
1014 | kfree(newval); | 1022 | kfree(newval); |
1015 | kfree(xattr); | 1023 | kfree(xattr); |
@@ -1062,6 +1070,7 @@ int __ceph_removexattr(struct dentry *dentry, const char *name) | |||
1062 | struct ceph_vxattr *vxattr; | 1070 | struct ceph_vxattr *vxattr; |
1063 | struct ceph_inode_info *ci = ceph_inode(inode); | 1071 | struct ceph_inode_info *ci = ceph_inode(inode); |
1064 | struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; | 1072 | struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; |
1073 | struct ceph_cap_flush *prealloc_cf = NULL; | ||
1065 | int issued; | 1074 | int issued; |
1066 | int err; | 1075 | int err; |
1067 | int required_blob_size; | 1076 | int required_blob_size; |
@@ -1079,6 +1088,10 @@ int __ceph_removexattr(struct dentry *dentry, const char *name) | |||
1079 | if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN)) | 1088 | if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN)) |
1080 | goto do_sync_unlocked; | 1089 | goto do_sync_unlocked; |
1081 | 1090 | ||
1091 | prealloc_cf = ceph_alloc_cap_flush(); | ||
1092 | if (!prealloc_cf) | ||
1093 | return -ENOMEM; | ||
1094 | |||
1082 | err = -ENOMEM; | 1095 | err = -ENOMEM; |
1083 | spin_lock(&ci->i_ceph_lock); | 1096 | spin_lock(&ci->i_ceph_lock); |
1084 | retry: | 1097 | retry: |
@@ -1120,7 +1133,8 @@ retry: | |||
1120 | 1133 | ||
1121 | err = __remove_xattr_by_name(ceph_inode(inode), name); | 1134 | err = __remove_xattr_by_name(ceph_inode(inode), name); |
1122 | 1135 | ||
1123 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); | 1136 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL, |
1137 | &prealloc_cf); | ||
1124 | ci->i_xattrs.dirty = true; | 1138 | ci->i_xattrs.dirty = true; |
1125 | inode->i_ctime = CURRENT_TIME; | 1139 | inode->i_ctime = CURRENT_TIME; |
1126 | spin_unlock(&ci->i_ceph_lock); | 1140 | spin_unlock(&ci->i_ceph_lock); |
@@ -1128,12 +1142,14 @@ retry: | |||
1128 | up_read(&mdsc->snap_rwsem); | 1142 | up_read(&mdsc->snap_rwsem); |
1129 | if (dirty) | 1143 | if (dirty) |
1130 | __mark_inode_dirty(inode, dirty); | 1144 | __mark_inode_dirty(inode, dirty); |
1145 | ceph_free_cap_flush(prealloc_cf); | ||
1131 | return err; | 1146 | return err; |
1132 | do_sync: | 1147 | do_sync: |
1133 | spin_unlock(&ci->i_ceph_lock); | 1148 | spin_unlock(&ci->i_ceph_lock); |
1134 | do_sync_unlocked: | 1149 | do_sync_unlocked: |
1135 | if (lock_snap_rwsem) | 1150 | if (lock_snap_rwsem) |
1136 | up_read(&mdsc->snap_rwsem); | 1151 | up_read(&mdsc->snap_rwsem); |
1152 | ceph_free_cap_flush(prealloc_cf); | ||
1137 | err = ceph_send_removexattr(dentry, name); | 1153 | err = ceph_send_removexattr(dentry, name); |
1138 | return err; | 1154 | return err; |
1139 | } | 1155 | } |