aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ceph
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2015-04-22 14:30:10 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2015-04-22 14:30:10 -0400
commit 1204c464458e9837320a326a9fce550e3c5ef5de (patch)
tree f0e19354d7d0d5553a0e0e6f7fd7c7e2f8465696 /fs/ceph
parent 4f2112351b4ac964b0249bdd883f7b79601f39d8 (diff)
parent f77303bddabf73ebccb60f613b77da391f933cf6 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull Ceph updates from Sage Weil:
 "This time around we have a collection of CephFS fixes from Zheng
  around MDS failure handling and snapshots, support for a new CRUSH
  straw2 algorithm (to sync up with userspace) and several RBD cleanups
  and fixes from Ilya, an error path leak fix from Taesoo, and then an
  assorted collection of cleanups from others"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (28 commits)
  rbd: rbd_wq comment is obsolete
  libceph: announce support for straw2 buckets
  crush: straw2 bucket type with an efficient 64-bit crush_ln()
  crush: ensuring at most num-rep osds are selected
  crush: drop unnecessary include from mapper.c
  ceph: fix uninline data function
  ceph: rename snapshot support
  ceph: fix null pointer dereference in send_mds_reconnect()
  ceph: hold on to exclusive caps on complete directories
  libceph: simplify our debugfs attr macro
  ceph: show non-default options only
  libceph: expose client options through debugfs
  libceph, ceph: split ceph_show_options()
  rbd: mark block queue as non-rotational
  libceph: don't overwrite specific con error msgs
  ceph: cleanup unsafe requests when reconnecting is denied
  ceph: don't zero i_wrbuffer_ref when reconnecting is denied
  ceph: don't mark dirty caps when there is no auth cap
  ceph: keep i_snap_realm while there are writers
  libceph: osdmap.h: Add missing format newlines
  ...
Diffstat (limited to 'fs/ceph')
-rw-r--r--  fs/ceph/addr.c        38
-rw-r--r--  fs/ceph/caps.c        51
-rw-r--r--  fs/ceph/dir.c         48
-rw-r--r--  fs/ceph/mds_client.c  61
-rw-r--r--  fs/ceph/strings.c      1
-rw-r--r--  fs/ceph/super.c       56
-rw-r--r--  fs/ceph/super.h        4
-rw-r--r--  fs/ceph/xattr.c       23
8 files changed, 190 insertions, 92 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 155ab9c0246b..e162bcd105ee 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1146,6 +1146,10 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
1146 inode, page, (int)pos, (int)len); 1146 inode, page, (int)pos, (int)len);
1147 1147
1148 r = ceph_update_writeable_page(file, pos, len, page); 1148 r = ceph_update_writeable_page(file, pos, len, page);
1149 if (r < 0)
1150 page_cache_release(page);
1151 else
1152 *pagep = page;
1149 } while (r == -EAGAIN); 1153 } while (r == -EAGAIN);
1150 1154
1151 return r; 1155 return r;
@@ -1534,19 +1538,27 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
1534 1538
1535 osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false); 1539 osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false);
1536 1540
1537 err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR, 1541 {
1538 "inline_version", &inline_version, 1542 __le64 xattr_buf = cpu_to_le64(inline_version);
1539 sizeof(inline_version), 1543 err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR,
1540 CEPH_OSD_CMPXATTR_OP_GT, 1544 "inline_version", &xattr_buf,
1541 CEPH_OSD_CMPXATTR_MODE_U64); 1545 sizeof(xattr_buf),
1542 if (err) 1546 CEPH_OSD_CMPXATTR_OP_GT,
1543 goto out_put; 1547 CEPH_OSD_CMPXATTR_MODE_U64);
1544 1548 if (err)
1545 err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR, 1549 goto out_put;
1546 "inline_version", &inline_version, 1550 }
1547 sizeof(inline_version), 0, 0); 1551
1548 if (err) 1552 {
1549 goto out_put; 1553 char xattr_buf[32];
1554 int xattr_len = snprintf(xattr_buf, sizeof(xattr_buf),
1555 "%llu", inline_version);
1556 err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR,
1557 "inline_version",
1558 xattr_buf, xattr_len, 0, 0);
1559 if (err)
1560 goto out_put;
1561 }
1550 1562
1551 ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime); 1563 ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime);
1552 err = ceph_osdc_start_request(&fsc->client->osdc, req, false); 1564 err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 8172775428a0..11631c4c7d14 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -896,6 +896,18 @@ int ceph_is_any_caps(struct inode *inode)
896 return ret; 896 return ret;
897} 897}
898 898
899static void drop_inode_snap_realm(struct ceph_inode_info *ci)
900{
901 struct ceph_snap_realm *realm = ci->i_snap_realm;
902 spin_lock(&realm->inodes_with_caps_lock);
903 list_del_init(&ci->i_snap_realm_item);
904 ci->i_snap_realm_counter++;
905 ci->i_snap_realm = NULL;
906 spin_unlock(&realm->inodes_with_caps_lock);
907 ceph_put_snap_realm(ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc,
908 realm);
909}
910
899/* 911/*
900 * Remove a cap. Take steps to deal with a racing iterate_session_caps. 912 * Remove a cap. Take steps to deal with a racing iterate_session_caps.
901 * 913 *
@@ -946,15 +958,13 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
946 if (removed) 958 if (removed)
947 ceph_put_cap(mdsc, cap); 959 ceph_put_cap(mdsc, cap);
948 960
949 if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) { 961 /* when reconnect denied, we remove session caps forcibly,
950 struct ceph_snap_realm *realm = ci->i_snap_realm; 962 * i_wr_ref can be non-zero. If there are ongoing write,
951 spin_lock(&realm->inodes_with_caps_lock); 963 * keep i_snap_realm.
952 list_del_init(&ci->i_snap_realm_item); 964 */
953 ci->i_snap_realm_counter++; 965 if (!__ceph_is_any_caps(ci) && ci->i_wr_ref == 0 && ci->i_snap_realm)
954 ci->i_snap_realm = NULL; 966 drop_inode_snap_realm(ci);
955 spin_unlock(&realm->inodes_with_caps_lock); 967
956 ceph_put_snap_realm(mdsc, realm);
957 }
958 if (!__ceph_is_any_real_caps(ci)) 968 if (!__ceph_is_any_real_caps(ci))
959 __cap_delay_cancel(mdsc, ci); 969 __cap_delay_cancel(mdsc, ci);
960} 970}
@@ -1394,6 +1404,13 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
1394 int was = ci->i_dirty_caps; 1404 int was = ci->i_dirty_caps;
1395 int dirty = 0; 1405 int dirty = 0;
1396 1406
1407 if (!ci->i_auth_cap) {
1408 pr_warn("__mark_dirty_caps %p %llx mask %s, "
1409 "but no auth cap (session was closed?)\n",
1410 inode, ceph_ino(inode), ceph_cap_string(mask));
1411 return 0;
1412 }
1413
1397 dout("__mark_dirty_caps %p %s dirty %s -> %s\n", &ci->vfs_inode, 1414 dout("__mark_dirty_caps %p %s dirty %s -> %s\n", &ci->vfs_inode,
1398 ceph_cap_string(mask), ceph_cap_string(was), 1415 ceph_cap_string(mask), ceph_cap_string(was),
1399 ceph_cap_string(was | mask)); 1416 ceph_cap_string(was | mask));
@@ -1404,7 +1421,6 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
1404 ci->i_snap_realm->cached_context); 1421 ci->i_snap_realm->cached_context);
1405 dout(" inode %p now dirty snapc %p auth cap %p\n", 1422 dout(" inode %p now dirty snapc %p auth cap %p\n",
1406 &ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap); 1423 &ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap);
1407 WARN_ON(!ci->i_auth_cap);
1408 BUG_ON(!list_empty(&ci->i_dirty_item)); 1424 BUG_ON(!list_empty(&ci->i_dirty_item));
1409 spin_lock(&mdsc->cap_dirty_lock); 1425 spin_lock(&mdsc->cap_dirty_lock);
1410 list_add(&ci->i_dirty_item, &mdsc->cap_dirty); 1426 list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
@@ -1545,7 +1561,19 @@ retry_locked:
1545 if (!mdsc->stopping && inode->i_nlink > 0) { 1561 if (!mdsc->stopping && inode->i_nlink > 0) {
1546 if (want) { 1562 if (want) {
1547 retain |= CEPH_CAP_ANY; /* be greedy */ 1563 retain |= CEPH_CAP_ANY; /* be greedy */
1564 } else if (S_ISDIR(inode->i_mode) &&
1565 (issued & CEPH_CAP_FILE_SHARED) &&
1566 __ceph_dir_is_complete(ci)) {
1567 /*
1568 * If a directory is complete, we want to keep
1569 * the exclusive cap. So that MDS does not end up
1570 * revoking the shared cap on every create/unlink
1571 * operation.
1572 */
1573 want = CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL;
1574 retain |= want;
1548 } else { 1575 } else {
1576
1549 retain |= CEPH_CAP_ANY_SHARED; 1577 retain |= CEPH_CAP_ANY_SHARED;
1550 /* 1578 /*
1551 * keep RD only if we didn't have the file open RW, 1579 * keep RD only if we didn't have the file open RW,
@@ -2309,6 +2337,9 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
2309 wake = 1; 2337 wake = 1;
2310 } 2338 }
2311 } 2339 }
2340 /* see comment in __ceph_remove_cap() */
2341 if (!__ceph_is_any_caps(ci) && ci->i_snap_realm)
2342 drop_inode_snap_realm(ci);
2312 } 2343 }
2313 spin_unlock(&ci->i_ceph_lock); 2344 spin_unlock(&ci->i_ceph_lock);
2314 2345
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 83e9976f7189..e729b79812b4 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -281,6 +281,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
281 /* can we use the dcache? */ 281 /* can we use the dcache? */
282 spin_lock(&ci->i_ceph_lock); 282 spin_lock(&ci->i_ceph_lock);
283 if ((ctx->pos == 2 || fi->dentry) && 283 if ((ctx->pos == 2 || fi->dentry) &&
284 ceph_test_mount_opt(fsc, DCACHE) &&
284 !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && 285 !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
285 ceph_snap(inode) != CEPH_SNAPDIR && 286 ceph_snap(inode) != CEPH_SNAPDIR &&
286 __ceph_dir_is_complete_ordered(ci) && 287 __ceph_dir_is_complete_ordered(ci) &&
@@ -336,16 +337,23 @@ more:
336 ceph_mdsc_put_request(req); 337 ceph_mdsc_put_request(req);
337 return err; 338 return err;
338 } 339 }
339 req->r_inode = inode;
340 ihold(inode);
341 req->r_dentry = dget(file->f_path.dentry);
342 /* hints to request -> mds selection code */ 340 /* hints to request -> mds selection code */
343 req->r_direct_mode = USE_AUTH_MDS; 341 req->r_direct_mode = USE_AUTH_MDS;
344 req->r_direct_hash = ceph_frag_value(frag); 342 req->r_direct_hash = ceph_frag_value(frag);
345 req->r_direct_is_hash = true; 343 req->r_direct_is_hash = true;
346 req->r_path2 = kstrdup(fi->last_name, GFP_NOFS); 344 if (fi->last_name) {
345 req->r_path2 = kstrdup(fi->last_name, GFP_NOFS);
346 if (!req->r_path2) {
347 ceph_mdsc_put_request(req);
348 return -ENOMEM;
349 }
350 }
347 req->r_readdir_offset = fi->next_offset; 351 req->r_readdir_offset = fi->next_offset;
348 req->r_args.readdir.frag = cpu_to_le32(frag); 352 req->r_args.readdir.frag = cpu_to_le32(frag);
353
354 req->r_inode = inode;
355 ihold(inode);
356 req->r_dentry = dget(file->f_path.dentry);
349 err = ceph_mdsc_do_request(mdsc, NULL, req); 357 err = ceph_mdsc_do_request(mdsc, NULL, req);
350 if (err < 0) { 358 if (err < 0) {
351 ceph_mdsc_put_request(req); 359 ceph_mdsc_put_request(req);
@@ -629,6 +637,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
629 fsc->mount_options->snapdir_name, 637 fsc->mount_options->snapdir_name,
630 dentry->d_name.len) && 638 dentry->d_name.len) &&
631 !is_root_ceph_dentry(dir, dentry) && 639 !is_root_ceph_dentry(dir, dentry) &&
640 ceph_test_mount_opt(fsc, DCACHE) &&
632 __ceph_dir_is_complete(ci) && 641 __ceph_dir_is_complete(ci) &&
633 (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) { 642 (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
634 spin_unlock(&ci->i_ceph_lock); 643 spin_unlock(&ci->i_ceph_lock);
@@ -755,10 +764,15 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
755 err = PTR_ERR(req); 764 err = PTR_ERR(req);
756 goto out; 765 goto out;
757 } 766 }
758 req->r_dentry = dget(dentry);
759 req->r_num_caps = 2;
760 req->r_path2 = kstrdup(dest, GFP_NOFS); 767 req->r_path2 = kstrdup(dest, GFP_NOFS);
768 if (!req->r_path2) {
769 err = -ENOMEM;
770 ceph_mdsc_put_request(req);
771 goto out;
772 }
761 req->r_locked_dir = dir; 773 req->r_locked_dir = dir;
774 req->r_dentry = dget(dentry);
775 req->r_num_caps = 2;
762 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 776 req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
763 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 777 req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
764 err = ceph_mdsc_do_request(mdsc, dir, req); 778 err = ceph_mdsc_do_request(mdsc, dir, req);
@@ -933,16 +947,20 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
933 struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb); 947 struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb);
934 struct ceph_mds_client *mdsc = fsc->mdsc; 948 struct ceph_mds_client *mdsc = fsc->mdsc;
935 struct ceph_mds_request *req; 949 struct ceph_mds_request *req;
950 int op = CEPH_MDS_OP_RENAME;
936 int err; 951 int err;
937 952
938 if (ceph_snap(old_dir) != ceph_snap(new_dir)) 953 if (ceph_snap(old_dir) != ceph_snap(new_dir))
939 return -EXDEV; 954 return -EXDEV;
940 if (ceph_snap(old_dir) != CEPH_NOSNAP || 955 if (ceph_snap(old_dir) != CEPH_NOSNAP) {
941 ceph_snap(new_dir) != CEPH_NOSNAP) 956 if (old_dir == new_dir && ceph_snap(old_dir) == CEPH_SNAPDIR)
942 return -EROFS; 957 op = CEPH_MDS_OP_RENAMESNAP;
958 else
959 return -EROFS;
960 }
943 dout("rename dir %p dentry %p to dir %p dentry %p\n", 961 dout("rename dir %p dentry %p to dir %p dentry %p\n",
944 old_dir, old_dentry, new_dir, new_dentry); 962 old_dir, old_dentry, new_dir, new_dentry);
945 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RENAME, USE_AUTH_MDS); 963 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
946 if (IS_ERR(req)) 964 if (IS_ERR(req))
947 return PTR_ERR(req); 965 return PTR_ERR(req);
948 ihold(old_dir); 966 ihold(old_dir);
@@ -1240,11 +1258,12 @@ static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end,
1240 dout("dir_fsync %p wait on tid %llu (until %llu)\n", 1258 dout("dir_fsync %p wait on tid %llu (until %llu)\n",
1241 inode, req->r_tid, last_tid); 1259 inode, req->r_tid, last_tid);
1242 if (req->r_timeout) { 1260 if (req->r_timeout) {
1243 ret = wait_for_completion_timeout( 1261 unsigned long time_left = wait_for_completion_timeout(
1244 &req->r_safe_completion, req->r_timeout); 1262 &req->r_safe_completion,
1245 if (ret > 0) 1263 req->r_timeout);
1264 if (time_left > 0)
1246 ret = 0; 1265 ret = 0;
1247 else if (ret == 0) 1266 else
1248 ret = -EIO; /* timed out */ 1267 ret = -EIO; /* timed out */
1249 } else { 1268 } else {
1250 wait_for_completion(&req->r_safe_completion); 1269 wait_for_completion(&req->r_safe_completion);
@@ -1372,6 +1391,7 @@ const struct inode_operations ceph_snapdir_iops = {
1372 .getattr = ceph_getattr, 1391 .getattr = ceph_getattr,
1373 .mkdir = ceph_mkdir, 1392 .mkdir = ceph_mkdir,
1374 .rmdir = ceph_unlink, 1393 .rmdir = ceph_unlink,
1394 .rename = ceph_rename,
1375}; 1395};
1376 1396
1377const struct dentry_operations ceph_dentry_ops = { 1397const struct dentry_operations ceph_dentry_ops = {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 71c073f38e54..0a2eb32ffe43 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1021,6 +1021,33 @@ static void cleanup_cap_releases(struct ceph_mds_session *session)
1021 spin_unlock(&session->s_cap_lock); 1021 spin_unlock(&session->s_cap_lock);
1022} 1022}
1023 1023
1024static void cleanup_session_requests(struct ceph_mds_client *mdsc,
1025 struct ceph_mds_session *session)
1026{
1027 struct ceph_mds_request *req;
1028 struct rb_node *p;
1029
1030 dout("cleanup_session_requests mds%d\n", session->s_mds);
1031 mutex_lock(&mdsc->mutex);
1032 while (!list_empty(&session->s_unsafe)) {
1033 req = list_first_entry(&session->s_unsafe,
1034 struct ceph_mds_request, r_unsafe_item);
1035 list_del_init(&req->r_unsafe_item);
1036 pr_info(" dropping unsafe request %llu\n", req->r_tid);
1037 __unregister_request(mdsc, req);
1038 }
1039 /* zero r_attempts, so kick_requests() will re-send requests */
1040 p = rb_first(&mdsc->request_tree);
1041 while (p) {
1042 req = rb_entry(p, struct ceph_mds_request, r_node);
1043 p = rb_next(p);
1044 if (req->r_session &&
1045 req->r_session->s_mds == session->s_mds)
1046 req->r_attempts = 0;
1047 }
1048 mutex_unlock(&mdsc->mutex);
1049}
1050
1024/* 1051/*
1025 * Helper to safely iterate over all caps associated with a session, with 1052 * Helper to safely iterate over all caps associated with a session, with
1026 * special care taken to handle a racing __ceph_remove_cap(). 1053 * special care taken to handle a racing __ceph_remove_cap().
@@ -1098,7 +1125,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
1098 cap, ci, &ci->vfs_inode); 1125 cap, ci, &ci->vfs_inode);
1099 spin_lock(&ci->i_ceph_lock); 1126 spin_lock(&ci->i_ceph_lock);
1100 __ceph_remove_cap(cap, false); 1127 __ceph_remove_cap(cap, false);
1101 if (!__ceph_is_any_real_caps(ci)) { 1128 if (!ci->i_auth_cap) {
1102 struct ceph_mds_client *mdsc = 1129 struct ceph_mds_client *mdsc =
1103 ceph_sb_to_client(inode->i_sb)->mdsc; 1130 ceph_sb_to_client(inode->i_sb)->mdsc;
1104 1131
@@ -1120,13 +1147,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
1120 mdsc->num_cap_flushing--; 1147 mdsc->num_cap_flushing--;
1121 drop = 1; 1148 drop = 1;
1122 } 1149 }
1123 if (drop && ci->i_wrbuffer_ref) {
1124 pr_info(" dropping dirty data for %p %lld\n",
1125 inode, ceph_ino(inode));
1126 ci->i_wrbuffer_ref = 0;
1127 ci->i_wrbuffer_ref_head = 0;
1128 drop++;
1129 }
1130 spin_unlock(&mdsc->cap_dirty_lock); 1150 spin_unlock(&mdsc->cap_dirty_lock);
1131 } 1151 }
1132 spin_unlock(&ci->i_ceph_lock); 1152 spin_unlock(&ci->i_ceph_lock);
@@ -1853,7 +1873,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
1853 */ 1873 */
1854static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, 1874static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
1855 struct ceph_mds_request *req, 1875 struct ceph_mds_request *req,
1856 int mds) 1876 int mds, bool drop_cap_releases)
1857{ 1877{
1858 struct ceph_msg *msg; 1878 struct ceph_msg *msg;
1859 struct ceph_mds_request_head *head; 1879 struct ceph_mds_request_head *head;
@@ -1937,6 +1957,12 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
1937 releases += ceph_encode_inode_release(&p, 1957 releases += ceph_encode_inode_release(&p,
1938 req->r_old_dentry->d_inode, 1958 req->r_old_dentry->d_inode,
1939 mds, req->r_old_inode_drop, req->r_old_inode_unless, 0); 1959 mds, req->r_old_inode_drop, req->r_old_inode_unless, 0);
1960
1961 if (drop_cap_releases) {
1962 releases = 0;
1963 p = msg->front.iov_base + req->r_request_release_offset;
1964 }
1965
1940 head->num_releases = cpu_to_le16(releases); 1966 head->num_releases = cpu_to_le16(releases);
1941 1967
1942 /* time stamp */ 1968 /* time stamp */
@@ -1989,7 +2015,7 @@ static void complete_request(struct ceph_mds_client *mdsc,
1989 */ 2015 */
1990static int __prepare_send_request(struct ceph_mds_client *mdsc, 2016static int __prepare_send_request(struct ceph_mds_client *mdsc,
1991 struct ceph_mds_request *req, 2017 struct ceph_mds_request *req,
1992 int mds) 2018 int mds, bool drop_cap_releases)
1993{ 2019{
1994 struct ceph_mds_request_head *rhead; 2020 struct ceph_mds_request_head *rhead;
1995 struct ceph_msg *msg; 2021 struct ceph_msg *msg;
@@ -2048,7 +2074,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
2048 ceph_msg_put(req->r_request); 2074 ceph_msg_put(req->r_request);
2049 req->r_request = NULL; 2075 req->r_request = NULL;
2050 } 2076 }
2051 msg = create_request_message(mdsc, req, mds); 2077 msg = create_request_message(mdsc, req, mds, drop_cap_releases);
2052 if (IS_ERR(msg)) { 2078 if (IS_ERR(msg)) {
2053 req->r_err = PTR_ERR(msg); 2079 req->r_err = PTR_ERR(msg);
2054 complete_request(mdsc, req); 2080 complete_request(mdsc, req);
@@ -2132,7 +2158,7 @@ static int __do_request(struct ceph_mds_client *mdsc,
2132 if (req->r_request_started == 0) /* note request start time */ 2158 if (req->r_request_started == 0) /* note request start time */
2133 req->r_request_started = jiffies; 2159 req->r_request_started = jiffies;
2134 2160
2135 err = __prepare_send_request(mdsc, req, mds); 2161 err = __prepare_send_request(mdsc, req, mds, false);
2136 if (!err) { 2162 if (!err) {
2137 ceph_msg_get(req->r_request); 2163 ceph_msg_get(req->r_request);
2138 ceph_con_send(&session->s_con, req->r_request); 2164 ceph_con_send(&session->s_con, req->r_request);
@@ -2590,6 +2616,7 @@ static void handle_session(struct ceph_mds_session *session,
2590 case CEPH_SESSION_CLOSE: 2616 case CEPH_SESSION_CLOSE:
2591 if (session->s_state == CEPH_MDS_SESSION_RECONNECTING) 2617 if (session->s_state == CEPH_MDS_SESSION_RECONNECTING)
2592 pr_info("mds%d reconnect denied\n", session->s_mds); 2618 pr_info("mds%d reconnect denied\n", session->s_mds);
2619 cleanup_session_requests(mdsc, session);
2593 remove_session_caps(session); 2620 remove_session_caps(session);
2594 wake = 2; /* for good measure */ 2621 wake = 2; /* for good measure */
2595 wake_up_all(&mdsc->session_close_wq); 2622 wake_up_all(&mdsc->session_close_wq);
@@ -2658,7 +2685,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
2658 2685
2659 mutex_lock(&mdsc->mutex); 2686 mutex_lock(&mdsc->mutex);
2660 list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item) { 2687 list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item) {
2661 err = __prepare_send_request(mdsc, req, session->s_mds); 2688 err = __prepare_send_request(mdsc, req, session->s_mds, true);
2662 if (!err) { 2689 if (!err) {
2663 ceph_msg_get(req->r_request); 2690 ceph_msg_get(req->r_request);
2664 ceph_con_send(&session->s_con, req->r_request); 2691 ceph_con_send(&session->s_con, req->r_request);
@@ -2679,7 +2706,8 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
2679 continue; /* only old requests */ 2706 continue; /* only old requests */
2680 if (req->r_session && 2707 if (req->r_session &&
2681 req->r_session->s_mds == session->s_mds) { 2708 req->r_session->s_mds == session->s_mds) {
2682 err = __prepare_send_request(mdsc, req, session->s_mds); 2709 err = __prepare_send_request(mdsc, req,
2710 session->s_mds, true);
2683 if (!err) { 2711 if (!err) {
2684 ceph_msg_get(req->r_request); 2712 ceph_msg_get(req->r_request);
2685 ceph_con_send(&session->s_con, req->r_request); 2713 ceph_con_send(&session->s_con, req->r_request);
@@ -2864,7 +2892,8 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
2864 spin_unlock(&session->s_cap_lock); 2892 spin_unlock(&session->s_cap_lock);
2865 2893
2866 /* trim unused caps to reduce MDS's cache rejoin time */ 2894 /* trim unused caps to reduce MDS's cache rejoin time */
2867 shrink_dcache_parent(mdsc->fsc->sb->s_root); 2895 if (mdsc->fsc->sb->s_root)
2896 shrink_dcache_parent(mdsc->fsc->sb->s_root);
2868 2897
2869 ceph_con_close(&session->s_con); 2898 ceph_con_close(&session->s_con);
2870 ceph_con_open(&session->s_con, 2899 ceph_con_open(&session->s_con,
@@ -3133,7 +3162,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
3133 di->lease_renew_from && 3162 di->lease_renew_from &&
3134 di->lease_renew_after == 0) { 3163 di->lease_renew_after == 0) {
3135 unsigned long duration = 3164 unsigned long duration =
3136 le32_to_cpu(h->duration_ms) * HZ / 1000; 3165 msecs_to_jiffies(le32_to_cpu(h->duration_ms));
3137 3166
3138 di->lease_seq = seq; 3167 di->lease_seq = seq;
3139 dentry->d_time = di->lease_renew_from + duration; 3168 dentry->d_time = di->lease_renew_from + duration;
diff --git a/fs/ceph/strings.c b/fs/ceph/strings.c
index 51cc23e48111..89e6bc321df3 100644
--- a/fs/ceph/strings.c
+++ b/fs/ceph/strings.c
@@ -75,6 +75,7 @@ const char *ceph_mds_op_name(int op)
75 case CEPH_MDS_OP_LSSNAP: return "lssnap"; 75 case CEPH_MDS_OP_LSSNAP: return "lssnap";
76 case CEPH_MDS_OP_MKSNAP: return "mksnap"; 76 case CEPH_MDS_OP_MKSNAP: return "mksnap";
77 case CEPH_MDS_OP_RMSNAP: return "rmsnap"; 77 case CEPH_MDS_OP_RMSNAP: return "rmsnap";
78 case CEPH_MDS_OP_RENAMESNAP: return "renamesnap";
78 case CEPH_MDS_OP_SETFILELOCK: return "setfilelock"; 79 case CEPH_MDS_OP_SETFILELOCK: return "setfilelock";
79 case CEPH_MDS_OP_GETFILELOCK: return "getfilelock"; 80 case CEPH_MDS_OP_GETFILELOCK: return "getfilelock";
80 } 81 }
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index a63997b8bcff..e463ebd69a9c 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -345,6 +345,11 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt,
345 fsopt->rsize = CEPH_RSIZE_DEFAULT; 345 fsopt->rsize = CEPH_RSIZE_DEFAULT;
346 fsopt->rasize = CEPH_RASIZE_DEFAULT; 346 fsopt->rasize = CEPH_RASIZE_DEFAULT;
347 fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); 347 fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
348 if (!fsopt->snapdir_name) {
349 err = -ENOMEM;
350 goto out;
351 }
352
348 fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; 353 fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
349 fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; 354 fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
350 fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; 355 fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
@@ -406,31 +411,20 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
406{ 411{
407 struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb); 412 struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb);
408 struct ceph_mount_options *fsopt = fsc->mount_options; 413 struct ceph_mount_options *fsopt = fsc->mount_options;
409 struct ceph_options *opt = fsc->client->options; 414 size_t pos;
410 415 int ret;
411 if (opt->flags & CEPH_OPT_FSID) 416
412 seq_printf(m, ",fsid=%pU", &opt->fsid); 417 /* a comma between MNT/MS and client options */
413 if (opt->flags & CEPH_OPT_NOSHARE) 418 seq_putc(m, ',');
414 seq_puts(m, ",noshare"); 419 pos = m->count;
415 if (opt->flags & CEPH_OPT_NOCRC) 420
416 seq_puts(m, ",nocrc"); 421 ret = ceph_print_client_options(m, fsc->client);
417 if (opt->flags & CEPH_OPT_NOMSGAUTH) 422 if (ret)
418 seq_puts(m, ",nocephx_require_signatures"); 423 return ret;
419 if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0) 424
420 seq_puts(m, ",notcp_nodelay"); 425 /* retract our comma if no client options */
421 426 if (m->count == pos)
422 if (opt->name) 427 m->count--;
423 seq_printf(m, ",name=%s", opt->name);
424 if (opt->key)
425 seq_puts(m, ",secret=<hidden>");
426
427 if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
428 seq_printf(m, ",mount_timeout=%d", opt->mount_timeout);
429 if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
430 seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl);
431 if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
432 seq_printf(m, ",osdkeepalivetimeout=%d",
433 opt->osd_keepalive_timeout);
434 428
435 if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT) 429 if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
436 seq_puts(m, ",dirstat"); 430 seq_puts(m, ",dirstat");
@@ -438,14 +432,10 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
438 seq_puts(m, ",norbytes"); 432 seq_puts(m, ",norbytes");
439 if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) 433 if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
440 seq_puts(m, ",noasyncreaddir"); 434 seq_puts(m, ",noasyncreaddir");
441 if (fsopt->flags & CEPH_MOUNT_OPT_DCACHE) 435 if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
442 seq_puts(m, ",dcache");
443 else
444 seq_puts(m, ",nodcache"); 436 seq_puts(m, ",nodcache");
445 if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) 437 if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE)
446 seq_puts(m, ",fsc"); 438 seq_puts(m, ",fsc");
447 else
448 seq_puts(m, ",nofsc");
449 439
450#ifdef CONFIG_CEPH_FS_POSIX_ACL 440#ifdef CONFIG_CEPH_FS_POSIX_ACL
451 if (fsopt->sb_flags & MS_POSIXACL) 441 if (fsopt->sb_flags & MS_POSIXACL)
@@ -477,6 +467,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
477 seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes); 467 seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes);
478 if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) 468 if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
479 seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name); 469 seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name);
470
480 return 0; 471 return 0;
481} 472}
482 473
@@ -730,6 +721,11 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
730 if (IS_ERR(req)) 721 if (IS_ERR(req))
731 return ERR_CAST(req); 722 return ERR_CAST(req);
732 req->r_path1 = kstrdup(path, GFP_NOFS); 723 req->r_path1 = kstrdup(path, GFP_NOFS);
724 if (!req->r_path1) {
725 root = ERR_PTR(-ENOMEM);
726 goto out;
727 }
728
733 req->r_ino1.ino = CEPH_INO_ROOT; 729 req->r_ino1.ino = CEPH_INO_ROOT;
734 req->r_ino1.snap = CEPH_NOSNAP; 730 req->r_ino1.snap = CEPH_NOSNAP;
735 req->r_started = started; 731 req->r_started = started;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 04c8124ed30e..fa20e1318939 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -36,7 +36,8 @@
36#define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */ 36#define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */
37#define CEPH_MOUNT_OPT_FSCACHE (1<<10) /* use fscache */ 37#define CEPH_MOUNT_OPT_FSCACHE (1<<10) /* use fscache */
38 38
39#define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES) 39#define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES | \
40 CEPH_MOUNT_OPT_DCACHE)
40 41
41#define ceph_set_mount_opt(fsc, opt) \ 42#define ceph_set_mount_opt(fsc, opt) \
42 (fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt; 43 (fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt;
@@ -881,7 +882,6 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma);
881 882
882/* file.c */ 883/* file.c */
883extern const struct file_operations ceph_file_fops; 884extern const struct file_operations ceph_file_fops;
884extern const struct address_space_operations ceph_aops;
885 885
886extern int ceph_open(struct inode *inode, struct file *file); 886extern int ceph_open(struct inode *inode, struct file *file);
887extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry, 887extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 5a492caf34cb..5c4c9c256931 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -877,16 +877,23 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
877 err = PTR_ERR(req); 877 err = PTR_ERR(req);
878 goto out; 878 goto out;
879 } 879 }
880 req->r_inode = inode; 880
881 ihold(inode);
882 req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
883 req->r_num_caps = 1;
884 req->r_args.setxattr.flags = cpu_to_le32(flags); 881 req->r_args.setxattr.flags = cpu_to_le32(flags);
885 req->r_path2 = kstrdup(name, GFP_NOFS); 882 req->r_path2 = kstrdup(name, GFP_NOFS);
883 if (!req->r_path2) {
884 ceph_mdsc_put_request(req);
885 err = -ENOMEM;
886 goto out;
887 }
886 888
887 req->r_pagelist = pagelist; 889 req->r_pagelist = pagelist;
888 pagelist = NULL; 890 pagelist = NULL;
889 891
892 req->r_inode = inode;
893 ihold(inode);
894 req->r_num_caps = 1;
895 req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
896
890 dout("xattr.ver (before): %lld\n", ci->i_xattrs.version); 897 dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
891 err = ceph_mdsc_do_request(mdsc, NULL, req); 898 err = ceph_mdsc_do_request(mdsc, NULL, req);
892 ceph_mdsc_put_request(req); 899 ceph_mdsc_put_request(req);
@@ -1019,12 +1026,14 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
1019 USE_AUTH_MDS); 1026 USE_AUTH_MDS);
1020 if (IS_ERR(req)) 1027 if (IS_ERR(req))
1021 return PTR_ERR(req); 1028 return PTR_ERR(req);
1029 req->r_path2 = kstrdup(name, GFP_NOFS);
1030 if (!req->r_path2)
1031 return -ENOMEM;
1032
1022 req->r_inode = inode; 1033 req->r_inode = inode;
1023 ihold(inode); 1034 ihold(inode);
1024 req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
1025 req->r_num_caps = 1; 1035 req->r_num_caps = 1;
1026 req->r_path2 = kstrdup(name, GFP_NOFS); 1036 req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
1027
1028 err = ceph_mdsc_do_request(mdsc, NULL, req); 1037 err = ceph_mdsc_do_request(mdsc, NULL, req);
1029 ceph_mdsc_put_request(req); 1038 ceph_mdsc_put_request(req);
1030 return err; 1039 return err;