about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- drivers/block/rbd.c 26
-rw-r--r-- fs/ceph/addr.c 38
-rw-r--r-- fs/ceph/caps.c 51
-rw-r--r-- fs/ceph/dir.c 48
-rw-r--r-- fs/ceph/mds_client.c 61
-rw-r--r-- fs/ceph/strings.c 1
-rw-r--r-- fs/ceph/super.c 56
-rw-r--r-- fs/ceph/super.h 4
-rw-r--r-- fs/ceph/xattr.c 23
-rw-r--r-- include/linux/ceph/ceph_features.h 16
-rw-r--r-- include/linux/ceph/ceph_fs.h 1
-rw-r--r-- include/linux/ceph/debugfs.h 8
-rw-r--r-- include/linux/ceph/libceph.h 2
-rw-r--r-- include/linux/ceph/osdmap.h 5
-rw-r--r-- include/linux/crush/crush.h 12
-rw-r--r-- net/ceph/ceph_common.c 37
-rw-r--r-- net/ceph/crush/crush.c 14
-rw-r--r-- net/ceph/crush/crush_ln_table.h 166
-rw-r--r-- net/ceph/crush/mapper.c 118
-rw-r--r-- net/ceph/debugfs.c 24
-rw-r--r-- net/ceph/messenger.c 25
-rw-r--r-- net/ceph/osdmap.c 25
22 files changed, 633 insertions, 128 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index b40af3203089..812523330a78 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3762,8 +3762,8 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
3762 goto out_tag_set; 3762 goto out_tag_set;
3763 } 3763 }
3764 3764
3765 /* We use the default size, but let's be explicit about it. */ 3765 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
3766 blk_queue_physical_block_size(q, SECTOR_SIZE); 3766 /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */
3767 3767
3768 /* set io sizes to object size */ 3768 /* set io sizes to object size */
3769 segment_size = rbd_obj_bytes(&rbd_dev->header); 3769 segment_size = rbd_obj_bytes(&rbd_dev->header);
@@ -5301,8 +5301,13 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
5301 5301
5302 if (mapping) { 5302 if (mapping) {
5303 ret = rbd_dev_header_watch_sync(rbd_dev); 5303 ret = rbd_dev_header_watch_sync(rbd_dev);
5304 if (ret) 5304 if (ret) {
5305 if (ret == -ENOENT)
5306 pr_info("image %s/%s does not exist\n",
5307 rbd_dev->spec->pool_name,
5308 rbd_dev->spec->image_name);
5305 goto out_header_name; 5309 goto out_header_name;
5310 }
5306 } 5311 }
5307 5312
5308 ret = rbd_dev_header_info(rbd_dev); 5313 ret = rbd_dev_header_info(rbd_dev);
@@ -5319,8 +5324,14 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
5319 ret = rbd_spec_fill_snap_id(rbd_dev); 5324 ret = rbd_spec_fill_snap_id(rbd_dev);
5320 else 5325 else
5321 ret = rbd_spec_fill_names(rbd_dev); 5326 ret = rbd_spec_fill_names(rbd_dev);
5322 if (ret) 5327 if (ret) {
5328 if (ret == -ENOENT)
5329 pr_info("snap %s/%s@%s does not exist\n",
5330 rbd_dev->spec->pool_name,
5331 rbd_dev->spec->image_name,
5332 rbd_dev->spec->snap_name);
5323 goto err_out_probe; 5333 goto err_out_probe;
5334 }
5324 5335
5325 if (rbd_dev->header.features & RBD_FEATURE_LAYERING) { 5336 if (rbd_dev->header.features & RBD_FEATURE_LAYERING) {
5326 ret = rbd_dev_v2_parent_info(rbd_dev); 5337 ret = rbd_dev_v2_parent_info(rbd_dev);
@@ -5390,8 +5401,11 @@ static ssize_t do_rbd_add(struct bus_type *bus,
5390 5401
5391 /* pick the pool */ 5402 /* pick the pool */
5392 rc = rbd_add_get_pool_id(rbdc, spec->pool_name); 5403 rc = rbd_add_get_pool_id(rbdc, spec->pool_name);
5393 if (rc < 0) 5404 if (rc < 0) {
5405 if (rc == -ENOENT)
5406 pr_info("pool %s does not exist\n", spec->pool_name);
5394 goto err_out_client; 5407 goto err_out_client;
5408 }
5395 spec->pool_id = (u64)rc; 5409 spec->pool_id = (u64)rc;
5396 5410
5397 /* The ceph file layout needs to fit pool id in 32 bits */ 5411 /* The ceph file layout needs to fit pool id in 32 bits */
@@ -5673,7 +5687,7 @@ static int __init rbd_init(void)
5673 5687
5674 /* 5688 /*
5675 * The number of active work items is limited by the number of 5689 * The number of active work items is limited by the number of
5676 * rbd devices, so leave @max_active at default. 5690 * rbd devices * queue depth, so leave @max_active at default.
5677 */ 5691 */
5678 rbd_wq = alloc_workqueue(RBD_DRV_NAME, WQ_MEM_RECLAIM, 0); 5692 rbd_wq = alloc_workqueue(RBD_DRV_NAME, WQ_MEM_RECLAIM, 0);
5679 if (!rbd_wq) { 5693 if (!rbd_wq) {
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 155ab9c0246b..e162bcd105ee 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1146,6 +1146,10 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
1146 inode, page, (int)pos, (int)len); 1146 inode, page, (int)pos, (int)len);
1147 1147
1148 r = ceph_update_writeable_page(file, pos, len, page); 1148 r = ceph_update_writeable_page(file, pos, len, page);
1149 if (r < 0)
1150 page_cache_release(page);
1151 else
1152 *pagep = page;
1149 } while (r == -EAGAIN); 1153 } while (r == -EAGAIN);
1150 1154
1151 return r; 1155 return r;
@@ -1534,19 +1538,27 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
1534 1538
1535 osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false); 1539 osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false);
1536 1540
1537 err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR, 1541 {
1538 "inline_version", &inline_version, 1542 __le64 xattr_buf = cpu_to_le64(inline_version);
1539 sizeof(inline_version), 1543 err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR,
1540 CEPH_OSD_CMPXATTR_OP_GT, 1544 "inline_version", &xattr_buf,
1541 CEPH_OSD_CMPXATTR_MODE_U64); 1545 sizeof(xattr_buf),
1542 if (err) 1546 CEPH_OSD_CMPXATTR_OP_GT,
1543 goto out_put; 1547 CEPH_OSD_CMPXATTR_MODE_U64);
1544 1548 if (err)
1545 err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR, 1549 goto out_put;
1546 "inline_version", &inline_version, 1550 }
1547 sizeof(inline_version), 0, 0); 1551
1548 if (err) 1552 {
1549 goto out_put; 1553 char xattr_buf[32];
1554 int xattr_len = snprintf(xattr_buf, sizeof(xattr_buf),
1555 "%llu", inline_version);
1556 err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR,
1557 "inline_version",
1558 xattr_buf, xattr_len, 0, 0);
1559 if (err)
1560 goto out_put;
1561 }
1550 1562
1551 ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime); 1563 ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime);
1552 err = ceph_osdc_start_request(&fsc->client->osdc, req, false); 1564 err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 8172775428a0..11631c4c7d14 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -896,6 +896,18 @@ int ceph_is_any_caps(struct inode *inode)
896 return ret; 896 return ret;
897} 897}
898 898
899static void drop_inode_snap_realm(struct ceph_inode_info *ci)
900{
901 struct ceph_snap_realm *realm = ci->i_snap_realm;
902 spin_lock(&realm->inodes_with_caps_lock);
903 list_del_init(&ci->i_snap_realm_item);
904 ci->i_snap_realm_counter++;
905 ci->i_snap_realm = NULL;
906 spin_unlock(&realm->inodes_with_caps_lock);
907 ceph_put_snap_realm(ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc,
908 realm);
909}
910
899/* 911/*
900 * Remove a cap. Take steps to deal with a racing iterate_session_caps. 912 * Remove a cap. Take steps to deal with a racing iterate_session_caps.
901 * 913 *
@@ -946,15 +958,13 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
946 if (removed) 958 if (removed)
947 ceph_put_cap(mdsc, cap); 959 ceph_put_cap(mdsc, cap);
948 960
949 if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) { 961 /* when reconnect denied, we remove session caps forcibly,
950 struct ceph_snap_realm *realm = ci->i_snap_realm; 962 * i_wr_ref can be non-zero. If there are ongoing write,
951 spin_lock(&realm->inodes_with_caps_lock); 963 * keep i_snap_realm.
952 list_del_init(&ci->i_snap_realm_item); 964 */
953 ci->i_snap_realm_counter++; 965 if (!__ceph_is_any_caps(ci) && ci->i_wr_ref == 0 && ci->i_snap_realm)
954 ci->i_snap_realm = NULL; 966 drop_inode_snap_realm(ci);
955 spin_unlock(&realm->inodes_with_caps_lock); 967
956 ceph_put_snap_realm(mdsc, realm);
957 }
958 if (!__ceph_is_any_real_caps(ci)) 968 if (!__ceph_is_any_real_caps(ci))
959 __cap_delay_cancel(mdsc, ci); 969 __cap_delay_cancel(mdsc, ci);
960} 970}
@@ -1394,6 +1404,13 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
1394 int was = ci->i_dirty_caps; 1404 int was = ci->i_dirty_caps;
1395 int dirty = 0; 1405 int dirty = 0;
1396 1406
1407 if (!ci->i_auth_cap) {
1408 pr_warn("__mark_dirty_caps %p %llx mask %s, "
1409 "but no auth cap (session was closed?)\n",
1410 inode, ceph_ino(inode), ceph_cap_string(mask));
1411 return 0;
1412 }
1413
1397 dout("__mark_dirty_caps %p %s dirty %s -> %s\n", &ci->vfs_inode, 1414 dout("__mark_dirty_caps %p %s dirty %s -> %s\n", &ci->vfs_inode,
1398 ceph_cap_string(mask), ceph_cap_string(was), 1415 ceph_cap_string(mask), ceph_cap_string(was),
1399 ceph_cap_string(was | mask)); 1416 ceph_cap_string(was | mask));
@@ -1404,7 +1421,6 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
1404 ci->i_snap_realm->cached_context); 1421 ci->i_snap_realm->cached_context);
1405 dout(" inode %p now dirty snapc %p auth cap %p\n", 1422 dout(" inode %p now dirty snapc %p auth cap %p\n",
1406 &ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap); 1423 &ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap);
1407 WARN_ON(!ci->i_auth_cap);
1408 BUG_ON(!list_empty(&ci->i_dirty_item)); 1424 BUG_ON(!list_empty(&ci->i_dirty_item));
1409 spin_lock(&mdsc->cap_dirty_lock); 1425 spin_lock(&mdsc->cap_dirty_lock);
1410 list_add(&ci->i_dirty_item, &mdsc->cap_dirty); 1426 list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
@@ -1545,7 +1561,19 @@ retry_locked:
1545 if (!mdsc->stopping && inode->i_nlink > 0) { 1561 if (!mdsc->stopping && inode->i_nlink > 0) {
1546 if (want) { 1562 if (want) {
1547 retain |= CEPH_CAP_ANY; /* be greedy */ 1563 retain |= CEPH_CAP_ANY; /* be greedy */
1564 } else if (S_ISDIR(inode->i_mode) &&
1565 (issued & CEPH_CAP_FILE_SHARED) &&
1566 __ceph_dir_is_complete(ci)) {
1567 /*
1568 * If a directory is complete, we want to keep
1569 * the exclusive cap. So that MDS does not end up
1570 * revoking the shared cap on every create/unlink
1571 * operation.
1572 */
1573 want = CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL;
1574 retain |= want;
1548 } else { 1575 } else {
1576
1549 retain |= CEPH_CAP_ANY_SHARED; 1577 retain |= CEPH_CAP_ANY_SHARED;
1550 /* 1578 /*
1551 * keep RD only if we didn't have the file open RW, 1579 * keep RD only if we didn't have the file open RW,
@@ -2309,6 +2337,9 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
2309 wake = 1; 2337 wake = 1;
2310 } 2338 }
2311 } 2339 }
2340 /* see comment in __ceph_remove_cap() */
2341 if (!__ceph_is_any_caps(ci) && ci->i_snap_realm)
2342 drop_inode_snap_realm(ci);
2312 } 2343 }
2313 spin_unlock(&ci->i_ceph_lock); 2344 spin_unlock(&ci->i_ceph_lock);
2314 2345
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 83e9976f7189..e729b79812b4 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -281,6 +281,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
281 /* can we use the dcache? */ 281 /* can we use the dcache? */
282 spin_lock(&ci->i_ceph_lock); 282 spin_lock(&ci->i_ceph_lock);
283 if ((ctx->pos == 2 || fi->dentry) && 283 if ((ctx->pos == 2 || fi->dentry) &&
284 ceph_test_mount_opt(fsc, DCACHE) &&
284 !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && 285 !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
285 ceph_snap(inode) != CEPH_SNAPDIR && 286 ceph_snap(inode) != CEPH_SNAPDIR &&
286 __ceph_dir_is_complete_ordered(ci) && 287 __ceph_dir_is_complete_ordered(ci) &&
@@ -336,16 +337,23 @@ more:
336 ceph_mdsc_put_request(req); 337 ceph_mdsc_put_request(req);
337 return err; 338 return err;
338 } 339 }
339 req->r_inode = inode;
340 ihold(inode);
341 req->r_dentry = dget(file->f_path.dentry);
342 /* hints to request -> mds selection code */ 340 /* hints to request -> mds selection code */
343 req->r_direct_mode = USE_AUTH_MDS; 341 req->r_direct_mode = USE_AUTH_MDS;
344 req->r_direct_hash = ceph_frag_value(frag); 342 req->r_direct_hash = ceph_frag_value(frag);
345 req->r_direct_is_hash = true; 343 req->r_direct_is_hash = true;
346 req->r_path2 = kstrdup(fi->last_name, GFP_NOFS); 344 if (fi->last_name) {
345 req->r_path2 = kstrdup(fi->last_name, GFP_NOFS);
346 if (!req->r_path2) {
347 ceph_mdsc_put_request(req);
348 return -ENOMEM;
349 }
350 }
347 req->r_readdir_offset = fi->next_offset; 351 req->r_readdir_offset = fi->next_offset;
348 req->r_args.readdir.frag = cpu_to_le32(frag); 352 req->r_args.readdir.frag = cpu_to_le32(frag);
353
354 req->r_inode = inode;
355 ihold(inode);
356 req->r_dentry = dget(file->f_path.dentry);
349 err = ceph_mdsc_do_request(mdsc, NULL, req); 357 err = ceph_mdsc_do_request(mdsc, NULL, req);
350 if (err < 0) { 358 if (err < 0) {
351 ceph_mdsc_put_request(req); 359 ceph_mdsc_put_request(req);
@@ -629,6 +637,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
629 fsc->mount_options->snapdir_name, 637 fsc->mount_options->snapdir_name,
630 dentry->d_name.len) && 638 dentry->d_name.len) &&
631 !is_root_ceph_dentry(dir, dentry) && 639 !is_root_ceph_dentry(dir, dentry) &&
640 ceph_test_mount_opt(fsc, DCACHE) &&
632 __ceph_dir_is_complete(ci) && 641 __ceph_dir_is_complete(ci) &&
633 (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) { 642 (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
634 spin_unlock(&ci->i_ceph_lock); 643 spin_unlock(&ci->i_ceph_lock);
@@ -755,10 +764,15 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
755 err = PTR_ERR(req); 764 err = PTR_ERR(req);
756 goto out; 765 goto out;
757 } 766 }
758 req->r_dentry = dget(dentry);
759 req->r_num_caps = 2;
760 req->r_path2 = kstrdup(dest, GFP_NOFS); 767 req->r_path2 = kstrdup(dest, GFP_NOFS);
768 if (!req->r_path2) {
769 err = -ENOMEM;
770 ceph_mdsc_put_request(req);
771 goto out;
772 }
761 req->r_locked_dir = dir; 773 req->r_locked_dir = dir;
774 req->r_dentry = dget(dentry);
775 req->r_num_caps = 2;
762 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 776 req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
763 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 777 req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
764 err = ceph_mdsc_do_request(mdsc, dir, req); 778 err = ceph_mdsc_do_request(mdsc, dir, req);
@@ -933,16 +947,20 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
933 struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb); 947 struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb);
934 struct ceph_mds_client *mdsc = fsc->mdsc; 948 struct ceph_mds_client *mdsc = fsc->mdsc;
935 struct ceph_mds_request *req; 949 struct ceph_mds_request *req;
950 int op = CEPH_MDS_OP_RENAME;
936 int err; 951 int err;
937 952
938 if (ceph_snap(old_dir) != ceph_snap(new_dir)) 953 if (ceph_snap(old_dir) != ceph_snap(new_dir))
939 return -EXDEV; 954 return -EXDEV;
940 if (ceph_snap(old_dir) != CEPH_NOSNAP || 955 if (ceph_snap(old_dir) != CEPH_NOSNAP) {
941 ceph_snap(new_dir) != CEPH_NOSNAP) 956 if (old_dir == new_dir && ceph_snap(old_dir) == CEPH_SNAPDIR)
942 return -EROFS; 957 op = CEPH_MDS_OP_RENAMESNAP;
958 else
959 return -EROFS;
960 }
943 dout("rename dir %p dentry %p to dir %p dentry %p\n", 961 dout("rename dir %p dentry %p to dir %p dentry %p\n",
944 old_dir, old_dentry, new_dir, new_dentry); 962 old_dir, old_dentry, new_dir, new_dentry);
945 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RENAME, USE_AUTH_MDS); 963 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
946 if (IS_ERR(req)) 964 if (IS_ERR(req))
947 return PTR_ERR(req); 965 return PTR_ERR(req);
948 ihold(old_dir); 966 ihold(old_dir);
@@ -1240,11 +1258,12 @@ static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end,
1240 dout("dir_fsync %p wait on tid %llu (until %llu)\n", 1258 dout("dir_fsync %p wait on tid %llu (until %llu)\n",
1241 inode, req->r_tid, last_tid); 1259 inode, req->r_tid, last_tid);
1242 if (req->r_timeout) { 1260 if (req->r_timeout) {
1243 ret = wait_for_completion_timeout( 1261 unsigned long time_left = wait_for_completion_timeout(
1244 &req->r_safe_completion, req->r_timeout); 1262 &req->r_safe_completion,
1245 if (ret > 0) 1263 req->r_timeout);
1264 if (time_left > 0)
1246 ret = 0; 1265 ret = 0;
1247 else if (ret == 0) 1266 else
1248 ret = -EIO; /* timed out */ 1267 ret = -EIO; /* timed out */
1249 } else { 1268 } else {
1250 wait_for_completion(&req->r_safe_completion); 1269 wait_for_completion(&req->r_safe_completion);
@@ -1372,6 +1391,7 @@ const struct inode_operations ceph_snapdir_iops = {
1372 .getattr = ceph_getattr, 1391 .getattr = ceph_getattr,
1373 .mkdir = ceph_mkdir, 1392 .mkdir = ceph_mkdir,
1374 .rmdir = ceph_unlink, 1393 .rmdir = ceph_unlink,
1394 .rename = ceph_rename,
1375}; 1395};
1376 1396
1377const struct dentry_operations ceph_dentry_ops = { 1397const struct dentry_operations ceph_dentry_ops = {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 71c073f38e54..0a2eb32ffe43 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1021,6 +1021,33 @@ static void cleanup_cap_releases(struct ceph_mds_session *session)
1021 spin_unlock(&session->s_cap_lock); 1021 spin_unlock(&session->s_cap_lock);
1022} 1022}
1023 1023
1024static void cleanup_session_requests(struct ceph_mds_client *mdsc,
1025 struct ceph_mds_session *session)
1026{
1027 struct ceph_mds_request *req;
1028 struct rb_node *p;
1029
1030 dout("cleanup_session_requests mds%d\n", session->s_mds);
1031 mutex_lock(&mdsc->mutex);
1032 while (!list_empty(&session->s_unsafe)) {
1033 req = list_first_entry(&session->s_unsafe,
1034 struct ceph_mds_request, r_unsafe_item);
1035 list_del_init(&req->r_unsafe_item);
1036 pr_info(" dropping unsafe request %llu\n", req->r_tid);
1037 __unregister_request(mdsc, req);
1038 }
1039 /* zero r_attempts, so kick_requests() will re-send requests */
1040 p = rb_first(&mdsc->request_tree);
1041 while (p) {
1042 req = rb_entry(p, struct ceph_mds_request, r_node);
1043 p = rb_next(p);
1044 if (req->r_session &&
1045 req->r_session->s_mds == session->s_mds)
1046 req->r_attempts = 0;
1047 }
1048 mutex_unlock(&mdsc->mutex);
1049}
1050
1024/* 1051/*
1025 * Helper to safely iterate over all caps associated with a session, with 1052 * Helper to safely iterate over all caps associated with a session, with
1026 * special care taken to handle a racing __ceph_remove_cap(). 1053 * special care taken to handle a racing __ceph_remove_cap().
@@ -1098,7 +1125,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
1098 cap, ci, &ci->vfs_inode); 1125 cap, ci, &ci->vfs_inode);
1099 spin_lock(&ci->i_ceph_lock); 1126 spin_lock(&ci->i_ceph_lock);
1100 __ceph_remove_cap(cap, false); 1127 __ceph_remove_cap(cap, false);
1101 if (!__ceph_is_any_real_caps(ci)) { 1128 if (!ci->i_auth_cap) {
1102 struct ceph_mds_client *mdsc = 1129 struct ceph_mds_client *mdsc =
1103 ceph_sb_to_client(inode->i_sb)->mdsc; 1130 ceph_sb_to_client(inode->i_sb)->mdsc;
1104 1131
@@ -1120,13 +1147,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
1120 mdsc->num_cap_flushing--; 1147 mdsc->num_cap_flushing--;
1121 drop = 1; 1148 drop = 1;
1122 } 1149 }
1123 if (drop && ci->i_wrbuffer_ref) {
1124 pr_info(" dropping dirty data for %p %lld\n",
1125 inode, ceph_ino(inode));
1126 ci->i_wrbuffer_ref = 0;
1127 ci->i_wrbuffer_ref_head = 0;
1128 drop++;
1129 }
1130 spin_unlock(&mdsc->cap_dirty_lock); 1150 spin_unlock(&mdsc->cap_dirty_lock);
1131 } 1151 }
1132 spin_unlock(&ci->i_ceph_lock); 1152 spin_unlock(&ci->i_ceph_lock);
@@ -1853,7 +1873,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
1853 */ 1873 */
1854static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, 1874static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
1855 struct ceph_mds_request *req, 1875 struct ceph_mds_request *req,
1856 int mds) 1876 int mds, bool drop_cap_releases)
1857{ 1877{
1858 struct ceph_msg *msg; 1878 struct ceph_msg *msg;
1859 struct ceph_mds_request_head *head; 1879 struct ceph_mds_request_head *head;
@@ -1937,6 +1957,12 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
1937 releases += ceph_encode_inode_release(&p, 1957 releases += ceph_encode_inode_release(&p,
1938 req->r_old_dentry->d_inode, 1958 req->r_old_dentry->d_inode,
1939 mds, req->r_old_inode_drop, req->r_old_inode_unless, 0); 1959 mds, req->r_old_inode_drop, req->r_old_inode_unless, 0);
1960
1961 if (drop_cap_releases) {
1962 releases = 0;
1963 p = msg->front.iov_base + req->r_request_release_offset;
1964 }
1965
1940 head->num_releases = cpu_to_le16(releases); 1966 head->num_releases = cpu_to_le16(releases);
1941 1967
1942 /* time stamp */ 1968 /* time stamp */
@@ -1989,7 +2015,7 @@ static void complete_request(struct ceph_mds_client *mdsc,
1989 */ 2015 */
1990static int __prepare_send_request(struct ceph_mds_client *mdsc, 2016static int __prepare_send_request(struct ceph_mds_client *mdsc,
1991 struct ceph_mds_request *req, 2017 struct ceph_mds_request *req,
1992 int mds) 2018 int mds, bool drop_cap_releases)
1993{ 2019{
1994 struct ceph_mds_request_head *rhead; 2020 struct ceph_mds_request_head *rhead;
1995 struct ceph_msg *msg; 2021 struct ceph_msg *msg;
@@ -2048,7 +2074,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
2048 ceph_msg_put(req->r_request); 2074 ceph_msg_put(req->r_request);
2049 req->r_request = NULL; 2075 req->r_request = NULL;
2050 } 2076 }
2051 msg = create_request_message(mdsc, req, mds); 2077 msg = create_request_message(mdsc, req, mds, drop_cap_releases);
2052 if (IS_ERR(msg)) { 2078 if (IS_ERR(msg)) {
2053 req->r_err = PTR_ERR(msg); 2079 req->r_err = PTR_ERR(msg);
2054 complete_request(mdsc, req); 2080 complete_request(mdsc, req);
@@ -2132,7 +2158,7 @@ static int __do_request(struct ceph_mds_client *mdsc,
2132 if (req->r_request_started == 0) /* note request start time */ 2158 if (req->r_request_started == 0) /* note request start time */
2133 req->r_request_started = jiffies; 2159 req->r_request_started = jiffies;
2134 2160
2135 err = __prepare_send_request(mdsc, req, mds); 2161 err = __prepare_send_request(mdsc, req, mds, false);
2136 if (!err) { 2162 if (!err) {
2137 ceph_msg_get(req->r_request); 2163 ceph_msg_get(req->r_request);
2138 ceph_con_send(&session->s_con, req->r_request); 2164 ceph_con_send(&session->s_con, req->r_request);
@@ -2590,6 +2616,7 @@ static void handle_session(struct ceph_mds_session *session,
2590 case CEPH_SESSION_CLOSE: 2616 case CEPH_SESSION_CLOSE:
2591 if (session->s_state == CEPH_MDS_SESSION_RECONNECTING) 2617 if (session->s_state == CEPH_MDS_SESSION_RECONNECTING)
2592 pr_info("mds%d reconnect denied\n", session->s_mds); 2618 pr_info("mds%d reconnect denied\n", session->s_mds);
2619 cleanup_session_requests(mdsc, session);
2593 remove_session_caps(session); 2620 remove_session_caps(session);
2594 wake = 2; /* for good measure */ 2621 wake = 2; /* for good measure */
2595 wake_up_all(&mdsc->session_close_wq); 2622 wake_up_all(&mdsc->session_close_wq);
@@ -2658,7 +2685,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
2658 2685
2659 mutex_lock(&mdsc->mutex); 2686 mutex_lock(&mdsc->mutex);
2660 list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item) { 2687 list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item) {
2661 err = __prepare_send_request(mdsc, req, session->s_mds); 2688 err = __prepare_send_request(mdsc, req, session->s_mds, true);
2662 if (!err) { 2689 if (!err) {
2663 ceph_msg_get(req->r_request); 2690 ceph_msg_get(req->r_request);
2664 ceph_con_send(&session->s_con, req->r_request); 2691 ceph_con_send(&session->s_con, req->r_request);
@@ -2679,7 +2706,8 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
2679 continue; /* only old requests */ 2706 continue; /* only old requests */
2680 if (req->r_session && 2707 if (req->r_session &&
2681 req->r_session->s_mds == session->s_mds) { 2708 req->r_session->s_mds == session->s_mds) {
2682 err = __prepare_send_request(mdsc, req, session->s_mds); 2709 err = __prepare_send_request(mdsc, req,
2710 session->s_mds, true);
2683 if (!err) { 2711 if (!err) {
2684 ceph_msg_get(req->r_request); 2712 ceph_msg_get(req->r_request);
2685 ceph_con_send(&session->s_con, req->r_request); 2713 ceph_con_send(&session->s_con, req->r_request);
@@ -2864,7 +2892,8 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
2864 spin_unlock(&session->s_cap_lock); 2892 spin_unlock(&session->s_cap_lock);
2865 2893
2866 /* trim unused caps to reduce MDS's cache rejoin time */ 2894 /* trim unused caps to reduce MDS's cache rejoin time */
2867 shrink_dcache_parent(mdsc->fsc->sb->s_root); 2895 if (mdsc->fsc->sb->s_root)
2896 shrink_dcache_parent(mdsc->fsc->sb->s_root);
2868 2897
2869 ceph_con_close(&session->s_con); 2898 ceph_con_close(&session->s_con);
2870 ceph_con_open(&session->s_con, 2899 ceph_con_open(&session->s_con,
@@ -3133,7 +3162,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
3133 di->lease_renew_from && 3162 di->lease_renew_from &&
3134 di->lease_renew_after == 0) { 3163 di->lease_renew_after == 0) {
3135 unsigned long duration = 3164 unsigned long duration =
3136 le32_to_cpu(h->duration_ms) * HZ / 1000; 3165 msecs_to_jiffies(le32_to_cpu(h->duration_ms));
3137 3166
3138 di->lease_seq = seq; 3167 di->lease_seq = seq;
3139 dentry->d_time = di->lease_renew_from + duration; 3168 dentry->d_time = di->lease_renew_from + duration;
diff --git a/fs/ceph/strings.c b/fs/ceph/strings.c
index 51cc23e48111..89e6bc321df3 100644
--- a/fs/ceph/strings.c
+++ b/fs/ceph/strings.c
@@ -75,6 +75,7 @@ const char *ceph_mds_op_name(int op)
75 case CEPH_MDS_OP_LSSNAP: return "lssnap"; 75 case CEPH_MDS_OP_LSSNAP: return "lssnap";
76 case CEPH_MDS_OP_MKSNAP: return "mksnap"; 76 case CEPH_MDS_OP_MKSNAP: return "mksnap";
77 case CEPH_MDS_OP_RMSNAP: return "rmsnap"; 77 case CEPH_MDS_OP_RMSNAP: return "rmsnap";
78 case CEPH_MDS_OP_RENAMESNAP: return "renamesnap";
78 case CEPH_MDS_OP_SETFILELOCK: return "setfilelock"; 79 case CEPH_MDS_OP_SETFILELOCK: return "setfilelock";
79 case CEPH_MDS_OP_GETFILELOCK: return "getfilelock"; 80 case CEPH_MDS_OP_GETFILELOCK: return "getfilelock";
80 } 81 }
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index a63997b8bcff..e463ebd69a9c 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -345,6 +345,11 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt,
345 fsopt->rsize = CEPH_RSIZE_DEFAULT; 345 fsopt->rsize = CEPH_RSIZE_DEFAULT;
346 fsopt->rasize = CEPH_RASIZE_DEFAULT; 346 fsopt->rasize = CEPH_RASIZE_DEFAULT;
347 fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); 347 fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
348 if (!fsopt->snapdir_name) {
349 err = -ENOMEM;
350 goto out;
351 }
352
348 fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; 353 fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
349 fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; 354 fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
350 fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; 355 fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
@@ -406,31 +411,20 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
406{ 411{
407 struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb); 412 struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb);
408 struct ceph_mount_options *fsopt = fsc->mount_options; 413 struct ceph_mount_options *fsopt = fsc->mount_options;
409 struct ceph_options *opt = fsc->client->options; 414 size_t pos;
410 415 int ret;
411 if (opt->flags & CEPH_OPT_FSID) 416
412 seq_printf(m, ",fsid=%pU", &opt->fsid); 417 /* a comma between MNT/MS and client options */
413 if (opt->flags & CEPH_OPT_NOSHARE) 418 seq_putc(m, ',');
414 seq_puts(m, ",noshare"); 419 pos = m->count;
415 if (opt->flags & CEPH_OPT_NOCRC) 420
416 seq_puts(m, ",nocrc"); 421 ret = ceph_print_client_options(m, fsc->client);
417 if (opt->flags & CEPH_OPT_NOMSGAUTH) 422 if (ret)
418 seq_puts(m, ",nocephx_require_signatures"); 423 return ret;
419 if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0) 424
420 seq_puts(m, ",notcp_nodelay"); 425 /* retract our comma if no client options */
421 426 if (m->count == pos)
422 if (opt->name) 427 m->count--;
423 seq_printf(m, ",name=%s", opt->name);
424 if (opt->key)
425 seq_puts(m, ",secret=<hidden>");
426
427 if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
428 seq_printf(m, ",mount_timeout=%d", opt->mount_timeout);
429 if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
430 seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl);
431 if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
432 seq_printf(m, ",osdkeepalivetimeout=%d",
433 opt->osd_keepalive_timeout);
434 428
435 if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT) 429 if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
436 seq_puts(m, ",dirstat"); 430 seq_puts(m, ",dirstat");
@@ -438,14 +432,10 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
438 seq_puts(m, ",norbytes"); 432 seq_puts(m, ",norbytes");
439 if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) 433 if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
440 seq_puts(m, ",noasyncreaddir"); 434 seq_puts(m, ",noasyncreaddir");
441 if (fsopt->flags & CEPH_MOUNT_OPT_DCACHE) 435 if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
442 seq_puts(m, ",dcache");
443 else
444 seq_puts(m, ",nodcache"); 436 seq_puts(m, ",nodcache");
445 if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) 437 if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE)
446 seq_puts(m, ",fsc"); 438 seq_puts(m, ",fsc");
447 else
448 seq_puts(m, ",nofsc");
449 439
450#ifdef CONFIG_CEPH_FS_POSIX_ACL 440#ifdef CONFIG_CEPH_FS_POSIX_ACL
451 if (fsopt->sb_flags & MS_POSIXACL) 441 if (fsopt->sb_flags & MS_POSIXACL)
@@ -477,6 +467,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
477 seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes); 467 seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes);
478 if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) 468 if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
479 seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name); 469 seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name);
470
480 return 0; 471 return 0;
481} 472}
482 473
@@ -730,6 +721,11 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
730 if (IS_ERR(req)) 721 if (IS_ERR(req))
731 return ERR_CAST(req); 722 return ERR_CAST(req);
732 req->r_path1 = kstrdup(path, GFP_NOFS); 723 req->r_path1 = kstrdup(path, GFP_NOFS);
724 if (!req->r_path1) {
725 root = ERR_PTR(-ENOMEM);
726 goto out;
727 }
728
733 req->r_ino1.ino = CEPH_INO_ROOT; 729 req->r_ino1.ino = CEPH_INO_ROOT;
734 req->r_ino1.snap = CEPH_NOSNAP; 730 req->r_ino1.snap = CEPH_NOSNAP;
735 req->r_started = started; 731 req->r_started = started;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 04c8124ed30e..fa20e1318939 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -36,7 +36,8 @@
36#define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */ 36#define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */
37#define CEPH_MOUNT_OPT_FSCACHE (1<<10) /* use fscache */ 37#define CEPH_MOUNT_OPT_FSCACHE (1<<10) /* use fscache */
38 38
39#define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES) 39#define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES | \
40 CEPH_MOUNT_OPT_DCACHE)
40 41
41#define ceph_set_mount_opt(fsc, opt) \ 42#define ceph_set_mount_opt(fsc, opt) \
42 (fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt; 43 (fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt;
@@ -881,7 +882,6 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma);
881 882
882/* file.c */ 883/* file.c */
883extern const struct file_operations ceph_file_fops; 884extern const struct file_operations ceph_file_fops;
884extern const struct address_space_operations ceph_aops;
885 885
886extern int ceph_open(struct inode *inode, struct file *file); 886extern int ceph_open(struct inode *inode, struct file *file);
887extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry, 887extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 5a492caf34cb..5c4c9c256931 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -877,16 +877,23 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
877 err = PTR_ERR(req); 877 err = PTR_ERR(req);
878 goto out; 878 goto out;
879 } 879 }
880 req->r_inode = inode; 880
881 ihold(inode);
882 req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
883 req->r_num_caps = 1;
884 req->r_args.setxattr.flags = cpu_to_le32(flags); 881 req->r_args.setxattr.flags = cpu_to_le32(flags);
885 req->r_path2 = kstrdup(name, GFP_NOFS); 882 req->r_path2 = kstrdup(name, GFP_NOFS);
883 if (!req->r_path2) {
884 ceph_mdsc_put_request(req);
885 err = -ENOMEM;
886 goto out;
887 }
886 888
887 req->r_pagelist = pagelist; 889 req->r_pagelist = pagelist;
888 pagelist = NULL; 890 pagelist = NULL;
889 891
892 req->r_inode = inode;
893 ihold(inode);
894 req->r_num_caps = 1;
895 req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
896
890 dout("xattr.ver (before): %lld\n", ci->i_xattrs.version); 897 dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
891 err = ceph_mdsc_do_request(mdsc, NULL, req); 898 err = ceph_mdsc_do_request(mdsc, NULL, req);
892 ceph_mdsc_put_request(req); 899 ceph_mdsc_put_request(req);
@@ -1019,12 +1026,14 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
1019 USE_AUTH_MDS); 1026 USE_AUTH_MDS);
1020 if (IS_ERR(req)) 1027 if (IS_ERR(req))
1021 return PTR_ERR(req); 1028 return PTR_ERR(req);
1029 req->r_path2 = kstrdup(name, GFP_NOFS);
1030 if (!req->r_path2)
1031 return -ENOMEM;
1032
1022 req->r_inode = inode; 1033 req->r_inode = inode;
1023 ihold(inode); 1034 ihold(inode);
1024 req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
1025 req->r_num_caps = 1; 1035 req->r_num_caps = 1;
1026 req->r_path2 = kstrdup(name, GFP_NOFS); 1036 req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
1027
1028 err = ceph_mdsc_do_request(mdsc, NULL, req); 1037 err = ceph_mdsc_do_request(mdsc, NULL, req);
1029 ceph_mdsc_put_request(req); 1038 ceph_mdsc_put_request(req);
1030 return err; 1039 return err;
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 71e05bbf8ceb..4763ad64e832 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -50,6 +50,19 @@
50#define CEPH_FEATURE_MDS_INLINE_DATA (1ULL<<40) 50#define CEPH_FEATURE_MDS_INLINE_DATA (1ULL<<40)
51#define CEPH_FEATURE_CRUSH_TUNABLES3 (1ULL<<41) 51#define CEPH_FEATURE_CRUSH_TUNABLES3 (1ULL<<41)
52#define CEPH_FEATURE_OSD_PRIMARY_AFFINITY (1ULL<<41) /* overlap w/ tunables3 */ 52#define CEPH_FEATURE_OSD_PRIMARY_AFFINITY (1ULL<<41) /* overlap w/ tunables3 */
53#define CEPH_FEATURE_MSGR_KEEPALIVE2 (1ULL<<42)
54#define CEPH_FEATURE_OSD_POOLRESEND (1ULL<<43)
55#define CEPH_FEATURE_ERASURE_CODE_PLUGINS_V2 (1ULL<<44)
56#define CEPH_FEATURE_OSD_SET_ALLOC_HINT (1ULL<<45)
57#define CEPH_FEATURE_OSD_FADVISE_FLAGS (1ULL<<46)
58#define CEPH_FEATURE_OSD_REPOP (1ULL<<46) /* overlap with fadvise */
59#define CEPH_FEATURE_OSD_OBJECT_DIGEST (1ULL<<46) /* overlap with fadvise */
60#define CEPH_FEATURE_OSD_TRANSACTION_MAY_LAYOUT (1ULL<<46) /* overlap w/ fadvise */
61#define CEPH_FEATURE_MDS_QUOTA (1ULL<<47)
62#define CEPH_FEATURE_CRUSH_V4 (1ULL<<48) /* straw2 buckets */
63#define CEPH_FEATURE_OSD_MIN_SIZE_RECOVERY (1ULL<<49)
64// duplicated since it was introduced at the same time as MIN_SIZE_RECOVERY
65#define CEPH_FEATURE_OSD_PROXY_FEATURES (1ULL<<49) /* overlap w/ above */
53 66
54/* 67/*
55 * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature 68 * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature
@@ -93,7 +106,8 @@ static inline u64 ceph_sanitize_features(u64 features)
93 CEPH_FEATURE_EXPORT_PEER | \ 106 CEPH_FEATURE_EXPORT_PEER | \
94 CEPH_FEATURE_OSDMAP_ENC | \ 107 CEPH_FEATURE_OSDMAP_ENC | \
95 CEPH_FEATURE_CRUSH_TUNABLES3 | \ 108 CEPH_FEATURE_CRUSH_TUNABLES3 | \
96 CEPH_FEATURE_OSD_PRIMARY_AFFINITY) 109 CEPH_FEATURE_OSD_PRIMARY_AFFINITY | \
110 CEPH_FEATURE_CRUSH_V4)
97 111
98#define CEPH_FEATURES_REQUIRED_DEFAULT \ 112#define CEPH_FEATURES_REQUIRED_DEFAULT \
99 (CEPH_FEATURE_NOSRCADDR | \ 113 (CEPH_FEATURE_NOSRCADDR | \
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index 31eb03d0c766..d7d072a25c27 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -323,6 +323,7 @@ enum {
323 CEPH_MDS_OP_MKSNAP = 0x01400, 323 CEPH_MDS_OP_MKSNAP = 0x01400,
324 CEPH_MDS_OP_RMSNAP = 0x01401, 324 CEPH_MDS_OP_RMSNAP = 0x01401,
325 CEPH_MDS_OP_LSSNAP = 0x00402, 325 CEPH_MDS_OP_LSSNAP = 0x00402,
326 CEPH_MDS_OP_RENAMESNAP = 0x01403,
326}; 327};
327 328
328extern const char *ceph_mds_op_name(int op); 329extern const char *ceph_mds_op_name(int op);
diff --git a/include/linux/ceph/debugfs.h b/include/linux/ceph/debugfs.h
index 1df086d7882d..29cf897cc5cd 100644
--- a/include/linux/ceph/debugfs.h
+++ b/include/linux/ceph/debugfs.h
@@ -7,13 +7,7 @@
7#define CEPH_DEFINE_SHOW_FUNC(name) \ 7#define CEPH_DEFINE_SHOW_FUNC(name) \
8static int name##_open(struct inode *inode, struct file *file) \ 8static int name##_open(struct inode *inode, struct file *file) \
9{ \ 9{ \
10 struct seq_file *sf; \ 10 return single_open(file, name, inode->i_private); \
11 int ret; \
12 \
13 ret = single_open(file, name, NULL); \
14 sf = file->private_data; \
15 sf->private = inode->i_private; \
16 return ret; \
17} \ 11} \
18 \ 12 \
19static const struct file_operations name##_fops = { \ 13static const struct file_operations name##_fops = { \
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 16fff9608848..30f92cefaa72 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -135,6 +135,7 @@ struct ceph_client {
135 struct dentry *debugfs_dir; 135 struct dentry *debugfs_dir;
136 struct dentry *debugfs_monmap; 136 struct dentry *debugfs_monmap;
137 struct dentry *debugfs_osdmap; 137 struct dentry *debugfs_osdmap;
138 struct dentry *debugfs_options;
138#endif 139#endif
139}; 140};
140 141
@@ -191,6 +192,7 @@ extern struct ceph_options *ceph_parse_options(char *options,
191 const char *dev_name, const char *dev_name_end, 192 const char *dev_name, const char *dev_name_end,
192 int (*parse_extra_token)(char *c, void *private), 193 int (*parse_extra_token)(char *c, void *private),
193 void *private); 194 void *private);
195int ceph_print_client_options(struct seq_file *m, struct ceph_client *client);
194extern void ceph_destroy_options(struct ceph_options *opt); 196extern void ceph_destroy_options(struct ceph_options *opt);
195extern int ceph_compare_options(struct ceph_options *new_opt, 197extern int ceph_compare_options(struct ceph_options *new_opt,
196 struct ceph_client *client); 198 struct ceph_client *client);
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index 561ea896c657..e55c08bc3a96 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -175,13 +175,12 @@ static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid)
175 __u8 version; 175 __u8 version;
176 176
177 if (!ceph_has_room(p, end, 1 + 8 + 4 + 4)) { 177 if (!ceph_has_room(p, end, 1 + 8 + 4 + 4)) {
178 pr_warning("incomplete pg encoding"); 178 pr_warn("incomplete pg encoding\n");
179
180 return -EINVAL; 179 return -EINVAL;
181 } 180 }
182 version = ceph_decode_8(p); 181 version = ceph_decode_8(p);
183 if (version > 1) { 182 if (version > 1) {
184 pr_warning("do not understand pg encoding %d > 1", 183 pr_warn("do not understand pg encoding %d > 1\n",
185 (int)version); 184 (int)version);
186 return -EINVAL; 185 return -EINVAL;
187 } 186 }
diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
index 4fad5f8ee01d..48a1a7d100f1 100644
--- a/include/linux/crush/crush.h
+++ b/include/linux/crush/crush.h
@@ -96,13 +96,15 @@ struct crush_rule {
96 * uniform O(1) poor poor 96 * uniform O(1) poor poor
97 * list O(n) optimal poor 97 * list O(n) optimal poor
98 * tree O(log n) good good 98 * tree O(log n) good good
99 * straw O(n) optimal optimal 99 * straw O(n) better better
100 * straw2 O(n) optimal optimal
100 */ 101 */
101enum { 102enum {
102 CRUSH_BUCKET_UNIFORM = 1, 103 CRUSH_BUCKET_UNIFORM = 1,
103 CRUSH_BUCKET_LIST = 2, 104 CRUSH_BUCKET_LIST = 2,
104 CRUSH_BUCKET_TREE = 3, 105 CRUSH_BUCKET_TREE = 3,
105 CRUSH_BUCKET_STRAW = 4 106 CRUSH_BUCKET_STRAW = 4,
107 CRUSH_BUCKET_STRAW2 = 5,
106}; 108};
107extern const char *crush_bucket_alg_name(int alg); 109extern const char *crush_bucket_alg_name(int alg);
108 110
@@ -149,6 +151,11 @@ struct crush_bucket_straw {
149 __u32 *straws; /* 16-bit fixed point */ 151 __u32 *straws; /* 16-bit fixed point */
150}; 152};
151 153
154struct crush_bucket_straw2 {
155 struct crush_bucket h;
156 __u32 *item_weights; /* 16-bit fixed point */
157};
158
152 159
153 160
154/* 161/*
@@ -189,6 +196,7 @@ extern void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b);
189extern void crush_destroy_bucket_list(struct crush_bucket_list *b); 196extern void crush_destroy_bucket_list(struct crush_bucket_list *b);
190extern void crush_destroy_bucket_tree(struct crush_bucket_tree *b); 197extern void crush_destroy_bucket_tree(struct crush_bucket_tree *b);
191extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b); 198extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b);
199extern void crush_destroy_bucket_straw2(struct crush_bucket_straw2 *b);
192extern void crush_destroy_bucket(struct crush_bucket *b); 200extern void crush_destroy_bucket(struct crush_bucket *b);
193extern void crush_destroy_rule(struct crush_rule *r); 201extern void crush_destroy_rule(struct crush_rule *r);
194extern void crush_destroy(struct crush_map *map); 202extern void crush_destroy(struct crush_map *map);
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index ec565508e904..79e8f71aef5b 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -490,6 +490,43 @@ out:
490} 490}
491EXPORT_SYMBOL(ceph_parse_options); 491EXPORT_SYMBOL(ceph_parse_options);
492 492
493int ceph_print_client_options(struct seq_file *m, struct ceph_client *client)
494{
495 struct ceph_options *opt = client->options;
496 size_t pos = m->count;
497
498 if (opt->name)
499 seq_printf(m, "name=%s,", opt->name);
500 if (opt->key)
501 seq_puts(m, "secret=<hidden>,");
502
503 if (opt->flags & CEPH_OPT_FSID)
504 seq_printf(m, "fsid=%pU,", &opt->fsid);
505 if (opt->flags & CEPH_OPT_NOSHARE)
506 seq_puts(m, "noshare,");
507 if (opt->flags & CEPH_OPT_NOCRC)
508 seq_puts(m, "nocrc,");
509 if (opt->flags & CEPH_OPT_NOMSGAUTH)
510 seq_puts(m, "nocephx_require_signatures,");
511 if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0)
512 seq_puts(m, "notcp_nodelay,");
513
514 if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
515 seq_printf(m, "mount_timeout=%d,", opt->mount_timeout);
516 if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
517 seq_printf(m, "osd_idle_ttl=%d,", opt->osd_idle_ttl);
518 if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
519 seq_printf(m, "osdkeepalivetimeout=%d,",
520 opt->osd_keepalive_timeout);
521
522 /* drop redundant comma */
523 if (m->count != pos)
524 m->count--;
525
526 return 0;
527}
528EXPORT_SYMBOL(ceph_print_client_options);
529
493u64 ceph_client_id(struct ceph_client *client) 530u64 ceph_client_id(struct ceph_client *client)
494{ 531{
495 return client->monc.auth->global_id; 532 return client->monc.auth->global_id;
diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
index 16bc199d9a62..9d84ce4ea0df 100644
--- a/net/ceph/crush/crush.c
+++ b/net/ceph/crush/crush.c
@@ -17,6 +17,7 @@ const char *crush_bucket_alg_name(int alg)
17 case CRUSH_BUCKET_LIST: return "list"; 17 case CRUSH_BUCKET_LIST: return "list";
18 case CRUSH_BUCKET_TREE: return "tree"; 18 case CRUSH_BUCKET_TREE: return "tree";
19 case CRUSH_BUCKET_STRAW: return "straw"; 19 case CRUSH_BUCKET_STRAW: return "straw";
20 case CRUSH_BUCKET_STRAW2: return "straw2";
20 default: return "unknown"; 21 default: return "unknown";
21 } 22 }
22} 23}
@@ -40,6 +41,8 @@ int crush_get_bucket_item_weight(const struct crush_bucket *b, int p)
40 return ((struct crush_bucket_tree *)b)->node_weights[crush_calc_tree_node(p)]; 41 return ((struct crush_bucket_tree *)b)->node_weights[crush_calc_tree_node(p)];
41 case CRUSH_BUCKET_STRAW: 42 case CRUSH_BUCKET_STRAW:
42 return ((struct crush_bucket_straw *)b)->item_weights[p]; 43 return ((struct crush_bucket_straw *)b)->item_weights[p];
44 case CRUSH_BUCKET_STRAW2:
45 return ((struct crush_bucket_straw2 *)b)->item_weights[p];
43 } 46 }
44 return 0; 47 return 0;
45} 48}
@@ -77,6 +80,14 @@ void crush_destroy_bucket_straw(struct crush_bucket_straw *b)
77 kfree(b); 80 kfree(b);
78} 81}
79 82
83void crush_destroy_bucket_straw2(struct crush_bucket_straw2 *b)
84{
85 kfree(b->item_weights);
86 kfree(b->h.perm);
87 kfree(b->h.items);
88 kfree(b);
89}
90
80void crush_destroy_bucket(struct crush_bucket *b) 91void crush_destroy_bucket(struct crush_bucket *b)
81{ 92{
82 switch (b->alg) { 93 switch (b->alg) {
@@ -92,6 +103,9 @@ void crush_destroy_bucket(struct crush_bucket *b)
92 case CRUSH_BUCKET_STRAW: 103 case CRUSH_BUCKET_STRAW:
93 crush_destroy_bucket_straw((struct crush_bucket_straw *)b); 104 crush_destroy_bucket_straw((struct crush_bucket_straw *)b);
94 break; 105 break;
106 case CRUSH_BUCKET_STRAW2:
107 crush_destroy_bucket_straw2((struct crush_bucket_straw2 *)b);
108 break;
95 } 109 }
96} 110}
97 111
diff --git a/net/ceph/crush/crush_ln_table.h b/net/ceph/crush/crush_ln_table.h
new file mode 100644
index 000000000000..6192c7fc958c
--- /dev/null
+++ b/net/ceph/crush/crush_ln_table.h
@@ -0,0 +1,166 @@
1/*
2 * Ceph - scalable distributed file system
3 *
4 * Copyright (C) 2015 Intel Corporation All Rights Reserved
5 *
6 * This is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License version 2.1, as published by the Free Software
9 * Foundation. See file COPYING.
10 *
11 */
12
13#if defined(__linux__)
14#include <linux/types.h>
15#elif defined(__FreeBSD__)
16#include <sys/types.h>
17#endif
18
19#ifndef CEPH_CRUSH_LN_H
20#define CEPH_CRUSH_LN_H
21
22
23// RH_LH_tbl[2*k] = 2^48/(1.0+k/128.0)
24// RH_LH_tbl[2*k+1] = 2^48*log2(1.0+k/128.0)
25
26static int64_t __RH_LH_tbl[128*2+2] = {
27 0x0001000000000000ll, 0x0000000000000000ll, 0x0000fe03f80fe040ll, 0x000002dfca16dde1ll,
28 0x0000fc0fc0fc0fc1ll, 0x000005b9e5a170b4ll, 0x0000fa232cf25214ll, 0x0000088e68ea899all,
29 0x0000f83e0f83e0f9ll, 0x00000b5d69bac77ell, 0x0000f6603d980f67ll, 0x00000e26fd5c8555ll,
30 0x0000f4898d5f85bcll, 0x000010eb389fa29fll, 0x0000f2b9d6480f2cll, 0x000013aa2fdd27f1ll,
31 0x0000f0f0f0f0f0f1ll, 0x00001663f6fac913ll, 0x0000ef2eb71fc435ll, 0x00001918a16e4633ll,
32 0x0000ed7303b5cc0fll, 0x00001bc84240adabll, 0x0000ebbdb2a5c162ll, 0x00001e72ec117fa5ll,
33 0x0000ea0ea0ea0ea1ll, 0x00002118b119b4f3ll, 0x0000e865ac7b7604ll, 0x000023b9a32eaa56ll,
34 0x0000e6c2b4481cd9ll, 0x00002655d3c4f15cll, 0x0000e525982af70dll, 0x000028ed53f307eell,
35 0x0000e38e38e38e39ll, 0x00002b803473f7adll, 0x0000e1fc780e1fc8ll, 0x00002e0e85a9de04ll,
36 0x0000e070381c0e08ll, 0x0000309857a05e07ll, 0x0000dee95c4ca038ll, 0x0000331dba0efce1ll,
37 0x0000dd67c8a60dd7ll, 0x0000359ebc5b69d9ll, 0x0000dbeb61eed19dll, 0x0000381b6d9bb29bll,
38 0x0000da740da740dbll, 0x00003a93dc9864b2ll, 0x0000d901b2036407ll, 0x00003d0817ce9cd4ll,
39 0x0000d79435e50d7all, 0x00003f782d7204d0ll, 0x0000d62b80d62b81ll, 0x000041e42b6ec0c0ll,
40 0x0000d4c77b03531ell, 0x0000444c1f6b4c2dll, 0x0000d3680d3680d4ll, 0x000046b016ca47c1ll,
41 0x0000d20d20d20d21ll, 0x000049101eac381cll, 0x0000d0b69fcbd259ll, 0x00004b6c43f1366all,
42 0x0000cf6474a8819fll, 0x00004dc4933a9337ll, 0x0000ce168a772509ll, 0x0000501918ec6c11ll,
43 0x0000cccccccccccdll, 0x00005269e12f346ell, 0x0000cb8727c065c4ll, 0x000054b6f7f1325all,
44 0x0000ca4587e6b750ll, 0x0000570068e7ef5all, 0x0000c907da4e8712ll, 0x000059463f919deell,
45 0x0000c7ce0c7ce0c8ll, 0x00005b8887367433ll, 0x0000c6980c6980c7ll, 0x00005dc74ae9fbecll,
46 0x0000c565c87b5f9ell, 0x00006002958c5871ll, 0x0000c4372f855d83ll, 0x0000623a71cb82c8ll,
47 0x0000c30c30c30c31ll, 0x0000646eea247c5cll, 0x0000c1e4bbd595f7ll, 0x000066a008e4788cll,
48 0x0000c0c0c0c0c0c1ll, 0x000068cdd829fd81ll, 0x0000bfa02fe80bfbll, 0x00006af861e5fc7dll,
49 0x0000be82fa0be830ll, 0x00006d1fafdce20all, 0x0000bd6910470767ll, 0x00006f43cba79e40ll,
50 0x0000bc52640bc527ll, 0x00007164beb4a56dll, 0x0000bb3ee721a54ell, 0x000073829248e961ll,
51 0x0000ba2e8ba2e8bbll, 0x0000759d4f80cba8ll, 0x0000b92143fa36f6ll, 0x000077b4ff5108d9ll,
52 0x0000b81702e05c0cll, 0x000079c9aa879d53ll, 0x0000b70fbb5a19bfll, 0x00007bdb59cca388ll,
53 0x0000b60b60b60b61ll, 0x00007dea15a32c1bll, 0x0000b509e68a9b95ll, 0x00007ff5e66a0ffell,
54 0x0000b40b40b40b41ll, 0x000081fed45cbccbll, 0x0000b30f63528918ll, 0x00008404e793fb81ll,
55 0x0000b21642c8590cll, 0x000086082806b1d5ll, 0x0000b11fd3b80b12ll, 0x000088089d8a9e47ll,
56 0x0000b02c0b02c0b1ll, 0x00008a064fd50f2all, 0x0000af3addc680b0ll, 0x00008c01467b94bbll,
57 0x0000ae4c415c9883ll, 0x00008df988f4ae80ll, 0x0000ad602b580ad7ll, 0x00008fef1e987409ll,
58 0x0000ac7691840ac8ll, 0x000091e20ea1393ell, 0x0000ab8f69e2835all, 0x000093d2602c2e5fll,
59 0x0000aaaaaaaaaaabll, 0x000095c01a39fbd6ll, 0x0000a9c84a47a080ll, 0x000097ab43af59f9ll,
60 0x0000a8e83f5717c1ll, 0x00009993e355a4e5ll, 0x0000a80a80a80a81ll, 0x00009b79ffdb6c8bll,
61 0x0000a72f0539782all, 0x00009d5d9fd5010bll, 0x0000a655c4392d7cll, 0x00009f3ec9bcfb80ll,
62 0x0000a57eb50295fbll, 0x0000a11d83f4c355ll, 0x0000a4a9cf1d9684ll, 0x0000a2f9d4c51039ll,
63 0x0000a3d70a3d70a4ll, 0x0000a4d3c25e68dcll, 0x0000a3065e3fae7dll, 0x0000a6ab52d99e76ll,
64 0x0000a237c32b16d0ll, 0x0000a8808c384547ll, 0x0000a16b312ea8fdll, 0x0000aa5374652a1cll,
65 0x0000a0a0a0a0a0a1ll, 0x0000ac241134c4e9ll, 0x00009fd809fd80a0ll, 0x0000adf26865a8a1ll,
66 0x00009f1165e72549ll, 0x0000afbe7fa0f04dll, 0x00009e4cad23dd60ll, 0x0000b1885c7aa982ll,
67 0x00009d89d89d89d9ll, 0x0000b35004723c46ll, 0x00009cc8e160c3fcll, 0x0000b5157cf2d078ll,
68 0x00009c09c09c09c1ll, 0x0000b6d8cb53b0call, 0x00009b4c6f9ef03bll, 0x0000b899f4d8ab63ll,
69 0x00009a90e7d95bc7ll, 0x0000ba58feb2703all, 0x000099d722dabde6ll, 0x0000bc15edfeed32ll,
70 0x0000991f1a515886ll, 0x0000bdd0c7c9a817ll, 0x00009868c809868dll, 0x0000bf89910c1678ll,
71 0x000097b425ed097cll, 0x0000c1404eadf383ll, 0x000097012e025c05ll, 0x0000c2f5058593d9ll,
72 0x0000964fda6c0965ll, 0x0000c4a7ba58377cll, 0x000095a02568095bll, 0x0000c65871da59ddll,
73 0x000094f2094f2095ll, 0x0000c80730b00016ll, 0x0000944580944581ll, 0x0000c9b3fb6d0559ll,
74 0x0000939a85c4093all, 0x0000cb5ed69565afll, 0x000092f113840498ll, 0x0000cd07c69d8702ll,
75 0x0000924924924925ll, 0x0000ceaecfea8085ll, 0x000091a2b3c4d5e7ll, 0x0000d053f6d26089ll,
76 0x000090fdbc090fdcll, 0x0000d1f73f9c70c0ll, 0x0000905a38633e07ll, 0x0000d398ae817906ll,
77 0x00008fb823ee08fcll, 0x0000d53847ac00a6ll, 0x00008f1779d9fdc4ll, 0x0000d6d60f388e41ll,
78 0x00008e78356d1409ll, 0x0000d8720935e643ll, 0x00008dda5202376all, 0x0000da0c39a54804ll,
79 0x00008d3dcb08d3ddll, 0x0000dba4a47aa996ll, 0x00008ca29c046515ll, 0x0000dd3b4d9cf24bll,
80 0x00008c08c08c08c1ll, 0x0000ded038e633f3ll, 0x00008b70344a139cll, 0x0000e0636a23e2eell,
81 0x00008ad8f2fba939ll, 0x0000e1f4e5170d02ll, 0x00008a42f870566all, 0x0000e384ad748f0ell,
82 0x000089ae4089ae41ll, 0x0000e512c6e54998ll, 0x0000891ac73ae982ll, 0x0000e69f35065448ll,
83 0x0000888888888889ll, 0x0000e829fb693044ll, 0x000087f78087f781ll, 0x0000e9b31d93f98ell,
84 0x00008767ab5f34e5ll, 0x0000eb3a9f019750ll, 0x000086d905447a35ll, 0x0000ecc08321eb30ll,
85 0x0000864b8a7de6d2ll, 0x0000ee44cd59ffabll, 0x000085bf37612cefll, 0x0000efc781043579ll,
86 0x0000853408534086ll, 0x0000f148a170700all, 0x000084a9f9c8084bll, 0x0000f2c831e44116ll,
87 0x0000842108421085ll, 0x0000f446359b1353ll, 0x0000839930523fbfll, 0x0000f5c2afc65447ll,
88 0x000083126e978d50ll, 0x0000f73da38d9d4all, 0x0000828cbfbeb9a1ll, 0x0000f8b7140edbb1ll,
89 0x0000820820820821ll, 0x0000fa2f045e7832ll, 0x000081848da8faf1ll, 0x0000fba577877d7dll,
90 0x0000810204081021ll, 0x0000fd1a708bbe11ll, 0x0000808080808081ll, 0x0000fe8df263f957ll,
91 0x0000800000000000ll, 0x0000ffff00000000ll,
92 };
93
94
95 // LL_tbl[k] = 2^48*log2(1.0+k/2^15);
96static int64_t __LL_tbl[256] = {
97 0x0000000000000000ull, 0x00000002e2a60a00ull, 0x000000070cb64ec5ull, 0x00000009ef50ce67ull,
98 0x0000000cd1e588fdull, 0x0000000fb4747e9cull, 0x0000001296fdaf5eull, 0x0000001579811b58ull,
99 0x000000185bfec2a1ull, 0x0000001b3e76a552ull, 0x0000001e20e8c380ull, 0x0000002103551d43ull,
100 0x00000023e5bbb2b2ull, 0x00000026c81c83e4ull, 0x00000029aa7790f0ull, 0x0000002c8cccd9edull,
101 0x0000002f6f1c5ef2ull, 0x0000003251662017ull, 0x0000003533aa1d71ull, 0x0000003815e8571aull,
102 0x0000003af820cd26ull, 0x0000003dda537faeull, 0x00000040bc806ec8ull, 0x000000439ea79a8cull,
103 0x0000004680c90310ull, 0x0000004962e4a86cull, 0x0000004c44fa8ab6ull, 0x0000004f270aaa06ull,
104 0x0000005209150672ull, 0x00000054eb19a013ull, 0x00000057cd1876fdull, 0x0000005aaf118b4aull,
105 0x0000005d9104dd0full, 0x0000006072f26c64ull, 0x0000006354da3960ull, 0x0000006636bc441aull,
106 0x0000006918988ca8ull, 0x0000006bfa6f1322ull, 0x0000006edc3fd79full, 0x00000071be0ada35ull,
107 0x000000749fd01afdull, 0x00000077818f9a0cull, 0x0000007a6349577aull, 0x0000007d44fd535eull,
108 0x0000008026ab8dceull, 0x00000083085406e3ull, 0x00000085e9f6beb2ull, 0x00000088cb93b552ull,
109 0x0000008bad2aeadcull, 0x0000008e8ebc5f65ull, 0x0000009170481305ull, 0x0000009451ce05d3ull,
110 0x00000097334e37e5ull, 0x0000009a14c8a953ull, 0x0000009cf63d5a33ull, 0x0000009fd7ac4a9dull,
111 0x000000a2b07f3458ull, 0x000000a59a78ea6aull, 0x000000a87bd699fbull, 0x000000ab5d2e8970ull,
112 0x000000ae3e80b8e3ull, 0x000000b11fcd2869ull, 0x000000b40113d818ull, 0x000000b6e254c80aull,
113 0x000000b9c38ff853ull, 0x000000bca4c5690cull, 0x000000bf85f51a4aull, 0x000000c2671f0c26ull,
114 0x000000c548433eb6ull, 0x000000c82961b211ull, 0x000000cb0a7a664dull, 0x000000cdeb8d5b82ull,
115 0x000000d0cc9a91c8ull, 0x000000d3ada20933ull, 0x000000d68ea3c1ddull, 0x000000d96f9fbbdbull,
116 0x000000dc5095f744ull, 0x000000df31867430ull, 0x000000e2127132b5ull, 0x000000e4f35632eaull,
117 0x000000e7d43574e6ull, 0x000000eab50ef8c1ull, 0x000000ed95e2be90ull, 0x000000f076b0c66cull,
118 0x000000f35779106aull, 0x000000f6383b9ca2ull, 0x000000f918f86b2aull, 0x000000fbf9af7c1aull,
119 0x000000feda60cf88ull, 0x00000101bb0c658cull, 0x000001049bb23e3cull, 0x000001077c5259afull,
120 0x0000010a5cecb7fcull, 0x0000010d3d81593aull, 0x000001101e103d7full, 0x00000112fe9964e4ull,
121 0x00000115df1ccf7eull, 0x00000118bf9a7d64ull, 0x0000011ba0126eadull, 0x0000011e8084a371ull,
122 0x0000012160f11bc6ull, 0x000001244157d7c3ull, 0x0000012721b8d77full, 0x0000012a02141b10ull,
123 0x0000012ce269a28eull, 0x0000012fc2b96e0full, 0x00000132a3037daaull, 0x000001358347d177ull,
124 0x000001386386698cull, 0x0000013b43bf45ffull, 0x0000013e23f266e9ull, 0x00000141041fcc5eull,
125 0x00000143e4477678ull, 0x00000146c469654bull, 0x00000149a48598f0ull, 0x0000014c849c117cull,
126 0x0000014f64accf08ull, 0x0000015244b7d1a9ull, 0x0000015524bd1976ull, 0x0000015804bca687ull,
127 0x0000015ae4b678f2ull, 0x0000015dc4aa90ceull, 0x00000160a498ee31ull, 0x0000016384819134ull,
128 0x00000166646479ecull, 0x000001694441a870ull, 0x0000016c24191cd7ull, 0x0000016df6ca19bdull,
129 0x00000171e3b6d7aaull, 0x00000174c37d1e44ull, 0x00000177a33dab1cull, 0x0000017a82f87e49ull,
130 0x0000017d62ad97e2ull, 0x00000180425cf7feull, 0x00000182b07f3458ull, 0x0000018601aa8c19ull,
131 0x00000188e148c046ull, 0x0000018bc0e13b52ull, 0x0000018ea073fd52ull, 0x000001918001065dull,
132 0x000001945f88568bull, 0x000001973f09edf2ull, 0x0000019a1e85ccaaull, 0x0000019cfdfbf2c8ull,
133 0x0000019fdd6c6063ull, 0x000001a2bcd71593ull, 0x000001a59c3c126eull, 0x000001a87b9b570bull,
134 0x000001ab5af4e380ull, 0x000001ae3a48b7e5ull, 0x000001b11996d450ull, 0x000001b3f8df38d9ull,
135 0x000001b6d821e595ull, 0x000001b9b75eda9bull, 0x000001bc96961803ull, 0x000001bf75c79de3ull,
136 0x000001c254f36c51ull, 0x000001c534198365ull, 0x000001c81339e336ull, 0x000001caf2548bd9ull,
137 0x000001cdd1697d67ull, 0x000001d0b078b7f5ull, 0x000001d38f823b9aull, 0x000001d66e86086dull,
138 0x000001d94d841e86ull, 0x000001dc2c7c7df9ull, 0x000001df0b6f26dfull, 0x000001e1ea5c194eull,
139 0x000001e4c943555dull, 0x000001e7a824db23ull, 0x000001ea8700aab5ull, 0x000001ed65d6c42bull,
140 0x000001f044a7279dull, 0x000001f32371d51full, 0x000001f60236cccaull, 0x000001f8e0f60eb3ull,
141 0x000001fbbfaf9af3ull, 0x000001fe9e63719eull, 0x000002017d1192ccull, 0x000002045bb9fe94ull,
142 0x000002073a5cb50dull, 0x00000209c06e6212ull, 0x0000020cf791026aull, 0x0000020fd622997cull,
143 0x00000212b07f3458ull, 0x000002159334a8d8ull, 0x0000021871b52150ull, 0x0000021b502fe517ull,
144 0x0000021d6a73a78full, 0x000002210d144eeeull, 0x00000223eb7df52cull, 0x00000226c9e1e713ull,
145 0x00000229a84024bbull, 0x0000022c23679b4eull, 0x0000022f64eb83a8ull, 0x000002324338a51bull,
146 0x00000235218012a9ull, 0x00000237ffc1cc69ull, 0x0000023a2c3b0ea4ull, 0x0000023d13ee805bull,
147 0x0000024035e9221full, 0x00000243788faf25ull, 0x0000024656b4e735ull, 0x00000247ed646bfeull,
148 0x0000024c12ee3d98ull, 0x0000024ef1025c1aull, 0x00000251cf10c799ull, 0x0000025492644d65ull,
149 0x000002578b1c85eeull, 0x0000025a6919d8f0ull, 0x0000025d13ee805bull, 0x0000026025036716ull,
150 0x0000026296453882ull, 0x00000265e0d62b53ull, 0x00000268beb701f3ull, 0x0000026b9c92265eull,
151 0x0000026d32f798a9ull, 0x00000271583758ebull, 0x000002743601673bull, 0x0000027713c5c3b0ull,
152 0x00000279f1846e5full, 0x0000027ccf3d6761ull, 0x0000027e6580aecbull, 0x000002828a9e44b3ull,
153 0x0000028568462932ull, 0x00000287bdbf5255ull, 0x0000028b2384de4aull, 0x0000028d13ee805bull,
154 0x0000029035e9221full, 0x0000029296453882ull, 0x0000029699bdfb61ull, 0x0000029902a37aabull,
155 0x0000029c54b864c9ull, 0x0000029deabd1083ull, 0x000002a20f9c0bb5ull, 0x000002a4c7605d61ull,
156 0x000002a7bdbf5255ull, 0x000002a96056dafcull, 0x000002ac3daf14efull, 0x000002af1b019ecaull,
157 0x000002b296453882ull, 0x000002b5d022d80full, 0x000002b8fa471cb3ull, 0x000002ba9012e713ull,
158 0x000002bd6d4901ccull, 0x000002c04a796cf6ull, 0x000002c327a428a6ull, 0x000002c61a5e8f4cull,
159 0x000002c8e1e891f6ull, 0x000002cbbf023fc2ull, 0x000002ce9c163e6eull, 0x000002d179248e13ull,
160 0x000002d4562d2ec6ull, 0x000002d73330209dull, 0x000002da102d63b0ull, 0x000002dced24f814ull,
161};
162
163
164
165
166#endif
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index a1ef53c04415..5b47736d27d9 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -20,7 +20,7 @@
20 20
21#include <linux/crush/crush.h> 21#include <linux/crush/crush.h>
22#include <linux/crush/hash.h> 22#include <linux/crush/hash.h>
23#include <linux/crush/mapper.h> 23#include "crush_ln_table.h"
24 24
25/* 25/*
26 * Implement the core CRUSH mapping algorithm. 26 * Implement the core CRUSH mapping algorithm.
@@ -238,6 +238,102 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket,
238 return bucket->h.items[high]; 238 return bucket->h.items[high];
239} 239}
240 240
241// compute 2^44*log2(input+1)
242uint64_t crush_ln(unsigned xin)
243{
244 unsigned x=xin, x1;
245 int iexpon, index1, index2;
246 uint64_t RH, LH, LL, xl64, result;
247
248 x++;
249
250 // normalize input
251 iexpon = 15;
252 while(!(x&0x18000)) { x<<=1; iexpon--; }
253
254 index1 = (x>>8)<<1;
255 // RH ~ 2^56/index1
256 RH = __RH_LH_tbl[index1 - 256];
257 // LH ~ 2^48 * log2(index1/256)
258 LH = __RH_LH_tbl[index1 + 1 - 256];
259
260 // RH*x ~ 2^48 * (2^15 + xf), xf<2^8
261 xl64 = (int64_t)x * RH;
262 xl64 >>= 48;
263 x1 = xl64;
264
265 result = iexpon;
266 result <<= (12 + 32);
267
268 index2 = x1 & 0xff;
269 // LL ~ 2^48*log2(1.0+index2/2^15)
270 LL = __LL_tbl[index2];
271
272 LH = LH + LL;
273
274 LH >>= (48-12 - 32);
275 result += LH;
276
277 return result;
278}
279
280
281/*
282 * straw2
283 *
284 * for reference, see:
285 *
286 * http://en.wikipedia.org/wiki/Exponential_distribution#Distribution_of_the_minimum_of_exponential_random_variables
287 *
288 */
289
290static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket,
291 int x, int r)
292{
293 unsigned i, high = 0;
294 unsigned u;
295 unsigned w;
296 __s64 ln, draw, high_draw = 0;
297
298 for (i = 0; i < bucket->h.size; i++) {
299 w = bucket->item_weights[i];
300 if (w) {
301 u = crush_hash32_3(bucket->h.hash, x,
302 bucket->h.items[i], r);
303 u &= 0xffff;
304
305 /*
306 * for some reason slightly less than 0x10000 produces
307 * a slightly more accurate distribution... probably a
308 * rounding effect.
309 *
310 * the natural log lookup table maps [0,0xffff]
311 * (corresponding to real numbers [1/0x10000, 1]) to
312 * [0, 0xffffffffffff] (corresponding to real numbers
313 * [-11.090355,0]).
314 */
315 ln = crush_ln(u) - 0x1000000000000ll;
316
317 /*
318 * divide by 16.16 fixed-point weight. note
319 * that the ln value is negative, so a larger
320 * weight means a larger (less negative) value
321 * for draw.
322 */
323 draw = div64_s64(ln, w);
324 } else {
325 draw = S64_MIN;
326 }
327
328 if (i == 0 || draw > high_draw) {
329 high = i;
330 high_draw = draw;
331 }
332 }
333 return bucket->h.items[high];
334}
335
336
241static int crush_bucket_choose(struct crush_bucket *in, int x, int r) 337static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
242{ 338{
243 dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r); 339 dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
@@ -255,12 +351,16 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
255 case CRUSH_BUCKET_STRAW: 351 case CRUSH_BUCKET_STRAW:
256 return bucket_straw_choose((struct crush_bucket_straw *)in, 352 return bucket_straw_choose((struct crush_bucket_straw *)in,
257 x, r); 353 x, r);
354 case CRUSH_BUCKET_STRAW2:
355 return bucket_straw2_choose((struct crush_bucket_straw2 *)in,
356 x, r);
258 default: 357 default:
259 dprintk("unknown bucket %d alg %d\n", in->id, in->alg); 358 dprintk("unknown bucket %d alg %d\n", in->id, in->alg);
260 return in->items[0]; 359 return in->items[0];
261 } 360 }
262} 361}
263 362
363
264/* 364/*
265 * true if device is marked "out" (failed, fully offloaded) 365 * true if device is marked "out" (failed, fully offloaded)
266 * of the cluster 366 * of the cluster
@@ -290,6 +390,7 @@ static int is_out(const struct crush_map *map,
290 * @type: the type of item to choose 390 * @type: the type of item to choose
291 * @out: pointer to output vector 391 * @out: pointer to output vector
292 * @outpos: our position in that vector 392 * @outpos: our position in that vector
393 * @out_size: size of the out vector
293 * @tries: number of attempts to make 394 * @tries: number of attempts to make
294 * @recurse_tries: number of attempts to have recursive chooseleaf make 395 * @recurse_tries: number of attempts to have recursive chooseleaf make
295 * @local_retries: localized retries 396 * @local_retries: localized retries
@@ -304,6 +405,7 @@ static int crush_choose_firstn(const struct crush_map *map,
304 const __u32 *weight, int weight_max, 405 const __u32 *weight, int weight_max,
305 int x, int numrep, int type, 406 int x, int numrep, int type,
306 int *out, int outpos, 407 int *out, int outpos,
408 int out_size,
307 unsigned int tries, 409 unsigned int tries,
308 unsigned int recurse_tries, 410 unsigned int recurse_tries,
309 unsigned int local_retries, 411 unsigned int local_retries,
@@ -322,6 +424,7 @@ static int crush_choose_firstn(const struct crush_map *map,
322 int item = 0; 424 int item = 0;
323 int itemtype; 425 int itemtype;
324 int collide, reject; 426 int collide, reject;
427 int count = out_size;
325 428
326 dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n", 429 dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n",
327 recurse_to_leaf ? "_LEAF" : "", 430 recurse_to_leaf ? "_LEAF" : "",
@@ -329,7 +432,7 @@ static int crush_choose_firstn(const struct crush_map *map,
329 tries, recurse_tries, local_retries, local_fallback_retries, 432 tries, recurse_tries, local_retries, local_fallback_retries,
330 parent_r); 433 parent_r);
331 434
332 for (rep = outpos; rep < numrep; rep++) { 435 for (rep = outpos; rep < numrep && count > 0 ; rep++) {
333 /* keep trying until we get a non-out, non-colliding item */ 436 /* keep trying until we get a non-out, non-colliding item */
334 ftotal = 0; 437 ftotal = 0;
335 skip_rep = 0; 438 skip_rep = 0;
@@ -403,7 +506,7 @@ static int crush_choose_firstn(const struct crush_map *map,
403 map->buckets[-1-item], 506 map->buckets[-1-item],
404 weight, weight_max, 507 weight, weight_max,
405 x, outpos+1, 0, 508 x, outpos+1, 0,
406 out2, outpos, 509 out2, outpos, count,
407 recurse_tries, 0, 510 recurse_tries, 0,
408 local_retries, 511 local_retries,
409 local_fallback_retries, 512 local_fallback_retries,
@@ -463,6 +566,7 @@ reject:
463 dprintk("CHOOSE got %d\n", item); 566 dprintk("CHOOSE got %d\n", item);
464 out[outpos] = item; 567 out[outpos] = item;
465 outpos++; 568 outpos++;
569 count--;
466 } 570 }
467 571
468 dprintk("CHOOSE returns %d\n", outpos); 572 dprintk("CHOOSE returns %d\n", outpos);
@@ -654,6 +758,7 @@ int crush_do_rule(const struct crush_map *map,
654 __u32 step; 758 __u32 step;
655 int i, j; 759 int i, j;
656 int numrep; 760 int numrep;
761 int out_size;
657 /* 762 /*
658 * the original choose_total_tries value was off by one (it 763 * the original choose_total_tries value was off by one (it
659 * counted "retries" and not "tries"). add one. 764 * counted "retries" and not "tries"). add one.
@@ -761,6 +866,7 @@ int crush_do_rule(const struct crush_map *map,
761 x, numrep, 866 x, numrep,
762 curstep->arg2, 867 curstep->arg2,
763 o+osize, j, 868 o+osize, j,
869 result_max-osize,
764 choose_tries, 870 choose_tries,
765 recurse_tries, 871 recurse_tries,
766 choose_local_retries, 872 choose_local_retries,
@@ -770,11 +876,13 @@ int crush_do_rule(const struct crush_map *map,
770 c+osize, 876 c+osize,
771 0); 877 0);
772 } else { 878 } else {
879 out_size = ((numrep < (result_max-osize)) ?
880 numrep : (result_max-osize));
773 crush_choose_indep( 881 crush_choose_indep(
774 map, 882 map,
775 map->buckets[-1-w[i]], 883 map->buckets[-1-w[i]],
776 weight, weight_max, 884 weight, weight_max,
777 x, numrep, numrep, 885 x, out_size, numrep,
778 curstep->arg2, 886 curstep->arg2,
779 o+osize, j, 887 o+osize, j,
780 choose_tries, 888 choose_tries,
@@ -783,7 +891,7 @@ int crush_do_rule(const struct crush_map *map,
783 recurse_to_leaf, 891 recurse_to_leaf,
784 c+osize, 892 c+osize,
785 0); 893 0);
786 osize += numrep; 894 osize += out_size;
787 } 895 }
788 } 896 }
789 897
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 14d9995097cc..593dc2eabcc8 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -22,6 +22,7 @@
22 * .../monmap - current monmap 22 * .../monmap - current monmap
23 * .../osdc - active osd requests 23 * .../osdc - active osd requests
24 * .../monc - mon client state 24 * .../monc - mon client state
25 * .../client_options - libceph-only (i.e. not rbd or cephfs) options
25 * .../dentry_lru - dump contents of dentry lru 26 * .../dentry_lru - dump contents of dentry lru
26 * .../caps - expose cap (reservation) stats 27 * .../caps - expose cap (reservation) stats
27 * .../bdi - symlink to ../../bdi/something 28 * .../bdi - symlink to ../../bdi/something
@@ -177,10 +178,24 @@ static int osdc_show(struct seq_file *s, void *pp)
177 return 0; 178 return 0;
178} 179}
179 180
181static int client_options_show(struct seq_file *s, void *p)
182{
183 struct ceph_client *client = s->private;
184 int ret;
185
186 ret = ceph_print_client_options(s, client);
187 if (ret)
188 return ret;
189
190 seq_putc(s, '\n');
191 return 0;
192}
193
180CEPH_DEFINE_SHOW_FUNC(monmap_show) 194CEPH_DEFINE_SHOW_FUNC(monmap_show)
181CEPH_DEFINE_SHOW_FUNC(osdmap_show) 195CEPH_DEFINE_SHOW_FUNC(osdmap_show)
182CEPH_DEFINE_SHOW_FUNC(monc_show) 196CEPH_DEFINE_SHOW_FUNC(monc_show)
183CEPH_DEFINE_SHOW_FUNC(osdc_show) 197CEPH_DEFINE_SHOW_FUNC(osdc_show)
198CEPH_DEFINE_SHOW_FUNC(client_options_show)
184 199
185int ceph_debugfs_init(void) 200int ceph_debugfs_init(void)
186{ 201{
@@ -242,6 +257,14 @@ int ceph_debugfs_client_init(struct ceph_client *client)
242 if (!client->debugfs_osdmap) 257 if (!client->debugfs_osdmap)
243 goto out; 258 goto out;
244 259
260 client->debugfs_options = debugfs_create_file("client_options",
261 0600,
262 client->debugfs_dir,
263 client,
264 &client_options_show_fops);
265 if (!client->debugfs_options)
266 goto out;
267
245 return 0; 268 return 0;
246 269
247out: 270out:
@@ -252,6 +275,7 @@ out:
252void ceph_debugfs_client_cleanup(struct ceph_client *client) 275void ceph_debugfs_client_cleanup(struct ceph_client *client)
253{ 276{
254 dout("ceph_debugfs_client_cleanup %p\n", client); 277 dout("ceph_debugfs_client_cleanup %p\n", client);
278 debugfs_remove(client->debugfs_options);
255 debugfs_remove(client->debugfs_osdmap); 279 debugfs_remove(client->debugfs_osdmap);
256 debugfs_remove(client->debugfs_monmap); 280 debugfs_remove(client->debugfs_monmap);
257 debugfs_remove(client->osdc.debugfs_file); 281 debugfs_remove(client->osdc.debugfs_file);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index a9f4ae45b7fb..967080a9f043 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -505,8 +505,6 @@ static int ceph_tcp_connect(struct ceph_connection *con)
505 pr_err("connect %s error %d\n", 505 pr_err("connect %s error %d\n",
506 ceph_pr_addr(&con->peer_addr.in_addr), ret); 506 ceph_pr_addr(&con->peer_addr.in_addr), ret);
507 sock_release(sock); 507 sock_release(sock);
508 con->error_msg = "connect error";
509
510 return ret; 508 return ret;
511 } 509 }
512 510
@@ -2145,12 +2143,10 @@ static int process_connect(struct ceph_connection *con)
2145 * to WAIT. This shouldn't happen if we are the 2143 * to WAIT. This shouldn't happen if we are the
2146 * client. 2144 * client.
2147 */ 2145 */
2148 pr_err("process_connect got WAIT as client\n");
2149 con->error_msg = "protocol error, got WAIT as client"; 2146 con->error_msg = "protocol error, got WAIT as client";
2150 return -1; 2147 return -1;
2151 2148
2152 default: 2149 default:
2153 pr_err("connect protocol error, will retry\n");
2154 con->error_msg = "protocol error, garbage tag during connect"; 2150 con->error_msg = "protocol error, garbage tag during connect";
2155 return -1; 2151 return -1;
2156 } 2152 }
@@ -2282,8 +2278,7 @@ static int read_partial_message(struct ceph_connection *con)
2282 2278
2283 crc = crc32c(0, &con->in_hdr, offsetof(struct ceph_msg_header, crc)); 2279 crc = crc32c(0, &con->in_hdr, offsetof(struct ceph_msg_header, crc));
2284 if (cpu_to_le32(crc) != con->in_hdr.crc) { 2280 if (cpu_to_le32(crc) != con->in_hdr.crc) {
2285 pr_err("read_partial_message bad hdr " 2281 pr_err("read_partial_message bad hdr crc %u != expected %u\n",
2286 " crc %u != expected %u\n",
2287 crc, con->in_hdr.crc); 2282 crc, con->in_hdr.crc);
2288 return -EBADMSG; 2283 return -EBADMSG;
2289 } 2284 }
@@ -2313,7 +2308,7 @@ static int read_partial_message(struct ceph_connection *con)
2313 pr_err("read_partial_message bad seq %lld expected %lld\n", 2308 pr_err("read_partial_message bad seq %lld expected %lld\n",
2314 seq, con->in_seq + 1); 2309 seq, con->in_seq + 1);
2315 con->error_msg = "bad message sequence # for incoming message"; 2310 con->error_msg = "bad message sequence # for incoming message";
2316 return -EBADMSG; 2311 return -EBADE;
2317 } 2312 }
2318 2313
2319 /* allocate message? */ 2314 /* allocate message? */
@@ -2660,6 +2655,8 @@ more:
2660 switch (ret) { 2655 switch (ret) {
2661 case -EBADMSG: 2656 case -EBADMSG:
2662 con->error_msg = "bad crc"; 2657 con->error_msg = "bad crc";
2658 /* fall through */
2659 case -EBADE:
2663 ret = -EIO; 2660 ret = -EIO;
2664 break; 2661 break;
2665 case -EIO: 2662 case -EIO:
@@ -2838,7 +2835,8 @@ static void con_work(struct work_struct *work)
2838 if (ret < 0) { 2835 if (ret < 0) {
2839 if (ret == -EAGAIN) 2836 if (ret == -EAGAIN)
2840 continue; 2837 continue;
2841 con->error_msg = "socket error on read"; 2838 if (!con->error_msg)
2839 con->error_msg = "socket error on read";
2842 fault = true; 2840 fault = true;
2843 break; 2841 break;
2844 } 2842 }
@@ -2847,7 +2845,8 @@ static void con_work(struct work_struct *work)
2847 if (ret < 0) { 2845 if (ret < 0) {
2848 if (ret == -EAGAIN) 2846 if (ret == -EAGAIN)
2849 continue; 2847 continue;
2850 con->error_msg = "socket error on write"; 2848 if (!con->error_msg)
2849 con->error_msg = "socket error on write";
2851 fault = true; 2850 fault = true;
2852 } 2851 }
2853 2852
@@ -2869,11 +2868,13 @@ static void con_work(struct work_struct *work)
2869 */ 2868 */
2870static void con_fault(struct ceph_connection *con) 2869static void con_fault(struct ceph_connection *con)
2871{ 2870{
2872 pr_warn("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
2873 ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
2874 dout("fault %p state %lu to peer %s\n", 2871 dout("fault %p state %lu to peer %s\n",
2875 con, con->state, ceph_pr_addr(&con->peer_addr.in_addr)); 2872 con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));
2876 2873
2874 pr_warn("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
2875 ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
2876 con->error_msg = NULL;
2877
2877 WARN_ON(con->state != CON_STATE_CONNECTING && 2878 WARN_ON(con->state != CON_STATE_CONNECTING &&
2878 con->state != CON_STATE_NEGOTIATING && 2879 con->state != CON_STATE_NEGOTIATING &&
2879 con->state != CON_STATE_OPEN); 2880 con->state != CON_STATE_OPEN);
@@ -3295,8 +3296,8 @@ static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
3295 */ 3296 */
3296 if (*skip) 3297 if (*skip)
3297 return 0; 3298 return 0;
3298 con->error_msg = "error allocating memory for incoming message";
3299 3299
3300 con->error_msg = "error allocating memory for incoming message";
3300 return -ENOMEM; 3301 return -ENOMEM;
3301 } 3302 }
3302 memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); 3303 memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index b8c3fde5b04f..15796696d64e 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -122,6 +122,22 @@ bad:
122 return -EINVAL; 122 return -EINVAL;
123} 123}
124 124
125static int crush_decode_straw2_bucket(void **p, void *end,
126 struct crush_bucket_straw2 *b)
127{
128 int j;
129 dout("crush_decode_straw2_bucket %p to %p\n", *p, end);
130 b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
131 if (b->item_weights == NULL)
132 return -ENOMEM;
133 ceph_decode_need(p, end, b->h.size * sizeof(u32), bad);
134 for (j = 0; j < b->h.size; j++)
135 b->item_weights[j] = ceph_decode_32(p);
136 return 0;
137bad:
138 return -EINVAL;
139}
140
125static int skip_name_map(void **p, void *end) 141static int skip_name_map(void **p, void *end)
126{ 142{
127 int len; 143 int len;
@@ -204,6 +220,9 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
204 case CRUSH_BUCKET_STRAW: 220 case CRUSH_BUCKET_STRAW:
205 size = sizeof(struct crush_bucket_straw); 221 size = sizeof(struct crush_bucket_straw);
206 break; 222 break;
223 case CRUSH_BUCKET_STRAW2:
224 size = sizeof(struct crush_bucket_straw2);
225 break;
207 default: 226 default:
208 err = -EINVAL; 227 err = -EINVAL;
209 goto bad; 228 goto bad;
@@ -261,6 +280,12 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
261 if (err < 0) 280 if (err < 0)
262 goto bad; 281 goto bad;
263 break; 282 break;
283 case CRUSH_BUCKET_STRAW2:
284 err = crush_decode_straw2_bucket(p, end,
285 (struct crush_bucket_straw2 *)b);
286 if (err < 0)
287 goto bad;
288 break;
264 } 289 }
265 } 290 }
266 291