diff options
-rw-r--r-- | drivers/block/rbd.c | 26 | ||||
-rw-r--r-- | fs/ceph/addr.c | 38 | ||||
-rw-r--r-- | fs/ceph/caps.c | 51 | ||||
-rw-r--r-- | fs/ceph/dir.c | 48 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 61 | ||||
-rw-r--r-- | fs/ceph/strings.c | 1 | ||||
-rw-r--r-- | fs/ceph/super.c | 56 | ||||
-rw-r--r-- | fs/ceph/super.h | 4 | ||||
-rw-r--r-- | fs/ceph/xattr.c | 23 | ||||
-rw-r--r-- | include/linux/ceph/ceph_features.h | 16 | ||||
-rw-r--r-- | include/linux/ceph/ceph_fs.h | 1 | ||||
-rw-r--r-- | include/linux/ceph/debugfs.h | 8 | ||||
-rw-r--r-- | include/linux/ceph/libceph.h | 2 | ||||
-rw-r--r-- | include/linux/ceph/osdmap.h | 5 | ||||
-rw-r--r-- | include/linux/crush/crush.h | 12 | ||||
-rw-r--r-- | net/ceph/ceph_common.c | 37 | ||||
-rw-r--r-- | net/ceph/crush/crush.c | 14 | ||||
-rw-r--r-- | net/ceph/crush/crush_ln_table.h | 166 | ||||
-rw-r--r-- | net/ceph/crush/mapper.c | 118 | ||||
-rw-r--r-- | net/ceph/debugfs.c | 24 | ||||
-rw-r--r-- | net/ceph/messenger.c | 25 | ||||
-rw-r--r-- | net/ceph/osdmap.c | 25 |
22 files changed, 633 insertions, 128 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index b40af3203089..812523330a78 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c | |||
@@ -3762,8 +3762,8 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) | |||
3762 | goto out_tag_set; | 3762 | goto out_tag_set; |
3763 | } | 3763 | } |
3764 | 3764 | ||
3765 | /* We use the default size, but let's be explicit about it. */ | 3765 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); |
3766 | blk_queue_physical_block_size(q, SECTOR_SIZE); | 3766 | /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */ |
3767 | 3767 | ||
3768 | /* set io sizes to object size */ | 3768 | /* set io sizes to object size */ |
3769 | segment_size = rbd_obj_bytes(&rbd_dev->header); | 3769 | segment_size = rbd_obj_bytes(&rbd_dev->header); |
@@ -5301,8 +5301,13 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping) | |||
5301 | 5301 | ||
5302 | if (mapping) { | 5302 | if (mapping) { |
5303 | ret = rbd_dev_header_watch_sync(rbd_dev); | 5303 | ret = rbd_dev_header_watch_sync(rbd_dev); |
5304 | if (ret) | 5304 | if (ret) { |
5305 | if (ret == -ENOENT) | ||
5306 | pr_info("image %s/%s does not exist\n", | ||
5307 | rbd_dev->spec->pool_name, | ||
5308 | rbd_dev->spec->image_name); | ||
5305 | goto out_header_name; | 5309 | goto out_header_name; |
5310 | } | ||
5306 | } | 5311 | } |
5307 | 5312 | ||
5308 | ret = rbd_dev_header_info(rbd_dev); | 5313 | ret = rbd_dev_header_info(rbd_dev); |
@@ -5319,8 +5324,14 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping) | |||
5319 | ret = rbd_spec_fill_snap_id(rbd_dev); | 5324 | ret = rbd_spec_fill_snap_id(rbd_dev); |
5320 | else | 5325 | else |
5321 | ret = rbd_spec_fill_names(rbd_dev); | 5326 | ret = rbd_spec_fill_names(rbd_dev); |
5322 | if (ret) | 5327 | if (ret) { |
5328 | if (ret == -ENOENT) | ||
5329 | pr_info("snap %s/%s@%s does not exist\n", | ||
5330 | rbd_dev->spec->pool_name, | ||
5331 | rbd_dev->spec->image_name, | ||
5332 | rbd_dev->spec->snap_name); | ||
5323 | goto err_out_probe; | 5333 | goto err_out_probe; |
5334 | } | ||
5324 | 5335 | ||
5325 | if (rbd_dev->header.features & RBD_FEATURE_LAYERING) { | 5336 | if (rbd_dev->header.features & RBD_FEATURE_LAYERING) { |
5326 | ret = rbd_dev_v2_parent_info(rbd_dev); | 5337 | ret = rbd_dev_v2_parent_info(rbd_dev); |
@@ -5390,8 +5401,11 @@ static ssize_t do_rbd_add(struct bus_type *bus, | |||
5390 | 5401 | ||
5391 | /* pick the pool */ | 5402 | /* pick the pool */ |
5392 | rc = rbd_add_get_pool_id(rbdc, spec->pool_name); | 5403 | rc = rbd_add_get_pool_id(rbdc, spec->pool_name); |
5393 | if (rc < 0) | 5404 | if (rc < 0) { |
5405 | if (rc == -ENOENT) | ||
5406 | pr_info("pool %s does not exist\n", spec->pool_name); | ||
5394 | goto err_out_client; | 5407 | goto err_out_client; |
5408 | } | ||
5395 | spec->pool_id = (u64)rc; | 5409 | spec->pool_id = (u64)rc; |
5396 | 5410 | ||
5397 | /* The ceph file layout needs to fit pool id in 32 bits */ | 5411 | /* The ceph file layout needs to fit pool id in 32 bits */ |
@@ -5673,7 +5687,7 @@ static int __init rbd_init(void) | |||
5673 | 5687 | ||
5674 | /* | 5688 | /* |
5675 | * The number of active work items is limited by the number of | 5689 | * The number of active work items is limited by the number of |
5676 | * rbd devices, so leave @max_active at default. | 5690 | * rbd devices * queue depth, so leave @max_active at default. |
5677 | */ | 5691 | */ |
5678 | rbd_wq = alloc_workqueue(RBD_DRV_NAME, WQ_MEM_RECLAIM, 0); | 5692 | rbd_wq = alloc_workqueue(RBD_DRV_NAME, WQ_MEM_RECLAIM, 0); |
5679 | if (!rbd_wq) { | 5693 | if (!rbd_wq) { |
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 155ab9c0246b..e162bcd105ee 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -1146,6 +1146,10 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, | |||
1146 | inode, page, (int)pos, (int)len); | 1146 | inode, page, (int)pos, (int)len); |
1147 | 1147 | ||
1148 | r = ceph_update_writeable_page(file, pos, len, page); | 1148 | r = ceph_update_writeable_page(file, pos, len, page); |
1149 | if (r < 0) | ||
1150 | page_cache_release(page); | ||
1151 | else | ||
1152 | *pagep = page; | ||
1149 | } while (r == -EAGAIN); | 1153 | } while (r == -EAGAIN); |
1150 | 1154 | ||
1151 | return r; | 1155 | return r; |
@@ -1534,19 +1538,27 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) | |||
1534 | 1538 | ||
1535 | osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false); | 1539 | osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false); |
1536 | 1540 | ||
1537 | err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR, | 1541 | { |
1538 | "inline_version", &inline_version, | 1542 | __le64 xattr_buf = cpu_to_le64(inline_version); |
1539 | sizeof(inline_version), | 1543 | err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR, |
1540 | CEPH_OSD_CMPXATTR_OP_GT, | 1544 | "inline_version", &xattr_buf, |
1541 | CEPH_OSD_CMPXATTR_MODE_U64); | 1545 | sizeof(xattr_buf), |
1542 | if (err) | 1546 | CEPH_OSD_CMPXATTR_OP_GT, |
1543 | goto out_put; | 1547 | CEPH_OSD_CMPXATTR_MODE_U64); |
1544 | 1548 | if (err) | |
1545 | err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR, | 1549 | goto out_put; |
1546 | "inline_version", &inline_version, | 1550 | } |
1547 | sizeof(inline_version), 0, 0); | 1551 | |
1548 | if (err) | 1552 | { |
1549 | goto out_put; | 1553 | char xattr_buf[32]; |
1554 | int xattr_len = snprintf(xattr_buf, sizeof(xattr_buf), | ||
1555 | "%llu", inline_version); | ||
1556 | err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR, | ||
1557 | "inline_version", | ||
1558 | xattr_buf, xattr_len, 0, 0); | ||
1559 | if (err) | ||
1560 | goto out_put; | ||
1561 | } | ||
1550 | 1562 | ||
1551 | ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime); | 1563 | ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime); |
1552 | err = ceph_osdc_start_request(&fsc->client->osdc, req, false); | 1564 | err = ceph_osdc_start_request(&fsc->client->osdc, req, false); |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 8172775428a0..11631c4c7d14 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -896,6 +896,18 @@ int ceph_is_any_caps(struct inode *inode) | |||
896 | return ret; | 896 | return ret; |
897 | } | 897 | } |
898 | 898 | ||
899 | static void drop_inode_snap_realm(struct ceph_inode_info *ci) | ||
900 | { | ||
901 | struct ceph_snap_realm *realm = ci->i_snap_realm; | ||
902 | spin_lock(&realm->inodes_with_caps_lock); | ||
903 | list_del_init(&ci->i_snap_realm_item); | ||
904 | ci->i_snap_realm_counter++; | ||
905 | ci->i_snap_realm = NULL; | ||
906 | spin_unlock(&realm->inodes_with_caps_lock); | ||
907 | ceph_put_snap_realm(ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc, | ||
908 | realm); | ||
909 | } | ||
910 | |||
899 | /* | 911 | /* |
900 | * Remove a cap. Take steps to deal with a racing iterate_session_caps. | 912 | * Remove a cap. Take steps to deal with a racing iterate_session_caps. |
901 | * | 913 | * |
@@ -946,15 +958,13 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) | |||
946 | if (removed) | 958 | if (removed) |
947 | ceph_put_cap(mdsc, cap); | 959 | ceph_put_cap(mdsc, cap); |
948 | 960 | ||
949 | if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) { | 961 | /* when reconnect denied, we remove session caps forcibly, |
950 | struct ceph_snap_realm *realm = ci->i_snap_realm; | 962 | * i_wr_ref can be non-zero. If there are ongoing write, |
951 | spin_lock(&realm->inodes_with_caps_lock); | 963 | * keep i_snap_realm. |
952 | list_del_init(&ci->i_snap_realm_item); | 964 | */ |
953 | ci->i_snap_realm_counter++; | 965 | if (!__ceph_is_any_caps(ci) && ci->i_wr_ref == 0 && ci->i_snap_realm) |
954 | ci->i_snap_realm = NULL; | 966 | drop_inode_snap_realm(ci); |
955 | spin_unlock(&realm->inodes_with_caps_lock); | 967 | |
956 | ceph_put_snap_realm(mdsc, realm); | ||
957 | } | ||
958 | if (!__ceph_is_any_real_caps(ci)) | 968 | if (!__ceph_is_any_real_caps(ci)) |
959 | __cap_delay_cancel(mdsc, ci); | 969 | __cap_delay_cancel(mdsc, ci); |
960 | } | 970 | } |
@@ -1394,6 +1404,13 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) | |||
1394 | int was = ci->i_dirty_caps; | 1404 | int was = ci->i_dirty_caps; |
1395 | int dirty = 0; | 1405 | int dirty = 0; |
1396 | 1406 | ||
1407 | if (!ci->i_auth_cap) { | ||
1408 | pr_warn("__mark_dirty_caps %p %llx mask %s, " | ||
1409 | "but no auth cap (session was closed?)\n", | ||
1410 | inode, ceph_ino(inode), ceph_cap_string(mask)); | ||
1411 | return 0; | ||
1412 | } | ||
1413 | |||
1397 | dout("__mark_dirty_caps %p %s dirty %s -> %s\n", &ci->vfs_inode, | 1414 | dout("__mark_dirty_caps %p %s dirty %s -> %s\n", &ci->vfs_inode, |
1398 | ceph_cap_string(mask), ceph_cap_string(was), | 1415 | ceph_cap_string(mask), ceph_cap_string(was), |
1399 | ceph_cap_string(was | mask)); | 1416 | ceph_cap_string(was | mask)); |
@@ -1404,7 +1421,6 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) | |||
1404 | ci->i_snap_realm->cached_context); | 1421 | ci->i_snap_realm->cached_context); |
1405 | dout(" inode %p now dirty snapc %p auth cap %p\n", | 1422 | dout(" inode %p now dirty snapc %p auth cap %p\n", |
1406 | &ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap); | 1423 | &ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap); |
1407 | WARN_ON(!ci->i_auth_cap); | ||
1408 | BUG_ON(!list_empty(&ci->i_dirty_item)); | 1424 | BUG_ON(!list_empty(&ci->i_dirty_item)); |
1409 | spin_lock(&mdsc->cap_dirty_lock); | 1425 | spin_lock(&mdsc->cap_dirty_lock); |
1410 | list_add(&ci->i_dirty_item, &mdsc->cap_dirty); | 1426 | list_add(&ci->i_dirty_item, &mdsc->cap_dirty); |
@@ -1545,7 +1561,19 @@ retry_locked: | |||
1545 | if (!mdsc->stopping && inode->i_nlink > 0) { | 1561 | if (!mdsc->stopping && inode->i_nlink > 0) { |
1546 | if (want) { | 1562 | if (want) { |
1547 | retain |= CEPH_CAP_ANY; /* be greedy */ | 1563 | retain |= CEPH_CAP_ANY; /* be greedy */ |
1564 | } else if (S_ISDIR(inode->i_mode) && | ||
1565 | (issued & CEPH_CAP_FILE_SHARED) && | ||
1566 | __ceph_dir_is_complete(ci)) { | ||
1567 | /* | ||
1568 | * If a directory is complete, we want to keep | ||
1569 | * the exclusive cap. So that MDS does not end up | ||
1570 | * revoking the shared cap on every create/unlink | ||
1571 | * operation. | ||
1572 | */ | ||
1573 | want = CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL; | ||
1574 | retain |= want; | ||
1548 | } else { | 1575 | } else { |
1576 | |||
1549 | retain |= CEPH_CAP_ANY_SHARED; | 1577 | retain |= CEPH_CAP_ANY_SHARED; |
1550 | /* | 1578 | /* |
1551 | * keep RD only if we didn't have the file open RW, | 1579 | * keep RD only if we didn't have the file open RW, |
@@ -2309,6 +2337,9 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had) | |||
2309 | wake = 1; | 2337 | wake = 1; |
2310 | } | 2338 | } |
2311 | } | 2339 | } |
2340 | /* see comment in __ceph_remove_cap() */ | ||
2341 | if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) | ||
2342 | drop_inode_snap_realm(ci); | ||
2312 | } | 2343 | } |
2313 | spin_unlock(&ci->i_ceph_lock); | 2344 | spin_unlock(&ci->i_ceph_lock); |
2314 | 2345 | ||
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 83e9976f7189..e729b79812b4 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -281,6 +281,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) | |||
281 | /* can we use the dcache? */ | 281 | /* can we use the dcache? */ |
282 | spin_lock(&ci->i_ceph_lock); | 282 | spin_lock(&ci->i_ceph_lock); |
283 | if ((ctx->pos == 2 || fi->dentry) && | 283 | if ((ctx->pos == 2 || fi->dentry) && |
284 | ceph_test_mount_opt(fsc, DCACHE) && | ||
284 | !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && | 285 | !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && |
285 | ceph_snap(inode) != CEPH_SNAPDIR && | 286 | ceph_snap(inode) != CEPH_SNAPDIR && |
286 | __ceph_dir_is_complete_ordered(ci) && | 287 | __ceph_dir_is_complete_ordered(ci) && |
@@ -336,16 +337,23 @@ more: | |||
336 | ceph_mdsc_put_request(req); | 337 | ceph_mdsc_put_request(req); |
337 | return err; | 338 | return err; |
338 | } | 339 | } |
339 | req->r_inode = inode; | ||
340 | ihold(inode); | ||
341 | req->r_dentry = dget(file->f_path.dentry); | ||
342 | /* hints to request -> mds selection code */ | 340 | /* hints to request -> mds selection code */ |
343 | req->r_direct_mode = USE_AUTH_MDS; | 341 | req->r_direct_mode = USE_AUTH_MDS; |
344 | req->r_direct_hash = ceph_frag_value(frag); | 342 | req->r_direct_hash = ceph_frag_value(frag); |
345 | req->r_direct_is_hash = true; | 343 | req->r_direct_is_hash = true; |
346 | req->r_path2 = kstrdup(fi->last_name, GFP_NOFS); | 344 | if (fi->last_name) { |
345 | req->r_path2 = kstrdup(fi->last_name, GFP_NOFS); | ||
346 | if (!req->r_path2) { | ||
347 | ceph_mdsc_put_request(req); | ||
348 | return -ENOMEM; | ||
349 | } | ||
350 | } | ||
347 | req->r_readdir_offset = fi->next_offset; | 351 | req->r_readdir_offset = fi->next_offset; |
348 | req->r_args.readdir.frag = cpu_to_le32(frag); | 352 | req->r_args.readdir.frag = cpu_to_le32(frag); |
353 | |||
354 | req->r_inode = inode; | ||
355 | ihold(inode); | ||
356 | req->r_dentry = dget(file->f_path.dentry); | ||
349 | err = ceph_mdsc_do_request(mdsc, NULL, req); | 357 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
350 | if (err < 0) { | 358 | if (err < 0) { |
351 | ceph_mdsc_put_request(req); | 359 | ceph_mdsc_put_request(req); |
@@ -629,6 +637,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, | |||
629 | fsc->mount_options->snapdir_name, | 637 | fsc->mount_options->snapdir_name, |
630 | dentry->d_name.len) && | 638 | dentry->d_name.len) && |
631 | !is_root_ceph_dentry(dir, dentry) && | 639 | !is_root_ceph_dentry(dir, dentry) && |
640 | ceph_test_mount_opt(fsc, DCACHE) && | ||
632 | __ceph_dir_is_complete(ci) && | 641 | __ceph_dir_is_complete(ci) && |
633 | (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) { | 642 | (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) { |
634 | spin_unlock(&ci->i_ceph_lock); | 643 | spin_unlock(&ci->i_ceph_lock); |
@@ -755,10 +764,15 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry, | |||
755 | err = PTR_ERR(req); | 764 | err = PTR_ERR(req); |
756 | goto out; | 765 | goto out; |
757 | } | 766 | } |
758 | req->r_dentry = dget(dentry); | ||
759 | req->r_num_caps = 2; | ||
760 | req->r_path2 = kstrdup(dest, GFP_NOFS); | 767 | req->r_path2 = kstrdup(dest, GFP_NOFS); |
768 | if (!req->r_path2) { | ||
769 | err = -ENOMEM; | ||
770 | ceph_mdsc_put_request(req); | ||
771 | goto out; | ||
772 | } | ||
761 | req->r_locked_dir = dir; | 773 | req->r_locked_dir = dir; |
774 | req->r_dentry = dget(dentry); | ||
775 | req->r_num_caps = 2; | ||
762 | req->r_dentry_drop = CEPH_CAP_FILE_SHARED; | 776 | req->r_dentry_drop = CEPH_CAP_FILE_SHARED; |
763 | req->r_dentry_unless = CEPH_CAP_FILE_EXCL; | 777 | req->r_dentry_unless = CEPH_CAP_FILE_EXCL; |
764 | err = ceph_mdsc_do_request(mdsc, dir, req); | 778 | err = ceph_mdsc_do_request(mdsc, dir, req); |
@@ -933,16 +947,20 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
933 | struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb); | 947 | struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb); |
934 | struct ceph_mds_client *mdsc = fsc->mdsc; | 948 | struct ceph_mds_client *mdsc = fsc->mdsc; |
935 | struct ceph_mds_request *req; | 949 | struct ceph_mds_request *req; |
950 | int op = CEPH_MDS_OP_RENAME; | ||
936 | int err; | 951 | int err; |
937 | 952 | ||
938 | if (ceph_snap(old_dir) != ceph_snap(new_dir)) | 953 | if (ceph_snap(old_dir) != ceph_snap(new_dir)) |
939 | return -EXDEV; | 954 | return -EXDEV; |
940 | if (ceph_snap(old_dir) != CEPH_NOSNAP || | 955 | if (ceph_snap(old_dir) != CEPH_NOSNAP) { |
941 | ceph_snap(new_dir) != CEPH_NOSNAP) | 956 | if (old_dir == new_dir && ceph_snap(old_dir) == CEPH_SNAPDIR) |
942 | return -EROFS; | 957 | op = CEPH_MDS_OP_RENAMESNAP; |
958 | else | ||
959 | return -EROFS; | ||
960 | } | ||
943 | dout("rename dir %p dentry %p to dir %p dentry %p\n", | 961 | dout("rename dir %p dentry %p to dir %p dentry %p\n", |
944 | old_dir, old_dentry, new_dir, new_dentry); | 962 | old_dir, old_dentry, new_dir, new_dentry); |
945 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RENAME, USE_AUTH_MDS); | 963 | req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); |
946 | if (IS_ERR(req)) | 964 | if (IS_ERR(req)) |
947 | return PTR_ERR(req); | 965 | return PTR_ERR(req); |
948 | ihold(old_dir); | 966 | ihold(old_dir); |
@@ -1240,11 +1258,12 @@ static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end, | |||
1240 | dout("dir_fsync %p wait on tid %llu (until %llu)\n", | 1258 | dout("dir_fsync %p wait on tid %llu (until %llu)\n", |
1241 | inode, req->r_tid, last_tid); | 1259 | inode, req->r_tid, last_tid); |
1242 | if (req->r_timeout) { | 1260 | if (req->r_timeout) { |
1243 | ret = wait_for_completion_timeout( | 1261 | unsigned long time_left = wait_for_completion_timeout( |
1244 | &req->r_safe_completion, req->r_timeout); | 1262 | &req->r_safe_completion, |
1245 | if (ret > 0) | 1263 | req->r_timeout); |
1264 | if (time_left > 0) | ||
1246 | ret = 0; | 1265 | ret = 0; |
1247 | else if (ret == 0) | 1266 | else |
1248 | ret = -EIO; /* timed out */ | 1267 | ret = -EIO; /* timed out */ |
1249 | } else { | 1268 | } else { |
1250 | wait_for_completion(&req->r_safe_completion); | 1269 | wait_for_completion(&req->r_safe_completion); |
@@ -1372,6 +1391,7 @@ const struct inode_operations ceph_snapdir_iops = { | |||
1372 | .getattr = ceph_getattr, | 1391 | .getattr = ceph_getattr, |
1373 | .mkdir = ceph_mkdir, | 1392 | .mkdir = ceph_mkdir, |
1374 | .rmdir = ceph_unlink, | 1393 | .rmdir = ceph_unlink, |
1394 | .rename = ceph_rename, | ||
1375 | }; | 1395 | }; |
1376 | 1396 | ||
1377 | const struct dentry_operations ceph_dentry_ops = { | 1397 | const struct dentry_operations ceph_dentry_ops = { |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 71c073f38e54..0a2eb32ffe43 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -1021,6 +1021,33 @@ static void cleanup_cap_releases(struct ceph_mds_session *session) | |||
1021 | spin_unlock(&session->s_cap_lock); | 1021 | spin_unlock(&session->s_cap_lock); |
1022 | } | 1022 | } |
1023 | 1023 | ||
1024 | static void cleanup_session_requests(struct ceph_mds_client *mdsc, | ||
1025 | struct ceph_mds_session *session) | ||
1026 | { | ||
1027 | struct ceph_mds_request *req; | ||
1028 | struct rb_node *p; | ||
1029 | |||
1030 | dout("cleanup_session_requests mds%d\n", session->s_mds); | ||
1031 | mutex_lock(&mdsc->mutex); | ||
1032 | while (!list_empty(&session->s_unsafe)) { | ||
1033 | req = list_first_entry(&session->s_unsafe, | ||
1034 | struct ceph_mds_request, r_unsafe_item); | ||
1035 | list_del_init(&req->r_unsafe_item); | ||
1036 | pr_info(" dropping unsafe request %llu\n", req->r_tid); | ||
1037 | __unregister_request(mdsc, req); | ||
1038 | } | ||
1039 | /* zero r_attempts, so kick_requests() will re-send requests */ | ||
1040 | p = rb_first(&mdsc->request_tree); | ||
1041 | while (p) { | ||
1042 | req = rb_entry(p, struct ceph_mds_request, r_node); | ||
1043 | p = rb_next(p); | ||
1044 | if (req->r_session && | ||
1045 | req->r_session->s_mds == session->s_mds) | ||
1046 | req->r_attempts = 0; | ||
1047 | } | ||
1048 | mutex_unlock(&mdsc->mutex); | ||
1049 | } | ||
1050 | |||
1024 | /* | 1051 | /* |
1025 | * Helper to safely iterate over all caps associated with a session, with | 1052 | * Helper to safely iterate over all caps associated with a session, with |
1026 | * special care taken to handle a racing __ceph_remove_cap(). | 1053 | * special care taken to handle a racing __ceph_remove_cap(). |
@@ -1098,7 +1125,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
1098 | cap, ci, &ci->vfs_inode); | 1125 | cap, ci, &ci->vfs_inode); |
1099 | spin_lock(&ci->i_ceph_lock); | 1126 | spin_lock(&ci->i_ceph_lock); |
1100 | __ceph_remove_cap(cap, false); | 1127 | __ceph_remove_cap(cap, false); |
1101 | if (!__ceph_is_any_real_caps(ci)) { | 1128 | if (!ci->i_auth_cap) { |
1102 | struct ceph_mds_client *mdsc = | 1129 | struct ceph_mds_client *mdsc = |
1103 | ceph_sb_to_client(inode->i_sb)->mdsc; | 1130 | ceph_sb_to_client(inode->i_sb)->mdsc; |
1104 | 1131 | ||
@@ -1120,13 +1147,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
1120 | mdsc->num_cap_flushing--; | 1147 | mdsc->num_cap_flushing--; |
1121 | drop = 1; | 1148 | drop = 1; |
1122 | } | 1149 | } |
1123 | if (drop && ci->i_wrbuffer_ref) { | ||
1124 | pr_info(" dropping dirty data for %p %lld\n", | ||
1125 | inode, ceph_ino(inode)); | ||
1126 | ci->i_wrbuffer_ref = 0; | ||
1127 | ci->i_wrbuffer_ref_head = 0; | ||
1128 | drop++; | ||
1129 | } | ||
1130 | spin_unlock(&mdsc->cap_dirty_lock); | 1150 | spin_unlock(&mdsc->cap_dirty_lock); |
1131 | } | 1151 | } |
1132 | spin_unlock(&ci->i_ceph_lock); | 1152 | spin_unlock(&ci->i_ceph_lock); |
@@ -1853,7 +1873,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, | |||
1853 | */ | 1873 | */ |
1854 | static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, | 1874 | static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, |
1855 | struct ceph_mds_request *req, | 1875 | struct ceph_mds_request *req, |
1856 | int mds) | 1876 | int mds, bool drop_cap_releases) |
1857 | { | 1877 | { |
1858 | struct ceph_msg *msg; | 1878 | struct ceph_msg *msg; |
1859 | struct ceph_mds_request_head *head; | 1879 | struct ceph_mds_request_head *head; |
@@ -1937,6 +1957,12 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, | |||
1937 | releases += ceph_encode_inode_release(&p, | 1957 | releases += ceph_encode_inode_release(&p, |
1938 | req->r_old_dentry->d_inode, | 1958 | req->r_old_dentry->d_inode, |
1939 | mds, req->r_old_inode_drop, req->r_old_inode_unless, 0); | 1959 | mds, req->r_old_inode_drop, req->r_old_inode_unless, 0); |
1960 | |||
1961 | if (drop_cap_releases) { | ||
1962 | releases = 0; | ||
1963 | p = msg->front.iov_base + req->r_request_release_offset; | ||
1964 | } | ||
1965 | |||
1940 | head->num_releases = cpu_to_le16(releases); | 1966 | head->num_releases = cpu_to_le16(releases); |
1941 | 1967 | ||
1942 | /* time stamp */ | 1968 | /* time stamp */ |
@@ -1989,7 +2015,7 @@ static void complete_request(struct ceph_mds_client *mdsc, | |||
1989 | */ | 2015 | */ |
1990 | static int __prepare_send_request(struct ceph_mds_client *mdsc, | 2016 | static int __prepare_send_request(struct ceph_mds_client *mdsc, |
1991 | struct ceph_mds_request *req, | 2017 | struct ceph_mds_request *req, |
1992 | int mds) | 2018 | int mds, bool drop_cap_releases) |
1993 | { | 2019 | { |
1994 | struct ceph_mds_request_head *rhead; | 2020 | struct ceph_mds_request_head *rhead; |
1995 | struct ceph_msg *msg; | 2021 | struct ceph_msg *msg; |
@@ -2048,7 +2074,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, | |||
2048 | ceph_msg_put(req->r_request); | 2074 | ceph_msg_put(req->r_request); |
2049 | req->r_request = NULL; | 2075 | req->r_request = NULL; |
2050 | } | 2076 | } |
2051 | msg = create_request_message(mdsc, req, mds); | 2077 | msg = create_request_message(mdsc, req, mds, drop_cap_releases); |
2052 | if (IS_ERR(msg)) { | 2078 | if (IS_ERR(msg)) { |
2053 | req->r_err = PTR_ERR(msg); | 2079 | req->r_err = PTR_ERR(msg); |
2054 | complete_request(mdsc, req); | 2080 | complete_request(mdsc, req); |
@@ -2132,7 +2158,7 @@ static int __do_request(struct ceph_mds_client *mdsc, | |||
2132 | if (req->r_request_started == 0) /* note request start time */ | 2158 | if (req->r_request_started == 0) /* note request start time */ |
2133 | req->r_request_started = jiffies; | 2159 | req->r_request_started = jiffies; |
2134 | 2160 | ||
2135 | err = __prepare_send_request(mdsc, req, mds); | 2161 | err = __prepare_send_request(mdsc, req, mds, false); |
2136 | if (!err) { | 2162 | if (!err) { |
2137 | ceph_msg_get(req->r_request); | 2163 | ceph_msg_get(req->r_request); |
2138 | ceph_con_send(&session->s_con, req->r_request); | 2164 | ceph_con_send(&session->s_con, req->r_request); |
@@ -2590,6 +2616,7 @@ static void handle_session(struct ceph_mds_session *session, | |||
2590 | case CEPH_SESSION_CLOSE: | 2616 | case CEPH_SESSION_CLOSE: |
2591 | if (session->s_state == CEPH_MDS_SESSION_RECONNECTING) | 2617 | if (session->s_state == CEPH_MDS_SESSION_RECONNECTING) |
2592 | pr_info("mds%d reconnect denied\n", session->s_mds); | 2618 | pr_info("mds%d reconnect denied\n", session->s_mds); |
2619 | cleanup_session_requests(mdsc, session); | ||
2593 | remove_session_caps(session); | 2620 | remove_session_caps(session); |
2594 | wake = 2; /* for good measure */ | 2621 | wake = 2; /* for good measure */ |
2595 | wake_up_all(&mdsc->session_close_wq); | 2622 | wake_up_all(&mdsc->session_close_wq); |
@@ -2658,7 +2685,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc, | |||
2658 | 2685 | ||
2659 | mutex_lock(&mdsc->mutex); | 2686 | mutex_lock(&mdsc->mutex); |
2660 | list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item) { | 2687 | list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item) { |
2661 | err = __prepare_send_request(mdsc, req, session->s_mds); | 2688 | err = __prepare_send_request(mdsc, req, session->s_mds, true); |
2662 | if (!err) { | 2689 | if (!err) { |
2663 | ceph_msg_get(req->r_request); | 2690 | ceph_msg_get(req->r_request); |
2664 | ceph_con_send(&session->s_con, req->r_request); | 2691 | ceph_con_send(&session->s_con, req->r_request); |
@@ -2679,7 +2706,8 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc, | |||
2679 | continue; /* only old requests */ | 2706 | continue; /* only old requests */ |
2680 | if (req->r_session && | 2707 | if (req->r_session && |
2681 | req->r_session->s_mds == session->s_mds) { | 2708 | req->r_session->s_mds == session->s_mds) { |
2682 | err = __prepare_send_request(mdsc, req, session->s_mds); | 2709 | err = __prepare_send_request(mdsc, req, |
2710 | session->s_mds, true); | ||
2683 | if (!err) { | 2711 | if (!err) { |
2684 | ceph_msg_get(req->r_request); | 2712 | ceph_msg_get(req->r_request); |
2685 | ceph_con_send(&session->s_con, req->r_request); | 2713 | ceph_con_send(&session->s_con, req->r_request); |
@@ -2864,7 +2892,8 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, | |||
2864 | spin_unlock(&session->s_cap_lock); | 2892 | spin_unlock(&session->s_cap_lock); |
2865 | 2893 | ||
2866 | /* trim unused caps to reduce MDS's cache rejoin time */ | 2894 | /* trim unused caps to reduce MDS's cache rejoin time */ |
2867 | shrink_dcache_parent(mdsc->fsc->sb->s_root); | 2895 | if (mdsc->fsc->sb->s_root) |
2896 | shrink_dcache_parent(mdsc->fsc->sb->s_root); | ||
2868 | 2897 | ||
2869 | ceph_con_close(&session->s_con); | 2898 | ceph_con_close(&session->s_con); |
2870 | ceph_con_open(&session->s_con, | 2899 | ceph_con_open(&session->s_con, |
@@ -3133,7 +3162,7 @@ static void handle_lease(struct ceph_mds_client *mdsc, | |||
3133 | di->lease_renew_from && | 3162 | di->lease_renew_from && |
3134 | di->lease_renew_after == 0) { | 3163 | di->lease_renew_after == 0) { |
3135 | unsigned long duration = | 3164 | unsigned long duration = |
3136 | le32_to_cpu(h->duration_ms) * HZ / 1000; | 3165 | msecs_to_jiffies(le32_to_cpu(h->duration_ms)); |
3137 | 3166 | ||
3138 | di->lease_seq = seq; | 3167 | di->lease_seq = seq; |
3139 | dentry->d_time = di->lease_renew_from + duration; | 3168 | dentry->d_time = di->lease_renew_from + duration; |
diff --git a/fs/ceph/strings.c b/fs/ceph/strings.c index 51cc23e48111..89e6bc321df3 100644 --- a/fs/ceph/strings.c +++ b/fs/ceph/strings.c | |||
@@ -75,6 +75,7 @@ const char *ceph_mds_op_name(int op) | |||
75 | case CEPH_MDS_OP_LSSNAP: return "lssnap"; | 75 | case CEPH_MDS_OP_LSSNAP: return "lssnap"; |
76 | case CEPH_MDS_OP_MKSNAP: return "mksnap"; | 76 | case CEPH_MDS_OP_MKSNAP: return "mksnap"; |
77 | case CEPH_MDS_OP_RMSNAP: return "rmsnap"; | 77 | case CEPH_MDS_OP_RMSNAP: return "rmsnap"; |
78 | case CEPH_MDS_OP_RENAMESNAP: return "renamesnap"; | ||
78 | case CEPH_MDS_OP_SETFILELOCK: return "setfilelock"; | 79 | case CEPH_MDS_OP_SETFILELOCK: return "setfilelock"; |
79 | case CEPH_MDS_OP_GETFILELOCK: return "getfilelock"; | 80 | case CEPH_MDS_OP_GETFILELOCK: return "getfilelock"; |
80 | } | 81 | } |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index a63997b8bcff..e463ebd69a9c 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
@@ -345,6 +345,11 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt, | |||
345 | fsopt->rsize = CEPH_RSIZE_DEFAULT; | 345 | fsopt->rsize = CEPH_RSIZE_DEFAULT; |
346 | fsopt->rasize = CEPH_RASIZE_DEFAULT; | 346 | fsopt->rasize = CEPH_RASIZE_DEFAULT; |
347 | fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); | 347 | fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); |
348 | if (!fsopt->snapdir_name) { | ||
349 | err = -ENOMEM; | ||
350 | goto out; | ||
351 | } | ||
352 | |||
348 | fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; | 353 | fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; |
349 | fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; | 354 | fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; |
350 | fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; | 355 | fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; |
@@ -406,31 +411,20 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) | |||
406 | { | 411 | { |
407 | struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb); | 412 | struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb); |
408 | struct ceph_mount_options *fsopt = fsc->mount_options; | 413 | struct ceph_mount_options *fsopt = fsc->mount_options; |
409 | struct ceph_options *opt = fsc->client->options; | 414 | size_t pos; |
410 | 415 | int ret; | |
411 | if (opt->flags & CEPH_OPT_FSID) | 416 | |
412 | seq_printf(m, ",fsid=%pU", &opt->fsid); | 417 | /* a comma between MNT/MS and client options */ |
413 | if (opt->flags & CEPH_OPT_NOSHARE) | 418 | seq_putc(m, ','); |
414 | seq_puts(m, ",noshare"); | 419 | pos = m->count; |
415 | if (opt->flags & CEPH_OPT_NOCRC) | 420 | |
416 | seq_puts(m, ",nocrc"); | 421 | ret = ceph_print_client_options(m, fsc->client); |
417 | if (opt->flags & CEPH_OPT_NOMSGAUTH) | 422 | if (ret) |
418 | seq_puts(m, ",nocephx_require_signatures"); | 423 | return ret; |
419 | if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0) | 424 | |
420 | seq_puts(m, ",notcp_nodelay"); | 425 | /* retract our comma if no client options */ |
421 | 426 | if (m->count == pos) | |
422 | if (opt->name) | 427 | m->count--; |
423 | seq_printf(m, ",name=%s", opt->name); | ||
424 | if (opt->key) | ||
425 | seq_puts(m, ",secret=<hidden>"); | ||
426 | |||
427 | if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) | ||
428 | seq_printf(m, ",mount_timeout=%d", opt->mount_timeout); | ||
429 | if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) | ||
430 | seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl); | ||
431 | if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) | ||
432 | seq_printf(m, ",osdkeepalivetimeout=%d", | ||
433 | opt->osd_keepalive_timeout); | ||
434 | 428 | ||
435 | if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT) | 429 | if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT) |
436 | seq_puts(m, ",dirstat"); | 430 | seq_puts(m, ",dirstat"); |
@@ -438,14 +432,10 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) | |||
438 | seq_puts(m, ",norbytes"); | 432 | seq_puts(m, ",norbytes"); |
439 | if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) | 433 | if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) |
440 | seq_puts(m, ",noasyncreaddir"); | 434 | seq_puts(m, ",noasyncreaddir"); |
441 | if (fsopt->flags & CEPH_MOUNT_OPT_DCACHE) | 435 | if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0) |
442 | seq_puts(m, ",dcache"); | ||
443 | else | ||
444 | seq_puts(m, ",nodcache"); | 436 | seq_puts(m, ",nodcache"); |
445 | if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) | 437 | if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) |
446 | seq_puts(m, ",fsc"); | 438 | seq_puts(m, ",fsc"); |
447 | else | ||
448 | seq_puts(m, ",nofsc"); | ||
449 | 439 | ||
450 | #ifdef CONFIG_CEPH_FS_POSIX_ACL | 440 | #ifdef CONFIG_CEPH_FS_POSIX_ACL |
451 | if (fsopt->sb_flags & MS_POSIXACL) | 441 | if (fsopt->sb_flags & MS_POSIXACL) |
@@ -477,6 +467,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) | |||
477 | seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes); | 467 | seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes); |
478 | if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) | 468 | if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) |
479 | seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name); | 469 | seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name); |
470 | |||
480 | return 0; | 471 | return 0; |
481 | } | 472 | } |
482 | 473 | ||
@@ -730,6 +721,11 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, | |||
730 | if (IS_ERR(req)) | 721 | if (IS_ERR(req)) |
731 | return ERR_CAST(req); | 722 | return ERR_CAST(req); |
732 | req->r_path1 = kstrdup(path, GFP_NOFS); | 723 | req->r_path1 = kstrdup(path, GFP_NOFS); |
724 | if (!req->r_path1) { | ||
725 | root = ERR_PTR(-ENOMEM); | ||
726 | goto out; | ||
727 | } | ||
728 | |||
733 | req->r_ino1.ino = CEPH_INO_ROOT; | 729 | req->r_ino1.ino = CEPH_INO_ROOT; |
734 | req->r_ino1.snap = CEPH_NOSNAP; | 730 | req->r_ino1.snap = CEPH_NOSNAP; |
735 | req->r_started = started; | 731 | req->r_started = started; |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 04c8124ed30e..fa20e1318939 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -36,7 +36,8 @@ | |||
36 | #define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */ | 36 | #define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */ |
37 | #define CEPH_MOUNT_OPT_FSCACHE (1<<10) /* use fscache */ | 37 | #define CEPH_MOUNT_OPT_FSCACHE (1<<10) /* use fscache */ |
38 | 38 | ||
39 | #define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES) | 39 | #define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES | \ |
40 | CEPH_MOUNT_OPT_DCACHE) | ||
40 | 41 | ||
41 | #define ceph_set_mount_opt(fsc, opt) \ | 42 | #define ceph_set_mount_opt(fsc, opt) \ |
42 | (fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt; | 43 | (fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt; |
@@ -881,7 +882,6 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma); | |||
881 | 882 | ||
882 | /* file.c */ | 883 | /* file.c */ |
883 | extern const struct file_operations ceph_file_fops; | 884 | extern const struct file_operations ceph_file_fops; |
884 | extern const struct address_space_operations ceph_aops; | ||
885 | 885 | ||
886 | extern int ceph_open(struct inode *inode, struct file *file); | 886 | extern int ceph_open(struct inode *inode, struct file *file); |
887 | extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry, | 887 | extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry, |
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 5a492caf34cb..5c4c9c256931 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c | |||
@@ -877,16 +877,23 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name, | |||
877 | err = PTR_ERR(req); | 877 | err = PTR_ERR(req); |
878 | goto out; | 878 | goto out; |
879 | } | 879 | } |
880 | req->r_inode = inode; | 880 | |
881 | ihold(inode); | ||
882 | req->r_inode_drop = CEPH_CAP_XATTR_SHARED; | ||
883 | req->r_num_caps = 1; | ||
884 | req->r_args.setxattr.flags = cpu_to_le32(flags); | 881 | req->r_args.setxattr.flags = cpu_to_le32(flags); |
885 | req->r_path2 = kstrdup(name, GFP_NOFS); | 882 | req->r_path2 = kstrdup(name, GFP_NOFS); |
883 | if (!req->r_path2) { | ||
884 | ceph_mdsc_put_request(req); | ||
885 | err = -ENOMEM; | ||
886 | goto out; | ||
887 | } | ||
886 | 888 | ||
887 | req->r_pagelist = pagelist; | 889 | req->r_pagelist = pagelist; |
888 | pagelist = NULL; | 890 | pagelist = NULL; |
889 | 891 | ||
892 | req->r_inode = inode; | ||
893 | ihold(inode); | ||
894 | req->r_num_caps = 1; | ||
895 | req->r_inode_drop = CEPH_CAP_XATTR_SHARED; | ||
896 | |||
890 | dout("xattr.ver (before): %lld\n", ci->i_xattrs.version); | 897 | dout("xattr.ver (before): %lld\n", ci->i_xattrs.version); |
891 | err = ceph_mdsc_do_request(mdsc, NULL, req); | 898 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
892 | ceph_mdsc_put_request(req); | 899 | ceph_mdsc_put_request(req); |
@@ -1019,12 +1026,14 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name) | |||
1019 | USE_AUTH_MDS); | 1026 | USE_AUTH_MDS); |
1020 | if (IS_ERR(req)) | 1027 | if (IS_ERR(req)) |
1021 | return PTR_ERR(req); | 1028 | return PTR_ERR(req); |
1029 | req->r_path2 = kstrdup(name, GFP_NOFS); | ||
1030 | if (!req->r_path2) | ||
1031 | return -ENOMEM; | ||
1032 | |||
1022 | req->r_inode = inode; | 1033 | req->r_inode = inode; |
1023 | ihold(inode); | 1034 | ihold(inode); |
1024 | req->r_inode_drop = CEPH_CAP_XATTR_SHARED; | ||
1025 | req->r_num_caps = 1; | 1035 | req->r_num_caps = 1; |
1026 | req->r_path2 = kstrdup(name, GFP_NOFS); | 1036 | req->r_inode_drop = CEPH_CAP_XATTR_SHARED; |
1027 | |||
1028 | err = ceph_mdsc_do_request(mdsc, NULL, req); | 1037 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
1029 | ceph_mdsc_put_request(req); | 1038 | ceph_mdsc_put_request(req); |
1030 | return err; | 1039 | return err; |
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 71e05bbf8ceb..4763ad64e832 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h | |||
@@ -50,6 +50,19 @@ | |||
50 | #define CEPH_FEATURE_MDS_INLINE_DATA (1ULL<<40) | 50 | #define CEPH_FEATURE_MDS_INLINE_DATA (1ULL<<40) |
51 | #define CEPH_FEATURE_CRUSH_TUNABLES3 (1ULL<<41) | 51 | #define CEPH_FEATURE_CRUSH_TUNABLES3 (1ULL<<41) |
52 | #define CEPH_FEATURE_OSD_PRIMARY_AFFINITY (1ULL<<41) /* overlap w/ tunables3 */ | 52 | #define CEPH_FEATURE_OSD_PRIMARY_AFFINITY (1ULL<<41) /* overlap w/ tunables3 */ |
53 | #define CEPH_FEATURE_MSGR_KEEPALIVE2 (1ULL<<42) | ||
54 | #define CEPH_FEATURE_OSD_POOLRESEND (1ULL<<43) | ||
55 | #define CEPH_FEATURE_ERASURE_CODE_PLUGINS_V2 (1ULL<<44) | ||
56 | #define CEPH_FEATURE_OSD_SET_ALLOC_HINT (1ULL<<45) | ||
57 | #define CEPH_FEATURE_OSD_FADVISE_FLAGS (1ULL<<46) | ||
58 | #define CEPH_FEATURE_OSD_REPOP (1ULL<<46) /* overlap with fadvise */ | ||
59 | #define CEPH_FEATURE_OSD_OBJECT_DIGEST (1ULL<<46) /* overlap with fadvise */ | ||
60 | #define CEPH_FEATURE_OSD_TRANSACTION_MAY_LAYOUT (1ULL<<46) /* overlap w/ fadvise */ | ||
61 | #define CEPH_FEATURE_MDS_QUOTA (1ULL<<47) | ||
62 | #define CEPH_FEATURE_CRUSH_V4 (1ULL<<48) /* straw2 buckets */ | ||
63 | #define CEPH_FEATURE_OSD_MIN_SIZE_RECOVERY (1ULL<<49) | ||
64 | // duplicated since it was introduced at the same time as MIN_SIZE_RECOVERY | ||
65 | #define CEPH_FEATURE_OSD_PROXY_FEATURES (1ULL<<49) /* overlap w/ above */ | ||
53 | 66 | ||
54 | /* | 67 | /* |
55 | * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature | 68 | * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature |
@@ -93,7 +106,8 @@ static inline u64 ceph_sanitize_features(u64 features) | |||
93 | CEPH_FEATURE_EXPORT_PEER | \ | 106 | CEPH_FEATURE_EXPORT_PEER | \ |
94 | CEPH_FEATURE_OSDMAP_ENC | \ | 107 | CEPH_FEATURE_OSDMAP_ENC | \ |
95 | CEPH_FEATURE_CRUSH_TUNABLES3 | \ | 108 | CEPH_FEATURE_CRUSH_TUNABLES3 | \ |
96 | CEPH_FEATURE_OSD_PRIMARY_AFFINITY) | 109 | CEPH_FEATURE_OSD_PRIMARY_AFFINITY | \ |
110 | CEPH_FEATURE_CRUSH_V4) | ||
97 | 111 | ||
98 | #define CEPH_FEATURES_REQUIRED_DEFAULT \ | 112 | #define CEPH_FEATURES_REQUIRED_DEFAULT \ |
99 | (CEPH_FEATURE_NOSRCADDR | \ | 113 | (CEPH_FEATURE_NOSRCADDR | \ |
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 31eb03d0c766..d7d072a25c27 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h | |||
@@ -323,6 +323,7 @@ enum { | |||
323 | CEPH_MDS_OP_MKSNAP = 0x01400, | 323 | CEPH_MDS_OP_MKSNAP = 0x01400, |
324 | CEPH_MDS_OP_RMSNAP = 0x01401, | 324 | CEPH_MDS_OP_RMSNAP = 0x01401, |
325 | CEPH_MDS_OP_LSSNAP = 0x00402, | 325 | CEPH_MDS_OP_LSSNAP = 0x00402, |
326 | CEPH_MDS_OP_RENAMESNAP = 0x01403, | ||
326 | }; | 327 | }; |
327 | 328 | ||
328 | extern const char *ceph_mds_op_name(int op); | 329 | extern const char *ceph_mds_op_name(int op); |
diff --git a/include/linux/ceph/debugfs.h b/include/linux/ceph/debugfs.h index 1df086d7882d..29cf897cc5cd 100644 --- a/include/linux/ceph/debugfs.h +++ b/include/linux/ceph/debugfs.h | |||
@@ -7,13 +7,7 @@ | |||
7 | #define CEPH_DEFINE_SHOW_FUNC(name) \ | 7 | #define CEPH_DEFINE_SHOW_FUNC(name) \ |
8 | static int name##_open(struct inode *inode, struct file *file) \ | 8 | static int name##_open(struct inode *inode, struct file *file) \ |
9 | { \ | 9 | { \ |
10 | struct seq_file *sf; \ | 10 | return single_open(file, name, inode->i_private); \ |
11 | int ret; \ | ||
12 | \ | ||
13 | ret = single_open(file, name, NULL); \ | ||
14 | sf = file->private_data; \ | ||
15 | sf->private = inode->i_private; \ | ||
16 | return ret; \ | ||
17 | } \ | 11 | } \ |
18 | \ | 12 | \ |
19 | static const struct file_operations name##_fops = { \ | 13 | static const struct file_operations name##_fops = { \ |
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 16fff9608848..30f92cefaa72 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h | |||
@@ -135,6 +135,7 @@ struct ceph_client { | |||
135 | struct dentry *debugfs_dir; | 135 | struct dentry *debugfs_dir; |
136 | struct dentry *debugfs_monmap; | 136 | struct dentry *debugfs_monmap; |
137 | struct dentry *debugfs_osdmap; | 137 | struct dentry *debugfs_osdmap; |
138 | struct dentry *debugfs_options; | ||
138 | #endif | 139 | #endif |
139 | }; | 140 | }; |
140 | 141 | ||
@@ -191,6 +192,7 @@ extern struct ceph_options *ceph_parse_options(char *options, | |||
191 | const char *dev_name, const char *dev_name_end, | 192 | const char *dev_name, const char *dev_name_end, |
192 | int (*parse_extra_token)(char *c, void *private), | 193 | int (*parse_extra_token)(char *c, void *private), |
193 | void *private); | 194 | void *private); |
195 | int ceph_print_client_options(struct seq_file *m, struct ceph_client *client); | ||
194 | extern void ceph_destroy_options(struct ceph_options *opt); | 196 | extern void ceph_destroy_options(struct ceph_options *opt); |
195 | extern int ceph_compare_options(struct ceph_options *new_opt, | 197 | extern int ceph_compare_options(struct ceph_options *new_opt, |
196 | struct ceph_client *client); | 198 | struct ceph_client *client); |
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 561ea896c657..e55c08bc3a96 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h | |||
@@ -175,13 +175,12 @@ static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid) | |||
175 | __u8 version; | 175 | __u8 version; |
176 | 176 | ||
177 | if (!ceph_has_room(p, end, 1 + 8 + 4 + 4)) { | 177 | if (!ceph_has_room(p, end, 1 + 8 + 4 + 4)) { |
178 | pr_warning("incomplete pg encoding"); | 178 | pr_warn("incomplete pg encoding\n"); |
179 | |||
180 | return -EINVAL; | 179 | return -EINVAL; |
181 | } | 180 | } |
182 | version = ceph_decode_8(p); | 181 | version = ceph_decode_8(p); |
183 | if (version > 1) { | 182 | if (version > 1) { |
184 | pr_warning("do not understand pg encoding %d > 1", | 183 | pr_warn("do not understand pg encoding %d > 1\n", |
185 | (int)version); | 184 | (int)version); |
186 | return -EINVAL; | 185 | return -EINVAL; |
187 | } | 186 | } |
diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 4fad5f8ee01d..48a1a7d100f1 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h | |||
@@ -96,13 +96,15 @@ struct crush_rule { | |||
96 | * uniform O(1) poor poor | 96 | * uniform O(1) poor poor |
97 | * list O(n) optimal poor | 97 | * list O(n) optimal poor |
98 | * tree O(log n) good good | 98 | * tree O(log n) good good |
99 | * straw O(n) optimal optimal | 99 | * straw O(n) better better |
100 | * straw2 O(n) optimal optimal | ||
100 | */ | 101 | */ |
101 | enum { | 102 | enum { |
102 | CRUSH_BUCKET_UNIFORM = 1, | 103 | CRUSH_BUCKET_UNIFORM = 1, |
103 | CRUSH_BUCKET_LIST = 2, | 104 | CRUSH_BUCKET_LIST = 2, |
104 | CRUSH_BUCKET_TREE = 3, | 105 | CRUSH_BUCKET_TREE = 3, |
105 | CRUSH_BUCKET_STRAW = 4 | 106 | CRUSH_BUCKET_STRAW = 4, |
107 | CRUSH_BUCKET_STRAW2 = 5, | ||
106 | }; | 108 | }; |
107 | extern const char *crush_bucket_alg_name(int alg); | 109 | extern const char *crush_bucket_alg_name(int alg); |
108 | 110 | ||
@@ -149,6 +151,11 @@ struct crush_bucket_straw { | |||
149 | __u32 *straws; /* 16-bit fixed point */ | 151 | __u32 *straws; /* 16-bit fixed point */ |
150 | }; | 152 | }; |
151 | 153 | ||
154 | struct crush_bucket_straw2 { | ||
155 | struct crush_bucket h; | ||
156 | __u32 *item_weights; /* 16-bit fixed point */ | ||
157 | }; | ||
158 | |||
152 | 159 | ||
153 | 160 | ||
154 | /* | 161 | /* |
@@ -189,6 +196,7 @@ extern void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b); | |||
189 | extern void crush_destroy_bucket_list(struct crush_bucket_list *b); | 196 | extern void crush_destroy_bucket_list(struct crush_bucket_list *b); |
190 | extern void crush_destroy_bucket_tree(struct crush_bucket_tree *b); | 197 | extern void crush_destroy_bucket_tree(struct crush_bucket_tree *b); |
191 | extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b); | 198 | extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b); |
199 | extern void crush_destroy_bucket_straw2(struct crush_bucket_straw2 *b); | ||
192 | extern void crush_destroy_bucket(struct crush_bucket *b); | 200 | extern void crush_destroy_bucket(struct crush_bucket *b); |
193 | extern void crush_destroy_rule(struct crush_rule *r); | 201 | extern void crush_destroy_rule(struct crush_rule *r); |
194 | extern void crush_destroy(struct crush_map *map); | 202 | extern void crush_destroy(struct crush_map *map); |
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index ec565508e904..79e8f71aef5b 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c | |||
@@ -490,6 +490,43 @@ out: | |||
490 | } | 490 | } |
491 | EXPORT_SYMBOL(ceph_parse_options); | 491 | EXPORT_SYMBOL(ceph_parse_options); |
492 | 492 | ||
493 | int ceph_print_client_options(struct seq_file *m, struct ceph_client *client) | ||
494 | { | ||
495 | struct ceph_options *opt = client->options; | ||
496 | size_t pos = m->count; | ||
497 | |||
498 | if (opt->name) | ||
499 | seq_printf(m, "name=%s,", opt->name); | ||
500 | if (opt->key) | ||
501 | seq_puts(m, "secret=<hidden>,"); | ||
502 | |||
503 | if (opt->flags & CEPH_OPT_FSID) | ||
504 | seq_printf(m, "fsid=%pU,", &opt->fsid); | ||
505 | if (opt->flags & CEPH_OPT_NOSHARE) | ||
506 | seq_puts(m, "noshare,"); | ||
507 | if (opt->flags & CEPH_OPT_NOCRC) | ||
508 | seq_puts(m, "nocrc,"); | ||
509 | if (opt->flags & CEPH_OPT_NOMSGAUTH) | ||
510 | seq_puts(m, "nocephx_require_signatures,"); | ||
511 | if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0) | ||
512 | seq_puts(m, "notcp_nodelay,"); | ||
513 | |||
514 | if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) | ||
515 | seq_printf(m, "mount_timeout=%d,", opt->mount_timeout); | ||
516 | if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) | ||
517 | seq_printf(m, "osd_idle_ttl=%d,", opt->osd_idle_ttl); | ||
518 | if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) | ||
519 | seq_printf(m, "osdkeepalivetimeout=%d,", | ||
520 | opt->osd_keepalive_timeout); | ||
521 | |||
522 | /* drop redundant comma */ | ||
523 | if (m->count != pos) | ||
524 | m->count--; | ||
525 | |||
526 | return 0; | ||
527 | } | ||
528 | EXPORT_SYMBOL(ceph_print_client_options); | ||
529 | |||
493 | u64 ceph_client_id(struct ceph_client *client) | 530 | u64 ceph_client_id(struct ceph_client *client) |
494 | { | 531 | { |
495 | return client->monc.auth->global_id; | 532 | return client->monc.auth->global_id; |
diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c index 16bc199d9a62..9d84ce4ea0df 100644 --- a/net/ceph/crush/crush.c +++ b/net/ceph/crush/crush.c | |||
@@ -17,6 +17,7 @@ const char *crush_bucket_alg_name(int alg) | |||
17 | case CRUSH_BUCKET_LIST: return "list"; | 17 | case CRUSH_BUCKET_LIST: return "list"; |
18 | case CRUSH_BUCKET_TREE: return "tree"; | 18 | case CRUSH_BUCKET_TREE: return "tree"; |
19 | case CRUSH_BUCKET_STRAW: return "straw"; | 19 | case CRUSH_BUCKET_STRAW: return "straw"; |
20 | case CRUSH_BUCKET_STRAW2: return "straw2"; | ||
20 | default: return "unknown"; | 21 | default: return "unknown"; |
21 | } | 22 | } |
22 | } | 23 | } |
@@ -40,6 +41,8 @@ int crush_get_bucket_item_weight(const struct crush_bucket *b, int p) | |||
40 | return ((struct crush_bucket_tree *)b)->node_weights[crush_calc_tree_node(p)]; | 41 | return ((struct crush_bucket_tree *)b)->node_weights[crush_calc_tree_node(p)]; |
41 | case CRUSH_BUCKET_STRAW: | 42 | case CRUSH_BUCKET_STRAW: |
42 | return ((struct crush_bucket_straw *)b)->item_weights[p]; | 43 | return ((struct crush_bucket_straw *)b)->item_weights[p]; |
44 | case CRUSH_BUCKET_STRAW2: | ||
45 | return ((struct crush_bucket_straw2 *)b)->item_weights[p]; | ||
43 | } | 46 | } |
44 | return 0; | 47 | return 0; |
45 | } | 48 | } |
@@ -77,6 +80,14 @@ void crush_destroy_bucket_straw(struct crush_bucket_straw *b) | |||
77 | kfree(b); | 80 | kfree(b); |
78 | } | 81 | } |
79 | 82 | ||
83 | void crush_destroy_bucket_straw2(struct crush_bucket_straw2 *b) | ||
84 | { | ||
85 | kfree(b->item_weights); | ||
86 | kfree(b->h.perm); | ||
87 | kfree(b->h.items); | ||
88 | kfree(b); | ||
89 | } | ||
90 | |||
80 | void crush_destroy_bucket(struct crush_bucket *b) | 91 | void crush_destroy_bucket(struct crush_bucket *b) |
81 | { | 92 | { |
82 | switch (b->alg) { | 93 | switch (b->alg) { |
@@ -92,6 +103,9 @@ void crush_destroy_bucket(struct crush_bucket *b) | |||
92 | case CRUSH_BUCKET_STRAW: | 103 | case CRUSH_BUCKET_STRAW: |
93 | crush_destroy_bucket_straw((struct crush_bucket_straw *)b); | 104 | crush_destroy_bucket_straw((struct crush_bucket_straw *)b); |
94 | break; | 105 | break; |
106 | case CRUSH_BUCKET_STRAW2: | ||
107 | crush_destroy_bucket_straw2((struct crush_bucket_straw2 *)b); | ||
108 | break; | ||
95 | } | 109 | } |
96 | } | 110 | } |
97 | 111 | ||
diff --git a/net/ceph/crush/crush_ln_table.h b/net/ceph/crush/crush_ln_table.h new file mode 100644 index 000000000000..6192c7fc958c --- /dev/null +++ b/net/ceph/crush/crush_ln_table.h | |||
@@ -0,0 +1,166 @@ | |||
1 | /* | ||
2 | * Ceph - scalable distributed file system | ||
3 | * | ||
4 | * Copyright (C) 2015 Intel Corporation All Rights Reserved | ||
5 | * | ||
6 | * This is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU Lesser General Public | ||
8 | * License version 2.1, as published by the Free Software | ||
9 | * Foundation. See file COPYING. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #if defined(__linux__) | ||
14 | #include <linux/types.h> | ||
15 | #elif defined(__FreeBSD__) | ||
16 | #include <sys/types.h> | ||
17 | #endif | ||
18 | |||
19 | #ifndef CEPH_CRUSH_LN_H | ||
20 | #define CEPH_CRUSH_LN_H | ||
21 | |||
22 | |||
23 | // RH_LH_tbl[2*k] = 2^48/(1.0+k/128.0) | ||
24 | // RH_LH_tbl[2*k+1] = 2^48*log2(1.0+k/128.0) | ||
25 | |||
26 | static int64_t __RH_LH_tbl[128*2+2] = { | ||
27 | 0x0001000000000000ll, 0x0000000000000000ll, 0x0000fe03f80fe040ll, 0x000002dfca16dde1ll, | ||
28 | 0x0000fc0fc0fc0fc1ll, 0x000005b9e5a170b4ll, 0x0000fa232cf25214ll, 0x0000088e68ea899all, | ||
29 | 0x0000f83e0f83e0f9ll, 0x00000b5d69bac77ell, 0x0000f6603d980f67ll, 0x00000e26fd5c8555ll, | ||
30 | 0x0000f4898d5f85bcll, 0x000010eb389fa29fll, 0x0000f2b9d6480f2cll, 0x000013aa2fdd27f1ll, | ||
31 | 0x0000f0f0f0f0f0f1ll, 0x00001663f6fac913ll, 0x0000ef2eb71fc435ll, 0x00001918a16e4633ll, | ||
32 | 0x0000ed7303b5cc0fll, 0x00001bc84240adabll, 0x0000ebbdb2a5c162ll, 0x00001e72ec117fa5ll, | ||
33 | 0x0000ea0ea0ea0ea1ll, 0x00002118b119b4f3ll, 0x0000e865ac7b7604ll, 0x000023b9a32eaa56ll, | ||
34 | 0x0000e6c2b4481cd9ll, 0x00002655d3c4f15cll, 0x0000e525982af70dll, 0x000028ed53f307eell, | ||
35 | 0x0000e38e38e38e39ll, 0x00002b803473f7adll, 0x0000e1fc780e1fc8ll, 0x00002e0e85a9de04ll, | ||
36 | 0x0000e070381c0e08ll, 0x0000309857a05e07ll, 0x0000dee95c4ca038ll, 0x0000331dba0efce1ll, | ||
37 | 0x0000dd67c8a60dd7ll, 0x0000359ebc5b69d9ll, 0x0000dbeb61eed19dll, 0x0000381b6d9bb29bll, | ||
38 | 0x0000da740da740dbll, 0x00003a93dc9864b2ll, 0x0000d901b2036407ll, 0x00003d0817ce9cd4ll, | ||
39 | 0x0000d79435e50d7all, 0x00003f782d7204d0ll, 0x0000d62b80d62b81ll, 0x000041e42b6ec0c0ll, | ||
40 | 0x0000d4c77b03531ell, 0x0000444c1f6b4c2dll, 0x0000d3680d3680d4ll, 0x000046b016ca47c1ll, | ||
41 | 0x0000d20d20d20d21ll, 0x000049101eac381cll, 0x0000d0b69fcbd259ll, 0x00004b6c43f1366all, | ||
42 | 0x0000cf6474a8819fll, 0x00004dc4933a9337ll, 0x0000ce168a772509ll, 0x0000501918ec6c11ll, | ||
43 | 0x0000cccccccccccdll, 0x00005269e12f346ell, 0x0000cb8727c065c4ll, 0x000054b6f7f1325all, | ||
44 | 0x0000ca4587e6b750ll, 0x0000570068e7ef5all, 0x0000c907da4e8712ll, 0x000059463f919deell, | ||
45 | 0x0000c7ce0c7ce0c8ll, 0x00005b8887367433ll, 0x0000c6980c6980c7ll, 0x00005dc74ae9fbecll, | ||
46 | 0x0000c565c87b5f9ell, 0x00006002958c5871ll, 0x0000c4372f855d83ll, 0x0000623a71cb82c8ll, | ||
47 | 0x0000c30c30c30c31ll, 0x0000646eea247c5cll, 0x0000c1e4bbd595f7ll, 0x000066a008e4788cll, | ||
48 | 0x0000c0c0c0c0c0c1ll, 0x000068cdd829fd81ll, 0x0000bfa02fe80bfbll, 0x00006af861e5fc7dll, | ||
49 | 0x0000be82fa0be830ll, 0x00006d1fafdce20all, 0x0000bd6910470767ll, 0x00006f43cba79e40ll, | ||
50 | 0x0000bc52640bc527ll, 0x00007164beb4a56dll, 0x0000bb3ee721a54ell, 0x000073829248e961ll, | ||
51 | 0x0000ba2e8ba2e8bbll, 0x0000759d4f80cba8ll, 0x0000b92143fa36f6ll, 0x000077b4ff5108d9ll, | ||
52 | 0x0000b81702e05c0cll, 0x000079c9aa879d53ll, 0x0000b70fbb5a19bfll, 0x00007bdb59cca388ll, | ||
53 | 0x0000b60b60b60b61ll, 0x00007dea15a32c1bll, 0x0000b509e68a9b95ll, 0x00007ff5e66a0ffell, | ||
54 | 0x0000b40b40b40b41ll, 0x000081fed45cbccbll, 0x0000b30f63528918ll, 0x00008404e793fb81ll, | ||
55 | 0x0000b21642c8590cll, 0x000086082806b1d5ll, 0x0000b11fd3b80b12ll, 0x000088089d8a9e47ll, | ||
56 | 0x0000b02c0b02c0b1ll, 0x00008a064fd50f2all, 0x0000af3addc680b0ll, 0x00008c01467b94bbll, | ||
57 | 0x0000ae4c415c9883ll, 0x00008df988f4ae80ll, 0x0000ad602b580ad7ll, 0x00008fef1e987409ll, | ||
58 | 0x0000ac7691840ac8ll, 0x000091e20ea1393ell, 0x0000ab8f69e2835all, 0x000093d2602c2e5fll, | ||
59 | 0x0000aaaaaaaaaaabll, 0x000095c01a39fbd6ll, 0x0000a9c84a47a080ll, 0x000097ab43af59f9ll, | ||
60 | 0x0000a8e83f5717c1ll, 0x00009993e355a4e5ll, 0x0000a80a80a80a81ll, 0x00009b79ffdb6c8bll, | ||
61 | 0x0000a72f0539782all, 0x00009d5d9fd5010bll, 0x0000a655c4392d7cll, 0x00009f3ec9bcfb80ll, | ||
62 | 0x0000a57eb50295fbll, 0x0000a11d83f4c355ll, 0x0000a4a9cf1d9684ll, 0x0000a2f9d4c51039ll, | ||
63 | 0x0000a3d70a3d70a4ll, 0x0000a4d3c25e68dcll, 0x0000a3065e3fae7dll, 0x0000a6ab52d99e76ll, | ||
64 | 0x0000a237c32b16d0ll, 0x0000a8808c384547ll, 0x0000a16b312ea8fdll, 0x0000aa5374652a1cll, | ||
65 | 0x0000a0a0a0a0a0a1ll, 0x0000ac241134c4e9ll, 0x00009fd809fd80a0ll, 0x0000adf26865a8a1ll, | ||
66 | 0x00009f1165e72549ll, 0x0000afbe7fa0f04dll, 0x00009e4cad23dd60ll, 0x0000b1885c7aa982ll, | ||
67 | 0x00009d89d89d89d9ll, 0x0000b35004723c46ll, 0x00009cc8e160c3fcll, 0x0000b5157cf2d078ll, | ||
68 | 0x00009c09c09c09c1ll, 0x0000b6d8cb53b0call, 0x00009b4c6f9ef03bll, 0x0000b899f4d8ab63ll, | ||
69 | 0x00009a90e7d95bc7ll, 0x0000ba58feb2703all, 0x000099d722dabde6ll, 0x0000bc15edfeed32ll, | ||
70 | 0x0000991f1a515886ll, 0x0000bdd0c7c9a817ll, 0x00009868c809868dll, 0x0000bf89910c1678ll, | ||
71 | 0x000097b425ed097cll, 0x0000c1404eadf383ll, 0x000097012e025c05ll, 0x0000c2f5058593d9ll, | ||
72 | 0x0000964fda6c0965ll, 0x0000c4a7ba58377cll, 0x000095a02568095bll, 0x0000c65871da59ddll, | ||
73 | 0x000094f2094f2095ll, 0x0000c80730b00016ll, 0x0000944580944581ll, 0x0000c9b3fb6d0559ll, | ||
74 | 0x0000939a85c4093all, 0x0000cb5ed69565afll, 0x000092f113840498ll, 0x0000cd07c69d8702ll, | ||
75 | 0x0000924924924925ll, 0x0000ceaecfea8085ll, 0x000091a2b3c4d5e7ll, 0x0000d053f6d26089ll, | ||
76 | 0x000090fdbc090fdcll, 0x0000d1f73f9c70c0ll, 0x0000905a38633e07ll, 0x0000d398ae817906ll, | ||
77 | 0x00008fb823ee08fcll, 0x0000d53847ac00a6ll, 0x00008f1779d9fdc4ll, 0x0000d6d60f388e41ll, | ||
78 | 0x00008e78356d1409ll, 0x0000d8720935e643ll, 0x00008dda5202376all, 0x0000da0c39a54804ll, | ||
79 | 0x00008d3dcb08d3ddll, 0x0000dba4a47aa996ll, 0x00008ca29c046515ll, 0x0000dd3b4d9cf24bll, | ||
80 | 0x00008c08c08c08c1ll, 0x0000ded038e633f3ll, 0x00008b70344a139cll, 0x0000e0636a23e2eell, | ||
81 | 0x00008ad8f2fba939ll, 0x0000e1f4e5170d02ll, 0x00008a42f870566all, 0x0000e384ad748f0ell, | ||
82 | 0x000089ae4089ae41ll, 0x0000e512c6e54998ll, 0x0000891ac73ae982ll, 0x0000e69f35065448ll, | ||
83 | 0x0000888888888889ll, 0x0000e829fb693044ll, 0x000087f78087f781ll, 0x0000e9b31d93f98ell, | ||
84 | 0x00008767ab5f34e5ll, 0x0000eb3a9f019750ll, 0x000086d905447a35ll, 0x0000ecc08321eb30ll, | ||
85 | 0x0000864b8a7de6d2ll, 0x0000ee44cd59ffabll, 0x000085bf37612cefll, 0x0000efc781043579ll, | ||
86 | 0x0000853408534086ll, 0x0000f148a170700all, 0x000084a9f9c8084bll, 0x0000f2c831e44116ll, | ||
87 | 0x0000842108421085ll, 0x0000f446359b1353ll, 0x0000839930523fbfll, 0x0000f5c2afc65447ll, | ||
88 | 0x000083126e978d50ll, 0x0000f73da38d9d4all, 0x0000828cbfbeb9a1ll, 0x0000f8b7140edbb1ll, | ||
89 | 0x0000820820820821ll, 0x0000fa2f045e7832ll, 0x000081848da8faf1ll, 0x0000fba577877d7dll, | ||
90 | 0x0000810204081021ll, 0x0000fd1a708bbe11ll, 0x0000808080808081ll, 0x0000fe8df263f957ll, | ||
91 | 0x0000800000000000ll, 0x0000ffff00000000ll, | ||
92 | }; | ||
93 | |||
94 | |||
95 | // LL_tbl[k] = 2^48*log2(1.0+k/2^15); | ||
96 | static int64_t __LL_tbl[256] = { | ||
97 | 0x0000000000000000ull, 0x00000002e2a60a00ull, 0x000000070cb64ec5ull, 0x00000009ef50ce67ull, | ||
98 | 0x0000000cd1e588fdull, 0x0000000fb4747e9cull, 0x0000001296fdaf5eull, 0x0000001579811b58ull, | ||
99 | 0x000000185bfec2a1ull, 0x0000001b3e76a552ull, 0x0000001e20e8c380ull, 0x0000002103551d43ull, | ||
100 | 0x00000023e5bbb2b2ull, 0x00000026c81c83e4ull, 0x00000029aa7790f0ull, 0x0000002c8cccd9edull, | ||
101 | 0x0000002f6f1c5ef2ull, 0x0000003251662017ull, 0x0000003533aa1d71ull, 0x0000003815e8571aull, | ||
102 | 0x0000003af820cd26ull, 0x0000003dda537faeull, 0x00000040bc806ec8ull, 0x000000439ea79a8cull, | ||
103 | 0x0000004680c90310ull, 0x0000004962e4a86cull, 0x0000004c44fa8ab6ull, 0x0000004f270aaa06ull, | ||
104 | 0x0000005209150672ull, 0x00000054eb19a013ull, 0x00000057cd1876fdull, 0x0000005aaf118b4aull, | ||
105 | 0x0000005d9104dd0full, 0x0000006072f26c64ull, 0x0000006354da3960ull, 0x0000006636bc441aull, | ||
106 | 0x0000006918988ca8ull, 0x0000006bfa6f1322ull, 0x0000006edc3fd79full, 0x00000071be0ada35ull, | ||
107 | 0x000000749fd01afdull, 0x00000077818f9a0cull, 0x0000007a6349577aull, 0x0000007d44fd535eull, | ||
108 | 0x0000008026ab8dceull, 0x00000083085406e3ull, 0x00000085e9f6beb2ull, 0x00000088cb93b552ull, | ||
109 | 0x0000008bad2aeadcull, 0x0000008e8ebc5f65ull, 0x0000009170481305ull, 0x0000009451ce05d3ull, | ||
110 | 0x00000097334e37e5ull, 0x0000009a14c8a953ull, 0x0000009cf63d5a33ull, 0x0000009fd7ac4a9dull, | ||
111 | 0x000000a2b07f3458ull, 0x000000a59a78ea6aull, 0x000000a87bd699fbull, 0x000000ab5d2e8970ull, | ||
112 | 0x000000ae3e80b8e3ull, 0x000000b11fcd2869ull, 0x000000b40113d818ull, 0x000000b6e254c80aull, | ||
113 | 0x000000b9c38ff853ull, 0x000000bca4c5690cull, 0x000000bf85f51a4aull, 0x000000c2671f0c26ull, | ||
114 | 0x000000c548433eb6ull, 0x000000c82961b211ull, 0x000000cb0a7a664dull, 0x000000cdeb8d5b82ull, | ||
115 | 0x000000d0cc9a91c8ull, 0x000000d3ada20933ull, 0x000000d68ea3c1ddull, 0x000000d96f9fbbdbull, | ||
116 | 0x000000dc5095f744ull, 0x000000df31867430ull, 0x000000e2127132b5ull, 0x000000e4f35632eaull, | ||
117 | 0x000000e7d43574e6ull, 0x000000eab50ef8c1ull, 0x000000ed95e2be90ull, 0x000000f076b0c66cull, | ||
118 | 0x000000f35779106aull, 0x000000f6383b9ca2ull, 0x000000f918f86b2aull, 0x000000fbf9af7c1aull, | ||
119 | 0x000000feda60cf88ull, 0x00000101bb0c658cull, 0x000001049bb23e3cull, 0x000001077c5259afull, | ||
120 | 0x0000010a5cecb7fcull, 0x0000010d3d81593aull, 0x000001101e103d7full, 0x00000112fe9964e4ull, | ||
121 | 0x00000115df1ccf7eull, 0x00000118bf9a7d64ull, 0x0000011ba0126eadull, 0x0000011e8084a371ull, | ||
122 | 0x0000012160f11bc6ull, 0x000001244157d7c3ull, 0x0000012721b8d77full, 0x0000012a02141b10ull, | ||
123 | 0x0000012ce269a28eull, 0x0000012fc2b96e0full, 0x00000132a3037daaull, 0x000001358347d177ull, | ||
124 | 0x000001386386698cull, 0x0000013b43bf45ffull, 0x0000013e23f266e9ull, 0x00000141041fcc5eull, | ||
125 | 0x00000143e4477678ull, 0x00000146c469654bull, 0x00000149a48598f0ull, 0x0000014c849c117cull, | ||
126 | 0x0000014f64accf08ull, 0x0000015244b7d1a9ull, 0x0000015524bd1976ull, 0x0000015804bca687ull, | ||
127 | 0x0000015ae4b678f2ull, 0x0000015dc4aa90ceull, 0x00000160a498ee31ull, 0x0000016384819134ull, | ||
128 | 0x00000166646479ecull, 0x000001694441a870ull, 0x0000016c24191cd7ull, 0x0000016df6ca19bdull, | ||
129 | 0x00000171e3b6d7aaull, 0x00000174c37d1e44ull, 0x00000177a33dab1cull, 0x0000017a82f87e49ull, | ||
130 | 0x0000017d62ad97e2ull, 0x00000180425cf7feull, 0x00000182b07f3458ull, 0x0000018601aa8c19ull, | ||
131 | 0x00000188e148c046ull, 0x0000018bc0e13b52ull, 0x0000018ea073fd52ull, 0x000001918001065dull, | ||
132 | 0x000001945f88568bull, 0x000001973f09edf2ull, 0x0000019a1e85ccaaull, 0x0000019cfdfbf2c8ull, | ||
133 | 0x0000019fdd6c6063ull, 0x000001a2bcd71593ull, 0x000001a59c3c126eull, 0x000001a87b9b570bull, | ||
134 | 0x000001ab5af4e380ull, 0x000001ae3a48b7e5ull, 0x000001b11996d450ull, 0x000001b3f8df38d9ull, | ||
135 | 0x000001b6d821e595ull, 0x000001b9b75eda9bull, 0x000001bc96961803ull, 0x000001bf75c79de3ull, | ||
136 | 0x000001c254f36c51ull, 0x000001c534198365ull, 0x000001c81339e336ull, 0x000001caf2548bd9ull, | ||
137 | 0x000001cdd1697d67ull, 0x000001d0b078b7f5ull, 0x000001d38f823b9aull, 0x000001d66e86086dull, | ||
138 | 0x000001d94d841e86ull, 0x000001dc2c7c7df9ull, 0x000001df0b6f26dfull, 0x000001e1ea5c194eull, | ||
139 | 0x000001e4c943555dull, 0x000001e7a824db23ull, 0x000001ea8700aab5ull, 0x000001ed65d6c42bull, | ||
140 | 0x000001f044a7279dull, 0x000001f32371d51full, 0x000001f60236cccaull, 0x000001f8e0f60eb3ull, | ||
141 | 0x000001fbbfaf9af3ull, 0x000001fe9e63719eull, 0x000002017d1192ccull, 0x000002045bb9fe94ull, | ||
142 | 0x000002073a5cb50dull, 0x00000209c06e6212ull, 0x0000020cf791026aull, 0x0000020fd622997cull, | ||
143 | 0x00000212b07f3458ull, 0x000002159334a8d8ull, 0x0000021871b52150ull, 0x0000021b502fe517ull, | ||
144 | 0x0000021d6a73a78full, 0x000002210d144eeeull, 0x00000223eb7df52cull, 0x00000226c9e1e713ull, | ||
145 | 0x00000229a84024bbull, 0x0000022c23679b4eull, 0x0000022f64eb83a8ull, 0x000002324338a51bull, | ||
146 | 0x00000235218012a9ull, 0x00000237ffc1cc69ull, 0x0000023a2c3b0ea4ull, 0x0000023d13ee805bull, | ||
147 | 0x0000024035e9221full, 0x00000243788faf25ull, 0x0000024656b4e735ull, 0x00000247ed646bfeull, | ||
148 | 0x0000024c12ee3d98ull, 0x0000024ef1025c1aull, 0x00000251cf10c799ull, 0x0000025492644d65ull, | ||
149 | 0x000002578b1c85eeull, 0x0000025a6919d8f0ull, 0x0000025d13ee805bull, 0x0000026025036716ull, | ||
150 | 0x0000026296453882ull, 0x00000265e0d62b53ull, 0x00000268beb701f3ull, 0x0000026b9c92265eull, | ||
151 | 0x0000026d32f798a9ull, 0x00000271583758ebull, 0x000002743601673bull, 0x0000027713c5c3b0ull, | ||
152 | 0x00000279f1846e5full, 0x0000027ccf3d6761ull, 0x0000027e6580aecbull, 0x000002828a9e44b3ull, | ||
153 | 0x0000028568462932ull, 0x00000287bdbf5255ull, 0x0000028b2384de4aull, 0x0000028d13ee805bull, | ||
154 | 0x0000029035e9221full, 0x0000029296453882ull, 0x0000029699bdfb61ull, 0x0000029902a37aabull, | ||
155 | 0x0000029c54b864c9ull, 0x0000029deabd1083ull, 0x000002a20f9c0bb5ull, 0x000002a4c7605d61ull, | ||
156 | 0x000002a7bdbf5255ull, 0x000002a96056dafcull, 0x000002ac3daf14efull, 0x000002af1b019ecaull, | ||
157 | 0x000002b296453882ull, 0x000002b5d022d80full, 0x000002b8fa471cb3ull, 0x000002ba9012e713ull, | ||
158 | 0x000002bd6d4901ccull, 0x000002c04a796cf6ull, 0x000002c327a428a6ull, 0x000002c61a5e8f4cull, | ||
159 | 0x000002c8e1e891f6ull, 0x000002cbbf023fc2ull, 0x000002ce9c163e6eull, 0x000002d179248e13ull, | ||
160 | 0x000002d4562d2ec6ull, 0x000002d73330209dull, 0x000002da102d63b0ull, 0x000002dced24f814ull, | ||
161 | }; | ||
162 | |||
163 | |||
164 | |||
165 | |||
166 | #endif | ||
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index a1ef53c04415..5b47736d27d9 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c | |||
@@ -20,7 +20,7 @@ | |||
20 | 20 | ||
21 | #include <linux/crush/crush.h> | 21 | #include <linux/crush/crush.h> |
22 | #include <linux/crush/hash.h> | 22 | #include <linux/crush/hash.h> |
23 | #include <linux/crush/mapper.h> | 23 | #include "crush_ln_table.h" |
24 | 24 | ||
25 | /* | 25 | /* |
26 | * Implement the core CRUSH mapping algorithm. | 26 | * Implement the core CRUSH mapping algorithm. |
@@ -238,6 +238,102 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket, | |||
238 | return bucket->h.items[high]; | 238 | return bucket->h.items[high]; |
239 | } | 239 | } |
240 | 240 | ||
241 | // compute 2^44*log2(input+1) | ||
242 | uint64_t crush_ln(unsigned xin) | ||
243 | { | ||
244 | unsigned x=xin, x1; | ||
245 | int iexpon, index1, index2; | ||
246 | uint64_t RH, LH, LL, xl64, result; | ||
247 | |||
248 | x++; | ||
249 | |||
250 | // normalize input | ||
251 | iexpon = 15; | ||
252 | while(!(x&0x18000)) { x<<=1; iexpon--; } | ||
253 | |||
254 | index1 = (x>>8)<<1; | ||
255 | // RH ~ 2^56/index1 | ||
256 | RH = __RH_LH_tbl[index1 - 256]; | ||
257 | // LH ~ 2^48 * log2(index1/256) | ||
258 | LH = __RH_LH_tbl[index1 + 1 - 256]; | ||
259 | |||
260 | // RH*x ~ 2^48 * (2^15 + xf), xf<2^8 | ||
261 | xl64 = (int64_t)x * RH; | ||
262 | xl64 >>= 48; | ||
263 | x1 = xl64; | ||
264 | |||
265 | result = iexpon; | ||
266 | result <<= (12 + 32); | ||
267 | |||
268 | index2 = x1 & 0xff; | ||
269 | // LL ~ 2^48*log2(1.0+index2/2^15) | ||
270 | LL = __LL_tbl[index2]; | ||
271 | |||
272 | LH = LH + LL; | ||
273 | |||
274 | LH >>= (48-12 - 32); | ||
275 | result += LH; | ||
276 | |||
277 | return result; | ||
278 | } | ||
279 | |||
280 | |||
281 | /* | ||
282 | * straw2 | ||
283 | * | ||
284 | * for reference, see: | ||
285 | * | ||
286 | * http://en.wikipedia.org/wiki/Exponential_distribution#Distribution_of_the_minimum_of_exponential_random_variables | ||
287 | * | ||
288 | */ | ||
289 | |||
290 | static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket, | ||
291 | int x, int r) | ||
292 | { | ||
293 | unsigned i, high = 0; | ||
294 | unsigned u; | ||
295 | unsigned w; | ||
296 | __s64 ln, draw, high_draw = 0; | ||
297 | |||
298 | for (i = 0; i < bucket->h.size; i++) { | ||
299 | w = bucket->item_weights[i]; | ||
300 | if (w) { | ||
301 | u = crush_hash32_3(bucket->h.hash, x, | ||
302 | bucket->h.items[i], r); | ||
303 | u &= 0xffff; | ||
304 | |||
305 | /* | ||
306 | * for some reason slightly less than 0x10000 produces | ||
307 | * a slightly more accurate distribution... probably a | ||
308 | * rounding effect. | ||
309 | * | ||
310 | * the natural log lookup table maps [0,0xffff] | ||
311 | * (corresponding to real numbers [1/0x10000, 1] to | ||
312 | * [0, 0xffffffffffff] (corresponding to real numbers | ||
313 | * [-11.090355,0]). | ||
314 | */ | ||
315 | ln = crush_ln(u) - 0x1000000000000ll; | ||
316 | |||
317 | /* | ||
318 | * divide by 16.16 fixed-point weight. note | ||
319 | * that the ln value is negative, so a larger | ||
320 | * weight means a larger (less negative) value | ||
321 | * for draw. | ||
322 | */ | ||
323 | draw = div64_s64(ln, w); | ||
324 | } else { | ||
325 | draw = S64_MIN; | ||
326 | } | ||
327 | |||
328 | if (i == 0 || draw > high_draw) { | ||
329 | high = i; | ||
330 | high_draw = draw; | ||
331 | } | ||
332 | } | ||
333 | return bucket->h.items[high]; | ||
334 | } | ||
335 | |||
336 | |||
241 | static int crush_bucket_choose(struct crush_bucket *in, int x, int r) | 337 | static int crush_bucket_choose(struct crush_bucket *in, int x, int r) |
242 | { | 338 | { |
243 | dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r); | 339 | dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r); |
@@ -255,12 +351,16 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r) | |||
255 | case CRUSH_BUCKET_STRAW: | 351 | case CRUSH_BUCKET_STRAW: |
256 | return bucket_straw_choose((struct crush_bucket_straw *)in, | 352 | return bucket_straw_choose((struct crush_bucket_straw *)in, |
257 | x, r); | 353 | x, r); |
354 | case CRUSH_BUCKET_STRAW2: | ||
355 | return bucket_straw2_choose((struct crush_bucket_straw2 *)in, | ||
356 | x, r); | ||
258 | default: | 357 | default: |
259 | dprintk("unknown bucket %d alg %d\n", in->id, in->alg); | 358 | dprintk("unknown bucket %d alg %d\n", in->id, in->alg); |
260 | return in->items[0]; | 359 | return in->items[0]; |
261 | } | 360 | } |
262 | } | 361 | } |
263 | 362 | ||
363 | |||
264 | /* | 364 | /* |
265 | * true if device is marked "out" (failed, fully offloaded) | 365 | * true if device is marked "out" (failed, fully offloaded) |
266 | * of the cluster | 366 | * of the cluster |
@@ -290,6 +390,7 @@ static int is_out(const struct crush_map *map, | |||
290 | * @type: the type of item to choose | 390 | * @type: the type of item to choose |
291 | * @out: pointer to output vector | 391 | * @out: pointer to output vector |
292 | * @outpos: our position in that vector | 392 | * @outpos: our position in that vector |
393 | * @out_size: size of the out vector | ||
293 | * @tries: number of attempts to make | 394 | * @tries: number of attempts to make |
294 | * @recurse_tries: number of attempts to have recursive chooseleaf make | 395 | * @recurse_tries: number of attempts to have recursive chooseleaf make |
295 | * @local_retries: localized retries | 396 | * @local_retries: localized retries |
@@ -304,6 +405,7 @@ static int crush_choose_firstn(const struct crush_map *map, | |||
304 | const __u32 *weight, int weight_max, | 405 | const __u32 *weight, int weight_max, |
305 | int x, int numrep, int type, | 406 | int x, int numrep, int type, |
306 | int *out, int outpos, | 407 | int *out, int outpos, |
408 | int out_size, | ||
307 | unsigned int tries, | 409 | unsigned int tries, |
308 | unsigned int recurse_tries, | 410 | unsigned int recurse_tries, |
309 | unsigned int local_retries, | 411 | unsigned int local_retries, |
@@ -322,6 +424,7 @@ static int crush_choose_firstn(const struct crush_map *map, | |||
322 | int item = 0; | 424 | int item = 0; |
323 | int itemtype; | 425 | int itemtype; |
324 | int collide, reject; | 426 | int collide, reject; |
427 | int count = out_size; | ||
325 | 428 | ||
326 | dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n", | 429 | dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n", |
327 | recurse_to_leaf ? "_LEAF" : "", | 430 | recurse_to_leaf ? "_LEAF" : "", |
@@ -329,7 +432,7 @@ static int crush_choose_firstn(const struct crush_map *map, | |||
329 | tries, recurse_tries, local_retries, local_fallback_retries, | 432 | tries, recurse_tries, local_retries, local_fallback_retries, |
330 | parent_r); | 433 | parent_r); |
331 | 434 | ||
332 | for (rep = outpos; rep < numrep; rep++) { | 435 | for (rep = outpos; rep < numrep && count > 0 ; rep++) { |
333 | /* keep trying until we get a non-out, non-colliding item */ | 436 | /* keep trying until we get a non-out, non-colliding item */ |
334 | ftotal = 0; | 437 | ftotal = 0; |
335 | skip_rep = 0; | 438 | skip_rep = 0; |
@@ -403,7 +506,7 @@ static int crush_choose_firstn(const struct crush_map *map, | |||
403 | map->buckets[-1-item], | 506 | map->buckets[-1-item], |
404 | weight, weight_max, | 507 | weight, weight_max, |
405 | x, outpos+1, 0, | 508 | x, outpos+1, 0, |
406 | out2, outpos, | 509 | out2, outpos, count, |
407 | recurse_tries, 0, | 510 | recurse_tries, 0, |
408 | local_retries, | 511 | local_retries, |
409 | local_fallback_retries, | 512 | local_fallback_retries, |
@@ -463,6 +566,7 @@ reject: | |||
463 | dprintk("CHOOSE got %d\n", item); | 566 | dprintk("CHOOSE got %d\n", item); |
464 | out[outpos] = item; | 567 | out[outpos] = item; |
465 | outpos++; | 568 | outpos++; |
569 | count--; | ||
466 | } | 570 | } |
467 | 571 | ||
468 | dprintk("CHOOSE returns %d\n", outpos); | 572 | dprintk("CHOOSE returns %d\n", outpos); |
@@ -654,6 +758,7 @@ int crush_do_rule(const struct crush_map *map, | |||
654 | __u32 step; | 758 | __u32 step; |
655 | int i, j; | 759 | int i, j; |
656 | int numrep; | 760 | int numrep; |
761 | int out_size; | ||
657 | /* | 762 | /* |
658 | * the original choose_total_tries value was off by one (it | 763 | * the original choose_total_tries value was off by one (it |
659 | * counted "retries" and not "tries"). add one. | 764 | * counted "retries" and not "tries"). add one. |
@@ -761,6 +866,7 @@ int crush_do_rule(const struct crush_map *map, | |||
761 | x, numrep, | 866 | x, numrep, |
762 | curstep->arg2, | 867 | curstep->arg2, |
763 | o+osize, j, | 868 | o+osize, j, |
869 | result_max-osize, | ||
764 | choose_tries, | 870 | choose_tries, |
765 | recurse_tries, | 871 | recurse_tries, |
766 | choose_local_retries, | 872 | choose_local_retries, |
@@ -770,11 +876,13 @@ int crush_do_rule(const struct crush_map *map, | |||
770 | c+osize, | 876 | c+osize, |
771 | 0); | 877 | 0); |
772 | } else { | 878 | } else { |
879 | out_size = ((numrep < (result_max-osize)) ? | ||
880 | numrep : (result_max-osize)); | ||
773 | crush_choose_indep( | 881 | crush_choose_indep( |
774 | map, | 882 | map, |
775 | map->buckets[-1-w[i]], | 883 | map->buckets[-1-w[i]], |
776 | weight, weight_max, | 884 | weight, weight_max, |
777 | x, numrep, numrep, | 885 | x, out_size, numrep, |
778 | curstep->arg2, | 886 | curstep->arg2, |
779 | o+osize, j, | 887 | o+osize, j, |
780 | choose_tries, | 888 | choose_tries, |
@@ -783,7 +891,7 @@ int crush_do_rule(const struct crush_map *map, | |||
783 | recurse_to_leaf, | 891 | recurse_to_leaf, |
784 | c+osize, | 892 | c+osize, |
785 | 0); | 893 | 0); |
786 | osize += numrep; | 894 | osize += out_size; |
787 | } | 895 | } |
788 | } | 896 | } |
789 | 897 | ||
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 14d9995097cc..593dc2eabcc8 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c | |||
@@ -22,6 +22,7 @@ | |||
22 | * .../monmap - current monmap | 22 | * .../monmap - current monmap |
23 | * .../osdc - active osd requests | 23 | * .../osdc - active osd requests |
24 | * .../monc - mon client state | 24 | * .../monc - mon client state |
25 | * .../client_options - libceph-only (i.e. not rbd or cephfs) options | ||
25 | * .../dentry_lru - dump contents of dentry lru | 26 | * .../dentry_lru - dump contents of dentry lru |
26 | * .../caps - expose cap (reservation) stats | 27 | * .../caps - expose cap (reservation) stats |
27 | * .../bdi - symlink to ../../bdi/something | 28 | * .../bdi - symlink to ../../bdi/something |
@@ -177,10 +178,24 @@ static int osdc_show(struct seq_file *s, void *pp) | |||
177 | return 0; | 178 | return 0; |
178 | } | 179 | } |
179 | 180 | ||
181 | static int client_options_show(struct seq_file *s, void *p) | ||
182 | { | ||
183 | struct ceph_client *client = s->private; | ||
184 | int ret; | ||
185 | |||
186 | ret = ceph_print_client_options(s, client); | ||
187 | if (ret) | ||
188 | return ret; | ||
189 | |||
190 | seq_putc(s, '\n'); | ||
191 | return 0; | ||
192 | } | ||
193 | |||
180 | CEPH_DEFINE_SHOW_FUNC(monmap_show) | 194 | CEPH_DEFINE_SHOW_FUNC(monmap_show) |
181 | CEPH_DEFINE_SHOW_FUNC(osdmap_show) | 195 | CEPH_DEFINE_SHOW_FUNC(osdmap_show) |
182 | CEPH_DEFINE_SHOW_FUNC(monc_show) | 196 | CEPH_DEFINE_SHOW_FUNC(monc_show) |
183 | CEPH_DEFINE_SHOW_FUNC(osdc_show) | 197 | CEPH_DEFINE_SHOW_FUNC(osdc_show) |
198 | CEPH_DEFINE_SHOW_FUNC(client_options_show) | ||
184 | 199 | ||
185 | int ceph_debugfs_init(void) | 200 | int ceph_debugfs_init(void) |
186 | { | 201 | { |
@@ -242,6 +257,14 @@ int ceph_debugfs_client_init(struct ceph_client *client) | |||
242 | if (!client->debugfs_osdmap) | 257 | if (!client->debugfs_osdmap) |
243 | goto out; | 258 | goto out; |
244 | 259 | ||
260 | client->debugfs_options = debugfs_create_file("client_options", | ||
261 | 0600, | ||
262 | client->debugfs_dir, | ||
263 | client, | ||
264 | &client_options_show_fops); | ||
265 | if (!client->debugfs_options) | ||
266 | goto out; | ||
267 | |||
245 | return 0; | 268 | return 0; |
246 | 269 | ||
247 | out: | 270 | out: |
@@ -252,6 +275,7 @@ out: | |||
252 | void ceph_debugfs_client_cleanup(struct ceph_client *client) | 275 | void ceph_debugfs_client_cleanup(struct ceph_client *client) |
253 | { | 276 | { |
254 | dout("ceph_debugfs_client_cleanup %p\n", client); | 277 | dout("ceph_debugfs_client_cleanup %p\n", client); |
278 | debugfs_remove(client->debugfs_options); | ||
255 | debugfs_remove(client->debugfs_osdmap); | 279 | debugfs_remove(client->debugfs_osdmap); |
256 | debugfs_remove(client->debugfs_monmap); | 280 | debugfs_remove(client->debugfs_monmap); |
257 | debugfs_remove(client->osdc.debugfs_file); | 281 | debugfs_remove(client->osdc.debugfs_file); |
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index a9f4ae45b7fb..967080a9f043 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c | |||
@@ -505,8 +505,6 @@ static int ceph_tcp_connect(struct ceph_connection *con) | |||
505 | pr_err("connect %s error %d\n", | 505 | pr_err("connect %s error %d\n", |
506 | ceph_pr_addr(&con->peer_addr.in_addr), ret); | 506 | ceph_pr_addr(&con->peer_addr.in_addr), ret); |
507 | sock_release(sock); | 507 | sock_release(sock); |
508 | con->error_msg = "connect error"; | ||
509 | |||
510 | return ret; | 508 | return ret; |
511 | } | 509 | } |
512 | 510 | ||
@@ -2145,12 +2143,10 @@ static int process_connect(struct ceph_connection *con) | |||
2145 | * to WAIT. This shouldn't happen if we are the | 2143 | * to WAIT. This shouldn't happen if we are the |
2146 | * client. | 2144 | * client. |
2147 | */ | 2145 | */ |
2148 | pr_err("process_connect got WAIT as client\n"); | ||
2149 | con->error_msg = "protocol error, got WAIT as client"; | 2146 | con->error_msg = "protocol error, got WAIT as client"; |
2150 | return -1; | 2147 | return -1; |
2151 | 2148 | ||
2152 | default: | 2149 | default: |
2153 | pr_err("connect protocol error, will retry\n"); | ||
2154 | con->error_msg = "protocol error, garbage tag during connect"; | 2150 | con->error_msg = "protocol error, garbage tag during connect"; |
2155 | return -1; | 2151 | return -1; |
2156 | } | 2152 | } |
@@ -2282,8 +2278,7 @@ static int read_partial_message(struct ceph_connection *con) | |||
2282 | 2278 | ||
2283 | crc = crc32c(0, &con->in_hdr, offsetof(struct ceph_msg_header, crc)); | 2279 | crc = crc32c(0, &con->in_hdr, offsetof(struct ceph_msg_header, crc)); |
2284 | if (cpu_to_le32(crc) != con->in_hdr.crc) { | 2280 | if (cpu_to_le32(crc) != con->in_hdr.crc) { |
2285 | pr_err("read_partial_message bad hdr " | 2281 | pr_err("read_partial_message bad hdr crc %u != expected %u\n", |
2286 | " crc %u != expected %u\n", | ||
2287 | crc, con->in_hdr.crc); | 2282 | crc, con->in_hdr.crc); |
2288 | return -EBADMSG; | 2283 | return -EBADMSG; |
2289 | } | 2284 | } |
@@ -2313,7 +2308,7 @@ static int read_partial_message(struct ceph_connection *con) | |||
2313 | pr_err("read_partial_message bad seq %lld expected %lld\n", | 2308 | pr_err("read_partial_message bad seq %lld expected %lld\n", |
2314 | seq, con->in_seq + 1); | 2309 | seq, con->in_seq + 1); |
2315 | con->error_msg = "bad message sequence # for incoming message"; | 2310 | con->error_msg = "bad message sequence # for incoming message"; |
2316 | return -EBADMSG; | 2311 | return -EBADE; |
2317 | } | 2312 | } |
2318 | 2313 | ||
2319 | /* allocate message? */ | 2314 | /* allocate message? */ |
@@ -2660,6 +2655,8 @@ more: | |||
2660 | switch (ret) { | 2655 | switch (ret) { |
2661 | case -EBADMSG: | 2656 | case -EBADMSG: |
2662 | con->error_msg = "bad crc"; | 2657 | con->error_msg = "bad crc"; |
2658 | /* fall through */ | ||
2659 | case -EBADE: | ||
2663 | ret = -EIO; | 2660 | ret = -EIO; |
2664 | break; | 2661 | break; |
2665 | case -EIO: | 2662 | case -EIO: |
@@ -2838,7 +2835,8 @@ static void con_work(struct work_struct *work) | |||
2838 | if (ret < 0) { | 2835 | if (ret < 0) { |
2839 | if (ret == -EAGAIN) | 2836 | if (ret == -EAGAIN) |
2840 | continue; | 2837 | continue; |
2841 | con->error_msg = "socket error on read"; | 2838 | if (!con->error_msg) |
2839 | con->error_msg = "socket error on read"; | ||
2842 | fault = true; | 2840 | fault = true; |
2843 | break; | 2841 | break; |
2844 | } | 2842 | } |
@@ -2847,7 +2845,8 @@ static void con_work(struct work_struct *work) | |||
2847 | if (ret < 0) { | 2845 | if (ret < 0) { |
2848 | if (ret == -EAGAIN) | 2846 | if (ret == -EAGAIN) |
2849 | continue; | 2847 | continue; |
2850 | con->error_msg = "socket error on write"; | 2848 | if (!con->error_msg) |
2849 | con->error_msg = "socket error on write"; | ||
2851 | fault = true; | 2850 | fault = true; |
2852 | } | 2851 | } |
2853 | 2852 | ||
@@ -2869,11 +2868,13 @@ static void con_work(struct work_struct *work) | |||
2869 | */ | 2868 | */ |
2870 | static void con_fault(struct ceph_connection *con) | 2869 | static void con_fault(struct ceph_connection *con) |
2871 | { | 2870 | { |
2872 | pr_warn("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), | ||
2873 | ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg); | ||
2874 | dout("fault %p state %lu to peer %s\n", | 2871 | dout("fault %p state %lu to peer %s\n", |
2875 | con, con->state, ceph_pr_addr(&con->peer_addr.in_addr)); | 2872 | con, con->state, ceph_pr_addr(&con->peer_addr.in_addr)); |
2876 | 2873 | ||
2874 | pr_warn("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), | ||
2875 | ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg); | ||
2876 | con->error_msg = NULL; | ||
2877 | |||
2877 | WARN_ON(con->state != CON_STATE_CONNECTING && | 2878 | WARN_ON(con->state != CON_STATE_CONNECTING && |
2878 | con->state != CON_STATE_NEGOTIATING && | 2879 | con->state != CON_STATE_NEGOTIATING && |
2879 | con->state != CON_STATE_OPEN); | 2880 | con->state != CON_STATE_OPEN); |
@@ -3295,8 +3296,8 @@ static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip) | |||
3295 | */ | 3296 | */ |
3296 | if (*skip) | 3297 | if (*skip) |
3297 | return 0; | 3298 | return 0; |
3298 | con->error_msg = "error allocating memory for incoming message"; | ||
3299 | 3299 | ||
3300 | con->error_msg = "error allocating memory for incoming message"; | ||
3300 | return -ENOMEM; | 3301 | return -ENOMEM; |
3301 | } | 3302 | } |
3302 | memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); | 3303 | memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); |
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index b8c3fde5b04f..15796696d64e 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c | |||
@@ -122,6 +122,22 @@ bad: | |||
122 | return -EINVAL; | 122 | return -EINVAL; |
123 | } | 123 | } |
124 | 124 | ||
125 | static int crush_decode_straw2_bucket(void **p, void *end, | ||
126 | struct crush_bucket_straw2 *b) | ||
127 | { | ||
128 | int j; | ||
129 | dout("crush_decode_straw2_bucket %p to %p\n", *p, end); | ||
130 | b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS); | ||
131 | if (b->item_weights == NULL) | ||
132 | return -ENOMEM; | ||
133 | ceph_decode_need(p, end, b->h.size * sizeof(u32), bad); | ||
134 | for (j = 0; j < b->h.size; j++) | ||
135 | b->item_weights[j] = ceph_decode_32(p); | ||
136 | return 0; | ||
137 | bad: | ||
138 | return -EINVAL; | ||
139 | } | ||
140 | |||
125 | static int skip_name_map(void **p, void *end) | 141 | static int skip_name_map(void **p, void *end) |
126 | { | 142 | { |
127 | int len; | 143 | int len; |
@@ -204,6 +220,9 @@ static struct crush_map *crush_decode(void *pbyval, void *end) | |||
204 | case CRUSH_BUCKET_STRAW: | 220 | case CRUSH_BUCKET_STRAW: |
205 | size = sizeof(struct crush_bucket_straw); | 221 | size = sizeof(struct crush_bucket_straw); |
206 | break; | 222 | break; |
223 | case CRUSH_BUCKET_STRAW2: | ||
224 | size = sizeof(struct crush_bucket_straw2); | ||
225 | break; | ||
207 | default: | 226 | default: |
208 | err = -EINVAL; | 227 | err = -EINVAL; |
209 | goto bad; | 228 | goto bad; |
@@ -261,6 +280,12 @@ static struct crush_map *crush_decode(void *pbyval, void *end) | |||
261 | if (err < 0) | 280 | if (err < 0) |
262 | goto bad; | 281 | goto bad; |
263 | break; | 282 | break; |
283 | case CRUSH_BUCKET_STRAW2: | ||
284 | err = crush_decode_straw2_bucket(p, end, | ||
285 | (struct crush_bucket_straw2 *)b); | ||
286 | if (err < 0) | ||
287 | goto bad; | ||
288 | break; | ||
264 | } | 289 | } |
265 | } | 290 | } |
266 | 291 | ||