diff options
Diffstat (limited to 'drivers/block/rbd.c')
| -rw-r--r-- | drivers/block/rbd.c | 935 |
1 files changed, 549 insertions, 386 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index ca63104136e0..d6d314027b5d 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c | |||
| @@ -55,6 +55,39 @@ | |||
| 55 | #define SECTOR_SHIFT 9 | 55 | #define SECTOR_SHIFT 9 |
| 56 | #define SECTOR_SIZE (1ULL << SECTOR_SHIFT) | 56 | #define SECTOR_SIZE (1ULL << SECTOR_SHIFT) |
| 57 | 57 | ||
| 58 | /* | ||
| 59 | * Increment the given counter and return its updated value. | ||
| 60 | * If the counter is already 0 it will not be incremented. | ||
| 61 | * If the counter is already at its maximum value returns | ||
| 62 | * -EINVAL without updating it. | ||
| 63 | */ | ||
| 64 | static int atomic_inc_return_safe(atomic_t *v) | ||
| 65 | { | ||
| 66 | unsigned int counter; | ||
| 67 | |||
| 68 | counter = (unsigned int)__atomic_add_unless(v, 1, 0); | ||
| 69 | if (counter <= (unsigned int)INT_MAX) | ||
| 70 | return (int)counter; | ||
| 71 | |||
| 72 | atomic_dec(v); | ||
| 73 | |||
| 74 | return -EINVAL; | ||
| 75 | } | ||
| 76 | |||
| 77 | /* Decrement the counter. Return the resulting value, or -EINVAL */ | ||
| 78 | static int atomic_dec_return_safe(atomic_t *v) | ||
| 79 | { | ||
| 80 | int counter; | ||
| 81 | |||
| 82 | counter = atomic_dec_return(v); | ||
| 83 | if (counter >= 0) | ||
| 84 | return counter; | ||
| 85 | |||
| 86 | atomic_inc(v); | ||
| 87 | |||
| 88 | return -EINVAL; | ||
| 89 | } | ||
| 90 | |||
| 58 | #define RBD_DRV_NAME "rbd" | 91 | #define RBD_DRV_NAME "rbd" |
| 59 | #define RBD_DRV_NAME_LONG "rbd (rados block device)" | 92 | #define RBD_DRV_NAME_LONG "rbd (rados block device)" |
| 60 | 93 | ||
| @@ -100,21 +133,20 @@ | |||
| 100 | * block device image metadata (in-memory version) | 133 | * block device image metadata (in-memory version) |
| 101 | */ | 134 | */ |
| 102 | struct rbd_image_header { | 135 | struct rbd_image_header { |
| 103 | /* These four fields never change for a given rbd image */ | 136 | /* These six fields never change for a given rbd image */ |
| 104 | char *object_prefix; | 137 | char *object_prefix; |
| 105 | u64 features; | ||
| 106 | __u8 obj_order; | 138 | __u8 obj_order; |
| 107 | __u8 crypt_type; | 139 | __u8 crypt_type; |
| 108 | __u8 comp_type; | 140 | __u8 comp_type; |
| 141 | u64 stripe_unit; | ||
| 142 | u64 stripe_count; | ||
| 143 | u64 features; /* Might be changeable someday? */ | ||
| 109 | 144 | ||
| 110 | /* The remaining fields need to be updated occasionally */ | 145 | /* The remaining fields need to be updated occasionally */ |
| 111 | u64 image_size; | 146 | u64 image_size; |
| 112 | struct ceph_snap_context *snapc; | 147 | struct ceph_snap_context *snapc; |
| 113 | char *snap_names; | 148 | char *snap_names; /* format 1 only */ |
| 114 | u64 *snap_sizes; | 149 | u64 *snap_sizes; /* format 1 only */ |
| 115 | |||
| 116 | u64 stripe_unit; | ||
| 117 | u64 stripe_count; | ||
| 118 | }; | 150 | }; |
| 119 | 151 | ||
| 120 | /* | 152 | /* |
| @@ -225,6 +257,7 @@ struct rbd_obj_request { | |||
| 225 | }; | 257 | }; |
| 226 | }; | 258 | }; |
| 227 | struct page **copyup_pages; | 259 | struct page **copyup_pages; |
| 260 | u32 copyup_page_count; | ||
| 228 | 261 | ||
| 229 | struct ceph_osd_request *osd_req; | 262 | struct ceph_osd_request *osd_req; |
| 230 | 263 | ||
| @@ -257,6 +290,7 @@ struct rbd_img_request { | |||
| 257 | struct rbd_obj_request *obj_request; /* obj req initiator */ | 290 | struct rbd_obj_request *obj_request; /* obj req initiator */ |
| 258 | }; | 291 | }; |
| 259 | struct page **copyup_pages; | 292 | struct page **copyup_pages; |
| 293 | u32 copyup_page_count; | ||
| 260 | spinlock_t completion_lock;/* protects next_completion */ | 294 | spinlock_t completion_lock;/* protects next_completion */ |
| 261 | u32 next_completion; | 295 | u32 next_completion; |
| 262 | rbd_img_callback_t callback; | 296 | rbd_img_callback_t callback; |
| @@ -311,6 +345,7 @@ struct rbd_device { | |||
| 311 | 345 | ||
| 312 | struct rbd_spec *parent_spec; | 346 | struct rbd_spec *parent_spec; |
| 313 | u64 parent_overlap; | 347 | u64 parent_overlap; |
| 348 | atomic_t parent_ref; | ||
| 314 | struct rbd_device *parent; | 349 | struct rbd_device *parent; |
| 315 | 350 | ||
| 316 | /* protects updating the header */ | 351 | /* protects updating the header */ |
| @@ -359,7 +394,8 @@ static ssize_t rbd_add(struct bus_type *bus, const char *buf, | |||
| 359 | size_t count); | 394 | size_t count); |
| 360 | static ssize_t rbd_remove(struct bus_type *bus, const char *buf, | 395 | static ssize_t rbd_remove(struct bus_type *bus, const char *buf, |
| 361 | size_t count); | 396 | size_t count); |
| 362 | static int rbd_dev_image_probe(struct rbd_device *rbd_dev); | 397 | static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping); |
| 398 | static void rbd_spec_put(struct rbd_spec *spec); | ||
| 363 | 399 | ||
| 364 | static struct bus_attribute rbd_bus_attrs[] = { | 400 | static struct bus_attribute rbd_bus_attrs[] = { |
| 365 | __ATTR(add, S_IWUSR, NULL, rbd_add), | 401 | __ATTR(add, S_IWUSR, NULL, rbd_add), |
| @@ -426,7 +462,8 @@ static void rbd_img_parent_read(struct rbd_obj_request *obj_request); | |||
| 426 | static void rbd_dev_remove_parent(struct rbd_device *rbd_dev); | 462 | static void rbd_dev_remove_parent(struct rbd_device *rbd_dev); |
| 427 | 463 | ||
| 428 | static int rbd_dev_refresh(struct rbd_device *rbd_dev); | 464 | static int rbd_dev_refresh(struct rbd_device *rbd_dev); |
| 429 | static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev); | 465 | static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev); |
| 466 | static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev); | ||
| 430 | static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, | 467 | static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, |
| 431 | u64 snap_id); | 468 | u64 snap_id); |
| 432 | static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, | 469 | static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, |
| @@ -726,88 +763,123 @@ static bool rbd_dev_ondisk_valid(struct rbd_image_header_ondisk *ondisk) | |||
| 726 | } | 763 | } |
| 727 | 764 | ||
| 728 | /* | 765 | /* |
| 729 | * Create a new header structure, translate header format from the on-disk | 766 | * Fill an rbd image header with information from the given format 1 |
| 730 | * header. | 767 | * on-disk header. |
| 731 | */ | 768 | */ |
| 732 | static int rbd_header_from_disk(struct rbd_image_header *header, | 769 | static int rbd_header_from_disk(struct rbd_device *rbd_dev, |
| 733 | struct rbd_image_header_ondisk *ondisk) | 770 | struct rbd_image_header_ondisk *ondisk) |
| 734 | { | 771 | { |
| 772 | struct rbd_image_header *header = &rbd_dev->header; | ||
| 773 | bool first_time = header->object_prefix == NULL; | ||
| 774 | struct ceph_snap_context *snapc; | ||
| 775 | char *object_prefix = NULL; | ||
| 776 | char *snap_names = NULL; | ||
| 777 | u64 *snap_sizes = NULL; | ||
| 735 | u32 snap_count; | 778 | u32 snap_count; |
| 736 | size_t len; | ||
| 737 | size_t size; | 779 | size_t size; |
| 780 | int ret = -ENOMEM; | ||
| 738 | u32 i; | 781 | u32 i; |
| 739 | 782 | ||
| 740 | memset(header, 0, sizeof (*header)); | 783 | /* Allocate this now to avoid having to handle failure below */ |
| 741 | 784 | ||
| 742 | snap_count = le32_to_cpu(ondisk->snap_count); | 785 | if (first_time) { |
| 786 | size_t len; | ||
| 743 | 787 | ||
| 744 | len = strnlen(ondisk->object_prefix, sizeof (ondisk->object_prefix)); | 788 | len = strnlen(ondisk->object_prefix, |
| 745 | header->object_prefix = kmalloc(len + 1, GFP_KERNEL); | 789 | sizeof (ondisk->object_prefix)); |
| 746 | if (!header->object_prefix) | 790 | object_prefix = kmalloc(len + 1, GFP_KERNEL); |
| 747 | return -ENOMEM; | 791 | if (!object_prefix) |
| 748 | memcpy(header->object_prefix, ondisk->object_prefix, len); | 792 | return -ENOMEM; |
| 749 | header->object_prefix[len] = '\0'; | 793 | memcpy(object_prefix, ondisk->object_prefix, len); |
| 794 | object_prefix[len] = '\0'; | ||
| 795 | } | ||
| 750 | 796 | ||
| 797 | /* Allocate the snapshot context and fill it in */ | ||
| 798 | |||
| 799 | snap_count = le32_to_cpu(ondisk->snap_count); | ||
| 800 | snapc = ceph_create_snap_context(snap_count, GFP_KERNEL); | ||
| 801 | if (!snapc) | ||
| 802 | goto out_err; | ||
| 803 | snapc->seq = le64_to_cpu(ondisk->snap_seq); | ||
| 751 | if (snap_count) { | 804 | if (snap_count) { |
| 805 | struct rbd_image_snap_ondisk *snaps; | ||
| 752 | u64 snap_names_len = le64_to_cpu(ondisk->snap_names_len); | 806 | u64 snap_names_len = le64_to_cpu(ondisk->snap_names_len); |
| 753 | 807 | ||
| 754 | /* Save a copy of the snapshot names */ | 808 | /* We'll keep a copy of the snapshot names... */ |
| 755 | 809 | ||
| 756 | if (snap_names_len > (u64) SIZE_MAX) | 810 | if (snap_names_len > (u64)SIZE_MAX) |
| 757 | return -EIO; | 811 | goto out_2big; |
| 758 | header->snap_names = kmalloc(snap_names_len, GFP_KERNEL); | 812 | snap_names = kmalloc(snap_names_len, GFP_KERNEL); |
| 759 | if (!header->snap_names) | 813 | if (!snap_names) |
| 760 | goto out_err; | 814 | goto out_err; |
| 815 | |||
| 816 | /* ...as well as the array of their sizes. */ | ||
| 817 | |||
| 818 | size = snap_count * sizeof (*header->snap_sizes); | ||
| 819 | snap_sizes = kmalloc(size, GFP_KERNEL); | ||
| 820 | if (!snap_sizes) | ||
| 821 | goto out_err; | ||
| 822 | |||
| 761 | /* | 823 | /* |
| 762 | * Note that rbd_dev_v1_header_read() guarantees | 824 | * Copy the names, and fill in each snapshot's id |
| 763 | * the ondisk buffer we're working with has | 825 | * and size. |
| 826 | * | ||
| 827 | * Note that rbd_dev_v1_header_info() guarantees the | ||
| 828 | * ondisk buffer we're working with has | ||
| 764 | * snap_names_len bytes beyond the end of the | 829 | * snap_names_len bytes beyond the end of the |
| 765 | * snapshot id array, this memcpy() is safe. | 830 | * snapshot id array, this memcpy() is safe. |
| 766 | */ | 831 | */ |
| 767 | memcpy(header->snap_names, &ondisk->snaps[snap_count], | 832 | memcpy(snap_names, &ondisk->snaps[snap_count], snap_names_len); |
| 768 | snap_names_len); | 833 | snaps = ondisk->snaps; |
| 834 | for (i = 0; i < snap_count; i++) { | ||
| 835 | snapc->snaps[i] = le64_to_cpu(snaps[i].id); | ||
| 836 | snap_sizes[i] = le64_to_cpu(snaps[i].image_size); | ||
| 837 | } | ||
| 838 | } | ||
| 769 | 839 | ||
| 770 | /* Record each snapshot's size */ | 840 | /* We won't fail any more, fill in the header */ |
| 771 | 841 | ||
| 772 | size = snap_count * sizeof (*header->snap_sizes); | 842 | down_write(&rbd_dev->header_rwsem); |
| 773 | header->snap_sizes = kmalloc(size, GFP_KERNEL); | 843 | if (first_time) { |
| 774 | if (!header->snap_sizes) | 844 | header->object_prefix = object_prefix; |
| 775 | goto out_err; | 845 | header->obj_order = ondisk->options.order; |
| 776 | for (i = 0; i < snap_count; i++) | 846 | header->crypt_type = ondisk->options.crypt_type; |
| 777 | header->snap_sizes[i] = | 847 | header->comp_type = ondisk->options.comp_type; |
| 778 | le64_to_cpu(ondisk->snaps[i].image_size); | 848 | /* The rest aren't used for format 1 images */ |
| 849 | header->stripe_unit = 0; | ||
| 850 | header->stripe_count = 0; | ||
| 851 | header->features = 0; | ||
| 779 | } else { | 852 | } else { |
| 780 | header->snap_names = NULL; | 853 | ceph_put_snap_context(header->snapc); |
| 781 | header->snap_sizes = NULL; | 854 | kfree(header->snap_names); |
| 855 | kfree(header->snap_sizes); | ||
| 782 | } | 856 | } |
| 783 | 857 | ||
| 784 | header->features = 0; /* No features support in v1 images */ | 858 | /* The remaining fields always get updated (when we refresh) */ |
| 785 | header->obj_order = ondisk->options.order; | ||
| 786 | header->crypt_type = ondisk->options.crypt_type; | ||
| 787 | header->comp_type = ondisk->options.comp_type; | ||
| 788 | |||
| 789 | /* Allocate and fill in the snapshot context */ | ||
| 790 | 859 | ||
| 791 | header->image_size = le64_to_cpu(ondisk->image_size); | 860 | header->image_size = le64_to_cpu(ondisk->image_size); |
| 861 | header->snapc = snapc; | ||
| 862 | header->snap_names = snap_names; | ||
| 863 | header->snap_sizes = snap_sizes; | ||
| 792 | 864 | ||
| 793 | header->snapc = ceph_create_snap_context(snap_count, GFP_KERNEL); | 865 | /* Make sure mapping size is consistent with header info */ |
| 794 | if (!header->snapc) | ||
| 795 | goto out_err; | ||
| 796 | header->snapc->seq = le64_to_cpu(ondisk->snap_seq); | ||
| 797 | for (i = 0; i < snap_count; i++) | ||
| 798 | header->snapc->snaps[i] = le64_to_cpu(ondisk->snaps[i].id); | ||
| 799 | 866 | ||
| 800 | return 0; | 867 | if (rbd_dev->spec->snap_id == CEPH_NOSNAP || first_time) |
| 868 | if (rbd_dev->mapping.size != header->image_size) | ||
| 869 | rbd_dev->mapping.size = header->image_size; | ||
| 870 | |||
| 871 | up_write(&rbd_dev->header_rwsem); | ||
| 801 | 872 | ||
| 873 | return 0; | ||
| 874 | out_2big: | ||
| 875 | ret = -EIO; | ||
| 802 | out_err: | 876 | out_err: |
| 803 | kfree(header->snap_sizes); | 877 | kfree(snap_sizes); |
| 804 | header->snap_sizes = NULL; | 878 | kfree(snap_names); |
| 805 | kfree(header->snap_names); | 879 | ceph_put_snap_context(snapc); |
| 806 | header->snap_names = NULL; | 880 | kfree(object_prefix); |
| 807 | kfree(header->object_prefix); | ||
| 808 | header->object_prefix = NULL; | ||
| 809 | 881 | ||
| 810 | return -ENOMEM; | 882 | return ret; |
| 811 | } | 883 | } |
| 812 | 884 | ||
| 813 | static const char *_rbd_dev_v1_snap_name(struct rbd_device *rbd_dev, u32 which) | 885 | static const char *_rbd_dev_v1_snap_name(struct rbd_device *rbd_dev, u32 which) |
| @@ -934,20 +1006,11 @@ static int rbd_snap_features(struct rbd_device *rbd_dev, u64 snap_id, | |||
| 934 | 1006 | ||
| 935 | static int rbd_dev_mapping_set(struct rbd_device *rbd_dev) | 1007 | static int rbd_dev_mapping_set(struct rbd_device *rbd_dev) |
| 936 | { | 1008 | { |
| 937 | const char *snap_name = rbd_dev->spec->snap_name; | 1009 | u64 snap_id = rbd_dev->spec->snap_id; |
| 938 | u64 snap_id; | ||
| 939 | u64 size = 0; | 1010 | u64 size = 0; |
| 940 | u64 features = 0; | 1011 | u64 features = 0; |
| 941 | int ret; | 1012 | int ret; |
| 942 | 1013 | ||
| 943 | if (strcmp(snap_name, RBD_SNAP_HEAD_NAME)) { | ||
| 944 | snap_id = rbd_snap_id_by_name(rbd_dev, snap_name); | ||
| 945 | if (snap_id == CEPH_NOSNAP) | ||
| 946 | return -ENOENT; | ||
| 947 | } else { | ||
| 948 | snap_id = CEPH_NOSNAP; | ||
| 949 | } | ||
| 950 | |||
| 951 | ret = rbd_snap_size(rbd_dev, snap_id, &size); | 1014 | ret = rbd_snap_size(rbd_dev, snap_id, &size); |
| 952 | if (ret) | 1015 | if (ret) |
| 953 | return ret; | 1016 | return ret; |
| @@ -958,11 +1021,6 @@ static int rbd_dev_mapping_set(struct rbd_device *rbd_dev) | |||
| 958 | rbd_dev->mapping.size = size; | 1021 | rbd_dev->mapping.size = size; |
| 959 | rbd_dev->mapping.features = features; | 1022 | rbd_dev->mapping.features = features; |
| 960 | 1023 | ||
| 961 | /* If we are mapping a snapshot it must be marked read-only */ | ||
| 962 | |||
| 963 | if (snap_id != CEPH_NOSNAP) | ||
| 964 | rbd_dev->mapping.read_only = true; | ||
| 965 | |||
| 966 | return 0; | 1024 | return 0; |
| 967 | } | 1025 | } |
| 968 | 1026 | ||
| @@ -970,14 +1028,6 @@ static void rbd_dev_mapping_clear(struct rbd_device *rbd_dev) | |||
| 970 | { | 1028 | { |
| 971 | rbd_dev->mapping.size = 0; | 1029 | rbd_dev->mapping.size = 0; |
| 972 | rbd_dev->mapping.features = 0; | 1030 | rbd_dev->mapping.features = 0; |
| 973 | rbd_dev->mapping.read_only = true; | ||
| 974 | } | ||
| 975 | |||
| 976 | static void rbd_dev_clear_mapping(struct rbd_device *rbd_dev) | ||
| 977 | { | ||
| 978 | rbd_dev->mapping.size = 0; | ||
| 979 | rbd_dev->mapping.features = 0; | ||
| 980 | rbd_dev->mapping.read_only = true; | ||
| 981 | } | 1031 | } |
| 982 | 1032 | ||
| 983 | static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) | 1033 | static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) |
| @@ -1342,20 +1392,18 @@ static void rbd_obj_request_put(struct rbd_obj_request *obj_request) | |||
| 1342 | kref_put(&obj_request->kref, rbd_obj_request_destroy); | 1392 | kref_put(&obj_request->kref, rbd_obj_request_destroy); |
| 1343 | } | 1393 | } |
| 1344 | 1394 | ||
| 1345 | static void rbd_img_request_get(struct rbd_img_request *img_request) | 1395 | static bool img_request_child_test(struct rbd_img_request *img_request); |
| 1346 | { | 1396 | static void rbd_parent_request_destroy(struct kref *kref); |
| 1347 | dout("%s: img %p (was %d)\n", __func__, img_request, | ||
| 1348 | atomic_read(&img_request->kref.refcount)); | ||
| 1349 | kref_get(&img_request->kref); | ||
| 1350 | } | ||
| 1351 | |||
| 1352 | static void rbd_img_request_destroy(struct kref *kref); | 1397 | static void rbd_img_request_destroy(struct kref *kref); |
| 1353 | static void rbd_img_request_put(struct rbd_img_request *img_request) | 1398 | static void rbd_img_request_put(struct rbd_img_request *img_request) |
| 1354 | { | 1399 | { |
| 1355 | rbd_assert(img_request != NULL); | 1400 | rbd_assert(img_request != NULL); |
| 1356 | dout("%s: img %p (was %d)\n", __func__, img_request, | 1401 | dout("%s: img %p (was %d)\n", __func__, img_request, |
| 1357 | atomic_read(&img_request->kref.refcount)); | 1402 | atomic_read(&img_request->kref.refcount)); |
| 1358 | kref_put(&img_request->kref, rbd_img_request_destroy); | 1403 | if (img_request_child_test(img_request)) |
| 1404 | kref_put(&img_request->kref, rbd_parent_request_destroy); | ||
| 1405 | else | ||
| 1406 | kref_put(&img_request->kref, rbd_img_request_destroy); | ||
| 1359 | } | 1407 | } |
| 1360 | 1408 | ||
| 1361 | static inline void rbd_img_obj_request_add(struct rbd_img_request *img_request, | 1409 | static inline void rbd_img_obj_request_add(struct rbd_img_request *img_request, |
| @@ -1472,6 +1520,12 @@ static void img_request_child_set(struct rbd_img_request *img_request) | |||
| 1472 | smp_mb(); | 1520 | smp_mb(); |
| 1473 | } | 1521 | } |
| 1474 | 1522 | ||
| 1523 | static void img_request_child_clear(struct rbd_img_request *img_request) | ||
| 1524 | { | ||
| 1525 | clear_bit(IMG_REQ_CHILD, &img_request->flags); | ||
| 1526 | smp_mb(); | ||
| 1527 | } | ||
| 1528 | |||
| 1475 | static bool img_request_child_test(struct rbd_img_request *img_request) | 1529 | static bool img_request_child_test(struct rbd_img_request *img_request) |
| 1476 | { | 1530 | { |
| 1477 | smp_mb(); | 1531 | smp_mb(); |
| @@ -1484,6 +1538,12 @@ static void img_request_layered_set(struct rbd_img_request *img_request) | |||
| 1484 | smp_mb(); | 1538 | smp_mb(); |
| 1485 | } | 1539 | } |
| 1486 | 1540 | ||
| 1541 | static void img_request_layered_clear(struct rbd_img_request *img_request) | ||
| 1542 | { | ||
| 1543 | clear_bit(IMG_REQ_LAYERED, &img_request->flags); | ||
| 1544 | smp_mb(); | ||
| 1545 | } | ||
| 1546 | |||
| 1487 | static bool img_request_layered_test(struct rbd_img_request *img_request) | 1547 | static bool img_request_layered_test(struct rbd_img_request *img_request) |
| 1488 | { | 1548 | { |
| 1489 | smp_mb(); | 1549 | smp_mb(); |
| @@ -1827,6 +1887,74 @@ static void rbd_obj_request_destroy(struct kref *kref) | |||
| 1827 | kmem_cache_free(rbd_obj_request_cache, obj_request); | 1887 | kmem_cache_free(rbd_obj_request_cache, obj_request); |
| 1828 | } | 1888 | } |
| 1829 | 1889 | ||
| 1890 | /* It's OK to call this for a device with no parent */ | ||
| 1891 | |||
| 1892 | static void rbd_spec_put(struct rbd_spec *spec); | ||
| 1893 | static void rbd_dev_unparent(struct rbd_device *rbd_dev) | ||
| 1894 | { | ||
| 1895 | rbd_dev_remove_parent(rbd_dev); | ||
| 1896 | rbd_spec_put(rbd_dev->parent_spec); | ||
| 1897 | rbd_dev->parent_spec = NULL; | ||
| 1898 | rbd_dev->parent_overlap = 0; | ||
| 1899 | } | ||
| 1900 | |||
| 1901 | /* | ||
| 1902 | * Parent image reference counting is used to determine when an | ||
| 1903 | * image's parent fields can be safely torn down--after there are no | ||
| 1904 | * more in-flight requests to the parent image. When the last | ||
| 1905 | * reference is dropped, cleaning them up is safe. | ||
| 1906 | */ | ||
| 1907 | static void rbd_dev_parent_put(struct rbd_device *rbd_dev) | ||
| 1908 | { | ||
| 1909 | int counter; | ||
| 1910 | |||
| 1911 | if (!rbd_dev->parent_spec) | ||
| 1912 | return; | ||
| 1913 | |||
| 1914 | counter = atomic_dec_return_safe(&rbd_dev->parent_ref); | ||
| 1915 | if (counter > 0) | ||
| 1916 | return; | ||
| 1917 | |||
| 1918 | /* Last reference; clean up parent data structures */ | ||
| 1919 | |||
| 1920 | if (!counter) | ||
| 1921 | rbd_dev_unparent(rbd_dev); | ||
| 1922 | else | ||
| 1923 | rbd_warn(rbd_dev, "parent reference underflow\n"); | ||
| 1924 | } | ||
| 1925 | |||
| 1926 | /* | ||
| 1927 | * If an image has a non-zero parent overlap, get a reference to its | ||
| 1928 | * parent. | ||
| 1929 | * | ||
| 1930 | * We must get the reference before checking for the overlap to | ||
| 1931 | * coordinate properly with zeroing the parent overlap in | ||
| 1932 | * rbd_dev_v2_parent_info() when an image gets flattened. We | ||
| 1933 | * drop it again if there is no overlap. | ||
| 1934 | * | ||
| 1935 | * Returns true if the rbd device has a parent with a non-zero | ||
| 1936 | * overlap and a reference for it was successfully taken, or | ||
| 1937 | * false otherwise. | ||
| 1938 | */ | ||
| 1939 | static bool rbd_dev_parent_get(struct rbd_device *rbd_dev) | ||
| 1940 | { | ||
| 1941 | int counter; | ||
| 1942 | |||
| 1943 | if (!rbd_dev->parent_spec) | ||
| 1944 | return false; | ||
| 1945 | |||
| 1946 | counter = atomic_inc_return_safe(&rbd_dev->parent_ref); | ||
| 1947 | if (counter > 0 && rbd_dev->parent_overlap) | ||
| 1948 | return true; | ||
| 1949 | |||
| 1950 | /* Image was flattened, but parent is not yet torn down */ | ||
| 1951 | |||
| 1952 | if (counter < 0) | ||
| 1953 | rbd_warn(rbd_dev, "parent reference overflow\n"); | ||
| 1954 | |||
| 1955 | return false; | ||
| 1956 | } | ||
| 1957 | |||
| 1830 | /* | 1958 | /* |
| 1831 | * Caller is responsible for filling in the list of object requests | 1959 | * Caller is responsible for filling in the list of object requests |
| 1832 | * that comprises the image request, and the Linux request pointer | 1960 | * that comprises the image request, and the Linux request pointer |
| @@ -1835,8 +1963,7 @@ static void rbd_obj_request_destroy(struct kref *kref) | |||
| 1835 | static struct rbd_img_request *rbd_img_request_create( | 1963 | static struct rbd_img_request *rbd_img_request_create( |
| 1836 | struct rbd_device *rbd_dev, | 1964 | struct rbd_device *rbd_dev, |
| 1837 | u64 offset, u64 length, | 1965 | u64 offset, u64 length, |
| 1838 | bool write_request, | 1966 | bool write_request) |
| 1839 | bool child_request) | ||
| 1840 | { | 1967 | { |
| 1841 | struct rbd_img_request *img_request; | 1968 | struct rbd_img_request *img_request; |
| 1842 | 1969 | ||
| @@ -1861,9 +1988,7 @@ static struct rbd_img_request *rbd_img_request_create( | |||
| 1861 | } else { | 1988 | } else { |
| 1862 | img_request->snap_id = rbd_dev->spec->snap_id; | 1989 | img_request->snap_id = rbd_dev->spec->snap_id; |
| 1863 | } | 1990 | } |
| 1864 | if (child_request) | 1991 | if (rbd_dev_parent_get(rbd_dev)) |
| 1865 | img_request_child_set(img_request); | ||
| 1866 | if (rbd_dev->parent_spec) | ||
| 1867 | img_request_layered_set(img_request); | 1992 | img_request_layered_set(img_request); |
| 1868 | spin_lock_init(&img_request->completion_lock); | 1993 | spin_lock_init(&img_request->completion_lock); |
| 1869 | img_request->next_completion = 0; | 1994 | img_request->next_completion = 0; |
| @@ -1873,9 +1998,6 @@ static struct rbd_img_request *rbd_img_request_create( | |||
| 1873 | INIT_LIST_HEAD(&img_request->obj_requests); | 1998 | INIT_LIST_HEAD(&img_request->obj_requests); |
| 1874 | kref_init(&img_request->kref); | 1999 | kref_init(&img_request->kref); |
| 1875 | 2000 | ||
| 1876 | rbd_img_request_get(img_request); /* Avoid a warning */ | ||
| 1877 | rbd_img_request_put(img_request); /* TEMPORARY */ | ||
| 1878 | |||
| 1879 | dout("%s: rbd_dev %p %s %llu/%llu -> img %p\n", __func__, rbd_dev, | 2001 | dout("%s: rbd_dev %p %s %llu/%llu -> img %p\n", __func__, rbd_dev, |
| 1880 | write_request ? "write" : "read", offset, length, | 2002 | write_request ? "write" : "read", offset, length, |
| 1881 | img_request); | 2003 | img_request); |
| @@ -1897,15 +2019,54 @@ static void rbd_img_request_destroy(struct kref *kref) | |||
| 1897 | rbd_img_obj_request_del(img_request, obj_request); | 2019 | rbd_img_obj_request_del(img_request, obj_request); |
| 1898 | rbd_assert(img_request->obj_request_count == 0); | 2020 | rbd_assert(img_request->obj_request_count == 0); |
| 1899 | 2021 | ||
| 2022 | if (img_request_layered_test(img_request)) { | ||
| 2023 | img_request_layered_clear(img_request); | ||
| 2024 | rbd_dev_parent_put(img_request->rbd_dev); | ||
| 2025 | } | ||
| 2026 | |||
| 1900 | if (img_request_write_test(img_request)) | 2027 | if (img_request_write_test(img_request)) |
| 1901 | ceph_put_snap_context(img_request->snapc); | 2028 | ceph_put_snap_context(img_request->snapc); |
| 1902 | 2029 | ||
| 1903 | if (img_request_child_test(img_request)) | ||
| 1904 | rbd_obj_request_put(img_request->obj_request); | ||
| 1905 | |||
| 1906 | kmem_cache_free(rbd_img_request_cache, img_request); | 2030 | kmem_cache_free(rbd_img_request_cache, img_request); |
| 1907 | } | 2031 | } |
| 1908 | 2032 | ||
| 2033 | static struct rbd_img_request *rbd_parent_request_create( | ||
| 2034 | struct rbd_obj_request *obj_request, | ||
| 2035 | u64 img_offset, u64 length) | ||
| 2036 | { | ||
| 2037 | struct rbd_img_request *parent_request; | ||
| 2038 | struct rbd_device *rbd_dev; | ||
| 2039 | |||
| 2040 | rbd_assert(obj_request->img_request); | ||
| 2041 | rbd_dev = obj_request->img_request->rbd_dev; | ||
| 2042 | |||
| 2043 | parent_request = rbd_img_request_create(rbd_dev->parent, | ||
| 2044 | img_offset, length, false); | ||
| 2045 | if (!parent_request) | ||
| 2046 | return NULL; | ||
| 2047 | |||
| 2048 | img_request_child_set(parent_request); | ||
| 2049 | rbd_obj_request_get(obj_request); | ||
| 2050 | parent_request->obj_request = obj_request; | ||
| 2051 | |||
| 2052 | return parent_request; | ||
| 2053 | } | ||
| 2054 | |||
| 2055 | static void rbd_parent_request_destroy(struct kref *kref) | ||
| 2056 | { | ||
| 2057 | struct rbd_img_request *parent_request; | ||
| 2058 | struct rbd_obj_request *orig_request; | ||
| 2059 | |||
| 2060 | parent_request = container_of(kref, struct rbd_img_request, kref); | ||
| 2061 | orig_request = parent_request->obj_request; | ||
| 2062 | |||
| 2063 | parent_request->obj_request = NULL; | ||
| 2064 | rbd_obj_request_put(orig_request); | ||
| 2065 | img_request_child_clear(parent_request); | ||
| 2066 | |||
| 2067 | rbd_img_request_destroy(kref); | ||
| 2068 | } | ||
| 2069 | |||
| 1909 | static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request) | 2070 | static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request) |
| 1910 | { | 2071 | { |
| 1911 | struct rbd_img_request *img_request; | 2072 | struct rbd_img_request *img_request; |
| @@ -2114,7 +2275,7 @@ rbd_img_obj_copyup_callback(struct rbd_obj_request *obj_request) | |||
| 2114 | { | 2275 | { |
| 2115 | struct rbd_img_request *img_request; | 2276 | struct rbd_img_request *img_request; |
| 2116 | struct rbd_device *rbd_dev; | 2277 | struct rbd_device *rbd_dev; |
| 2117 | u64 length; | 2278 | struct page **pages; |
| 2118 | u32 page_count; | 2279 | u32 page_count; |
| 2119 | 2280 | ||
| 2120 | rbd_assert(obj_request->type == OBJ_REQUEST_BIO); | 2281 | rbd_assert(obj_request->type == OBJ_REQUEST_BIO); |
| @@ -2124,12 +2285,14 @@ rbd_img_obj_copyup_callback(struct rbd_obj_request *obj_request) | |||
| 2124 | 2285 | ||
| 2125 | rbd_dev = img_request->rbd_dev; | 2286 | rbd_dev = img_request->rbd_dev; |
| 2126 | rbd_assert(rbd_dev); | 2287 | rbd_assert(rbd_dev); |
| 2127 | length = (u64)1 << rbd_dev->header.obj_order; | ||
| 2128 | page_count = (u32)calc_pages_for(0, length); | ||
| 2129 | 2288 | ||
| 2130 | rbd_assert(obj_request->copyup_pages); | 2289 | pages = obj_request->copyup_pages; |
| 2131 | ceph_release_page_vector(obj_request->copyup_pages, page_count); | 2290 | rbd_assert(pages != NULL); |
| 2132 | obj_request->copyup_pages = NULL; | 2291 | obj_request->copyup_pages = NULL; |
| 2292 | page_count = obj_request->copyup_page_count; | ||
| 2293 | rbd_assert(page_count); | ||
| 2294 | obj_request->copyup_page_count = 0; | ||
| 2295 | ceph_release_page_vector(pages, page_count); | ||
| 2133 | 2296 | ||
| 2134 | /* | 2297 | /* |
| 2135 | * We want the transfer count to reflect the size of the | 2298 | * We want the transfer count to reflect the size of the |
| @@ -2153,9 +2316,11 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request) | |||
| 2153 | struct ceph_osd_client *osdc; | 2316 | struct ceph_osd_client *osdc; |
| 2154 | struct rbd_device *rbd_dev; | 2317 | struct rbd_device *rbd_dev; |
| 2155 | struct page **pages; | 2318 | struct page **pages; |
| 2156 | int result; | 2319 | u32 page_count; |
| 2157 | u64 obj_size; | 2320 | int img_result; |
| 2158 | u64 xferred; | 2321 | u64 parent_length; |
| 2322 | u64 offset; | ||
| 2323 | u64 length; | ||
| 2159 | 2324 | ||
| 2160 | rbd_assert(img_request_child_test(img_request)); | 2325 | rbd_assert(img_request_child_test(img_request)); |
| 2161 | 2326 | ||
| @@ -2164,46 +2329,74 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request) | |||
| 2164 | pages = img_request->copyup_pages; | 2329 | pages = img_request->copyup_pages; |
| 2165 | rbd_assert(pages != NULL); | 2330 | rbd_assert(pages != NULL); |
| 2166 | img_request->copyup_pages = NULL; | 2331 | img_request->copyup_pages = NULL; |
| 2332 | page_count = img_request->copyup_page_count; | ||
| 2333 | rbd_assert(page_count); | ||
| 2334 | img_request->copyup_page_count = 0; | ||
| 2167 | 2335 | ||
| 2168 | orig_request = img_request->obj_request; | 2336 | orig_request = img_request->obj_request; |
| 2169 | rbd_assert(orig_request != NULL); | 2337 | rbd_assert(orig_request != NULL); |
| 2170 | rbd_assert(orig_request->type == OBJ_REQUEST_BIO); | 2338 | rbd_assert(obj_request_type_valid(orig_request->type)); |
| 2171 | result = img_request->result; | 2339 | img_result = img_request->result; |
| 2172 | obj_size = img_request->length; | 2340 | parent_length = img_request->length; |
| 2173 | xferred = img_request->xferred; | 2341 | rbd_assert(parent_length == img_request->xferred); |
| 2342 | rbd_img_request_put(img_request); | ||
| 2174 | 2343 | ||
| 2175 | rbd_dev = img_request->rbd_dev; | 2344 | rbd_assert(orig_request->img_request); |
| 2345 | rbd_dev = orig_request->img_request->rbd_dev; | ||
| 2176 | rbd_assert(rbd_dev); | 2346 | rbd_assert(rbd_dev); |
| 2177 | rbd_assert(obj_size == (u64)1 << rbd_dev->header.obj_order); | ||
| 2178 | 2347 | ||
| 2179 | rbd_img_request_put(img_request); | 2348 | /* |
| 2349 | * If the overlap has become 0 (most likely because the | ||
| 2350 | * image has been flattened) we need to free the pages | ||
| 2351 | * and re-submit the original write request. | ||
| 2352 | */ | ||
| 2353 | if (!rbd_dev->parent_overlap) { | ||
| 2354 | struct ceph_osd_client *osdc; | ||
| 2180 | 2355 | ||
| 2181 | if (result) | 2356 | ceph_release_page_vector(pages, page_count); |
| 2182 | goto out_err; | 2357 | osdc = &rbd_dev->rbd_client->client->osdc; |
| 2358 | img_result = rbd_obj_request_submit(osdc, orig_request); | ||
| 2359 | if (!img_result) | ||
| 2360 | return; | ||
| 2361 | } | ||
| 2183 | 2362 | ||
| 2184 | /* Allocate the new copyup osd request for the original request */ | 2363 | if (img_result) |
| 2364 | goto out_err; | ||
| 2185 | 2365 | ||
| 2186 | result = -ENOMEM; | 2366 | /* |
| 2187 | rbd_assert(!orig_request->osd_req); | 2367 | * The original osd request is of no use to use any more. |
| 2368 | * We need a new one that can hold the two ops in a copyup | ||
| 2369 | * request. Allocate the new copyup osd request for the | ||
| 2370 | * original request, and release the old one. | ||
| 2371 | */ | ||
| 2372 | img_result = -ENOMEM; | ||
| 2188 | osd_req = rbd_osd_req_create_copyup(orig_request); | 2373 | osd_req = rbd_osd_req_create_copyup(orig_request); |
| 2189 | if (!osd_req) | 2374 | if (!osd_req) |
| 2190 | goto out_err; | 2375 | goto out_err; |
| 2376 | rbd_osd_req_destroy(orig_request->osd_req); | ||
| 2191 | orig_request->osd_req = osd_req; | 2377 | orig_request->osd_req = osd_req; |
| 2192 | orig_request->copyup_pages = pages; | 2378 | orig_request->copyup_pages = pages; |
| 2379 | orig_request->copyup_page_count = page_count; | ||
| 2193 | 2380 | ||
| 2194 | /* Initialize the copyup op */ | 2381 | /* Initialize the copyup op */ |
| 2195 | 2382 | ||
| 2196 | osd_req_op_cls_init(osd_req, 0, CEPH_OSD_OP_CALL, "rbd", "copyup"); | 2383 | osd_req_op_cls_init(osd_req, 0, CEPH_OSD_OP_CALL, "rbd", "copyup"); |
| 2197 | osd_req_op_cls_request_data_pages(osd_req, 0, pages, obj_size, 0, | 2384 | osd_req_op_cls_request_data_pages(osd_req, 0, pages, parent_length, 0, |
| 2198 | false, false); | 2385 | false, false); |
| 2199 | 2386 | ||
| 2200 | /* Then the original write request op */ | 2387 | /* Then the original write request op */ |
| 2201 | 2388 | ||
| 2389 | offset = orig_request->offset; | ||
| 2390 | length = orig_request->length; | ||
| 2202 | osd_req_op_extent_init(osd_req, 1, CEPH_OSD_OP_WRITE, | 2391 | osd_req_op_extent_init(osd_req, 1, CEPH_OSD_OP_WRITE, |
| 2203 | orig_request->offset, | 2392 | offset, length, 0, 0); |
| 2204 | orig_request->length, 0, 0); | 2393 | if (orig_request->type == OBJ_REQUEST_BIO) |
| 2205 | osd_req_op_extent_osd_data_bio(osd_req, 1, orig_request->bio_list, | 2394 | osd_req_op_extent_osd_data_bio(osd_req, 1, |
| 2206 | orig_request->length); | 2395 | orig_request->bio_list, length); |
| 2396 | else | ||
| 2397 | osd_req_op_extent_osd_data_pages(osd_req, 1, | ||
| 2398 | orig_request->pages, length, | ||
| 2399 | offset & ~PAGE_MASK, false, false); | ||
| 2207 | 2400 | ||
| 2208 | rbd_osd_req_format_write(orig_request); | 2401 | rbd_osd_req_format_write(orig_request); |
| 2209 | 2402 | ||
| @@ -2211,13 +2404,13 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request) | |||
| 2211 | 2404 | ||
| 2212 | orig_request->callback = rbd_img_obj_copyup_callback; | 2405 | orig_request->callback = rbd_img_obj_copyup_callback; |
| 2213 | osdc = &rbd_dev->rbd_client->client->osdc; | 2406 | osdc = &rbd_dev->rbd_client->client->osdc; |
| 2214 | result = rbd_obj_request_submit(osdc, orig_request); | 2407 | img_result = rbd_obj_request_submit(osdc, orig_request); |
| 2215 | if (!result) | 2408 | if (!img_result) |
| 2216 | return; | 2409 | return; |
| 2217 | out_err: | 2410 | out_err: |
| 2218 | /* Record the error code and complete the request */ | 2411 | /* Record the error code and complete the request */ |
| 2219 | 2412 | ||
| 2220 | orig_request->result = result; | 2413 | orig_request->result = img_result; |
| 2221 | orig_request->xferred = 0; | 2414 | orig_request->xferred = 0; |
| 2222 | obj_request_done_set(orig_request); | 2415 | obj_request_done_set(orig_request); |
| 2223 | rbd_obj_request_complete(orig_request); | 2416 | rbd_obj_request_complete(orig_request); |
| @@ -2249,7 +2442,7 @@ static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request) | |||
| 2249 | int result; | 2442 | int result; |
| 2250 | 2443 | ||
| 2251 | rbd_assert(obj_request_img_data_test(obj_request)); | 2444 | rbd_assert(obj_request_img_data_test(obj_request)); |
| 2252 | rbd_assert(obj_request->type == OBJ_REQUEST_BIO); | 2445 | rbd_assert(obj_request_type_valid(obj_request->type)); |
| 2253 | 2446 | ||
| 2254 | img_request = obj_request->img_request; | 2447 | img_request = obj_request->img_request; |
| 2255 | rbd_assert(img_request != NULL); | 2448 | rbd_assert(img_request != NULL); |
| @@ -2257,15 +2450,6 @@ static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request) | |||
| 2257 | rbd_assert(rbd_dev->parent != NULL); | 2450 | rbd_assert(rbd_dev->parent != NULL); |
| 2258 | 2451 | ||
| 2259 | /* | 2452 | /* |
| 2260 | * First things first. The original osd request is of no | ||
| 2261 | * use to use any more, we'll need a new one that can hold | ||
| 2262 | * the two ops in a copyup request. We'll get that later, | ||
| 2263 | * but for now we can release the old one. | ||
| 2264 | */ | ||
| 2265 | rbd_osd_req_destroy(obj_request->osd_req); | ||
| 2266 | obj_request->osd_req = NULL; | ||
| 2267 | |||
| 2268 | /* | ||
| 2269 | * Determine the byte range covered by the object in the | 2453 | * Determine the byte range covered by the object in the |
| 2270 | * child image to which the original request was to be sent. | 2454 | * child image to which the original request was to be sent. |
| 2271 | */ | 2455 | */ |
| @@ -2295,18 +2479,16 @@ static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request) | |||
| 2295 | } | 2479 | } |
| 2296 | 2480 | ||
| 2297 | result = -ENOMEM; | 2481 | result = -ENOMEM; |
| 2298 | parent_request = rbd_img_request_create(rbd_dev->parent, | 2482 | parent_request = rbd_parent_request_create(obj_request, |
| 2299 | img_offset, length, | 2483 | img_offset, length); |
| 2300 | false, true); | ||
| 2301 | if (!parent_request) | 2484 | if (!parent_request) |
| 2302 | goto out_err; | 2485 | goto out_err; |
| 2303 | rbd_obj_request_get(obj_request); | ||
| 2304 | parent_request->obj_request = obj_request; | ||
| 2305 | 2486 | ||
| 2306 | result = rbd_img_request_fill(parent_request, OBJ_REQUEST_PAGES, pages); | 2487 | result = rbd_img_request_fill(parent_request, OBJ_REQUEST_PAGES, pages); |
| 2307 | if (result) | 2488 | if (result) |
| 2308 | goto out_err; | 2489 | goto out_err; |
| 2309 | parent_request->copyup_pages = pages; | 2490 | parent_request->copyup_pages = pages; |
| 2491 | parent_request->copyup_page_count = page_count; | ||
| 2310 | 2492 | ||
| 2311 | parent_request->callback = rbd_img_obj_parent_read_full_callback; | 2493 | parent_request->callback = rbd_img_obj_parent_read_full_callback; |
| 2312 | result = rbd_img_request_submit(parent_request); | 2494 | result = rbd_img_request_submit(parent_request); |
| @@ -2314,6 +2496,7 @@ static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request) | |||
| 2314 | return 0; | 2496 | return 0; |
| 2315 | 2497 | ||
| 2316 | parent_request->copyup_pages = NULL; | 2498 | parent_request->copyup_pages = NULL; |
| 2499 | parent_request->copyup_page_count = 0; | ||
| 2317 | parent_request->obj_request = NULL; | 2500 | parent_request->obj_request = NULL; |
| 2318 | rbd_obj_request_put(obj_request); | 2501 | rbd_obj_request_put(obj_request); |
| 2319 | out_err: | 2502 | out_err: |
| @@ -2331,6 +2514,7 @@ out_err: | |||
| 2331 | static void rbd_img_obj_exists_callback(struct rbd_obj_request *obj_request) | 2514 | static void rbd_img_obj_exists_callback(struct rbd_obj_request *obj_request) |
| 2332 | { | 2515 | { |
| 2333 | struct rbd_obj_request *orig_request; | 2516 | struct rbd_obj_request *orig_request; |
| 2517 | struct rbd_device *rbd_dev; | ||
| 2334 | int result; | 2518 | int result; |
| 2335 | 2519 | ||
| 2336 | rbd_assert(!obj_request_img_data_test(obj_request)); | 2520 | rbd_assert(!obj_request_img_data_test(obj_request)); |
| @@ -2353,8 +2537,21 @@ static void rbd_img_obj_exists_callback(struct rbd_obj_request *obj_request) | |||
| 2353 | obj_request->xferred, obj_request->length); | 2537 | obj_request->xferred, obj_request->length); |
| 2354 | rbd_obj_request_put(obj_request); | 2538 | rbd_obj_request_put(obj_request); |
| 2355 | 2539 | ||
| 2356 | rbd_assert(orig_request); | 2540 | /* |
| 2357 | rbd_assert(orig_request->img_request); | 2541 | * If the overlap has become 0 (most likely because the |
| 2542 | * image has been flattened) we need to free the pages | ||
| 2543 | * and re-submit the original write request. | ||
| 2544 | */ | ||
| 2545 | rbd_dev = orig_request->img_request->rbd_dev; | ||
| 2546 | if (!rbd_dev->parent_overlap) { | ||
| 2547 | struct ceph_osd_client *osdc; | ||
| 2548 | |||
| 2549 | rbd_obj_request_put(orig_request); | ||
| 2550 | osdc = &rbd_dev->rbd_client->client->osdc; | ||
| 2551 | result = rbd_obj_request_submit(osdc, orig_request); | ||
| 2552 | if (!result) | ||
| 2553 | return; | ||
| 2554 | } | ||
| 2358 | 2555 | ||
| 2359 | /* | 2556 | /* |
| 2360 | * Our only purpose here is to determine whether the object | 2557 | * Our only purpose here is to determine whether the object |
| @@ -2512,14 +2709,36 @@ static void rbd_img_parent_read_callback(struct rbd_img_request *img_request) | |||
| 2512 | struct rbd_obj_request *obj_request; | 2709 | struct rbd_obj_request *obj_request; |
| 2513 | struct rbd_device *rbd_dev; | 2710 | struct rbd_device *rbd_dev; |
| 2514 | u64 obj_end; | 2711 | u64 obj_end; |
| 2712 | u64 img_xferred; | ||
| 2713 | int img_result; | ||
| 2515 | 2714 | ||
| 2516 | rbd_assert(img_request_child_test(img_request)); | 2715 | rbd_assert(img_request_child_test(img_request)); |
| 2517 | 2716 | ||
| 2717 | /* First get what we need from the image request and release it */ | ||
| 2718 | |||
| 2518 | obj_request = img_request->obj_request; | 2719 | obj_request = img_request->obj_request; |
| 2720 | img_xferred = img_request->xferred; | ||
| 2721 | img_result = img_request->result; | ||
| 2722 | rbd_img_request_put(img_request); | ||
| 2723 | |||
| 2724 | /* | ||
| 2725 | * If the overlap has become 0 (most likely because the | ||
| 2726 | * image has been flattened) we need to re-submit the | ||
| 2727 | * original request. | ||
| 2728 | */ | ||
| 2519 | rbd_assert(obj_request); | 2729 | rbd_assert(obj_request); |
| 2520 | rbd_assert(obj_request->img_request); | 2730 | rbd_assert(obj_request->img_request); |
| 2731 | rbd_dev = obj_request->img_request->rbd_dev; | ||
| 2732 | if (!rbd_dev->parent_overlap) { | ||
| 2733 | struct ceph_osd_client *osdc; | ||
| 2734 | |||
| 2735 | osdc = &rbd_dev->rbd_client->client->osdc; | ||
| 2736 | img_result = rbd_obj_request_submit(osdc, obj_request); | ||
| 2737 | if (!img_result) | ||
| 2738 | return; | ||
| 2739 | } | ||
| 2521 | 2740 | ||
| 2522 | obj_request->result = img_request->result; | 2741 | obj_request->result = img_result; |
| 2523 | if (obj_request->result) | 2742 | if (obj_request->result) |
| 2524 | goto out; | 2743 | goto out; |
| 2525 | 2744 | ||
| @@ -2532,7 +2751,6 @@ static void rbd_img_parent_read_callback(struct rbd_img_request *img_request) | |||
| 2532 | */ | 2751 | */ |
| 2533 | rbd_assert(obj_request->img_offset < U64_MAX - obj_request->length); | 2752 | rbd_assert(obj_request->img_offset < U64_MAX - obj_request->length); |
| 2534 | obj_end = obj_request->img_offset + obj_request->length; | 2753 | obj_end = obj_request->img_offset + obj_request->length; |
| 2535 | rbd_dev = obj_request->img_request->rbd_dev; | ||
| 2536 | if (obj_end > rbd_dev->parent_overlap) { | 2754 | if (obj_end > rbd_dev->parent_overlap) { |
| 2537 | u64 xferred = 0; | 2755 | u64 xferred = 0; |
| 2538 | 2756 | ||
| @@ -2540,43 +2758,39 @@ static void rbd_img_parent_read_callback(struct rbd_img_request *img_request) | |||
| 2540 | xferred = rbd_dev->parent_overlap - | 2758 | xferred = rbd_dev->parent_overlap - |
| 2541 | obj_request->img_offset; | 2759 | obj_request->img_offset; |
| 2542 | 2760 | ||
| 2543 | obj_request->xferred = min(img_request->xferred, xferred); | 2761 | obj_request->xferred = min(img_xferred, xferred); |
| 2544 | } else { | 2762 | } else { |
| 2545 | obj_request->xferred = img_request->xferred; | 2763 | obj_request->xferred = img_xferred; |
| 2546 | } | 2764 | } |
| 2547 | out: | 2765 | out: |
| 2548 | rbd_img_request_put(img_request); | ||
| 2549 | rbd_img_obj_request_read_callback(obj_request); | 2766 | rbd_img_obj_request_read_callback(obj_request); |
| 2550 | rbd_obj_request_complete(obj_request); | 2767 | rbd_obj_request_complete(obj_request); |
| 2551 | } | 2768 | } |
| 2552 | 2769 | ||
| 2553 | static void rbd_img_parent_read(struct rbd_obj_request *obj_request) | 2770 | static void rbd_img_parent_read(struct rbd_obj_request *obj_request) |
| 2554 | { | 2771 | { |
| 2555 | struct rbd_device *rbd_dev; | ||
| 2556 | struct rbd_img_request *img_request; | 2772 | struct rbd_img_request *img_request; |
| 2557 | int result; | 2773 | int result; |
| 2558 | 2774 | ||
| 2559 | rbd_assert(obj_request_img_data_test(obj_request)); | 2775 | rbd_assert(obj_request_img_data_test(obj_request)); |
| 2560 | rbd_assert(obj_request->img_request != NULL); | 2776 | rbd_assert(obj_request->img_request != NULL); |
| 2561 | rbd_assert(obj_request->result == (s32) -ENOENT); | 2777 | rbd_assert(obj_request->result == (s32) -ENOENT); |
| 2562 | rbd_assert(obj_request->type == OBJ_REQUEST_BIO); | 2778 | rbd_assert(obj_request_type_valid(obj_request->type)); |
| 2563 | 2779 | ||
| 2564 | rbd_dev = obj_request->img_request->rbd_dev; | ||
| 2565 | rbd_assert(rbd_dev->parent != NULL); | ||
| 2566 | /* rbd_read_finish(obj_request, obj_request->length); */ | 2780 | /* rbd_read_finish(obj_request, obj_request->length); */ |
| 2567 | img_request = rbd_img_request_create(rbd_dev->parent, | 2781 | img_request = rbd_parent_request_create(obj_request, |
| 2568 | obj_request->img_offset, | 2782 | obj_request->img_offset, |
| 2569 | obj_request->length, | 2783 | obj_request->length); |
| 2570 | false, true); | ||
| 2571 | result = -ENOMEM; | 2784 | result = -ENOMEM; |
| 2572 | if (!img_request) | 2785 | if (!img_request) |
| 2573 | goto out_err; | 2786 | goto out_err; |
| 2574 | 2787 | ||
| 2575 | rbd_obj_request_get(obj_request); | 2788 | if (obj_request->type == OBJ_REQUEST_BIO) |
| 2576 | img_request->obj_request = obj_request; | 2789 | result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO, |
| 2577 | 2790 | obj_request->bio_list); | |
| 2578 | result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO, | 2791 | else |
| 2579 | obj_request->bio_list); | 2792 | result = rbd_img_request_fill(img_request, OBJ_REQUEST_PAGES, |
| 2793 | obj_request->pages); | ||
| 2580 | if (result) | 2794 | if (result) |
| 2581 | goto out_err; | 2795 | goto out_err; |
| 2582 | 2796 | ||
| @@ -2626,6 +2840,7 @@ out: | |||
| 2626 | static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) | 2840 | static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) |
| 2627 | { | 2841 | { |
| 2628 | struct rbd_device *rbd_dev = (struct rbd_device *)data; | 2842 | struct rbd_device *rbd_dev = (struct rbd_device *)data; |
| 2843 | int ret; | ||
| 2629 | 2844 | ||
| 2630 | if (!rbd_dev) | 2845 | if (!rbd_dev) |
| 2631 | return; | 2846 | return; |
| @@ -2633,7 +2848,9 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) | |||
| 2633 | dout("%s: \"%s\" notify_id %llu opcode %u\n", __func__, | 2848 | dout("%s: \"%s\" notify_id %llu opcode %u\n", __func__, |
| 2634 | rbd_dev->header_name, (unsigned long long)notify_id, | 2849 | rbd_dev->header_name, (unsigned long long)notify_id, |
| 2635 | (unsigned int)opcode); | 2850 | (unsigned int)opcode); |
| 2636 | (void)rbd_dev_refresh(rbd_dev); | 2851 | ret = rbd_dev_refresh(rbd_dev); |
| 2852 | if (ret) | ||
| 2853 | rbd_warn(rbd_dev, ": header refresh error (%d)\n", ret); | ||
| 2637 | 2854 | ||
| 2638 | rbd_obj_notify_ack(rbd_dev, notify_id); | 2855 | rbd_obj_notify_ack(rbd_dev, notify_id); |
| 2639 | } | 2856 | } |
| @@ -2642,7 +2859,7 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) | |||
| 2642 | * Request sync osd watch/unwatch. The value of "start" determines | 2859 | * Request sync osd watch/unwatch. The value of "start" determines |
| 2643 | * whether a watch request is being initiated or torn down. | 2860 | * whether a watch request is being initiated or torn down. |
| 2644 | */ | 2861 | */ |
| 2645 | static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, int start) | 2862 | static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, bool start) |
| 2646 | { | 2863 | { |
| 2647 | struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; | 2864 | struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; |
| 2648 | struct rbd_obj_request *obj_request; | 2865 | struct rbd_obj_request *obj_request; |
| @@ -2676,7 +2893,7 @@ static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, int start) | |||
| 2676 | rbd_dev->watch_request->osd_req); | 2893 | rbd_dev->watch_request->osd_req); |
| 2677 | 2894 | ||
| 2678 | osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_WATCH, | 2895 | osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_WATCH, |
| 2679 | rbd_dev->watch_event->cookie, 0, start); | 2896 | rbd_dev->watch_event->cookie, 0, start ? 1 : 0); |
| 2680 | rbd_osd_req_format_write(obj_request); | 2897 | rbd_osd_req_format_write(obj_request); |
| 2681 | 2898 | ||
| 2682 | ret = rbd_obj_request_submit(osdc, obj_request); | 2899 | ret = rbd_obj_request_submit(osdc, obj_request); |
| @@ -2869,9 +3086,16 @@ static void rbd_request_fn(struct request_queue *q) | |||
| 2869 | goto end_request; /* Shouldn't happen */ | 3086 | goto end_request; /* Shouldn't happen */ |
| 2870 | } | 3087 | } |
| 2871 | 3088 | ||
| 3089 | result = -EIO; | ||
| 3090 | if (offset + length > rbd_dev->mapping.size) { | ||
| 3091 | rbd_warn(rbd_dev, "beyond EOD (%llu~%llu > %llu)\n", | ||
| 3092 | offset, length, rbd_dev->mapping.size); | ||
| 3093 | goto end_request; | ||
| 3094 | } | ||
| 3095 | |||
| 2872 | result = -ENOMEM; | 3096 | result = -ENOMEM; |
| 2873 | img_request = rbd_img_request_create(rbd_dev, offset, length, | 3097 | img_request = rbd_img_request_create(rbd_dev, offset, length, |
| 2874 | write_request, false); | 3098 | write_request); |
| 2875 | if (!img_request) | 3099 | if (!img_request) |
| 2876 | goto end_request; | 3100 | goto end_request; |
| 2877 | 3101 | ||
| @@ -3022,17 +3246,11 @@ out: | |||
| 3022 | } | 3246 | } |
| 3023 | 3247 | ||
| 3024 | /* | 3248 | /* |
| 3025 | * Read the complete header for the given rbd device. | 3249 | * Read the complete header for the given rbd device. On successful |
| 3026 | * | 3250 | * return, the rbd_dev->header field will contain up-to-date |
| 3027 | * Returns a pointer to a dynamically-allocated buffer containing | 3251 | * information about the image. |
| 3028 | * the complete and validated header. Caller can pass the address | ||
| 3029 | * of a variable that will be filled in with the version of the | ||
| 3030 | * header object at the time it was read. | ||
| 3031 | * | ||
| 3032 | * Returns a pointer-coded errno if a failure occurs. | ||
| 3033 | */ | 3252 | */ |
| 3034 | static struct rbd_image_header_ondisk * | 3253 | static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev) |
| 3035 | rbd_dev_v1_header_read(struct rbd_device *rbd_dev) | ||
| 3036 | { | 3254 | { |
| 3037 | struct rbd_image_header_ondisk *ondisk = NULL; | 3255 | struct rbd_image_header_ondisk *ondisk = NULL; |
| 3038 | u32 snap_count = 0; | 3256 | u32 snap_count = 0; |
| @@ -3057,22 +3275,22 @@ rbd_dev_v1_header_read(struct rbd_device *rbd_dev) | |||
| 3057 | size += names_size; | 3275 | size += names_size; |
| 3058 | ondisk = kmalloc(size, GFP_KERNEL); | 3276 | ondisk = kmalloc(size, GFP_KERNEL); |
| 3059 | if (!ondisk) | 3277 | if (!ondisk) |
| 3060 | return ERR_PTR(-ENOMEM); | 3278 | return -ENOMEM; |
| 3061 | 3279 | ||
| 3062 | ret = rbd_obj_read_sync(rbd_dev, rbd_dev->header_name, | 3280 | ret = rbd_obj_read_sync(rbd_dev, rbd_dev->header_name, |
| 3063 | 0, size, ondisk); | 3281 | 0, size, ondisk); |
| 3064 | if (ret < 0) | 3282 | if (ret < 0) |
| 3065 | goto out_err; | 3283 | goto out; |
| 3066 | if ((size_t)ret < size) { | 3284 | if ((size_t)ret < size) { |
| 3067 | ret = -ENXIO; | 3285 | ret = -ENXIO; |
| 3068 | rbd_warn(rbd_dev, "short header read (want %zd got %d)", | 3286 | rbd_warn(rbd_dev, "short header read (want %zd got %d)", |
| 3069 | size, ret); | 3287 | size, ret); |
| 3070 | goto out_err; | 3288 | goto out; |
| 3071 | } | 3289 | } |
| 3072 | if (!rbd_dev_ondisk_valid(ondisk)) { | 3290 | if (!rbd_dev_ondisk_valid(ondisk)) { |
| 3073 | ret = -ENXIO; | 3291 | ret = -ENXIO; |
| 3074 | rbd_warn(rbd_dev, "invalid header"); | 3292 | rbd_warn(rbd_dev, "invalid header"); |
| 3075 | goto out_err; | 3293 | goto out; |
| 3076 | } | 3294 | } |
| 3077 | 3295 | ||
| 3078 | names_size = le64_to_cpu(ondisk->snap_names_len); | 3296 | names_size = le64_to_cpu(ondisk->snap_names_len); |
| @@ -3080,85 +3298,13 @@ rbd_dev_v1_header_read(struct rbd_device *rbd_dev) | |||
| 3080 | snap_count = le32_to_cpu(ondisk->snap_count); | 3298 | snap_count = le32_to_cpu(ondisk->snap_count); |
| 3081 | } while (snap_count != want_count); | 3299 | } while (snap_count != want_count); |
| 3082 | 3300 | ||
| 3083 | return ondisk; | 3301 | ret = rbd_header_from_disk(rbd_dev, ondisk); |
| 3084 | 3302 | out: | |
| 3085 | out_err: | ||
| 3086 | kfree(ondisk); | ||
| 3087 | |||
| 3088 | return ERR_PTR(ret); | ||
| 3089 | } | ||
| 3090 | |||
| 3091 | /* | ||
| 3092 | * reload the ondisk the header | ||
| 3093 | */ | ||
| 3094 | static int rbd_read_header(struct rbd_device *rbd_dev, | ||
| 3095 | struct rbd_image_header *header) | ||
| 3096 | { | ||
| 3097 | struct rbd_image_header_ondisk *ondisk; | ||
| 3098 | int ret; | ||
| 3099 | |||
| 3100 | ondisk = rbd_dev_v1_header_read(rbd_dev); | ||
| 3101 | if (IS_ERR(ondisk)) | ||
| 3102 | return PTR_ERR(ondisk); | ||
| 3103 | ret = rbd_header_from_disk(header, ondisk); | ||
| 3104 | kfree(ondisk); | 3303 | kfree(ondisk); |
| 3105 | 3304 | ||
| 3106 | return ret; | 3305 | return ret; |
| 3107 | } | 3306 | } |
| 3108 | 3307 | ||
| 3109 | static void rbd_update_mapping_size(struct rbd_device *rbd_dev) | ||
| 3110 | { | ||
| 3111 | if (rbd_dev->spec->snap_id != CEPH_NOSNAP) | ||
| 3112 | return; | ||
| 3113 | |||
| 3114 | if (rbd_dev->mapping.size != rbd_dev->header.image_size) { | ||
| 3115 | sector_t size; | ||
| 3116 | |||
| 3117 | rbd_dev->mapping.size = rbd_dev->header.image_size; | ||
| 3118 | size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE; | ||
| 3119 | dout("setting size to %llu sectors", (unsigned long long)size); | ||
| 3120 | set_capacity(rbd_dev->disk, size); | ||
| 3121 | } | ||
| 3122 | } | ||
| 3123 | |||
| 3124 | /* | ||
| 3125 | * only read the first part of the ondisk header, without the snaps info | ||
| 3126 | */ | ||
| 3127 | static int rbd_dev_v1_refresh(struct rbd_device *rbd_dev) | ||
| 3128 | { | ||
| 3129 | int ret; | ||
| 3130 | struct rbd_image_header h; | ||
| 3131 | |||
| 3132 | ret = rbd_read_header(rbd_dev, &h); | ||
| 3133 | if (ret < 0) | ||
| 3134 | return ret; | ||
| 3135 | |||
| 3136 | down_write(&rbd_dev->header_rwsem); | ||
| 3137 | |||
| 3138 | /* Update image size, and check for resize of mapped image */ | ||
| 3139 | rbd_dev->header.image_size = h.image_size; | ||
| 3140 | rbd_update_mapping_size(rbd_dev); | ||
| 3141 | |||
| 3142 | /* rbd_dev->header.object_prefix shouldn't change */ | ||
| 3143 | kfree(rbd_dev->header.snap_sizes); | ||
| 3144 | kfree(rbd_dev->header.snap_names); | ||
| 3145 | /* osd requests may still refer to snapc */ | ||
| 3146 | ceph_put_snap_context(rbd_dev->header.snapc); | ||
| 3147 | |||
| 3148 | rbd_dev->header.image_size = h.image_size; | ||
| 3149 | rbd_dev->header.snapc = h.snapc; | ||
| 3150 | rbd_dev->header.snap_names = h.snap_names; | ||
| 3151 | rbd_dev->header.snap_sizes = h.snap_sizes; | ||
| 3152 | /* Free the extra copy of the object prefix */ | ||
| 3153 | if (strcmp(rbd_dev->header.object_prefix, h.object_prefix)) | ||
| 3154 | rbd_warn(rbd_dev, "object prefix changed (ignoring)"); | ||
| 3155 | kfree(h.object_prefix); | ||
| 3156 | |||
| 3157 | up_write(&rbd_dev->header_rwsem); | ||
| 3158 | |||
| 3159 | return ret; | ||
| 3160 | } | ||
| 3161 | |||
| 3162 | /* | 3308 | /* |
| 3163 | * Clear the rbd device's EXISTS flag if the snapshot it's mapped to | 3309 | * Clear the rbd device's EXISTS flag if the snapshot it's mapped to |
| 3164 | * has disappeared from the (just updated) snapshot context. | 3310 | * has disappeared from the (just updated) snapshot context. |
| @@ -3180,26 +3326,29 @@ static void rbd_exists_validate(struct rbd_device *rbd_dev) | |||
| 3180 | 3326 | ||
| 3181 | static int rbd_dev_refresh(struct rbd_device *rbd_dev) | 3327 | static int rbd_dev_refresh(struct rbd_device *rbd_dev) |
| 3182 | { | 3328 | { |
| 3183 | u64 image_size; | 3329 | u64 mapping_size; |
| 3184 | int ret; | 3330 | int ret; |
| 3185 | 3331 | ||
| 3186 | rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); | 3332 | rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); |
| 3187 | image_size = rbd_dev->header.image_size; | 3333 | mapping_size = rbd_dev->mapping.size; |
| 3188 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | 3334 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); |
| 3189 | if (rbd_dev->image_format == 1) | 3335 | if (rbd_dev->image_format == 1) |
| 3190 | ret = rbd_dev_v1_refresh(rbd_dev); | 3336 | ret = rbd_dev_v1_header_info(rbd_dev); |
| 3191 | else | 3337 | else |
| 3192 | ret = rbd_dev_v2_refresh(rbd_dev); | 3338 | ret = rbd_dev_v2_header_info(rbd_dev); |
| 3193 | 3339 | ||
| 3194 | /* If it's a mapped snapshot, validate its EXISTS flag */ | 3340 | /* If it's a mapped snapshot, validate its EXISTS flag */ |
| 3195 | 3341 | ||
| 3196 | rbd_exists_validate(rbd_dev); | 3342 | rbd_exists_validate(rbd_dev); |
| 3197 | mutex_unlock(&ctl_mutex); | 3343 | mutex_unlock(&ctl_mutex); |
| 3198 | if (ret) | 3344 | if (mapping_size != rbd_dev->mapping.size) { |
| 3199 | rbd_warn(rbd_dev, "got notification but failed to " | 3345 | sector_t size; |
| 3200 | " update snaps: %d\n", ret); | 3346 | |
| 3201 | if (image_size != rbd_dev->header.image_size) | 3347 | size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE; |
| 3348 | dout("setting size to %llu sectors", (unsigned long long)size); | ||
| 3349 | set_capacity(rbd_dev->disk, size); | ||
| 3202 | revalidate_disk(rbd_dev->disk); | 3350 | revalidate_disk(rbd_dev->disk); |
| 3351 | } | ||
| 3203 | 3352 | ||
| 3204 | return ret; | 3353 | return ret; |
| 3205 | } | 3354 | } |
| @@ -3403,6 +3552,8 @@ static ssize_t rbd_image_refresh(struct device *dev, | |||
| 3403 | int ret; | 3552 | int ret; |
| 3404 | 3553 | ||
| 3405 | ret = rbd_dev_refresh(rbd_dev); | 3554 | ret = rbd_dev_refresh(rbd_dev); |
| 3555 | if (ret) | ||
| 3556 | rbd_warn(rbd_dev, ": manual header refresh error (%d)\n", ret); | ||
| 3406 | 3557 | ||
| 3407 | return ret < 0 ? ret : size; | 3558 | return ret < 0 ? ret : size; |
| 3408 | } | 3559 | } |
| @@ -3501,6 +3652,7 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, | |||
| 3501 | 3652 | ||
| 3502 | spin_lock_init(&rbd_dev->lock); | 3653 | spin_lock_init(&rbd_dev->lock); |
| 3503 | rbd_dev->flags = 0; | 3654 | rbd_dev->flags = 0; |
| 3655 | atomic_set(&rbd_dev->parent_ref, 0); | ||
| 3504 | INIT_LIST_HEAD(&rbd_dev->node); | 3656 | INIT_LIST_HEAD(&rbd_dev->node); |
| 3505 | init_rwsem(&rbd_dev->header_rwsem); | 3657 | init_rwsem(&rbd_dev->header_rwsem); |
| 3506 | 3658 | ||
| @@ -3650,6 +3802,7 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) | |||
| 3650 | __le64 snapid; | 3802 | __le64 snapid; |
| 3651 | void *p; | 3803 | void *p; |
| 3652 | void *end; | 3804 | void *end; |
| 3805 | u64 pool_id; | ||
| 3653 | char *image_id; | 3806 | char *image_id; |
| 3654 | u64 overlap; | 3807 | u64 overlap; |
| 3655 | int ret; | 3808 | int ret; |
| @@ -3680,18 +3833,37 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) | |||
| 3680 | p = reply_buf; | 3833 | p = reply_buf; |
| 3681 | end = reply_buf + ret; | 3834 | end = reply_buf + ret; |
| 3682 | ret = -ERANGE; | 3835 | ret = -ERANGE; |
| 3683 | ceph_decode_64_safe(&p, end, parent_spec->pool_id, out_err); | 3836 | ceph_decode_64_safe(&p, end, pool_id, out_err); |
| 3684 | if (parent_spec->pool_id == CEPH_NOPOOL) | 3837 | if (pool_id == CEPH_NOPOOL) { |
| 3838 | /* | ||
| 3839 | * Either the parent never existed, or we have | ||
| 3840 | * record of it but the image got flattened so it no | ||
| 3841 | * longer has a parent. When the parent of a | ||
| 3842 | * layered image disappears we immediately set the | ||
| 3843 | * overlap to 0. The effect of this is that all new | ||
| 3844 | * requests will be treated as if the image had no | ||
| 3845 | * parent. | ||
| 3846 | */ | ||
| 3847 | if (rbd_dev->parent_overlap) { | ||
| 3848 | rbd_dev->parent_overlap = 0; | ||
| 3849 | smp_mb(); | ||
| 3850 | rbd_dev_parent_put(rbd_dev); | ||
| 3851 | pr_info("%s: clone image has been flattened\n", | ||
| 3852 | rbd_dev->disk->disk_name); | ||
| 3853 | } | ||
| 3854 | |||
| 3685 | goto out; /* No parent? No problem. */ | 3855 | goto out; /* No parent? No problem. */ |
| 3856 | } | ||
| 3686 | 3857 | ||
| 3687 | /* The ceph file layout needs to fit pool id in 32 bits */ | 3858 | /* The ceph file layout needs to fit pool id in 32 bits */ |
| 3688 | 3859 | ||
| 3689 | ret = -EIO; | 3860 | ret = -EIO; |
| 3690 | if (parent_spec->pool_id > (u64)U32_MAX) { | 3861 | if (pool_id > (u64)U32_MAX) { |
| 3691 | rbd_warn(NULL, "parent pool id too large (%llu > %u)\n", | 3862 | rbd_warn(NULL, "parent pool id too large (%llu > %u)\n", |
| 3692 | (unsigned long long)parent_spec->pool_id, U32_MAX); | 3863 | (unsigned long long)pool_id, U32_MAX); |
| 3693 | goto out_err; | 3864 | goto out_err; |
| 3694 | } | 3865 | } |
| 3866 | parent_spec->pool_id = pool_id; | ||
| 3695 | 3867 | ||
| 3696 | image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL); | 3868 | image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL); |
| 3697 | if (IS_ERR(image_id)) { | 3869 | if (IS_ERR(image_id)) { |
| @@ -3702,9 +3874,14 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) | |||
| 3702 | ceph_decode_64_safe(&p, end, parent_spec->snap_id, out_err); | 3874 | ceph_decode_64_safe(&p, end, parent_spec->snap_id, out_err); |
| 3703 | ceph_decode_64_safe(&p, end, overlap, out_err); | 3875 | ceph_decode_64_safe(&p, end, overlap, out_err); |
| 3704 | 3876 | ||
| 3705 | rbd_dev->parent_overlap = overlap; | 3877 | if (overlap) { |
| 3706 | rbd_dev->parent_spec = parent_spec; | 3878 | rbd_spec_put(rbd_dev->parent_spec); |
| 3707 | parent_spec = NULL; /* rbd_dev now owns this */ | 3879 | rbd_dev->parent_spec = parent_spec; |
| 3880 | parent_spec = NULL; /* rbd_dev now owns this */ | ||
| 3881 | rbd_dev->parent_overlap = overlap; | ||
| 3882 | } else { | ||
| 3883 | rbd_warn(rbd_dev, "ignoring parent of clone with overlap 0\n"); | ||
| 3884 | } | ||
| 3708 | out: | 3885 | out: |
| 3709 | ret = 0; | 3886 | ret = 0; |
| 3710 | out_err: | 3887 | out_err: |
| @@ -4002,6 +4179,7 @@ static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev) | |||
| 4002 | for (i = 0; i < snap_count; i++) | 4179 | for (i = 0; i < snap_count; i++) |
| 4003 | snapc->snaps[i] = ceph_decode_64(&p); | 4180 | snapc->snaps[i] = ceph_decode_64(&p); |
| 4004 | 4181 | ||
| 4182 | ceph_put_snap_context(rbd_dev->header.snapc); | ||
| 4005 | rbd_dev->header.snapc = snapc; | 4183 | rbd_dev->header.snapc = snapc; |
| 4006 | 4184 | ||
| 4007 | dout(" snap context seq = %llu, snap_count = %u\n", | 4185 | dout(" snap context seq = %llu, snap_count = %u\n", |
| @@ -4053,21 +4231,56 @@ out: | |||
| 4053 | return snap_name; | 4231 | return snap_name; |
| 4054 | } | 4232 | } |
| 4055 | 4233 | ||
| 4056 | static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev) | 4234 | static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev) |
| 4057 | { | 4235 | { |
| 4236 | bool first_time = rbd_dev->header.object_prefix == NULL; | ||
| 4058 | int ret; | 4237 | int ret; |
| 4059 | 4238 | ||
| 4060 | down_write(&rbd_dev->header_rwsem); | 4239 | down_write(&rbd_dev->header_rwsem); |
| 4061 | 4240 | ||
| 4241 | if (first_time) { | ||
| 4242 | ret = rbd_dev_v2_header_onetime(rbd_dev); | ||
| 4243 | if (ret) | ||
| 4244 | goto out; | ||
| 4245 | } | ||
| 4246 | |||
| 4247 | /* | ||
| 4248 | * If the image supports layering, get the parent info. We | ||
| 4249 | * need to probe the first time regardless. Thereafter we | ||
| 4250 | * only need to if there's a parent, to see if it has | ||
| 4251 | * disappeared due to the mapped image getting flattened. | ||
| 4252 | */ | ||
| 4253 | if (rbd_dev->header.features & RBD_FEATURE_LAYERING && | ||
| 4254 | (first_time || rbd_dev->parent_spec)) { | ||
| 4255 | bool warn; | ||
| 4256 | |||
| 4257 | ret = rbd_dev_v2_parent_info(rbd_dev); | ||
| 4258 | if (ret) | ||
| 4259 | goto out; | ||
| 4260 | |||
| 4261 | /* | ||
| 4262 | * Print a warning if this is the initial probe and | ||
| 4263 | * the image has a parent. Don't print it if the | ||
| 4264 | * image now being probed is itself a parent. We | ||
| 4265 | * can tell at this point because we won't know its | ||
| 4266 | * pool name yet (just its pool id). | ||
| 4267 | */ | ||
| 4268 | warn = rbd_dev->parent_spec && rbd_dev->spec->pool_name; | ||
| 4269 | if (first_time && warn) | ||
| 4270 | rbd_warn(rbd_dev, "WARNING: kernel layering " | ||
| 4271 | "is EXPERIMENTAL!"); | ||
| 4272 | } | ||
| 4273 | |||
| 4062 | ret = rbd_dev_v2_image_size(rbd_dev); | 4274 | ret = rbd_dev_v2_image_size(rbd_dev); |
| 4063 | if (ret) | 4275 | if (ret) |
| 4064 | goto out; | 4276 | goto out; |
| 4065 | rbd_update_mapping_size(rbd_dev); | 4277 | |
| 4278 | if (rbd_dev->spec->snap_id == CEPH_NOSNAP) | ||
| 4279 | if (rbd_dev->mapping.size != rbd_dev->header.image_size) | ||
| 4280 | rbd_dev->mapping.size = rbd_dev->header.image_size; | ||
| 4066 | 4281 | ||
| 4067 | ret = rbd_dev_v2_snap_context(rbd_dev); | 4282 | ret = rbd_dev_v2_snap_context(rbd_dev); |
| 4068 | dout("rbd_dev_v2_snap_context returned %d\n", ret); | 4283 | dout("rbd_dev_v2_snap_context returned %d\n", ret); |
| 4069 | if (ret) | ||
| 4070 | goto out; | ||
| 4071 | out: | 4284 | out: |
| 4072 | up_write(&rbd_dev->header_rwsem); | 4285 | up_write(&rbd_dev->header_rwsem); |
| 4073 | 4286 | ||
| @@ -4490,10 +4703,10 @@ static void rbd_dev_unprobe(struct rbd_device *rbd_dev) | |||
| 4490 | { | 4703 | { |
| 4491 | struct rbd_image_header *header; | 4704 | struct rbd_image_header *header; |
| 4492 | 4705 | ||
| 4493 | rbd_dev_remove_parent(rbd_dev); | 4706 | /* Drop parent reference unless it's already been done (or none) */ |
| 4494 | rbd_spec_put(rbd_dev->parent_spec); | 4707 | |
| 4495 | rbd_dev->parent_spec = NULL; | 4708 | if (rbd_dev->parent_overlap) |
| 4496 | rbd_dev->parent_overlap = 0; | 4709 | rbd_dev_parent_put(rbd_dev); |
| 4497 | 4710 | ||
| 4498 | /* Free dynamic fields from the header, then zero it out */ | 4711 | /* Free dynamic fields from the header, then zero it out */ |
| 4499 | 4712 | ||
| @@ -4505,72 +4718,22 @@ static void rbd_dev_unprobe(struct rbd_device *rbd_dev) | |||
| 4505 | memset(header, 0, sizeof (*header)); | 4718 | memset(header, 0, sizeof (*header)); |
| 4506 | } | 4719 | } |
| 4507 | 4720 | ||
| 4508 | static int rbd_dev_v1_probe(struct rbd_device *rbd_dev) | 4721 | static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev) |
| 4509 | { | 4722 | { |
| 4510 | int ret; | 4723 | int ret; |
| 4511 | 4724 | ||
| 4512 | /* Populate rbd image metadata */ | ||
| 4513 | |||
| 4514 | ret = rbd_read_header(rbd_dev, &rbd_dev->header); | ||
| 4515 | if (ret < 0) | ||
| 4516 | goto out_err; | ||
| 4517 | |||
| 4518 | /* Version 1 images have no parent (no layering) */ | ||
| 4519 | |||
| 4520 | rbd_dev->parent_spec = NULL; | ||
| 4521 | rbd_dev->parent_overlap = 0; | ||
| 4522 | |||
| 4523 | dout("discovered version 1 image, header name is %s\n", | ||
| 4524 | rbd_dev->header_name); | ||
| 4525 | |||
| 4526 | return 0; | ||
| 4527 | |||
| 4528 | out_err: | ||
| 4529 | kfree(rbd_dev->header_name); | ||
| 4530 | rbd_dev->header_name = NULL; | ||
| 4531 | kfree(rbd_dev->spec->image_id); | ||
| 4532 | rbd_dev->spec->image_id = NULL; | ||
| 4533 | |||
| 4534 | return ret; | ||
| 4535 | } | ||
| 4536 | |||
| 4537 | static int rbd_dev_v2_probe(struct rbd_device *rbd_dev) | ||
| 4538 | { | ||
| 4539 | int ret; | ||
| 4540 | |||
| 4541 | ret = rbd_dev_v2_image_size(rbd_dev); | ||
| 4542 | if (ret) | ||
| 4543 | goto out_err; | ||
| 4544 | |||
| 4545 | /* Get the object prefix (a.k.a. block_name) for the image */ | ||
| 4546 | |||
| 4547 | ret = rbd_dev_v2_object_prefix(rbd_dev); | 4725 | ret = rbd_dev_v2_object_prefix(rbd_dev); |
| 4548 | if (ret) | 4726 | if (ret) |
| 4549 | goto out_err; | 4727 | goto out_err; |
| 4550 | 4728 | ||
| 4551 | /* Get the and check features for the image */ | 4729 | /* |
| 4552 | 4730 | * Get and check the features for the image. Currently the | |
| 4731 | * features are assumed to never change. | ||
| 4732 | */ | ||
| 4553 | ret = rbd_dev_v2_features(rbd_dev); | 4733 | ret = rbd_dev_v2_features(rbd_dev); |
| 4554 | if (ret) | 4734 | if (ret) |
| 4555 | goto out_err; | 4735 | goto out_err; |
| 4556 | 4736 | ||
| 4557 | /* If the image supports layering, get the parent info */ | ||
| 4558 | |||
| 4559 | if (rbd_dev->header.features & RBD_FEATURE_LAYERING) { | ||
| 4560 | ret = rbd_dev_v2_parent_info(rbd_dev); | ||
| 4561 | if (ret) | ||
| 4562 | goto out_err; | ||
| 4563 | |||
| 4564 | /* | ||
| 4565 | * Don't print a warning for parent images. We can | ||
| 4566 | * tell this point because we won't know its pool | ||
| 4567 | * name yet (just its pool id). | ||
| 4568 | */ | ||
| 4569 | if (rbd_dev->spec->pool_name) | ||
| 4570 | rbd_warn(rbd_dev, "WARNING: kernel layering " | ||
| 4571 | "is EXPERIMENTAL!"); | ||
| 4572 | } | ||
| 4573 | |||
| 4574 | /* If the image supports fancy striping, get its parameters */ | 4737 | /* If the image supports fancy striping, get its parameters */ |
| 4575 | 4738 | ||
| 4576 | if (rbd_dev->header.features & RBD_FEATURE_STRIPINGV2) { | 4739 | if (rbd_dev->header.features & RBD_FEATURE_STRIPINGV2) { |
| @@ -4578,28 +4741,11 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev) | |||
| 4578 | if (ret < 0) | 4741 | if (ret < 0) |
| 4579 | goto out_err; | 4742 | goto out_err; |
| 4580 | } | 4743 | } |
| 4581 | 4744 | /* No support for crypto and compression types in format 2 images */ | |
| 4582 | /* crypto and compression type aren't (yet) supported for v2 images */ | ||
| 4583 | |||
| 4584 | rbd_dev->header.crypt_type = 0; | ||
| 4585 | rbd_dev->header.comp_type = 0; | ||
| 4586 | |||
| 4587 | /* Get the snapshot context, plus the header version */ | ||
| 4588 | |||
| 4589 | ret = rbd_dev_v2_snap_context(rbd_dev); | ||
| 4590 | if (ret) | ||
| 4591 | goto out_err; | ||
| 4592 | |||
| 4593 | dout("discovered version 2 image, header name is %s\n", | ||
| 4594 | rbd_dev->header_name); | ||
| 4595 | 4745 | ||
| 4596 | return 0; | 4746 | return 0; |
| 4597 | out_err: | 4747 | out_err: |
| 4598 | rbd_dev->parent_overlap = 0; | 4748 | rbd_dev->header.features = 0; |
| 4599 | rbd_spec_put(rbd_dev->parent_spec); | ||
| 4600 | rbd_dev->parent_spec = NULL; | ||
| 4601 | kfree(rbd_dev->header_name); | ||
| 4602 | rbd_dev->header_name = NULL; | ||
| 4603 | kfree(rbd_dev->header.object_prefix); | 4749 | kfree(rbd_dev->header.object_prefix); |
| 4604 | rbd_dev->header.object_prefix = NULL; | 4750 | rbd_dev->header.object_prefix = NULL; |
| 4605 | 4751 | ||
| @@ -4628,15 +4774,16 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev) | |||
| 4628 | if (!parent) | 4774 | if (!parent) |
| 4629 | goto out_err; | 4775 | goto out_err; |
| 4630 | 4776 | ||
| 4631 | ret = rbd_dev_image_probe(parent); | 4777 | ret = rbd_dev_image_probe(parent, false); |
| 4632 | if (ret < 0) | 4778 | if (ret < 0) |
| 4633 | goto out_err; | 4779 | goto out_err; |
| 4634 | rbd_dev->parent = parent; | 4780 | rbd_dev->parent = parent; |
| 4781 | atomic_set(&rbd_dev->parent_ref, 1); | ||
| 4635 | 4782 | ||
| 4636 | return 0; | 4783 | return 0; |
| 4637 | out_err: | 4784 | out_err: |
| 4638 | if (parent) { | 4785 | if (parent) { |
| 4639 | rbd_spec_put(rbd_dev->parent_spec); | 4786 | rbd_dev_unparent(rbd_dev); |
| 4640 | kfree(rbd_dev->header_name); | 4787 | kfree(rbd_dev->header_name); |
| 4641 | rbd_dev_destroy(parent); | 4788 | rbd_dev_destroy(parent); |
| 4642 | } else { | 4789 | } else { |
| @@ -4651,10 +4798,6 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev) | |||
| 4651 | { | 4798 | { |
| 4652 | int ret; | 4799 | int ret; |
| 4653 | 4800 | ||
| 4654 | ret = rbd_dev_mapping_set(rbd_dev); | ||
| 4655 | if (ret) | ||
| 4656 | return ret; | ||
| 4657 | |||
| 4658 | /* generate unique id: find highest unique id, add one */ | 4801 | /* generate unique id: find highest unique id, add one */ |
| 4659 | rbd_dev_id_get(rbd_dev); | 4802 | rbd_dev_id_get(rbd_dev); |
| 4660 | 4803 | ||
| @@ -4676,13 +4819,17 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev) | |||
| 4676 | if (ret) | 4819 | if (ret) |
| 4677 | goto err_out_blkdev; | 4820 | goto err_out_blkdev; |
| 4678 | 4821 | ||
| 4679 | ret = rbd_bus_add_dev(rbd_dev); | 4822 | ret = rbd_dev_mapping_set(rbd_dev); |
| 4680 | if (ret) | 4823 | if (ret) |
| 4681 | goto err_out_disk; | 4824 | goto err_out_disk; |
| 4825 | set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE); | ||
| 4826 | |||
| 4827 | ret = rbd_bus_add_dev(rbd_dev); | ||
| 4828 | if (ret) | ||
| 4829 | goto err_out_mapping; | ||
| 4682 | 4830 | ||
| 4683 | /* Everything's ready. Announce the disk to the world. */ | 4831 | /* Everything's ready. Announce the disk to the world. */ |
| 4684 | 4832 | ||
| 4685 | set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE); | ||
| 4686 | set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); | 4833 | set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); |
| 4687 | add_disk(rbd_dev->disk); | 4834 | add_disk(rbd_dev->disk); |
| 4688 | 4835 | ||
| @@ -4691,6 +4838,8 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev) | |||
| 4691 | 4838 | ||
| 4692 | return ret; | 4839 | return ret; |
| 4693 | 4840 | ||
| 4841 | err_out_mapping: | ||
| 4842 | rbd_dev_mapping_clear(rbd_dev); | ||
| 4694 | err_out_disk: | 4843 | err_out_disk: |
| 4695 | rbd_free_disk(rbd_dev); | 4844 | rbd_free_disk(rbd_dev); |
| 4696 | err_out_blkdev: | 4845 | err_out_blkdev: |
| @@ -4731,12 +4880,7 @@ static int rbd_dev_header_name(struct rbd_device *rbd_dev) | |||
| 4731 | 4880 | ||
| 4732 | static void rbd_dev_image_release(struct rbd_device *rbd_dev) | 4881 | static void rbd_dev_image_release(struct rbd_device *rbd_dev) |
| 4733 | { | 4882 | { |
| 4734 | int ret; | ||
| 4735 | |||
| 4736 | rbd_dev_unprobe(rbd_dev); | 4883 | rbd_dev_unprobe(rbd_dev); |
| 4737 | ret = rbd_dev_header_watch_sync(rbd_dev, 0); | ||
| 4738 | if (ret) | ||
| 4739 | rbd_warn(rbd_dev, "failed to cancel watch event (%d)\n", ret); | ||
| 4740 | kfree(rbd_dev->header_name); | 4884 | kfree(rbd_dev->header_name); |
| 4741 | rbd_dev->header_name = NULL; | 4885 | rbd_dev->header_name = NULL; |
| 4742 | rbd_dev->image_format = 0; | 4886 | rbd_dev->image_format = 0; |
| @@ -4748,10 +4892,11 @@ static void rbd_dev_image_release(struct rbd_device *rbd_dev) | |||
| 4748 | 4892 | ||
| 4749 | /* | 4893 | /* |
| 4750 | * Probe for the existence of the header object for the given rbd | 4894 | * Probe for the existence of the header object for the given rbd |
| 4751 | * device. For format 2 images this includes determining the image | 4895 | * device. If this image is the one being mapped (i.e., not a |
| 4752 | * id. | 4896 | * parent), initiate a watch on its header object before using that |
| 4897 | * object to get detailed information about the rbd image. | ||
| 4753 | */ | 4898 | */ |
| 4754 | static int rbd_dev_image_probe(struct rbd_device *rbd_dev) | 4899 | static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping) |
| 4755 | { | 4900 | { |
| 4756 | int ret; | 4901 | int ret; |
| 4757 | int tmp; | 4902 | int tmp; |
| @@ -4771,14 +4916,16 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev) | |||
| 4771 | if (ret) | 4916 | if (ret) |
| 4772 | goto err_out_format; | 4917 | goto err_out_format; |
| 4773 | 4918 | ||
| 4774 | ret = rbd_dev_header_watch_sync(rbd_dev, 1); | 4919 | if (mapping) { |
| 4775 | if (ret) | 4920 | ret = rbd_dev_header_watch_sync(rbd_dev, true); |
| 4776 | goto out_header_name; | 4921 | if (ret) |
| 4922 | goto out_header_name; | ||
| 4923 | } | ||
| 4777 | 4924 | ||
| 4778 | if (rbd_dev->image_format == 1) | 4925 | if (rbd_dev->image_format == 1) |
| 4779 | ret = rbd_dev_v1_probe(rbd_dev); | 4926 | ret = rbd_dev_v1_header_info(rbd_dev); |
| 4780 | else | 4927 | else |
| 4781 | ret = rbd_dev_v2_probe(rbd_dev); | 4928 | ret = rbd_dev_v2_header_info(rbd_dev); |
| 4782 | if (ret) | 4929 | if (ret) |
| 4783 | goto err_out_watch; | 4930 | goto err_out_watch; |
| 4784 | 4931 | ||
| @@ -4787,15 +4934,22 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev) | |||
| 4787 | goto err_out_probe; | 4934 | goto err_out_probe; |
| 4788 | 4935 | ||
| 4789 | ret = rbd_dev_probe_parent(rbd_dev); | 4936 | ret = rbd_dev_probe_parent(rbd_dev); |
| 4790 | if (!ret) | 4937 | if (ret) |
| 4791 | return 0; | 4938 | goto err_out_probe; |
| 4939 | |||
| 4940 | dout("discovered format %u image, header name is %s\n", | ||
| 4941 | rbd_dev->image_format, rbd_dev->header_name); | ||
| 4792 | 4942 | ||
| 4943 | return 0; | ||
| 4793 | err_out_probe: | 4944 | err_out_probe: |
| 4794 | rbd_dev_unprobe(rbd_dev); | 4945 | rbd_dev_unprobe(rbd_dev); |
| 4795 | err_out_watch: | 4946 | err_out_watch: |
| 4796 | tmp = rbd_dev_header_watch_sync(rbd_dev, 0); | 4947 | if (mapping) { |
| 4797 | if (tmp) | 4948 | tmp = rbd_dev_header_watch_sync(rbd_dev, false); |
| 4798 | rbd_warn(rbd_dev, "unable to tear down watch request\n"); | 4949 | if (tmp) |
| 4950 | rbd_warn(rbd_dev, "unable to tear down " | ||
| 4951 | "watch request (%d)\n", tmp); | ||
| 4952 | } | ||
| 4799 | out_header_name: | 4953 | out_header_name: |
| 4800 | kfree(rbd_dev->header_name); | 4954 | kfree(rbd_dev->header_name); |
| 4801 | rbd_dev->header_name = NULL; | 4955 | rbd_dev->header_name = NULL; |
| @@ -4819,6 +4973,7 @@ static ssize_t rbd_add(struct bus_type *bus, | |||
| 4819 | struct rbd_spec *spec = NULL; | 4973 | struct rbd_spec *spec = NULL; |
| 4820 | struct rbd_client *rbdc; | 4974 | struct rbd_client *rbdc; |
| 4821 | struct ceph_osd_client *osdc; | 4975 | struct ceph_osd_client *osdc; |
| 4976 | bool read_only; | ||
| 4822 | int rc = -ENOMEM; | 4977 | int rc = -ENOMEM; |
| 4823 | 4978 | ||
| 4824 | if (!try_module_get(THIS_MODULE)) | 4979 | if (!try_module_get(THIS_MODULE)) |
| @@ -4828,6 +4983,9 @@ static ssize_t rbd_add(struct bus_type *bus, | |||
| 4828 | rc = rbd_add_parse_args(buf, &ceph_opts, &rbd_opts, &spec); | 4983 | rc = rbd_add_parse_args(buf, &ceph_opts, &rbd_opts, &spec); |
| 4829 | if (rc < 0) | 4984 | if (rc < 0) |
| 4830 | goto err_out_module; | 4985 | goto err_out_module; |
| 4986 | read_only = rbd_opts->read_only; | ||
| 4987 | kfree(rbd_opts); | ||
| 4988 | rbd_opts = NULL; /* done with this */ | ||
| 4831 | 4989 | ||
| 4832 | rbdc = rbd_get_client(ceph_opts); | 4990 | rbdc = rbd_get_client(ceph_opts); |
| 4833 | if (IS_ERR(rbdc)) { | 4991 | if (IS_ERR(rbdc)) { |
| @@ -4858,14 +5016,16 @@ static ssize_t rbd_add(struct bus_type *bus, | |||
| 4858 | rbdc = NULL; /* rbd_dev now owns this */ | 5016 | rbdc = NULL; /* rbd_dev now owns this */ |
| 4859 | spec = NULL; /* rbd_dev now owns this */ | 5017 | spec = NULL; /* rbd_dev now owns this */ |
| 4860 | 5018 | ||
| 4861 | rbd_dev->mapping.read_only = rbd_opts->read_only; | 5019 | rc = rbd_dev_image_probe(rbd_dev, true); |
| 4862 | kfree(rbd_opts); | ||
| 4863 | rbd_opts = NULL; /* done with this */ | ||
| 4864 | |||
| 4865 | rc = rbd_dev_image_probe(rbd_dev); | ||
| 4866 | if (rc < 0) | 5020 | if (rc < 0) |
| 4867 | goto err_out_rbd_dev; | 5021 | goto err_out_rbd_dev; |
| 4868 | 5022 | ||
| 5023 | /* If we are mapping a snapshot it must be marked read-only */ | ||
| 5024 | |||
| 5025 | if (rbd_dev->spec->snap_id != CEPH_NOSNAP) | ||
| 5026 | read_only = true; | ||
| 5027 | rbd_dev->mapping.read_only = read_only; | ||
| 5028 | |||
| 4869 | rc = rbd_dev_device_setup(rbd_dev); | 5029 | rc = rbd_dev_device_setup(rbd_dev); |
| 4870 | if (!rc) | 5030 | if (!rc) |
| 4871 | return count; | 5031 | return count; |
| @@ -4911,7 +5071,7 @@ static void rbd_dev_device_release(struct device *dev) | |||
| 4911 | 5071 | ||
| 4912 | rbd_free_disk(rbd_dev); | 5072 | rbd_free_disk(rbd_dev); |
| 4913 | clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); | 5073 | clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); |
| 4914 | rbd_dev_clear_mapping(rbd_dev); | 5074 | rbd_dev_mapping_clear(rbd_dev); |
| 4915 | unregister_blkdev(rbd_dev->major, rbd_dev->name); | 5075 | unregister_blkdev(rbd_dev->major, rbd_dev->name); |
| 4916 | rbd_dev->major = 0; | 5076 | rbd_dev->major = 0; |
| 4917 | rbd_dev_id_put(rbd_dev); | 5077 | rbd_dev_id_put(rbd_dev); |
| @@ -4978,10 +5138,13 @@ static ssize_t rbd_remove(struct bus_type *bus, | |||
| 4978 | spin_unlock_irq(&rbd_dev->lock); | 5138 | spin_unlock_irq(&rbd_dev->lock); |
| 4979 | if (ret < 0) | 5139 | if (ret < 0) |
| 4980 | goto done; | 5140 | goto done; |
| 4981 | ret = count; | ||
| 4982 | rbd_bus_del_dev(rbd_dev); | 5141 | rbd_bus_del_dev(rbd_dev); |
| 5142 | ret = rbd_dev_header_watch_sync(rbd_dev, false); | ||
| 5143 | if (ret) | ||
| 5144 | rbd_warn(rbd_dev, "failed to cancel watch event (%d)\n", ret); | ||
| 4983 | rbd_dev_image_release(rbd_dev); | 5145 | rbd_dev_image_release(rbd_dev); |
| 4984 | module_put(THIS_MODULE); | 5146 | module_put(THIS_MODULE); |
| 5147 | ret = count; | ||
| 4985 | done: | 5148 | done: |
| 4986 | mutex_unlock(&ctl_mutex); | 5149 | mutex_unlock(&ctl_mutex); |
| 4987 | 5150 | ||
