diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-19 13:50:37 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-19 13:50:37 -0400 |
commit | e9ff04dd94d46c817bbb103531cdef6e7bd5d022 (patch) | |
tree | f6544ac65503d04b7760492e7f2592132e4a6d6c /drivers/block | |
parent | ed24fee24a6be9568b1ee30209bafe4dad66be0e (diff) | |
parent | 9c89d62948c4740e379a7e0085dd8d7c1561f53f (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull ceph fixes from Sage Weil:
"These fix several bugs with RBD from 3.11 that didn't get tested in
time for the merge window: some error handling, a use-after-free, and
a sequencing issue when unmapping an image races with a notify
operation.
There is also a patch fixing a problem with the new ceph + fscache
code that just went in"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
fscache: check consistency does not decrement refcount
rbd: fix error handling from rbd_snap_name()
rbd: ignore unmapped snapshots that no longer exist
rbd: fix use-after free of rbd_dev->disk
rbd: make rbd_obj_notify_ack() synchronous
rbd: complete notifies before cleaning up osd_client and rbd_dev
libceph: add function to ensure notifies are complete
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/rbd.c | 77 |
1 files changed, 59 insertions, 18 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index b22a7d0fe5b7..cb1db2979d3d 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c | |||
@@ -931,12 +931,14 @@ static const char *rbd_dev_v1_snap_name(struct rbd_device *rbd_dev, | |||
931 | u64 snap_id) | 931 | u64 snap_id) |
932 | { | 932 | { |
933 | u32 which; | 933 | u32 which; |
934 | const char *snap_name; | ||
934 | 935 | ||
935 | which = rbd_dev_snap_index(rbd_dev, snap_id); | 936 | which = rbd_dev_snap_index(rbd_dev, snap_id); |
936 | if (which == BAD_SNAP_INDEX) | 937 | if (which == BAD_SNAP_INDEX) |
937 | return NULL; | 938 | return ERR_PTR(-ENOENT); |
938 | 939 | ||
939 | return _rbd_dev_v1_snap_name(rbd_dev, which); | 940 | snap_name = _rbd_dev_v1_snap_name(rbd_dev, which); |
941 | return snap_name ? snap_name : ERR_PTR(-ENOMEM); | ||
940 | } | 942 | } |
941 | 943 | ||
942 | static const char *rbd_snap_name(struct rbd_device *rbd_dev, u64 snap_id) | 944 | static const char *rbd_snap_name(struct rbd_device *rbd_dev, u64 snap_id) |
@@ -2812,7 +2814,7 @@ out_err: | |||
2812 | obj_request_done_set(obj_request); | 2814 | obj_request_done_set(obj_request); |
2813 | } | 2815 | } |
2814 | 2816 | ||
2815 | static int rbd_obj_notify_ack(struct rbd_device *rbd_dev, u64 notify_id) | 2817 | static int rbd_obj_notify_ack_sync(struct rbd_device *rbd_dev, u64 notify_id) |
2816 | { | 2818 | { |
2817 | struct rbd_obj_request *obj_request; | 2819 | struct rbd_obj_request *obj_request; |
2818 | struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; | 2820 | struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; |
@@ -2827,16 +2829,17 @@ static int rbd_obj_notify_ack(struct rbd_device *rbd_dev, u64 notify_id) | |||
2827 | obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, obj_request); | 2829 | obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, obj_request); |
2828 | if (!obj_request->osd_req) | 2830 | if (!obj_request->osd_req) |
2829 | goto out; | 2831 | goto out; |
2830 | obj_request->callback = rbd_obj_request_put; | ||
2831 | 2832 | ||
2832 | osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_NOTIFY_ACK, | 2833 | osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_NOTIFY_ACK, |
2833 | notify_id, 0, 0); | 2834 | notify_id, 0, 0); |
2834 | rbd_osd_req_format_read(obj_request); | 2835 | rbd_osd_req_format_read(obj_request); |
2835 | 2836 | ||
2836 | ret = rbd_obj_request_submit(osdc, obj_request); | 2837 | ret = rbd_obj_request_submit(osdc, obj_request); |
2837 | out: | ||
2838 | if (ret) | 2838 | if (ret) |
2839 | rbd_obj_request_put(obj_request); | 2839 | goto out; |
2840 | ret = rbd_obj_request_wait(obj_request); | ||
2841 | out: | ||
2842 | rbd_obj_request_put(obj_request); | ||
2840 | 2843 | ||
2841 | return ret; | 2844 | return ret; |
2842 | } | 2845 | } |
@@ -2856,7 +2859,7 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) | |||
2856 | if (ret) | 2859 | if (ret) |
2857 | rbd_warn(rbd_dev, "header refresh error (%d)\n", ret); | 2860 | rbd_warn(rbd_dev, "header refresh error (%d)\n", ret); |
2858 | 2861 | ||
2859 | rbd_obj_notify_ack(rbd_dev, notify_id); | 2862 | rbd_obj_notify_ack_sync(rbd_dev, notify_id); |
2860 | } | 2863 | } |
2861 | 2864 | ||
2862 | /* | 2865 | /* |
@@ -3328,6 +3331,31 @@ static void rbd_exists_validate(struct rbd_device *rbd_dev) | |||
3328 | clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); | 3331 | clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); |
3329 | } | 3332 | } |
3330 | 3333 | ||
3334 | static void rbd_dev_update_size(struct rbd_device *rbd_dev) | ||
3335 | { | ||
3336 | sector_t size; | ||
3337 | bool removing; | ||
3338 | |||
3339 | /* | ||
3340 | * Don't hold the lock while doing disk operations, | ||
3341 | * or lock ordering will conflict with the bdev mutex via: | ||
3342 | * rbd_add() -> blkdev_get() -> rbd_open() | ||
3343 | */ | ||
3344 | spin_lock_irq(&rbd_dev->lock); | ||
3345 | removing = test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags); | ||
3346 | spin_unlock_irq(&rbd_dev->lock); | ||
3347 | /* | ||
3348 | * If the device is being removed, rbd_dev->disk has | ||
3349 | * been destroyed, so don't try to update its size | ||
3350 | */ | ||
3351 | if (!removing) { | ||
3352 | size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE; | ||
3353 | dout("setting size to %llu sectors", (unsigned long long)size); | ||
3354 | set_capacity(rbd_dev->disk, size); | ||
3355 | revalidate_disk(rbd_dev->disk); | ||
3356 | } | ||
3357 | } | ||
3358 | |||
3331 | static int rbd_dev_refresh(struct rbd_device *rbd_dev) | 3359 | static int rbd_dev_refresh(struct rbd_device *rbd_dev) |
3332 | { | 3360 | { |
3333 | u64 mapping_size; | 3361 | u64 mapping_size; |
@@ -3347,12 +3375,7 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev) | |||
3347 | up_write(&rbd_dev->header_rwsem); | 3375 | up_write(&rbd_dev->header_rwsem); |
3348 | 3376 | ||
3349 | if (mapping_size != rbd_dev->mapping.size) { | 3377 | if (mapping_size != rbd_dev->mapping.size) { |
3350 | sector_t size; | 3378 | rbd_dev_update_size(rbd_dev); |
3351 | |||
3352 | size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE; | ||
3353 | dout("setting size to %llu sectors", (unsigned long long)size); | ||
3354 | set_capacity(rbd_dev->disk, size); | ||
3355 | revalidate_disk(rbd_dev->disk); | ||
3356 | } | 3379 | } |
3357 | 3380 | ||
3358 | return ret; | 3381 | return ret; |
@@ -4061,8 +4084,13 @@ static u64 rbd_v2_snap_id_by_name(struct rbd_device *rbd_dev, const char *name) | |||
4061 | 4084 | ||
4062 | snap_id = snapc->snaps[which]; | 4085 | snap_id = snapc->snaps[which]; |
4063 | snap_name = rbd_dev_v2_snap_name(rbd_dev, snap_id); | 4086 | snap_name = rbd_dev_v2_snap_name(rbd_dev, snap_id); |
4064 | if (IS_ERR(snap_name)) | 4087 | if (IS_ERR(snap_name)) { |
4065 | break; | 4088 | /* ignore no-longer existing snapshots */ |
4089 | if (PTR_ERR(snap_name) == -ENOENT) | ||
4090 | continue; | ||
4091 | else | ||
4092 | break; | ||
4093 | } | ||
4066 | found = !strcmp(name, snap_name); | 4094 | found = !strcmp(name, snap_name); |
4067 | kfree(snap_name); | 4095 | kfree(snap_name); |
4068 | } | 4096 | } |
@@ -4141,8 +4169,8 @@ static int rbd_dev_spec_update(struct rbd_device *rbd_dev) | |||
4141 | /* Look up the snapshot name, and make a copy */ | 4169 | /* Look up the snapshot name, and make a copy */ |
4142 | 4170 | ||
4143 | snap_name = rbd_snap_name(rbd_dev, spec->snap_id); | 4171 | snap_name = rbd_snap_name(rbd_dev, spec->snap_id); |
4144 | if (!snap_name) { | 4172 | if (IS_ERR(snap_name)) { |
4145 | ret = -ENOMEM; | 4173 | ret = PTR_ERR(snap_name); |
4146 | goto out_err; | 4174 | goto out_err; |
4147 | } | 4175 | } |
4148 | 4176 | ||
@@ -5163,10 +5191,23 @@ static ssize_t rbd_remove(struct bus_type *bus, | |||
5163 | if (ret < 0 || already) | 5191 | if (ret < 0 || already) |
5164 | return ret; | 5192 | return ret; |
5165 | 5193 | ||
5166 | rbd_bus_del_dev(rbd_dev); | ||
5167 | ret = rbd_dev_header_watch_sync(rbd_dev, false); | 5194 | ret = rbd_dev_header_watch_sync(rbd_dev, false); |
5168 | if (ret) | 5195 | if (ret) |
5169 | rbd_warn(rbd_dev, "failed to cancel watch event (%d)\n", ret); | 5196 | rbd_warn(rbd_dev, "failed to cancel watch event (%d)\n", ret); |
5197 | |||
5198 | /* | ||
5199 | * flush remaining watch callbacks - these must be complete | ||
5200 | * before the osd_client is shutdown | ||
5201 | */ | ||
5202 | dout("%s: flushing notifies", __func__); | ||
5203 | ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc); | ||
5204 | /* | ||
5205 | * Don't free anything from rbd_dev->disk until after all | ||
5206 | * notifies are completely processed. Otherwise | ||
5207 | * rbd_bus_del_dev() will race with rbd_watch_cb(), resulting | ||
5208 | * in a potential use after free of rbd_dev->disk or rbd_dev. | ||
5209 | */ | ||
5210 | rbd_bus_del_dev(rbd_dev); | ||
5170 | rbd_dev_image_release(rbd_dev); | 5211 | rbd_dev_image_release(rbd_dev); |
5171 | module_put(THIS_MODULE); | 5212 | module_put(THIS_MODULE); |
5172 | 5213 | ||