diff options
author | Josh Durgin <josh.durgin@inktank.com> | 2013-08-29 20:26:31 -0400 |
---|---|---|
committer | Josh Durgin <josh.durgin@inktank.com> | 2013-09-09 14:16:25 -0400 |
commit | 9875201e10496612080e7d164acc8f625c18725c (patch) | |
tree | 9921ff119c3d9e87db94e455597fbe772906ac81 /drivers/block | |
parent | 20e0af67ce88c657d0601977b9941a2256afbdaa (diff) |
rbd: fix use-after free of rbd_dev->disk
Removing a device deallocates the disk, unschedules the watch, and
finally cleans up the rbd_dev structure. rbd_dev_refresh(), called
from the watch callback, updates the disk size and rbd_dev
structure. With no locking between them, rbd_dev_refresh() may use the
device or rbd_dev after they've been freed.
To fix this, check whether RBD_DEV_FLAG_REMOVING is set before
updating the disk size in rbd_dev_refresh(). In order to prevent a
race where rbd_dev_refresh() is already revalidating the disk when
rbd_remove() is called, move the call to rbd_bus_del_dev() after the
watch is unregistered and all notifies are complete. It's safe to
defer deleting this structure because no new requests can be submitted
once the RBD_DEV_FLAG_REMOVING is set, since the device cannot be
opened.
Fixes: http://tracker.ceph.com/issues/5636
Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
Reviewed-by: Alex Elder <elder@linaro.org>
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/rbd.c | 40 |
1 files changed, 33 insertions, 7 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 9e5f07f936dd..47c6f9cf9dba 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c | |||
@@ -3325,6 +3325,31 @@ static void rbd_exists_validate(struct rbd_device *rbd_dev) | |||
3325 | clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); | 3325 | clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); |
3326 | } | 3326 | } |
3327 | 3327 | ||
3328 | static void rbd_dev_update_size(struct rbd_device *rbd_dev) | ||
3329 | { | ||
3330 | sector_t size; | ||
3331 | bool removing; | ||
3332 | |||
3333 | /* | ||
3334 | * Don't hold the lock while doing disk operations, | ||
3335 | * or lock ordering will conflict with the bdev mutex via: | ||
3336 | * rbd_add() -> blkdev_get() -> rbd_open() | ||
3337 | */ | ||
3338 | spin_lock_irq(&rbd_dev->lock); | ||
3339 | removing = test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags); | ||
3340 | spin_unlock_irq(&rbd_dev->lock); | ||
3341 | /* | ||
3342 | * If the device is being removed, rbd_dev->disk has | ||
3343 | * been destroyed, so don't try to update its size | ||
3344 | */ | ||
3345 | if (!removing) { | ||
3346 | size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE; | ||
3347 | dout("setting size to %llu sectors", (unsigned long long)size); | ||
3348 | set_capacity(rbd_dev->disk, size); | ||
3349 | revalidate_disk(rbd_dev->disk); | ||
3350 | } | ||
3351 | } | ||
3352 | |||
3328 | static int rbd_dev_refresh(struct rbd_device *rbd_dev) | 3353 | static int rbd_dev_refresh(struct rbd_device *rbd_dev) |
3329 | { | 3354 | { |
3330 | u64 mapping_size; | 3355 | u64 mapping_size; |
@@ -3344,12 +3369,7 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev) | |||
3344 | up_write(&rbd_dev->header_rwsem); | 3369 | up_write(&rbd_dev->header_rwsem); |
3345 | 3370 | ||
3346 | if (mapping_size != rbd_dev->mapping.size) { | 3371 | if (mapping_size != rbd_dev->mapping.size) { |
3347 | sector_t size; | 3372 | rbd_dev_update_size(rbd_dev); |
3348 | |||
3349 | size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE; | ||
3350 | dout("setting size to %llu sectors", (unsigned long long)size); | ||
3351 | set_capacity(rbd_dev->disk, size); | ||
3352 | revalidate_disk(rbd_dev->disk); | ||
3353 | } | 3373 | } |
3354 | 3374 | ||
3355 | return ret; | 3375 | return ret; |
@@ -5160,7 +5180,6 @@ static ssize_t rbd_remove(struct bus_type *bus, | |||
5160 | if (ret < 0 || already) | 5180 | if (ret < 0 || already) |
5161 | return ret; | 5181 | return ret; |
5162 | 5182 | ||
5163 | rbd_bus_del_dev(rbd_dev); | ||
5164 | ret = rbd_dev_header_watch_sync(rbd_dev, false); | 5183 | ret = rbd_dev_header_watch_sync(rbd_dev, false); |
5165 | if (ret) | 5184 | if (ret) |
5166 | rbd_warn(rbd_dev, "failed to cancel watch event (%d)\n", ret); | 5185 | rbd_warn(rbd_dev, "failed to cancel watch event (%d)\n", ret); |
@@ -5171,6 +5190,13 @@ static ssize_t rbd_remove(struct bus_type *bus, | |||
5171 | */ | 5190 | */ |
5172 | dout("%s: flushing notifies", __func__); | 5191 | dout("%s: flushing notifies", __func__); |
5173 | ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc); | 5192 | ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc); |
5193 | /* | ||
5194 | * Don't free anything from rbd_dev->disk until after all | ||
5195 | * notifies are completely processed. Otherwise | ||
5196 | * rbd_bus_del_dev() will race with rbd_watch_cb(), resulting | ||
5197 | * in a potential use after free of rbd_dev->disk or rbd_dev. | ||
5198 | */ | ||
5199 | rbd_bus_del_dev(rbd_dev); | ||
5174 | rbd_dev_image_release(rbd_dev); | 5200 | rbd_dev_image_release(rbd_dev); |
5175 | module_put(THIS_MODULE); | 5201 | module_put(THIS_MODULE); |
5176 | 5202 | ||