author     Alex Elder <elder@inktank.com>    2013-04-21 01:32:07 -0400
committer  Sage Weil <sage@inktank.com>      2013-05-02 00:19:15 -0400
commit     a9e8ba2cb3eb64cf6cfa509d096ef79bc1c827ae
tree       8822a3411c5353abfdd12341a2cbd91af2ffe03b
parent     0eefd470f034cc18349fa1a9e4fda000e963c4e3
rbd: enforce parent overlap
A clone image has a defined overlap point with its parent image.  That is
the byte offset beyond which the parent image has no defined data to back
the clone, and anything thereafter can be viewed as being zero-filled by
the clone image.

This is needed because a clone image can be resized.  If it gets resized
larger than the snapshot it is based on, the overlap defines the original
size.  If the clone gets resized downward below the original size, the new
clone size defines the overlap.  If the clone is subsequently resized to be
larger, the overlap won't be increased because the previous resize
invalidated any parent data beyond that point.

This resolves:
    http://tracker.ceph.com/issues/4724

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
-rw-r--r--   drivers/block/rbd.c   64
1 file changed, 54 insertions(+), 10 deletions(-)
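To make the overlap rule in the commit message concrete before the patch itself, here is a minimal
user-space sketch (not driver code).  The helper parent_backed_bytes() is hypothetical, but its
arithmetic mirrors the img_offset/parent_overlap clamping the patch adds to
rbd_img_obj_parent_read_full(): a request that starts at or beyond the overlap gets no parent data
at all, and one that straddles the boundary only gets parent data up to it; the rest reads as zeros.

#include <stdint.h>
#include <stdio.h>

/*
 * How many of 'length' bytes starting at clone offset 'img_offset' are
 * backed by the parent image, given the parent overlap?  (Illustrative
 * helper only; the field names mirror those used in the patch.)
 */
static uint64_t parent_backed_bytes(uint64_t img_offset, uint64_t length,
                                    uint64_t parent_overlap)
{
        if (img_offset >= parent_overlap)
                return 0;                       /* entirely beyond overlap: all zeros */
        if (img_offset + length <= parent_overlap)
                return length;                  /* entirely inside the overlap */
        return parent_overlap - img_offset;     /* straddles the boundary */
}

int main(void)
{
        uint64_t overlap = 4 * 1024 * 1024;     /* assume a 4 MiB overlap */

        printf("%llu\n", (unsigned long long)
               parent_backed_bytes(0, 1 << 20, overlap));           /* 1048576 */
        printf("%llu\n", (unsigned long long)
               parent_backed_bytes(overlap - 512, 4096, overlap));  /* 512 */
        printf("%llu\n", (unsigned long long)
               parent_backed_bytes(overlap, 4096, overlap));        /* 0 */
        return 0;
}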
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index c34719c917b1..ee53d8e52801 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1437,20 +1437,20 @@ static void rbd_osd_trivial_callback(struct rbd_obj_request *obj_request)
 static void rbd_osd_read_callback(struct rbd_obj_request *obj_request)
 {
         struct rbd_img_request *img_request = NULL;
+        struct rbd_device *rbd_dev = NULL;
         bool layered = false;
 
         if (obj_request_img_data_test(obj_request)) {
                 img_request = obj_request->img_request;
                 layered = img_request && img_request_layered_test(img_request);
-        } else {
-                img_request = NULL;
-                layered = false;
+                rbd_dev = img_request->rbd_dev;
         }
 
         dout("%s: obj %p img %p result %d %llu/%llu\n", __func__,
                 obj_request, img_request, obj_request->result,
                 obj_request->xferred, obj_request->length);
-        if (layered && obj_request->result == -ENOENT)
+        if (layered && obj_request->result == -ENOENT &&
+                        obj_request->img_offset < rbd_dev->parent_overlap)
                 rbd_img_parent_read(obj_request);
         else if (img_request)
                 rbd_img_obj_request_read_callback(obj_request);
@@ -2166,6 +2166,16 @@ static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request)
         length = (u64)1 << rbd_dev->header.obj_order;
 
         /*
+         * There is no defined parent data beyond the parent
+         * overlap, so limit what we read at that boundary if
+         * necessary.
+         */
+        if (img_offset + length > rbd_dev->parent_overlap) {
+                rbd_assert(img_offset < rbd_dev->parent_overlap);
+                length = rbd_dev->parent_overlap - img_offset;
+        }
+
+        /*
          * Allocate a page array big enough to receive the data read
          * from the parent.
          */
@@ -2325,21 +2335,28 @@ out:
 static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request)
 {
         struct rbd_img_request *img_request;
+        struct rbd_device *rbd_dev;
         bool known;
 
         rbd_assert(obj_request_img_data_test(obj_request));
 
         img_request = obj_request->img_request;
         rbd_assert(img_request);
+        rbd_dev = img_request->rbd_dev;
 
         /*
-         * Only layered writes need special handling.  If it's not a
-         * layered write, or it is a layered write but we know the
-         * target object exists, it's no different from any other
-         * object request.
+         * Only writes to layered images need special handling.
+         * Reads and non-layered writes are simple object requests.
+         * Layered writes that start beyond the end of the overlap
+         * with the parent have no parent data, so they too are
+         * simple object requests.  Finally, if the target object is
+         * known to already exist, its parent data has already been
+         * copied, so a write to the object can also be handled as a
+         * simple object request.
          */
         if (!img_request_write_test(img_request) ||
             !img_request_layered_test(img_request) ||
+            rbd_dev->parent_overlap <= obj_request->img_offset ||
             ((known = obj_request_known_test(obj_request)) &&
                         obj_request_exists_test(obj_request))) {
 
@@ -2386,14 +2403,41 @@ static int rbd_img_request_submit(struct rbd_img_request *img_request)
 static void rbd_img_parent_read_callback(struct rbd_img_request *img_request)
 {
         struct rbd_obj_request *obj_request;
+        struct rbd_device *rbd_dev;
+        u64 obj_end;
 
         rbd_assert(img_request_child_test(img_request));
 
         obj_request = img_request->obj_request;
-        rbd_assert(obj_request != NULL);
+        rbd_assert(obj_request);
+        rbd_assert(obj_request->img_request);
+
         obj_request->result = img_request->result;
-        obj_request->xferred = img_request->xferred;
+        if (obj_request->result)
+                goto out;
 
+        /*
+         * We need to zero anything beyond the parent overlap
+         * boundary.  Since rbd_img_obj_request_read_callback()
+         * will zero anything beyond the end of a short read, an
+         * easy way to do this is to pretend the data from the
+         * parent came up short--ending at the overlap boundary.
+         */
+        rbd_assert(obj_request->img_offset < U64_MAX - obj_request->length);
+        obj_end = obj_request->img_offset + obj_request->length;
+        rbd_dev = obj_request->img_request->rbd_dev;
+        if (obj_end > rbd_dev->parent_overlap) {
+                u64 xferred = 0;
+
+                if (obj_request->img_offset < rbd_dev->parent_overlap)
+                        xferred = rbd_dev->parent_overlap -
+                                        obj_request->img_offset;
+
+                obj_request->xferred = min(img_request->xferred, xferred);
+        } else {
+                obj_request->xferred = img_request->xferred;
+        }
+out:
         rbd_img_obj_request_read_callback(obj_request);
         rbd_obj_request_complete(obj_request);
 }
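For reference, a small user-space sketch of the "pretend the parent read came up short" trick used in
rbd_img_parent_read_callback() above.  The struct parent_read type and clamp_xferred_to_overlap()
helper are illustrative stand-ins, not the kernel's types: since the read-completion path zero-fills
everything past xferred, reporting a transfer that ends at the parent overlap makes the post-overlap
bytes come back as zeros.

#include <stdint.h>
#include <stdio.h>

struct parent_read {
        uint64_t img_offset;    /* clone-image offset of the request */
        uint64_t length;        /* bytes requested from the parent */
        uint64_t xferred;       /* bytes the parent read actually returned */
};

/* Clamp the reported transfer so it never extends past the overlap. */
static uint64_t clamp_xferred_to_overlap(const struct parent_read *r,
                                         uint64_t parent_overlap)
{
        uint64_t obj_end = r->img_offset + r->length;
        uint64_t limit;

        if (obj_end <= parent_overlap)
                return r->xferred;      /* fully inside the overlap: unchanged */

        /*
         * The request extends past the overlap: report at most the bytes
         * that lie inside it, so the caller treats the remainder as a
         * short read and zero-fills it.
         */
        limit = 0;
        if (r->img_offset < parent_overlap)
                limit = parent_overlap - r->img_offset;

        return r->xferred < limit ? r->xferred : limit;
}

int main(void)
{
        struct parent_read r = {
                .img_offset = 3 * 1024 * 1024,
                .length     = 2 * 1024 * 1024,
                .xferred    = 2 * 1024 * 1024,
        };
        uint64_t overlap = 4 * 1024 * 1024;     /* assume a 4 MiB overlap */

        /* Only the first 1 MiB lies inside the overlap: prints 1048576. */
        printf("%llu\n",
               (unsigned long long)clamp_xferred_to_overlap(&r, overlap));
        return 0;
}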