diff options
author | Kiyoshi Ueda <k-ueda@ct.jp.nec.com> | 2009-12-10 18:52:16 -0500 |
---|---|---|
committer | Alasdair G Kergon <agk@redhat.com> | 2009-12-10 18:52:16 -0500 |
commit | 9f518b27cf682dd5155a4c1679d52cd4b5be82f2 (patch) | |
tree | 3515037fb33d11065cbc02a8aa8c13ef6a406fa9 | |
parent | 6facdaff229f2b25d0de82be9be99b9f562e72ba (diff) |
dm: simplify request based suspend
The semantics of bio-based dm were changed recently in the case of
suspend with "--nolockfs" but without "--noflush".
Before 2.6.30, I/Os submitted before the suspend invocation were always
flushed. From 2.6.30 onwards, I/Os submitted before the suspend
invocation might not be flushed. (For details, see
http://marc.info/?t=123994433400003&r=1&w=2)
This patch brings the behaviour of request-based dm into line with
bio-based dm, simplifying the code and preparing for a subsequent patch
that will wait for all in-flight I/Os to complete without stopping the
request_queue, using dm_wait_for_completion() to do so.
This change in semantics simplifies the suspend code as follows:
o In request-based dm, suspend is implemented by stopping the
request_queue, and I/Os are still queued in the request_queue
even after suspend is invoked.
o In the old semantics, we had to track whether I/Os were
queued before or after the suspend invocation, so a special
barrier-like request called 'suspend marker' was introduced.
o With the new semantics, we don't need to flush any I/O
so we can remove the marker and the code related to the marker
handling and I/O flushing.
After removing this code, the suspend sequence is now:
1. Flush all I/Os by lock_fs() if needed.
2. Stop dispatching any I/O by stopping the request_queue.
3. Wait for all in-flight I/Os to be completed or requeued.
Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
-rw-r--r-- | drivers/md/dm.c | 158 |
1 files changed, 14 insertions, 144 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 30f5dc8e52b..634b1daab2d 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
@@ -178,9 +178,6 @@ struct mapped_device { | |||
178 | /* forced geometry settings */ | 178 | /* forced geometry settings */ |
179 | struct hd_geometry geometry; | 179 | struct hd_geometry geometry; |
180 | 180 | ||
181 | /* marker of flush suspend for request-based dm */ | ||
182 | struct request suspend_rq; | ||
183 | |||
184 | /* For saving the address of __make_request for request based dm */ | 181 | /* For saving the address of __make_request for request based dm */ |
185 | make_request_fn *saved_make_request_fn; | 182 | make_request_fn *saved_make_request_fn; |
186 | 183 | ||
@@ -1471,11 +1468,6 @@ static struct request *clone_rq(struct request *rq, struct mapped_device *md, | |||
1471 | return clone; | 1468 | return clone; |
1472 | } | 1469 | } |
1473 | 1470 | ||
1474 | static int dm_rq_flush_suspending(struct mapped_device *md) | ||
1475 | { | ||
1476 | return !md->suspend_rq.special; | ||
1477 | } | ||
1478 | |||
1479 | /* | 1471 | /* |
1480 | * Called with the queue lock held. | 1472 | * Called with the queue lock held. |
1481 | */ | 1473 | */ |
@@ -1484,14 +1476,6 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq) | |||
1484 | struct mapped_device *md = q->queuedata; | 1476 | struct mapped_device *md = q->queuedata; |
1485 | struct request *clone; | 1477 | struct request *clone; |
1486 | 1478 | ||
1487 | if (unlikely(rq == &md->suspend_rq)) { | ||
1488 | if (dm_rq_flush_suspending(md)) | ||
1489 | return BLKPREP_OK; | ||
1490 | else | ||
1491 | /* The flush suspend was interrupted */ | ||
1492 | return BLKPREP_KILL; | ||
1493 | } | ||
1494 | |||
1495 | if (unlikely(rq->special)) { | 1479 | if (unlikely(rq->special)) { |
1496 | DMWARN("Already has something in rq->special."); | 1480 | DMWARN("Already has something in rq->special."); |
1497 | return BLKPREP_KILL; | 1481 | return BLKPREP_KILL; |
@@ -1560,27 +1544,15 @@ static void dm_request_fn(struct request_queue *q) | |||
1560 | struct request *rq; | 1544 | struct request *rq; |
1561 | 1545 | ||
1562 | /* | 1546 | /* |
1563 | * For noflush suspend, check blk_queue_stopped() to immediately | 1547 | * For suspend, check blk_queue_stopped() and don't increment |
1564 | * quit I/O dispatching. | 1548 | * the number of in-flight I/Os after the queue is stopped |
1549 | * in dm_suspend(). | ||
1565 | */ | 1550 | */ |
1566 | while (!blk_queue_plugged(q) && !blk_queue_stopped(q)) { | 1551 | while (!blk_queue_plugged(q) && !blk_queue_stopped(q)) { |
1567 | rq = blk_peek_request(q); | 1552 | rq = blk_peek_request(q); |
1568 | if (!rq) | 1553 | if (!rq) |
1569 | goto plug_and_out; | 1554 | goto plug_and_out; |
1570 | 1555 | ||
1571 | if (unlikely(rq == &md->suspend_rq)) { /* Flush suspend maker */ | ||
1572 | if (queue_in_flight(q)) | ||
1573 | /* Not quiet yet. Wait more */ | ||
1574 | goto plug_and_out; | ||
1575 | |||
1576 | /* This device should be quiet now */ | ||
1577 | __stop_queue(q); | ||
1578 | blk_start_request(rq); | ||
1579 | __blk_end_request_all(rq, 0); | ||
1580 | wake_up(&md->wait); | ||
1581 | goto out; | ||
1582 | } | ||
1583 | |||
1584 | ti = dm_table_find_target(map, blk_rq_pos(rq)); | 1556 | ti = dm_table_find_target(map, blk_rq_pos(rq)); |
1585 | if (ti->type->busy && ti->type->busy(ti)) | 1557 | if (ti->type->busy && ti->type->busy(ti)) |
1586 | goto plug_and_out; | 1558 | goto plug_and_out; |
@@ -2112,7 +2084,7 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible) | |||
2112 | smp_mb(); | 2084 | smp_mb(); |
2113 | if (dm_request_based(md)) { | 2085 | if (dm_request_based(md)) { |
2114 | spin_lock_irqsave(q->queue_lock, flags); | 2086 | spin_lock_irqsave(q->queue_lock, flags); |
2115 | if (!queue_in_flight(q) && blk_queue_stopped(q)) { | 2087 | if (!queue_in_flight(q)) { |
2116 | spin_unlock_irqrestore(q->queue_lock, flags); | 2088 | spin_unlock_irqrestore(q->queue_lock, flags); |
2117 | break; | 2089 | break; |
2118 | } | 2090 | } |
@@ -2245,67 +2217,6 @@ out: | |||
2245 | return r; | 2217 | return r; |
2246 | } | 2218 | } |
2247 | 2219 | ||
2248 | static void dm_rq_invalidate_suspend_marker(struct mapped_device *md) | ||
2249 | { | ||
2250 | md->suspend_rq.special = (void *)0x1; | ||
2251 | } | ||
2252 | |||
2253 | static void dm_rq_abort_suspend(struct mapped_device *md, int noflush) | ||
2254 | { | ||
2255 | struct request_queue *q = md->queue; | ||
2256 | unsigned long flags; | ||
2257 | |||
2258 | spin_lock_irqsave(q->queue_lock, flags); | ||
2259 | if (!noflush) | ||
2260 | dm_rq_invalidate_suspend_marker(md); | ||
2261 | __start_queue(q); | ||
2262 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
2263 | } | ||
2264 | |||
2265 | static void dm_rq_start_suspend(struct mapped_device *md, int noflush) | ||
2266 | { | ||
2267 | struct request *rq = &md->suspend_rq; | ||
2268 | struct request_queue *q = md->queue; | ||
2269 | |||
2270 | if (noflush) | ||
2271 | stop_queue(q); | ||
2272 | else { | ||
2273 | blk_rq_init(q, rq); | ||
2274 | blk_insert_request(q, rq, 0, NULL); | ||
2275 | } | ||
2276 | } | ||
2277 | |||
2278 | static int dm_rq_suspend_available(struct mapped_device *md, int noflush) | ||
2279 | { | ||
2280 | int r = 1; | ||
2281 | struct request *rq = &md->suspend_rq; | ||
2282 | struct request_queue *q = md->queue; | ||
2283 | unsigned long flags; | ||
2284 | |||
2285 | if (noflush) | ||
2286 | return r; | ||
2287 | |||
2288 | /* The marker must be protected by queue lock if it is in use */ | ||
2289 | spin_lock_irqsave(q->queue_lock, flags); | ||
2290 | if (unlikely(rq->ref_count)) { | ||
2291 | /* | ||
2292 | * This can happen, when the previous flush suspend was | ||
2293 | * interrupted, the marker is still in the queue and | ||
2294 | * this flush suspend has been invoked, because we don't | ||
2295 | * remove the marker at the time of suspend interruption. | ||
2296 | * We have only one marker per mapped_device, so we can't | ||
2297 | * start another flush suspend while it is in use. | ||
2298 | */ | ||
2299 | BUG_ON(!rq->special); /* The marker should be invalidated */ | ||
2300 | DMWARN("Invalidating the previous flush suspend is still in" | ||
2301 | " progress. Please retry later."); | ||
2302 | r = 0; | ||
2303 | } | ||
2304 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
2305 | |||
2306 | return r; | ||
2307 | } | ||
2308 | |||
2309 | /* | 2220 | /* |
2310 | * Functions to lock and unlock any filesystem running on the | 2221 | * Functions to lock and unlock any filesystem running on the |
2311 | * device. | 2222 | * device. |
@@ -2348,49 +2259,11 @@ static void unlock_fs(struct mapped_device *md) | |||
2348 | /* | 2259 | /* |
2349 | * Suspend mechanism in request-based dm. | 2260 | * Suspend mechanism in request-based dm. |
2350 | * | 2261 | * |
2351 | * After the suspend starts, further incoming requests are kept in | 2262 | * 1. Flush all I/Os by lock_fs() if needed. |
2352 | * the request_queue and deferred. | 2263 | * 2. Stop dispatching any I/O by stopping the request_queue. |
2353 | * Remaining requests in the request_queue at the start of suspend are flushed | 2264 | * 3. Wait for all in-flight I/Os to be completed or requeued. |
2354 | * if it is flush suspend. | ||
2355 | * The suspend completes when the following conditions have been satisfied, | ||
2356 | * so wait for it: | ||
2357 | * 1. q->in_flight is 0 (which means no in_flight request) | ||
2358 | * 2. queue has been stopped (which means no request dispatching) | ||
2359 | * | ||
2360 | * | ||
2361 | * Noflush suspend | ||
2362 | * --------------- | ||
2363 | * Noflush suspend doesn't need to dispatch remaining requests. | ||
2364 | * So stop the queue immediately. Then, wait for all in_flight requests | ||
2365 | * to be completed or requeued. | ||
2366 | * | ||
2367 | * To abort noflush suspend, start the queue. | ||
2368 | * | 2265 | * |
2369 | * | 2266 | * To abort suspend, start the request_queue. |
2370 | * Flush suspend | ||
2371 | * ------------- | ||
2372 | * Flush suspend needs to dispatch remaining requests. So stop the queue | ||
2373 | * after the remaining requests are completed. (Requeued request must be also | ||
2374 | * re-dispatched and completed. Until then, we can't stop the queue.) | ||
2375 | * | ||
2376 | * During flushing the remaining requests, further incoming requests are also | ||
2377 | * inserted to the same queue. To distinguish which requests are to be | ||
2378 | * flushed, we insert a marker request to the queue at the time of starting | ||
2379 | * flush suspend, like a barrier. | ||
2380 | * The dispatching is blocked when the marker is found on the top of the queue. | ||
2381 | * And the queue is stopped when all in_flight requests are completed, since | ||
2382 | * that means the remaining requests are completely flushed. | ||
2383 | * Then, the marker is removed from the queue. | ||
2384 | * | ||
2385 | * To abort flush suspend, we also need to take care of the marker, not only | ||
2386 | * starting the queue. | ||
2387 | * We don't remove the marker forcibly from the queue since it's against | ||
2388 | * the block-layer manner. Instead, we put a invalidated mark on the marker. | ||
2389 | * When the invalidated marker is found on the top of the queue, it is | ||
2390 | * immediately removed from the queue, so it doesn't block dispatching. | ||
2391 | * Because we have only one marker per mapped_device, we can't start another | ||
2392 | * flush suspend until the invalidated marker is removed from the queue. | ||
2393 | * So fail and return with -EBUSY in such a case. | ||
2394 | */ | 2267 | */ |
2395 | int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | 2268 | int dm_suspend(struct mapped_device *md, unsigned suspend_flags) |
2396 | { | 2269 | { |
@@ -2406,11 +2279,6 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
2406 | goto out_unlock; | 2279 | goto out_unlock; |
2407 | } | 2280 | } |
2408 | 2281 | ||
2409 | if (dm_request_based(md) && !dm_rq_suspend_available(md, noflush)) { | ||
2410 | r = -EBUSY; | ||
2411 | goto out_unlock; | ||
2412 | } | ||
2413 | |||
2414 | map = dm_get_table(md); | 2282 | map = dm_get_table(md); |
2415 | 2283 | ||
2416 | /* | 2284 | /* |
@@ -2424,8 +2292,10 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
2424 | dm_table_presuspend_targets(map); | 2292 | dm_table_presuspend_targets(map); |
2425 | 2293 | ||
2426 | /* | 2294 | /* |
2427 | * Flush I/O to the device. noflush supersedes do_lockfs, | 2295 | * Flush I/O to the device. |
2428 | * because lock_fs() needs to flush I/Os. | 2296 | * Any I/O submitted after lock_fs() may not be flushed. |
2297 | * noflush takes precedence over do_lockfs. | ||
2298 | * (lock_fs() flushes I/Os and waits for them to complete.) | ||
2429 | */ | 2299 | */ |
2430 | if (!noflush && do_lockfs) { | 2300 | if (!noflush && do_lockfs) { |
2431 | r = lock_fs(md); | 2301 | r = lock_fs(md); |
@@ -2457,7 +2327,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
2457 | flush_workqueue(md->wq); | 2327 | flush_workqueue(md->wq); |
2458 | 2328 | ||
2459 | if (dm_request_based(md)) | 2329 | if (dm_request_based(md)) |
2460 | dm_rq_start_suspend(md, noflush); | 2330 | stop_queue(md->queue); |
2461 | 2331 | ||
2462 | /* | 2332 | /* |
2463 | * At this point no more requests are entering target request routines. | 2333 | * At this point no more requests are entering target request routines. |
@@ -2476,7 +2346,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
2476 | dm_queue_flush(md); | 2346 | dm_queue_flush(md); |
2477 | 2347 | ||
2478 | if (dm_request_based(md)) | 2348 | if (dm_request_based(md)) |
2479 | dm_rq_abort_suspend(md, noflush); | 2349 | start_queue(md->queue); |
2480 | 2350 | ||
2481 | unlock_fs(md); | 2351 | unlock_fs(md); |
2482 | goto out; /* pushback list is already flushed, so skip flush */ | 2352 | goto out; /* pushback list is already flushed, so skip flush */ |