aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKiyoshi Ueda <k-ueda@ct.jp.nec.com>2009-12-10 18:52:16 -0500
committerAlasdair G Kergon <agk@redhat.com>2009-12-10 18:52:16 -0500
commit9f518b27cf682dd5155a4c1679d52cd4b5be82f2 (patch)
tree3515037fb33d11065cbc02a8aa8c13ef6a406fa9
parent6facdaff229f2b25d0de82be9be99b9f562e72ba (diff)
dm: simplify request based suspend
The semantics of bio-based dm were changed recently in the case of suspend with "--nolockfs" but without "--noflush". Before 2.6.30, I/Os submitted before the suspend invocation were always flushed. From 2.6.30 onwards, I/Os submitted before the suspend invocation might not be flushed. (For details, see http://marc.info/?t=123994433400003&r=1&w=2) This patch brings the behaviour of request-based dm into line with bio-based dm, simplifying the code and preparing for a subsequent patch that will wait for all in_flight I/Os to complete without stopping request_queue and use dm_wait_for_completion() for it. This change in semantics simplifies the suspend code as follows: o Suspend is implemented as stopping request_queue in request-based dm, and all I/Os are queued in the request_queue even after suspend is invoked. o In the old semantics, we had to track whether I/Os were queued before or after the suspend invocation, so a special barrier-like request called 'suspend marker' was introduced. o With the new semantics, we don't need to flush any I/O so we can remove the marker and the code related to the marker handling and I/O flushing. After removing this code, the suspend sequence is now: 1. Flush all I/Os by lock_fs() if needed. 2. Stop dispatching any I/O by stopping the request_queue. 3. Wait for all in-flight I/Os to be completed or requeued. Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com> Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com> Signed-off-by: Alasdair G Kergon <agk@redhat.com>
-rw-r--r--drivers/md/dm.c158
1 files changed, 14 insertions, 144 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 30f5dc8e52b..634b1daab2d 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -178,9 +178,6 @@ struct mapped_device {
178 /* forced geometry settings */ 178 /* forced geometry settings */
179 struct hd_geometry geometry; 179 struct hd_geometry geometry;
180 180
181 /* marker of flush suspend for request-based dm */
182 struct request suspend_rq;
183
184 /* For saving the address of __make_request for request based dm */ 181 /* For saving the address of __make_request for request based dm */
185 make_request_fn *saved_make_request_fn; 182 make_request_fn *saved_make_request_fn;
186 183
@@ -1471,11 +1468,6 @@ static struct request *clone_rq(struct request *rq, struct mapped_device *md,
1471 return clone; 1468 return clone;
1472} 1469}
1473 1470
1474static int dm_rq_flush_suspending(struct mapped_device *md)
1475{
1476 return !md->suspend_rq.special;
1477}
1478
1479/* 1471/*
1480 * Called with the queue lock held. 1472 * Called with the queue lock held.
1481 */ 1473 */
@@ -1484,14 +1476,6 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq)
1484 struct mapped_device *md = q->queuedata; 1476 struct mapped_device *md = q->queuedata;
1485 struct request *clone; 1477 struct request *clone;
1486 1478
1487 if (unlikely(rq == &md->suspend_rq)) {
1488 if (dm_rq_flush_suspending(md))
1489 return BLKPREP_OK;
1490 else
1491 /* The flush suspend was interrupted */
1492 return BLKPREP_KILL;
1493 }
1494
1495 if (unlikely(rq->special)) { 1479 if (unlikely(rq->special)) {
1496 DMWARN("Already has something in rq->special."); 1480 DMWARN("Already has something in rq->special.");
1497 return BLKPREP_KILL; 1481 return BLKPREP_KILL;
@@ -1560,27 +1544,15 @@ static void dm_request_fn(struct request_queue *q)
1560 struct request *rq; 1544 struct request *rq;
1561 1545
1562 /* 1546 /*
1563 * For noflush suspend, check blk_queue_stopped() to immediately 1547 * For suspend, check blk_queue_stopped() and don't increment
1564 * quit I/O dispatching. 1548 * the number of in-flight I/Os after the queue is stopped
1549 * in dm_suspend().
1565 */ 1550 */
1566 while (!blk_queue_plugged(q) && !blk_queue_stopped(q)) { 1551 while (!blk_queue_plugged(q) && !blk_queue_stopped(q)) {
1567 rq = blk_peek_request(q); 1552 rq = blk_peek_request(q);
1568 if (!rq) 1553 if (!rq)
1569 goto plug_and_out; 1554 goto plug_and_out;
1570 1555
1571 if (unlikely(rq == &md->suspend_rq)) { /* Flush suspend marker */
1572 if (queue_in_flight(q))
1573 /* Not quiet yet. Wait more */
1574 goto plug_and_out;
1575
1576 /* This device should be quiet now */
1577 __stop_queue(q);
1578 blk_start_request(rq);
1579 __blk_end_request_all(rq, 0);
1580 wake_up(&md->wait);
1581 goto out;
1582 }
1583
1584 ti = dm_table_find_target(map, blk_rq_pos(rq)); 1556 ti = dm_table_find_target(map, blk_rq_pos(rq));
1585 if (ti->type->busy && ti->type->busy(ti)) 1557 if (ti->type->busy && ti->type->busy(ti))
1586 goto plug_and_out; 1558 goto plug_and_out;
@@ -2112,7 +2084,7 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
2112 smp_mb(); 2084 smp_mb();
2113 if (dm_request_based(md)) { 2085 if (dm_request_based(md)) {
2114 spin_lock_irqsave(q->queue_lock, flags); 2086 spin_lock_irqsave(q->queue_lock, flags);
2115 if (!queue_in_flight(q) && blk_queue_stopped(q)) { 2087 if (!queue_in_flight(q)) {
2116 spin_unlock_irqrestore(q->queue_lock, flags); 2088 spin_unlock_irqrestore(q->queue_lock, flags);
2117 break; 2089 break;
2118 } 2090 }
@@ -2245,67 +2217,6 @@ out:
2245 return r; 2217 return r;
2246} 2218}
2247 2219
2248static void dm_rq_invalidate_suspend_marker(struct mapped_device *md)
2249{
2250 md->suspend_rq.special = (void *)0x1;
2251}
2252
2253static void dm_rq_abort_suspend(struct mapped_device *md, int noflush)
2254{
2255 struct request_queue *q = md->queue;
2256 unsigned long flags;
2257
2258 spin_lock_irqsave(q->queue_lock, flags);
2259 if (!noflush)
2260 dm_rq_invalidate_suspend_marker(md);
2261 __start_queue(q);
2262 spin_unlock_irqrestore(q->queue_lock, flags);
2263}
2264
2265static void dm_rq_start_suspend(struct mapped_device *md, int noflush)
2266{
2267 struct request *rq = &md->suspend_rq;
2268 struct request_queue *q = md->queue;
2269
2270 if (noflush)
2271 stop_queue(q);
2272 else {
2273 blk_rq_init(q, rq);
2274 blk_insert_request(q, rq, 0, NULL);
2275 }
2276}
2277
2278static int dm_rq_suspend_available(struct mapped_device *md, int noflush)
2279{
2280 int r = 1;
2281 struct request *rq = &md->suspend_rq;
2282 struct request_queue *q = md->queue;
2283 unsigned long flags;
2284
2285 if (noflush)
2286 return r;
2287
2288 /* The marker must be protected by queue lock if it is in use */
2289 spin_lock_irqsave(q->queue_lock, flags);
2290 if (unlikely(rq->ref_count)) {
2291 /*
2292 * This can happen, when the previous flush suspend was
2293 * interrupted, the marker is still in the queue and
2294 * this flush suspend has been invoked, because we don't
2295 * remove the marker at the time of suspend interruption.
2296 * We have only one marker per mapped_device, so we can't
2297 * start another flush suspend while it is in use.
2298 */
2299 BUG_ON(!rq->special); /* The marker should be invalidated */
2300 DMWARN("Invalidating the previous flush suspend is still in"
2301 " progress. Please retry later.");
2302 r = 0;
2303 }
2304 spin_unlock_irqrestore(q->queue_lock, flags);
2305
2306 return r;
2307}
2308
2309/* 2220/*
2310 * Functions to lock and unlock any filesystem running on the 2221 * Functions to lock and unlock any filesystem running on the
2311 * device. 2222 * device.
@@ -2348,49 +2259,11 @@ static void unlock_fs(struct mapped_device *md)
2348/* 2259/*
2349 * Suspend mechanism in request-based dm. 2260 * Suspend mechanism in request-based dm.
2350 * 2261 *
2351 * After the suspend starts, further incoming requests are kept in 2262 * 1. Flush all I/Os by lock_fs() if needed.
2352 * the request_queue and deferred. 2263 * 2. Stop dispatching any I/O by stopping the request_queue.
2353 * Remaining requests in the request_queue at the start of suspend are flushed 2264 * 3. Wait for all in-flight I/Os to be completed or requeued.
2354 * if it is flush suspend.
2355 * The suspend completes when the following conditions have been satisfied,
2356 * so wait for it:
2357 * 1. q->in_flight is 0 (which means no in_flight request)
2358 * 2. queue has been stopped (which means no request dispatching)
2359 *
2360 *
2361 * Noflush suspend
2362 * ---------------
2363 * Noflush suspend doesn't need to dispatch remaining requests.
2364 * So stop the queue immediately. Then, wait for all in_flight requests
2365 * to be completed or requeued.
2366 *
2367 * To abort noflush suspend, start the queue.
2368 * 2265 *
2369 * 2266 * To abort suspend, start the request_queue.
2370 * Flush suspend
2371 * -------------
2372 * Flush suspend needs to dispatch remaining requests. So stop the queue
2373 * after the remaining requests are completed. (Requeued request must be also
2374 * re-dispatched and completed. Until then, we can't stop the queue.)
2375 *
2376 * During flushing the remaining requests, further incoming requests are also
2377 * inserted to the same queue. To distinguish which requests are to be
2378 * flushed, we insert a marker request to the queue at the time of starting
2379 * flush suspend, like a barrier.
2380 * The dispatching is blocked when the marker is found on the top of the queue.
2381 * And the queue is stopped when all in_flight requests are completed, since
2382 * that means the remaining requests are completely flushed.
2383 * Then, the marker is removed from the queue.
2384 *
2385 * To abort flush suspend, we also need to take care of the marker, not only
2386 * starting the queue.
2387 * We don't remove the marker forcibly from the queue since it's against
2388 * the block-layer manner. Instead, we put an invalidated mark on the marker.
2389 * When the invalidated marker is found on the top of the queue, it is
2390 * immediately removed from the queue, so it doesn't block dispatching.
2391 * Because we have only one marker per mapped_device, we can't start another
2392 * flush suspend until the invalidated marker is removed from the queue.
2393 * So fail and return with -EBUSY in such a case.
2394 */ 2267 */
2395int dm_suspend(struct mapped_device *md, unsigned suspend_flags) 2268int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
2396{ 2269{
@@ -2406,11 +2279,6 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
2406 goto out_unlock; 2279 goto out_unlock;
2407 } 2280 }
2408 2281
2409 if (dm_request_based(md) && !dm_rq_suspend_available(md, noflush)) {
2410 r = -EBUSY;
2411 goto out_unlock;
2412 }
2413
2414 map = dm_get_table(md); 2282 map = dm_get_table(md);
2415 2283
2416 /* 2284 /*
@@ -2424,8 +2292,10 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
2424 dm_table_presuspend_targets(map); 2292 dm_table_presuspend_targets(map);
2425 2293
2426 /* 2294 /*
2427 * Flush I/O to the device. noflush supersedes do_lockfs, 2295 * Flush I/O to the device.
2428 * because lock_fs() needs to flush I/Os. 2296 * Any I/O submitted after lock_fs() may not be flushed.
2297 * noflush takes precedence over do_lockfs.
2298 * (lock_fs() flushes I/Os and waits for them to complete.)
2429 */ 2299 */
2430 if (!noflush && do_lockfs) { 2300 if (!noflush && do_lockfs) {
2431 r = lock_fs(md); 2301 r = lock_fs(md);
@@ -2457,7 +2327,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
2457 flush_workqueue(md->wq); 2327 flush_workqueue(md->wq);
2458 2328
2459 if (dm_request_based(md)) 2329 if (dm_request_based(md))
2460 dm_rq_start_suspend(md, noflush); 2330 stop_queue(md->queue);
2461 2331
2462 /* 2332 /*
2463 * At this point no more requests are entering target request routines. 2333 * At this point no more requests are entering target request routines.
@@ -2476,7 +2346,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
2476 dm_queue_flush(md); 2346 dm_queue_flush(md);
2477 2347
2478 if (dm_request_based(md)) 2348 if (dm_request_based(md))
2479 dm_rq_abort_suspend(md, noflush); 2349 start_queue(md->queue);
2480 2350
2481 unlock_fs(md); 2351 unlock_fs(md);
2482 goto out; /* pushback list is already flushed, so skip flush */ 2352 goto out; /* pushback list is already flushed, so skip flush */