aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- drivers/md/dm.c 157
1 files changed, 45 insertions, 112 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 65114e4d9f65..2011704b8ba0 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -110,7 +110,6 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
110#define DMF_FREEING 3 110#define DMF_FREEING 3
111#define DMF_DELETING 4 111#define DMF_DELETING 4
112#define DMF_NOFLUSH_SUSPENDING 5 112#define DMF_NOFLUSH_SUSPENDING 5
113#define DMF_QUEUE_IO_TO_THREAD 6
114 113
115/* 114/*
116 * Work processed by per-device workqueue. 115 * Work processed by per-device workqueue.
@@ -144,11 +143,6 @@ struct mapped_device {
144 spinlock_t deferred_lock; 143 spinlock_t deferred_lock;
145 144
146 /* 145 /*
147 * An error from the flush request currently being processed.
148 */
149 int flush_error;
150
151 /*
152 * Processing queue (flush) 146 * Processing queue (flush)
153 */ 147 */
154 struct workqueue_struct *wq; 148 struct workqueue_struct *wq;
@@ -518,16 +512,10 @@ static void end_io_acct(struct dm_io *io)
518 */ 512 */
519static void queue_io(struct mapped_device *md, struct bio *bio) 513static void queue_io(struct mapped_device *md, struct bio *bio)
520{ 514{
521 down_write(&md->io_lock);
522
523 spin_lock_irq(&md->deferred_lock); 515 spin_lock_irq(&md->deferred_lock);
524 bio_list_add(&md->deferred, bio); 516 bio_list_add(&md->deferred, bio);
525 spin_unlock_irq(&md->deferred_lock); 517 spin_unlock_irq(&md->deferred_lock);
526 518 queue_work(md->wq, &md->work);
527 if (!test_and_set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags))
528 queue_work(md->wq, &md->work);
529
530 up_write(&md->io_lock);
531} 519}
532 520
533/* 521/*
@@ -615,11 +603,9 @@ static void dec_pending(struct dm_io *io, int error)
615 * Target requested pushing back the I/O. 603 * Target requested pushing back the I/O.
616 */ 604 */
617 spin_lock_irqsave(&md->deferred_lock, flags); 605 spin_lock_irqsave(&md->deferred_lock, flags);
618 if (__noflush_suspending(md)) { 606 if (__noflush_suspending(md))
619 if (!(io->bio->bi_rw & REQ_FLUSH)) 607 bio_list_add_head(&md->deferred, io->bio);
620 bio_list_add_head(&md->deferred, 608 else
621 io->bio);
622 } else
623 /* noflush suspend was interrupted. */ 609 /* noflush suspend was interrupted. */
624 io->error = -EIO; 610 io->error = -EIO;
625 spin_unlock_irqrestore(&md->deferred_lock, flags); 611 spin_unlock_irqrestore(&md->deferred_lock, flags);
@@ -627,26 +613,22 @@ static void dec_pending(struct dm_io *io, int error)
627 613
628 io_error = io->error; 614 io_error = io->error;
629 bio = io->bio; 615 bio = io->bio;
616 end_io_acct(io);
617 free_io(md, io);
618
619 if (io_error == DM_ENDIO_REQUEUE)
620 return;
630 621
631 if (bio->bi_rw & REQ_FLUSH) { 622 if (!(bio->bi_rw & REQ_FLUSH) || !bio->bi_size) {
623 trace_block_bio_complete(md->queue, bio);
624 bio_endio(bio, io_error);
625 } else {
632 /* 626 /*
633 * There can be just one flush request so we use 627 * Preflush done for flush with data, reissue
634 * a per-device variable for error reporting. 628 * without REQ_FLUSH.
635 * Note that you can't touch the bio after end_io_acct
636 */ 629 */
637 if (!md->flush_error) 630 bio->bi_rw &= ~REQ_FLUSH;
638 md->flush_error = io_error; 631 queue_io(md, bio);
639 end_io_acct(io);
640 free_io(md, io);
641 } else {
642 end_io_acct(io);
643 free_io(md, io);
644
645 if (io_error != DM_ENDIO_REQUEUE) {
646 trace_block_bio_complete(md->queue, bio);
647
648 bio_endio(bio, io_error);
649 }
650 } 632 }
651 } 633 }
652} 634}
@@ -1298,21 +1280,17 @@ static int __clone_and_map(struct clone_info *ci)
1298 */ 1280 */
1299static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) 1281static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
1300{ 1282{
1283 bool is_flush = bio->bi_rw & REQ_FLUSH;
1301 struct clone_info ci; 1284 struct clone_info ci;
1302 int error = 0; 1285 int error = 0;
1303 1286
1304 ci.map = dm_get_live_table(md); 1287 ci.map = dm_get_live_table(md);
1305 if (unlikely(!ci.map)) { 1288 if (unlikely(!ci.map)) {
1306 if (!(bio->bi_rw & REQ_FLUSH)) 1289 bio_io_error(bio);
1307 bio_io_error(bio);
1308 else
1309 if (!md->flush_error)
1310 md->flush_error = -EIO;
1311 return; 1290 return;
1312 } 1291 }
1313 1292
1314 ci.md = md; 1293 ci.md = md;
1315 ci.bio = bio;
1316 ci.io = alloc_io(md); 1294 ci.io = alloc_io(md);
1317 ci.io->error = 0; 1295 ci.io->error = 0;
1318 atomic_set(&ci.io->io_count, 1); 1296 atomic_set(&ci.io->io_count, 1);
@@ -1320,18 +1298,19 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
1320 ci.io->md = md; 1298 ci.io->md = md;
1321 spin_lock_init(&ci.io->endio_lock); 1299 spin_lock_init(&ci.io->endio_lock);
1322 ci.sector = bio->bi_sector; 1300 ci.sector = bio->bi_sector;
1323 if (!(bio->bi_rw & REQ_FLUSH)) 1301 ci.idx = bio->bi_idx;
1302
1303 if (!is_flush) {
1304 ci.bio = bio;
1324 ci.sector_count = bio_sectors(bio); 1305 ci.sector_count = bio_sectors(bio);
1325 else { 1306 } else {
1326 /* all FLUSH bio's reaching here should be empty */ 1307 ci.bio = &ci.md->flush_bio;
1327 WARN_ON_ONCE(bio_has_data(bio));
1328 ci.sector_count = 1; 1308 ci.sector_count = 1;
1329 } 1309 }
1330 ci.idx = bio->bi_idx;
1331 1310
1332 start_io_acct(ci.io); 1311 start_io_acct(ci.io);
1333 while (ci.sector_count && !error) { 1312 while (ci.sector_count && !error) {
1334 if (!(bio->bi_rw & REQ_FLUSH)) 1313 if (!is_flush)
1335 error = __clone_and_map(&ci); 1314 error = __clone_and_map(&ci);
1336 else 1315 else
1337 error = __clone_and_map_flush(&ci); 1316 error = __clone_and_map_flush(&ci);
@@ -1419,22 +1398,14 @@ static int _dm_request(struct request_queue *q, struct bio *bio)
1419 part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio)); 1398 part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio));
1420 part_stat_unlock(); 1399 part_stat_unlock();
1421 1400
1422 /* 1401 /* if we're suspended, we have to queue this io for later */
1423 * If we're suspended or the thread is processing flushes 1402 if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) {
1424 * we have to queue this io for later.
1425 */
1426 if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) ||
1427 (bio->bi_rw & REQ_FLUSH)) {
1428 up_read(&md->io_lock); 1403 up_read(&md->io_lock);
1429 1404
1430 if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) && 1405 if (bio_rw(bio) != READA)
1431 bio_rw(bio) == READA) { 1406 queue_io(md, bio);
1407 else
1432 bio_io_error(bio); 1408 bio_io_error(bio);
1433 return 0;
1434 }
1435
1436 queue_io(md, bio);
1437
1438 return 0; 1409 return 0;
1439 } 1410 }
1440 1411
@@ -1923,6 +1894,10 @@ static struct mapped_device *alloc_dev(int minor)
1923 if (!md->bdev) 1894 if (!md->bdev)
1924 goto bad_bdev; 1895 goto bad_bdev;
1925 1896
1897 bio_init(&md->flush_bio);
1898 md->flush_bio.bi_bdev = md->bdev;
1899 md->flush_bio.bi_rw = WRITE_FLUSH;
1900
1926 /* Populate the mapping, nobody knows we exist yet */ 1901 /* Populate the mapping, nobody knows we exist yet */
1927 spin_lock(&_minor_lock); 1902 spin_lock(&_minor_lock);
1928 old_md = idr_replace(&_minor_idr, md, minor); 1903 old_md = idr_replace(&_minor_idr, md, minor);
@@ -2313,37 +2288,6 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
2313 return r; 2288 return r;
2314} 2289}
2315 2290
2316static void process_flush(struct mapped_device *md, struct bio *bio)
2317{
2318 md->flush_error = 0;
2319
2320 /* handle REQ_FLUSH */
2321 dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
2322
2323 bio_init(&md->flush_bio);
2324 md->flush_bio.bi_bdev = md->bdev;
2325 md->flush_bio.bi_rw = WRITE_FLUSH;
2326 __split_and_process_bio(md, &md->flush_bio);
2327
2328 dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
2329
2330 /* if it's an empty flush or the preflush failed, we're done */
2331 if (!bio_has_data(bio) || md->flush_error) {
2332 if (md->flush_error != DM_ENDIO_REQUEUE)
2333 bio_endio(bio, md->flush_error);
2334 else {
2335 spin_lock_irq(&md->deferred_lock);
2336 bio_list_add_head(&md->deferred, bio);
2337 spin_unlock_irq(&md->deferred_lock);
2338 }
2339 return;
2340 }
2341
2342 /* issue data + REQ_FUA */
2343 bio->bi_rw &= ~REQ_FLUSH;
2344 __split_and_process_bio(md, bio);
2345}
2346
2347/* 2291/*
2348 * Process the deferred bios 2292 * Process the deferred bios
2349 */ 2293 */
@@ -2353,33 +2297,27 @@ static void dm_wq_work(struct work_struct *work)
2353 work); 2297 work);
2354 struct bio *c; 2298 struct bio *c;
2355 2299
2356 down_write(&md->io_lock); 2300 down_read(&md->io_lock);
2357 2301
2358 while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { 2302 while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
2359 spin_lock_irq(&md->deferred_lock); 2303 spin_lock_irq(&md->deferred_lock);
2360 c = bio_list_pop(&md->deferred); 2304 c = bio_list_pop(&md->deferred);
2361 spin_unlock_irq(&md->deferred_lock); 2305 spin_unlock_irq(&md->deferred_lock);
2362 2306
2363 if (!c) { 2307 if (!c)
2364 clear_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
2365 break; 2308 break;
2366 }
2367 2309
2368 up_write(&md->io_lock); 2310 up_read(&md->io_lock);
2369 2311
2370 if (dm_request_based(md)) 2312 if (dm_request_based(md))
2371 generic_make_request(c); 2313 generic_make_request(c);
2372 else { 2314 else
2373 if (c->bi_rw & REQ_FLUSH) 2315 __split_and_process_bio(md, c);
2374 process_flush(md, c);
2375 else
2376 __split_and_process_bio(md, c);
2377 }
2378 2316
2379 down_write(&md->io_lock); 2317 down_read(&md->io_lock);
2380 } 2318 }
2381 2319
2382 up_write(&md->io_lock); 2320 up_read(&md->io_lock);
2383} 2321}
2384 2322
2385static void dm_queue_flush(struct mapped_device *md) 2323static void dm_queue_flush(struct mapped_device *md)
@@ -2511,17 +2449,12 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
2511 * 2449 *
2512 * To get all processes out of __split_and_process_bio in dm_request, 2450 * To get all processes out of __split_and_process_bio in dm_request,
2513 * we take the write lock. To prevent any process from reentering 2451 * we take the write lock. To prevent any process from reentering
2514 * __split_and_process_bio from dm_request, we set 2452 * __split_and_process_bio from dm_request and quiesce the thread
2515 * DMF_QUEUE_IO_TO_THREAD. 2453 * (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND and call
2516 * 2454 * flush_workqueue(md->wq).
2517 * To quiesce the thread (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND
2518 * and call flush_workqueue(md->wq). flush_workqueue will wait until
2519 * dm_wq_work exits and DMF_BLOCK_IO_FOR_SUSPEND will prevent any
2520 * further calls to __split_and_process_bio from dm_wq_work.
2521 */ 2455 */
2522 down_write(&md->io_lock); 2456 down_write(&md->io_lock);
2523 set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); 2457 set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
2524 set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
2525 up_write(&md->io_lock); 2458 up_write(&md->io_lock);
2526 2459
2527 /* 2460 /*