diff options
-rw-r--r-- | drivers/md/dm.c | 157 |
1 files changed, 45 insertions, 112 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 65114e4d9f65..2011704b8ba0 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
@@ -110,7 +110,6 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo); | |||
110 | #define DMF_FREEING 3 | 110 | #define DMF_FREEING 3 |
111 | #define DMF_DELETING 4 | 111 | #define DMF_DELETING 4 |
112 | #define DMF_NOFLUSH_SUSPENDING 5 | 112 | #define DMF_NOFLUSH_SUSPENDING 5 |
113 | #define DMF_QUEUE_IO_TO_THREAD 6 | ||
114 | 113 | ||
115 | /* | 114 | /* |
116 | * Work processed by per-device workqueue. | 115 | * Work processed by per-device workqueue. |
@@ -144,11 +143,6 @@ struct mapped_device { | |||
144 | spinlock_t deferred_lock; | 143 | spinlock_t deferred_lock; |
145 | 144 | ||
146 | /* | 145 | /* |
147 | * An error from the flush request currently being processed. | ||
148 | */ | ||
149 | int flush_error; | ||
150 | |||
151 | /* | ||
152 | * Processing queue (flush) | 146 | * Processing queue (flush) |
153 | */ | 147 | */ |
154 | struct workqueue_struct *wq; | 148 | struct workqueue_struct *wq; |
@@ -518,16 +512,10 @@ static void end_io_acct(struct dm_io *io) | |||
518 | */ | 512 | */ |
519 | static void queue_io(struct mapped_device *md, struct bio *bio) | 513 | static void queue_io(struct mapped_device *md, struct bio *bio) |
520 | { | 514 | { |
521 | down_write(&md->io_lock); | ||
522 | |||
523 | spin_lock_irq(&md->deferred_lock); | 515 | spin_lock_irq(&md->deferred_lock); |
524 | bio_list_add(&md->deferred, bio); | 516 | bio_list_add(&md->deferred, bio); |
525 | spin_unlock_irq(&md->deferred_lock); | 517 | spin_unlock_irq(&md->deferred_lock); |
526 | 518 | queue_work(md->wq, &md->work); | |
527 | if (!test_and_set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) | ||
528 | queue_work(md->wq, &md->work); | ||
529 | |||
530 | up_write(&md->io_lock); | ||
531 | } | 519 | } |
532 | 520 | ||
533 | /* | 521 | /* |
@@ -615,11 +603,9 @@ static void dec_pending(struct dm_io *io, int error) | |||
615 | * Target requested pushing back the I/O. | 603 | * Target requested pushing back the I/O. |
616 | */ | 604 | */ |
617 | spin_lock_irqsave(&md->deferred_lock, flags); | 605 | spin_lock_irqsave(&md->deferred_lock, flags); |
618 | if (__noflush_suspending(md)) { | 606 | if (__noflush_suspending(md)) |
619 | if (!(io->bio->bi_rw & REQ_FLUSH)) | 607 | bio_list_add_head(&md->deferred, io->bio); |
620 | bio_list_add_head(&md->deferred, | 608 | else |
621 | io->bio); | ||
622 | } else | ||
623 | /* noflush suspend was interrupted. */ | 609 | /* noflush suspend was interrupted. */ |
624 | io->error = -EIO; | 610 | io->error = -EIO; |
625 | spin_unlock_irqrestore(&md->deferred_lock, flags); | 611 | spin_unlock_irqrestore(&md->deferred_lock, flags); |
@@ -627,26 +613,22 @@ static void dec_pending(struct dm_io *io, int error) | |||
627 | 613 | ||
628 | io_error = io->error; | 614 | io_error = io->error; |
629 | bio = io->bio; | 615 | bio = io->bio; |
616 | end_io_acct(io); | ||
617 | free_io(md, io); | ||
618 | |||
619 | if (io_error == DM_ENDIO_REQUEUE) | ||
620 | return; | ||
630 | 621 | ||
631 | if (bio->bi_rw & REQ_FLUSH) { | 622 | if (!(bio->bi_rw & REQ_FLUSH) || !bio->bi_size) { |
623 | trace_block_bio_complete(md->queue, bio); | ||
624 | bio_endio(bio, io_error); | ||
625 | } else { | ||
632 | /* | 626 | /* |
633 | * There can be just one flush request so we use | 627 | * Preflush done for flush with data, reissue |
634 | * a per-device variable for error reporting. | 628 | * without REQ_FLUSH. |
635 | * Note that you can't touch the bio after end_io_acct | ||
636 | */ | 629 | */ |
637 | if (!md->flush_error) | 630 | bio->bi_rw &= ~REQ_FLUSH; |
638 | md->flush_error = io_error; | 631 | queue_io(md, bio); |
639 | end_io_acct(io); | ||
640 | free_io(md, io); | ||
641 | } else { | ||
642 | end_io_acct(io); | ||
643 | free_io(md, io); | ||
644 | |||
645 | if (io_error != DM_ENDIO_REQUEUE) { | ||
646 | trace_block_bio_complete(md->queue, bio); | ||
647 | |||
648 | bio_endio(bio, io_error); | ||
649 | } | ||
650 | } | 632 | } |
651 | } | 633 | } |
652 | } | 634 | } |
@@ -1298,21 +1280,17 @@ static int __clone_and_map(struct clone_info *ci) | |||
1298 | */ | 1280 | */ |
1299 | static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) | 1281 | static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) |
1300 | { | 1282 | { |
1283 | bool is_flush = bio->bi_rw & REQ_FLUSH; | ||
1301 | struct clone_info ci; | 1284 | struct clone_info ci; |
1302 | int error = 0; | 1285 | int error = 0; |
1303 | 1286 | ||
1304 | ci.map = dm_get_live_table(md); | 1287 | ci.map = dm_get_live_table(md); |
1305 | if (unlikely(!ci.map)) { | 1288 | if (unlikely(!ci.map)) { |
1306 | if (!(bio->bi_rw & REQ_FLUSH)) | 1289 | bio_io_error(bio); |
1307 | bio_io_error(bio); | ||
1308 | else | ||
1309 | if (!md->flush_error) | ||
1310 | md->flush_error = -EIO; | ||
1311 | return; | 1290 | return; |
1312 | } | 1291 | } |
1313 | 1292 | ||
1314 | ci.md = md; | 1293 | ci.md = md; |
1315 | ci.bio = bio; | ||
1316 | ci.io = alloc_io(md); | 1294 | ci.io = alloc_io(md); |
1317 | ci.io->error = 0; | 1295 | ci.io->error = 0; |
1318 | atomic_set(&ci.io->io_count, 1); | 1296 | atomic_set(&ci.io->io_count, 1); |
@@ -1320,18 +1298,19 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) | |||
1320 | ci.io->md = md; | 1298 | ci.io->md = md; |
1321 | spin_lock_init(&ci.io->endio_lock); | 1299 | spin_lock_init(&ci.io->endio_lock); |
1322 | ci.sector = bio->bi_sector; | 1300 | ci.sector = bio->bi_sector; |
1323 | if (!(bio->bi_rw & REQ_FLUSH)) | 1301 | ci.idx = bio->bi_idx; |
1302 | |||
1303 | if (!is_flush) { | ||
1304 | ci.bio = bio; | ||
1324 | ci.sector_count = bio_sectors(bio); | 1305 | ci.sector_count = bio_sectors(bio); |
1325 | else { | 1306 | } else { |
1326 | /* all FLUSH bio's reaching here should be empty */ | 1307 | ci.bio = &ci.md->flush_bio; |
1327 | WARN_ON_ONCE(bio_has_data(bio)); | ||
1328 | ci.sector_count = 1; | 1308 | ci.sector_count = 1; |
1329 | } | 1309 | } |
1330 | ci.idx = bio->bi_idx; | ||
1331 | 1310 | ||
1332 | start_io_acct(ci.io); | 1311 | start_io_acct(ci.io); |
1333 | while (ci.sector_count && !error) { | 1312 | while (ci.sector_count && !error) { |
1334 | if (!(bio->bi_rw & REQ_FLUSH)) | 1313 | if (!is_flush) |
1335 | error = __clone_and_map(&ci); | 1314 | error = __clone_and_map(&ci); |
1336 | else | 1315 | else |
1337 | error = __clone_and_map_flush(&ci); | 1316 | error = __clone_and_map_flush(&ci); |
@@ -1419,22 +1398,14 @@ static int _dm_request(struct request_queue *q, struct bio *bio) | |||
1419 | part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio)); | 1398 | part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio)); |
1420 | part_stat_unlock(); | 1399 | part_stat_unlock(); |
1421 | 1400 | ||
1422 | /* | 1401 | /* if we're suspended, we have to queue this io for later */ |
1423 | * If we're suspended or the thread is processing flushes | 1402 | if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) { |
1424 | * we have to queue this io for later. | ||
1425 | */ | ||
1426 | if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) || | ||
1427 | (bio->bi_rw & REQ_FLUSH)) { | ||
1428 | up_read(&md->io_lock); | 1403 | up_read(&md->io_lock); |
1429 | 1404 | ||
1430 | if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) && | 1405 | if (bio_rw(bio) != READA) |
1431 | bio_rw(bio) == READA) { | 1406 | queue_io(md, bio); |
1407 | else | ||
1432 | bio_io_error(bio); | 1408 | bio_io_error(bio); |
1433 | return 0; | ||
1434 | } | ||
1435 | |||
1436 | queue_io(md, bio); | ||
1437 | |||
1438 | return 0; | 1409 | return 0; |
1439 | } | 1410 | } |
1440 | 1411 | ||
@@ -1923,6 +1894,10 @@ static struct mapped_device *alloc_dev(int minor) | |||
1923 | if (!md->bdev) | 1894 | if (!md->bdev) |
1924 | goto bad_bdev; | 1895 | goto bad_bdev; |
1925 | 1896 | ||
1897 | bio_init(&md->flush_bio); | ||
1898 | md->flush_bio.bi_bdev = md->bdev; | ||
1899 | md->flush_bio.bi_rw = WRITE_FLUSH; | ||
1900 | |||
1926 | /* Populate the mapping, nobody knows we exist yet */ | 1901 | /* Populate the mapping, nobody knows we exist yet */ |
1927 | spin_lock(&_minor_lock); | 1902 | spin_lock(&_minor_lock); |
1928 | old_md = idr_replace(&_minor_idr, md, minor); | 1903 | old_md = idr_replace(&_minor_idr, md, minor); |
@@ -2313,37 +2288,6 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible) | |||
2313 | return r; | 2288 | return r; |
2314 | } | 2289 | } |
2315 | 2290 | ||
2316 | static void process_flush(struct mapped_device *md, struct bio *bio) | ||
2317 | { | ||
2318 | md->flush_error = 0; | ||
2319 | |||
2320 | /* handle REQ_FLUSH */ | ||
2321 | dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); | ||
2322 | |||
2323 | bio_init(&md->flush_bio); | ||
2324 | md->flush_bio.bi_bdev = md->bdev; | ||
2325 | md->flush_bio.bi_rw = WRITE_FLUSH; | ||
2326 | __split_and_process_bio(md, &md->flush_bio); | ||
2327 | |||
2328 | dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); | ||
2329 | |||
2330 | /* if it's an empty flush or the preflush failed, we're done */ | ||
2331 | if (!bio_has_data(bio) || md->flush_error) { | ||
2332 | if (md->flush_error != DM_ENDIO_REQUEUE) | ||
2333 | bio_endio(bio, md->flush_error); | ||
2334 | else { | ||
2335 | spin_lock_irq(&md->deferred_lock); | ||
2336 | bio_list_add_head(&md->deferred, bio); | ||
2337 | spin_unlock_irq(&md->deferred_lock); | ||
2338 | } | ||
2339 | return; | ||
2340 | } | ||
2341 | |||
2342 | /* issue data + REQ_FUA */ | ||
2343 | bio->bi_rw &= ~REQ_FLUSH; | ||
2344 | __split_and_process_bio(md, bio); | ||
2345 | } | ||
2346 | |||
2347 | /* | 2291 | /* |
2348 | * Process the deferred bios | 2292 | * Process the deferred bios |
2349 | */ | 2293 | */ |
@@ -2353,33 +2297,27 @@ static void dm_wq_work(struct work_struct *work) | |||
2353 | work); | 2297 | work); |
2354 | struct bio *c; | 2298 | struct bio *c; |
2355 | 2299 | ||
2356 | down_write(&md->io_lock); | 2300 | down_read(&md->io_lock); |
2357 | 2301 | ||
2358 | while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { | 2302 | while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { |
2359 | spin_lock_irq(&md->deferred_lock); | 2303 | spin_lock_irq(&md->deferred_lock); |
2360 | c = bio_list_pop(&md->deferred); | 2304 | c = bio_list_pop(&md->deferred); |
2361 | spin_unlock_irq(&md->deferred_lock); | 2305 | spin_unlock_irq(&md->deferred_lock); |
2362 | 2306 | ||
2363 | if (!c) { | 2307 | if (!c) |
2364 | clear_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags); | ||
2365 | break; | 2308 | break; |
2366 | } | ||
2367 | 2309 | ||
2368 | up_write(&md->io_lock); | 2310 | up_read(&md->io_lock); |
2369 | 2311 | ||
2370 | if (dm_request_based(md)) | 2312 | if (dm_request_based(md)) |
2371 | generic_make_request(c); | 2313 | generic_make_request(c); |
2372 | else { | 2314 | else |
2373 | if (c->bi_rw & REQ_FLUSH) | 2315 | __split_and_process_bio(md, c); |
2374 | process_flush(md, c); | ||
2375 | else | ||
2376 | __split_and_process_bio(md, c); | ||
2377 | } | ||
2378 | 2316 | ||
2379 | down_write(&md->io_lock); | 2317 | down_read(&md->io_lock); |
2380 | } | 2318 | } |
2381 | 2319 | ||
2382 | up_write(&md->io_lock); | 2320 | up_read(&md->io_lock); |
2383 | } | 2321 | } |
2384 | 2322 | ||
2385 | static void dm_queue_flush(struct mapped_device *md) | 2323 | static void dm_queue_flush(struct mapped_device *md) |
@@ -2511,17 +2449,12 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
2511 | * | 2449 | * |
2512 | * To get all processes out of __split_and_process_bio in dm_request, | 2450 | * To get all processes out of __split_and_process_bio in dm_request, |
2513 | * we take the write lock. To prevent any process from reentering | 2451 | * we take the write lock. To prevent any process from reentering |
2514 | * __split_and_process_bio from dm_request, we set | 2452 | * __split_and_process_bio from dm_request and quiesce the thread |
2515 | DMF_QUEUE_IO_TO_THREAD. | 2453 | * (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND and call |
2516 | * | 2454 | * flush_workqueue(md->wq). |
2517 | * To quiesce the thread (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND | ||
2518 | * and call flush_workqueue(md->wq). flush_workqueue will wait until | ||
2519 | * dm_wq_work exits and DMF_BLOCK_IO_FOR_SUSPEND will prevent any | ||
2520 | * further calls to __split_and_process_bio from dm_wq_work. | ||
2521 | */ | 2455 | */ |
2522 | down_write(&md->io_lock); | 2456 | down_write(&md->io_lock); |
2523 | set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); | 2457 | set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); |
2524 | set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags); | ||
2525 | up_write(&md->io_lock); | 2458 | up_write(&md->io_lock); |
2526 | 2459 | ||
2527 | /* | 2460 | /* |