diff options
Diffstat (limited to 'drivers/md/dm.c')
-rw-r--r-- | drivers/md/dm.c | 200 |
1 files changed, 142 insertions, 58 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 788ba96a6256..424f7b048c30 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
@@ -6,7 +6,6 @@ | |||
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include "dm.h" | 8 | #include "dm.h" |
9 | #include "dm-bio-list.h" | ||
10 | #include "dm-uevent.h" | 9 | #include "dm-uevent.h" |
11 | 10 | ||
12 | #include <linux/init.h> | 11 | #include <linux/init.h> |
@@ -89,12 +88,13 @@ union map_info *dm_get_mapinfo(struct bio *bio) | |||
89 | /* | 88 | /* |
90 | * Bits for the md->flags field. | 89 | * Bits for the md->flags field. |
91 | */ | 90 | */ |
92 | #define DMF_BLOCK_IO 0 | 91 | #define DMF_BLOCK_IO_FOR_SUSPEND 0 |
93 | #define DMF_SUSPENDED 1 | 92 | #define DMF_SUSPENDED 1 |
94 | #define DMF_FROZEN 2 | 93 | #define DMF_FROZEN 2 |
95 | #define DMF_FREEING 3 | 94 | #define DMF_FREEING 3 |
96 | #define DMF_DELETING 4 | 95 | #define DMF_DELETING 4 |
97 | #define DMF_NOFLUSH_SUSPENDING 5 | 96 | #define DMF_NOFLUSH_SUSPENDING 5 |
97 | #define DMF_QUEUE_IO_TO_THREAD 6 | ||
98 | 98 | ||
99 | /* | 99 | /* |
100 | * Work processed by per-device workqueue. | 100 | * Work processed by per-device workqueue. |
@@ -124,6 +124,11 @@ struct mapped_device { | |||
124 | spinlock_t deferred_lock; | 124 | spinlock_t deferred_lock; |
125 | 125 | ||
126 | /* | 126 | /* |
127 | * An error from the barrier request currently being processed. | ||
128 | */ | ||
129 | int barrier_error; | ||
130 | |||
131 | /* | ||
127 | * Processing queue (flush/barriers) | 132 | * Processing queue (flush/barriers) |
128 | */ | 133 | */ |
129 | struct workqueue_struct *wq; | 134 | struct workqueue_struct *wq; |
@@ -424,6 +429,10 @@ static void end_io_acct(struct dm_io *io) | |||
424 | part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration); | 429 | part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration); |
425 | part_stat_unlock(); | 430 | part_stat_unlock(); |
426 | 431 | ||
432 | /* | ||
433 | * After this is decremented the bio must not be touched if it is | ||
434 | * a barrier. | ||
435 | */ | ||
427 | dm_disk(md)->part0.in_flight = pending = | 436 | dm_disk(md)->part0.in_flight = pending = |
428 | atomic_dec_return(&md->pending); | 437 | atomic_dec_return(&md->pending); |
429 | 438 | ||
@@ -435,21 +444,18 @@ static void end_io_acct(struct dm_io *io) | |||
435 | /* | 444 | /* |
436 | * Add the bio to the list of deferred io. | 445 | * Add the bio to the list of deferred io. |
437 | */ | 446 | */ |
438 | static int queue_io(struct mapped_device *md, struct bio *bio) | 447 | static void queue_io(struct mapped_device *md, struct bio *bio) |
439 | { | 448 | { |
440 | down_write(&md->io_lock); | 449 | down_write(&md->io_lock); |
441 | 450 | ||
442 | if (!test_bit(DMF_BLOCK_IO, &md->flags)) { | ||
443 | up_write(&md->io_lock); | ||
444 | return 1; | ||
445 | } | ||
446 | |||
447 | spin_lock_irq(&md->deferred_lock); | 451 | spin_lock_irq(&md->deferred_lock); |
448 | bio_list_add(&md->deferred, bio); | 452 | bio_list_add(&md->deferred, bio); |
449 | spin_unlock_irq(&md->deferred_lock); | 453 | spin_unlock_irq(&md->deferred_lock); |
450 | 454 | ||
455 | if (!test_and_set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) | ||
456 | queue_work(md->wq, &md->work); | ||
457 | |||
451 | up_write(&md->io_lock); | 458 | up_write(&md->io_lock); |
452 | return 0; /* deferred successfully */ | ||
453 | } | 459 | } |
454 | 460 | ||
455 | /* | 461 | /* |
@@ -533,25 +539,35 @@ static void dec_pending(struct dm_io *io, int error) | |||
533 | */ | 539 | */ |
534 | spin_lock_irqsave(&md->deferred_lock, flags); | 540 | spin_lock_irqsave(&md->deferred_lock, flags); |
535 | if (__noflush_suspending(md)) | 541 | if (__noflush_suspending(md)) |
536 | bio_list_add(&md->deferred, io->bio); | 542 | bio_list_add_head(&md->deferred, io->bio); |
537 | else | 543 | else |
538 | /* noflush suspend was interrupted. */ | 544 | /* noflush suspend was interrupted. */ |
539 | io->error = -EIO; | 545 | io->error = -EIO; |
540 | spin_unlock_irqrestore(&md->deferred_lock, flags); | 546 | spin_unlock_irqrestore(&md->deferred_lock, flags); |
541 | } | 547 | } |
542 | 548 | ||
543 | end_io_acct(io); | ||
544 | |||
545 | io_error = io->error; | 549 | io_error = io->error; |
546 | bio = io->bio; | 550 | bio = io->bio; |
547 | 551 | ||
548 | free_io(md, io); | 552 | if (bio_barrier(bio)) { |
553 | /* | ||
554 | * There can be just one barrier request so we use | ||
555 | * a per-device variable for error reporting. | ||
556 | * Note that you can't touch the bio after end_io_acct | ||
557 | */ | ||
558 | md->barrier_error = io_error; | ||
559 | end_io_acct(io); | ||
560 | } else { | ||
561 | end_io_acct(io); | ||
549 | 562 | ||
550 | if (io_error != DM_ENDIO_REQUEUE) { | 563 | if (io_error != DM_ENDIO_REQUEUE) { |
551 | trace_block_bio_complete(md->queue, bio); | 564 | trace_block_bio_complete(md->queue, bio); |
552 | 565 | ||
553 | bio_endio(bio, io_error); | 566 | bio_endio(bio, io_error); |
567 | } | ||
554 | } | 568 | } |
569 | |||
570 | free_io(md, io); | ||
555 | } | 571 | } |
556 | } | 572 | } |
557 | 573 | ||
@@ -693,13 +709,19 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector, | |||
693 | 709 | ||
694 | clone->bi_sector = sector; | 710 | clone->bi_sector = sector; |
695 | clone->bi_bdev = bio->bi_bdev; | 711 | clone->bi_bdev = bio->bi_bdev; |
696 | clone->bi_rw = bio->bi_rw; | 712 | clone->bi_rw = bio->bi_rw & ~(1 << BIO_RW_BARRIER); |
697 | clone->bi_vcnt = 1; | 713 | clone->bi_vcnt = 1; |
698 | clone->bi_size = to_bytes(len); | 714 | clone->bi_size = to_bytes(len); |
699 | clone->bi_io_vec->bv_offset = offset; | 715 | clone->bi_io_vec->bv_offset = offset; |
700 | clone->bi_io_vec->bv_len = clone->bi_size; | 716 | clone->bi_io_vec->bv_len = clone->bi_size; |
701 | clone->bi_flags |= 1 << BIO_CLONED; | 717 | clone->bi_flags |= 1 << BIO_CLONED; |
702 | 718 | ||
719 | if (bio_integrity(bio)) { | ||
720 | bio_integrity_clone(clone, bio, GFP_NOIO); | ||
721 | bio_integrity_trim(clone, | ||
722 | bio_sector_offset(bio, idx, offset), len); | ||
723 | } | ||
724 | |||
703 | return clone; | 725 | return clone; |
704 | } | 726 | } |
705 | 727 | ||
@@ -714,6 +736,7 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector, | |||
714 | 736 | ||
715 | clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs); | 737 | clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs); |
716 | __bio_clone(clone, bio); | 738 | __bio_clone(clone, bio); |
739 | clone->bi_rw &= ~(1 << BIO_RW_BARRIER); | ||
717 | clone->bi_destructor = dm_bio_destructor; | 740 | clone->bi_destructor = dm_bio_destructor; |
718 | clone->bi_sector = sector; | 741 | clone->bi_sector = sector; |
719 | clone->bi_idx = idx; | 742 | clone->bi_idx = idx; |
@@ -721,6 +744,14 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector, | |||
721 | clone->bi_size = to_bytes(len); | 744 | clone->bi_size = to_bytes(len); |
722 | clone->bi_flags &= ~(1 << BIO_SEG_VALID); | 745 | clone->bi_flags &= ~(1 << BIO_SEG_VALID); |
723 | 746 | ||
747 | if (bio_integrity(bio)) { | ||
748 | bio_integrity_clone(clone, bio, GFP_NOIO); | ||
749 | |||
750 | if (idx != bio->bi_idx || clone->bi_size < bio->bi_size) | ||
751 | bio_integrity_trim(clone, | ||
752 | bio_sector_offset(bio, idx, 0), len); | ||
753 | } | ||
754 | |||
724 | return clone; | 755 | return clone; |
725 | } | 756 | } |
726 | 757 | ||
@@ -834,14 +865,13 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) | |||
834 | 865 | ||
835 | ci.map = dm_get_table(md); | 866 | ci.map = dm_get_table(md); |
836 | if (unlikely(!ci.map)) { | 867 | if (unlikely(!ci.map)) { |
837 | bio_io_error(bio); | 868 | if (!bio_barrier(bio)) |
838 | return; | 869 | bio_io_error(bio); |
839 | } | 870 | else |
840 | if (unlikely(bio_barrier(bio) && !dm_table_barrier_ok(ci.map))) { | 871 | md->barrier_error = -EIO; |
841 | dm_table_put(ci.map); | ||
842 | bio_endio(bio, -EOPNOTSUPP); | ||
843 | return; | 872 | return; |
844 | } | 873 | } |
874 | |||
845 | ci.md = md; | 875 | ci.md = md; |
846 | ci.bio = bio; | 876 | ci.bio = bio; |
847 | ci.io = alloc_io(md); | 877 | ci.io = alloc_io(md); |
@@ -918,7 +948,6 @@ out: | |||
918 | */ | 948 | */ |
919 | static int dm_request(struct request_queue *q, struct bio *bio) | 949 | static int dm_request(struct request_queue *q, struct bio *bio) |
920 | { | 950 | { |
921 | int r = -EIO; | ||
922 | int rw = bio_data_dir(bio); | 951 | int rw = bio_data_dir(bio); |
923 | struct mapped_device *md = q->queuedata; | 952 | struct mapped_device *md = q->queuedata; |
924 | int cpu; | 953 | int cpu; |
@@ -931,34 +960,27 @@ static int dm_request(struct request_queue *q, struct bio *bio) | |||
931 | part_stat_unlock(); | 960 | part_stat_unlock(); |
932 | 961 | ||
933 | /* | 962 | /* |
934 | * If we're suspended we have to queue | 963 | * If we're suspended or the thread is processing barriers |
935 | * this io for later. | 964 | * we have to queue this io for later. |
936 | */ | 965 | */ |
937 | while (test_bit(DMF_BLOCK_IO, &md->flags)) { | 966 | if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) || |
967 | unlikely(bio_barrier(bio))) { | ||
938 | up_read(&md->io_lock); | 968 | up_read(&md->io_lock); |
939 | 969 | ||
940 | if (bio_rw(bio) != READA) | 970 | if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) && |
941 | r = queue_io(md, bio); | 971 | bio_rw(bio) == READA) { |
972 | bio_io_error(bio); | ||
973 | return 0; | ||
974 | } | ||
942 | 975 | ||
943 | if (r <= 0) | 976 | queue_io(md, bio); |
944 | goto out_req; | ||
945 | 977 | ||
946 | /* | 978 | return 0; |
947 | * We're in a while loop, because someone could suspend | ||
948 | * before we get to the following read lock. | ||
949 | */ | ||
950 | down_read(&md->io_lock); | ||
951 | } | 979 | } |
952 | 980 | ||
953 | __split_and_process_bio(md, bio); | 981 | __split_and_process_bio(md, bio); |
954 | up_read(&md->io_lock); | 982 | up_read(&md->io_lock); |
955 | return 0; | 983 | return 0; |
956 | |||
957 | out_req: | ||
958 | if (r < 0) | ||
959 | bio_io_error(bio); | ||
960 | |||
961 | return 0; | ||
962 | } | 984 | } |
963 | 985 | ||
964 | static void dm_unplug_all(struct request_queue *q) | 986 | static void dm_unplug_all(struct request_queue *q) |
@@ -978,7 +1000,7 @@ static int dm_any_congested(void *congested_data, int bdi_bits) | |||
978 | struct mapped_device *md = congested_data; | 1000 | struct mapped_device *md = congested_data; |
979 | struct dm_table *map; | 1001 | struct dm_table *map; |
980 | 1002 | ||
981 | if (!test_bit(DMF_BLOCK_IO, &md->flags)) { | 1003 | if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { |
982 | map = dm_get_table(md); | 1004 | map = dm_get_table(md); |
983 | if (map) { | 1005 | if (map) { |
984 | r = dm_table_any_congested(map, bdi_bits); | 1006 | r = dm_table_any_congested(map, bdi_bits); |
@@ -1193,6 +1215,7 @@ static void free_dev(struct mapped_device *md) | |||
1193 | mempool_destroy(md->tio_pool); | 1215 | mempool_destroy(md->tio_pool); |
1194 | mempool_destroy(md->io_pool); | 1216 | mempool_destroy(md->io_pool); |
1195 | bioset_free(md->bs); | 1217 | bioset_free(md->bs); |
1218 | blk_integrity_unregister(md->disk); | ||
1196 | del_gendisk(md->disk); | 1219 | del_gendisk(md->disk); |
1197 | free_minor(minor); | 1220 | free_minor(minor); |
1198 | 1221 | ||
@@ -1406,6 +1429,36 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible) | |||
1406 | return r; | 1429 | return r; |
1407 | } | 1430 | } |
1408 | 1431 | ||
1432 | static int dm_flush(struct mapped_device *md) | ||
1433 | { | ||
1434 | dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); | ||
1435 | return 0; | ||
1436 | } | ||
1437 | |||
1438 | static void process_barrier(struct mapped_device *md, struct bio *bio) | ||
1439 | { | ||
1440 | int error = dm_flush(md); | ||
1441 | |||
1442 | if (unlikely(error)) { | ||
1443 | bio_endio(bio, error); | ||
1444 | return; | ||
1445 | } | ||
1446 | if (bio_empty_barrier(bio)) { | ||
1447 | bio_endio(bio, 0); | ||
1448 | return; | ||
1449 | } | ||
1450 | |||
1451 | __split_and_process_bio(md, bio); | ||
1452 | |||
1453 | error = dm_flush(md); | ||
1454 | |||
1455 | if (!error && md->barrier_error) | ||
1456 | error = md->barrier_error; | ||
1457 | |||
1458 | if (md->barrier_error != DM_ENDIO_REQUEUE) | ||
1459 | bio_endio(bio, error); | ||
1460 | } | ||
1461 | |||
1409 | /* | 1462 | /* |
1410 | * Process the deferred bios | 1463 | * Process the deferred bios |
1411 | */ | 1464 | */ |
@@ -1417,25 +1470,34 @@ static void dm_wq_work(struct work_struct *work) | |||
1417 | 1470 | ||
1418 | down_write(&md->io_lock); | 1471 | down_write(&md->io_lock); |
1419 | 1472 | ||
1420 | next_bio: | 1473 | while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { |
1421 | spin_lock_irq(&md->deferred_lock); | 1474 | spin_lock_irq(&md->deferred_lock); |
1422 | c = bio_list_pop(&md->deferred); | 1475 | c = bio_list_pop(&md->deferred); |
1423 | spin_unlock_irq(&md->deferred_lock); | 1476 | spin_unlock_irq(&md->deferred_lock); |
1424 | 1477 | ||
1425 | if (c) { | 1478 | if (!c) { |
1426 | __split_and_process_bio(md, c); | 1479 | clear_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags); |
1427 | goto next_bio; | 1480 | break; |
1428 | } | 1481 | } |
1429 | 1482 | ||
1430 | clear_bit(DMF_BLOCK_IO, &md->flags); | 1483 | up_write(&md->io_lock); |
1484 | |||
1485 | if (bio_barrier(c)) | ||
1486 | process_barrier(md, c); | ||
1487 | else | ||
1488 | __split_and_process_bio(md, c); | ||
1489 | |||
1490 | down_write(&md->io_lock); | ||
1491 | } | ||
1431 | 1492 | ||
1432 | up_write(&md->io_lock); | 1493 | up_write(&md->io_lock); |
1433 | } | 1494 | } |
1434 | 1495 | ||
1435 | static void dm_queue_flush(struct mapped_device *md) | 1496 | static void dm_queue_flush(struct mapped_device *md) |
1436 | { | 1497 | { |
1498 | clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); | ||
1499 | smp_mb__after_clear_bit(); | ||
1437 | queue_work(md->wq, &md->work); | 1500 | queue_work(md->wq, &md->work); |
1438 | flush_workqueue(md->wq); | ||
1439 | } | 1501 | } |
1440 | 1502 | ||
1441 | /* | 1503 | /* |
@@ -1553,20 +1615,36 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
1553 | } | 1615 | } |
1554 | 1616 | ||
1555 | /* | 1617 | /* |
1556 | * First we set the BLOCK_IO flag so no more ios will be mapped. | 1618 | * Here we must make sure that no processes are submitting requests |
1619 | * to target drivers i.e. no one may be executing | ||
1620 | * __split_and_process_bio. This is called from dm_request and | ||
1621 | * dm_wq_work. | ||
1622 | * | ||
1623 | * To get all processes out of __split_and_process_bio in dm_request, | ||
1624 | * we take the write lock. To prevent any process from reentering | ||
1625 | * __split_and_process_bio from dm_request, we set | ||
1626 | * DMF_QUEUE_IO_TO_THREAD. | ||
1627 | * | ||
1628 | * To quiesce the thread (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND | ||
1629 | * and call flush_workqueue(md->wq). flush_workqueue will wait until | ||
1630 | * dm_wq_work exits and DMF_BLOCK_IO_FOR_SUSPEND will prevent any | ||
1631 | * further calls to __split_and_process_bio from dm_wq_work. | ||
1557 | */ | 1632 | */ |
1558 | down_write(&md->io_lock); | 1633 | down_write(&md->io_lock); |
1559 | set_bit(DMF_BLOCK_IO, &md->flags); | 1634 | set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); |
1560 | 1635 | set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags); | |
1561 | up_write(&md->io_lock); | 1636 | up_write(&md->io_lock); |
1562 | 1637 | ||
1638 | flush_workqueue(md->wq); | ||
1639 | |||
1563 | /* | 1640 | /* |
1564 | * Wait for the already-mapped ios to complete. | 1641 | * At this point no more requests are entering target request routines. |
1642 | * We call dm_wait_for_completion to wait for all existing requests | ||
1643 | * to finish. | ||
1565 | */ | 1644 | */ |
1566 | r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE); | 1645 | r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE); |
1567 | 1646 | ||
1568 | down_write(&md->io_lock); | 1647 | down_write(&md->io_lock); |
1569 | |||
1570 | if (noflush) | 1648 | if (noflush) |
1571 | clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); | 1649 | clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); |
1572 | up_write(&md->io_lock); | 1650 | up_write(&md->io_lock); |
@@ -1579,6 +1657,12 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
1579 | goto out; /* pushback list is already flushed, so skip flush */ | 1657 | goto out; /* pushback list is already flushed, so skip flush */ |
1580 | } | 1658 | } |
1581 | 1659 | ||
1660 | /* | ||
1661 | * If dm_wait_for_completion returned 0, the device is completely | ||
1662 | * quiescent now. There is no request-processing activity. All new | ||
1663 | * requests are being added to md->deferred list. | ||
1664 | */ | ||
1665 | |||
1582 | dm_table_postsuspend_targets(map); | 1666 | dm_table_postsuspend_targets(map); |
1583 | 1667 | ||
1584 | set_bit(DMF_SUSPENDED, &md->flags); | 1668 | set_bit(DMF_SUSPENDED, &md->flags); |