aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/dm.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/dm.c')
-rw-r--r--drivers/md/dm.c200
1 files changed, 142 insertions, 58 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 788ba96a6256..424f7b048c30 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -6,7 +6,6 @@
6 */ 6 */
7 7
8#include "dm.h" 8#include "dm.h"
9#include "dm-bio-list.h"
10#include "dm-uevent.h" 9#include "dm-uevent.h"
11 10
12#include <linux/init.h> 11#include <linux/init.h>
@@ -89,12 +88,13 @@ union map_info *dm_get_mapinfo(struct bio *bio)
89/* 88/*
90 * Bits for the md->flags field. 89 * Bits for the md->flags field.
91 */ 90 */
92#define DMF_BLOCK_IO 0 91#define DMF_BLOCK_IO_FOR_SUSPEND 0
93#define DMF_SUSPENDED 1 92#define DMF_SUSPENDED 1
94#define DMF_FROZEN 2 93#define DMF_FROZEN 2
95#define DMF_FREEING 3 94#define DMF_FREEING 3
96#define DMF_DELETING 4 95#define DMF_DELETING 4
97#define DMF_NOFLUSH_SUSPENDING 5 96#define DMF_NOFLUSH_SUSPENDING 5
97#define DMF_QUEUE_IO_TO_THREAD 6
98 98
99/* 99/*
100 * Work processed by per-device workqueue. 100 * Work processed by per-device workqueue.
@@ -124,6 +124,11 @@ struct mapped_device {
124 spinlock_t deferred_lock; 124 spinlock_t deferred_lock;
125 125
126 /* 126 /*
127 * An error from the barrier request currently being processed.
128 */
129 int barrier_error;
130
131 /*
127 * Processing queue (flush/barriers) 132 * Processing queue (flush/barriers)
128 */ 133 */
129 struct workqueue_struct *wq; 134 struct workqueue_struct *wq;
@@ -424,6 +429,10 @@ static void end_io_acct(struct dm_io *io)
424 part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration); 429 part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration);
425 part_stat_unlock(); 430 part_stat_unlock();
426 431
432 /*
433 * After this is decremented the bio must not be touched if it is
434 * a barrier.
435 */
427 dm_disk(md)->part0.in_flight = pending = 436 dm_disk(md)->part0.in_flight = pending =
428 atomic_dec_return(&md->pending); 437 atomic_dec_return(&md->pending);
429 438
@@ -435,21 +444,18 @@ static void end_io_acct(struct dm_io *io)
435/* 444/*
436 * Add the bio to the list of deferred io. 445 * Add the bio to the list of deferred io.
437 */ 446 */
438static int queue_io(struct mapped_device *md, struct bio *bio) 447static void queue_io(struct mapped_device *md, struct bio *bio)
439{ 448{
440 down_write(&md->io_lock); 449 down_write(&md->io_lock);
441 450
442 if (!test_bit(DMF_BLOCK_IO, &md->flags)) {
443 up_write(&md->io_lock);
444 return 1;
445 }
446
447 spin_lock_irq(&md->deferred_lock); 451 spin_lock_irq(&md->deferred_lock);
448 bio_list_add(&md->deferred, bio); 452 bio_list_add(&md->deferred, bio);
449 spin_unlock_irq(&md->deferred_lock); 453 spin_unlock_irq(&md->deferred_lock);
450 454
455 if (!test_and_set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags))
456 queue_work(md->wq, &md->work);
457
451 up_write(&md->io_lock); 458 up_write(&md->io_lock);
452 return 0; /* deferred successfully */
453} 459}
454 460
455/* 461/*
@@ -533,25 +539,35 @@ static void dec_pending(struct dm_io *io, int error)
533 */ 539 */
534 spin_lock_irqsave(&md->deferred_lock, flags); 540 spin_lock_irqsave(&md->deferred_lock, flags);
535 if (__noflush_suspending(md)) 541 if (__noflush_suspending(md))
536 bio_list_add(&md->deferred, io->bio); 542 bio_list_add_head(&md->deferred, io->bio);
537 else 543 else
538 /* noflush suspend was interrupted. */ 544 /* noflush suspend was interrupted. */
539 io->error = -EIO; 545 io->error = -EIO;
540 spin_unlock_irqrestore(&md->deferred_lock, flags); 546 spin_unlock_irqrestore(&md->deferred_lock, flags);
541 } 547 }
542 548
543 end_io_acct(io);
544
545 io_error = io->error; 549 io_error = io->error;
546 bio = io->bio; 550 bio = io->bio;
547 551
548 free_io(md, io); 552 if (bio_barrier(bio)) {
553 /*
554 * There can be just one barrier request so we use
555 * a per-device variable for error reporting.
556 * Note that you can't touch the bio after end_io_acct
557 */
558 md->barrier_error = io_error;
559 end_io_acct(io);
560 } else {
561 end_io_acct(io);
549 562
550 if (io_error != DM_ENDIO_REQUEUE) { 563 if (io_error != DM_ENDIO_REQUEUE) {
551 trace_block_bio_complete(md->queue, bio); 564 trace_block_bio_complete(md->queue, bio);
552 565
553 bio_endio(bio, io_error); 566 bio_endio(bio, io_error);
567 }
554 } 568 }
569
570 free_io(md, io);
555 } 571 }
556} 572}
557 573
@@ -693,13 +709,19 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector,
693 709
694 clone->bi_sector = sector; 710 clone->bi_sector = sector;
695 clone->bi_bdev = bio->bi_bdev; 711 clone->bi_bdev = bio->bi_bdev;
696 clone->bi_rw = bio->bi_rw; 712 clone->bi_rw = bio->bi_rw & ~(1 << BIO_RW_BARRIER);
697 clone->bi_vcnt = 1; 713 clone->bi_vcnt = 1;
698 clone->bi_size = to_bytes(len); 714 clone->bi_size = to_bytes(len);
699 clone->bi_io_vec->bv_offset = offset; 715 clone->bi_io_vec->bv_offset = offset;
700 clone->bi_io_vec->bv_len = clone->bi_size; 716 clone->bi_io_vec->bv_len = clone->bi_size;
701 clone->bi_flags |= 1 << BIO_CLONED; 717 clone->bi_flags |= 1 << BIO_CLONED;
702 718
719 if (bio_integrity(bio)) {
720 bio_integrity_clone(clone, bio, GFP_NOIO);
721 bio_integrity_trim(clone,
722 bio_sector_offset(bio, idx, offset), len);
723 }
724
703 return clone; 725 return clone;
704} 726}
705 727
@@ -714,6 +736,7 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector,
714 736
715 clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs); 737 clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
716 __bio_clone(clone, bio); 738 __bio_clone(clone, bio);
739 clone->bi_rw &= ~(1 << BIO_RW_BARRIER);
717 clone->bi_destructor = dm_bio_destructor; 740 clone->bi_destructor = dm_bio_destructor;
718 clone->bi_sector = sector; 741 clone->bi_sector = sector;
719 clone->bi_idx = idx; 742 clone->bi_idx = idx;
@@ -721,6 +744,14 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector,
721 clone->bi_size = to_bytes(len); 744 clone->bi_size = to_bytes(len);
722 clone->bi_flags &= ~(1 << BIO_SEG_VALID); 745 clone->bi_flags &= ~(1 << BIO_SEG_VALID);
723 746
747 if (bio_integrity(bio)) {
748 bio_integrity_clone(clone, bio, GFP_NOIO);
749
750 if (idx != bio->bi_idx || clone->bi_size < bio->bi_size)
751 bio_integrity_trim(clone,
752 bio_sector_offset(bio, idx, 0), len);
753 }
754
724 return clone; 755 return clone;
725} 756}
726 757
@@ -834,14 +865,13 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
834 865
835 ci.map = dm_get_table(md); 866 ci.map = dm_get_table(md);
836 if (unlikely(!ci.map)) { 867 if (unlikely(!ci.map)) {
837 bio_io_error(bio); 868 if (!bio_barrier(bio))
838 return; 869 bio_io_error(bio);
839 } 870 else
840 if (unlikely(bio_barrier(bio) && !dm_table_barrier_ok(ci.map))) { 871 md->barrier_error = -EIO;
841 dm_table_put(ci.map);
842 bio_endio(bio, -EOPNOTSUPP);
843 return; 872 return;
844 } 873 }
874
845 ci.md = md; 875 ci.md = md;
846 ci.bio = bio; 876 ci.bio = bio;
847 ci.io = alloc_io(md); 877 ci.io = alloc_io(md);
@@ -918,7 +948,6 @@ out:
918 */ 948 */
919static int dm_request(struct request_queue *q, struct bio *bio) 949static int dm_request(struct request_queue *q, struct bio *bio)
920{ 950{
921 int r = -EIO;
922 int rw = bio_data_dir(bio); 951 int rw = bio_data_dir(bio);
923 struct mapped_device *md = q->queuedata; 952 struct mapped_device *md = q->queuedata;
924 int cpu; 953 int cpu;
@@ -931,34 +960,27 @@ static int dm_request(struct request_queue *q, struct bio *bio)
931 part_stat_unlock(); 960 part_stat_unlock();
932 961
933 /* 962 /*
934 * If we're suspended we have to queue 963 * If we're suspended or the thread is processing barriers
935 * this io for later. 964 * we have to queue this io for later.
936 */ 965 */
937 while (test_bit(DMF_BLOCK_IO, &md->flags)) { 966 if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) ||
967 unlikely(bio_barrier(bio))) {
938 up_read(&md->io_lock); 968 up_read(&md->io_lock);
939 969
940 if (bio_rw(bio) != READA) 970 if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) &&
941 r = queue_io(md, bio); 971 bio_rw(bio) == READA) {
972 bio_io_error(bio);
973 return 0;
974 }
942 975
943 if (r <= 0) 976 queue_io(md, bio);
944 goto out_req;
945 977
946 /* 978 return 0;
947 * We're in a while loop, because someone could suspend
948 * before we get to the following read lock.
949 */
950 down_read(&md->io_lock);
951 } 979 }
952 980
953 __split_and_process_bio(md, bio); 981 __split_and_process_bio(md, bio);
954 up_read(&md->io_lock); 982 up_read(&md->io_lock);
955 return 0; 983 return 0;
956
957out_req:
958 if (r < 0)
959 bio_io_error(bio);
960
961 return 0;
962} 984}
963 985
964static void dm_unplug_all(struct request_queue *q) 986static void dm_unplug_all(struct request_queue *q)
@@ -978,7 +1000,7 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
978 struct mapped_device *md = congested_data; 1000 struct mapped_device *md = congested_data;
979 struct dm_table *map; 1001 struct dm_table *map;
980 1002
981 if (!test_bit(DMF_BLOCK_IO, &md->flags)) { 1003 if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
982 map = dm_get_table(md); 1004 map = dm_get_table(md);
983 if (map) { 1005 if (map) {
984 r = dm_table_any_congested(map, bdi_bits); 1006 r = dm_table_any_congested(map, bdi_bits);
@@ -1193,6 +1215,7 @@ static void free_dev(struct mapped_device *md)
1193 mempool_destroy(md->tio_pool); 1215 mempool_destroy(md->tio_pool);
1194 mempool_destroy(md->io_pool); 1216 mempool_destroy(md->io_pool);
1195 bioset_free(md->bs); 1217 bioset_free(md->bs);
1218 blk_integrity_unregister(md->disk);
1196 del_gendisk(md->disk); 1219 del_gendisk(md->disk);
1197 free_minor(minor); 1220 free_minor(minor);
1198 1221
@@ -1406,6 +1429,36 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
1406 return r; 1429 return r;
1407} 1430}
1408 1431
1432static int dm_flush(struct mapped_device *md)
1433{
1434 dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
1435 return 0;
1436}
1437
1438static void process_barrier(struct mapped_device *md, struct bio *bio)
1439{
1440 int error = dm_flush(md);
1441
1442 if (unlikely(error)) {
1443 bio_endio(bio, error);
1444 return;
1445 }
1446 if (bio_empty_barrier(bio)) {
1447 bio_endio(bio, 0);
1448 return;
1449 }
1450
1451 __split_and_process_bio(md, bio);
1452
1453 error = dm_flush(md);
1454
1455 if (!error && md->barrier_error)
1456 error = md->barrier_error;
1457
1458 if (md->barrier_error != DM_ENDIO_REQUEUE)
1459 bio_endio(bio, error);
1460}
1461
1409/* 1462/*
1410 * Process the deferred bios 1463 * Process the deferred bios
1411 */ 1464 */
@@ -1417,25 +1470,34 @@ static void dm_wq_work(struct work_struct *work)
1417 1470
1418 down_write(&md->io_lock); 1471 down_write(&md->io_lock);
1419 1472
1420next_bio: 1473 while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
1421 spin_lock_irq(&md->deferred_lock); 1474 spin_lock_irq(&md->deferred_lock);
1422 c = bio_list_pop(&md->deferred); 1475 c = bio_list_pop(&md->deferred);
1423 spin_unlock_irq(&md->deferred_lock); 1476 spin_unlock_irq(&md->deferred_lock);
1424 1477
1425 if (c) { 1478 if (!c) {
1426 __split_and_process_bio(md, c); 1479 clear_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
1427 goto next_bio; 1480 break;
1428 } 1481 }
1429 1482
1430 clear_bit(DMF_BLOCK_IO, &md->flags); 1483 up_write(&md->io_lock);
1484
1485 if (bio_barrier(c))
1486 process_barrier(md, c);
1487 else
1488 __split_and_process_bio(md, c);
1489
1490 down_write(&md->io_lock);
1491 }
1431 1492
1432 up_write(&md->io_lock); 1493 up_write(&md->io_lock);
1433} 1494}
1434 1495
1435static void dm_queue_flush(struct mapped_device *md) 1496static void dm_queue_flush(struct mapped_device *md)
1436{ 1497{
1498 clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
1499 smp_mb__after_clear_bit();
1437 queue_work(md->wq, &md->work); 1500 queue_work(md->wq, &md->work);
1438 flush_workqueue(md->wq);
1439} 1501}
1440 1502
1441/* 1503/*
@@ -1553,20 +1615,36 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
1553 } 1615 }
1554 1616
1555 /* 1617 /*
1556 * First we set the BLOCK_IO flag so no more ios will be mapped. 1618 * Here we must make sure that no processes are submitting requests
1619 * to target drivers i.e. no one may be executing
1620 * __split_and_process_bio. This is called from dm_request and
1621 * dm_wq_work.
1622 *
1623 * To get all processes out of __split_and_process_bio in dm_request,
1624 * we take the write lock. To prevent any process from reentering
1625 * __split_and_process_bio from dm_request, we set
1626 * DMF_QUEUE_IO_TO_THREAD.
1627 *
1628 * To quiesce the thread (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND
1629 * and call flush_workqueue(md->wq). flush_workqueue will wait until
1630 * dm_wq_work exits and DMF_BLOCK_IO_FOR_SUSPEND will prevent any
1631 * further calls to __split_and_process_bio from dm_wq_work.
1557 */ 1632 */
1558 down_write(&md->io_lock); 1633 down_write(&md->io_lock);
1559 set_bit(DMF_BLOCK_IO, &md->flags); 1634 set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
1560 1635 set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
1561 up_write(&md->io_lock); 1636 up_write(&md->io_lock);
1562 1637
1638 flush_workqueue(md->wq);
1639
1563 /* 1640 /*
1564 * Wait for the already-mapped ios to complete. 1641 * At this point no more requests are entering target request routines.
1642 * We call dm_wait_for_completion to wait for all existing requests
1643 * to finish.
1565 */ 1644 */
1566 r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE); 1645 r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE);
1567 1646
1568 down_write(&md->io_lock); 1647 down_write(&md->io_lock);
1569
1570 if (noflush) 1648 if (noflush)
1571 clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); 1649 clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
1572 up_write(&md->io_lock); 1650 up_write(&md->io_lock);
@@ -1579,6 +1657,12 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
1579 goto out; /* pushback list is already flushed, so skip flush */ 1657 goto out; /* pushback list is already flushed, so skip flush */
1580 } 1658 }
1581 1659
1660 /*
1661 * If dm_wait_for_completion returned 0, the device is completely
1662 * quiescent now. There is no request-processing activity. All new
1663 * requests are being added to md->deferred list.
1664 */
1665
1582 dm_table_postsuspend_targets(map); 1666 dm_table_postsuspend_targets(map);
1583 1667
1584 set_bit(DMF_SUSPENDED, &md->flags); 1668 set_bit(DMF_SUSPENDED, &md->flags);