aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r-- drivers/md/dm.c 214
1 files changed, 196 insertions, 18 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 821a5dd6a8d1..3de8d6d5b0b8 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -143,9 +143,19 @@ struct mapped_device {
143 int barrier_error; 143 int barrier_error;
144 144
145 /* 145 /*
146 * Protect barrier_error from concurrent endio processing
147 * in request-based dm.
148 */
149 spinlock_t barrier_error_lock;
150
151 /*
146 * Processing queue (flush/barriers) 152 * Processing queue (flush/barriers)
147 */ 153 */
148 struct workqueue_struct *wq; 154 struct workqueue_struct *wq;
155 struct work_struct barrier_work;
156
157 /* A pointer to the currently processing pre/post flush request */
158 struct request *flush_request;
149 159
150 /* 160 /*
151 * The current mapping. 161 * The current mapping.
@@ -722,6 +732,23 @@ static void end_clone_bio(struct bio *clone, int error)
722 blk_update_request(tio->orig, 0, nr_bytes); 732 blk_update_request(tio->orig, 0, nr_bytes);
723} 733}
724 734
735static void store_barrier_error(struct mapped_device *md, int error)
736{
737 unsigned long flags;
738
739 spin_lock_irqsave(&md->barrier_error_lock, flags);
740 /*
741 * Basically, the first error is taken, but:
742 * -EOPNOTSUPP supersedes any I/O error.
743 * Requeue request supersedes any I/O error but -EOPNOTSUPP.
744 */
745 if (!md->barrier_error || error == -EOPNOTSUPP ||
746 (md->barrier_error != -EOPNOTSUPP &&
747 error == DM_ENDIO_REQUEUE))
748 md->barrier_error = error;
749 spin_unlock_irqrestore(&md->barrier_error_lock, flags);
750}
751
725/* 752/*
726 * Don't touch any member of the md after calling this function because 753 * Don't touch any member of the md after calling this function because
727 * the md may be freed in dm_put() at the end of this function. 754 * the md may be freed in dm_put() at the end of this function.
@@ -759,11 +786,13 @@ static void free_rq_clone(struct request *clone)
759static void dm_end_request(struct request *clone, int error) 786static void dm_end_request(struct request *clone, int error)
760{ 787{
761 int rw = rq_data_dir(clone); 788 int rw = rq_data_dir(clone);
789 int run_queue = 1;
790 bool is_barrier = blk_barrier_rq(clone);
762 struct dm_rq_target_io *tio = clone->end_io_data; 791 struct dm_rq_target_io *tio = clone->end_io_data;
763 struct mapped_device *md = tio->md; 792 struct mapped_device *md = tio->md;
764 struct request *rq = tio->orig; 793 struct request *rq = tio->orig;
765 794
766 if (blk_pc_request(rq)) { 795 if (blk_pc_request(rq) && !is_barrier) {
767 rq->errors = clone->errors; 796 rq->errors = clone->errors;
768 rq->resid_len = clone->resid_len; 797 rq->resid_len = clone->resid_len;
769 798
@@ -778,9 +807,14 @@ static void dm_end_request(struct request *clone, int error)
778 807
779 free_rq_clone(clone); 808 free_rq_clone(clone);
780 809
781 blk_end_request_all(rq, error); 810 if (unlikely(is_barrier)) {
811 if (unlikely(error))
812 store_barrier_error(md, error);
813 run_queue = 0;
814 } else
815 blk_end_request_all(rq, error);
782 816
783 rq_completed(md, rw, 1); 817 rq_completed(md, rw, run_queue);
784} 818}
785 819
786static void dm_unprep_request(struct request *rq) 820static void dm_unprep_request(struct request *rq)
@@ -805,6 +839,16 @@ void dm_requeue_unmapped_request(struct request *clone)
805 struct request_queue *q = rq->q; 839 struct request_queue *q = rq->q;
806 unsigned long flags; 840 unsigned long flags;
807 841
842 if (unlikely(blk_barrier_rq(clone))) {
843 /*
844 * Barrier clones share an original request.
845 * Leave it to dm_end_request(), which handles this special
846 * case.
847 */
848 dm_end_request(clone, DM_ENDIO_REQUEUE);
849 return;
850 }
851
808 dm_unprep_request(rq); 852 dm_unprep_request(rq);
809 853
810 spin_lock_irqsave(q->queue_lock, flags); 854 spin_lock_irqsave(q->queue_lock, flags);
@@ -894,6 +938,19 @@ static void dm_complete_request(struct request *clone, int error)
894 struct dm_rq_target_io *tio = clone->end_io_data; 938 struct dm_rq_target_io *tio = clone->end_io_data;
895 struct request *rq = tio->orig; 939 struct request *rq = tio->orig;
896 940
941 if (unlikely(blk_barrier_rq(clone))) {
942 /*
943 * Barrier clones share an original request. So can't use
944 * softirq_done with the original.
945 * Pass the clone to dm_done() directly in this special case.
946 * It is safe (even if clone->q->queue_lock is held here)
947 * because there is no I/O dispatching during the completion
948 * of barrier clone.
949 */
950 dm_done(clone, error, true);
951 return;
952 }
953
897 tio->error = error; 954 tio->error = error;
898 rq->completion_data = clone; 955 rq->completion_data = clone;
899 blk_complete_request(rq); 956 blk_complete_request(rq);
@@ -910,6 +967,17 @@ void dm_kill_unmapped_request(struct request *clone, int error)
910 struct dm_rq_target_io *tio = clone->end_io_data; 967 struct dm_rq_target_io *tio = clone->end_io_data;
911 struct request *rq = tio->orig; 968 struct request *rq = tio->orig;
912 969
970 if (unlikely(blk_barrier_rq(clone))) {
971 /*
972 * Barrier clones share an original request.
973 * Leave it to dm_end_request(), which handles this special
974 * case.
975 */
976 BUG_ON(error > 0);
977 dm_end_request(clone, error);
978 return;
979 }
980
913 rq->cmd_flags |= REQ_FAILED; 981 rq->cmd_flags |= REQ_FAILED;
914 dm_complete_request(clone, error); 982 dm_complete_request(clone, error);
915} 983}
@@ -1364,11 +1432,6 @@ static int dm_make_request(struct request_queue *q, struct bio *bio)
1364{ 1432{
1365 struct mapped_device *md = q->queuedata; 1433 struct mapped_device *md = q->queuedata;
1366 1434
1367 if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
1368 bio_endio(bio, -EOPNOTSUPP);
1369 return 0;
1370 }
1371
1372 return md->saved_make_request_fn(q, bio); /* call __make_request() */ 1435 return md->saved_make_request_fn(q, bio); /* call __make_request() */
1373} 1436}
1374 1437
@@ -1387,6 +1450,25 @@ static int dm_request(struct request_queue *q, struct bio *bio)
1387 return _dm_request(q, bio); 1450 return _dm_request(q, bio);
1388} 1451}
1389 1452
1453/*
1454 * Mark this request as flush request, so that dm_request_fn() can
1455 * recognize.
1456 */
1457static void dm_rq_prepare_flush(struct request_queue *q, struct request *rq)
1458{
1459 rq->cmd_type = REQ_TYPE_LINUX_BLOCK;
1460 rq->cmd[0] = REQ_LB_OP_FLUSH;
1461}
1462
1463static bool dm_rq_is_flush_request(struct request *rq)
1464{
1465 if (rq->cmd_type == REQ_TYPE_LINUX_BLOCK &&
1466 rq->cmd[0] == REQ_LB_OP_FLUSH)
1467 return true;
1468 else
1469 return false;
1470}
1471
1390void dm_dispatch_request(struct request *rq) 1472void dm_dispatch_request(struct request *rq)
1391{ 1473{
1392 int r; 1474 int r;
@@ -1432,16 +1514,24 @@ static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
1432static int setup_clone(struct request *clone, struct request *rq, 1514static int setup_clone(struct request *clone, struct request *rq,
1433 struct dm_rq_target_io *tio) 1515 struct dm_rq_target_io *tio)
1434{ 1516{
1435 int r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC, 1517 int r;
1436 dm_rq_bio_constructor, tio);
1437 1518
1438 if (r) 1519 if (dm_rq_is_flush_request(rq)) {
1439 return r; 1520 blk_rq_init(NULL, clone);
1521 clone->cmd_type = REQ_TYPE_FS;
1522 clone->cmd_flags |= (REQ_HARDBARRIER | WRITE);
1523 } else {
1524 r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
1525 dm_rq_bio_constructor, tio);
1526 if (r)
1527 return r;
1528
1529 clone->cmd = rq->cmd;
1530 clone->cmd_len = rq->cmd_len;
1531 clone->sense = rq->sense;
1532 clone->buffer = rq->buffer;
1533 }
1440 1534
1441 clone->cmd = rq->cmd;
1442 clone->cmd_len = rq->cmd_len;
1443 clone->sense = rq->sense;
1444 clone->buffer = rq->buffer;
1445 clone->end_io = end_clone_request; 1535 clone->end_io = end_clone_request;
1446 clone->end_io_data = tio; 1536 clone->end_io_data = tio;
1447 1537
@@ -1482,6 +1572,9 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq)
1482 struct mapped_device *md = q->queuedata; 1572 struct mapped_device *md = q->queuedata;
1483 struct request *clone; 1573 struct request *clone;
1484 1574
1575 if (unlikely(dm_rq_is_flush_request(rq)))
1576 return BLKPREP_OK;
1577
1485 if (unlikely(rq->special)) { 1578 if (unlikely(rq->special)) {
1486 DMWARN("Already has something in rq->special."); 1579 DMWARN("Already has something in rq->special.");
1487 return BLKPREP_KILL; 1580 return BLKPREP_KILL;
@@ -1560,6 +1653,14 @@ static void dm_request_fn(struct request_queue *q)
1560 if (!rq) 1653 if (!rq)
1561 goto plug_and_out; 1654 goto plug_and_out;
1562 1655
1656 if (unlikely(dm_rq_is_flush_request(rq))) {
1657 BUG_ON(md->flush_request);
1658 md->flush_request = rq;
1659 blk_start_request(rq);
1660 queue_work(md->wq, &md->barrier_work);
1661 goto out;
1662 }
1663
1563 ti = dm_table_find_target(map, blk_rq_pos(rq)); 1664 ti = dm_table_find_target(map, blk_rq_pos(rq));
1564 if (ti->type->busy && ti->type->busy(ti)) 1665 if (ti->type->busy && ti->type->busy(ti))
1565 goto plug_and_out; 1666 goto plug_and_out;
@@ -1726,6 +1827,7 @@ out:
1726static const struct block_device_operations dm_blk_dops; 1827static const struct block_device_operations dm_blk_dops;
1727 1828
1728static void dm_wq_work(struct work_struct *work); 1829static void dm_wq_work(struct work_struct *work);
1830static void dm_rq_barrier_work(struct work_struct *work);
1729 1831
1730/* 1832/*
1731 * Allocate and initialise a blank device with a given minor. 1833 * Allocate and initialise a blank device with a given minor.
@@ -1755,6 +1857,7 @@ static struct mapped_device *alloc_dev(int minor)
1755 init_rwsem(&md->io_lock); 1857 init_rwsem(&md->io_lock);
1756 mutex_init(&md->suspend_lock); 1858 mutex_init(&md->suspend_lock);
1757 spin_lock_init(&md->deferred_lock); 1859 spin_lock_init(&md->deferred_lock);
1860 spin_lock_init(&md->barrier_error_lock);
1758 rwlock_init(&md->map_lock); 1861 rwlock_init(&md->map_lock);
1759 atomic_set(&md->holders, 1); 1862 atomic_set(&md->holders, 1);
1760 atomic_set(&md->open_count, 0); 1863 atomic_set(&md->open_count, 0);
@@ -1789,6 +1892,8 @@ static struct mapped_device *alloc_dev(int minor)
1789 blk_queue_softirq_done(md->queue, dm_softirq_done); 1892 blk_queue_softirq_done(md->queue, dm_softirq_done);
1790 blk_queue_prep_rq(md->queue, dm_prep_fn); 1893 blk_queue_prep_rq(md->queue, dm_prep_fn);
1791 blk_queue_lld_busy(md->queue, dm_lld_busy); 1894 blk_queue_lld_busy(md->queue, dm_lld_busy);
1895 blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH,
1896 dm_rq_prepare_flush);
1792 1897
1793 md->disk = alloc_disk(1); 1898 md->disk = alloc_disk(1);
1794 if (!md->disk) 1899 if (!md->disk)
@@ -1798,6 +1903,7 @@ static struct mapped_device *alloc_dev(int minor)
1798 atomic_set(&md->pending[1], 0); 1903 atomic_set(&md->pending[1], 0);
1799 init_waitqueue_head(&md->wait); 1904 init_waitqueue_head(&md->wait);
1800 INIT_WORK(&md->work, dm_wq_work); 1905 INIT_WORK(&md->work, dm_wq_work);
1906 INIT_WORK(&md->barrier_work, dm_rq_barrier_work);
1801 init_waitqueue_head(&md->eventq); 1907 init_waitqueue_head(&md->eventq);
1802 1908
1803 md->disk->major = _major; 1909 md->disk->major = _major;
@@ -2185,6 +2291,73 @@ static void dm_queue_flush(struct mapped_device *md)
2185 queue_work(md->wq, &md->work); 2291 queue_work(md->wq, &md->work);
2186} 2292}
2187 2293
2294static void dm_rq_set_flush_nr(struct request *clone, unsigned flush_nr)
2295{
2296 struct dm_rq_target_io *tio = clone->end_io_data;
2297
2298 tio->info.flush_request = flush_nr;
2299}
2300
2301/* Issue barrier requests to targets and wait for their completion. */
2302static int dm_rq_barrier(struct mapped_device *md)
2303{
2304 int i, j;
2305 struct dm_table *map = dm_get_table(md);
2306 unsigned num_targets = dm_table_get_num_targets(map);
2307 struct dm_target *ti;
2308 struct request *clone;
2309
2310 md->barrier_error = 0;
2311
2312 for (i = 0; i < num_targets; i++) {
2313 ti = dm_table_get_target(map, i);
2314 for (j = 0; j < ti->num_flush_requests; j++) {
2315 clone = clone_rq(md->flush_request, md, GFP_NOIO);
2316 dm_rq_set_flush_nr(clone, j);
2317 atomic_inc(&md->pending[rq_data_dir(clone)]);
2318 map_request(ti, clone, md);
2319 }
2320 }
2321
2322 dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
2323 dm_table_put(map);
2324
2325 return md->barrier_error;
2326}
2327
2328static void dm_rq_barrier_work(struct work_struct *work)
2329{
2330 int error;
2331 struct mapped_device *md = container_of(work, struct mapped_device,
2332 barrier_work);
2333 struct request_queue *q = md->queue;
2334 struct request *rq;
2335 unsigned long flags;
2336
2337 /*
2338 * Hold the md reference here and leave it at the last part so that
2339 * the md can't be deleted by device opener when the barrier request
2340 * completes.
2341 */
2342 dm_get(md);
2343
2344 error = dm_rq_barrier(md);
2345
2346 rq = md->flush_request;
2347 md->flush_request = NULL;
2348
2349 if (error == DM_ENDIO_REQUEUE) {
2350 spin_lock_irqsave(q->queue_lock, flags);
2351 blk_requeue_request(q, rq);
2352 spin_unlock_irqrestore(q->queue_lock, flags);
2353 } else
2354 blk_end_request_all(rq, error);
2355
2356 blk_run_queue(q);
2357
2358 dm_put(md);
2359}
2360
2188/* 2361/*
2189 * Swap in a new table (destroying old one). 2362 * Swap in a new table (destroying old one).
2190 */ 2363 */
@@ -2325,11 +2498,16 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
2325 set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags); 2498 set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
2326 up_write(&md->io_lock); 2499 up_write(&md->io_lock);
2327 2500
2328 flush_workqueue(md->wq); 2501 /*
2329 2502 * Request-based dm uses md->wq for barrier (dm_rq_barrier_work) which
2503 * can be kicked until md->queue is stopped. So stop md->queue before
2504 * flushing md->wq.
2505 */
2330 if (dm_request_based(md)) 2506 if (dm_request_based(md))
2331 stop_queue(md->queue); 2507 stop_queue(md->queue);
2332 2508
2509 flush_workqueue(md->wq);
2510
2333 /* 2511 /*
2334 * At this point no more requests are entering target request routines. 2512 * At this point no more requests are entering target request routines.
2335 * We call dm_wait_for_completion to wait for all existing requests 2513 * We call dm_wait_for_completion to wait for all existing requests