Diffstat (limited to 'drivers/md/dm.c')

-rw-r--r--   drivers/md/dm.c | 214
 1 file changed, 196 insertions(+), 18 deletions(-)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 821a5dd6a8d1..3de8d6d5b0b8 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -143,9 +143,19 @@ struct mapped_device {
         int barrier_error;
 
         /*
+         * Protect barrier_error from concurrent endio processing
+         * in request-based dm.
+         */
+        spinlock_t barrier_error_lock;
+
+        /*
          * Processing queue (flush/barriers)
          */
         struct workqueue_struct *wq;
+        struct work_struct barrier_work;
+
+        /* A pointer to the currently processing pre/post flush request */
+        struct request *flush_request;
 
         /*
          * The current mapping.
@@ -722,6 +732,23 @@ static void end_clone_bio(struct bio *clone, int error)
         blk_update_request(tio->orig, 0, nr_bytes);
 }
 
+static void store_barrier_error(struct mapped_device *md, int error)
+{
+        unsigned long flags;
+
+        spin_lock_irqsave(&md->barrier_error_lock, flags);
+        /*
+         * Basically, the first error is taken, but:
+         *   -EOPNOTSUPP supersedes any I/O error.
+         *   Requeue request supersedes any I/O error but -EOPNOTSUPP.
+         */
+        if (!md->barrier_error || error == -EOPNOTSUPP ||
+            (md->barrier_error != -EOPNOTSUPP &&
+             error == DM_ENDIO_REQUEUE))
+                md->barrier_error = error;
+        spin_unlock_irqrestore(&md->barrier_error_lock, flags);
+}
+
 /*
  * Don't touch any member of the md after calling this function because
  * the md may be freed in dm_put() at the end of this function.
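
The update rule in store_barrier_error() can be exercised on its own. Below is a minimal userspace sketch of the same precedence logic, not kernel code: DM_ENDIO_REQUEUE is given a stand-in value here, and the error numbers come from <errno.h>.

#include <assert.h>
#include <errno.h>
#include <stdio.h>

#define DM_ENDIO_REQUEUE 1              /* stand-in for the kernel constant */

/* Same decision as store_barrier_error(), without the md and the spinlock. */
static int fold_barrier_error(int stored, int error)
{
        if (!stored || error == -EOPNOTSUPP ||
            (stored != -EOPNOTSUPP && error == DM_ENDIO_REQUEUE))
                return error;
        return stored;
}

int main(void)
{
        assert(fold_barrier_error(0, -EIO) == -EIO);        /* first error sticks */
        assert(fold_barrier_error(-EIO, -ENOMEM) == -EIO);  /* later I/O errors are ignored */
        assert(fold_barrier_error(-EIO, -EOPNOTSUPP) == -EOPNOTSUPP);
        assert(fold_barrier_error(-EIO, DM_ENDIO_REQUEUE) == DM_ENDIO_REQUEUE);
        assert(fold_barrier_error(-EOPNOTSUPP, DM_ENDIO_REQUEUE) == -EOPNOTSUPP);
        printf("precedence rule holds\n");
        return 0;
}

The net effect: a later -EOPNOTSUPP or requeue outcome may overwrite an earlier plain I/O error, but a plain I/O error never overwrites either of them.
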
@@ -759,11 +786,13 @@ static void free_rq_clone(struct request *clone)
 static void dm_end_request(struct request *clone, int error)
 {
         int rw = rq_data_dir(clone);
+        int run_queue = 1;
+        bool is_barrier = blk_barrier_rq(clone);
         struct dm_rq_target_io *tio = clone->end_io_data;
         struct mapped_device *md = tio->md;
         struct request *rq = tio->orig;
 
-        if (blk_pc_request(rq)) {
+        if (blk_pc_request(rq) && !is_barrier) {
                 rq->errors = clone->errors;
                 rq->resid_len = clone->resid_len;
 
@@ -778,9 +807,14 @@ static void dm_end_request(struct request *clone, int error)
 
         free_rq_clone(clone);
 
-        blk_end_request_all(rq, error);
+        if (unlikely(is_barrier)) {
+                if (unlikely(error))
+                        store_barrier_error(md, error);
+                run_queue = 0;
+        } else
+                blk_end_request_all(rq, error);
 
-        rq_completed(md, rw, 1);
+        rq_completed(md, rw, run_queue);
 }
 
 static void dm_unprep_request(struct request *rq)
@@ -805,6 +839,16 @@ void dm_requeue_unmapped_request(struct request *clone)
         struct request_queue *q = rq->q;
         unsigned long flags;
 
+        if (unlikely(blk_barrier_rq(clone))) {
+                /*
+                 * Barrier clones share an original request.
+                 * Leave it to dm_end_request(), which handles this special
+                 * case.
+                 */
+                dm_end_request(clone, DM_ENDIO_REQUEUE);
+                return;
+        }
+
         dm_unprep_request(rq);
 
         spin_lock_irqsave(q->queue_lock, flags);
@@ -894,6 +938,19 @@ static void dm_complete_request(struct request *clone, int error)
         struct dm_rq_target_io *tio = clone->end_io_data;
         struct request *rq = tio->orig;
 
+        if (unlikely(blk_barrier_rq(clone))) {
+                /*
+                 * Barrier clones share an original request. So can't use
+                 * softirq_done with the original.
+                 * Pass the clone to dm_done() directly in this special case.
+                 * It is safe (even if clone->q->queue_lock is held here)
+                 * because there is no I/O dispatching during the completion
+                 * of barrier clone.
+                 */
+                dm_done(clone, error, true);
+                return;
+        }
+
         tio->error = error;
         rq->completion_data = clone;
         blk_complete_request(rq);
@@ -910,6 +967,17 @@ void dm_kill_unmapped_request(struct request *clone, int error)
         struct dm_rq_target_io *tio = clone->end_io_data;
         struct request *rq = tio->orig;
 
+        if (unlikely(blk_barrier_rq(clone))) {
+                /*
+                 * Barrier clones share an original request.
+                 * Leave it to dm_end_request(), which handles this special
+                 * case.
+                 */
+                BUG_ON(error > 0);
+                dm_end_request(clone, error);
+                return;
+        }
+
         rq->cmd_flags |= REQ_FAILED;
         dm_complete_request(clone, error);
 }
@@ -1364,11 +1432,6 @@ static int dm_make_request(struct request_queue *q, struct bio *bio)
 {
         struct mapped_device *md = q->queuedata;
 
-        if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
-                bio_endio(bio, -EOPNOTSUPP);
-                return 0;
-        }
-
         return md->saved_make_request_fn(q, bio); /* call __make_request() */
 }
 
@@ -1387,6 +1450,25 @@ static int dm_request(struct request_queue *q, struct bio *bio)
         return _dm_request(q, bio);
 }
 
+/*
+ * Mark this request as flush request, so that dm_request_fn() can
+ * recognize.
+ */
+static void dm_rq_prepare_flush(struct request_queue *q, struct request *rq)
+{
+        rq->cmd_type = REQ_TYPE_LINUX_BLOCK;
+        rq->cmd[0] = REQ_LB_OP_FLUSH;
+}
+
+static bool dm_rq_is_flush_request(struct request *rq)
+{
+        if (rq->cmd_type == REQ_TYPE_LINUX_BLOCK &&
+            rq->cmd[0] == REQ_LB_OP_FLUSH)
+                return true;
+        else
+                return false;
+}
+
 void dm_dispatch_request(struct request *rq)
 {
         int r;
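
dm_rq_prepare_flush() stamps the block layer's empty flush request with (REQ_TYPE_LINUX_BLOCK, REQ_LB_OP_FLUSH) so that dm_rq_is_flush_request() can pick it out again in dm_prep_fn() and dm_request_fn(). Here is a small userspace sketch of the same mark-and-recognize pattern; the struct and constants below are stand-ins, not the kernel's definitions.

#include <assert.h>
#include <string.h>

/* Stand-ins for the kernel's request fields and constants. */
enum { REQ_TYPE_FS_DEMO = 1, REQ_TYPE_LINUX_BLOCK_DEMO = 2 };
enum { REQ_LB_OP_FLUSH_DEMO = 0x81 };

struct demo_request {
        int cmd_type;
        unsigned char cmd[16];
};

static void prepare_flush(struct demo_request *rq)
{
        rq->cmd_type = REQ_TYPE_LINUX_BLOCK_DEMO;
        rq->cmd[0] = REQ_LB_OP_FLUSH_DEMO;
}

static int is_flush_request(const struct demo_request *rq)
{
        return rq->cmd_type == REQ_TYPE_LINUX_BLOCK_DEMO &&
               rq->cmd[0] == REQ_LB_OP_FLUSH_DEMO;
}

int main(void)
{
        struct demo_request rq;

        memset(&rq, 0, sizeof(rq));
        rq.cmd_type = REQ_TYPE_FS_DEMO;
        assert(!is_flush_request(&rq));   /* ordinary request is not matched */

        prepare_flush(&rq);
        assert(is_flush_request(&rq));    /* marked request is recognized later */
        return 0;
}
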
@@ -1432,16 +1514,24 @@ static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
 static int setup_clone(struct request *clone, struct request *rq,
                        struct dm_rq_target_io *tio)
 {
-        int r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
-                                  dm_rq_bio_constructor, tio);
+        int r;
 
-        if (r)
-                return r;
+        if (dm_rq_is_flush_request(rq)) {
+                blk_rq_init(NULL, clone);
+                clone->cmd_type = REQ_TYPE_FS;
+                clone->cmd_flags |= (REQ_HARDBARRIER | WRITE);
+        } else {
+                r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
+                                      dm_rq_bio_constructor, tio);
+                if (r)
+                        return r;
+
+                clone->cmd = rq->cmd;
+                clone->cmd_len = rq->cmd_len;
+                clone->sense = rq->sense;
+                clone->buffer = rq->buffer;
+        }
 
-        clone->cmd = rq->cmd;
-        clone->cmd_len = rq->cmd_len;
-        clone->sense = rq->sense;
-        clone->buffer = rq->buffer;
         clone->end_io = end_clone_request;
         clone->end_io_data = tio;
 
@@ -1482,6 +1572,9 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq)
         struct mapped_device *md = q->queuedata;
         struct request *clone;
 
+        if (unlikely(dm_rq_is_flush_request(rq)))
+                return BLKPREP_OK;
+
         if (unlikely(rq->special)) {
                 DMWARN("Already has something in rq->special.");
                 return BLKPREP_KILL;
@@ -1560,6 +1653,14 @@ static void dm_request_fn(struct request_queue *q)
                 if (!rq)
                         goto plug_and_out;
 
+                if (unlikely(dm_rq_is_flush_request(rq))) {
+                        BUG_ON(md->flush_request);
+                        md->flush_request = rq;
+                        blk_start_request(rq);
+                        queue_work(md->wq, &md->barrier_work);
+                        goto out;
+                }
+
                 ti = dm_table_find_target(map, blk_rq_pos(rq));
                 if (ti->type->busy && ti->type->busy(ti))
                         goto plug_and_out;
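
In dm_request_fn() the flush is only dequeued, stashed in md->flush_request and handed to md->wq: the barrier work has to sleep while waiting for its clones, which the dispatch path (running with the queue lock held) cannot do. A minimal pthread sketch of that hand-off pattern; the mutex, condition variable and integer slot below are stand-ins for the kernel's workqueue machinery, not its API.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t kick = PTHREAD_COND_INITIALIZER;
static int flush_request;       /* stands in for md->flush_request */

/* Plays the role of dm_rq_barrier_work(): runs in process context, may block. */
static void *barrier_worker(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&lock);
        while (!flush_request)
                pthread_cond_wait(&kick, &lock);
        printf("worker: handling flush %d outside the dispatch path\n",
               flush_request);
        flush_request = 0;
        pthread_mutex_unlock(&lock);
        return NULL;
}

int main(void)
{
        pthread_t worker;

        pthread_create(&worker, NULL, barrier_worker, NULL);

        /* Plays the role of dm_request_fn(): stash the request, kick the worker. */
        pthread_mutex_lock(&lock);
        flush_request = 42;
        pthread_cond_signal(&kick);
        pthread_mutex_unlock(&lock);

        pthread_join(worker, NULL);
        return 0;
}
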
@@ -1726,6 +1827,7 @@ out:
 static const struct block_device_operations dm_blk_dops;
 
 static void dm_wq_work(struct work_struct *work);
+static void dm_rq_barrier_work(struct work_struct *work);
 
 /*
  * Allocate and initialise a blank device with a given minor.
@@ -1755,6 +1857,7 @@ static struct mapped_device *alloc_dev(int minor)
         init_rwsem(&md->io_lock);
         mutex_init(&md->suspend_lock);
         spin_lock_init(&md->deferred_lock);
+        spin_lock_init(&md->barrier_error_lock);
         rwlock_init(&md->map_lock);
         atomic_set(&md->holders, 1);
         atomic_set(&md->open_count, 0);
@@ -1789,6 +1892,8 @@ static struct mapped_device *alloc_dev(int minor)
         blk_queue_softirq_done(md->queue, dm_softirq_done);
         blk_queue_prep_rq(md->queue, dm_prep_fn);
         blk_queue_lld_busy(md->queue, dm_lld_busy);
+        blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH,
+                          dm_rq_prepare_flush);
 
         md->disk = alloc_disk(1);
         if (!md->disk)
@@ -1798,6 +1903,7 @@ static struct mapped_device *alloc_dev(int minor)
         atomic_set(&md->pending[1], 0);
         init_waitqueue_head(&md->wait);
         INIT_WORK(&md->work, dm_wq_work);
+        INIT_WORK(&md->barrier_work, dm_rq_barrier_work);
         init_waitqueue_head(&md->eventq);
 
         md->disk->major = _major;
@@ -2185,6 +2291,73 @@ static void dm_queue_flush(struct mapped_device *md)
         queue_work(md->wq, &md->work);
 }
 
+static void dm_rq_set_flush_nr(struct request *clone, unsigned flush_nr)
+{
+        struct dm_rq_target_io *tio = clone->end_io_data;
+
+        tio->info.flush_request = flush_nr;
+}
+
+/* Issue barrier requests to targets and wait for their completion. */
+static int dm_rq_barrier(struct mapped_device *md)
+{
+        int i, j;
+        struct dm_table *map = dm_get_table(md);
+        unsigned num_targets = dm_table_get_num_targets(map);
+        struct dm_target *ti;
+        struct request *clone;
+
+        md->barrier_error = 0;
+
+        for (i = 0; i < num_targets; i++) {
+                ti = dm_table_get_target(map, i);
+                for (j = 0; j < ti->num_flush_requests; j++) {
+                        clone = clone_rq(md->flush_request, md, GFP_NOIO);
+                        dm_rq_set_flush_nr(clone, j);
+                        atomic_inc(&md->pending[rq_data_dir(clone)]);
+                        map_request(ti, clone, md);
+                }
+        }
+
+        dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
+        dm_table_put(map);
+
+        return md->barrier_error;
+}
+
+static void dm_rq_barrier_work(struct work_struct *work)
+{
+        int error;
+        struct mapped_device *md = container_of(work, struct mapped_device,
+                                                barrier_work);
+        struct request_queue *q = md->queue;
+        struct request *rq;
+        unsigned long flags;
+
+        /*
+         * Hold the md reference here and leave it at the last part so that
+         * the md can't be deleted by device opener when the barrier request
+         * completes.
+         */
+        dm_get(md);
+
+        error = dm_rq_barrier(md);
+
+        rq = md->flush_request;
+        md->flush_request = NULL;
+
+        if (error == DM_ENDIO_REQUEUE) {
+                spin_lock_irqsave(q->queue_lock, flags);
+                blk_requeue_request(q, rq);
+                spin_unlock_irqrestore(q->queue_lock, flags);
+        } else
+                blk_end_request_all(rq, error);
+
+        blk_run_queue(q);
+
+        dm_put(md);
+}
+
 /*
  * Swap in a new table (destroying old one).
  */
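
dm_rq_barrier() clones the stashed flush once per flush request of every target, waits for all clones, and dm_rq_barrier_work() then requeues or completes the original according to the accumulated error. The sequential userspace model below sketches that control flow under stated assumptions: the target counts and per-clone results are hypothetical, and fold_error() repeats the store_barrier_error() rule inline, whereas in the real path the folding happens in the completion of each clone.

#include <errno.h>
#include <stdio.h>

#define DM_ENDIO_REQUEUE 1      /* stand-in for the kernel constant */

/* Same precedence rule as store_barrier_error(). */
static int fold_error(int stored, int err)
{
        if (!stored || err == -EOPNOTSUPP ||
            (stored != -EOPNOTSUPP && err == DM_ENDIO_REQUEUE))
                return err;
        return stored;
}

/* Pretend completion result for clone j of target i; one clone fails here. */
static int issue_flush_clone(int target, int clone_nr)
{
        return (target == 1 && clone_nr == 0) ? -EIO : 0;
}

int main(void)
{
        /* Hypothetical table: 3 targets, each accepting 2 flush requests. */
        const int num_targets = 3, num_flush_requests = 2;
        int barrier_error = 0;

        for (int i = 0; i < num_targets; i++)
                for (int j = 0; j < num_flush_requests; j++)
                        barrier_error = fold_error(barrier_error,
                                                   issue_flush_clone(i, j));

        /* dm_rq_barrier_work(): requeue on DM_ENDIO_REQUEUE, else complete. */
        if (barrier_error == DM_ENDIO_REQUEUE)
                printf("requeue the original flush request\n");
        else
                printf("complete the original flush request, error=%d\n",
                       barrier_error);
        return 0;
}
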
@@ -2325,11 +2498,16 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
         set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
         up_write(&md->io_lock);
 
-        flush_workqueue(md->wq);
-
+        /*
+         * Request-based dm uses md->wq for barrier (dm_rq_barrier_work) which
+         * can be kicked until md->queue is stopped. So stop md->queue before
+         * flushing md->wq.
+         */
         if (dm_request_based(md))
                 stop_queue(md->queue);
 
+        flush_workqueue(md->wq);
+
         /*
          * At this point no more requests are entering target request routines.
          * We call dm_wait_for_completion to wait for all existing requests