author     Kiyoshi Ueda <k-ueda@ct.jp.nec.com>       2009-06-22 05:12:36 -0400
committer  Alasdair G Kergon <agk@redhat.com>        2009-06-22 05:12:36 -0400
commit     e6ee8c0b767540f59e20da3ced282601db8aa502 (patch)
tree       101cb830994734eb45a4a47cd5988f24da67fa4f
parent     cec47e3d4a861e1d942b3a580d0bbef2700d2bb2 (diff)
dm: enable request based option
This patch enables request-based dm.

o Request-based dm and bio-based dm coexist, since some target drivers
  are better suited to bio-based dm.  There are also other bio-based
  devices in the kernel (e.g. md, loop).
  Since a bio-based device can't receive struct request, there are some
  limitations on device stacking between bio-based and request-based:

                           type of underlying device
      dm device type       bio-based      request-based
      --------------------------------------------------
      bio-based               OK               OK
      request-based           --               OK

  The device type is recognized by the queue flag in the kernel, so dm
  follows that.

o The type of a dm device is decided at the first table binding time.
  Once the type of a dm device is decided, it can't be changed.

o Mempool allocations are deferred to table loading time, since the
  mempools for request-based dm differ from those for bio-based dm and
  the required mempool type is fixed by the type of the table.

o Currently, request-based dm supports only tables that have a single
  target.  To support multiple targets, we need to support request
  splitting or prevent a bio/request from spanning multiple targets.
  The former needs lots of changes in the block layer, and the latter
  requires that all target drivers support a merge() function.
  Both will take time.

Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
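
As an aside (not part of the patch), the stacking rule in the table above
reduces to a single check: a bio-based dm device may sit on top of anything,
while a request-based dm device needs request-stackable (request-based)
underlying devices.  A minimal C sketch of that rule, with hypothetical names:

	enum dm_queue_type { DM_QT_BIO_BASED, DM_QT_REQUEST_BASED };

	/*
	 * Any queue accepts struct bio, so a bio-based device can stack on
	 * either type.  Only a request-based (request-stackable) queue can
	 * accept struct request, so a request-based device can't stack on a
	 * bio-based one.
	 */
	static int dm_stacking_allowed(enum dm_queue_type upper,
				       enum dm_queue_type lower)
	{
		return !(upper == DM_QT_REQUEST_BASED &&
			 lower == DM_QT_BIO_BASED);
	}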
-rw-r--r--  drivers/md/dm-ioctl.c |  13
-rw-r--r--  drivers/md/dm-table.c | 111
-rw-r--r--  drivers/md/dm.c       | 162
-rw-r--r--  drivers/md/dm.h       |  25
4 files changed, 285 insertions, 26 deletions
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 1c871736f48..7f77f18fcaf 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1050,6 +1050,12 @@ static int populate_table(struct dm_table *table,
 		next = spec->next;
 	}
 
+	r = dm_table_set_type(table);
+	if (r) {
+		DMWARN("unable to set table type");
+		return r;
+	}
+
 	return dm_table_complete(table);
 }
 
@@ -1095,6 +1101,13 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
 		goto out;
 	}
 
+	r = dm_table_alloc_md_mempools(t);
+	if (r) {
+		DMWARN("unable to allocate mempools for this table");
+		dm_table_destroy(t);
+		goto out;
+	}
+
 	down_write(&_hash_lock);
 	hc = dm_get_mdptr(md);
 	if (!hc || hc->md != md) {
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index c5f784419f2..aaeb82ed285 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -41,6 +41,7 @@
 struct dm_table {
 	struct mapped_device *md;
 	atomic_t holders;
+	unsigned type;
 
 	/* btree table */
 	unsigned int depth;
@@ -65,6 +66,8 @@ struct dm_table {
 	/* events get handed up using this callback */
 	void (*event_fn)(void *);
 	void *event_context;
+
+	struct dm_md_mempools *mempools;
 };
 
 /*
@@ -258,6 +261,8 @@ void dm_table_destroy(struct dm_table *t)
 	if (t->devices.next != &t->devices)
 		free_devices(&t->devices);
 
+	dm_free_md_mempools(t->mempools);
+
 	kfree(t);
 }
 
@@ -764,6 +769,99 @@ int dm_table_add_target(struct dm_table *t, const char *type,
 	return r;
 }
 
+int dm_table_set_type(struct dm_table *t)
+{
+	unsigned i;
+	unsigned bio_based = 0, request_based = 0;
+	struct dm_target *tgt;
+	struct dm_dev_internal *dd;
+	struct list_head *devices;
+
+	for (i = 0; i < t->num_targets; i++) {
+		tgt = t->targets + i;
+		if (dm_target_request_based(tgt))
+			request_based = 1;
+		else
+			bio_based = 1;
+
+		if (bio_based && request_based) {
+			DMWARN("Inconsistent table: different target types"
+			       " can't be mixed up");
+			return -EINVAL;
+		}
+	}
+
+	if (bio_based) {
+		/* We must use this table as bio-based */
+		t->type = DM_TYPE_BIO_BASED;
+		return 0;
+	}
+
+	BUG_ON(!request_based); /* No targets in this table */
+
+	/* Non-request-stackable devices can't be used for request-based dm */
+	devices = dm_table_get_devices(t);
+	list_for_each_entry(dd, devices, list) {
+		if (!blk_queue_stackable(bdev_get_queue(dd->dm_dev.bdev))) {
+			DMWARN("table load rejected: including"
+			       " non-request-stackable devices");
+			return -EINVAL;
+		}
+	}
+
+	/*
+	 * Request-based dm supports only tables that have a single target now.
+	 * To support multiple targets, request splitting support is needed,
+	 * and that needs lots of changes in the block-layer.
+	 * (e.g. request completion process for partial completion.)
+	 */
+	if (t->num_targets > 1) {
+		DMWARN("Request-based dm doesn't support multiple targets yet");
+		return -EINVAL;
+	}
+
+	t->type = DM_TYPE_REQUEST_BASED;
+
+	return 0;
+}
+
+unsigned dm_table_get_type(struct dm_table *t)
+{
+	return t->type;
+}
+
+bool dm_table_request_based(struct dm_table *t)
+{
+	return dm_table_get_type(t) == DM_TYPE_REQUEST_BASED;
+}
+
+int dm_table_alloc_md_mempools(struct dm_table *t)
+{
+	unsigned type = dm_table_get_type(t);
+
+	if (unlikely(type == DM_TYPE_NONE)) {
+		DMWARN("no table type is set, can't allocate mempools");
+		return -EINVAL;
+	}
+
+	t->mempools = dm_alloc_md_mempools(type);
+	if (!t->mempools)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void dm_table_free_md_mempools(struct dm_table *t)
+{
+	dm_free_md_mempools(t->mempools);
+	t->mempools = NULL;
+}
+
+struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t)
+{
+	return t->mempools;
+}
+
 static int setup_indexes(struct dm_table *t)
 {
 	int i;
@@ -985,6 +1083,19 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 	queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q);
 
 	dm_table_set_integrity(t);
+
+	/*
+	 * QUEUE_FLAG_STACKABLE must be set after all queue settings are
+	 * visible to other CPUs because, once the flag is set, incoming bios
+	 * are processed by request-based dm, which refers to the queue
+	 * settings.
+	 * Until the flag is set, bios are passed to bio-based dm and queued
+	 * to md->deferred, where queue settings are not needed yet.
+	 * Those bios are passed to request-based dm at resume time.
+	 */
+	smp_mb();
+	if (dm_table_request_based(t))
+		queue_flag_set_unlocked(QUEUE_FLAG_STACKABLE, q);
 }
 
 unsigned int dm_table_get_num_targets(struct dm_table *t)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index be003e5fea3..5a843c1f4d6 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -190,6 +190,15 @@ struct mapped_device {
 	struct bio barrier_bio;
 };
 
+/*
+ * For mempool pre-allocation at table loading time.
+ */
+struct dm_md_mempools {
+	mempool_t *io_pool;
+	mempool_t *tio_pool;
+	struct bio_set *bs;
+};
+
 #define MIN_IOS 256
 static struct kmem_cache *_io_cache;
 static struct kmem_cache *_tio_cache;
@@ -1739,10 +1748,22 @@ static struct mapped_device *alloc_dev(int minor)
 	INIT_LIST_HEAD(&md->uevent_list);
 	spin_lock_init(&md->uevent_lock);
 
-	md->queue = blk_alloc_queue(GFP_KERNEL);
+	md->queue = blk_init_queue(dm_request_fn, NULL);
 	if (!md->queue)
 		goto bad_queue;
 
+	/*
+	 * Request-based dm devices cannot be stacked on top of bio-based dm
+	 * devices.  The type of this dm device has not been decided yet,
+	 * although we initialized the queue using blk_init_queue().
+	 * The type is decided at the first table loading time.
+	 * To prevent problematic device stacking, clear the queue flag
+	 * for request stacking support until then.
+	 *
+	 * This queue is new, so no concurrency on the queue_flags.
+	 */
+	queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue);
+	md->saved_make_request_fn = md->queue->make_request_fn;
 	md->queue->queuedata = md;
 	md->queue->backing_dev_info.congested_fn = dm_any_congested;
 	md->queue->backing_dev_info.congested_data = md;
@@ -1751,18 +1772,9 @@ static struct mapped_device *alloc_dev(int minor)
 	blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
 	md->queue->unplug_fn = dm_unplug_all;
 	blk_queue_merge_bvec(md->queue, dm_merge_bvec);
-
-	md->io_pool = mempool_create_slab_pool(MIN_IOS, _io_cache);
-	if (!md->io_pool)
-		goto bad_io_pool;
-
-	md->tio_pool = mempool_create_slab_pool(MIN_IOS, _tio_cache);
-	if (!md->tio_pool)
-		goto bad_tio_pool;
-
-	md->bs = bioset_create(16, 0);
-	if (!md->bs)
-		goto bad_no_bioset;
+	blk_queue_softirq_done(md->queue, dm_softirq_done);
+	blk_queue_prep_rq(md->queue, dm_prep_fn);
+	blk_queue_lld_busy(md->queue, dm_lld_busy);
 
 	md->disk = alloc_disk(1);
 	if (!md->disk)
@@ -1804,12 +1816,6 @@ bad_bdev:
 bad_thread:
 	put_disk(md->disk);
 bad_disk:
-	bioset_free(md->bs);
-bad_no_bioset:
-	mempool_destroy(md->tio_pool);
-bad_tio_pool:
-	mempool_destroy(md->io_pool);
-bad_io_pool:
 	blk_cleanup_queue(md->queue);
 bad_queue:
 	free_minor(minor);
@@ -1829,9 +1835,12 @@ static void free_dev(struct mapped_device *md)
 	unlock_fs(md);
 	bdput(md->bdev);
 	destroy_workqueue(md->wq);
-	mempool_destroy(md->tio_pool);
-	mempool_destroy(md->io_pool);
-	bioset_free(md->bs);
+	if (md->tio_pool)
+		mempool_destroy(md->tio_pool);
+	if (md->io_pool)
+		mempool_destroy(md->io_pool);
+	if (md->bs)
+		bioset_free(md->bs);
 	blk_integrity_unregister(md->disk);
 	del_gendisk(md->disk);
 	free_minor(minor);
@@ -1846,6 +1855,29 @@ static void free_dev(struct mapped_device *md)
 	kfree(md);
 }
 
+static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
+{
+	struct dm_md_mempools *p;
+
+	if (md->io_pool && md->tio_pool && md->bs)
+		/* the md already has necessary mempools */
+		goto out;
+
+	p = dm_table_get_md_mempools(t);
+	BUG_ON(!p || md->io_pool || md->tio_pool || md->bs);
+
+	md->io_pool = p->io_pool;
+	p->io_pool = NULL;
+	md->tio_pool = p->tio_pool;
+	p->tio_pool = NULL;
+	md->bs = p->bs;
+	p->bs = NULL;
+
+out:
+	/* mempool bind completed, the table no longer needs any mempools */
+	dm_table_free_md_mempools(t);
+}
+
 /*
  * Bind a table to the device.
  */
@@ -1897,6 +1929,18 @@ static int __bind(struct mapped_device *md, struct dm_table *t,
 
 	dm_table_event_callback(t, event_callback, md);
 
+	/*
+	 * The queue hasn't been stopped yet, if the old table type wasn't
+	 * request-based during suspension.  So stop it to prevent
+	 * I/O mapping before resume.
+	 * This must be done before setting the queue restrictions,
+	 * because request-based dm may be run just after the setting.
+	 */
+	if (dm_table_request_based(t) && !blk_queue_stopped(q))
+		stop_queue(q);
+
+	__bind_mempools(md, t);
+
 	write_lock(&md->map_lock);
 	md->map = t;
 	dm_table_set_restrictions(t, q, limits);
@@ -2110,10 +2154,14 @@ static void dm_wq_work(struct work_struct *work)
 
 		up_write(&md->io_lock);
 
-		if (bio_barrier(c))
-			process_barrier(md, c);
-		else
-			__split_and_process_bio(md, c);
+		if (dm_request_based(md))
+			generic_make_request(c);
+		else {
+			if (bio_barrier(c))
+				process_barrier(md, c);
+			else
+				__split_and_process_bio(md, c);
+		}
 
 		down_write(&md->io_lock);
 	}
@@ -2146,6 +2194,13 @@ int dm_swap_table(struct mapped_device *md, struct dm_table *table)
 	if (r)
 		goto out;
 
+	/* cannot change the device type, once a table is bound */
+	if (md->map &&
+	    (dm_table_get_type(md->map) != dm_table_get_type(table))) {
+		DMWARN("can't change the device type after a table is bound");
+		goto out;
+	}
+
 	__unbind(md);
 	r = __bind(md, table, &limits);
 
@@ -2542,6 +2597,61 @@ int dm_noflush_suspending(struct dm_target *ti)
 }
 EXPORT_SYMBOL_GPL(dm_noflush_suspending);
 
+struct dm_md_mempools *dm_alloc_md_mempools(unsigned type)
+{
+	struct dm_md_mempools *pools = kmalloc(sizeof(*pools), GFP_KERNEL);
+
+	if (!pools)
+		return NULL;
+
+	pools->io_pool = (type == DM_TYPE_BIO_BASED) ?
+			 mempool_create_slab_pool(MIN_IOS, _io_cache) :
+			 mempool_create_slab_pool(MIN_IOS, _rq_bio_info_cache);
+	if (!pools->io_pool)
+		goto free_pools_and_out;
+
+	pools->tio_pool = (type == DM_TYPE_BIO_BASED) ?
+			  mempool_create_slab_pool(MIN_IOS, _tio_cache) :
+			  mempool_create_slab_pool(MIN_IOS, _rq_tio_cache);
+	if (!pools->tio_pool)
+		goto free_io_pool_and_out;
+
+	pools->bs = (type == DM_TYPE_BIO_BASED) ?
+		    bioset_create(16, 0) : bioset_create(MIN_IOS, 0);
+	if (!pools->bs)
+		goto free_tio_pool_and_out;
+
+	return pools;
+
+free_tio_pool_and_out:
+	mempool_destroy(pools->tio_pool);
+
+free_io_pool_and_out:
+	mempool_destroy(pools->io_pool);
+
+free_pools_and_out:
+	kfree(pools);
+
+	return NULL;
+}
+
+void dm_free_md_mempools(struct dm_md_mempools *pools)
+{
+	if (!pools)
+		return;
+
+	if (pools->io_pool)
+		mempool_destroy(pools->io_pool);
+
+	if (pools->tio_pool)
+		mempool_destroy(pools->tio_pool);
+
+	if (pools->bs)
+		bioset_free(pools->bs);
+
+	kfree(pools);
+}
+
 static struct block_device_operations dm_blk_dops = {
 	.open = dm_blk_open,
 	.release = dm_blk_close,
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 8dcabb1caff..a7663eba17e 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -23,6 +23,13 @@
 #define DM_SUSPEND_NOFLUSH_FLAG		(1 << 1)
 
 /*
+ * Type of table and mapped_device's mempool
+ */
+#define DM_TYPE_NONE		0
+#define DM_TYPE_BIO_BASED	1
+#define DM_TYPE_REQUEST_BASED	2
+
+/*
  * List of devices that a metadevice uses and should open/close.
  */
 struct dm_dev_internal {
@@ -32,6 +39,7 @@ struct dm_dev_internal {
 };
 
 struct dm_table;
+struct dm_md_mempools;
 
 /*-----------------------------------------------------------------
  * Internal table functions.
@@ -51,12 +59,23 @@ void dm_table_postsuspend_targets(struct dm_table *t);
 int dm_table_resume_targets(struct dm_table *t);
 int dm_table_any_congested(struct dm_table *t, int bdi_bits);
 int dm_table_any_busy_target(struct dm_table *t);
+int dm_table_set_type(struct dm_table *t);
+unsigned dm_table_get_type(struct dm_table *t);
+bool dm_table_request_based(struct dm_table *t);
+int dm_table_alloc_md_mempools(struct dm_table *t);
+void dm_table_free_md_mempools(struct dm_table *t);
+struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
 
 /*
  * To check the return value from dm_table_find_target().
  */
 #define dm_target_is_valid(t) ((t)->table)
 
+/*
+ * To check whether the target type is request-based or not (bio-based).
+ */
+#define dm_target_request_based(t) ((t)->type->map_rq != NULL)
+
 /*-----------------------------------------------------------------
  * A registry of target types.
  *---------------------------------------------------------------*/
@@ -102,4 +121,10 @@ void dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
 int dm_kcopyd_init(void);
 void dm_kcopyd_exit(void);
 
+/*
+ * Mempool operations
+ */
+struct dm_md_mempools *dm_alloc_md_mempools(unsigned type);
+void dm_free_md_mempools(struct dm_md_mempools *pools);
+
 #endif
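
For context on the dm_target_request_based() macro added to dm.h above: a
target driver is treated as request-based simply because its target_type
provides a map_rq hook.  The skeleton below is a hypothetical illustration
of that convention, not part of this patch, and it assumes the 2.6.31-era
struct target_type layout and request-based hooks:

	#include <linux/module.h>
	#include <linux/blkdev.h>
	#include <linux/device-mapper.h>

	static int example_ctr(struct dm_target *ti, unsigned int argc, char **argv)
	{
		/* parse table arguments and set ti->private here */
		return 0;
	}

	static void example_dtr(struct dm_target *ti)
	{
		/* release whatever the constructor allocated */
	}

	/*
	 * Supplying .map_rq (rather than .map) is exactly what
	 * dm_target_request_based() checks for.
	 */
	static int example_map_rq(struct dm_target *ti, struct request *clone,
				  union map_info *map_context)
	{
		/* map the clone onto an underlying request queue here */
		return DM_MAPIO_REMAPPED;
	}

	static struct target_type example_target = {
		.name    = "example-rq",
		.version = {1, 0, 0},
		.module  = THIS_MODULE,
		.ctr     = example_ctr,
		.dtr     = example_dtr,
		.map_rq  = example_map_rq,
	};

	static int __init dm_example_init(void)
	{
		return dm_register_target(&example_target);
	}

	static void __exit dm_example_exit(void)
	{
		dm_unregister_target(&example_target);
	}

	module_init(dm_example_init);
	module_exit(dm_example_exit);
	MODULE_LICENSE("GPL");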