author     Linus Torvalds <torvalds@linux-foundation.org>   2013-05-10 12:02:50 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2013-05-10 12:02:50 -0400
commit     ec6671589a07d9b27ff5832138ff435b3a3c9b09 (patch)
tree       4866cfd09e45a492b5b96380818fb5d1e3a4fac0
parent     f755407dd19072b7d20719bc5454caed9ab41cc1 (diff)
parent     2f14f4b51ed34fe2b704af8df178f5cd8c81f65e (diff)
Merge tag 'dm-3.10-changes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm
Pull device-mapper updates from Alasdair Kergon:
 "Allow devices that hold metadata for the device-mapper thin
  provisioning target to be extended easily; allow WRITE SAME on
  multipath devices; an assortment of little fixes and clean-ups."

* tag 'dm-3.10-changes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm: (21 commits)
  dm cache: set config value
  dm cache: move config fns
  dm thin: generate event when metadata threshold passed
  dm persistent metadata: add space map threshold callback
  dm persistent data: add threshold callback to space map
  dm thin: detect metadata device resizing
  dm persistent data: support space map resizing
  dm thin: open dev read only when possible
  dm thin: refactor data dev resize
  dm cache: replace memcpy with struct assignment
  dm cache: fix typos in comments
  dm cache policy: fix description of lookup fn
  dm: document iterate_devices
  dm persistent data: fix error message typos
  dm cache: tune migration throttling
  dm mpath: enable WRITE SAME support
  dm table: fix write same support
  dm bufio: avoid a possible __vmalloc deadlock
  dm snapshot: fix error return code in snapshot_ctr
  dm cache: fix error return code in cache_create
  ...
-rw-r--r--  drivers/md/dm-bufio.c                                 24
-rw-r--r--  drivers/md/dm-cache-metadata.c                         4
-rw-r--r--  drivers/md/dm-cache-policy.h                           4
-rw-r--r--  drivers/md/dm-cache-target.c                         100
-rw-r--r--  drivers/md/dm-mpath.c                                  1
-rw-r--r--  drivers/md/dm-snap.c                                   1
-rw-r--r--  drivers/md/dm-stripe.c                                11
-rw-r--r--  drivers/md/dm-table.c                                  2
-rw-r--r--  drivers/md/dm-thin-metadata.c                         36
-rw-r--r--  drivers/md/dm-thin-metadata.h                          7
-rw-r--r--  drivers/md/dm-thin.c                                 200
-rw-r--r--  drivers/md/persistent-data/dm-space-map-disk.c         3
-rw-r--r--  drivers/md/persistent-data/dm-space-map-metadata.c   127
-rw-r--r--  drivers/md/persistent-data/dm-space-map.h             23
-rw-r--r--  include/linux/device-mapper.h                         15
15 files changed, 442 insertions, 116 deletions
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index c6083132c4b8..0387e05cdb98 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -319,6 +319,9 @@ static void __cache_size_refresh(void)
319static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask, 319static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
320 enum data_mode *data_mode) 320 enum data_mode *data_mode)
321{ 321{
322 unsigned noio_flag;
323 void *ptr;
324
322 if (c->block_size <= DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT) { 325 if (c->block_size <= DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT) {
323 *data_mode = DATA_MODE_SLAB; 326 *data_mode = DATA_MODE_SLAB;
324 return kmem_cache_alloc(DM_BUFIO_CACHE(c), gfp_mask); 327 return kmem_cache_alloc(DM_BUFIO_CACHE(c), gfp_mask);
@@ -332,7 +335,26 @@ static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
332 } 335 }
333 336
334 *data_mode = DATA_MODE_VMALLOC; 337 *data_mode = DATA_MODE_VMALLOC;
335 return __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL); 338
339 /*
340 * __vmalloc allocates the data pages and auxiliary structures with
341 * gfp_flags that were specified, but pagetables are always allocated
342 * with GFP_KERNEL, no matter what was specified as gfp_mask.
343 *
344 * Consequently, we must set per-process flag PF_MEMALLOC_NOIO so that
345 * all allocations done by this process (including pagetables) are done
346 * as if GFP_NOIO was specified.
347 */
348
349 if (gfp_mask & __GFP_NORETRY)
350 noio_flag = memalloc_noio_save();
351
352 ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
353
354 if (gfp_mask & __GFP_NORETRY)
355 memalloc_noio_restore(noio_flag);
356
357 return ptr;
336} 358}
337 359
338/* 360/*
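
The hunk above works around __vmalloc() honouring the caller's gfp_mask only for the data pages: as the new comment says, the page tables underneath are always allocated with GFP_KERNEL, so a GFP_NOIO caller could still recurse into I/O and deadlock. That is why the allocation is bracketed with memalloc_noio_save()/memalloc_noio_restore(), which set a per-process flag. The following user-space sketch is only an analogy of that per-process-flag idea (none of these names are kernel APIs): a helper deep in the call chain never sees the caller's flags argument, but it does see the process-wide flag.

/*
 * User-space analogy of the PF_MEMALLOC_NOIO pattern used above.
 * "process_flags" stands in for current->flags; nested helpers that never
 * receive the caller's gfp_mask still observe the process-wide flag.
 * All names here are illustrative, not kernel APIs.
 */
#include <stdio.h>

static unsigned process_flags;          /* models current->flags */
#define PF_ANALOGY_NOIO 0x1

static unsigned noio_save(void)
{
        unsigned old = process_flags;
        process_flags |= PF_ANALOGY_NOIO;
        return old;
}

static void noio_restore(unsigned old)
{
        process_flags = old;
}

/* Models the page-table allocation buried inside __vmalloc(): it has no
 * access to the caller's mask, only to the per-process flag. */
static void alloc_page_tables(void)
{
        if (process_flags & PF_ANALOGY_NOIO)
                printf("  nested alloc: NOIO honoured\n");
        else
                printf("  nested alloc: may issue I/O (deadlock risk)\n");
}

static void vmalloc_like(void)
{
        alloc_page_tables();
}

int main(void)
{
        unsigned old;

        printf("without bracketing:\n");
        vmalloc_like();

        printf("with bracketing:\n");
        old = noio_save();
        vmalloc_like();
        noio_restore(old);
        return 0;
}

Passing a NOIO mask alone would never reach alloc_page_tables() here, just as gfp_mask never reaches the kernel's page-table allocations; only the process-wide flag does, which is the point of the save/restore bracketing.
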
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index 83e995fece88..1af7255bbffb 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -1044,7 +1044,7 @@ void dm_cache_metadata_get_stats(struct dm_cache_metadata *cmd,
1044 struct dm_cache_statistics *stats) 1044 struct dm_cache_statistics *stats)
1045{ 1045{
1046 down_read(&cmd->root_lock); 1046 down_read(&cmd->root_lock);
1047 memcpy(stats, &cmd->stats, sizeof(*stats)); 1047 *stats = cmd->stats;
1048 up_read(&cmd->root_lock); 1048 up_read(&cmd->root_lock);
1049} 1049}
1050 1050
@@ -1052,7 +1052,7 @@ void dm_cache_metadata_set_stats(struct dm_cache_metadata *cmd,
1052 struct dm_cache_statistics *stats) 1052 struct dm_cache_statistics *stats)
1053{ 1053{
1054 down_write(&cmd->root_lock); 1054 down_write(&cmd->root_lock);
1055 memcpy(&cmd->stats, stats, sizeof(*stats)); 1055 cmd->stats = *stats;
1056 up_write(&cmd->root_lock); 1056 up_write(&cmd->root_lock);
1057} 1057}
1058 1058
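
The two hunks above swap memcpy() of the statistics structure for plain struct assignment under the same rwsem. The copy is the same for a plain struct; the gain is that the compiler now checks that both sides really have the same type and the size can no longer drift out of sync with a hand-written sizeof. A minimal sketch of the equivalence (the struct and its fields are made up for illustration):

#include <stdio.h>
#include <string.h>

struct stats_like {
        unsigned long read_hits;
        unsigned long read_misses;
};

int main(void)
{
        struct stats_like src = { 10, 2 }, a, b;

        memcpy(&a, &src, sizeof(a));   /* works, but size and types are on the programmer */
        b = src;                       /* same copy, checked by the compiler */

        printf("%lu %lu / %lu %lu\n", a.read_hits, a.read_misses,
               b.read_hits, b.read_misses);
        return 0;
}
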
diff --git a/drivers/md/dm-cache-policy.h b/drivers/md/dm-cache-policy.h
index 558bdfdabf5f..33369ca9614f 100644
--- a/drivers/md/dm-cache-policy.h
+++ b/drivers/md/dm-cache-policy.h
@@ -130,8 +130,8 @@ struct dm_cache_policy {
130 * 130 *
131 * Must not block. 131 * Must not block.
132 * 132 *
133 * Returns 1 iff in cache, 0 iff not, < 0 on error (-EWOULDBLOCK 133 * Returns 0 if in cache, -ENOENT if not, < 0 for other errors
134 * would be typical). 134 * (-EWOULDBLOCK would be typical).
135 */ 135 */
136 int (*lookup)(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock); 136 int (*lookup)(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock);
137 137
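
The comment fix above corrects the documented return convention of the policy lookup hook: 0 means the origin block is in the cache (cblock filled in), -ENOENT means it is not, and any other negative value is an error such as -EWOULDBLOCK. Below is a standalone sketch of a caller separating the three cases; lookup_stub() and its block numbers are invented stand-ins, not the real policy callback.

#include <errno.h>
#include <stdio.h>

/* Stand-in for policy->lookup(): 0 = hit, -ENOENT = miss, other <0 = error. */
static int lookup_stub(unsigned long oblock, unsigned long *cblock)
{
        if (oblock == 42) {          /* pretend only block 42 is cached */
                *cblock = 7;
                return 0;
        }
        if (oblock == 99)
                return -EWOULDBLOCK; /* pretend this lookup would have to block */
        return -ENOENT;
}

static void classify(unsigned long oblock)
{
        unsigned long cblock;
        int r = lookup_stub(oblock, &cblock);

        if (!r)
                printf("block %lu: cached at %lu\n", oblock, cblock);
        else if (r == -ENOENT)
                printf("block %lu: not in cache\n", oblock);
        else
                printf("block %lu: lookup error %d\n", oblock, r);
}

int main(void)
{
        classify(42);
        classify(1);
        classify(99);
        return 0;
}
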
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 10744091e6ca..df44b60e66f2 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -205,7 +205,7 @@ struct per_bio_data {
205 /* 205 /*
206 * writethrough fields. These MUST remain at the end of this 206 * writethrough fields. These MUST remain at the end of this
207 * structure and the 'cache' member must be the first as it 207 * structure and the 'cache' member must be the first as it
208 * is used to determine the offsetof the writethrough fields. 208 * is used to determine the offset of the writethrough fields.
209 */ 209 */
210 struct cache *cache; 210 struct cache *cache;
211 dm_cblock_t cblock; 211 dm_cblock_t cblock;
@@ -393,7 +393,7 @@ static int get_cell(struct cache *cache,
393 return r; 393 return r;
394} 394}
395 395
396 /*----------------------------------------------------------------*/ 396/*----------------------------------------------------------------*/
397 397
398static bool is_dirty(struct cache *cache, dm_cblock_t b) 398static bool is_dirty(struct cache *cache, dm_cblock_t b)
399{ 399{
@@ -419,6 +419,7 @@ static void clear_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cbl
419} 419}
420 420
421/*----------------------------------------------------------------*/ 421/*----------------------------------------------------------------*/
422
422static bool block_size_is_power_of_two(struct cache *cache) 423static bool block_size_is_power_of_two(struct cache *cache)
423{ 424{
424 return cache->sectors_per_block_shift >= 0; 425 return cache->sectors_per_block_shift >= 0;
@@ -667,7 +668,7 @@ static void writethrough_endio(struct bio *bio, int err)
667 668
668 /* 669 /*
669 * We can't issue this bio directly, since we're in interrupt 670 * We can't issue this bio directly, since we're in interrupt
670 * context. So it get's put on a bio list for processing by the 671 * context. So it gets put on a bio list for processing by the
671 * worker thread. 672 * worker thread.
672 */ 673 */
673 defer_writethrough_bio(pb->cache, bio); 674 defer_writethrough_bio(pb->cache, bio);
@@ -1445,6 +1446,7 @@ static void do_worker(struct work_struct *ws)
1445static void do_waker(struct work_struct *ws) 1446static void do_waker(struct work_struct *ws)
1446{ 1447{
1447 struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker); 1448 struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker);
1449 policy_tick(cache->policy);
1448 wake_worker(cache); 1450 wake_worker(cache);
1449 queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD); 1451 queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD);
1450} 1452}
@@ -1809,7 +1811,37 @@ static int parse_cache_args(struct cache_args *ca, int argc, char **argv,
1809 1811
1810static struct kmem_cache *migration_cache; 1812static struct kmem_cache *migration_cache;
1811 1813
1812static int set_config_values(struct dm_cache_policy *p, int argc, const char **argv) 1814#define NOT_CORE_OPTION 1
1815
1816static int process_config_option(struct cache *cache, const char *key, const char *value)
1817{
1818 unsigned long tmp;
1819
1820 if (!strcasecmp(key, "migration_threshold")) {
1821 if (kstrtoul(value, 10, &tmp))
1822 return -EINVAL;
1823
1824 cache->migration_threshold = tmp;
1825 return 0;
1826 }
1827
1828 return NOT_CORE_OPTION;
1829}
1830
1831static int set_config_value(struct cache *cache, const char *key, const char *value)
1832{
1833 int r = process_config_option(cache, key, value);
1834
1835 if (r == NOT_CORE_OPTION)
1836 r = policy_set_config_value(cache->policy, key, value);
1837
1838 if (r)
1839 DMWARN("bad config value for %s: %s", key, value);
1840
1841 return r;
1842}
1843
1844static int set_config_values(struct cache *cache, int argc, const char **argv)
1813{ 1845{
1814 int r = 0; 1846 int r = 0;
1815 1847
@@ -1819,12 +1851,9 @@ static int set_config_values(struct dm_cache_policy *p, int argc, const char **a
1819 } 1851 }
1820 1852
1821 while (argc) { 1853 while (argc) {
1822 r = policy_set_config_value(p, argv[0], argv[1]); 1854 r = set_config_value(cache, argv[0], argv[1]);
1823 if (r) { 1855 if (r)
1824 DMWARN("policy_set_config_value failed: key = '%s', value = '%s'", 1856 break;
1825 argv[0], argv[1]);
1826 return r;
1827 }
1828 1857
1829 argc -= 2; 1858 argc -= 2;
1830 argv += 2; 1859 argv += 2;
@@ -1836,8 +1865,6 @@ static int set_config_values(struct dm_cache_policy *p, int argc, const char **a
1836static int create_cache_policy(struct cache *cache, struct cache_args *ca, 1865static int create_cache_policy(struct cache *cache, struct cache_args *ca,
1837 char **error) 1866 char **error)
1838{ 1867{
1839 int r;
1840
1841 cache->policy = dm_cache_policy_create(ca->policy_name, 1868 cache->policy = dm_cache_policy_create(ca->policy_name,
1842 cache->cache_size, 1869 cache->cache_size,
1843 cache->origin_sectors, 1870 cache->origin_sectors,
@@ -1847,14 +1874,7 @@ static int create_cache_policy(struct cache *cache, struct cache_args *ca,
1847 return -ENOMEM; 1874 return -ENOMEM;
1848 } 1875 }
1849 1876
1850 r = set_config_values(cache->policy, ca->policy_argc, ca->policy_argv); 1877 return 0;
1851 if (r) {
1852 *error = "Error setting cache policy's config values";
1853 dm_cache_policy_destroy(cache->policy);
1854 cache->policy = NULL;
1855 }
1856
1857 return r;
1858} 1878}
1859 1879
1860/* 1880/*
@@ -1886,7 +1906,7 @@ static sector_t calculate_discard_block_size(sector_t cache_block_size,
1886 return discard_block_size; 1906 return discard_block_size;
1887} 1907}
1888 1908
1889#define DEFAULT_MIGRATION_THRESHOLD (2048 * 100) 1909#define DEFAULT_MIGRATION_THRESHOLD 2048
1890 1910
1891static int cache_create(struct cache_args *ca, struct cache **result) 1911static int cache_create(struct cache_args *ca, struct cache **result)
1892{ 1912{
@@ -1911,7 +1931,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
1911 ti->discards_supported = true; 1931 ti->discards_supported = true;
1912 ti->discard_zeroes_data_unsupported = true; 1932 ti->discard_zeroes_data_unsupported = true;
1913 1933
1914 memcpy(&cache->features, &ca->features, sizeof(cache->features)); 1934 cache->features = ca->features;
1915 ti->per_bio_data_size = get_per_bio_data_size(cache); 1935 ti->per_bio_data_size = get_per_bio_data_size(cache);
1916 1936
1917 cache->callbacks.congested_fn = cache_is_congested; 1937 cache->callbacks.congested_fn = cache_is_congested;
@@ -1948,7 +1968,15 @@ static int cache_create(struct cache_args *ca, struct cache **result)
1948 r = create_cache_policy(cache, ca, error); 1968 r = create_cache_policy(cache, ca, error);
1949 if (r) 1969 if (r)
1950 goto bad; 1970 goto bad;
1971
1951 cache->policy_nr_args = ca->policy_argc; 1972 cache->policy_nr_args = ca->policy_argc;
1973 cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD;
1974
1975 r = set_config_values(cache, ca->policy_argc, ca->policy_argv);
1976 if (r) {
1977 *error = "Error setting cache policy's config values";
1978 goto bad;
1979 }
1952 1980
1953 cmd = dm_cache_metadata_open(cache->metadata_dev->bdev, 1981 cmd = dm_cache_metadata_open(cache->metadata_dev->bdev,
1954 ca->block_size, may_format, 1982 ca->block_size, may_format,
@@ -1967,10 +1995,10 @@ static int cache_create(struct cache_args *ca, struct cache **result)
1967 INIT_LIST_HEAD(&cache->quiesced_migrations); 1995 INIT_LIST_HEAD(&cache->quiesced_migrations);
1968 INIT_LIST_HEAD(&cache->completed_migrations); 1996 INIT_LIST_HEAD(&cache->completed_migrations);
1969 INIT_LIST_HEAD(&cache->need_commit_migrations); 1997 INIT_LIST_HEAD(&cache->need_commit_migrations);
1970 cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD;
1971 atomic_set(&cache->nr_migrations, 0); 1998 atomic_set(&cache->nr_migrations, 0);
1972 init_waitqueue_head(&cache->migration_wait); 1999 init_waitqueue_head(&cache->migration_wait);
1973 2000
2001 r = -ENOMEM;
1974 cache->nr_dirty = 0; 2002 cache->nr_dirty = 0;
1975 cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size)); 2003 cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size));
1976 if (!cache->dirty_bitset) { 2004 if (!cache->dirty_bitset) {
@@ -2517,23 +2545,6 @@ err:
2517 DMEMIT("Error"); 2545 DMEMIT("Error");
2518} 2546}
2519 2547
2520#define NOT_CORE_OPTION 1
2521
2522static int process_config_option(struct cache *cache, char **argv)
2523{
2524 unsigned long tmp;
2525
2526 if (!strcasecmp(argv[0], "migration_threshold")) {
2527 if (kstrtoul(argv[1], 10, &tmp))
2528 return -EINVAL;
2529
2530 cache->migration_threshold = tmp;
2531 return 0;
2532 }
2533
2534 return NOT_CORE_OPTION;
2535}
2536
2537/* 2548/*
2538 * Supports <key> <value>. 2549 * Supports <key> <value>.
2539 * 2550 *
@@ -2541,17 +2552,12 @@ static int process_config_option(struct cache *cache, char **argv)
2541 */ 2552 */
2542static int cache_message(struct dm_target *ti, unsigned argc, char **argv) 2553static int cache_message(struct dm_target *ti, unsigned argc, char **argv)
2543{ 2554{
2544 int r;
2545 struct cache *cache = ti->private; 2555 struct cache *cache = ti->private;
2546 2556
2547 if (argc != 2) 2557 if (argc != 2)
2548 return -EINVAL; 2558 return -EINVAL;
2549 2559
2550 r = process_config_option(cache, argv); 2560 return set_config_value(cache, argv[0], argv[1]);
2551 if (r == NOT_CORE_OPTION)
2552 return policy_set_config_value(cache->policy, argv[0], argv[1]);
2553
2554 return r;
2555} 2561}
2556 2562
2557static int cache_iterate_devices(struct dm_target *ti, 2563static int cache_iterate_devices(struct dm_target *ti,
@@ -2609,7 +2615,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
2609 2615
2610static struct target_type cache_target = { 2616static struct target_type cache_target = {
2611 .name = "cache", 2617 .name = "cache",
2612 .version = {1, 1, 0}, 2618 .version = {1, 1, 1},
2613 .module = THIS_MODULE, 2619 .module = THIS_MODULE,
2614 .ctr = cache_ctr, 2620 .ctr = cache_ctr,
2615 .dtr = cache_dtr, 2621 .dtr = cache_dtr,
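
The dm-cache-target changes above fold the constructor's policy-argument parsing and the runtime message path into one helper, set_config_value(): core options such as migration_threshold are tried first, and process_config_option() returns the NOT_CORE_OPTION sentinel to mean "not mine, hand it to the policy". (The same series also lowers DEFAULT_MIGRATION_THRESHOLD to 2048 sectors and has do_waker() call policy_tick().) Below is a standalone sketch of that two-level dispatch; the policy handler and the sequential_threshold key are invented for illustration.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NOT_CORE_OPTION 1

static unsigned long migration_threshold = 2048;   /* core option */

static int process_core_option(const char *key, const char *value)
{
        if (!strcmp(key, "migration_threshold")) {
                migration_threshold = strtoul(value, NULL, 10);
                return 0;
        }
        return NOT_CORE_OPTION;      /* not a core option: let the policy try */
}

/* Stand-in for policy_set_config_value(). */
static int policy_set_value(const char *key, const char *value)
{
        if (!strcmp(key, "sequential_threshold")) {
                printf("policy: %s = %s\n", key, value);
                return 0;
        }
        return -EINVAL;
}

static int set_config_value(const char *key, const char *value)
{
        int r = process_core_option(key, value);

        if (r == NOT_CORE_OPTION)
                r = policy_set_value(key, value);
        if (r)
                fprintf(stderr, "bad config value for %s: %s\n", key, value);
        return r;
}

int main(void)
{
        set_config_value("migration_threshold", "100000");
        set_config_value("sequential_threshold", "512");
        set_config_value("bogus_key", "1");
        printf("migration_threshold = %lu\n", migration_threshold);
        return 0;
}

Routing both the ctr arguments and later "dmsetup message" calls through the same helper is what lets migration_threshold be set at table load time as well as at runtime.
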
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 51bb81676be3..bdf26f5bd326 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -907,6 +907,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
907 907
908 ti->num_flush_bios = 1; 908 ti->num_flush_bios = 1;
909 ti->num_discard_bios = 1; 909 ti->num_discard_bios = 1;
910 ti->num_write_same_bios = 1;
910 911
911 return 0; 912 return 0;
912 913
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index c0e07026a8d1..c434e5aab2df 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1121,6 +1121,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
1121 s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache); 1121 s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache);
1122 if (!s->pending_pool) { 1122 if (!s->pending_pool) {
1123 ti->error = "Could not allocate mempool for pending exceptions"; 1123 ti->error = "Could not allocate mempool for pending exceptions";
1124 r = -ENOMEM;
1124 goto bad_pending_pool; 1125 goto bad_pending_pool;
1125 } 1126 }
1126 1127
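
The snapshot_ctr() hunk above is the classic "error path returns success" fix: when mempool_create_slab_pool() fails, r still holds 0 from the last successful step, so without the explicit r = -ENOMEM the constructor would jump to the unwind path and then report success. A tiny standalone sketch of the pattern (simplified, with an artificial failure condition):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static int ctr_like(size_t pool_size)
{
        int r = 0;                    /* earlier setup steps succeeded */
        void *pending_pool;

        /* artificial failure stand-in for mempool_create_slab_pool() */
        pending_pool = pool_size > (1u << 20) ? NULL : malloc(pool_size);
        if (!pending_pool) {
                r = -ENOMEM;          /* without this, r stays 0 and we "succeed" */
                goto bad_pending_pool;
        }

        free(pending_pool);
        return 0;

bad_pending_pool:
        return r;
}

int main(void)
{
        printf("small pool: %d\n", ctr_like(4096));
        printf("huge pool:  %d\n", ctr_like((size_t)1 << 30));
        return 0;
}
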
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index ea5e878a30b9..d907ca6227ce 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -94,7 +94,7 @@ static int get_stripe(struct dm_target *ti, struct stripe_c *sc,
94static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) 94static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
95{ 95{
96 struct stripe_c *sc; 96 struct stripe_c *sc;
97 sector_t width; 97 sector_t width, tmp_len;
98 uint32_t stripes; 98 uint32_t stripes;
99 uint32_t chunk_size; 99 uint32_t chunk_size;
100 int r; 100 int r;
@@ -116,15 +116,16 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
116 } 116 }
117 117
118 width = ti->len; 118 width = ti->len;
119 if (sector_div(width, chunk_size)) { 119 if (sector_div(width, stripes)) {
120 ti->error = "Target length not divisible by " 120 ti->error = "Target length not divisible by "
121 "chunk size"; 121 "number of stripes";
122 return -EINVAL; 122 return -EINVAL;
123 } 123 }
124 124
125 if (sector_div(width, stripes)) { 125 tmp_len = width;
126 if (sector_div(tmp_len, chunk_size)) {
126 ti->error = "Target length not divisible by " 127 ti->error = "Target length not divisible by "
127 "number of stripes"; 128 "chunk size";
128 return -EINVAL; 129 return -EINVAL;
129 } 130 }
130 131
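
The stripe_ctr() fix above hinges on sector_div() dividing in place: the dividend is replaced by the quotient and the remainder is returned. The old code divided ti->len by chunk_size first, so the later divisibility check against the number of stripes ran on that quotient rather than on the target length, and width ended up divided by an extra factor of chunk_size. The new code checks the stripe count against the full length first and uses the scratch copy tmp_len for the chunk-size check, leaving width (the per-stripe length) intact. A standalone sketch with a sector_div-like helper (the helper and the sizes are invented):

#include <stdint.h>
#include <stdio.h>

/* Mimics the kernel's sector_div(): *n becomes the quotient, remainder returned. */
static uint32_t sector_div_like(uint64_t *n, uint32_t base)
{
        uint32_t rem = (uint32_t)(*n % base);
        *n /= base;
        return rem;
}

static int stripe_check(uint64_t len, uint32_t stripes, uint32_t chunk_size)
{
        uint64_t width = len, tmp_len;

        if (sector_div_like(&width, stripes)) {
                fprintf(stderr, "len not divisible by number of stripes\n");
                return -1;
        }

        tmp_len = width;                        /* keep width intact */
        if (sector_div_like(&tmp_len, chunk_size)) {
                fprintf(stderr, "stripe width not divisible by chunk size\n");
                return -1;
        }

        printf("ok: width per stripe = %llu sectors\n", (unsigned long long)width);
        return 0;
}

int main(void)
{
        stripe_check(8192, 4, 128);   /* divisible both ways */
        stripe_check(8192, 3, 128);   /* not divisible by number of stripes */
        return 0;
}
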
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index e50dad0c65f4..1ff252ab7d46 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1442,7 +1442,7 @@ static bool dm_table_supports_write_same(struct dm_table *t)
1442 return false; 1442 return false;
1443 1443
1444 if (!ti->type->iterate_devices || 1444 if (!ti->type->iterate_devices ||
1445 !ti->type->iterate_devices(ti, device_not_write_same_capable, NULL)) 1445 ti->type->iterate_devices(ti, device_not_write_same_capable, NULL))
1446 return false; 1446 return false;
1447 } 1447 }
1448 1448
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 00cee02f8fc9..60bce435f4fa 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -1645,12 +1645,12 @@ int dm_thin_get_highest_mapped_block(struct dm_thin_device *td,
1645 return r; 1645 return r;
1646} 1646}
1647 1647
1648static int __resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) 1648static int __resize_space_map(struct dm_space_map *sm, dm_block_t new_count)
1649{ 1649{
1650 int r; 1650 int r;
1651 dm_block_t old_count; 1651 dm_block_t old_count;
1652 1652
1653 r = dm_sm_get_nr_blocks(pmd->data_sm, &old_count); 1653 r = dm_sm_get_nr_blocks(sm, &old_count);
1654 if (r) 1654 if (r)
1655 return r; 1655 return r;
1656 1656
@@ -1658,11 +1658,11 @@ static int __resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count)
1658 return 0; 1658 return 0;
1659 1659
1660 if (new_count < old_count) { 1660 if (new_count < old_count) {
1661 DMERR("cannot reduce size of data device"); 1661 DMERR("cannot reduce size of space map");
1662 return -EINVAL; 1662 return -EINVAL;
1663 } 1663 }
1664 1664
1665 return dm_sm_extend(pmd->data_sm, new_count - old_count); 1665 return dm_sm_extend(sm, new_count - old_count);
1666} 1666}
1667 1667
1668int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) 1668int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count)
@@ -1671,7 +1671,19 @@ int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count)
1671 1671
1672 down_write(&pmd->root_lock); 1672 down_write(&pmd->root_lock);
1673 if (!pmd->fail_io) 1673 if (!pmd->fail_io)
1674 r = __resize_data_dev(pmd, new_count); 1674 r = __resize_space_map(pmd->data_sm, new_count);
1675 up_write(&pmd->root_lock);
1676
1677 return r;
1678}
1679
1680int dm_pool_resize_metadata_dev(struct dm_pool_metadata *pmd, dm_block_t new_count)
1681{
1682 int r = -EINVAL;
1683
1684 down_write(&pmd->root_lock);
1685 if (!pmd->fail_io)
1686 r = __resize_space_map(pmd->metadata_sm, new_count);
1675 up_write(&pmd->root_lock); 1687 up_write(&pmd->root_lock);
1676 1688
1677 return r; 1689 return r;
@@ -1684,3 +1696,17 @@ void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd)
1684 dm_bm_set_read_only(pmd->bm); 1696 dm_bm_set_read_only(pmd->bm);
1685 up_write(&pmd->root_lock); 1697 up_write(&pmd->root_lock);
1686} 1698}
1699
1700int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd,
1701 dm_block_t threshold,
1702 dm_sm_threshold_fn fn,
1703 void *context)
1704{
1705 int r;
1706
1707 down_write(&pmd->root_lock);
1708 r = dm_sm_register_threshold_callback(pmd->metadata_sm, threshold, fn, context);
1709 up_write(&pmd->root_lock);
1710
1711 return r;
1712}
diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h
index 0cecc3702885..845ebbe589a9 100644
--- a/drivers/md/dm-thin-metadata.h
+++ b/drivers/md/dm-thin-metadata.h
@@ -8,6 +8,7 @@
8#define DM_THIN_METADATA_H 8#define DM_THIN_METADATA_H
9 9
10#include "persistent-data/dm-block-manager.h" 10#include "persistent-data/dm-block-manager.h"
11#include "persistent-data/dm-space-map.h"
11 12
12#define THIN_METADATA_BLOCK_SIZE 4096 13#define THIN_METADATA_BLOCK_SIZE 4096
13 14
@@ -185,6 +186,7 @@ int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result);
185 * blocks would be lost. 186 * blocks would be lost.
186 */ 187 */
187int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_size); 188int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_size);
189int dm_pool_resize_metadata_dev(struct dm_pool_metadata *pmd, dm_block_t new_size);
188 190
189/* 191/*
190 * Flicks the underlying block manager into read only mode, so you know 192 * Flicks the underlying block manager into read only mode, so you know
@@ -192,6 +194,11 @@ int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_size);
192 */ 194 */
193void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd); 195void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd);
194 196
197int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd,
198 dm_block_t threshold,
199 dm_sm_threshold_fn fn,
200 void *context);
201
195/*----------------------------------------------------------------*/ 202/*----------------------------------------------------------------*/
196 203
197#endif 204#endif
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 004ad1652b73..759cffc45cab 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -922,7 +922,7 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
922 return r; 922 return r;
923 923
924 if (free_blocks <= pool->low_water_blocks && !pool->low_water_triggered) { 924 if (free_blocks <= pool->low_water_blocks && !pool->low_water_triggered) {
925 DMWARN("%s: reached low water mark, sending event.", 925 DMWARN("%s: reached low water mark for data device: sending event.",
926 dm_device_name(pool->pool_md)); 926 dm_device_name(pool->pool_md));
927 spin_lock_irqsave(&pool->lock, flags); 927 spin_lock_irqsave(&pool->lock, flags);
928 pool->low_water_triggered = 1; 928 pool->low_water_triggered = 1;
@@ -1281,6 +1281,10 @@ static void process_bio_fail(struct thin_c *tc, struct bio *bio)
1281 bio_io_error(bio); 1281 bio_io_error(bio);
1282} 1282}
1283 1283
1284/*
1285 * FIXME: should we also commit due to size of transaction, measured in
1286 * metadata blocks?
1287 */
1284static int need_commit_due_to_time(struct pool *pool) 1288static int need_commit_due_to_time(struct pool *pool)
1285{ 1289{
1286 return jiffies < pool->last_commit_jiffies || 1290 return jiffies < pool->last_commit_jiffies ||
@@ -1909,6 +1913,56 @@ static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf,
1909 return r; 1913 return r;
1910} 1914}
1911 1915
1916static void metadata_low_callback(void *context)
1917{
1918 struct pool *pool = context;
1919
1920 DMWARN("%s: reached low water mark for metadata device: sending event.",
1921 dm_device_name(pool->pool_md));
1922
1923 dm_table_event(pool->ti->table);
1924}
1925
1926static sector_t get_metadata_dev_size(struct block_device *bdev)
1927{
1928 sector_t metadata_dev_size = i_size_read(bdev->bd_inode) >> SECTOR_SHIFT;
1929 char buffer[BDEVNAME_SIZE];
1930
1931 if (metadata_dev_size > THIN_METADATA_MAX_SECTORS_WARNING) {
1932 DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
1933 bdevname(bdev, buffer), THIN_METADATA_MAX_SECTORS);
1934 metadata_dev_size = THIN_METADATA_MAX_SECTORS_WARNING;
1935 }
1936
1937 return metadata_dev_size;
1938}
1939
1940static dm_block_t get_metadata_dev_size_in_blocks(struct block_device *bdev)
1941{
1942 sector_t metadata_dev_size = get_metadata_dev_size(bdev);
1943
1944 sector_div(metadata_dev_size, THIN_METADATA_BLOCK_SIZE >> SECTOR_SHIFT);
1945
1946 return metadata_dev_size;
1947}
1948
1949/*
1950 * When a metadata threshold is crossed a dm event is triggered, and
1951 * userland should respond by growing the metadata device. We could let
1952 * userland set the threshold, like we do with the data threshold, but I'm
1953 * not sure they know enough to do this well.
1954 */
1955static dm_block_t calc_metadata_threshold(struct pool_c *pt)
1956{
1957 /*
1958 * 4M is ample for all ops with the possible exception of thin
1959 * device deletion which is harmless if it fails (just retry the
1960 * delete after you've grown the device).
1961 */
1962 dm_block_t quarter = get_metadata_dev_size_in_blocks(pt->metadata_dev->bdev) / 4;
1963 return min((dm_block_t)1024ULL /* 4M */, quarter);
1964}
1965
1912/* 1966/*
1913 * thin-pool <metadata dev> <data dev> 1967 * thin-pool <metadata dev> <data dev>
1914 * <data block size (sectors)> 1968 * <data block size (sectors)>
@@ -1931,8 +1985,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
1931 unsigned long block_size; 1985 unsigned long block_size;
1932 dm_block_t low_water_blocks; 1986 dm_block_t low_water_blocks;
1933 struct dm_dev *metadata_dev; 1987 struct dm_dev *metadata_dev;
1934 sector_t metadata_dev_size; 1988 fmode_t metadata_mode;
1935 char b[BDEVNAME_SIZE];
1936 1989
1937 /* 1990 /*
1938 * FIXME Remove validation from scope of lock. 1991 * FIXME Remove validation from scope of lock.
@@ -1944,19 +1997,32 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
1944 r = -EINVAL; 1997 r = -EINVAL;
1945 goto out_unlock; 1998 goto out_unlock;
1946 } 1999 }
2000
1947 as.argc = argc; 2001 as.argc = argc;
1948 as.argv = argv; 2002 as.argv = argv;
1949 2003
1950 r = dm_get_device(ti, argv[0], FMODE_READ | FMODE_WRITE, &metadata_dev); 2004 /*
2005 * Set default pool features.
2006 */
2007 pool_features_init(&pf);
2008
2009 dm_consume_args(&as, 4);
2010 r = parse_pool_features(&as, &pf, ti);
2011 if (r)
2012 goto out_unlock;
2013
2014 metadata_mode = FMODE_READ | ((pf.mode == PM_READ_ONLY) ? 0 : FMODE_WRITE);
2015 r = dm_get_device(ti, argv[0], metadata_mode, &metadata_dev);
1951 if (r) { 2016 if (r) {
1952 ti->error = "Error opening metadata block device"; 2017 ti->error = "Error opening metadata block device";
1953 goto out_unlock; 2018 goto out_unlock;
1954 } 2019 }
1955 2020
1956 metadata_dev_size = i_size_read(metadata_dev->bdev->bd_inode) >> SECTOR_SHIFT; 2021 /*
1957 if (metadata_dev_size > THIN_METADATA_MAX_SECTORS_WARNING) 2022 * Run for the side-effect of possibly issuing a warning if the
1958 DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.", 2023 * device is too big.
1959 bdevname(metadata_dev->bdev, b), THIN_METADATA_MAX_SECTORS); 2024 */
2025 (void) get_metadata_dev_size(metadata_dev->bdev);
1960 2026
1961 r = dm_get_device(ti, argv[1], FMODE_READ | FMODE_WRITE, &data_dev); 2027 r = dm_get_device(ti, argv[1], FMODE_READ | FMODE_WRITE, &data_dev);
1962 if (r) { 2028 if (r) {
@@ -1979,16 +2045,6 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
1979 goto out; 2045 goto out;
1980 } 2046 }
1981 2047
1982 /*
1983 * Set default pool features.
1984 */
1985 pool_features_init(&pf);
1986
1987 dm_consume_args(&as, 4);
1988 r = parse_pool_features(&as, &pf, ti);
1989 if (r)
1990 goto out;
1991
1992 pt = kzalloc(sizeof(*pt), GFP_KERNEL); 2048 pt = kzalloc(sizeof(*pt), GFP_KERNEL);
1993 if (!pt) { 2049 if (!pt) {
1994 r = -ENOMEM; 2050 r = -ENOMEM;
@@ -2040,6 +2096,13 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
2040 } 2096 }
2041 ti->private = pt; 2097 ti->private = pt;
2042 2098
2099 r = dm_pool_register_metadata_threshold(pt->pool->pmd,
2100 calc_metadata_threshold(pt),
2101 metadata_low_callback,
2102 pool);
2103 if (r)
2104 goto out_free_pt;
2105
2043 pt->callbacks.congested_fn = pool_is_congested; 2106 pt->callbacks.congested_fn = pool_is_congested;
2044 dm_table_add_target_callbacks(ti->table, &pt->callbacks); 2107 dm_table_add_target_callbacks(ti->table, &pt->callbacks);
2045 2108
@@ -2079,18 +2142,7 @@ static int pool_map(struct dm_target *ti, struct bio *bio)
2079 return r; 2142 return r;
2080} 2143}
2081 2144
2082/* 2145static int maybe_resize_data_dev(struct dm_target *ti, bool *need_commit)
2083 * Retrieves the number of blocks of the data device from
2084 * the superblock and compares it to the actual device size,
2085 * thus resizing the data device in case it has grown.
2086 *
2087 * This both copes with opening preallocated data devices in the ctr
2088 * being followed by a resume
2089 * -and-
2090 * calling the resume method individually after userspace has
2091 * grown the data device in reaction to a table event.
2092 */
2093static int pool_preresume(struct dm_target *ti)
2094{ 2146{
2095 int r; 2147 int r;
2096 struct pool_c *pt = ti->private; 2148 struct pool_c *pt = ti->private;
@@ -2098,12 +2150,7 @@ static int pool_preresume(struct dm_target *ti)
2098 sector_t data_size = ti->len; 2150 sector_t data_size = ti->len;
2099 dm_block_t sb_data_size; 2151 dm_block_t sb_data_size;
2100 2152
2101 /* 2153 *need_commit = false;
2102 * Take control of the pool object.
2103 */
2104 r = bind_control_target(pool, ti);
2105 if (r)
2106 return r;
2107 2154
2108 (void) sector_div(data_size, pool->sectors_per_block); 2155 (void) sector_div(data_size, pool->sectors_per_block);
2109 2156
@@ -2114,7 +2161,7 @@ static int pool_preresume(struct dm_target *ti)
2114 } 2161 }
2115 2162
2116 if (data_size < sb_data_size) { 2163 if (data_size < sb_data_size) {
2117 DMERR("pool target too small, is %llu blocks (expected %llu)", 2164 DMERR("pool target (%llu blocks) too small: expected %llu",
2118 (unsigned long long)data_size, sb_data_size); 2165 (unsigned long long)data_size, sb_data_size);
2119 return -EINVAL; 2166 return -EINVAL;
2120 2167
@@ -2122,17 +2169,90 @@ static int pool_preresume(struct dm_target *ti)
2122 r = dm_pool_resize_data_dev(pool->pmd, data_size); 2169 r = dm_pool_resize_data_dev(pool->pmd, data_size);
2123 if (r) { 2170 if (r) {
2124 DMERR("failed to resize data device"); 2171 DMERR("failed to resize data device");
2125 /* FIXME Stricter than necessary: Rollback transaction instead here */
2126 set_pool_mode(pool, PM_READ_ONLY); 2172 set_pool_mode(pool, PM_READ_ONLY);
2127 return r; 2173 return r;
2128 } 2174 }
2129 2175
2130 (void) commit_or_fallback(pool); 2176 *need_commit = true;
2131 } 2177 }
2132 2178
2133 return 0; 2179 return 0;
2134} 2180}
2135 2181
2182static int maybe_resize_metadata_dev(struct dm_target *ti, bool *need_commit)
2183{
2184 int r;
2185 struct pool_c *pt = ti->private;
2186 struct pool *pool = pt->pool;
2187 dm_block_t metadata_dev_size, sb_metadata_dev_size;
2188
2189 *need_commit = false;
2190
2191 metadata_dev_size = get_metadata_dev_size(pool->md_dev);
2192
2193 r = dm_pool_get_metadata_dev_size(pool->pmd, &sb_metadata_dev_size);
2194 if (r) {
2195 DMERR("failed to retrieve data device size");
2196 return r;
2197 }
2198
2199 if (metadata_dev_size < sb_metadata_dev_size) {
2200 DMERR("metadata device (%llu sectors) too small: expected %llu",
2201 metadata_dev_size, sb_metadata_dev_size);
2202 return -EINVAL;
2203
2204 } else if (metadata_dev_size > sb_metadata_dev_size) {
2205 r = dm_pool_resize_metadata_dev(pool->pmd, metadata_dev_size);
2206 if (r) {
2207 DMERR("failed to resize metadata device");
2208 return r;
2209 }
2210
2211 *need_commit = true;
2212 }
2213
2214 return 0;
2215}
2216
2217/*
2218 * Retrieves the number of blocks of the data device from
2219 * the superblock and compares it to the actual device size,
2220 * thus resizing the data device in case it has grown.
2221 *
2222 * This both copes with opening preallocated data devices in the ctr
2223 * being followed by a resume
2224 * -and-
2225 * calling the resume method individually after userspace has
2226 * grown the data device in reaction to a table event.
2227 */
2228static int pool_preresume(struct dm_target *ti)
2229{
2230 int r;
2231 bool need_commit1, need_commit2;
2232 struct pool_c *pt = ti->private;
2233 struct pool *pool = pt->pool;
2234
2235 /*
2236 * Take control of the pool object.
2237 */
2238 r = bind_control_target(pool, ti);
2239 if (r)
2240 return r;
2241
2242 r = maybe_resize_data_dev(ti, &need_commit1);
2243 if (r)
2244 return r;
2245
2246 r = maybe_resize_metadata_dev(ti, &need_commit2);
2247 if (r)
2248 return r;
2249
2250 if (need_commit1 || need_commit2)
2251 (void) commit_or_fallback(pool);
2252
2253 return 0;
2254}
2255
2136static void pool_resume(struct dm_target *ti) 2256static void pool_resume(struct dm_target *ti)
2137{ 2257{
2138 struct pool_c *pt = ti->private; 2258 struct pool_c *pt = ti->private;
@@ -2549,7 +2669,7 @@ static struct target_type pool_target = {
2549 .name = "thin-pool", 2669 .name = "thin-pool",
2550 .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | 2670 .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
2551 DM_TARGET_IMMUTABLE, 2671 DM_TARGET_IMMUTABLE,
2552 .version = {1, 7, 0}, 2672 .version = {1, 8, 0},
2553 .module = THIS_MODULE, 2673 .module = THIS_MODULE,
2554 .ctr = pool_ctr, 2674 .ctr = pool_ctr,
2555 .dtr = pool_dtr, 2675 .dtr = pool_dtr,
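
One detail of the dm-thin.c changes above is worth pinning down with numbers. calc_metadata_threshold() decides when the pool fires the metadata_low_callback() event: a quarter of the metadata device, capped at 1024 metadata blocks, i.e. 4 MB with the 4 KB THIN_METADATA_BLOCK_SIZE, which the comment calls ample for any single operation. The standalone sketch below simply reproduces that arithmetic; the device sizes are example values.

#include <stdint.h>
#include <stdio.h>

#define SECTOR_SIZE              512u
#define THIN_METADATA_BLOCK_SIZE 4096u  /* bytes, as in dm-thin-metadata.h */

/* Mirrors calc_metadata_threshold(): min(1024 blocks, a quarter of the device). */
static uint64_t metadata_threshold_blocks(uint64_t metadata_dev_sectors)
{
        uint64_t blocks = metadata_dev_sectors /
                          (THIN_METADATA_BLOCK_SIZE / SECTOR_SIZE);
        uint64_t quarter = blocks / 4;

        return quarter < 1024 ? quarter : 1024;
}

int main(void)
{
        /* 16 MB device: 32768 sectors -> 4096 blocks -> quarter = 1024 -> capped at 1024. */
        printf("16 MB device: threshold = %llu blocks\n",
               (unsigned long long)metadata_threshold_blocks(32768));

        /* 2 MB device: 4096 sectors -> 512 blocks -> quarter = 128. */
        printf("2 MB  device: threshold = %llu blocks\n",
               (unsigned long long)metadata_threshold_blocks(4096));
        return 0;
}

When the event fires, userland is expected to grow the metadata device and resume the pool, at which point maybe_resize_metadata_dev() picks up the new size.
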
diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c
index f6d29e614ab7..e735a6d5a793 100644
--- a/drivers/md/persistent-data/dm-space-map-disk.c
+++ b/drivers/md/persistent-data/dm-space-map-disk.c
@@ -248,7 +248,8 @@ static struct dm_space_map ops = {
248 .new_block = sm_disk_new_block, 248 .new_block = sm_disk_new_block,
249 .commit = sm_disk_commit, 249 .commit = sm_disk_commit,
250 .root_size = sm_disk_root_size, 250 .root_size = sm_disk_root_size,
251 .copy_root = sm_disk_copy_root 251 .copy_root = sm_disk_copy_root,
252 .register_threshold_callback = NULL
252}; 253};
253 254
254struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm, 255struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm,
diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c
index 906cf3df71af..1c959684caef 100644
--- a/drivers/md/persistent-data/dm-space-map-metadata.c
+++ b/drivers/md/persistent-data/dm-space-map-metadata.c
@@ -17,6 +17,55 @@
17/*----------------------------------------------------------------*/ 17/*----------------------------------------------------------------*/
18 18
19/* 19/*
20 * An edge triggered threshold.
21 */
22struct threshold {
23 bool threshold_set;
24 bool value_set;
25 dm_block_t threshold;
26 dm_block_t current_value;
27 dm_sm_threshold_fn fn;
28 void *context;
29};
30
31static void threshold_init(struct threshold *t)
32{
33 t->threshold_set = false;
34 t->value_set = false;
35}
36
37static void set_threshold(struct threshold *t, dm_block_t value,
38 dm_sm_threshold_fn fn, void *context)
39{
40 t->threshold_set = true;
41 t->threshold = value;
42 t->fn = fn;
43 t->context = context;
44}
45
46static bool below_threshold(struct threshold *t, dm_block_t value)
47{
48 return t->threshold_set && value <= t->threshold;
49}
50
51static bool threshold_already_triggered(struct threshold *t)
52{
53 return t->value_set && below_threshold(t, t->current_value);
54}
55
56static void check_threshold(struct threshold *t, dm_block_t value)
57{
58 if (below_threshold(t, value) &&
59 !threshold_already_triggered(t))
60 t->fn(t->context);
61
62 t->value_set = true;
63 t->current_value = value;
64}
65
66/*----------------------------------------------------------------*/
67
68/*
20 * Space map interface. 69 * Space map interface.
21 * 70 *
22 * The low level disk format is written using the standard btree and 71 * The low level disk format is written using the standard btree and
@@ -54,6 +103,8 @@ struct sm_metadata {
54 unsigned allocated_this_transaction; 103 unsigned allocated_this_transaction;
55 unsigned nr_uncommitted; 104 unsigned nr_uncommitted;
56 struct block_op uncommitted[MAX_RECURSIVE_ALLOCATIONS]; 105 struct block_op uncommitted[MAX_RECURSIVE_ALLOCATIONS];
106
107 struct threshold threshold;
57}; 108};
58 109
59static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b) 110static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b)
@@ -144,12 +195,6 @@ static void sm_metadata_destroy(struct dm_space_map *sm)
144 kfree(smm); 195 kfree(smm);
145} 196}
146 197
147static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
148{
149 DMERR("doesn't support extend");
150 return -EINVAL;
151}
152
153static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count) 198static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
154{ 199{
155 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 200 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
@@ -335,9 +380,19 @@ static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b)
335 380
336static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b) 381static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b)
337{ 382{
383 dm_block_t count;
384 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
385
338 int r = sm_metadata_new_block_(sm, b); 386 int r = sm_metadata_new_block_(sm, b);
339 if (r) 387 if (r)
340 DMERR("unable to allocate new metadata block"); 388 DMERR("unable to allocate new metadata block");
389
390 r = sm_metadata_get_nr_free(sm, &count);
391 if (r)
392 DMERR("couldn't get free block count");
393
394 check_threshold(&smm->threshold, count);
395
341 return r; 396 return r;
342} 397}
343 398
@@ -357,6 +412,18 @@ static int sm_metadata_commit(struct dm_space_map *sm)
357 return 0; 412 return 0;
358} 413}
359 414
415static int sm_metadata_register_threshold_callback(struct dm_space_map *sm,
416 dm_block_t threshold,
417 dm_sm_threshold_fn fn,
418 void *context)
419{
420 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
421
422 set_threshold(&smm->threshold, threshold, fn, context);
423
424 return 0;
425}
426
360static int sm_metadata_root_size(struct dm_space_map *sm, size_t *result) 427static int sm_metadata_root_size(struct dm_space_map *sm, size_t *result)
361{ 428{
362 *result = sizeof(struct disk_sm_root); 429 *result = sizeof(struct disk_sm_root);
@@ -382,6 +449,8 @@ static int sm_metadata_copy_root(struct dm_space_map *sm, void *where_le, size_t
382 return 0; 449 return 0;
383} 450}
384 451
452static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks);
453
385static struct dm_space_map ops = { 454static struct dm_space_map ops = {
386 .destroy = sm_metadata_destroy, 455 .destroy = sm_metadata_destroy,
387 .extend = sm_metadata_extend, 456 .extend = sm_metadata_extend,
@@ -395,7 +464,8 @@ static struct dm_space_map ops = {
395 .new_block = sm_metadata_new_block, 464 .new_block = sm_metadata_new_block,
396 .commit = sm_metadata_commit, 465 .commit = sm_metadata_commit,
397 .root_size = sm_metadata_root_size, 466 .root_size = sm_metadata_root_size,
398 .copy_root = sm_metadata_copy_root 467 .copy_root = sm_metadata_copy_root,
468 .register_threshold_callback = sm_metadata_register_threshold_callback
399}; 469};
400 470
401/*----------------------------------------------------------------*/ 471/*----------------------------------------------------------------*/
@@ -410,7 +480,7 @@ static void sm_bootstrap_destroy(struct dm_space_map *sm)
410 480
411static int sm_bootstrap_extend(struct dm_space_map *sm, dm_block_t extra_blocks) 481static int sm_bootstrap_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
412{ 482{
413 DMERR("boostrap doesn't support extend"); 483 DMERR("bootstrap doesn't support extend");
414 484
415 return -EINVAL; 485 return -EINVAL;
416} 486}
@@ -450,7 +520,7 @@ static int sm_bootstrap_count_is_more_than_one(struct dm_space_map *sm,
450static int sm_bootstrap_set_count(struct dm_space_map *sm, dm_block_t b, 520static int sm_bootstrap_set_count(struct dm_space_map *sm, dm_block_t b,
451 uint32_t count) 521 uint32_t count)
452{ 522{
453 DMERR("boostrap doesn't support set_count"); 523 DMERR("bootstrap doesn't support set_count");
454 524
455 return -EINVAL; 525 return -EINVAL;
456} 526}
@@ -491,7 +561,7 @@ static int sm_bootstrap_commit(struct dm_space_map *sm)
491 561
492static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result) 562static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result)
493{ 563{
494 DMERR("boostrap doesn't support root_size"); 564 DMERR("bootstrap doesn't support root_size");
495 565
496 return -EINVAL; 566 return -EINVAL;
497} 567}
@@ -499,7 +569,7 @@ static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result)
499static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where, 569static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where,
500 size_t max) 570 size_t max)
501{ 571{
502 DMERR("boostrap doesn't support copy_root"); 572 DMERR("bootstrap doesn't support copy_root");
503 573
504 return -EINVAL; 574 return -EINVAL;
505} 575}
@@ -517,11 +587,42 @@ static struct dm_space_map bootstrap_ops = {
517 .new_block = sm_bootstrap_new_block, 587 .new_block = sm_bootstrap_new_block,
518 .commit = sm_bootstrap_commit, 588 .commit = sm_bootstrap_commit,
519 .root_size = sm_bootstrap_root_size, 589 .root_size = sm_bootstrap_root_size,
520 .copy_root = sm_bootstrap_copy_root 590 .copy_root = sm_bootstrap_copy_root,
591 .register_threshold_callback = NULL
521}; 592};
522 593
523/*----------------------------------------------------------------*/ 594/*----------------------------------------------------------------*/
524 595
596static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
597{
598 int r, i;
599 enum allocation_event ev;
600 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
601 dm_block_t old_len = smm->ll.nr_blocks;
602
603 /*
604 * Flick into a mode where all blocks get allocated in the new area.
605 */
606 smm->begin = old_len;
607 memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));
608
609 /*
610 * Extend.
611 */
612 r = sm_ll_extend(&smm->ll, extra_blocks);
613
614 /*
615 * Switch back to normal behaviour.
616 */
617 memcpy(&smm->sm, &ops, sizeof(smm->sm));
618 for (i = old_len; !r && i < smm->begin; i++)
619 r = sm_ll_inc(&smm->ll, i, &ev);
620
621 return r;
622}
623
624/*----------------------------------------------------------------*/
625
525struct dm_space_map *dm_sm_metadata_init(void) 626struct dm_space_map *dm_sm_metadata_init(void)
526{ 627{
527 struct sm_metadata *smm; 628 struct sm_metadata *smm;
@@ -549,6 +650,7 @@ int dm_sm_metadata_create(struct dm_space_map *sm,
549 smm->recursion_count = 0; 650 smm->recursion_count = 0;
550 smm->allocated_this_transaction = 0; 651 smm->allocated_this_transaction = 0;
551 smm->nr_uncommitted = 0; 652 smm->nr_uncommitted = 0;
653 threshold_init(&smm->threshold);
552 654
553 memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm)); 655 memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));
554 656
@@ -590,6 +692,7 @@ int dm_sm_metadata_open(struct dm_space_map *sm,
590 smm->recursion_count = 0; 692 smm->recursion_count = 0;
591 smm->allocated_this_transaction = 0; 693 smm->allocated_this_transaction = 0;
592 smm->nr_uncommitted = 0; 694 smm->nr_uncommitted = 0;
695 threshold_init(&smm->threshold);
593 696
594 memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll)); 697 memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
595 return 0; 698 return 0;
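
The threshold added to the metadata space map above is edge-triggered: check_threshold() invokes the callback only when the free-block count crosses from above the threshold to at or below it, and it remembers the last value seen so that further allocations below the line do not re-fire the event until the count has risen back above the threshold. The standalone sketch below reproduces that state machine outside the kernel; the callback, context string and block counts are illustrative.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef void (*threshold_fn)(void *context);

struct threshold {
        bool threshold_set;
        bool value_set;
        uint64_t threshold;
        uint64_t current_value;
        threshold_fn fn;
        void *context;
};

static void threshold_init(struct threshold *t)
{
        t->threshold_set = false;
        t->value_set = false;
}

static void set_threshold(struct threshold *t, uint64_t value,
                          threshold_fn fn, void *context)
{
        t->threshold_set = true;
        t->threshold = value;
        t->fn = fn;
        t->context = context;
}

static bool below_threshold(struct threshold *t, uint64_t value)
{
        return t->threshold_set && value <= t->threshold;
}

static bool already_triggered(struct threshold *t)
{
        return t->value_set && below_threshold(t, t->current_value);
}

static void check_threshold(struct threshold *t, uint64_t value)
{
        if (below_threshold(t, value) && !already_triggered(t))
                t->fn(t->context);

        t->value_set = true;
        t->current_value = value;
}

static void low_event(void *context)
{
        printf("event: free blocks dropped to/below threshold (%s)\n",
               (const char *)context);
}

int main(void)
{
        struct threshold t;
        uint64_t free_blocks[] = { 10, 6, 5, 4, 3, 9, 4 };  /* threshold is 5 */
        size_t i;

        threshold_init(&t);
        set_threshold(&t, 5, low_event, "metadata");

        for (i = 0; i < sizeof(free_blocks) / sizeof(free_blocks[0]); i++) {
                printf("free = %llu\n", (unsigned long long)free_blocks[i]);
                check_threshold(&t, free_blocks[i]);
        }
        return 0;
}

Running it, the event fires once at 5, stays quiet at 4 and 3, and fires a second time at the final 4 only because the count went back up to 9 in between.
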
diff --git a/drivers/md/persistent-data/dm-space-map.h b/drivers/md/persistent-data/dm-space-map.h
index 1cbfc6b1638a..3e6d1153b7c4 100644
--- a/drivers/md/persistent-data/dm-space-map.h
+++ b/drivers/md/persistent-data/dm-space-map.h
@@ -9,6 +9,8 @@
9 9
10#include "dm-block-manager.h" 10#include "dm-block-manager.h"
11 11
12typedef void (*dm_sm_threshold_fn)(void *context);
13
12/* 14/*
13 * struct dm_space_map keeps a record of how many times each block in a device 15 * struct dm_space_map keeps a record of how many times each block in a device
14 * is referenced. It needs to be fixed on disk as part of the transaction. 16 * is referenced. It needs to be fixed on disk as part of the transaction.
@@ -59,6 +61,15 @@ struct dm_space_map {
59 */ 61 */
60 int (*root_size)(struct dm_space_map *sm, size_t *result); 62 int (*root_size)(struct dm_space_map *sm, size_t *result);
61 int (*copy_root)(struct dm_space_map *sm, void *copy_to_here_le, size_t len); 63 int (*copy_root)(struct dm_space_map *sm, void *copy_to_here_le, size_t len);
64
65 /*
66 * You can register one threshold callback which is edge-triggered
67 * when the free space in the space map drops below the threshold.
68 */
69 int (*register_threshold_callback)(struct dm_space_map *sm,
70 dm_block_t threshold,
71 dm_sm_threshold_fn fn,
72 void *context);
62}; 73};
63 74
64/*----------------------------------------------------------------*/ 75/*----------------------------------------------------------------*/
@@ -131,4 +142,16 @@ static inline int dm_sm_copy_root(struct dm_space_map *sm, void *copy_to_here_le
131 return sm->copy_root(sm, copy_to_here_le, len); 142 return sm->copy_root(sm, copy_to_here_le, len);
132} 143}
133 144
145static inline int dm_sm_register_threshold_callback(struct dm_space_map *sm,
146 dm_block_t threshold,
147 dm_sm_threshold_fn fn,
148 void *context)
149{
150 if (sm->register_threshold_callback)
151 return sm->register_threshold_callback(sm, threshold, fn, context);
152
153 return -EINVAL;
154}
155
156
134#endif /* _LINUX_DM_SPACE_MAP_H */ 157#endif /* _LINUX_DM_SPACE_MAP_H */
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 1e483fa7afb4..3cd32478f2fd 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -79,11 +79,26 @@ typedef int (*dm_ioctl_fn) (struct dm_target *ti, unsigned int cmd,
79typedef int (*dm_merge_fn) (struct dm_target *ti, struct bvec_merge_data *bvm, 79typedef int (*dm_merge_fn) (struct dm_target *ti, struct bvec_merge_data *bvm,
80 struct bio_vec *biovec, int max_size); 80 struct bio_vec *biovec, int max_size);
81 81
82/*
83 * These iteration functions are typically used to check (and combine)
84 * properties of underlying devices.
85 * E.g. Does at least one underlying device support flush?
86 * Does any underlying device not support WRITE_SAME?
87 *
88 * The callout function is called once for each contiguous section of
89 * an underlying device. State can be maintained in *data.
90 * Return non-zero to stop iterating through any further devices.
91 */
82typedef int (*iterate_devices_callout_fn) (struct dm_target *ti, 92typedef int (*iterate_devices_callout_fn) (struct dm_target *ti,
83 struct dm_dev *dev, 93 struct dm_dev *dev,
84 sector_t start, sector_t len, 94 sector_t start, sector_t len,
85 void *data); 95 void *data);
86 96
97/*
98 * This function must iterate through each section of device used by the
99 * target until it encounters a non-zero return code, which it then returns.
100 * Returns zero if no callout returned non-zero.
101 */
87typedef int (*dm_iterate_devices_fn) (struct dm_target *ti, 102typedef int (*dm_iterate_devices_fn) (struct dm_target *ti,
88 iterate_devices_callout_fn fn, 103 iterate_devices_callout_fn fn,
89 void *data); 104 void *data);
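
The documentation added above pins down the iterate_devices contract: the callout runs once per contiguous section of an underlying device, a non-zero return stops the walk, and dm_iterate_devices_fn hands back that first non-zero value (zero if every callout returned zero). That contract is exactly why the dm-table.c hunk earlier in this series drops the '!': with a "device is not WRITE SAME capable" predicate, a zero result from iterate_devices means every device is capable. The standalone model below mirrors the pattern; the device list and names are invented.

#include <stdbool.h>
#include <stdio.h>

struct fake_dev {
        const char *name;
        bool write_same_capable;
};

typedef int (*callout_fn)(const struct fake_dev *dev, void *data);

/* Models dm_iterate_devices_fn: stop at and return the first non-zero result. */
static int iterate_devices(const struct fake_dev *devs, int n,
                           callout_fn fn, void *data)
{
        int i, r;

        for (i = 0; i < n; i++) {
                r = fn(&devs[i], data);
                if (r)
                        return r;
        }
        return 0;
}

/* Models device_not_write_same_capable(): non-zero means "this one can't". */
static int device_not_write_same_capable(const struct fake_dev *dev, void *data)
{
        (void)data;
        return !dev->write_same_capable;
}

static bool table_supports_write_same(const struct fake_dev *devs, int n)
{
        /* Supported only if no device reports "not capable", i.e. iterate returns 0. */
        return iterate_devices(devs, n, device_not_write_same_capable, NULL) == 0;
}

int main(void)
{
        struct fake_dev all_ok[]  = { { "sda", true }, { "sdb", true } };
        struct fake_dev one_bad[] = { { "sda", true }, { "sdc", false } };

        printf("all_ok:  %s\n", table_supports_write_same(all_ok, 2) ? "yes" : "no");
        printf("one_bad: %s\n", table_supports_write_same(one_bad, 2) ? "yes" : "no");
        return 0;
}
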