author	Linus Torvalds <torvalds@linux-foundation.org>	2014-01-22 23:17:48 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-01-22 23:17:48 -0500
commit	fe41c2c018b8af9b370a40845f547e22894ff68a (patch)
tree	3573a10912e24ffcd48177785043e0de17b8e9d0 /drivers
parent	194e57fd1835564735fd0ba5e3870230861cacd2 (diff)
parent	5066a4df1f427faac8372d20494483bb09a4a1cd (diff)
Merge tag 'dm-3.14-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device-mapper changes from Mike Snitzer:
"A lot of attention was paid to improving the thin-provisioning
target's handling of metadata operation failures and running out of
space. A new 'error_if_no_space' feature was added to allow users to
error IOs rather than queue them when either the data or metadata
space is exhausted.
Additional fixes/features include:
- a few fixes to properly support thin metadata device resizing
- a solution for reliably waiting for a DM device's embedded kobject
to be released before destroying the device
- old dm-snapshot is updated to use the dm-bufio interface to take
advantage of readahead capabilities that improve snapshot
activation
- new dm-cache target tunables to control how quickly data is
promoted to the cache (fast) device
- improved write efficiency of cluster mirror target by combining
userspace flush and mark requests"
* tag 'dm-3.14-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (35 commits)
dm log userspace: allow mark requests to piggyback on flush requests
dm space map metadata: fix bug in resizing of thin metadata
dm cache: add policy name to status output
dm thin: fix pool feature parsing
dm sysfs: fix a module unload race
dm snapshot: use dm-bufio prefetch
dm snapshot: use dm-bufio
dm snapshot: prepare for switch to using dm-bufio
dm snapshot: use GFP_KERNEL when initializing exceptions
dm cache: add block sizes and total cache blocks to status output
dm btree: add dm_btree_find_lowest_key
dm space map metadata: fix extending the space map
dm space map common: make sure new space is used during extend
dm: wait until embedded kobject is released before destroying a device
dm: remove pointless kobject comparison in dm_get_from_kobject
dm snapshot: call destroy_work_on_stack() to pair with INIT_WORK_ONSTACK()
dm cache policy mq: introduce three promotion threshold tunables
dm cache policy mq: use list_del_init instead of list_del + INIT_LIST_HEAD
dm thin: fix set_pool_mode exposed pool operation races
dm thin: eliminate the no_free_space flag
...
Diffstat (limited to 'drivers')
25 files changed, 700 insertions(+), 294 deletions(-)
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index f2ccbc3b9fe4..9a06fe883766 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -176,8 +176,12 @@ config MD_FAULTY
 
 source "drivers/md/bcache/Kconfig"
 
+config BLK_DEV_DM_BUILTIN
+	boolean
+
 config BLK_DEV_DM
 	tristate "Device mapper support"
+	select BLK_DEV_DM_BUILTIN
 	---help---
 	  Device-mapper is a low level volume manager.  It works by allowing
 	  people to specify mappings for ranges of logical sectors. Various
@@ -238,6 +242,7 @@ config DM_CRYPT
 config DM_SNAPSHOT
 	tristate "Snapshot target"
 	depends on BLK_DEV_DM
+	select DM_BUFIO
 	---help---
 	  Allow volume managers to take writable snapshots of a device.
 
@@ -250,12 +255,12 @@ config DM_THIN_PROVISIONING
 	  Provides thin provisioning and snapshots that share a data store.
 
 config DM_DEBUG_BLOCK_STACK_TRACING
-	boolean "Keep stack trace of thin provisioning block lock holders"
-	depends on STACKTRACE_SUPPORT && DM_THIN_PROVISIONING
+	boolean "Keep stack trace of persistent data block lock holders"
+	depends on STACKTRACE_SUPPORT && DM_PERSISTENT_DATA
 	select STACKTRACE
 	---help---
 	  Enable this for messages that may help debug problems with the
-	  block manager locking used by thin provisioning.
+	  block manager locking used by thin provisioning and caching.
 
 	  If unsure, say N.
 
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 2acc43fe0229..f26d83292579 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -32,6 +32,7 @@ obj-$(CONFIG_MD_FAULTY)		+= faulty.o
 obj-$(CONFIG_BCACHE)		+= bcache/
 obj-$(CONFIG_BLK_DEV_MD)	+= md-mod.o
 obj-$(CONFIG_BLK_DEV_DM)	+= dm-mod.o
+obj-$(CONFIG_BLK_DEV_DM_BUILTIN) += dm-builtin.o
 obj-$(CONFIG_DM_BUFIO)		+= dm-bufio.o
 obj-$(CONFIG_DM_BIO_PRISON)	+= dm-bio-prison.o
 obj-$(CONFIG_DM_CRYPT)		+= dm-crypt.o
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 54bdd923316f..9ed42125514b 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -104,6 +104,8 @@ struct dm_bufio_client {
 	struct list_head reserved_buffers;
 	unsigned need_reserved_buffers;
 
+	unsigned minimum_buffers;
+
 	struct hlist_head *cache_hash;
 	wait_queue_head_t free_buffer_wait;
 
@@ -861,8 +863,8 @@ static void __get_memory_limit(struct dm_bufio_client *c,
 		buffers = dm_bufio_cache_size_per_client >>
 			  (c->sectors_per_block_bits + SECTOR_SHIFT);
 
-	if (buffers < DM_BUFIO_MIN_BUFFERS)
-		buffers = DM_BUFIO_MIN_BUFFERS;
+	if (buffers < c->minimum_buffers)
+		buffers = c->minimum_buffers;
 
 	*limit_buffers = buffers;
 	*threshold_buffers = buffers * DM_BUFIO_WRITEBACK_PERCENT / 100;
@@ -1350,6 +1352,34 @@ retry:
 }
 EXPORT_SYMBOL_GPL(dm_bufio_release_move);
 
+/*
+ * Free the given buffer.
+ *
+ * This is just a hint, if the buffer is in use or dirty, this function
+ * does nothing.
+ */
+void dm_bufio_forget(struct dm_bufio_client *c, sector_t block)
+{
+	struct dm_buffer *b;
+
+	dm_bufio_lock(c);
+
+	b = __find(c, block);
+	if (b && likely(!b->hold_count) && likely(!b->state)) {
+		__unlink_buffer(b);
+		__free_buffer_wake(b);
+	}
+
+	dm_bufio_unlock(c);
+}
+EXPORT_SYMBOL(dm_bufio_forget);
+
+void dm_bufio_set_minimum_buffers(struct dm_bufio_client *c, unsigned n)
+{
+	c->minimum_buffers = n;
+}
+EXPORT_SYMBOL(dm_bufio_set_minimum_buffers);
+
 unsigned dm_bufio_get_block_size(struct dm_bufio_client *c)
 {
 	return c->block_size;
@@ -1546,6 +1576,8 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsigned block_size,
 	INIT_LIST_HEAD(&c->reserved_buffers);
 	c->need_reserved_buffers = reserved_buffers;
 
+	c->minimum_buffers = DM_BUFIO_MIN_BUFFERS;
+
 	init_waitqueue_head(&c->free_buffer_wait);
 	c->async_write_error = 0;
 
diff --git a/drivers/md/dm-bufio.h b/drivers/md/dm-bufio.h
index b142946a9e32..c096779a7292 100644
--- a/drivers/md/dm-bufio.h
+++ b/drivers/md/dm-bufio.h
@@ -108,6 +108,18 @@ int dm_bufio_issue_flush(struct dm_bufio_client *c);
 */
 void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block);
 
+/*
+ * Free the given buffer.
+ * This is just a hint, if the buffer is in use or dirty, this function
+ * does nothing.
+ */
+void dm_bufio_forget(struct dm_bufio_client *c, sector_t block);
+
+/*
+ * Set the minimum number of buffers before cleanup happens.
+ */
+void dm_bufio_set_minimum_buffers(struct dm_bufio_client *c, unsigned n);
+
 unsigned dm_bufio_get_block_size(struct dm_bufio_client *c);
 sector_t dm_bufio_get_device_size(struct dm_bufio_client *c);
 sector_t dm_bufio_get_block_number(struct dm_buffer *b);
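The intended pairing of these two new calls is easiest to see in the dm-snapshot conversion further down: raise the client's minimum buffer count to cover a readahead window, prefetch ahead of the current block, read, then forget each block once processed so a one-shot scan does not pollute the cache. A minimal sketch of that usage, assuming a hypothetical process_block() callback and with error handling trimmed:

	/* Sketch: one-pass metadata scan with readahead via dm-bufio. */
	static int scan_metadata(struct block_device *bdev, unsigned block_size,
				 sector_t nr_blocks, void (*process_block)(void *))
	{
		struct dm_bufio_client *c;
		sector_t b;
		int r = 0;

		c = dm_bufio_client_create(bdev, block_size, 1, 0, NULL, NULL);
		if (IS_ERR(c))
			return PTR_ERR(c);

		/* One current buffer plus a 12-block readahead window. */
		dm_bufio_set_minimum_buffers(c, 1 + 12);

		for (b = 0; b < nr_blocks; b++) {
			struct dm_buffer *bp;
			void *data;

			if (b + 12 < nr_blocks)
				dm_bufio_prefetch(c, b + 12, 1);

			data = dm_bufio_read(c, b, &bp);
			if (IS_ERR(data)) {
				r = PTR_ERR(data);
				break;
			}

			process_block(data);
			dm_bufio_release(bp);
			/* One-shot scan: drop the buffer rather than cache it. */
			dm_bufio_forget(c, b);
		}

		dm_bufio_client_destroy(c);
		return r;
	}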
diff --git a/drivers/md/dm-builtin.c b/drivers/md/dm-builtin.c
new file mode 100644
index 000000000000..6c9049c51b2b
--- /dev/null
+++ b/drivers/md/dm-builtin.c
@@ -0,0 +1,48 @@
+#include "dm.h"
+
+/*
+ * The kobject release method must not be placed in the module itself,
+ * otherwise we are subject to module unload races.
+ *
+ * The release method is called when the last reference to the kobject is
+ * dropped. It may be called by any other kernel code that drops the last
+ * reference.
+ *
+ * The release method suffers from module unload race. We may prevent the
+ * module from being unloaded at the start of the release method (using
+ * increased module reference count or synchronizing against the release
+ * method), however there is no way to prevent the module from being
+ * unloaded at the end of the release method.
+ *
+ * If this code were placed in the dm module, the following race may
+ * happen:
+ *  1. Some other process takes a reference to dm kobject
+ *  2. The user issues ioctl function to unload the dm device
+ *  3. dm_sysfs_exit calls kobject_put, however the object is not released
+ *     because of the other reference taken at step 1
+ *  4. dm_sysfs_exit waits on the completion
+ *  5. The other process that took the reference in step 1 drops it,
+ *     dm_kobject_release is called from this process
+ *  6. dm_kobject_release calls complete()
+ *  7. a reschedule happens before dm_kobject_release returns
+ *  8. dm_sysfs_exit continues, the dm device is unloaded, module reference
+ *     count is decremented
+ *  9. The user unloads the dm module
+ * 10. The other process that was rescheduled in step 7 continues to run,
+ *     it is now executing code in unloaded module, so it crashes
+ *
+ * Note that if the process that takes the foreign reference to dm kobject
+ * has a low priority and the system is sufficiently loaded with
+ * higher-priority processes that prevent the low-priority process from
+ * being scheduled long enough, this bug may really happen.
+ *
+ * In order to fix this module unload race, we place the release method
+ * into a helper code that is compiled directly into the kernel.
+ */
+
+void dm_kobject_release(struct kobject *kobj)
+{
+	complete(dm_get_completion_from_kobject(kobj));
+}
+
+EXPORT_SYMBOL(dm_kobject_release);
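For reference, the waiting side of this scheme pairs the kobject with a completion: the teardown path drops its reference and then blocks until the built-in release method above has actually run. A sketch of that pattern, assuming a holder struct along the lines of what the dm.h side of this series introduces (that side is not shown in this hunk):

	struct dm_kobject_holder {
		struct kobject kobj;
		struct completion completion;
	};

	static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj)
	{
		return &container_of(kobj, struct dm_kobject_holder, kobj)->completion;
	}

	static void teardown(struct dm_kobject_holder *holder)
	{
		/*
		 * Drop our reference; if someone else still holds one, block
		 * until the built-in release method calls complete().  Only
		 * then is it safe to free the holder and unload the module.
		 */
		kobject_put(&holder->kobj);
		wait_for_completion(&holder->completion);
	}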
diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c
index 64780ad73bb0..930e8c3d73e9 100644
--- a/drivers/md/dm-cache-policy-mq.c
+++ b/drivers/md/dm-cache-policy-mq.c
@@ -287,9 +287,8 @@ static struct entry *alloc_entry(struct entry_pool *ep)
 static struct entry *alloc_particular_entry(struct entry_pool *ep, dm_cblock_t cblock)
 {
 	struct entry *e = ep->entries + from_cblock(cblock);
-	list_del(&e->list);
 
-	INIT_LIST_HEAD(&e->list);
+	list_del_init(&e->list);
 	INIT_HLIST_NODE(&e->hlist);
 	ep->nr_allocated++;
 
@@ -391,6 +390,10 @@ struct mq_policy {
 	 */
 	unsigned promote_threshold;
 
+	unsigned discard_promote_adjustment;
+	unsigned read_promote_adjustment;
+	unsigned write_promote_adjustment;
+
 	/*
 	 * The hash table allows us to quickly find an entry by origin
 	 * block.  Both pre_cache and cache entries are in here.
@@ -400,6 +403,10 @@ struct mq_policy {
 	struct hlist_head *table;
 };
 
+#define DEFAULT_DISCARD_PROMOTE_ADJUSTMENT 1
+#define DEFAULT_READ_PROMOTE_ADJUSTMENT 4
+#define DEFAULT_WRITE_PROMOTE_ADJUSTMENT 8
+
 /*----------------------------------------------------------------*/
 
 /*
@@ -642,25 +649,21 @@ static int demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock)
  * We bias towards reads, since they can be demoted at no cost if they
  * haven't been dirtied.
  */
-#define DISCARDED_PROMOTE_THRESHOLD 1
-#define READ_PROMOTE_THRESHOLD 4
-#define WRITE_PROMOTE_THRESHOLD 8
-
 static unsigned adjusted_promote_threshold(struct mq_policy *mq,
 					   bool discarded_oblock, int data_dir)
 {
 	if (data_dir == READ)
-		return mq->promote_threshold + READ_PROMOTE_THRESHOLD;
+		return mq->promote_threshold + mq->read_promote_adjustment;
 
 	if (discarded_oblock && (any_free_cblocks(mq) || any_clean_cblocks(mq))) {
 		/*
 		 * We don't need to do any copying at all, so give this a
 		 * very low threshold.
 		 */
-		return DISCARDED_PROMOTE_THRESHOLD;
+		return mq->discard_promote_adjustment;
 	}
 
-	return mq->promote_threshold + WRITE_PROMOTE_THRESHOLD;
+	return mq->promote_threshold + mq->write_promote_adjustment;
 }
 
 static bool should_promote(struct mq_policy *mq, struct entry *e,
@@ -809,7 +812,7 @@ static int no_entry_found(struct mq_policy *mq, dm_oblock_t oblock,
 			  bool can_migrate, bool discarded_oblock,
 			  int data_dir, struct policy_result *result)
 {
-	if (adjusted_promote_threshold(mq, discarded_oblock, data_dir) == 1) {
+	if (adjusted_promote_threshold(mq, discarded_oblock, data_dir) <= 1) {
 		if (can_migrate)
 			insert_in_cache(mq, oblock, result);
 		else
@@ -1135,20 +1138,28 @@ static int mq_set_config_value(struct dm_cache_policy *p,
 			       const char *key, const char *value)
 {
 	struct mq_policy *mq = to_mq_policy(p);
-	enum io_pattern pattern;
 	unsigned long tmp;
 
-	if (!strcasecmp(key, "random_threshold"))
-		pattern = PATTERN_RANDOM;
-	else if (!strcasecmp(key, "sequential_threshold"))
-		pattern = PATTERN_SEQUENTIAL;
-	else
-		return -EINVAL;
-
 	if (kstrtoul(value, 10, &tmp))
 		return -EINVAL;
 
-	mq->tracker.thresholds[pattern] = tmp;
+	if (!strcasecmp(key, "random_threshold")) {
+		mq->tracker.thresholds[PATTERN_RANDOM] = tmp;
+
+	} else if (!strcasecmp(key, "sequential_threshold")) {
+		mq->tracker.thresholds[PATTERN_SEQUENTIAL] = tmp;
+
+	} else if (!strcasecmp(key, "discard_promote_adjustment"))
+		mq->discard_promote_adjustment = tmp;
+
+	else if (!strcasecmp(key, "read_promote_adjustment"))
+		mq->read_promote_adjustment = tmp;
+
+	else if (!strcasecmp(key, "write_promote_adjustment"))
+		mq->write_promote_adjustment = tmp;
+
+	else
+		return -EINVAL;
 
 	return 0;
 }
@@ -1158,9 +1169,16 @@ static int mq_emit_config_values(struct dm_cache_policy *p, char *result, unsigned maxlen)
 	ssize_t sz = 0;
 	struct mq_policy *mq = to_mq_policy(p);
 
-	DMEMIT("4 random_threshold %u sequential_threshold %u",
+	DMEMIT("10 random_threshold %u "
+	       "sequential_threshold %u "
+	       "discard_promote_adjustment %u "
+	       "read_promote_adjustment %u "
+	       "write_promote_adjustment %u",
 	       mq->tracker.thresholds[PATTERN_RANDOM],
-	       mq->tracker.thresholds[PATTERN_SEQUENTIAL]);
+	       mq->tracker.thresholds[PATTERN_SEQUENTIAL],
+	       mq->discard_promote_adjustment,
+	       mq->read_promote_adjustment,
+	       mq->write_promote_adjustment);
 
 	return 0;
 }
@@ -1213,6 +1231,9 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
 	mq->hit_count = 0;
 	mq->generation = 0;
 	mq->promote_threshold = 0;
+	mq->discard_promote_adjustment = DEFAULT_DISCARD_PROMOTE_ADJUSTMENT;
+	mq->read_promote_adjustment = DEFAULT_READ_PROMOTE_ADJUSTMENT;
+	mq->write_promote_adjustment = DEFAULT_WRITE_PROMOTE_ADJUSTMENT;
 	mutex_init(&mq->lock);
 	spin_lock_init(&mq->tick_lock);
 
@@ -1244,7 +1265,7 @@ bad_pre_cache_init:
 
 static struct dm_cache_policy_type mq_policy_type = {
 	.name = "mq",
-	.version = {1, 1, 0},
+	.version = {1, 2, 0},
 	.hint_size = 4,
 	.owner = THIS_MODULE,
 	.create = mq_create
@@ -1252,10 +1273,11 @@ static struct dm_cache_policy_type mq_policy_type = {
 
 static struct dm_cache_policy_type default_policy_type = {
 	.name = "default",
-	.version = {1, 1, 0},
+	.version = {1, 2, 0},
 	.hint_size = 4,
 	.owner = THIS_MODULE,
-	.create = mq_create
+	.create = mq_create,
+	.real = &mq_policy_type
 };
 
 static int __init mq_init(void)
diff --git a/drivers/md/dm-cache-policy.c b/drivers/md/dm-cache-policy.c
index d80057968407..c1a3cee99b44 100644
--- a/drivers/md/dm-cache-policy.c
+++ b/drivers/md/dm-cache-policy.c
@@ -146,6 +146,10 @@ const char *dm_cache_policy_get_name(struct dm_cache_policy *p)
 {
 	struct dm_cache_policy_type *t = p->private;
 
+	/* if t->real is set then an alias was used (e.g. "default") */
+	if (t->real)
+		return t->real->name;
+
 	return t->name;
 }
 EXPORT_SYMBOL_GPL(dm_cache_policy_get_name);
diff --git a/drivers/md/dm-cache-policy.h b/drivers/md/dm-cache-policy.h
index 052c00a84a5c..f50fe360c546 100644
--- a/drivers/md/dm-cache-policy.h
+++ b/drivers/md/dm-cache-policy.h
@@ -223,6 +223,12 @@ struct dm_cache_policy_type {
 	unsigned version[CACHE_POLICY_VERSION_SIZE];
 
 	/*
+	 * For use by an alias dm_cache_policy_type to point to the
+	 * real dm_cache_policy_type.
+	 */
+	struct dm_cache_policy_type *real;
+
+	/*
 	 * Policies may store a hint for each each cache block.
 	 * Currently the size of this hint must be 0 or 4 bytes but we
 	 * expect to relax this in future.
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 1b1469ebe5cb..09334c275c79 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -2826,12 +2826,13 @@ static void cache_resume(struct dm_target *ti)
 /*
  * Status format:
  *
- * <#used metadata blocks>/<#total metadata blocks>
+ * <metadata block size> <#used metadata blocks>/<#total metadata blocks>
+ * <cache block size> <#used cache blocks>/<#total cache blocks>
  * <#read hits> <#read misses> <#write hits> <#write misses>
- * <#demotions> <#promotions> <#blocks in cache> <#dirty>
+ * <#demotions> <#promotions> <#dirty>
  * <#features> <features>*
 * <#core args> <core args>
- * <#policy args> <policy args>*
+ * <policy name> <#policy args> <policy args>*
 */
 static void cache_status(struct dm_target *ti, status_type_t type,
 			 unsigned status_flags, char *result, unsigned maxlen)
@@ -2869,17 +2870,20 @@ static void cache_status(struct dm_target *ti, status_type_t type,
 
 	residency = policy_residency(cache->policy);
 
-	DMEMIT("%llu/%llu %u %u %u %u %u %u %llu %u ",
+	DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %llu ",
+	       (unsigned)(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT),
 	       (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
 	       (unsigned long long)nr_blocks_metadata,
+	       cache->sectors_per_block,
+	       (unsigned long long) from_cblock(residency),
+	       (unsigned long long) from_cblock(cache->cache_size),
 	       (unsigned) atomic_read(&cache->stats.read_hit),
 	       (unsigned) atomic_read(&cache->stats.read_miss),
 	       (unsigned) atomic_read(&cache->stats.write_hit),
 	       (unsigned) atomic_read(&cache->stats.write_miss),
 	       (unsigned) atomic_read(&cache->stats.demotion),
 	       (unsigned) atomic_read(&cache->stats.promotion),
-	       (unsigned long long) from_cblock(residency),
-	       cache->nr_dirty);
+	       (unsigned long long) from_cblock(cache->nr_dirty));
 
 	if (writethrough_mode(&cache->features))
 		DMEMIT("1 writethrough ");
@@ -2896,6 +2900,8 @@ static void cache_status(struct dm_target *ti, status_type_t type,
 	}
 
 	DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold);
+
+	DMEMIT("%s ", dm_cache_policy_get_name(cache->policy));
 	if (sz < maxlen) {
 		r = policy_emit_config_values(cache->policy, result + sz, maxlen - sz);
 		if (r)
@@ -3129,7 +3135,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 static struct target_type cache_target = {
 	.name = "cache",
-	.version = {1, 2, 0},
+	.version = {1, 3, 0},
 	.module = THIS_MODULE,
 	.ctr = cache_ctr,
 	.dtr = cache_dtr,
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 2f91d6d4a2cc..a8a511c053a5 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -24,7 +24,6 @@ struct delay_c {
 	struct work_struct flush_expired_bios;
 	struct list_head delayed_bios;
 	atomic_t may_delay;
-	mempool_t *delayed_pool;
 
 	struct dm_dev *dev_read;
 	sector_t start_read;
@@ -40,14 +39,11 @@ struct delay_c {
 struct dm_delay_info {
 	struct delay_c *context;
 	struct list_head list;
-	struct bio *bio;
 	unsigned long expires;
 };
 
 static DEFINE_MUTEX(delayed_bios_lock);
 
-static struct kmem_cache *delayed_cache;
-
 static void handle_delayed_timer(unsigned long data)
 {
 	struct delay_c *dc = (struct delay_c *)data;
@@ -87,13 +83,14 @@ static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all)
 	mutex_lock(&delayed_bios_lock);
 	list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
 		if (flush_all || time_after_eq(jiffies, delayed->expires)) {
+			struct bio *bio = dm_bio_from_per_bio_data(delayed,
+						sizeof(struct dm_delay_info));
 			list_del(&delayed->list);
-			bio_list_add(&flush_bios, delayed->bio);
-			if ((bio_data_dir(delayed->bio) == WRITE))
+			bio_list_add(&flush_bios, bio);
+			if ((bio_data_dir(bio) == WRITE))
 				delayed->context->writes--;
 			else
 				delayed->context->reads--;
-			mempool_free(delayed, dc->delayed_pool);
 			continue;
 		}
 
@@ -185,12 +182,6 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	}
 
 out:
-	dc->delayed_pool = mempool_create_slab_pool(128, delayed_cache);
-	if (!dc->delayed_pool) {
-		DMERR("Couldn't create delayed bio pool.");
-		goto bad_dev_write;
-	}
-
 	dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
 	if (!dc->kdelayd_wq) {
 		DMERR("Couldn't start kdelayd");
@@ -206,12 +197,11 @@ out:
 
 	ti->num_flush_bios = 1;
 	ti->num_discard_bios = 1;
+	ti->per_bio_data_size = sizeof(struct dm_delay_info);
 	ti->private = dc;
 	return 0;
 
 bad_queue:
-	mempool_destroy(dc->delayed_pool);
-bad_dev_write:
 	if (dc->dev_write)
 		dm_put_device(ti, dc->dev_write);
 bad_dev_read:
@@ -232,7 +222,6 @@ static void delay_dtr(struct dm_target *ti)
 	if (dc->dev_write)
 		dm_put_device(ti, dc->dev_write);
 
-	mempool_destroy(dc->delayed_pool);
 	kfree(dc);
 }
 
@@ -244,10 +233,9 @@ static int delay_bio(struct delay_c *dc, int delay, struct bio *bio)
 	if (!delay || !atomic_read(&dc->may_delay))
 		return 1;
 
-	delayed = mempool_alloc(dc->delayed_pool, GFP_NOIO);
+	delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));
 
 	delayed->context = dc;
-	delayed->bio = bio;
 	delayed->expires = expires = jiffies + (delay * HZ / 1000);
 
 	mutex_lock(&delayed_bios_lock);
@@ -356,13 +344,7 @@ static struct target_type delay_target = {
 
 static int __init dm_delay_init(void)
 {
-	int r = -ENOMEM;
-
-	delayed_cache = KMEM_CACHE(dm_delay_info, 0);
-	if (!delayed_cache) {
-		DMERR("Couldn't create delayed bio cache.");
-		goto bad_memcache;
-	}
+	int r;
 
 	r = dm_register_target(&delay_target);
 	if (r < 0) {
@@ -373,15 +355,12 @@ static int __init dm_delay_init(void)
 	return 0;
 
 bad_register:
-	kmem_cache_destroy(delayed_cache);
-bad_memcache:
 	return r;
 }
 
 static void __exit dm_delay_exit(void)
 {
 	dm_unregister_target(&delay_target);
-	kmem_cache_destroy(delayed_cache);
 }
 
 /* Module hooks */
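The dm-delay changes above follow the standard recipe for replacing a private mempool with DM's per-bio data: declare the payload size once in the constructor, fetch the payload with dm_per_bio_data() on the I/O path (no allocation, hence no failure path), and map back from payload to bio with dm_bio_from_per_bio_data(). A condensed sketch of that life cycle; the target and field names are illustrative, not part of this series:

	struct my_per_bio {			/* illustrative payload */
		struct list_head list;
		unsigned long expires;
	};

	static int my_ctr(struct dm_target *ti, unsigned argc, char **argv)
	{
		/* DM reserves this much space alongside every bio it clones. */
		ti->per_bio_data_size = sizeof(struct my_per_bio);
		return 0;
	}

	static int my_map(struct dm_target *ti, struct bio *bio)
	{
		struct my_per_bio *pb = dm_per_bio_data(bio, sizeof(struct my_per_bio));

		pb->expires = jiffies + HZ;	/* stash per-bio state for later */
		return DM_MAPIO_REMAPPED;
	}

	static void my_release(struct my_per_bio *pb)
	{
		/* Going the other way: recover the bio from its payload. */
		struct bio *bio = dm_bio_from_per_bio_data(pb,
					sizeof(struct my_per_bio));

		bio_endio(bio, 0);		/* 3.14-era bio_endio signature */
	}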
diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c
index 9429159d9ee3..b953db6cc229 100644
--- a/drivers/md/dm-log-userspace-base.c
+++ b/drivers/md/dm-log-userspace-base.c
@@ -10,10 +10,11 @@
 #include <linux/device-mapper.h>
 #include <linux/dm-log-userspace.h>
 #include <linux/module.h>
+#include <linux/workqueue.h>
 
 #include "dm-log-userspace-transfer.h"
 
-#define DM_LOG_USERSPACE_VSN "1.1.0"
+#define DM_LOG_USERSPACE_VSN "1.3.0"
 
 struct flush_entry {
 	int type;
@@ -58,6 +59,18 @@ struct log_c {
 	spinlock_t flush_lock;
 	struct list_head mark_list;
 	struct list_head clear_list;
+
+	/*
+	 * Workqueue for flush of clear region requests.
+	 */
+	struct workqueue_struct *dmlog_wq;
+	struct delayed_work flush_log_work;
+	atomic_t sched_flush;
+
+	/*
+	 * Combine userspace flush and mark requests for efficiency.
+	 */
+	uint32_t integrated_flush;
 };
 
 static mempool_t *flush_entry_pool;
@@ -122,6 +135,9 @@ static int build_constructor_string(struct dm_target *ti,
 
 	*ctr_str = NULL;
 
+	/*
+	 * Determine overall size of the string.
+	 */
 	for (i = 0, str_size = 0; i < argc; i++)
 		str_size += strlen(argv[i]) + 1; /* +1 for space between args */
 
@@ -141,18 +157,39 @@ static int build_constructor_string(struct dm_target *ti,
 	return str_size;
 }
 
+static void do_flush(struct work_struct *work)
+{
+	int r;
+	struct log_c *lc = container_of(work, struct log_c, flush_log_work.work);
+
+	atomic_set(&lc->sched_flush, 0);
+
+	r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH, NULL, 0, NULL, NULL);
+
+	if (r)
+		dm_table_event(lc->ti->table);
+}
+
 /*
  * userspace_ctr
  *
 * argv contains:
- *	<UUID> <other args>
- * Where 'other args' is the userspace implementation specific log
- * arguments.  An example might be:
- *	<UUID> clustered-disk <arg count> <log dev> <region_size> [[no]sync]
+ *	<UUID> [integrated_flush] <other args>
+ * Where 'other args' are the userspace implementation-specific log
+ * arguments.
+ *
+ * Example:
+ *	<UUID> [integrated_flush] clustered-disk <arg count> <log dev>
+ *	<region_size> [[no]sync]
+ *
+ * This module strips off the <UUID> and uses it for identification
+ * purposes when communicating with userspace about a log.
 *
- * So, this module will strip off the <UUID> for identification purposes
- * when communicating with userspace about a log; but will pass on everything
- * else.
+ * If integrated_flush is defined, the kernel combines flush
+ * and mark requests.
+ *
+ * The rest of the line, beginning with 'clustered-disk', is passed
+ * to the userspace ctr function.
 */
static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
 			 unsigned argc, char **argv)
@@ -188,12 +225,22 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
 		return -EINVAL;
 	}
 
+	lc->usr_argc = argc;
+
 	strncpy(lc->uuid, argv[0], DM_UUID_LEN);
+	argc--;
+	argv++;
 	spin_lock_init(&lc->flush_lock);
 	INIT_LIST_HEAD(&lc->mark_list);
 	INIT_LIST_HEAD(&lc->clear_list);
 
-	str_size = build_constructor_string(ti, argc - 1, argv + 1, &ctr_str);
+	if (!strcasecmp(argv[0], "integrated_flush")) {
+		lc->integrated_flush = 1;
+		argc--;
+		argv++;
+	}
+
+	str_size = build_constructor_string(ti, argc, argv, &ctr_str);
 	if (str_size < 0) {
 		kfree(lc);
 		return str_size;
@@ -246,6 +293,19 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
 		DMERR("Failed to register %s with device-mapper",
 		      devices_rdata);
 	}
+
+	if (lc->integrated_flush) {
+		lc->dmlog_wq = alloc_workqueue("dmlogd", WQ_MEM_RECLAIM, 0);
+		if (!lc->dmlog_wq) {
+			DMERR("couldn't start dmlogd");
+			r = -ENOMEM;
+			goto out;
+		}
+
+		INIT_DELAYED_WORK(&lc->flush_log_work, do_flush);
+		atomic_set(&lc->sched_flush, 0);
+	}
+
 out:
 	kfree(devices_rdata);
 	if (r) {
@@ -253,7 +313,6 @@ out:
 		kfree(ctr_str);
 	} else {
 		lc->usr_argv_str = ctr_str;
-		lc->usr_argc = argc;
 		log->context = lc;
 	}
 
@@ -264,9 +323,16 @@ static void userspace_dtr(struct dm_dirty_log *log)
 {
 	struct log_c *lc = log->context;
 
+	if (lc->integrated_flush) {
+		/* flush workqueue */
+		if (atomic_read(&lc->sched_flush))
+			flush_delayed_work(&lc->flush_log_work);
+
+		destroy_workqueue(lc->dmlog_wq);
+	}
+
 	(void) dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_DTR,
-				    NULL, 0,
-				    NULL, NULL);
+				    NULL, 0, NULL, NULL);
 
 	if (lc->log_dev)
 		dm_put_device(lc->ti, lc->log_dev);
@@ -283,8 +349,7 @@ static int userspace_presuspend(struct dm_dirty_log *log)
 	struct log_c *lc = log->context;
 
 	r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_PRESUSPEND,
-				 NULL, 0,
-				 NULL, NULL);
+				 NULL, 0, NULL, NULL);
 
 	return r;
 }
@@ -294,9 +359,14 @@ static int userspace_postsuspend(struct dm_dirty_log *log)
 	int r;
 	struct log_c *lc = log->context;
 
+	/*
+	 * Run planned flush earlier.
+	 */
+	if (lc->integrated_flush && atomic_read(&lc->sched_flush))
+		flush_delayed_work(&lc->flush_log_work);
+
 	r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_POSTSUSPEND,
-				 NULL, 0,
-				 NULL, NULL);
+				 NULL, 0, NULL, NULL);
 
 	return r;
 }
@@ -308,8 +378,7 @@ static int userspace_resume(struct dm_dirty_log *log)
 
 	lc->in_sync_hint = 0;
 	r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_RESUME,
-				 NULL, 0,
-				 NULL, NULL);
+				 NULL, 0, NULL, NULL);
 
 	return r;
 }
@@ -405,7 +474,8 @@ static int flush_one_by_one(struct log_c *lc, struct list_head *flush_list)
 	return r;
 }
 
-static int flush_by_group(struct log_c *lc, struct list_head *flush_list)
+static int flush_by_group(struct log_c *lc, struct list_head *flush_list,
+			  int flush_with_payload)
 {
 	int r = 0;
 	int count;
@@ -431,15 +501,29 @@
 			break;
 		}
 
-		r = userspace_do_request(lc, lc->uuid, type,
-					 (char *)(group),
-					 count * sizeof(uint64_t),
-					 NULL, NULL);
-		if (r) {
-			/* Group send failed.  Attempt one-by-one. */
-			list_splice_init(&tmp_list, flush_list);
-			r = flush_one_by_one(lc, flush_list);
-			break;
+		if (flush_with_payload) {
+			r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH,
+						 (char *)(group),
+						 count * sizeof(uint64_t),
+						 NULL, NULL);
+			/*
+			 * Integrated flush failed.
+			 */
+			if (r)
+				break;
+		} else {
+			r = userspace_do_request(lc, lc->uuid, type,
+						 (char *)(group),
+						 count * sizeof(uint64_t),
+						 NULL, NULL);
+			if (r) {
+				/*
+				 * Group send failed.  Attempt one-by-one.
+				 */
+				list_splice_init(&tmp_list, flush_list);
+				r = flush_one_by_one(lc, flush_list);
+				break;
+			}
 		}
 	}
 
@@ -476,6 +560,8 @@ static int userspace_flush(struct dm_dirty_log *log)
 	struct log_c *lc = log->context;
 	LIST_HEAD(mark_list);
 	LIST_HEAD(clear_list);
+	int mark_list_is_empty;
+	int clear_list_is_empty;
 	struct flush_entry *fe, *tmp_fe;
 
 	spin_lock_irqsave(&lc->flush_lock, flags);
@@ -483,23 +569,51 @@ static int userspace_flush(struct dm_dirty_log *log)
 	list_splice_init(&lc->clear_list, &clear_list);
 	spin_unlock_irqrestore(&lc->flush_lock, flags);
 
-	if (list_empty(&mark_list) && list_empty(&clear_list))
+	mark_list_is_empty = list_empty(&mark_list);
+	clear_list_is_empty = list_empty(&clear_list);
+
+	if (mark_list_is_empty && clear_list_is_empty)
 		return 0;
 
-	r = flush_by_group(lc, &mark_list);
+	r = flush_by_group(lc, &clear_list, 0);
 	if (r)
-		goto fail;
+		goto out;
 
-	r = flush_by_group(lc, &clear_list);
+	if (!lc->integrated_flush) {
+		r = flush_by_group(lc, &mark_list, 0);
+		if (r)
+			goto out;
+		r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH,
+					 NULL, 0, NULL, NULL);
+		goto out;
+	}
+
+	/*
+	 * Send integrated flush request with mark_list as payload.
+	 */
+	r = flush_by_group(lc, &mark_list, 1);
 	if (r)
-		goto fail;
+		goto out;
 
-	r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH,
-				 NULL, 0, NULL, NULL);
+	if (mark_list_is_empty && !atomic_read(&lc->sched_flush)) {
+		/*
+		 * When there are only clear region requests,
+		 * we schedule a flush in the future.
+		 */
+		queue_delayed_work(lc->dmlog_wq, &lc->flush_log_work, 3 * HZ);
+		atomic_set(&lc->sched_flush, 1);
+	} else {
+		/*
+		 * Cancel pending flush because we
+		 * have already flushed in mark_region.
+		 */
+		cancel_delayed_work(&lc->flush_log_work);
+		atomic_set(&lc->sched_flush, 0);
+	}
 
-fail:
+out:
 	/*
-	 * We can safely remove these entries, even if failure.
+	 * We can safely remove these entries, even after failure.
 	 * Calling code will receive an error and will know that
 	 * the log facility has failed.
 	 */
@@ -603,8 +717,7 @@ static int userspace_get_resync_work(struct dm_dirty_log *log, region_t *region)
 
 	rdata_size = sizeof(pkg);
 	r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_RESYNC_WORK,
-				 NULL, 0,
-				 (char *)&pkg, &rdata_size);
+				 NULL, 0, (char *)&pkg, &rdata_size);
 
 	*region = pkg.r;
 	return (r) ? r : (int)pkg.i;
@@ -630,8 +743,7 @@ static void userspace_set_region_sync(struct dm_dirty_log *log,
 	pkg.i = (int64_t)in_sync;
 
 	r = userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC,
-				 (char *)&pkg, sizeof(pkg),
-				 NULL, NULL);
+				 (char *)&pkg, sizeof(pkg), NULL, NULL);
 
 	/*
 	 * It would be nice to be able to report failures.
@@ -657,8 +769,7 @@ static region_t userspace_get_sync_count(struct dm_dirty_log *log)
 
 	rdata_size = sizeof(sync_count);
 	r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_SYNC_COUNT,
-				 NULL, 0,
-				 (char *)&sync_count, &rdata_size);
+				 NULL, 0, (char *)&sync_count, &rdata_size);
 
 	if (r)
 		return 0;
@@ -685,8 +796,7 @@ static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,
 	switch (status_type) {
 	case STATUSTYPE_INFO:
 		r = userspace_do_request(lc, lc->uuid, DM_ULOG_STATUS_INFO,
-					 NULL, 0,
-					 result, &sz);
+					 NULL, 0, result, &sz);
 
 		if (r) {
 			sz = 0;
@@ -699,8 +809,10 @@ static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,
 		BUG_ON(!table_args); /* There will always be a ' ' */
 		table_args++;
 
-		DMEMIT("%s %u %s %s ", log->type->name, lc->usr_argc,
-		       lc->uuid, table_args);
+		DMEMIT("%s %u %s ", log->type->name, lc->usr_argc, lc->uuid);
+		if (lc->integrated_flush)
+			DMEMIT("integrated_flush ");
+		DMEMIT("%s ", table_args);
 		break;
 	}
 	return (r) ? 0 : (int)sz;
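The integrated_flush bookkeeping above cuts userspace round trips in two ways: mark requests piggyback on the flush request itself, and a clear-only flush is deferred by three seconds so a later mark-driven flush can absorb it. The core of that deferral, condensed into a sketch (my_log and issue_flush() are stand-ins for struct log_c and the DM_ULOG_FLUSH round trip, not names from this series):

	struct my_log {
		struct workqueue_struct *wq;
		struct delayed_work flush_work;
		atomic_t sched_flush;	/* 1 = a deferred flush is queued */
	};

	static void issue_flush(struct my_log *log);	/* hypothetical */

	static void deferred_flush_fn(struct work_struct *work)
	{
		struct my_log *log = container_of(work, struct my_log,
						  flush_work.work);

		atomic_set(&log->sched_flush, 0);
		issue_flush(log);
	}

	static void flushed_clears_only(struct my_log *log)
	{
		/* No marks went out: a standalone flush can wait a little. */
		queue_delayed_work(log->wq, &log->flush_work, 3 * HZ);
		atomic_set(&log->sched_flush, 1);
	}

	static void flushed_with_marks(struct my_log *log)
	{
		/* Marks carried the flush already; the deferred one is moot. */
		cancel_delayed_work(&log->flush_work);
		atomic_set(&log->sched_flush, 0);
	}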
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 2d2b1b7588d7..afc3d017de4c 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c | |||
@@ -13,10 +13,13 @@ | |||
13 | #include <linux/export.h> | 13 | #include <linux/export.h> |
14 | #include <linux/slab.h> | 14 | #include <linux/slab.h> |
15 | #include <linux/dm-io.h> | 15 | #include <linux/dm-io.h> |
16 | #include "dm-bufio.h" | ||
16 | 17 | ||
17 | #define DM_MSG_PREFIX "persistent snapshot" | 18 | #define DM_MSG_PREFIX "persistent snapshot" |
18 | #define DM_CHUNK_SIZE_DEFAULT_SECTORS 32 /* 16KB */ | 19 | #define DM_CHUNK_SIZE_DEFAULT_SECTORS 32 /* 16KB */ |
19 | 20 | ||
21 | #define DM_PREFETCH_CHUNKS 12 | ||
22 | |||
20 | /*----------------------------------------------------------------- | 23 | /*----------------------------------------------------------------- |
21 | * Persistent snapshots, by persistent we mean that the snapshot | 24 | * Persistent snapshots, by persistent we mean that the snapshot |
22 | * will survive a reboot. | 25 | * will survive a reboot. |
@@ -257,6 +260,7 @@ static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw, | |||
257 | INIT_WORK_ONSTACK(&req.work, do_metadata); | 260 | INIT_WORK_ONSTACK(&req.work, do_metadata); |
258 | queue_work(ps->metadata_wq, &req.work); | 261 | queue_work(ps->metadata_wq, &req.work); |
259 | flush_workqueue(ps->metadata_wq); | 262 | flush_workqueue(ps->metadata_wq); |
263 | destroy_work_on_stack(&req.work); | ||
260 | 264 | ||
261 | return req.result; | 265 | return req.result; |
262 | } | 266 | } |
@@ -401,17 +405,18 @@ static int write_header(struct pstore *ps) | |||
401 | /* | 405 | /* |
402 | * Access functions for the disk exceptions, these do the endian conversions. | 406 | * Access functions for the disk exceptions, these do the endian conversions. |
403 | */ | 407 | */ |
404 | static struct disk_exception *get_exception(struct pstore *ps, uint32_t index) | 408 | static struct disk_exception *get_exception(struct pstore *ps, void *ps_area, |
409 | uint32_t index) | ||
405 | { | 410 | { |
406 | BUG_ON(index >= ps->exceptions_per_area); | 411 | BUG_ON(index >= ps->exceptions_per_area); |
407 | 412 | ||
408 | return ((struct disk_exception *) ps->area) + index; | 413 | return ((struct disk_exception *) ps_area) + index; |
409 | } | 414 | } |
410 | 415 | ||
411 | static void read_exception(struct pstore *ps, | 416 | static void read_exception(struct pstore *ps, void *ps_area, |
412 | uint32_t index, struct core_exception *result) | 417 | uint32_t index, struct core_exception *result) |
413 | { | 418 | { |
414 | struct disk_exception *de = get_exception(ps, index); | 419 | struct disk_exception *de = get_exception(ps, ps_area, index); |
415 | 420 | ||
416 | /* copy it */ | 421 | /* copy it */ |
417 | result->old_chunk = le64_to_cpu(de->old_chunk); | 422 | result->old_chunk = le64_to_cpu(de->old_chunk); |
@@ -421,7 +426,7 @@ static void read_exception(struct pstore *ps, | |||
421 | static void write_exception(struct pstore *ps, | 426 | static void write_exception(struct pstore *ps, |
422 | uint32_t index, struct core_exception *e) | 427 | uint32_t index, struct core_exception *e) |
423 | { | 428 | { |
424 | struct disk_exception *de = get_exception(ps, index); | 429 | struct disk_exception *de = get_exception(ps, ps->area, index); |
425 | 430 | ||
426 | /* copy it */ | 431 | /* copy it */ |
427 | de->old_chunk = cpu_to_le64(e->old_chunk); | 432 | de->old_chunk = cpu_to_le64(e->old_chunk); |
@@ -430,7 +435,7 @@ static void write_exception(struct pstore *ps, | |||
430 | 435 | ||
431 | static void clear_exception(struct pstore *ps, uint32_t index) | 436 | static void clear_exception(struct pstore *ps, uint32_t index) |
432 | { | 437 | { |
433 | struct disk_exception *de = get_exception(ps, index); | 438 | struct disk_exception *de = get_exception(ps, ps->area, index); |
434 | 439 | ||
435 | /* clear it */ | 440 | /* clear it */ |
436 | de->old_chunk = 0; | 441 | de->old_chunk = 0; |
@@ -442,7 +447,7 @@ static void clear_exception(struct pstore *ps, uint32_t index) | |||
442 | * 'full' is filled in to indicate if the area has been | 447 | * 'full' is filled in to indicate if the area has been |
443 | * filled. | 448 | * filled. |
444 | */ | 449 | */ |
445 | static int insert_exceptions(struct pstore *ps, | 450 | static int insert_exceptions(struct pstore *ps, void *ps_area, |
446 | int (*callback)(void *callback_context, | 451 | int (*callback)(void *callback_context, |
447 | chunk_t old, chunk_t new), | 452 | chunk_t old, chunk_t new), |
448 | void *callback_context, | 453 | void *callback_context, |
@@ -456,7 +461,7 @@ static int insert_exceptions(struct pstore *ps, | |||
456 | *full = 1; | 461 | *full = 1; |
457 | 462 | ||
458 | for (i = 0; i < ps->exceptions_per_area; i++) { | 463 | for (i = 0; i < ps->exceptions_per_area; i++) { |
459 | read_exception(ps, i, &e); | 464 | read_exception(ps, ps_area, i, &e); |
460 | 465 | ||
461 | /* | 466 | /* |
462 | * If the new_chunk is pointing at the start of | 467 | * If the new_chunk is pointing at the start of |
@@ -493,26 +498,72 @@ static int read_exceptions(struct pstore *ps, | |||
493 | void *callback_context) | 498 | void *callback_context) |
494 | { | 499 | { |
495 | int r, full = 1; | 500 | int r, full = 1; |
501 | struct dm_bufio_client *client; | ||
502 | chunk_t prefetch_area = 0; | ||
503 | |||
504 | client = dm_bufio_client_create(dm_snap_cow(ps->store->snap)->bdev, | ||
505 | ps->store->chunk_size << SECTOR_SHIFT, | ||
506 | 1, 0, NULL, NULL); | ||
507 | |||
508 | if (IS_ERR(client)) | ||
509 | return PTR_ERR(client); | ||
510 | |||
511 | /* | ||
512 | * Setup for one current buffer + desired readahead buffers. | ||
513 | */ | ||
514 | dm_bufio_set_minimum_buffers(client, 1 + DM_PREFETCH_CHUNKS); | ||
496 | 515 | ||
497 | /* | 516 | /* |
498 | * Keeping reading chunks and inserting exceptions until | 517 | * Keeping reading chunks and inserting exceptions until |
499 | * we find a partially full area. | 518 | * we find a partially full area. |
500 | */ | 519 | */ |
501 | for (ps->current_area = 0; full; ps->current_area++) { | 520 | for (ps->current_area = 0; full; ps->current_area++) { |
502 | r = area_io(ps, READ); | 521 | struct dm_buffer *bp; |
503 | if (r) | 522 | void *area; |
504 | return r; | 523 | chunk_t chunk; |
524 | |||
525 | if (unlikely(prefetch_area < ps->current_area)) | ||
526 | prefetch_area = ps->current_area; | ||
527 | |||
528 | if (DM_PREFETCH_CHUNKS) do { | ||
529 | chunk_t pf_chunk = area_location(ps, prefetch_area); | ||
530 | if (unlikely(pf_chunk >= dm_bufio_get_device_size(client))) | ||
531 | break; | ||
532 | dm_bufio_prefetch(client, pf_chunk, 1); | ||
533 | prefetch_area++; | ||
534 | if (unlikely(!prefetch_area)) | ||
535 | break; | ||
536 | } while (prefetch_area <= ps->current_area + DM_PREFETCH_CHUNKS); | ||
537 | |||
538 | chunk = area_location(ps, ps->current_area); | ||
539 | |||
540 | area = dm_bufio_read(client, chunk, &bp); | ||
541 | if (unlikely(IS_ERR(area))) { | ||
542 | r = PTR_ERR(area); | ||
543 | goto ret_destroy_bufio; | ||
544 | } | ||
505 | 545 | ||
506 | r = insert_exceptions(ps, callback, callback_context, &full); | 546 | r = insert_exceptions(ps, area, callback, callback_context, |
507 | if (r) | 547 | &full); |
508 | return r; | 548 | |
549 | dm_bufio_release(bp); | ||
550 | |||
551 | dm_bufio_forget(client, chunk); | ||
552 | |||
553 | if (unlikely(r)) | ||
554 | goto ret_destroy_bufio; | ||
509 | } | 555 | } |
510 | 556 | ||
511 | ps->current_area--; | 557 | ps->current_area--; |
512 | 558 | ||
513 | skip_metadata(ps); | 559 | skip_metadata(ps); |
514 | 560 | ||
515 | return 0; | 561 | r = 0; |
562 | |||
563 | ret_destroy_bufio: | ||
564 | dm_bufio_client_destroy(client); | ||
565 | |||
566 | return r; | ||
516 | } | 567 | } |
517 | 568 | ||
518 | static struct pstore *get_info(struct dm_exception_store *store) | 569 | static struct pstore *get_info(struct dm_exception_store *store) |
@@ -733,7 +784,7 @@ static int persistent_prepare_merge(struct dm_exception_store *store, | |||
733 | ps->current_committed = ps->exceptions_per_area; | 784 | ps->current_committed = ps->exceptions_per_area; |
734 | } | 785 | } |
735 | 786 | ||
736 | read_exception(ps, ps->current_committed - 1, &ce); | 787 | read_exception(ps, ps->area, ps->current_committed - 1, &ce); |
737 | *last_old_chunk = ce.old_chunk; | 788 | *last_old_chunk = ce.old_chunk; |
738 | *last_new_chunk = ce.new_chunk; | 789 | *last_new_chunk = ce.new_chunk; |
739 | 790 | ||
@@ -743,8 +794,8 @@ static int persistent_prepare_merge(struct dm_exception_store *store, | |||
743 | */ | 794 | */ |
744 | for (nr_consecutive = 1; nr_consecutive < ps->current_committed; | 795 | for (nr_consecutive = 1; nr_consecutive < ps->current_committed; |
745 | nr_consecutive++) { | 796 | nr_consecutive++) { |
746 | read_exception(ps, ps->current_committed - 1 - nr_consecutive, | 797 | read_exception(ps, ps->area, |
747 | &ce); | 798 | ps->current_committed - 1 - nr_consecutive, &ce); |
748 | if (ce.old_chunk != *last_old_chunk - nr_consecutive || | 799 | if (ce.old_chunk != *last_old_chunk - nr_consecutive || |
749 | ce.new_chunk != *last_new_chunk - nr_consecutive) | 800 | ce.new_chunk != *last_new_chunk - nr_consecutive) |
750 | break; | 801 | break; |
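The rewritten read_exceptions() above is a textbook dm-bufio readahead loop: pin one buffer for the area currently being parsed, keep up to DM_PREFETCH_CHUNKS asynchronous reads in flight ahead of it, and both release and forget each buffer afterwards, since every area is read exactly once. A condensed sketch of the pattern (error paths and the prefetch-counter overflow guard are trimmed; area_location_stub() and the DM_PREFETCH_CHUNKS value are stand-ins for the real dm-snap-persistent.c definitions):

```c
#include <linux/err.h>
#include "dm-bufio.h"	/* drivers/md local header in this tree */

#define DM_PREFETCH_CHUNKS 12	/* assumption: mirrors dm-snap-persistent.c */

/* Placeholder for area_location(): maps an area index to a chunk number. */
static sector_t area_location_stub(sector_t area)
{
	return 1 + area;	/* invented layout, for illustration only */
}

static int read_areas(struct dm_bufio_client *client, sector_t nr_areas)
{
	sector_t area, prefetch = 0;

	/* One pinned buffer for the current area plus the readahead window. */
	dm_bufio_set_minimum_buffers(client, 1 + DM_PREFETCH_CHUNKS);

	for (area = 0; area < nr_areas; area++) {
		struct dm_buffer *bp;
		void *data;
		sector_t chunk = area_location_stub(area);

		/* Keep up to DM_PREFETCH_CHUNKS reads in flight ahead of us. */
		while (prefetch <= area + DM_PREFETCH_CHUNKS) {
			sector_t pf = area_location_stub(prefetch);

			if (pf >= dm_bufio_get_device_size(client))
				break;
			dm_bufio_prefetch(client, pf, 1);
			prefetch++;
		}

		data = dm_bufio_read(client, chunk, &bp);
		if (IS_ERR(data))
			return PTR_ERR(data);

		/* ... parse the exceptions held in data ... */

		dm_bufio_release(bp);
		/* Each area is consumed once; don't let it age in the cache. */
		dm_bufio_forget(client, chunk);
	}

	return 0;
}
```

This is why snapshot activation gets faster: the old area_io(ps, READ) path issued one synchronous chunk read per area, while dm_bufio_prefetch() lets the next dozen reads overlap with parsing.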
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 944690bafd93..717718558bd9 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c | |||
@@ -610,12 +610,12 @@ static struct dm_exception *dm_lookup_exception(struct dm_exception_table *et, | |||
610 | return NULL; | 610 | return NULL; |
611 | } | 611 | } |
612 | 612 | ||
613 | static struct dm_exception *alloc_completed_exception(void) | 613 | static struct dm_exception *alloc_completed_exception(gfp_t gfp) |
614 | { | 614 | { |
615 | struct dm_exception *e; | 615 | struct dm_exception *e; |
616 | 616 | ||
617 | e = kmem_cache_alloc(exception_cache, GFP_NOIO); | 617 | e = kmem_cache_alloc(exception_cache, gfp); |
618 | if (!e) | 618 | if (!e && gfp == GFP_NOIO) |
619 | e = kmem_cache_alloc(exception_cache, GFP_ATOMIC); | 619 | e = kmem_cache_alloc(exception_cache, GFP_ATOMIC); |
620 | 620 | ||
621 | return e; | 621 | return e; |
@@ -697,7 +697,7 @@ static int dm_add_exception(void *context, chunk_t old, chunk_t new) | |||
697 | struct dm_snapshot *s = context; | 697 | struct dm_snapshot *s = context; |
698 | struct dm_exception *e; | 698 | struct dm_exception *e; |
699 | 699 | ||
700 | e = alloc_completed_exception(); | 700 | e = alloc_completed_exception(GFP_KERNEL); |
701 | if (!e) | 701 | if (!e) |
702 | return -ENOMEM; | 702 | return -ENOMEM; |
703 | 703 | ||
@@ -1405,7 +1405,7 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success) | |||
1405 | goto out; | 1405 | goto out; |
1406 | } | 1406 | } |
1407 | 1407 | ||
1408 | e = alloc_completed_exception(); | 1408 | e = alloc_completed_exception(GFP_NOIO); |
1409 | if (!e) { | 1409 | if (!e) { |
1410 | down_write(&s->lock); | 1410 | down_write(&s->lock); |
1411 | __invalidate_snapshot(s, -ENOMEM); | 1411 | __invalidate_snapshot(s, -ENOMEM); |
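The gfp_t parameter threaded through alloc_completed_exception() exists because the same allocator now serves two very different contexts: dm_add_exception() runs in process context during snapshot activation, where GFP_KERNEL may block and reclaim freely, while pending_complete() sits on the I/O path, where GFP_NOIO is mandatory and a GFP_ATOMIC retry is kept as a last resort (failure there invalidates the snapshot). The idiom, as a minimal sketch (assuming some kmem cache `cache`):

```c
#include <linux/slab.h>

/*
 * Caller picks the gfp mask; only the GFP_NOIO (I/O path) callers get
 * the GFP_ATOMIC emergency retry. GFP_KERNEL callers can simply fail
 * with -ENOMEM, since activation is restartable.
 */
static void *alloc_from_cache(struct kmem_cache *cache, gfp_t gfp)
{
	void *p = kmem_cache_alloc(cache, gfp);

	if (!p && gfp == GFP_NOIO)
		p = kmem_cache_alloc(cache, GFP_ATOMIC);

	return p;
}
```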
diff --git a/drivers/md/dm-sysfs.c b/drivers/md/dm-sysfs.c index 84d2b91e4efb..c62c5ab6aed5 100644 --- a/drivers/md/dm-sysfs.c +++ b/drivers/md/dm-sysfs.c | |||
@@ -86,6 +86,7 @@ static const struct sysfs_ops dm_sysfs_ops = { | |||
86 | static struct kobj_type dm_ktype = { | 86 | static struct kobj_type dm_ktype = { |
87 | .sysfs_ops = &dm_sysfs_ops, | 87 | .sysfs_ops = &dm_sysfs_ops, |
88 | .default_attrs = dm_attrs, | 88 | .default_attrs = dm_attrs, |
89 | .release = dm_kobject_release, | ||
89 | }; | 90 | }; |
90 | 91 | ||
91 | /* | 92 | /* |
@@ -104,5 +105,7 @@ int dm_sysfs_init(struct mapped_device *md) | |||
104 | */ | 105 | */ |
105 | void dm_sysfs_exit(struct mapped_device *md) | 106 | void dm_sysfs_exit(struct mapped_device *md) |
106 | { | 107 | { |
107 | kobject_put(dm_kobject(md)); | 108 | struct kobject *kobj = dm_kobject(md); |
109 | kobject_put(kobj); | ||
110 | wait_for_completion(dm_get_completion_from_kobject(kobj)); | ||
108 | } | 111 | } |
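The module unload race fixed here: kobject_put() only drops a reference, so a concurrent sysfs reader can still hold the kobject (embedded in struct mapped_device) after dm_sysfs_exit() returns, and the module text could be unloaded underneath it. The cure is the standard "embed a completion beside the kobject, signal it from the ktype release hook, and wait for it on teardown" pattern; the dm_kobject_release() hook registered above is presumably a one-liner along these lines (sketch, using the dm_get_completion_from_kobject() helper added to dm.h further down):

```c
#include <linux/kobject.h>
#include <linux/completion.h>

#include "dm.h"	/* dm_get_completion_from_kobject(), added below */

/*
 * ktype release hook: invoked by the kobject core when the final
 * reference is dropped, i.e. when it is finally safe for the caller
 * blocked in dm_sysfs_exit() to proceed with destroying the device.
 */
void dm_kobject_release(struct kobject *kobj)
{
	complete(dm_get_completion_from_kobject(kobj));
}
```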
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 3ba6a3859ce3..6a7f2b83a126 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c | |||
@@ -155,7 +155,6 @@ static int alloc_targets(struct dm_table *t, unsigned int num) | |||
155 | { | 155 | { |
156 | sector_t *n_highs; | 156 | sector_t *n_highs; |
157 | struct dm_target *n_targets; | 157 | struct dm_target *n_targets; |
158 | int n = t->num_targets; | ||
159 | 158 | ||
160 | /* | 159 | /* |
161 | * Allocate both the target array and offset array at once. | 160 | * Allocate both the target array and offset array at once. |
@@ -169,12 +168,7 @@ static int alloc_targets(struct dm_table *t, unsigned int num) | |||
169 | 168 | ||
170 | n_targets = (struct dm_target *) (n_highs + num); | 169 | n_targets = (struct dm_target *) (n_highs + num); |
171 | 170 | ||
172 | if (n) { | 171 | memset(n_highs, -1, sizeof(*n_highs) * num); |
173 | memcpy(n_highs, t->highs, sizeof(*n_highs) * n); | ||
174 | memcpy(n_targets, t->targets, sizeof(*n_targets) * n); | ||
175 | } | ||
176 | |||
177 | memset(n_highs + n, -1, sizeof(*n_highs) * (num - n)); | ||
178 | vfree(t->highs); | 172 | vfree(t->highs); |
179 | 173 | ||
180 | t->num_allocated = num; | 174 | t->num_allocated = num; |
@@ -261,17 +255,6 @@ void dm_table_destroy(struct dm_table *t) | |||
261 | } | 255 | } |
262 | 256 | ||
263 | /* | 257 | /* |
264 | * Checks to see if we need to extend highs or targets. | ||
265 | */ | ||
266 | static inline int check_space(struct dm_table *t) | ||
267 | { | ||
268 | if (t->num_targets >= t->num_allocated) | ||
269 | return alloc_targets(t, t->num_allocated * 2); | ||
270 | |||
271 | return 0; | ||
272 | } | ||
273 | |||
274 | /* | ||
275 | * See if we've already got a device in the list. | 258 | * See if we've already got a device in the list. |
276 | */ | 259 | */ |
277 | static struct dm_dev_internal *find_device(struct list_head *l, dev_t dev) | 260 | static struct dm_dev_internal *find_device(struct list_head *l, dev_t dev) |
@@ -731,8 +714,7 @@ int dm_table_add_target(struct dm_table *t, const char *type, | |||
731 | return -EINVAL; | 714 | return -EINVAL; |
732 | } | 715 | } |
733 | 716 | ||
734 | if ((r = check_space(t))) | 717 | BUG_ON(t->num_targets >= t->num_allocated); |
735 | return r; | ||
736 | 718 | ||
737 | tgt = t->targets + t->num_targets; | 719 | tgt = t->targets + t->num_targets; |
738 | memset(tgt, 0, sizeof(*tgt)); | 720 | memset(tgt, 0, sizeof(*tgt)); |
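With check_space() gone, a dm table must be created with its final target count up front, so alloc_targets() no longer needs the copy-and-grow path, and the BUG_ON() turns overflow into a loud programming error instead of a silently handled condition. The surviving trick is the combined allocation: one vmalloc() carries both the per-target "highs" array (the last sector of each target, used for target lookup) and the target array itself, with the highs preset to -1 (all bits set, an impossible sector) to mark unused slots. A standalone sketch of that layout (user-space C with malloc standing in for vmalloc, simplified stand-in types):

```c
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

typedef uint64_t sector_t;
struct dm_target_stub { void *private; sector_t begin, len; };

struct table_stub {
	unsigned num_allocated;
	sector_t *highs;		/* highs[i]: last sector of target i */
	struct dm_target_stub *targets;
};

static int alloc_targets_stub(struct table_stub *t, unsigned num)
{
	/* Both arrays share one allocation; targets start right after highs. */
	sector_t *n_highs = malloc(sizeof(*n_highs) * num +
				   sizeof(struct dm_target_stub) * num);
	if (!n_highs)
		return -1;	/* the kernel code returns -ENOMEM */

	t->targets = (struct dm_target_stub *)(n_highs + num);
	memset(n_highs, -1, sizeof(*n_highs) * num);	/* mark all slots empty */

	free(t->highs);		/* vfree() in the original */
	t->highs = n_highs;
	t->num_allocated = num;
	return 0;
}
```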
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 8a30ad54bd46..7da347665552 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c | |||
@@ -1349,6 +1349,12 @@ dm_thin_id dm_thin_dev_id(struct dm_thin_device *td) | |||
1349 | return td->id; | 1349 | return td->id; |
1350 | } | 1350 | } |
1351 | 1351 | ||
1352 | /* | ||
1353 | * Check whether @time (of block creation) is older than @td's last snapshot. | ||
1354 | * If so, then the associated block is shared with the last snapshot device. | ||
1355 | * Any block on a device created *after* the device last got snapshotted is | ||
1356 | * necessarily not shared. | ||
1357 | */ | ||
1352 | static bool __snapshotted_since(struct dm_thin_device *td, uint32_t time) | 1358 | static bool __snapshotted_since(struct dm_thin_device *td, uint32_t time) |
1353 | { | 1359 | { |
1354 | return td->snapshotted_time > time; | 1360 | return td->snapshotted_time > time; |
@@ -1458,6 +1464,20 @@ int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block) | |||
1458 | return r; | 1464 | return r; |
1459 | } | 1465 | } |
1460 | 1466 | ||
1467 | int dm_pool_block_is_used(struct dm_pool_metadata *pmd, dm_block_t b, bool *result) | ||
1468 | { | ||
1469 | int r; | ||
1470 | uint32_t ref_count; | ||
1471 | |||
1472 | down_read(&pmd->root_lock); | ||
1473 | r = dm_sm_get_count(pmd->data_sm, b, &ref_count); | ||
1474 | if (!r) | ||
1475 | *result = (ref_count != 0); | ||
1476 | up_read(&pmd->root_lock); | ||
1477 | |||
1478 | return r; | ||
1479 | } | ||
1480 | |||
1461 | bool dm_thin_changed_this_transaction(struct dm_thin_device *td) | 1481 | bool dm_thin_changed_this_transaction(struct dm_thin_device *td) |
1462 | { | 1482 | { |
1463 | int r; | 1483 | int r; |
diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h index 7bcc0e1d6238..9a368567632f 100644 --- a/drivers/md/dm-thin-metadata.h +++ b/drivers/md/dm-thin-metadata.h | |||
@@ -131,7 +131,7 @@ dm_thin_id dm_thin_dev_id(struct dm_thin_device *td); | |||
131 | 131 | ||
132 | struct dm_thin_lookup_result { | 132 | struct dm_thin_lookup_result { |
133 | dm_block_t block; | 133 | dm_block_t block; |
134 | unsigned shared:1; | 134 | bool shared:1; |
135 | }; | 135 | }; |
136 | 136 | ||
137 | /* | 137 | /* |
@@ -181,6 +181,8 @@ int dm_pool_get_data_block_size(struct dm_pool_metadata *pmd, sector_t *result); | |||
181 | 181 | ||
182 | int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result); | 182 | int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result); |
183 | 183 | ||
184 | int dm_pool_block_is_used(struct dm_pool_metadata *pmd, dm_block_t b, bool *result); | ||
185 | |||
184 | /* | 186 | /* |
185 | * Returns -ENOSPC if the new size is too small and already allocated | 187 | * Returns -ENOSPC if the new size is too small and already allocated |
186 | * blocks would be lost. | 188 | * blocks would be lost. |
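dm_pool_block_is_used() lets the thin target ask the data space map, under the metadata root_lock, whether a data block still carries a non-zero reference count. Its consumer is the reworked discard passdown below: when the mapping was shared at discard time, the decision is deferred until the unmap has gone through, and the discard is only forwarded to the data device if the block is genuinely free by then. The caller-side shape, condensed (names from the diff; an error from the lookup falls into the conservative branch):

```c
/*
 * Sketch: forward a discard to the data device only when the block is
 * provably unreferenced. If it is still used (say, by a snapshot) or
 * the refcount lookup fails, complete the bio without discarding --
 * discarding a shared block would corrupt the other holder.
 */
static void maybe_passdown(struct thin_c *tc, struct dm_thin_new_mapping *m)
{
	bool used = false;

	if (dm_pool_block_is_used(tc->pool->pmd, m->data_block, &used) || used)
		bio_endio(m->bio, 0);	/* play it safe: no passdown */
	else
		remap_and_issue(tc, m->bio, m->data_block);
}
```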
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index ee29037ffc2e..726228b33a01 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c | |||
@@ -144,6 +144,7 @@ struct pool_features { | |||
144 | bool zero_new_blocks:1; | 144 | bool zero_new_blocks:1; |
145 | bool discard_enabled:1; | 145 | bool discard_enabled:1; |
146 | bool discard_passdown:1; | 146 | bool discard_passdown:1; |
147 | bool error_if_no_space:1; | ||
147 | }; | 148 | }; |
148 | 149 | ||
149 | struct thin_c; | 150 | struct thin_c; |
@@ -163,8 +164,7 @@ struct pool { | |||
163 | int sectors_per_block_shift; | 164 | int sectors_per_block_shift; |
164 | 165 | ||
165 | struct pool_features pf; | 166 | struct pool_features pf; |
166 | unsigned low_water_triggered:1; /* A dm event has been sent */ | 167 | bool low_water_triggered:1; /* A dm event has been sent */ |
167 | unsigned no_free_space:1; /* A -ENOSPC warning has been issued */ | ||
168 | 168 | ||
169 | struct dm_bio_prison *prison; | 169 | struct dm_bio_prison *prison; |
170 | struct dm_kcopyd_client *copier; | 170 | struct dm_kcopyd_client *copier; |
@@ -198,7 +198,8 @@ struct pool { | |||
198 | }; | 198 | }; |
199 | 199 | ||
200 | static enum pool_mode get_pool_mode(struct pool *pool); | 200 | static enum pool_mode get_pool_mode(struct pool *pool); |
201 | static void set_pool_mode(struct pool *pool, enum pool_mode mode); | 201 | static void out_of_data_space(struct pool *pool); |
202 | static void metadata_operation_failed(struct pool *pool, const char *op, int r); | ||
202 | 203 | ||
203 | /* | 204 | /* |
204 | * Target context for a pool. | 205 | * Target context for a pool. |
@@ -509,15 +510,16 @@ static void remap_and_issue(struct thin_c *tc, struct bio *bio, | |||
509 | struct dm_thin_new_mapping { | 510 | struct dm_thin_new_mapping { |
510 | struct list_head list; | 511 | struct list_head list; |
511 | 512 | ||
512 | unsigned quiesced:1; | 513 | bool quiesced:1; |
513 | unsigned prepared:1; | 514 | bool prepared:1; |
514 | unsigned pass_discard:1; | 515 | bool pass_discard:1; |
516 | bool definitely_not_shared:1; | ||
515 | 517 | ||
518 | int err; | ||
516 | struct thin_c *tc; | 519 | struct thin_c *tc; |
517 | dm_block_t virt_block; | 520 | dm_block_t virt_block; |
518 | dm_block_t data_block; | 521 | dm_block_t data_block; |
519 | struct dm_bio_prison_cell *cell, *cell2; | 522 | struct dm_bio_prison_cell *cell, *cell2; |
520 | int err; | ||
521 | 523 | ||
522 | /* | 524 | /* |
523 | * If the bio covers the whole area of a block then we can avoid | 525 | * If the bio covers the whole area of a block then we can avoid |
@@ -534,7 +536,7 @@ static void __maybe_add_mapping(struct dm_thin_new_mapping *m) | |||
534 | struct pool *pool = m->tc->pool; | 536 | struct pool *pool = m->tc->pool; |
535 | 537 | ||
536 | if (m->quiesced && m->prepared) { | 538 | if (m->quiesced && m->prepared) { |
537 | list_add(&m->list, &pool->prepared_mappings); | 539 | list_add_tail(&m->list, &pool->prepared_mappings); |
538 | wake_worker(pool); | 540 | wake_worker(pool); |
539 | } | 541 | } |
540 | } | 542 | } |
@@ -548,7 +550,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context) | |||
548 | m->err = read_err || write_err ? -EIO : 0; | 550 | m->err = read_err || write_err ? -EIO : 0; |
549 | 551 | ||
550 | spin_lock_irqsave(&pool->lock, flags); | 552 | spin_lock_irqsave(&pool->lock, flags); |
551 | m->prepared = 1; | 553 | m->prepared = true; |
552 | __maybe_add_mapping(m); | 554 | __maybe_add_mapping(m); |
553 | spin_unlock_irqrestore(&pool->lock, flags); | 555 | spin_unlock_irqrestore(&pool->lock, flags); |
554 | } | 556 | } |
@@ -563,7 +565,7 @@ static void overwrite_endio(struct bio *bio, int err) | |||
563 | m->err = err; | 565 | m->err = err; |
564 | 566 | ||
565 | spin_lock_irqsave(&pool->lock, flags); | 567 | spin_lock_irqsave(&pool->lock, flags); |
566 | m->prepared = 1; | 568 | m->prepared = true; |
567 | __maybe_add_mapping(m); | 569 | __maybe_add_mapping(m); |
568 | spin_unlock_irqrestore(&pool->lock, flags); | 570 | spin_unlock_irqrestore(&pool->lock, flags); |
569 | } | 571 | } |
@@ -640,9 +642,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) | |||
640 | */ | 642 | */ |
641 | r = dm_thin_insert_block(tc->td, m->virt_block, m->data_block); | 643 | r = dm_thin_insert_block(tc->td, m->virt_block, m->data_block); |
642 | if (r) { | 644 | if (r) { |
643 | DMERR_LIMIT("%s: dm_thin_insert_block() failed: error = %d", | 645 | metadata_operation_failed(pool, "dm_thin_insert_block", r); |
644 | dm_device_name(pool->pool_md), r); | ||
645 | set_pool_mode(pool, PM_READ_ONLY); | ||
646 | cell_error(pool, m->cell); | 646 | cell_error(pool, m->cell); |
647 | goto out; | 647 | goto out; |
648 | } | 648 | } |
@@ -683,7 +683,15 @@ static void process_prepared_discard_passdown(struct dm_thin_new_mapping *m) | |||
683 | cell_defer_no_holder(tc, m->cell2); | 683 | cell_defer_no_holder(tc, m->cell2); |
684 | 684 | ||
685 | if (m->pass_discard) | 685 | if (m->pass_discard) |
686 | remap_and_issue(tc, m->bio, m->data_block); | 686 | if (m->definitely_not_shared) |
687 | remap_and_issue(tc, m->bio, m->data_block); | ||
688 | else { | ||
689 | bool used = false; | ||
690 | if (dm_pool_block_is_used(tc->pool->pmd, m->data_block, &used) || used) | ||
691 | bio_endio(m->bio, 0); | ||
692 | else | ||
693 | remap_and_issue(tc, m->bio, m->data_block); | ||
694 | } | ||
687 | else | 695 | else |
688 | bio_endio(m->bio, 0); | 696 | bio_endio(m->bio, 0); |
689 | 697 | ||
@@ -751,13 +759,17 @@ static int ensure_next_mapping(struct pool *pool) | |||
751 | 759 | ||
752 | static struct dm_thin_new_mapping *get_next_mapping(struct pool *pool) | 760 | static struct dm_thin_new_mapping *get_next_mapping(struct pool *pool) |
753 | { | 761 | { |
754 | struct dm_thin_new_mapping *r = pool->next_mapping; | 762 | struct dm_thin_new_mapping *m = pool->next_mapping; |
755 | 763 | ||
756 | BUG_ON(!pool->next_mapping); | 764 | BUG_ON(!pool->next_mapping); |
757 | 765 | ||
766 | memset(m, 0, sizeof(struct dm_thin_new_mapping)); | ||
767 | INIT_LIST_HEAD(&m->list); | ||
768 | m->bio = NULL; | ||
769 | |||
758 | pool->next_mapping = NULL; | 770 | pool->next_mapping = NULL; |
759 | 771 | ||
760 | return r; | 772 | return m; |
761 | } | 773 | } |
762 | 774 | ||
763 | static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, | 775 | static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, |
@@ -769,18 +781,13 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, | |||
769 | struct pool *pool = tc->pool; | 781 | struct pool *pool = tc->pool; |
770 | struct dm_thin_new_mapping *m = get_next_mapping(pool); | 782 | struct dm_thin_new_mapping *m = get_next_mapping(pool); |
771 | 783 | ||
772 | INIT_LIST_HEAD(&m->list); | ||
773 | m->quiesced = 0; | ||
774 | m->prepared = 0; | ||
775 | m->tc = tc; | 784 | m->tc = tc; |
776 | m->virt_block = virt_block; | 785 | m->virt_block = virt_block; |
777 | m->data_block = data_dest; | 786 | m->data_block = data_dest; |
778 | m->cell = cell; | 787 | m->cell = cell; |
779 | m->err = 0; | ||
780 | m->bio = NULL; | ||
781 | 788 | ||
782 | if (!dm_deferred_set_add_work(pool->shared_read_ds, &m->list)) | 789 | if (!dm_deferred_set_add_work(pool->shared_read_ds, &m->list)) |
783 | m->quiesced = 1; | 790 | m->quiesced = true; |
784 | 791 | ||
785 | /* | 792 | /* |
786 | * IO to pool_dev remaps to the pool target's data_dev. | 793 | * IO to pool_dev remaps to the pool target's data_dev. |
@@ -840,15 +847,12 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, | |||
840 | struct pool *pool = tc->pool; | 847 | struct pool *pool = tc->pool; |
841 | struct dm_thin_new_mapping *m = get_next_mapping(pool); | 848 | struct dm_thin_new_mapping *m = get_next_mapping(pool); |
842 | 849 | ||
843 | INIT_LIST_HEAD(&m->list); | 850 | m->quiesced = true; |
844 | m->quiesced = 1; | 851 | m->prepared = false; |
845 | m->prepared = 0; | ||
846 | m->tc = tc; | 852 | m->tc = tc; |
847 | m->virt_block = virt_block; | 853 | m->virt_block = virt_block; |
848 | m->data_block = data_block; | 854 | m->data_block = data_block; |
849 | m->cell = cell; | 855 | m->cell = cell; |
850 | m->err = 0; | ||
851 | m->bio = NULL; | ||
852 | 856 | ||
853 | /* | 857 | /* |
854 | * If the whole block of data is being overwritten or we are not | 858 | * If the whole block of data is being overwritten or we are not |
@@ -895,41 +899,42 @@ static int commit(struct pool *pool) | |||
895 | return -EINVAL; | 899 | return -EINVAL; |
896 | 900 | ||
897 | r = dm_pool_commit_metadata(pool->pmd); | 901 | r = dm_pool_commit_metadata(pool->pmd); |
898 | if (r) { | 902 | if (r) |
899 | DMERR_LIMIT("%s: dm_pool_commit_metadata failed: error = %d", | 903 | metadata_operation_failed(pool, "dm_pool_commit_metadata", r); |
900 | dm_device_name(pool->pool_md), r); | ||
901 | set_pool_mode(pool, PM_READ_ONLY); | ||
902 | } | ||
903 | 904 | ||
904 | return r; | 905 | return r; |
905 | } | 906 | } |
906 | 907 | ||
907 | static int alloc_data_block(struct thin_c *tc, dm_block_t *result) | 908 | static void check_low_water_mark(struct pool *pool, dm_block_t free_blocks) |
908 | { | 909 | { |
909 | int r; | ||
910 | dm_block_t free_blocks; | ||
911 | unsigned long flags; | 910 | unsigned long flags; |
912 | struct pool *pool = tc->pool; | ||
913 | |||
914 | /* | ||
915 | * Once no_free_space is set we must not allow allocation to succeed. | ||
916 | * Otherwise it is difficult to explain, debug, test and support. | ||
917 | */ | ||
918 | if (pool->no_free_space) | ||
919 | return -ENOSPC; | ||
920 | |||
921 | r = dm_pool_get_free_block_count(pool->pmd, &free_blocks); | ||
922 | if (r) | ||
923 | return r; | ||
924 | 911 | ||
925 | if (free_blocks <= pool->low_water_blocks && !pool->low_water_triggered) { | 912 | if (free_blocks <= pool->low_water_blocks && !pool->low_water_triggered) { |
926 | DMWARN("%s: reached low water mark for data device: sending event.", | 913 | DMWARN("%s: reached low water mark for data device: sending event.", |
927 | dm_device_name(pool->pool_md)); | 914 | dm_device_name(pool->pool_md)); |
928 | spin_lock_irqsave(&pool->lock, flags); | 915 | spin_lock_irqsave(&pool->lock, flags); |
929 | pool->low_water_triggered = 1; | 916 | pool->low_water_triggered = true; |
930 | spin_unlock_irqrestore(&pool->lock, flags); | 917 | spin_unlock_irqrestore(&pool->lock, flags); |
931 | dm_table_event(pool->ti->table); | 918 | dm_table_event(pool->ti->table); |
932 | } | 919 | } |
920 | } | ||
921 | |||
922 | static int alloc_data_block(struct thin_c *tc, dm_block_t *result) | ||
923 | { | ||
924 | int r; | ||
925 | dm_block_t free_blocks; | ||
926 | struct pool *pool = tc->pool; | ||
927 | |||
928 | if (get_pool_mode(pool) != PM_WRITE) | ||
929 | return -EINVAL; | ||
930 | |||
931 | r = dm_pool_get_free_block_count(pool->pmd, &free_blocks); | ||
932 | if (r) { | ||
933 | metadata_operation_failed(pool, "dm_pool_get_free_block_count", r); | ||
934 | return r; | ||
935 | } | ||
936 | |||
937 | check_low_water_mark(pool, free_blocks); | ||
933 | 938 | ||
934 | if (!free_blocks) { | 939 | if (!free_blocks) { |
935 | /* | 940 | /* |
@@ -941,35 +946,20 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result) | |||
941 | return r; | 946 | return r; |
942 | 947 | ||
943 | r = dm_pool_get_free_block_count(pool->pmd, &free_blocks); | 948 | r = dm_pool_get_free_block_count(pool->pmd, &free_blocks); |
944 | if (r) | 949 | if (r) { |
950 | metadata_operation_failed(pool, "dm_pool_get_free_block_count", r); | ||
945 | return r; | 951 | return r; |
952 | } | ||
946 | 953 | ||
947 | /* | ||
948 | * If we still have no space we set a flag to avoid | ||
949 | * doing all this checking and return -ENOSPC. This | ||
950 | * flag serves as a latch that disallows allocations from | ||
951 | * this pool until the admin takes action (e.g. resize or | ||
952 | * table reload). | ||
953 | */ | ||
954 | if (!free_blocks) { | 954 | if (!free_blocks) { |
955 | DMWARN("%s: no free data space available.", | 955 | out_of_data_space(pool); |
956 | dm_device_name(pool->pool_md)); | ||
957 | spin_lock_irqsave(&pool->lock, flags); | ||
958 | pool->no_free_space = 1; | ||
959 | spin_unlock_irqrestore(&pool->lock, flags); | ||
960 | return -ENOSPC; | 956 | return -ENOSPC; |
961 | } | 957 | } |
962 | } | 958 | } |
963 | 959 | ||
964 | r = dm_pool_alloc_data_block(pool->pmd, result); | 960 | r = dm_pool_alloc_data_block(pool->pmd, result); |
965 | if (r) { | 961 | if (r) { |
966 | if (r == -ENOSPC && | 962 | metadata_operation_failed(pool, "dm_pool_alloc_data_block", r); |
967 | !dm_pool_get_free_metadata_block_count(pool->pmd, &free_blocks) && | ||
968 | !free_blocks) { | ||
969 | DMWARN("%s: no free metadata space available.", | ||
970 | dm_device_name(pool->pool_md)); | ||
971 | set_pool_mode(pool, PM_READ_ONLY); | ||
972 | } | ||
973 | return r; | 963 | return r; |
974 | } | 964 | } |
975 | 965 | ||
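After this refactor, alloc_data_block() tells a single linear story: refuse unless the pool is writable, report every metadata failure through metadata_operation_failed(), fire the low-water event from its own helper, retry once after a commit when the free count reaches zero (a commit can make blocks freed in the previous transaction allocatable again), and finally latch the pool read-only via out_of_data_space(). Reassembled from the hunks above as one outline:

```c
/* Outline of the reworked allocator; every failure funnels into the
 * two helpers instead of open-coded set_pool_mode(PM_READ_ONLY). */
static int alloc_data_block_outline(struct thin_c *tc, dm_block_t *result)
{
	struct pool *pool = tc->pool;
	dm_block_t free_blocks;
	int r;

	if (get_pool_mode(pool) != PM_WRITE)
		return -EINVAL;		/* never allocate while degraded */

	r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
	if (r) {
		metadata_operation_failed(pool, "dm_pool_get_free_block_count", r);
		return r;
	}
	check_low_water_mark(pool, free_blocks);

	if (!free_blocks) {
		r = commit(pool);	/* may free blocks from the last transaction */
		if (r)
			return r;	/* commit() already reported the failure */

		r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
		if (r) {
			metadata_operation_failed(pool, "dm_pool_get_free_block_count", r);
			return r;
		}
		if (!free_blocks) {
			out_of_data_space(pool);	/* logs and goes read-only */
			return -ENOSPC;
		}
	}

	r = dm_pool_alloc_data_block(pool->pmd, result);
	if (r)
		metadata_operation_failed(pool, "dm_pool_alloc_data_block", r);
	return r;
}
```

Note what this removes: the old no_free_space latch is gone entirely, replaced by the pool mode itself, so "out of data space" and "metadata failure" now degrade the pool through one mechanism instead of two.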
@@ -992,7 +982,21 @@ static void retry_on_resume(struct bio *bio) | |||
992 | spin_unlock_irqrestore(&pool->lock, flags); | 982 | spin_unlock_irqrestore(&pool->lock, flags); |
993 | } | 983 | } |
994 | 984 | ||
995 | static void no_space(struct pool *pool, struct dm_bio_prison_cell *cell) | 985 | static void handle_unserviceable_bio(struct pool *pool, struct bio *bio) |
986 | { | ||
987 | /* | ||
988 | * When the pool is read-only, no cell locking is needed because | ||
989 | * nothing is changing. | ||
990 | */ | ||
991 | WARN_ON_ONCE(get_pool_mode(pool) != PM_READ_ONLY); | ||
992 | |||
993 | if (pool->pf.error_if_no_space) | ||
994 | bio_io_error(bio); | ||
995 | else | ||
996 | retry_on_resume(bio); | ||
997 | } | ||
998 | |||
999 | static void retry_bios_on_resume(struct pool *pool, struct dm_bio_prison_cell *cell) | ||
996 | { | 1000 | { |
997 | struct bio *bio; | 1001 | struct bio *bio; |
998 | struct bio_list bios; | 1002 | struct bio_list bios; |
@@ -1001,7 +1005,7 @@ static void no_space(struct pool *pool, struct dm_bio_prison_cell *cell) | |||
1001 | cell_release(pool, cell, &bios); | 1005 | cell_release(pool, cell, &bios); |
1002 | 1006 | ||
1003 | while ((bio = bio_list_pop(&bios))) | 1007 | while ((bio = bio_list_pop(&bios))) |
1004 | retry_on_resume(bio); | 1008 | handle_unserviceable_bio(pool, bio); |
1005 | } | 1009 | } |
1006 | 1010 | ||
1007 | static void process_discard(struct thin_c *tc, struct bio *bio) | 1011 | static void process_discard(struct thin_c *tc, struct bio *bio) |
@@ -1040,17 +1044,17 @@ static void process_discard(struct thin_c *tc, struct bio *bio) | |||
1040 | */ | 1044 | */ |
1041 | m = get_next_mapping(pool); | 1045 | m = get_next_mapping(pool); |
1042 | m->tc = tc; | 1046 | m->tc = tc; |
1043 | m->pass_discard = (!lookup_result.shared) && pool->pf.discard_passdown; | 1047 | m->pass_discard = pool->pf.discard_passdown; |
1048 | m->definitely_not_shared = !lookup_result.shared; | ||
1044 | m->virt_block = block; | 1049 | m->virt_block = block; |
1045 | m->data_block = lookup_result.block; | 1050 | m->data_block = lookup_result.block; |
1046 | m->cell = cell; | 1051 | m->cell = cell; |
1047 | m->cell2 = cell2; | 1052 | m->cell2 = cell2; |
1048 | m->err = 0; | ||
1049 | m->bio = bio; | 1053 | m->bio = bio; |
1050 | 1054 | ||
1051 | if (!dm_deferred_set_add_work(pool->all_io_ds, &m->list)) { | 1055 | if (!dm_deferred_set_add_work(pool->all_io_ds, &m->list)) { |
1052 | spin_lock_irqsave(&pool->lock, flags); | 1056 | spin_lock_irqsave(&pool->lock, flags); |
1053 | list_add(&m->list, &pool->prepared_discards); | 1057 | list_add_tail(&m->list, &pool->prepared_discards); |
1054 | spin_unlock_irqrestore(&pool->lock, flags); | 1058 | spin_unlock_irqrestore(&pool->lock, flags); |
1055 | wake_worker(pool); | 1059 | wake_worker(pool); |
1056 | } | 1060 | } |
@@ -1105,13 +1109,12 @@ static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block, | |||
1105 | break; | 1109 | break; |
1106 | 1110 | ||
1107 | case -ENOSPC: | 1111 | case -ENOSPC: |
1108 | no_space(pool, cell); | 1112 | retry_bios_on_resume(pool, cell); |
1109 | break; | 1113 | break; |
1110 | 1114 | ||
1111 | default: | 1115 | default: |
1112 | DMERR_LIMIT("%s: alloc_data_block() failed: error = %d", | 1116 | DMERR_LIMIT("%s: alloc_data_block() failed: error = %d", |
1113 | __func__, r); | 1117 | __func__, r); |
1114 | set_pool_mode(pool, PM_READ_ONLY); | ||
1115 | cell_error(pool, cell); | 1118 | cell_error(pool, cell); |
1116 | break; | 1119 | break; |
1117 | } | 1120 | } |
@@ -1184,13 +1187,12 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block | |||
1184 | break; | 1187 | break; |
1185 | 1188 | ||
1186 | case -ENOSPC: | 1189 | case -ENOSPC: |
1187 | no_space(pool, cell); | 1190 | retry_bios_on_resume(pool, cell); |
1188 | break; | 1191 | break; |
1189 | 1192 | ||
1190 | default: | 1193 | default: |
1191 | DMERR_LIMIT("%s: alloc_data_block() failed: error = %d", | 1194 | DMERR_LIMIT("%s: alloc_data_block() failed: error = %d", |
1192 | __func__, r); | 1195 | __func__, r); |
1193 | set_pool_mode(pool, PM_READ_ONLY); | ||
1194 | cell_error(pool, cell); | 1196 | cell_error(pool, cell); |
1195 | break; | 1197 | break; |
1196 | } | 1198 | } |
@@ -1257,7 +1259,7 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio) | |||
1257 | switch (r) { | 1259 | switch (r) { |
1258 | case 0: | 1260 | case 0: |
1259 | if (lookup_result.shared && (rw == WRITE) && bio->bi_size) | 1261 | if (lookup_result.shared && (rw == WRITE) && bio->bi_size) |
1260 | bio_io_error(bio); | 1262 | handle_unserviceable_bio(tc->pool, bio); |
1261 | else { | 1263 | else { |
1262 | inc_all_io_entry(tc->pool, bio); | 1264 | inc_all_io_entry(tc->pool, bio); |
1263 | remap_and_issue(tc, bio, lookup_result.block); | 1265 | remap_and_issue(tc, bio, lookup_result.block); |
@@ -1266,7 +1268,7 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio) | |||
1266 | 1268 | ||
1267 | case -ENODATA: | 1269 | case -ENODATA: |
1268 | if (rw != READ) { | 1270 | if (rw != READ) { |
1269 | bio_io_error(bio); | 1271 | handle_unserviceable_bio(tc->pool, bio); |
1270 | break; | 1272 | break; |
1271 | } | 1273 | } |
1272 | 1274 | ||
@@ -1390,16 +1392,16 @@ static enum pool_mode get_pool_mode(struct pool *pool) | |||
1390 | return pool->pf.mode; | 1392 | return pool->pf.mode; |
1391 | } | 1393 | } |
1392 | 1394 | ||
1393 | static void set_pool_mode(struct pool *pool, enum pool_mode mode) | 1395 | static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) |
1394 | { | 1396 | { |
1395 | int r; | 1397 | int r; |
1398 | enum pool_mode old_mode = pool->pf.mode; | ||
1396 | 1399 | ||
1397 | pool->pf.mode = mode; | 1400 | switch (new_mode) { |
1398 | |||
1399 | switch (mode) { | ||
1400 | case PM_FAIL: | 1401 | case PM_FAIL: |
1401 | DMERR("%s: switching pool to failure mode", | 1402 | if (old_mode != new_mode) |
1402 | dm_device_name(pool->pool_md)); | 1403 | DMERR("%s: switching pool to failure mode", |
1404 | dm_device_name(pool->pool_md)); | ||
1403 | dm_pool_metadata_read_only(pool->pmd); | 1405 | dm_pool_metadata_read_only(pool->pmd); |
1404 | pool->process_bio = process_bio_fail; | 1406 | pool->process_bio = process_bio_fail; |
1405 | pool->process_discard = process_bio_fail; | 1407 | pool->process_discard = process_bio_fail; |
@@ -1408,13 +1410,15 @@ static void set_pool_mode(struct pool *pool, enum pool_mode mode) | |||
1408 | break; | 1410 | break; |
1409 | 1411 | ||
1410 | case PM_READ_ONLY: | 1412 | case PM_READ_ONLY: |
1411 | DMERR("%s: switching pool to read-only mode", | 1413 | if (old_mode != new_mode) |
1412 | dm_device_name(pool->pool_md)); | 1414 | DMERR("%s: switching pool to read-only mode", |
1415 | dm_device_name(pool->pool_md)); | ||
1413 | r = dm_pool_abort_metadata(pool->pmd); | 1416 | r = dm_pool_abort_metadata(pool->pmd); |
1414 | if (r) { | 1417 | if (r) { |
1415 | DMERR("%s: aborting transaction failed", | 1418 | DMERR("%s: aborting transaction failed", |
1416 | dm_device_name(pool->pool_md)); | 1419 | dm_device_name(pool->pool_md)); |
1417 | set_pool_mode(pool, PM_FAIL); | 1420 | new_mode = PM_FAIL; |
1421 | set_pool_mode(pool, new_mode); | ||
1418 | } else { | 1422 | } else { |
1419 | dm_pool_metadata_read_only(pool->pmd); | 1423 | dm_pool_metadata_read_only(pool->pmd); |
1420 | pool->process_bio = process_bio_read_only; | 1424 | pool->process_bio = process_bio_read_only; |
@@ -1425,6 +1429,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode mode) | |||
1425 | break; | 1429 | break; |
1426 | 1430 | ||
1427 | case PM_WRITE: | 1431 | case PM_WRITE: |
1432 | if (old_mode != new_mode) | ||
1433 | DMINFO("%s: switching pool to write mode", | ||
1434 | dm_device_name(pool->pool_md)); | ||
1428 | dm_pool_metadata_read_write(pool->pmd); | 1435 | dm_pool_metadata_read_write(pool->pmd); |
1429 | pool->process_bio = process_bio; | 1436 | pool->process_bio = process_bio; |
1430 | pool->process_discard = process_discard; | 1437 | pool->process_discard = process_discard; |
@@ -1432,6 +1439,35 @@ static void set_pool_mode(struct pool *pool, enum pool_mode mode) | |||
1432 | pool->process_prepared_discard = process_prepared_discard; | 1439 | pool->process_prepared_discard = process_prepared_discard; |
1433 | break; | 1440 | break; |
1434 | } | 1441 | } |
1442 | |||
1443 | pool->pf.mode = new_mode; | ||
1444 | } | ||
1445 | |||
1446 | /* | ||
1447 | * Rather than calling set_pool_mode() directly, use these helpers, which | ||
1448 | * describe the reason for the mode degradation. | ||
1449 | */ | ||
1450 | static void out_of_data_space(struct pool *pool) | ||
1451 | { | ||
1452 | DMERR_LIMIT("%s: no free data space available.", | ||
1453 | dm_device_name(pool->pool_md)); | ||
1454 | set_pool_mode(pool, PM_READ_ONLY); | ||
1455 | } | ||
1456 | |||
1457 | static void metadata_operation_failed(struct pool *pool, const char *op, int r) | ||
1458 | { | ||
1459 | dm_block_t free_blocks; | ||
1460 | |||
1461 | DMERR_LIMIT("%s: metadata operation '%s' failed: error = %d", | ||
1462 | dm_device_name(pool->pool_md), op, r); | ||
1463 | |||
1464 | if (r == -ENOSPC && | ||
1465 | !dm_pool_get_free_metadata_block_count(pool->pmd, &free_blocks) && | ||
1466 | !free_blocks) | ||
1467 | DMERR_LIMIT("%s: no free metadata space available.", | ||
1468 | dm_device_name(pool->pool_md)); | ||
1469 | |||
1470 | set_pool_mode(pool, PM_READ_ONLY); | ||
1435 | } | 1471 | } |
1436 | 1472 | ||
1437 | /*----------------------------------------------------------------*/ | 1473 | /*----------------------------------------------------------------*/ |
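Beyond introducing the two helpers, set_pool_mode() changed in two quiet but important ways: the "switching pool to ..." messages are only emitted on a real transition (old_mode != new_mode), so a stream of failing operations no longer floods the log, and pool->pf.mode is now assigned once at the end, after the process_* function pointers have been swapped, closing the window in which the advertised mode and the installed handlers disagreed. Call sites shrink to the shape seen in process_prepared_mapping() above; for instance (fragment recast as a sketch):

```c
/* Post-series call-site shape: the helper owns the logging and the
 * read-only transition; the caller only does its local cleanup. */
static void insert_mapping(struct pool *pool, struct dm_thin_new_mapping *m)
{
	int r = dm_thin_insert_block(m->tc->td, m->virt_block, m->data_block);

	if (r) {
		metadata_operation_failed(pool, "dm_thin_insert_block", r);
		cell_error(pool, m->cell);
	}
}
```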
@@ -1538,9 +1574,9 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) | |||
1538 | if (get_pool_mode(tc->pool) == PM_READ_ONLY) { | 1574 | if (get_pool_mode(tc->pool) == PM_READ_ONLY) { |
1539 | /* | 1575 | /* |
1540 | * This block isn't provisioned, and we have no way | 1576 | * This block isn't provisioned, and we have no way |
1541 | * of doing so. Just error it. | 1577 | * of doing so. |
1542 | */ | 1578 | */ |
1543 | bio_io_error(bio); | 1579 | handle_unserviceable_bio(tc->pool, bio); |
1544 | return DM_MAPIO_SUBMITTED; | 1580 | return DM_MAPIO_SUBMITTED; |
1545 | } | 1581 | } |
1546 | /* fall through */ | 1582 | /* fall through */ |
@@ -1648,6 +1684,17 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti) | |||
1648 | enum pool_mode new_mode = pt->adjusted_pf.mode; | 1684 | enum pool_mode new_mode = pt->adjusted_pf.mode; |
1649 | 1685 | ||
1650 | /* | 1686 | /* |
1687 | * Don't change the pool's mode until set_pool_mode() below. | ||
1688 | * Otherwise the pool's process_* function pointers may | ||
1689 | * not match the desired pool mode. | ||
1690 | */ | ||
1691 | pt->adjusted_pf.mode = old_mode; | ||
1692 | |||
1693 | pool->ti = ti; | ||
1694 | pool->pf = pt->adjusted_pf; | ||
1695 | pool->low_water_blocks = pt->low_water_blocks; | ||
1696 | |||
1697 | /* | ||
1651 | * If we were in PM_FAIL mode, rollback of metadata failed. We're | 1698 | * If we were in PM_FAIL mode, rollback of metadata failed. We're |
1652 | * not going to recover without a thin_repair. So we never let the | 1699 | * not going to recover without a thin_repair. So we never let the |
1653 | * pool move out of the old mode. On the other hand a PM_READ_ONLY | 1700 | * pool move out of the old mode. On the other hand a PM_READ_ONLY |
@@ -1657,10 +1704,6 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti) | |||
1657 | if (old_mode == PM_FAIL) | 1704 | if (old_mode == PM_FAIL) |
1658 | new_mode = old_mode; | 1705 | new_mode = old_mode; |
1659 | 1706 | ||
1660 | pool->ti = ti; | ||
1661 | pool->low_water_blocks = pt->low_water_blocks; | ||
1662 | pool->pf = pt->adjusted_pf; | ||
1663 | |||
1664 | set_pool_mode(pool, new_mode); | 1707 | set_pool_mode(pool, new_mode); |
1665 | 1708 | ||
1666 | return 0; | 1709 | return 0; |
@@ -1682,6 +1725,7 @@ static void pool_features_init(struct pool_features *pf) | |||
1682 | pf->zero_new_blocks = true; | 1725 | pf->zero_new_blocks = true; |
1683 | pf->discard_enabled = true; | 1726 | pf->discard_enabled = true; |
1684 | pf->discard_passdown = true; | 1727 | pf->discard_passdown = true; |
1728 | pf->error_if_no_space = false; | ||
1685 | } | 1729 | } |
1686 | 1730 | ||
1687 | static void __pool_destroy(struct pool *pool) | 1731 | static void __pool_destroy(struct pool *pool) |
@@ -1772,8 +1816,7 @@ static struct pool *pool_create(struct mapped_device *pool_md, | |||
1772 | bio_list_init(&pool->deferred_flush_bios); | 1816 | bio_list_init(&pool->deferred_flush_bios); |
1773 | INIT_LIST_HEAD(&pool->prepared_mappings); | 1817 | INIT_LIST_HEAD(&pool->prepared_mappings); |
1774 | INIT_LIST_HEAD(&pool->prepared_discards); | 1818 | INIT_LIST_HEAD(&pool->prepared_discards); |
1775 | pool->low_water_triggered = 0; | 1819 | pool->low_water_triggered = false; |
1776 | pool->no_free_space = 0; | ||
1777 | bio_list_init(&pool->retry_on_resume_list); | 1820 | bio_list_init(&pool->retry_on_resume_list); |
1778 | 1821 | ||
1779 | pool->shared_read_ds = dm_deferred_set_create(); | 1822 | pool->shared_read_ds = dm_deferred_set_create(); |
@@ -1898,7 +1941,7 @@ static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf, | |||
1898 | const char *arg_name; | 1941 | const char *arg_name; |
1899 | 1942 | ||
1900 | static struct dm_arg _args[] = { | 1943 | static struct dm_arg _args[] = { |
1901 | {0, 3, "Invalid number of pool feature arguments"}, | 1944 | {0, 4, "Invalid number of pool feature arguments"}, |
1902 | }; | 1945 | }; |
1903 | 1946 | ||
1904 | /* | 1947 | /* |
@@ -1927,6 +1970,9 @@ static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf, | |||
1927 | else if (!strcasecmp(arg_name, "read_only")) | 1970 | else if (!strcasecmp(arg_name, "read_only")) |
1928 | pf->mode = PM_READ_ONLY; | 1971 | pf->mode = PM_READ_ONLY; |
1929 | 1972 | ||
1973 | else if (!strcasecmp(arg_name, "error_if_no_space")) | ||
1974 | pf->error_if_no_space = true; | ||
1975 | |||
1930 | else { | 1976 | else { |
1931 | ti->error = "Unrecognised pool feature requested"; | 1977 | ti->error = "Unrecognised pool feature requested"; |
1932 | r = -EINVAL; | 1978 | r = -EINVAL; |
@@ -1997,6 +2043,8 @@ static dm_block_t calc_metadata_threshold(struct pool_c *pt) | |||
1997 | * skip_block_zeroing: skips the zeroing of newly-provisioned blocks. | 2043 | * skip_block_zeroing: skips the zeroing of newly-provisioned blocks. |
1998 | * ignore_discard: disable discard | 2044 | * ignore_discard: disable discard |
1999 | * no_discard_passdown: don't pass discards down to the data device | 2045 | * no_discard_passdown: don't pass discards down to the data device |
2046 | * read_only: Don't allow any changes to be made to the pool metadata. | ||
2047 | * error_if_no_space: error IOs, instead of queueing, if no space. | ||
2000 | */ | 2048 | */ |
2001 | static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) | 2049 | static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) |
2002 | { | 2050 | { |
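For context, a pool table line enabling the new behaviour simply appends the flag after the feature count; a hypothetical example (device names and sizes invented for illustration) would be `0 2097152 thin-pool /dev/mapper/meta /dev/mapper/data 128 0 1 error_if_no_space`, where the trailing `1 error_if_no_space` is the optional-feature count and the feature word parsed by parse_pool_features() above, whose argument ceiling was raised from 3 to 4 to make room for it.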
@@ -2192,11 +2240,13 @@ static int maybe_resize_data_dev(struct dm_target *ti, bool *need_commit) | |||
2192 | return -EINVAL; | 2240 | return -EINVAL; |
2193 | 2241 | ||
2194 | } else if (data_size > sb_data_size) { | 2242 | } else if (data_size > sb_data_size) { |
2243 | if (sb_data_size) | ||
2244 | DMINFO("%s: growing the data device from %llu to %llu blocks", | ||
2245 | dm_device_name(pool->pool_md), | ||
2246 | sb_data_size, (unsigned long long)data_size); | ||
2195 | r = dm_pool_resize_data_dev(pool->pmd, data_size); | 2247 | r = dm_pool_resize_data_dev(pool->pmd, data_size); |
2196 | if (r) { | 2248 | if (r) { |
2197 | DMERR("%s: failed to resize data device", | 2249 | metadata_operation_failed(pool, "dm_pool_resize_data_dev", r); |
2198 | dm_device_name(pool->pool_md)); | ||
2199 | set_pool_mode(pool, PM_READ_ONLY); | ||
2200 | return r; | 2250 | return r; |
2201 | } | 2251 | } |
2202 | 2252 | ||
@@ -2231,10 +2281,12 @@ static int maybe_resize_metadata_dev(struct dm_target *ti, bool *need_commit) | |||
2231 | return -EINVAL; | 2281 | return -EINVAL; |
2232 | 2282 | ||
2233 | } else if (metadata_dev_size > sb_metadata_dev_size) { | 2283 | } else if (metadata_dev_size > sb_metadata_dev_size) { |
2284 | DMINFO("%s: growing the metadata device from %llu to %llu blocks", | ||
2285 | dm_device_name(pool->pool_md), | ||
2286 | sb_metadata_dev_size, metadata_dev_size); | ||
2234 | r = dm_pool_resize_metadata_dev(pool->pmd, metadata_dev_size); | 2287 | r = dm_pool_resize_metadata_dev(pool->pmd, metadata_dev_size); |
2235 | if (r) { | 2288 | if (r) { |
2236 | DMERR("%s: failed to resize metadata device", | 2289 | metadata_operation_failed(pool, "dm_pool_resize_metadata_dev", r); |
2237 | dm_device_name(pool->pool_md)); | ||
2238 | return r; | 2290 | return r; |
2239 | } | 2291 | } |
2240 | 2292 | ||
@@ -2290,8 +2342,7 @@ static void pool_resume(struct dm_target *ti) | |||
2290 | unsigned long flags; | 2342 | unsigned long flags; |
2291 | 2343 | ||
2292 | spin_lock_irqsave(&pool->lock, flags); | 2344 | spin_lock_irqsave(&pool->lock, flags); |
2293 | pool->low_water_triggered = 0; | 2345 | pool->low_water_triggered = false; |
2294 | pool->no_free_space = 0; | ||
2295 | __requeue_bios(pool); | 2346 | __requeue_bios(pool); |
2296 | spin_unlock_irqrestore(&pool->lock, flags); | 2347 | spin_unlock_irqrestore(&pool->lock, flags); |
2297 | 2348 | ||
@@ -2510,7 +2561,8 @@ static void emit_flags(struct pool_features *pf, char *result, | |||
2510 | unsigned sz, unsigned maxlen) | 2561 | unsigned sz, unsigned maxlen) |
2511 | { | 2562 | { |
2512 | unsigned count = !pf->zero_new_blocks + !pf->discard_enabled + | 2563 | unsigned count = !pf->zero_new_blocks + !pf->discard_enabled + |
2513 | !pf->discard_passdown + (pf->mode == PM_READ_ONLY); | 2564 | !pf->discard_passdown + (pf->mode == PM_READ_ONLY) + |
2565 | pf->error_if_no_space; | ||
2514 | DMEMIT("%u ", count); | 2566 | DMEMIT("%u ", count); |
2515 | 2567 | ||
2516 | if (!pf->zero_new_blocks) | 2568 | if (!pf->zero_new_blocks) |
@@ -2524,6 +2576,9 @@ static void emit_flags(struct pool_features *pf, char *result, | |||
2524 | 2576 | ||
2525 | if (pf->mode == PM_READ_ONLY) | 2577 | if (pf->mode == PM_READ_ONLY) |
2526 | DMEMIT("read_only "); | 2578 | DMEMIT("read_only "); |
2579 | |||
2580 | if (pf->error_if_no_space) | ||
2581 | DMEMIT("error_if_no_space "); | ||
2527 | } | 2582 | } |
2528 | 2583 | ||
2529 | /* | 2584 | /* |
@@ -2618,11 +2673,16 @@ static void pool_status(struct dm_target *ti, status_type_t type, | |||
2618 | DMEMIT("rw "); | 2673 | DMEMIT("rw "); |
2619 | 2674 | ||
2620 | if (!pool->pf.discard_enabled) | 2675 | if (!pool->pf.discard_enabled) |
2621 | DMEMIT("ignore_discard"); | 2676 | DMEMIT("ignore_discard "); |
2622 | else if (pool->pf.discard_passdown) | 2677 | else if (pool->pf.discard_passdown) |
2623 | DMEMIT("discard_passdown"); | 2678 | DMEMIT("discard_passdown "); |
2679 | else | ||
2680 | DMEMIT("no_discard_passdown "); | ||
2681 | |||
2682 | if (pool->pf.error_if_no_space) | ||
2683 | DMEMIT("error_if_no_space "); | ||
2624 | else | 2684 | else |
2625 | DMEMIT("no_discard_passdown"); | 2685 | DMEMIT("queue_if_no_space "); |
2626 | 2686 | ||
2627 | break; | 2687 | break; |
2628 | 2688 | ||
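Two user-visible consequences of this status hunk: the discard feature words are now emitted with trailing spaces (previously `ignore_discard`/`discard_passdown`/`no_discard_passdown` were written without one), and the no-space policy is always reported. A default pool's status line therefore now ends in something like `rw discard_passdown queue_if_no_space` (illustrative output; the leading count fields are elided here), so status-parsing scripts should match on whole words rather than exact line tails.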
@@ -2721,7 +2781,7 @@ static struct target_type pool_target = { | |||
2721 | .name = "thin-pool", | 2781 | .name = "thin-pool", |
2722 | .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | | 2782 | .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | |
2723 | DM_TARGET_IMMUTABLE, | 2783 | DM_TARGET_IMMUTABLE, |
2724 | .version = {1, 9, 0}, | 2784 | .version = {1, 10, 0}, |
2725 | .module = THIS_MODULE, | 2785 | .module = THIS_MODULE, |
2726 | .ctr = pool_ctr, | 2786 | .ctr = pool_ctr, |
2727 | .dtr = pool_dtr, | 2787 | .dtr = pool_dtr, |
@@ -2899,7 +2959,7 @@ static int thin_endio(struct dm_target *ti, struct bio *bio, int err) | |||
2899 | spin_lock_irqsave(&pool->lock, flags); | 2959 | spin_lock_irqsave(&pool->lock, flags); |
2900 | list_for_each_entry_safe(m, tmp, &work, list) { | 2960 | list_for_each_entry_safe(m, tmp, &work, list) { |
2901 | list_del(&m->list); | 2961 | list_del(&m->list); |
2902 | m->quiesced = 1; | 2962 | m->quiesced = true; |
2903 | __maybe_add_mapping(m); | 2963 | __maybe_add_mapping(m); |
2904 | } | 2964 | } |
2905 | spin_unlock_irqrestore(&pool->lock, flags); | 2965 | spin_unlock_irqrestore(&pool->lock, flags); |
@@ -2911,7 +2971,7 @@ static int thin_endio(struct dm_target *ti, struct bio *bio, int err) | |||
2911 | if (!list_empty(&work)) { | 2971 | if (!list_empty(&work)) { |
2912 | spin_lock_irqsave(&pool->lock, flags); | 2972 | spin_lock_irqsave(&pool->lock, flags); |
2913 | list_for_each_entry_safe(m, tmp, &work, list) | 2973 | list_for_each_entry_safe(m, tmp, &work, list) |
2914 | list_add(&m->list, &pool->prepared_discards); | 2974 | list_add_tail(&m->list, &pool->prepared_discards); |
2915 | spin_unlock_irqrestore(&pool->lock, flags); | 2975 | spin_unlock_irqrestore(&pool->lock, flags); |
2916 | wake_worker(pool); | 2976 | wake_worker(pool); |
2917 | } | 2977 | } |
@@ -3008,7 +3068,7 @@ static int thin_iterate_devices(struct dm_target *ti, | |||
3008 | 3068 | ||
3009 | static struct target_type thin_target = { | 3069 | static struct target_type thin_target = { |
3010 | .name = "thin", | 3070 | .name = "thin", |
3011 | .version = {1, 9, 0}, | 3071 | .version = {1, 10, 0}, |
3012 | .module = THIS_MODULE, | 3072 | .module = THIS_MODULE, |
3013 | .ctr = thin_ctr, | 3073 | .ctr = thin_ctr, |
3014 | .dtr = thin_dtr, | 3074 | .dtr = thin_dtr, |
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 0704c523a76b..b49c76284241 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
@@ -200,8 +200,8 @@ struct mapped_device { | |||
200 | /* forced geometry settings */ | 200 | /* forced geometry settings */ |
201 | struct hd_geometry geometry; | 201 | struct hd_geometry geometry; |
202 | 202 | ||
203 | /* sysfs handle */ | 203 | /* kobject and completion */ |
204 | struct kobject kobj; | 204 | struct dm_kobject_holder kobj_holder; |
205 | 205 | ||
206 | /* zero-length flush that will be cloned and submitted to targets */ | 206 | /* zero-length flush that will be cloned and submitted to targets */ |
207 | struct bio flush_bio; | 207 | struct bio flush_bio; |
@@ -2041,6 +2041,7 @@ static struct mapped_device *alloc_dev(int minor) | |||
2041 | init_waitqueue_head(&md->wait); | 2041 | init_waitqueue_head(&md->wait); |
2042 | INIT_WORK(&md->work, dm_wq_work); | 2042 | INIT_WORK(&md->work, dm_wq_work); |
2043 | init_waitqueue_head(&md->eventq); | 2043 | init_waitqueue_head(&md->eventq); |
2044 | init_completion(&md->kobj_holder.completion); | ||
2044 | 2045 | ||
2045 | md->disk->major = _major; | 2046 | md->disk->major = _major; |
2046 | md->disk->first_minor = minor; | 2047 | md->disk->first_minor = minor; |
@@ -2902,20 +2903,14 @@ struct gendisk *dm_disk(struct mapped_device *md) | |||
2902 | 2903 | ||
2903 | struct kobject *dm_kobject(struct mapped_device *md) | 2904 | struct kobject *dm_kobject(struct mapped_device *md) |
2904 | { | 2905 | { |
2905 | return &md->kobj; | 2906 | return &md->kobj_holder.kobj; |
2906 | } | 2907 | } |
2907 | 2908 | ||
2908 | /* | ||
2909 | * struct mapped_device should not be exported outside of dm.c | ||
2910 | * so use this check to verify that kobj is part of md structure | ||
2911 | */ | ||
2912 | struct mapped_device *dm_get_from_kobject(struct kobject *kobj) | 2909 | struct mapped_device *dm_get_from_kobject(struct kobject *kobj) |
2913 | { | 2910 | { |
2914 | struct mapped_device *md; | 2911 | struct mapped_device *md; |
2915 | 2912 | ||
2916 | md = container_of(kobj, struct mapped_device, kobj); | 2913 | md = container_of(kobj, struct mapped_device, kobj_holder.kobj); |
2917 | if (&md->kobj != kobj) | ||
2918 | return NULL; | ||
2919 | 2914 | ||
2920 | if (test_bit(DMF_FREEING, &md->flags) || | 2915 | if (test_bit(DMF_FREEING, &md->flags) || |
2921 | dm_deleting_md(md)) | 2916 | dm_deleting_md(md)) |
diff --git a/drivers/md/dm.h b/drivers/md/dm.h index c57ba550f69e..c4569f02f50f 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h | |||
@@ -15,6 +15,8 @@ | |||
15 | #include <linux/list.h> | 15 | #include <linux/list.h> |
16 | #include <linux/blkdev.h> | 16 | #include <linux/blkdev.h> |
17 | #include <linux/hdreg.h> | 17 | #include <linux/hdreg.h> |
18 | #include <linux/completion.h> | ||
19 | #include <linux/kobject.h> | ||
18 | 20 | ||
19 | #include "dm-stats.h" | 21 | #include "dm-stats.h" |
20 | 22 | ||
@@ -148,12 +150,27 @@ void dm_interface_exit(void); | |||
148 | /* | 150 | /* |
149 | * sysfs interface | 151 | * sysfs interface |
150 | */ | 152 | */ |
153 | struct dm_kobject_holder { | ||
154 | struct kobject kobj; | ||
155 | struct completion completion; | ||
156 | }; | ||
157 | |||
158 | static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj) | ||
159 | { | ||
160 | return &container_of(kobj, struct dm_kobject_holder, kobj)->completion; | ||
161 | } | ||
162 | |||
151 | int dm_sysfs_init(struct mapped_device *md); | 163 | int dm_sysfs_init(struct mapped_device *md); |
152 | void dm_sysfs_exit(struct mapped_device *md); | 164 | void dm_sysfs_exit(struct mapped_device *md); |
153 | struct kobject *dm_kobject(struct mapped_device *md); | 165 | struct kobject *dm_kobject(struct mapped_device *md); |
154 | struct mapped_device *dm_get_from_kobject(struct kobject *kobj); | 166 | struct mapped_device *dm_get_from_kobject(struct kobject *kobj); |
155 | 167 | ||
156 | /* | 168 | /* |
169 | * The kobject helper | ||
170 | */ | ||
171 | void dm_kobject_release(struct kobject *kobj); | ||
172 | |||
173 | /* | ||
157 | * Targets for linear and striped mappings | 174 | * Targets for linear and striped mappings |
158 | */ | 175 | */ |
159 | int dm_linear_init(void); | 176 | int dm_linear_init(void); |
diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index 064a3c271baa..455f79279a16 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c | |||
@@ -104,7 +104,7 @@ static int __check_holder(struct block_lock *lock) | |||
104 | 104 | ||
105 | for (i = 0; i < MAX_HOLDERS; i++) { | 105 | for (i = 0; i < MAX_HOLDERS; i++) { |
106 | if (lock->holders[i] == current) { | 106 | if (lock->holders[i] == current) { |
107 | DMERR("recursive lock detected in pool metadata"); | 107 | DMERR("recursive lock detected in metadata"); |
108 | #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING | 108 | #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING |
109 | DMERR("previously held here:"); | 109 | DMERR("previously held here:"); |
110 | print_stack_trace(lock->traces + i, 4); | 110 | print_stack_trace(lock->traces + i, 4); |
diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index 468e371ee9b2..416060c25709 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c | |||
@@ -770,8 +770,8 @@ EXPORT_SYMBOL_GPL(dm_btree_insert_notify); | |||
770 | 770 | ||
771 | /*----------------------------------------------------------------*/ | 771 | /*----------------------------------------------------------------*/ |
772 | 772 | ||
773 | static int find_highest_key(struct ro_spine *s, dm_block_t block, | 773 | static int find_key(struct ro_spine *s, dm_block_t block, bool find_highest, |
774 | uint64_t *result_key, dm_block_t *next_block) | 774 | uint64_t *result_key, dm_block_t *next_block) |
775 | { | 775 | { |
776 | int i, r; | 776 | int i, r; |
777 | uint32_t flags; | 777 | uint32_t flags; |
@@ -788,7 +788,11 @@ static int find_highest_key(struct ro_spine *s, dm_block_t block, | |||
788 | else | 788 | else |
789 | i--; | 789 | i--; |
790 | 790 | ||
791 | *result_key = le64_to_cpu(ro_node(s)->keys[i]); | 791 | if (find_highest) |
792 | *result_key = le64_to_cpu(ro_node(s)->keys[i]); | ||
793 | else | ||
794 | *result_key = le64_to_cpu(ro_node(s)->keys[0]); | ||
795 | |||
792 | if (next_block || flags & INTERNAL_NODE) | 796 | if (next_block || flags & INTERNAL_NODE) |
793 | block = value64(ro_node(s), i); | 797 | block = value64(ro_node(s), i); |
794 | 798 | ||
@@ -799,16 +803,16 @@ static int find_highest_key(struct ro_spine *s, dm_block_t block, | |||
799 | return 0; | 803 | return 0; |
800 | } | 804 | } |
801 | 805 | ||
802 | int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root, | 806 | static int dm_btree_find_key(struct dm_btree_info *info, dm_block_t root, |
803 | uint64_t *result_keys) | 807 | bool find_highest, uint64_t *result_keys) |
804 | { | 808 | { |
805 | int r = 0, count = 0, level; | 809 | int r = 0, count = 0, level; |
806 | struct ro_spine spine; | 810 | struct ro_spine spine; |
807 | 811 | ||
808 | init_ro_spine(&spine, info); | 812 | init_ro_spine(&spine, info); |
809 | for (level = 0; level < info->levels; level++) { | 813 | for (level = 0; level < info->levels; level++) { |
810 | r = find_highest_key(&spine, root, result_keys + level, | 814 | r = find_key(&spine, root, find_highest, result_keys + level, |
811 | level == info->levels - 1 ? NULL : &root); | 815 | level == info->levels - 1 ? NULL : &root); |
812 | if (r == -ENODATA) { | 816 | if (r == -ENODATA) { |
813 | r = 0; | 817 | r = 0; |
814 | break; | 818 | break; |
@@ -822,8 +826,23 @@ int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root, | |||
822 | 826 | ||
823 | return r ? r : count; | 827 | return r ? r : count; |
824 | } | 828 | } |
829 | |||
830 | int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root, | ||
831 | uint64_t *result_keys) | ||
832 | { | ||
833 | return dm_btree_find_key(info, root, true, result_keys); | ||
834 | } | ||
825 | EXPORT_SYMBOL_GPL(dm_btree_find_highest_key); | 835 | EXPORT_SYMBOL_GPL(dm_btree_find_highest_key); |
826 | 836 | ||
837 | int dm_btree_find_lowest_key(struct dm_btree_info *info, dm_block_t root, | ||
838 | uint64_t *result_keys) | ||
839 | { | ||
840 | return dm_btree_find_key(info, root, false, result_keys); | ||
841 | } | ||
842 | EXPORT_SYMBOL_GPL(dm_btree_find_lowest_key); | ||
843 | |||
844 | /*----------------------------------------------------------------*/ | ||
845 | |||
827 | /* | 846 | /* |
828 | * FIXME: We shouldn't use a recursive algorithm when we have limited stack | 847 | * FIXME: We shouldn't use a recursive algorithm when we have limited stack |
829 | * space. Also this only works for single level trees. | 848 | * space. Also this only works for single level trees. |
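The refactor folds both searches into one walker: find_key() descends the spine as before and reports either the last populated key (keys[i], for the highest) or the first key (keys[0], for the lowest) of the nodes it visits. One caveat for callers: both exported functions return the number of levels for which a key was filled in, not a plain 0 on success. A hypothetical caller wanting the key span of a single-level btree might look like (sketch):

```c
/*
 * Sketch: fetch the smallest and largest keys of a one-level btree.
 * Both helpers return < 0 on error; otherwise the number of levels
 * filled in (so 0 means the tree is empty and no span exists).
 */
static int btree_key_span(struct dm_btree_info *info, dm_block_t root,
			  uint64_t *lo, uint64_t *hi)
{
	int r = dm_btree_find_lowest_key(info, root, lo);

	if (r <= 0)
		return r;	/* error, or empty tree */

	r = dm_btree_find_highest_key(info, root, hi);
	return r <= 0 ? r : 1;
}
```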
diff --git a/drivers/md/persistent-data/dm-btree.h b/drivers/md/persistent-data/dm-btree.h index 8672d159e0b5..dacfc34180b4 100644 --- a/drivers/md/persistent-data/dm-btree.h +++ b/drivers/md/persistent-data/dm-btree.h | |||
@@ -137,6 +137,14 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, | |||
137 | /* | 137 | /* |
138 | * Returns < 0 on failure. Otherwise the number of key entries that have | 138 | * Returns < 0 on failure. Otherwise the number of key entries that have |
139 | * been filled out. Remember trees can have zero entries, and as such have | 139 | * been filled out. Remember trees can have zero entries, and as such have |
140 | * no lowest key. | ||
141 | */ | ||
142 | int dm_btree_find_lowest_key(struct dm_btree_info *info, dm_block_t root, | ||
143 | uint64_t *result_keys); | ||
144 | |||
145 | /* | ||
146 | * Returns < 0 on failure. Otherwise the number of key entries that have | ||
147 | * been filled out. Remember trees can have zero entries, and as such have | ||
140 | * no highest key. | 148 | * no highest key. |
141 | */ | 149 | */ |
142 | int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root, | 150 | int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root, |
diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index 466a60bbd716..aacbe70c2c2e 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c | |||
@@ -245,6 +245,10 @@ int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks) | |||
245 | return -EINVAL; | 245 | return -EINVAL; |
246 | } | 246 | } |
247 | 247 | ||
248 | /* | ||
249 | * We need to set this before the dm_tm_new_block() call below. | ||
250 | */ | ||
251 | ll->nr_blocks = nr_blocks; | ||
248 | for (i = old_blocks; i < blocks; i++) { | 252 | for (i = old_blocks; i < blocks; i++) { |
249 | struct dm_block *b; | 253 | struct dm_block *b; |
250 | struct disk_index_entry idx; | 254 | struct disk_index_entry idx; |
@@ -252,6 +256,7 @@ int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks) | |||
252 | r = dm_tm_new_block(ll->tm, &dm_sm_bitmap_validator, &b); | 256 | r = dm_tm_new_block(ll->tm, &dm_sm_bitmap_validator, &b); |
253 | if (r < 0) | 257 | if (r < 0) |
254 | return r; | 258 | return r; |
259 | |||
255 | idx.blocknr = cpu_to_le64(dm_block_location(b)); | 260 | idx.blocknr = cpu_to_le64(dm_block_location(b)); |
256 | 261 | ||
257 | r = dm_tm_unlock(ll->tm, b); | 262 | r = dm_tm_unlock(ll->tm, b); |
@@ -266,7 +271,6 @@ int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks) | |||
266 | return r; | 271 | return r; |
267 | } | 272 | } |
268 | 273 | ||
269 | ll->nr_blocks = nr_blocks; | ||
270 | return 0; | 274 | return 0; |
271 | } | 275 | } |
272 | 276 | ||
diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index 58fc1eef7499..536782e3bcb7 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c | |||
@@ -385,13 +385,13 @@ static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b) | |||
385 | 385 | ||
386 | int r = sm_metadata_new_block_(sm, b); | 386 | int r = sm_metadata_new_block_(sm, b); |
387 | if (r) { | 387 | if (r) { |
388 | DMERR("unable to allocate new metadata block"); | 388 | DMERR_LIMIT("unable to allocate new metadata block"); |
389 | return r; | 389 | return r; |
390 | } | 390 | } |
391 | 391 | ||
392 | r = sm_metadata_get_nr_free(sm, &count); | 392 | r = sm_metadata_get_nr_free(sm, &count); |
393 | if (r) { | 393 | if (r) { |
394 | DMERR("couldn't get free block count"); | 394 | DMERR_LIMIT("couldn't get free block count"); |
395 | return r; | 395 | return r; |
396 | } | 396 | } |
397 | 397 | ||
@@ -608,20 +608,38 @@ static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks) | |||
608 | * Flick into a mode where all blocks get allocated in the new area. | 608 | * Flick into a mode where all blocks get allocated in the new area. |
609 | */ | 609 | */ |
610 | smm->begin = old_len; | 610 | smm->begin = old_len; |
611 | memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm)); | 611 | memcpy(sm, &bootstrap_ops, sizeof(*sm)); |
612 | 612 | ||
613 | /* | 613 | /* |
614 | * Extend. | 614 | * Extend. |
615 | */ | 615 | */ |
616 | r = sm_ll_extend(&smm->ll, extra_blocks); | 616 | r = sm_ll_extend(&smm->ll, extra_blocks); |
617 | if (r) | ||
618 | goto out; | ||
617 | 619 | ||
618 | /* | 620 | /* |
619 | * Switch back to normal behaviour. | 621 | * We repeatedly increment then commit until the commit doesn't |
622 | * allocate any new blocks. | ||
620 | */ | 623 | */ |
621 | memcpy(&smm->sm, &ops, sizeof(smm->sm)); | 624 | do { |
622 | for (i = old_len; !r && i < smm->begin; i++) | 625 | for (i = old_len; !r && i < smm->begin; i++) { |
623 | r = sm_ll_inc(&smm->ll, i, &ev); | 626 | r = sm_ll_inc(&smm->ll, i, &ev); |
627 | if (r) | ||
628 | goto out; | ||
629 | } | ||
630 | old_len = smm->begin; | ||
631 | |||
632 | r = sm_ll_commit(&smm->ll); | ||
633 | if (r) | ||
634 | goto out; | ||
635 | |||
636 | } while (old_len != smm->begin); | ||
624 | 637 | ||
638 | out: | ||
639 | /* | ||
640 | * Switch back to normal behaviour. | ||
641 | */ | ||
642 | memcpy(sm, &ops, sizeof(*sm)); | ||
625 | return r; | 643 | return r; |
626 | } | 644 | } |
627 | 645 | ||
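The subtlety sm_metadata_extend() has to cope with is self-reference: the metadata space map tracks the very blocks it is stored in, so incrementing the reference counts of the newly added blocks can itself allocate fresh space-map blocks (under the bootstrap ops), and those allocations then also need their counts incremented. Hence the loop above runs increment-then-commit to a fixed point, stopping only when a commit allocates nothing more and smm->begin stops moving; the switch back to the normal ops was also moved behind the `out:` label so it happens on error paths too. The idiom, stripped of the space-map machinery (runnable user-space sketch):

```c
#include <stdio.h>

/*
 * Fixed-point sketch: processing an item can append more items, so we
 * sweep repeatedly until a whole pass adds nothing. sm_metadata_extend()
 * does the same with "blocks whose refcount still needs incrementing".
 */
int main(void)
{
	int pending = 10;	/* blocks added by the initial extend */
	int done = 0;

	do {
		int end = pending;	/* snapshot, like old_len = smm->begin */

		while (done < end)
			if (++done % 4 == 0)	/* stand-in: "inc allocated a block" */
				pending++;
	} while (done != pending);	/* like old_len != smm->begin */

	printf("settled after processing %d items\n", done);
	return 0;
}
```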