-rw-r--r--  Documentation/device-mapper/cache.txt                10
-rw-r--r--  drivers/md/dm-bufio.c                                  5
-rw-r--r--  drivers/md/dm-cache-policy-mq.c                       13
-rw-r--r--  drivers/md/dm-cache-target.c                           2
-rw-r--r--  drivers/md/dm-delay.c                                 23
-rw-r--r--  drivers/md/dm-snap.c                                  71
-rw-r--r--  drivers/md/dm-stats.c                                  1
-rw-r--r--  drivers/md/dm-table.c                                  5
-rw-r--r--  drivers/md/dm-thin-metadata.c                          8
-rw-r--r--  drivers/md/dm-thin-metadata.h                          1
-rw-r--r--  drivers/md/dm-thin.c                                  66
-rw-r--r--  drivers/md/persistent-data/dm-array.c                 10
-rw-r--r--  drivers/md/persistent-data/dm-block-manager.c          6
-rw-r--r--  drivers/md/persistent-data/dm-block-manager.h          7
-rw-r--r--  drivers/md/persistent-data/dm-space-map-common.c      32
-rw-r--r--  drivers/md/persistent-data/dm-space-map-metadata.c     8
16 files changed, 197 insertions, 71 deletions
diff --git a/Documentation/device-mapper/cache.txt b/Documentation/device-mapper/cache.txt
index 274752f8bdf9..719320b5ed3f 100644
--- a/Documentation/device-mapper/cache.txt
+++ b/Documentation/device-mapper/cache.txt
@@ -266,10 +266,12 @@ E.g.
 Invalidation is removing an entry from the cache without writing it
 back. Cache blocks can be invalidated via the invalidate_cblocks
 message, which takes an arbitrary number of cblock ranges. Each cblock
-must be expressed as a decimal value, in the future a variant message
-that takes cblock ranges expressed in hexidecimal may be needed to
-better support efficient invalidation of larger caches. The cache must
-be in passthrough mode when invalidate_cblocks is used.
+range's end value is "one past the end", meaning 5-10 expresses a range
+of values from 5 to 9. Each cblock must be expressed as a decimal
+value, in the future a variant message that takes cblock ranges
+expressed in hexidecimal may be needed to better support efficient
+invalidation of larger caches. The cache must be in passthrough mode
+when invalidate_cblocks is used.
 
    invalidate_cblocks [<cblock>|<cblock begin>-<cblock end>]*
 
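To make the new half-open semantics concrete (example values purely illustrative), a message such as

   invalidate_cblocks 5-10 87

invalidates cblocks 5, 6, 7, 8 and 9 plus the single cblock 87; cblock 10 itself is never touched.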
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 173cbb20d104..54bdd923316f 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -1717,6 +1717,11 @@ static int __init dm_bufio_init(void)
 {
         __u64 mem;
 
+        dm_bufio_allocated_kmem_cache = 0;
+        dm_bufio_allocated_get_free_pages = 0;
+        dm_bufio_allocated_vmalloc = 0;
+        dm_bufio_current_allocated = 0;
+
         memset(&dm_bufio_caches, 0, sizeof dm_bufio_caches);
         memset(&dm_bufio_cache_names, 0, sizeof dm_bufio_cache_names);
 
diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c
index 416b7b752a6e..64780ad73bb0 100644
--- a/drivers/md/dm-cache-policy-mq.c
+++ b/drivers/md/dm-cache-policy-mq.c
@@ -730,15 +730,18 @@ static int pre_cache_entry_found(struct mq_policy *mq, struct entry *e,
         int r = 0;
         bool updated = updated_this_tick(mq, e);
 
-        requeue_and_update_tick(mq, e);
-
         if ((!discarded_oblock && updated) ||
-            !should_promote(mq, e, discarded_oblock, data_dir))
+            !should_promote(mq, e, discarded_oblock, data_dir)) {
+                requeue_and_update_tick(mq, e);
                 result->op = POLICY_MISS;
-        else if (!can_migrate)
+
+        } else if (!can_migrate)
                 r = -EWOULDBLOCK;
-        else
+
+        else {
+                requeue_and_update_tick(mq, e);
                 r = pre_cache_to_cache(mq, e, result);
+        }
 
         return r;
 }
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 9efcf1059b99..1b1469ebe5cb 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -2755,7 +2755,7 @@ static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size)
 {
         int r;
 
-        r = dm_cache_resize(cache->cmd, cache->cache_size);
+        r = dm_cache_resize(cache->cmd, new_size);
         if (r) {
                 DMERR("could not resize cache metadata");
                 return r;
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 496d5f3646a5..2f91d6d4a2cc 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -20,6 +20,7 @@
 struct delay_c {
         struct timer_list delay_timer;
         struct mutex timer_lock;
+        struct workqueue_struct *kdelayd_wq;
         struct work_struct flush_expired_bios;
         struct list_head delayed_bios;
         atomic_t may_delay;
@@ -45,14 +46,13 @@ struct dm_delay_info {
 
 static DEFINE_MUTEX(delayed_bios_lock);
 
-static struct workqueue_struct *kdelayd_wq;
 static struct kmem_cache *delayed_cache;
 
 static void handle_delayed_timer(unsigned long data)
 {
         struct delay_c *dc = (struct delay_c *)data;
 
-        queue_work(kdelayd_wq, &dc->flush_expired_bios);
+        queue_work(dc->kdelayd_wq, &dc->flush_expired_bios);
 }
 
 static void queue_timeout(struct delay_c *dc, unsigned long expires)
@@ -191,6 +191,12 @@ out:
                 goto bad_dev_write;
         }
 
+        dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
+        if (!dc->kdelayd_wq) {
+                DMERR("Couldn't start kdelayd");
+                goto bad_queue;
+        }
+
         setup_timer(&dc->delay_timer, handle_delayed_timer, (unsigned long)dc);
 
         INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
@@ -203,6 +209,8 @@ out:
         ti->private = dc;
         return 0;
 
+bad_queue:
+        mempool_destroy(dc->delayed_pool);
 bad_dev_write:
         if (dc->dev_write)
                 dm_put_device(ti, dc->dev_write);
@@ -217,7 +225,7 @@ static void delay_dtr(struct dm_target *ti)
 {
         struct delay_c *dc = ti->private;
 
-        flush_workqueue(kdelayd_wq);
+        destroy_workqueue(dc->kdelayd_wq);
 
         dm_put_device(ti, dc->dev_read);
 
@@ -350,12 +358,6 @@ static int __init dm_delay_init(void)
 {
         int r = -ENOMEM;
 
-        kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
-        if (!kdelayd_wq) {
-                DMERR("Couldn't start kdelayd");
-                goto bad_queue;
-        }
-
         delayed_cache = KMEM_CACHE(dm_delay_info, 0);
         if (!delayed_cache) {
                 DMERR("Couldn't create delayed bio cache.");
@@ -373,8 +375,6 @@ static int __init dm_delay_init(void)
 bad_register:
         kmem_cache_destroy(delayed_cache);
 bad_memcache:
-        destroy_workqueue(kdelayd_wq);
-bad_queue:
         return r;
 }
 
@@ -382,7 +382,6 @@ static void __exit dm_delay_exit(void)
 {
         dm_unregister_target(&delay_target);
         kmem_cache_destroy(delayed_cache);
-        destroy_workqueue(kdelayd_wq);
 }
 
 /* Module hooks */
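Taken together, these dm-delay hunks move the "kdelayd" workqueue from a module-global into struct delay_c: each target instance now allocates its own queue in the constructor (with a bad_queue error path that unwinds the mempool) and destroys it in delay_dtr(), so the module init/exit paths no longer manage a shared queue.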
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index aec57d76db5d..944690bafd93 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -66,6 +66,18 @@ struct dm_snapshot {
 
         atomic_t pending_exceptions_count;
 
+        /* Protected by "lock" */
+        sector_t exception_start_sequence;
+
+        /* Protected by kcopyd single-threaded callback */
+        sector_t exception_complete_sequence;
+
+        /*
+         * A list of pending exceptions that completed out of order.
+         * Protected by kcopyd single-threaded callback.
+         */
+        struct list_head out_of_order_list;
+
         mempool_t *pending_pool;
 
         struct dm_exception_table pending;
@@ -173,6 +185,14 @@ struct dm_snap_pending_exception {
          */
         int started;
 
+        /* There was copying error. */
+        int copy_error;
+
+        /* A sequence number, it is used for in-order completion. */
+        sector_t exception_sequence;
+
+        struct list_head out_of_order_entry;
+
         /*
          * For writing a complete chunk, bypassing the copy.
          */
@@ -1094,6 +1114,9 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
         s->valid = 1;
         s->active = 0;
         atomic_set(&s->pending_exceptions_count, 0);
+        s->exception_start_sequence = 0;
+        s->exception_complete_sequence = 0;
+        INIT_LIST_HEAD(&s->out_of_order_list);
         init_rwsem(&s->lock);
         INIT_LIST_HEAD(&s->list);
         spin_lock_init(&s->pe_lock);
@@ -1443,6 +1466,19 @@ static void commit_callback(void *context, int success)
         pending_complete(pe, success);
 }
 
+static void complete_exception(struct dm_snap_pending_exception *pe)
+{
+        struct dm_snapshot *s = pe->snap;
+
+        if (unlikely(pe->copy_error))
+                pending_complete(pe, 0);
+
+        else
+                /* Update the metadata if we are persistent */
+                s->store->type->commit_exception(s->store, &pe->e,
+                                                 commit_callback, pe);
+}
+
 /*
  * Called when the copy I/O has finished. kcopyd actually runs
  * this code so don't block.
@@ -1452,13 +1488,32 @@ static void copy_callback(int read_err, unsigned long write_err, void *context)
         struct dm_snap_pending_exception *pe = context;
         struct dm_snapshot *s = pe->snap;
 
-        if (read_err || write_err)
-                pending_complete(pe, 0);
+        pe->copy_error = read_err || write_err;
 
-        else
-                /* Update the metadata if we are persistent */
-                s->store->type->commit_exception(s->store, &pe->e,
-                                                 commit_callback, pe);
+        if (pe->exception_sequence == s->exception_complete_sequence) {
+                s->exception_complete_sequence++;
+                complete_exception(pe);
+
+                while (!list_empty(&s->out_of_order_list)) {
+                        pe = list_entry(s->out_of_order_list.next,
+                                        struct dm_snap_pending_exception, out_of_order_entry);
+                        if (pe->exception_sequence != s->exception_complete_sequence)
+                                break;
+                        s->exception_complete_sequence++;
+                        list_del(&pe->out_of_order_entry);
+                        complete_exception(pe);
+                }
+        } else {
+                struct list_head *lh;
+                struct dm_snap_pending_exception *pe2;
+
+                list_for_each_prev(lh, &s->out_of_order_list) {
+                        pe2 = list_entry(lh, struct dm_snap_pending_exception, out_of_order_entry);
+                        if (pe2->exception_sequence < pe->exception_sequence)
+                                break;
+                }
+                list_add(&pe->out_of_order_entry, lh);
+        }
 }
 
 /*
@@ -1553,6 +1608,8 @@ __find_pending_exception(struct dm_snapshot *s,
                 return NULL;
         }
 
+        pe->exception_sequence = s->exception_start_sequence++;
+
         dm_insert_exception(&s->pending, &pe->e);
 
         return pe;
@@ -2192,7 +2249,7 @@ static struct target_type origin_target = {
 
 static struct target_type snapshot_target = {
         .name = "snapshot",
-        .version = {1, 11, 1},
+        .version = {1, 12, 0},
         .module = THIS_MODULE,
         .ctr = snapshot_ctr,
         .dtr = snapshot_dtr,
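The copy_callback() rework above commits exceptions strictly in issue order even though kcopyd may finish the copies out of order. A minimal userspace sketch of the same drain-in-sequence idea (illustrative names, not the kernel API; a flat array stands in for the sorted out_of_order_list):

#include <stdio.h>
#include <stdbool.h>

#define NR 5

static unsigned complete_seq;   /* next sequence allowed to commit */
static bool done[NR];           /* completions seen so far */

static void commit_exception(unsigned seq)
{
        printf("committing exception %u\n", seq);
}

static void copy_finished(unsigned seq)
{
        done[seq] = true;
        /* Drain everything now in order, mirroring the while loop above. */
        while (complete_seq < NR && done[complete_seq])
                commit_exception(complete_seq++);
}

int main(void)
{
        /* copies finish in the order 2, 0, 1, 4, 3; commits still run 0..4 */
        static const unsigned order[NR] = { 2, 0, 1, 4, 3 };
        for (int i = 0; i < NR; i++)
                copy_finished(order[i]);
        return 0;
}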
diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c
index 3d404c1371ed..28a90122a5a8 100644
--- a/drivers/md/dm-stats.c
+++ b/drivers/md/dm-stats.c
@@ -964,6 +964,7 @@ int dm_stats_message(struct mapped_device *md, unsigned argc, char **argv,
 
 int __init dm_statistics_init(void)
 {
+        shared_memory_amount = 0;
         dm_stat_need_rcu_barrier = 0;
         return 0;
 }
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 465f08ca62b1..3ba6a3859ce3 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -200,6 +200,11 @@ int dm_table_create(struct dm_table **result, fmode_t mode,
 
         num_targets = dm_round_up(num_targets, KEYS_PER_NODE);
 
+        if (!num_targets) {
+                kfree(t);
+                return -ENOMEM;
+        }
+
         if (alloc_targets(t, num_targets)) {
                 kfree(t);
                 return -ENOMEM;
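The new !num_targets check catches unsigned overflow in the round-up: a pathologically large requested target count can wrap to zero and make the later allocation sizing bogus. A userspace sketch of the failure mode (the KEYS_PER_NODE value here is illustrative; the kernel derives it from the btree node size):

#include <stdio.h>

#define KEYS_PER_NODE 8u        /* illustrative value */

static unsigned dm_round_up(unsigned n, unsigned sz)
{
        return ((n + sz - 1) / sz) * sz;  /* wraps if n + sz - 1 overflows */
}

int main(void)
{
        printf("%u\n", dm_round_up(10u, KEYS_PER_NODE));          /* 16 */
        printf("%u\n", dm_round_up(4294967295u, KEYS_PER_NODE));  /* 0: caught by !num_targets */
        return 0;
}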
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 60bce435f4fa..8a30ad54bd46 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -1697,6 +1697,14 @@ void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd)
         up_write(&pmd->root_lock);
 }
 
+void dm_pool_metadata_read_write(struct dm_pool_metadata *pmd)
+{
+        down_write(&pmd->root_lock);
+        pmd->read_only = false;
+        dm_bm_set_read_write(pmd->bm);
+        up_write(&pmd->root_lock);
+}
+
 int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd,
                                         dm_block_t threshold,
                                         dm_sm_threshold_fn fn,
diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h
index 845ebbe589a9..7bcc0e1d6238 100644
--- a/drivers/md/dm-thin-metadata.h
+++ b/drivers/md/dm-thin-metadata.h
@@ -193,6 +193,7 @@ int dm_pool_resize_metadata_dev(struct dm_pool_metadata *pmd, dm_block_t new_siz
  * that nothing is changing.
  */
 void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd);
+void dm_pool_metadata_read_write(struct dm_pool_metadata *pmd);
 
 int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd,
                                         dm_block_t threshold,
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 2c0cf511ec23..ee29037ffc2e 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -640,7 +640,9 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
          */
         r = dm_thin_insert_block(tc->td, m->virt_block, m->data_block);
         if (r) {
-                DMERR_LIMIT("dm_thin_insert_block() failed");
+                DMERR_LIMIT("%s: dm_thin_insert_block() failed: error = %d",
+                            dm_device_name(pool->pool_md), r);
+                set_pool_mode(pool, PM_READ_ONLY);
                 cell_error(pool, m->cell);
                 goto out;
         }
@@ -881,32 +883,23 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
         }
 }
 
-static int commit(struct pool *pool)
-{
-        int r;
-
-        r = dm_pool_commit_metadata(pool->pmd);
-        if (r)
-                DMERR_LIMIT("%s: commit failed: error = %d",
-                            dm_device_name(pool->pool_md), r);
-
-        return r;
-}
-
 /*
  * A non-zero return indicates read_only or fail_io mode.
  * Many callers don't care about the return value.
  */
-static int commit_or_fallback(struct pool *pool)
+static int commit(struct pool *pool)
 {
         int r;
 
         if (get_pool_mode(pool) != PM_WRITE)
                 return -EINVAL;
 
-        r = commit(pool);
-        if (r)
+        r = dm_pool_commit_metadata(pool->pmd);
+        if (r) {
+                DMERR_LIMIT("%s: dm_pool_commit_metadata failed: error = %d",
+                            dm_device_name(pool->pool_md), r);
                 set_pool_mode(pool, PM_READ_ONLY);
+        }
 
         return r;
 }
@@ -943,7 +936,9 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
                  * Try to commit to see if that will free up some
                  * more space.
                  */
-                (void) commit_or_fallback(pool);
+                r = commit(pool);
+                if (r)
+                        return r;
 
                 r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
                 if (r)
@@ -957,7 +952,7 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
                  * table reload).
                  */
                 if (!free_blocks) {
-                        DMWARN("%s: no free space available.",
+                        DMWARN("%s: no free data space available.",
                                dm_device_name(pool->pool_md));
                         spin_lock_irqsave(&pool->lock, flags);
                         pool->no_free_space = 1;
@@ -967,8 +962,16 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
         }
 
         r = dm_pool_alloc_data_block(pool->pmd, result);
-        if (r)
+        if (r) {
+                if (r == -ENOSPC &&
+                    !dm_pool_get_free_metadata_block_count(pool->pmd, &free_blocks) &&
+                    !free_blocks) {
+                        DMWARN("%s: no free metadata space available.",
+                               dm_device_name(pool->pool_md));
+                        set_pool_mode(pool, PM_READ_ONLY);
+                }
                 return r;
+        }
 
         return 0;
 }
@@ -1349,7 +1352,7 @@ static void process_deferred_bios(struct pool *pool)
         if (bio_list_empty(&bios) && !need_commit_due_to_time(pool))
                 return;
 
-        if (commit_or_fallback(pool)) {
+        if (commit(pool)) {
                 while ((bio = bio_list_pop(&bios)))
                         bio_io_error(bio);
                 return;
@@ -1397,6 +1400,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode mode)
         case PM_FAIL:
                 DMERR("%s: switching pool to failure mode",
                       dm_device_name(pool->pool_md));
+                dm_pool_metadata_read_only(pool->pmd);
                 pool->process_bio = process_bio_fail;
                 pool->process_discard = process_bio_fail;
                 pool->process_prepared_mapping = process_prepared_mapping_fail;
@@ -1421,6 +1425,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode mode)
                 break;
 
         case PM_WRITE:
+                dm_pool_metadata_read_write(pool->pmd);
                 pool->process_bio = process_bio;
                 pool->process_discard = process_discard;
                 pool->process_prepared_mapping = process_prepared_mapping;
@@ -1637,12 +1642,19 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti)
         struct pool_c *pt = ti->private;
 
         /*
-         * We want to make sure that degraded pools are never upgraded.
+         * We want to make sure that a pool in PM_FAIL mode is never upgraded.
          */
         enum pool_mode old_mode = pool->pf.mode;
         enum pool_mode new_mode = pt->adjusted_pf.mode;
 
-        if (old_mode > new_mode)
+        /*
+         * If we were in PM_FAIL mode, rollback of metadata failed. We're
+         * not going to recover without a thin_repair. So we never let the
+         * pool move out of the old mode. On the other hand a PM_READ_ONLY
+         * may have been due to a lack of metadata or data space, and may
+         * now work (ie. if the underlying devices have been resized).
+         */
+        if (old_mode == PM_FAIL)
                 new_mode = old_mode;
 
         pool->ti = ti;
@@ -2266,7 +2278,7 @@ static int pool_preresume(struct dm_target *ti)
                 return r;
 
         if (need_commit1 || need_commit2)
-                (void) commit_or_fallback(pool);
+                (void) commit(pool);
 
         return 0;
 }
@@ -2293,7 +2305,7 @@ static void pool_postsuspend(struct dm_target *ti)
 
         cancel_delayed_work(&pool->waker);
         flush_workqueue(pool->wq);
-        (void) commit_or_fallback(pool);
+        (void) commit(pool);
 }
 
 static int check_arg_count(unsigned argc, unsigned args_required)
@@ -2427,7 +2439,7 @@ static int process_reserve_metadata_snap_mesg(unsigned argc, char **argv, struct
         if (r)
                 return r;
 
-        (void) commit_or_fallback(pool);
+        (void) commit(pool);
 
         r = dm_pool_reserve_metadata_snap(pool->pmd);
         if (r)
@@ -2489,7 +2501,7 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv)
         DMWARN("Unrecognised thin pool target message received: %s", argv[0]);
 
         if (!r)
-                (void) commit_or_fallback(pool);
+                (void) commit(pool);
 
         return r;
 }
@@ -2544,7 +2556,7 @@ static void pool_status(struct dm_target *ti, status_type_t type,
 
         /* Commit to ensure statistics aren't out-of-date */
         if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti))
-                (void) commit_or_fallback(pool);
+                (void) commit(pool);
 
         r = dm_pool_get_metadata_transaction_id(pool->pmd, &transaction_id);
         if (r) {
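The bind_control_target() hunk distinguishes the two degraded states: PM_FAIL is sticky until thin_repair is run, while PM_READ_ONLY may be upgraded again once space is available. A compact userspace sketch of that policy (illustrative, not the kernel API):

#include <stdio.h>

enum pool_mode { PM_WRITE, PM_READ_ONLY, PM_FAIL };

/* Mirrors the new bind_control_target() rule: PM_FAIL is never left. */
static enum pool_mode bind_mode(enum pool_mode old, enum pool_mode wanted)
{
        return old == PM_FAIL ? old : wanted;
}

int main(void)
{
        printf("%d\n", bind_mode(PM_READ_ONLY, PM_WRITE)); /* 0: upgrade allowed */
        printf("%d\n", bind_mode(PM_FAIL, PM_WRITE));      /* 2: stays failed */
        return 0;
}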
diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c
index af96e24ec328..1d75b1dc1e2e 100644
--- a/drivers/md/persistent-data/dm-array.c
+++ b/drivers/md/persistent-data/dm-array.c
@@ -317,8 +317,16 @@ static int shadow_ablock(struct dm_array_info *info, dm_block_t *root,
          * The shadow op will often be a noop. Only insert if it really
          * copied data.
          */
-        if (dm_block_location(*block) != b)
+        if (dm_block_location(*block) != b) {
+                /*
+                 * dm_tm_shadow_block will have already decremented the old
+                 * block, but it is still referenced by the btree. We
+                 * increment to stop the insert decrementing it below zero
+                 * when overwriting the old value.
+                 */
+                dm_tm_inc(info->btree_info.tm, b);
                 r = insert_ablock(info, index, *block, root);
+        }
 
         return r;
 }
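The comment added above is a reference-count balance argument; a trivial userspace sketch of the arithmetic (illustrative only):

#include <stdio.h>

int main(void)
{
        int ref = 1;    /* the btree's reference to the old ablock */

        ref--;          /* dm_tm_shadow_block() already decremented it */
        ref++;          /* the fix: dm_tm_inc() before the insert */
        ref--;          /* insert_ablock() drops the overwritten value */

        printf("old block refcount: %d\n", ref);  /* 0; -1 without the inc */
        return 0;
}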
diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c
index a7e8bf296388..064a3c271baa 100644
--- a/drivers/md/persistent-data/dm-block-manager.c
+++ b/drivers/md/persistent-data/dm-block-manager.c
@@ -626,6 +626,12 @@ void dm_bm_set_read_only(struct dm_block_manager *bm)
 }
 EXPORT_SYMBOL_GPL(dm_bm_set_read_only);
 
+void dm_bm_set_read_write(struct dm_block_manager *bm)
+{
+        bm->read_only = false;
+}
+EXPORT_SYMBOL_GPL(dm_bm_set_read_write);
+
 u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor)
 {
         return crc32c(~(u32) 0, data, len) ^ init_xor;
diff --git a/drivers/md/persistent-data/dm-block-manager.h b/drivers/md/persistent-data/dm-block-manager.h
index 9a82083a66b6..13cd58e1fe69 100644
--- a/drivers/md/persistent-data/dm-block-manager.h
+++ b/drivers/md/persistent-data/dm-block-manager.h
@@ -108,9 +108,9 @@ int dm_bm_unlock(struct dm_block *b);
 int dm_bm_flush_and_unlock(struct dm_block_manager *bm,
                            struct dm_block *superblock);
 
- /*
-  * Request data be prefetched into the cache.
-  */
+/*
+ * Request data is prefetched into the cache.
+ */
 void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b);
 
 /*
@@ -125,6 +125,7 @@ void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b);
  * be returned if you do.
  */
 void dm_bm_set_read_only(struct dm_block_manager *bm);
+void dm_bm_set_read_write(struct dm_block_manager *bm);
 
 u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor);
 
diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c
index 6058569fe86c..466a60bbd716 100644
--- a/drivers/md/persistent-data/dm-space-map-common.c
+++ b/drivers/md/persistent-data/dm-space-map-common.c
@@ -381,7 +381,7 @@ int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin,
 }
 
 static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
-                        uint32_t (*mutator)(void *context, uint32_t old),
+                        int (*mutator)(void *context, uint32_t old, uint32_t *new),
                         void *context, enum allocation_event *ev)
 {
         int r;
@@ -410,11 +410,17 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
 
         if (old > 2) {
                 r = sm_ll_lookup_big_ref_count(ll, b, &old);
-                if (r < 0)
+                if (r < 0) {
+                        dm_tm_unlock(ll->tm, nb);
                         return r;
+                }
         }
 
-        ref_count = mutator(context, old);
+        r = mutator(context, old, &ref_count);
+        if (r) {
+                dm_tm_unlock(ll->tm, nb);
+                return r;
+        }
 
         if (ref_count <= 2) {
                 sm_set_bitmap(bm_le, bit, ref_count);
@@ -465,9 +471,10 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
         return ll->save_ie(ll, index, &ie_disk);
 }
 
-static uint32_t set_ref_count(void *context, uint32_t old)
+static int set_ref_count(void *context, uint32_t old, uint32_t *new)
 {
-        return *((uint32_t *) context);
+        *new = *((uint32_t *) context);
+        return 0;
 }
 
 int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
@@ -476,9 +483,10 @@ int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
         return sm_ll_mutate(ll, b, set_ref_count, &ref_count, ev);
 }
 
-static uint32_t inc_ref_count(void *context, uint32_t old)
+static int inc_ref_count(void *context, uint32_t old, uint32_t *new)
 {
-        return old + 1;
+        *new = old + 1;
+        return 0;
 }
 
 int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev)
@@ -486,9 +494,15 @@ int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev)
         return sm_ll_mutate(ll, b, inc_ref_count, NULL, ev);
 }
 
-static uint32_t dec_ref_count(void *context, uint32_t old)
+static int dec_ref_count(void *context, uint32_t old, uint32_t *new)
 {
-        return old - 1;
+        if (!old) {
+                DMERR_LIMIT("unable to decrement a reference count below 0");
+                return -EINVAL;
+        }
+
+        *new = old - 1;
+        return 0;
 }
 
 int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev)
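The mutator signature change is the heart of this hunk: returning the new count through an out-parameter frees the return value to carry an error, so a refcount underflow is refused instead of wrapping to 0xffffffff. A self-contained userspace sketch mirroring the new callback shape (illustrative, outside the kernel):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

static int dec_ref_count(void *context, uint32_t old, uint32_t *new)
{
        (void)context;
        if (!old)
                return -EINVAL; /* old style returned old - 1, wrapping */
        *new = old - 1;
        return 0;
}

int main(void)
{
        uint32_t count;

        printf("dec(1) -> %d\n", dec_ref_count(NULL, 1, &count)); /* 0, count = 0 */
        printf("dec(0) -> %d\n", dec_ref_count(NULL, 0, &count)); /* -EINVAL */
        return 0;
}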
diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c
index 1c959684caef..58fc1eef7499 100644
--- a/drivers/md/persistent-data/dm-space-map-metadata.c
+++ b/drivers/md/persistent-data/dm-space-map-metadata.c
@@ -384,12 +384,16 @@ static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b)
         struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 
         int r = sm_metadata_new_block_(sm, b);
-        if (r)
+        if (r) {
                 DMERR("unable to allocate new metadata block");
+                return r;
+        }
 
         r = sm_metadata_get_nr_free(sm, &count);
-        if (r)
+        if (r) {
                 DMERR("couldn't get free block count");
+                return r;
+        }
 
         check_threshold(&smm->threshold, count);
 