From f62b6b8f498658a9d537c7d380e9966f15e1b2a1 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 2 Dec 2013 16:47:01 -0500 Subject: dm space map metadata: return on failure in sm_metadata_new_block Commit 2fc48021f4afdd109b9e52b6eef5db89ca80bac7 ("dm persistent metadata: add space map threshold callback") introduced a regression to the metadata block allocation path that resulted in errors being ignored. This regression was uncovered by running the following device-mapper-test-suite test: dmtest run --suite thin-provisioning -n /exhausting_metadata_space_causes_fail_mode/ The ignored error codes in sm_metadata_new_block() could crash the kernel through use of either the dm-thin or dm-cache targets, e.g.: device-mapper: thin: 253:4: reached low water mark for metadata device: sending event. device-mapper: space map metadata: unable to allocate new metadata block general protection fault: 0000 [#1] SMP ... Workqueue: dm-thin do_worker [dm_thin_pool] task: ffff880035ce2ab0 ti: ffff88021a054000 task.ti: ffff88021a054000 RIP: 0010:[] [] metadata_ll_load_ie+0x15/0x30 [dm_persistent_data] RSP: 0018:ffff88021a055a68 EFLAGS: 00010202 RAX: 003fc8243d212ba0 RBX: ffff88021a780070 RCX: ffff88021a055a78 RDX: ffff88021a055a78 RSI: 0040402222a92a80 RDI: ffff88021a780070 RBP: ffff88021a055a68 R08: ffff88021a055ba4 R09: 0000000000000010 R10: 0000000000000000 R11: 00000002a02e1000 R12: ffff88021a055ad4 R13: 0000000000000598 R14: ffffffffa0338470 R15: ffff88021a055ba4 FS: 0000000000000000(0000) GS:ffff88033fca0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007f467c0291b8 CR3: 0000000001a0b000 CR4: 00000000000007e0 Stack: ffff88021a055ab8 ffffffffa0332020 ffff88021a055b30 0000000000000001 ffff88021a055b30 0000000000000000 ffff88021a055b18 0000000000000000 ffff88021a055ba4 ffff88021a055b98 ffff88021a055ae8 ffffffffa033304c Call Trace: [] sm_ll_lookup_bitmap+0x40/0xa0 [dm_persistent_data] [] sm_metadata_count_is_more_than_one+0x8c/0xc0 [dm_persistent_data] [] dm_tm_shadow_block+0x65/0x110 [dm_persistent_data] [] sm_ll_mutate+0x80/0x300 [dm_persistent_data] [] ? set_ref_count+0x10/0x10 [dm_persistent_data] [] sm_ll_inc+0x1a/0x20 [dm_persistent_data] [] sm_disk_new_block+0x60/0x80 [dm_persistent_data] [] ? down_write+0x16/0x40 [] dm_pool_alloc_data_block+0x54/0x80 [dm_thin_pool] [] alloc_data_block+0x9c/0x130 [dm_thin_pool] [] provision_block+0x4e/0x180 [dm_thin_pool] [] ? dm_thin_find_block+0x6a/0x110 [dm_thin_pool] [] process_bio+0x1ca/0x1f0 [dm_thin_pool] [] ? mempool_free+0x8d/0xa0 [] process_deferred_bios+0xc5/0x230 [dm_thin_pool] [] do_worker+0x51/0x60 [dm_thin_pool] [] process_one_work+0x182/0x3b0 [] worker_thread+0x120/0x3a0 [] ? manage_workers+0x160/0x160 [] kthread+0xce/0xe0 [] ? kthread_freezable_should_stop+0x70/0x70 [] ret_from_fork+0x7c/0xb0 [] ? kthread_freezable_should_stop+0x70/0x70 [] ret_from_fork+0x7c/0xb0 [] ? kthread_freezable_should_stop+0x70/0x70 Signed-off-by: Mike Snitzer Acked-by: Joe Thornber Cc: stable@vger.kernel.org # v3.10+ --- drivers/md/persistent-data/dm-space-map-metadata.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/md/persistent-data') diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index 1c959684caef..58fc1eef7499 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -384,12 +384,16 @@ static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b) struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); int r = sm_metadata_new_block_(sm, b); - if (r) + if (r) { DMERR("unable to allocate new metadata block"); + return r; + } r = sm_metadata_get_nr_free(sm, &count); - if (r) + if (r) { DMERR("couldn't get free block count"); + return r; + } check_threshold(&smm->threshold, count); -- cgit v1.2.2 From 9b7aaa64f96f7ca280d75326fca42f42017b89ef Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 4 Dec 2013 16:58:19 -0500 Subject: dm thin: allow pool in read-only mode to transition to read-write mode A thin-pool may be in read-only mode because the pool's data or metadata space was exhausted. To allow for recovery, by adding more space to the pool, we must allow a pool to transition from PM_READ_ONLY to PM_WRITE mode. Otherwise, running out of space will render the pool permanently read-only. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org --- drivers/md/persistent-data/dm-block-manager.c | 6 ++++++ drivers/md/persistent-data/dm-block-manager.h | 7 ++++--- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'drivers/md/persistent-data') diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index a7e8bf296388..064a3c271baa 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -626,6 +626,12 @@ void dm_bm_set_read_only(struct dm_block_manager *bm) } EXPORT_SYMBOL_GPL(dm_bm_set_read_only); +void dm_bm_set_read_write(struct dm_block_manager *bm) +{ + bm->read_only = false; +} +EXPORT_SYMBOL_GPL(dm_bm_set_read_write); + u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor) { return crc32c(~(u32) 0, data, len) ^ init_xor; diff --git a/drivers/md/persistent-data/dm-block-manager.h b/drivers/md/persistent-data/dm-block-manager.h index 9a82083a66b6..13cd58e1fe69 100644 --- a/drivers/md/persistent-data/dm-block-manager.h +++ b/drivers/md/persistent-data/dm-block-manager.h @@ -108,9 +108,9 @@ int dm_bm_unlock(struct dm_block *b); int dm_bm_flush_and_unlock(struct dm_block_manager *bm, struct dm_block *superblock); - /* - * Request data be prefetched into the cache. - */ +/* + * Request data is prefetched into the cache. + */ void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b); /* @@ -125,6 +125,7 @@ void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b); * be returned if you do. */ void dm_bm_set_read_only(struct dm_block_manager *bm); +void dm_bm_set_read_write(struct dm_block_manager *bm); u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor); -- cgit v1.2.2 From 5b564d80f8bc21094c0cd2b19b679d983aabcc29 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 13 Dec 2013 12:31:08 +0000 Subject: dm space map: disallow decrementing a reference count below zero The old behaviour, returning -EINVAL if a ref_count of 0 would be decremented, was removed in commit f722063 ("dm space map: optimise sm_ll_dec and sm_ll_inc"). To fix this regression we return an error code from the mutator function pointer passed to sm_ll_mutate() and have dec_ref_count() return -EINVAL if the old ref_count is 0. Add a DMERR to reflect the potential seriousness of this error. Also, add missing dm_tm_unlock() to sm_ll_mutate()'s error path. With this fix the following dmts regression test now passes: dmtest run --suite cache -n /metadata_use_kernel/ The next patch fixes the higher-level dm-array code that exposed this regression. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org # 3.12+ --- drivers/md/persistent-data/dm-space-map-common.c | 32 +++++++++++++++++------- 1 file changed, 23 insertions(+), 9 deletions(-) (limited to 'drivers/md/persistent-data') diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index 6058569fe86c..466a60bbd716 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -381,7 +381,7 @@ int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin, } static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b, - uint32_t (*mutator)(void *context, uint32_t old), + int (*mutator)(void *context, uint32_t old, uint32_t *new), void *context, enum allocation_event *ev) { int r; @@ -410,11 +410,17 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b, if (old > 2) { r = sm_ll_lookup_big_ref_count(ll, b, &old); - if (r < 0) + if (r < 0) { + dm_tm_unlock(ll->tm, nb); return r; + } } - ref_count = mutator(context, old); + r = mutator(context, old, &ref_count); + if (r) { + dm_tm_unlock(ll->tm, nb); + return r; + } if (ref_count <= 2) { sm_set_bitmap(bm_le, bit, ref_count); @@ -465,9 +471,10 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b, return ll->save_ie(ll, index, &ie_disk); } -static uint32_t set_ref_count(void *context, uint32_t old) +static int set_ref_count(void *context, uint32_t old, uint32_t *new) { - return *((uint32_t *) context); + *new = *((uint32_t *) context); + return 0; } int sm_ll_insert(struct ll_disk *ll, dm_block_t b, @@ -476,9 +483,10 @@ int sm_ll_insert(struct ll_disk *ll, dm_block_t b, return sm_ll_mutate(ll, b, set_ref_count, &ref_count, ev); } -static uint32_t inc_ref_count(void *context, uint32_t old) +static int inc_ref_count(void *context, uint32_t old, uint32_t *new) { - return old + 1; + *new = old + 1; + return 0; } int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev) @@ -486,9 +494,15 @@ int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev) return sm_ll_mutate(ll, b, inc_ref_count, NULL, ev); } -static uint32_t dec_ref_count(void *context, uint32_t old) +static int dec_ref_count(void *context, uint32_t old, uint32_t *new) { - return old - 1; + if (!old) { + DMERR_LIMIT("unable to decrement a reference count below 0"); + return -EINVAL; + } + + *new = old - 1; + return 0; } int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev) -- cgit v1.2.2 From ed9571f0cf1fe09d3506302610f3ccdfa1d22c4a Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 13 Dec 2013 14:55:55 +0000 Subject: dm array: fix a reference counting bug in shadow_ablock An old array block could have its reference count decremented below zero when it is being replaced in the btree by a new array block. The fix is to increment the old ablock's reference count just before inserting a new ablock into the btree. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org # 3.9+ --- drivers/md/persistent-data/dm-array.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers/md/persistent-data') diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c index af96e24ec328..1d75b1dc1e2e 100644 --- a/drivers/md/persistent-data/dm-array.c +++ b/drivers/md/persistent-data/dm-array.c @@ -317,8 +317,16 @@ static int shadow_ablock(struct dm_array_info *info, dm_block_t *root, * The shadow op will often be a noop. Only insert if it really * copied data. */ - if (dm_block_location(*block) != b) + if (dm_block_location(*block) != b) { + /* + * dm_tm_shadow_block will have already decremented the old + * block, but it is still referenced by the btree. We + * increment to stop the insert decrementing it below zero + * when overwriting the old value. + */ + dm_tm_inc(info->btree_info.tm, b); r = insert_ablock(info, index, *block, root); + } return r; } -- cgit v1.2.2