aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
authorJoe Thornber <ejt@redhat.com>2014-11-10 10:03:24 -0500
committerMike Snitzer <snitzer@redhat.com>2014-11-10 15:23:58 -0500
commit9b460d3699324d570a4d4161c3741431887f102f (patch)
tree2349384b5bb988ddb1b9a1bca9b55c99d577dd71 /drivers/md
parentc822ed967cba38505713d59ed40a114386ef6c01 (diff)
dm btree: fix a recursion depth bug in btree walking code
The walk code was using a 'ro_spine' to hold it's locked btree nodes. But this data structure is designed for the rolling lock scheme, and as such automatically unlocks blocks that are two steps up the call chain. This is not suitable for the simple recursive walk algorithm, which retraces its steps. This code is only used by the persistent array code, which in turn is only used by dm-cache. In order to trigger it you need to have a mapping tree that is more than 2 levels deep; which equates to 8-16 million cache blocks. For instance a 4T ssd with a very small block size of 32k only just triggers this bug. The fix just places the locked blocks on the stack, and stops using the ro_spine altogether. Signed-off-by: Joe Thornber <ejt@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com> Cc: stable@vger.kernel.org
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/persistent-data/dm-btree-internal.h6
-rw-r--r--drivers/md/persistent-data/dm-btree-spine.c2
-rw-r--r--drivers/md/persistent-data/dm-btree.c24
3 files changed, 17 insertions, 15 deletions
diff --git a/drivers/md/persistent-data/dm-btree-internal.h b/drivers/md/persistent-data/dm-btree-internal.h
index 37d367bb9aa8..bf2b80d5c470 100644
--- a/drivers/md/persistent-data/dm-btree-internal.h
+++ b/drivers/md/persistent-data/dm-btree-internal.h
@@ -42,6 +42,12 @@ struct btree_node {
42} __packed; 42} __packed;
43 43
44 44
45/*
46 * Locks a block using the btree node validator.
47 */
48int bn_read_lock(struct dm_btree_info *info, dm_block_t b,
49 struct dm_block **result);
50
45void inc_children(struct dm_transaction_manager *tm, struct btree_node *n, 51void inc_children(struct dm_transaction_manager *tm, struct btree_node *n,
46 struct dm_btree_value_type *vt); 52 struct dm_btree_value_type *vt);
47 53
diff --git a/drivers/md/persistent-data/dm-btree-spine.c b/drivers/md/persistent-data/dm-btree-spine.c
index cf9fd676ae44..1b5e13ec7f96 100644
--- a/drivers/md/persistent-data/dm-btree-spine.c
+++ b/drivers/md/persistent-data/dm-btree-spine.c
@@ -92,7 +92,7 @@ struct dm_block_validator btree_node_validator = {
92 92
93/*----------------------------------------------------------------*/ 93/*----------------------------------------------------------------*/
94 94
95static int bn_read_lock(struct dm_btree_info *info, dm_block_t b, 95int bn_read_lock(struct dm_btree_info *info, dm_block_t b,
96 struct dm_block **result) 96 struct dm_block **result)
97{ 97{
98 return dm_tm_read_lock(info->tm, b, &btree_node_validator, result); 98 return dm_tm_read_lock(info->tm, b, &btree_node_validator, result);
diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c
index 416060c25709..200ac12a1d40 100644
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -847,22 +847,26 @@ EXPORT_SYMBOL_GPL(dm_btree_find_lowest_key);
847 * FIXME: We shouldn't use a recursive algorithm when we have limited stack 847 * FIXME: We shouldn't use a recursive algorithm when we have limited stack
848 * space. Also this only works for single level trees. 848 * space. Also this only works for single level trees.
849 */ 849 */
850static int walk_node(struct ro_spine *s, dm_block_t block, 850static int walk_node(struct dm_btree_info *info, dm_block_t block,
851 int (*fn)(void *context, uint64_t *keys, void *leaf), 851 int (*fn)(void *context, uint64_t *keys, void *leaf),
852 void *context) 852 void *context)
853{ 853{
854 int r; 854 int r;
855 unsigned i, nr; 855 unsigned i, nr;
856 struct dm_block *node;
856 struct btree_node *n; 857 struct btree_node *n;
857 uint64_t keys; 858 uint64_t keys;
858 859
859 r = ro_step(s, block); 860 r = bn_read_lock(info, block, &node);
860 n = ro_node(s); 861 if (r)
862 return r;
863
864 n = dm_block_data(node);
861 865
862 nr = le32_to_cpu(n->header.nr_entries); 866 nr = le32_to_cpu(n->header.nr_entries);
863 for (i = 0; i < nr; i++) { 867 for (i = 0; i < nr; i++) {
864 if (le32_to_cpu(n->header.flags) & INTERNAL_NODE) { 868 if (le32_to_cpu(n->header.flags) & INTERNAL_NODE) {
865 r = walk_node(s, value64(n, i), fn, context); 869 r = walk_node(info, value64(n, i), fn, context);
866 if (r) 870 if (r)
867 goto out; 871 goto out;
868 } else { 872 } else {
@@ -874,7 +878,7 @@ static int walk_node(struct ro_spine *s, dm_block_t block,
874 } 878 }
875 879
876out: 880out:
877 ro_pop(s); 881 dm_tm_unlock(info->tm, node);
878 return r; 882 return r;
879} 883}
880 884
@@ -882,15 +886,7 @@ int dm_btree_walk(struct dm_btree_info *info, dm_block_t root,
882 int (*fn)(void *context, uint64_t *keys, void *leaf), 886 int (*fn)(void *context, uint64_t *keys, void *leaf),
883 void *context) 887 void *context)
884{ 888{
885 int r;
886 struct ro_spine spine;
887
888 BUG_ON(info->levels > 1); 889 BUG_ON(info->levels > 1);
889 890 return walk_node(info, root, fn, context);
890 init_ro_spine(&spine, info);
891 r = walk_node(&spine, root, fn, context);
892 exit_ro_spine(&spine);
893
894 return r;
895} 891}
896EXPORT_SYMBOL_GPL(dm_btree_walk); 892EXPORT_SYMBOL_GPL(dm_btree_walk);