diff options
author | Joe Thornber <ejt@redhat.com> | 2014-10-06 16:30:06 -0400 |
---|---|---|
committer | Mike Snitzer <snitzer@redhat.com> | 2014-11-10 15:25:26 -0500 |
commit | a195db2d29a47c2c3a61386009bd400df18c86cf (patch) | |
tree | 17f9c41a52fa28c6a03840a0df24a3781c280217 | |
parent | 33096a7822de63bc7dbdd090870b656a0304fa35 (diff) |
dm bio prison: switch to using a red black tree
Previously it was using a fixed sized hash table. There are times
when very many concurrent cells are held (such as when processing a very
large discard). When this happens the hash table performance becomes
very poor.
Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
-rw-r--r-- | drivers/md/dm-bio-prison.c | 172 | ||||
-rw-r--r-- | drivers/md/dm-bio-prison.h | 7 | ||||
-rw-r--r-- | drivers/md/dm-cache-target.c | 3 | ||||
-rw-r--r-- | drivers/md/dm-thin.c | 3 |
4 files changed, 79 insertions, 106 deletions
diff --git a/drivers/md/dm-bio-prison.c b/drivers/md/dm-bio-prison.c index f752d12081ff..90a56625245a 100644 --- a/drivers/md/dm-bio-prison.c +++ b/drivers/md/dm-bio-prison.c | |||
@@ -14,68 +14,38 @@ | |||
14 | 14 | ||
15 | /*----------------------------------------------------------------*/ | 15 | /*----------------------------------------------------------------*/ |
16 | 16 | ||
17 | struct bucket { | 17 | #define MIN_CELLS 1024 |
18 | spinlock_t lock; | ||
19 | struct hlist_head cells; | ||
20 | }; | ||
21 | 18 | ||
22 | struct dm_bio_prison { | 19 | struct dm_bio_prison { |
20 | spinlock_t lock; | ||
23 | mempool_t *cell_pool; | 21 | mempool_t *cell_pool; |
24 | 22 | struct rb_root cells; | |
25 | unsigned nr_buckets; | ||
26 | unsigned hash_mask; | ||
27 | struct bucket *buckets; | ||
28 | }; | 23 | }; |
29 | 24 | ||
30 | /*----------------------------------------------------------------*/ | ||
31 | |||
32 | static uint32_t calc_nr_buckets(unsigned nr_cells) | ||
33 | { | ||
34 | uint32_t n = 128; | ||
35 | |||
36 | nr_cells /= 4; | ||
37 | nr_cells = min(nr_cells, 8192u); | ||
38 | |||
39 | while (n < nr_cells) | ||
40 | n <<= 1; | ||
41 | |||
42 | return n; | ||
43 | } | ||
44 | |||
45 | static struct kmem_cache *_cell_cache; | 25 | static struct kmem_cache *_cell_cache; |
46 | 26 | ||
47 | static void init_bucket(struct bucket *b) | 27 | /*----------------------------------------------------------------*/ |
48 | { | ||
49 | spin_lock_init(&b->lock); | ||
50 | INIT_HLIST_HEAD(&b->cells); | ||
51 | } | ||
52 | 28 | ||
53 | /* | 29 | /* |
54 | * @nr_cells should be the number of cells you want in use _concurrently_. | 30 | * @nr_cells should be the number of cells you want in use _concurrently_. |
55 | * Don't confuse it with the number of distinct keys. | 31 | * Don't confuse it with the number of distinct keys. |
56 | */ | 32 | */ |
57 | struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells) | 33 | struct dm_bio_prison *dm_bio_prison_create(void) |
58 | { | 34 | { |
59 | unsigned i; | 35 | struct dm_bio_prison *prison = kmalloc(sizeof(*prison), GFP_KERNEL); |
60 | uint32_t nr_buckets = calc_nr_buckets(nr_cells); | ||
61 | size_t len = sizeof(struct dm_bio_prison) + | ||
62 | (sizeof(struct bucket) * nr_buckets); | ||
63 | struct dm_bio_prison *prison = kmalloc(len, GFP_KERNEL); | ||
64 | 36 | ||
65 | if (!prison) | 37 | if (!prison) |
66 | return NULL; | 38 | return NULL; |
67 | 39 | ||
68 | prison->cell_pool = mempool_create_slab_pool(nr_cells, _cell_cache); | 40 | spin_lock_init(&prison->lock); |
41 | |||
42 | prison->cell_pool = mempool_create_slab_pool(MIN_CELLS, _cell_cache); | ||
69 | if (!prison->cell_pool) { | 43 | if (!prison->cell_pool) { |
70 | kfree(prison); | 44 | kfree(prison); |
71 | return NULL; | 45 | return NULL; |
72 | } | 46 | } |
73 | 47 | ||
74 | prison->nr_buckets = nr_buckets; | 48 | prison->cells = RB_ROOT; |
75 | prison->hash_mask = nr_buckets - 1; | ||
76 | prison->buckets = (struct bucket *) (prison + 1); | ||
77 | for (i = 0; i < nr_buckets; i++) | ||
78 | init_bucket(prison->buckets + i); | ||
79 | 49 | ||
80 | return prison; | 50 | return prison; |
81 | } | 51 | } |
@@ -101,68 +71,73 @@ void dm_bio_prison_free_cell(struct dm_bio_prison *prison, | |||
101 | } | 71 | } |
102 | EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell); | 72 | EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell); |
103 | 73 | ||
104 | static uint32_t hash_key(struct dm_bio_prison *prison, struct dm_cell_key *key) | 74 | static void __setup_new_cell(struct dm_cell_key *key, |
75 | struct bio *holder, | ||
76 | struct dm_bio_prison_cell *cell) | ||
105 | { | 77 | { |
106 | const unsigned long BIG_PRIME = 4294967291UL; | 78 | memcpy(&cell->key, key, sizeof(cell->key)); |
107 | uint64_t hash = key->block * BIG_PRIME; | 79 | cell->holder = holder; |
108 | 80 | bio_list_init(&cell->bios); | |
109 | return (uint32_t) (hash & prison->hash_mask); | ||
110 | } | 81 | } |
111 | 82 | ||
112 | static int keys_equal(struct dm_cell_key *lhs, struct dm_cell_key *rhs) | 83 | static int cmp_keys(struct dm_cell_key *lhs, |
84 | struct dm_cell_key *rhs) | ||
113 | { | 85 | { |
114 | return (lhs->virtual == rhs->virtual) && | 86 | if (lhs->virtual < rhs->virtual) |
115 | (lhs->dev == rhs->dev) && | 87 | return -1; |
116 | (lhs->block == rhs->block); | ||
117 | } | ||
118 | 88 | ||
119 | static struct bucket *get_bucket(struct dm_bio_prison *prison, | 89 | if (lhs->virtual > rhs->virtual) |
120 | struct dm_cell_key *key) | 90 | return 1; |
121 | { | ||
122 | return prison->buckets + hash_key(prison, key); | ||
123 | } | ||
124 | 91 | ||
125 | static struct dm_bio_prison_cell *__search_bucket(struct bucket *b, | 92 | if (lhs->dev < rhs->dev) |
126 | struct dm_cell_key *key) | 93 | return -1; |
127 | { | ||
128 | struct dm_bio_prison_cell *cell; | ||
129 | 94 | ||
130 | hlist_for_each_entry(cell, &b->cells, list) | 95 | if (lhs->dev > rhs->dev) |
131 | if (keys_equal(&cell->key, key)) | 96 | return 1; |
132 | return cell; | ||
133 | 97 | ||
134 | return NULL; | 98 | if (lhs->block < rhs->block) |
135 | } | 99 | return -1; |
136 | 100 | ||
137 | static void __setup_new_cell(struct bucket *b, | 101 | if (lhs->block > rhs->block) |
138 | struct dm_cell_key *key, | 102 | return 1; |
139 | struct bio *holder, | 103 | |
140 | struct dm_bio_prison_cell *cell) | 104 | return 0; |
141 | { | ||
142 | memcpy(&cell->key, key, sizeof(cell->key)); | ||
143 | cell->holder = holder; | ||
144 | bio_list_init(&cell->bios); | ||
145 | hlist_add_head(&cell->list, &b->cells); | ||
146 | } | 105 | } |
147 | 106 | ||
148 | static int __bio_detain(struct bucket *b, | 107 | static int __bio_detain(struct dm_bio_prison *prison, |
149 | struct dm_cell_key *key, | 108 | struct dm_cell_key *key, |
150 | struct bio *inmate, | 109 | struct bio *inmate, |
151 | struct dm_bio_prison_cell *cell_prealloc, | 110 | struct dm_bio_prison_cell *cell_prealloc, |
152 | struct dm_bio_prison_cell **cell_result) | 111 | struct dm_bio_prison_cell **cell_result) |
153 | { | 112 | { |
154 | struct dm_bio_prison_cell *cell; | 113 | int r; |
155 | 114 | struct rb_node **new = &prison->cells.rb_node, *parent = NULL; | |
156 | cell = __search_bucket(b, key); | 115 | |
157 | if (cell) { | 116 | while (*new) { |
158 | if (inmate) | 117 | struct dm_bio_prison_cell *cell = |
159 | bio_list_add(&cell->bios, inmate); | 118 | container_of(*new, struct dm_bio_prison_cell, node); |
160 | *cell_result = cell; | 119 | |
161 | return 1; | 120 | r = cmp_keys(key, &cell->key); |
121 | |||
122 | parent = *new; | ||
123 | if (r < 0) | ||
124 | new = &((*new)->rb_left); | ||
125 | else if (r > 0) | ||
126 | new = &((*new)->rb_right); | ||
127 | else { | ||
128 | if (inmate) | ||
129 | bio_list_add(&cell->bios, inmate); | ||
130 | *cell_result = cell; | ||
131 | return 1; | ||
132 | } | ||
162 | } | 133 | } |
163 | 134 | ||
164 | __setup_new_cell(b, key, inmate, cell_prealloc); | 135 | __setup_new_cell(key, inmate, cell_prealloc); |
165 | *cell_result = cell_prealloc; | 136 | *cell_result = cell_prealloc; |
137 | |||
138 | rb_link_node(&cell_prealloc->node, parent, new); | ||
139 | rb_insert_color(&cell_prealloc->node, &prison->cells); | ||
140 | |||
166 | return 0; | 141 | return 0; |
167 | } | 142 | } |
168 | 143 | ||
@@ -174,11 +149,10 @@ static int bio_detain(struct dm_bio_prison *prison, | |||
174 | { | 149 | { |
175 | int r; | 150 | int r; |
176 | unsigned long flags; | 151 | unsigned long flags; |
177 | struct bucket *b = get_bucket(prison, key); | ||
178 | 152 | ||
179 | spin_lock_irqsave(&b->lock, flags); | 153 | spin_lock_irqsave(&prison->lock, flags); |
180 | r = __bio_detain(b, key, inmate, cell_prealloc, cell_result); | 154 | r = __bio_detain(prison, key, inmate, cell_prealloc, cell_result); |
181 | spin_unlock_irqrestore(&b->lock, flags); | 155 | spin_unlock_irqrestore(&prison->lock, flags); |
182 | 156 | ||
183 | return r; | 157 | return r; |
184 | } | 158 | } |
@@ -205,10 +179,11 @@ EXPORT_SYMBOL_GPL(dm_get_cell); | |||
205 | /* | 179 | /* |
206 | * @inmates must have been initialised prior to this call | 180 | * @inmates must have been initialised prior to this call |
207 | */ | 181 | */ |
208 | static void __cell_release(struct dm_bio_prison_cell *cell, | 182 | static void __cell_release(struct dm_bio_prison *prison, |
183 | struct dm_bio_prison_cell *cell, | ||
209 | struct bio_list *inmates) | 184 | struct bio_list *inmates) |
210 | { | 185 | { |
211 | hlist_del(&cell->list); | 186 | rb_erase(&cell->node, &prison->cells); |
212 | 187 | ||
213 | if (inmates) { | 188 | if (inmates) { |
214 | if (cell->holder) | 189 | if (cell->holder) |
@@ -222,21 +197,21 @@ void dm_cell_release(struct dm_bio_prison *prison, | |||
222 | struct bio_list *bios) | 197 | struct bio_list *bios) |
223 | { | 198 | { |
224 | unsigned long flags; | 199 | unsigned long flags; |
225 | struct bucket *b = get_bucket(prison, &cell->key); | ||
226 | 200 | ||
227 | spin_lock_irqsave(&b->lock, flags); | 201 | spin_lock_irqsave(&prison->lock, flags); |
228 | __cell_release(cell, bios); | 202 | __cell_release(prison, cell, bios); |
229 | spin_unlock_irqrestore(&b->lock, flags); | 203 | spin_unlock_irqrestore(&prison->lock, flags); |
230 | } | 204 | } |
231 | EXPORT_SYMBOL_GPL(dm_cell_release); | 205 | EXPORT_SYMBOL_GPL(dm_cell_release); |
232 | 206 | ||
233 | /* | 207 | /* |
234 | * Sometimes we don't want the holder, just the additional bios. | 208 | * Sometimes we don't want the holder, just the additional bios. |
235 | */ | 209 | */ |
236 | static void __cell_release_no_holder(struct dm_bio_prison_cell *cell, | 210 | static void __cell_release_no_holder(struct dm_bio_prison *prison, |
211 | struct dm_bio_prison_cell *cell, | ||
237 | struct bio_list *inmates) | 212 | struct bio_list *inmates) |
238 | { | 213 | { |
239 | hlist_del(&cell->list); | 214 | rb_erase(&cell->node, &prison->cells); |
240 | bio_list_merge(inmates, &cell->bios); | 215 | bio_list_merge(inmates, &cell->bios); |
241 | } | 216 | } |
242 | 217 | ||
@@ -245,11 +220,10 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison, | |||
245 | struct bio_list *inmates) | 220 | struct bio_list *inmates) |
246 | { | 221 | { |
247 | unsigned long flags; | 222 | unsigned long flags; |
248 | struct bucket *b = get_bucket(prison, &cell->key); | ||
249 | 223 | ||
250 | spin_lock_irqsave(&b->lock, flags); | 224 | spin_lock_irqsave(&prison->lock, flags); |
251 | __cell_release_no_holder(cell, inmates); | 225 | __cell_release_no_holder(prison, cell, inmates); |
252 | spin_unlock_irqrestore(&b->lock, flags); | 226 | spin_unlock_irqrestore(&prison->lock, flags); |
253 | } | 227 | } |
254 | EXPORT_SYMBOL_GPL(dm_cell_release_no_holder); | 228 | EXPORT_SYMBOL_GPL(dm_cell_release_no_holder); |
255 | 229 | ||
diff --git a/drivers/md/dm-bio-prison.h b/drivers/md/dm-bio-prison.h index 6805a142b750..997a43960e77 100644 --- a/drivers/md/dm-bio-prison.h +++ b/drivers/md/dm-bio-prison.h | |||
@@ -10,8 +10,8 @@ | |||
10 | #include "persistent-data/dm-block-manager.h" /* FIXME: for dm_block_t */ | 10 | #include "persistent-data/dm-block-manager.h" /* FIXME: for dm_block_t */ |
11 | #include "dm-thin-metadata.h" /* FIXME: for dm_thin_id */ | 11 | #include "dm-thin-metadata.h" /* FIXME: for dm_thin_id */ |
12 | 12 | ||
13 | #include <linux/list.h> | ||
14 | #include <linux/bio.h> | 13 | #include <linux/bio.h> |
14 | #include <linux/rbtree.h> | ||
15 | 15 | ||
16 | /*----------------------------------------------------------------*/ | 16 | /*----------------------------------------------------------------*/ |
17 | 17 | ||
@@ -35,13 +35,14 @@ struct dm_cell_key { | |||
35 | * themselves. | 35 | * themselves. |
36 | */ | 36 | */ |
37 | struct dm_bio_prison_cell { | 37 | struct dm_bio_prison_cell { |
38 | struct hlist_node list; | 38 | struct rb_node node; |
39 | |||
39 | struct dm_cell_key key; | 40 | struct dm_cell_key key; |
40 | struct bio *holder; | 41 | struct bio *holder; |
41 | struct bio_list bios; | 42 | struct bio_list bios; |
42 | }; | 43 | }; |
43 | 44 | ||
44 | struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells); | 45 | struct dm_bio_prison *dm_bio_prison_create(void); |
45 | void dm_bio_prison_destroy(struct dm_bio_prison *prison); | 46 | void dm_bio_prison_destroy(struct dm_bio_prison *prison); |
46 | 47 | ||
47 | /* | 48 | /* |
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 7130505c2425..69de8b43ca12 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c | |||
@@ -95,7 +95,6 @@ static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio) | |||
95 | 95 | ||
96 | /*----------------------------------------------------------------*/ | 96 | /*----------------------------------------------------------------*/ |
97 | 97 | ||
98 | #define PRISON_CELLS 1024 | ||
99 | #define MIGRATION_POOL_SIZE 128 | 98 | #define MIGRATION_POOL_SIZE 128 |
100 | #define COMMIT_PERIOD HZ | 99 | #define COMMIT_PERIOD HZ |
101 | #define MIGRATION_COUNT_WINDOW 10 | 100 | #define MIGRATION_COUNT_WINDOW 10 |
@@ -2327,7 +2326,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) | |||
2327 | INIT_DELAYED_WORK(&cache->waker, do_waker); | 2326 | INIT_DELAYED_WORK(&cache->waker, do_waker); |
2328 | cache->last_commit_jiffies = jiffies; | 2327 | cache->last_commit_jiffies = jiffies; |
2329 | 2328 | ||
2330 | cache->prison = dm_bio_prison_create(PRISON_CELLS); | 2329 | cache->prison = dm_bio_prison_create(); |
2331 | if (!cache->prison) { | 2330 | if (!cache->prison) { |
2332 | *error = "could not create bio prison"; | 2331 | *error = "could not create bio prison"; |
2333 | goto bad; | 2332 | goto bad; |
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 0f86d802b533..eecfe7495232 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c | |||
@@ -25,7 +25,6 @@ | |||
25 | */ | 25 | */ |
26 | #define ENDIO_HOOK_POOL_SIZE 1024 | 26 | #define ENDIO_HOOK_POOL_SIZE 1024 |
27 | #define MAPPING_POOL_SIZE 1024 | 27 | #define MAPPING_POOL_SIZE 1024 |
28 | #define PRISON_CELLS 1024 | ||
29 | #define COMMIT_PERIOD HZ | 28 | #define COMMIT_PERIOD HZ |
30 | #define NO_SPACE_TIMEOUT_SECS 60 | 29 | #define NO_SPACE_TIMEOUT_SECS 60 |
31 | 30 | ||
@@ -2193,7 +2192,7 @@ static struct pool *pool_create(struct mapped_device *pool_md, | |||
2193 | pool->sectors_per_block_shift = __ffs(block_size); | 2192 | pool->sectors_per_block_shift = __ffs(block_size); |
2194 | pool->low_water_blocks = 0; | 2193 | pool->low_water_blocks = 0; |
2195 | pool_features_init(&pool->pf); | 2194 | pool_features_init(&pool->pf); |
2196 | pool->prison = dm_bio_prison_create(PRISON_CELLS); | 2195 | pool->prison = dm_bio_prison_create(); |
2197 | if (!pool->prison) { | 2196 | if (!pool->prison) { |
2198 | *error = "Error creating pool's bio prison"; | 2197 | *error = "Error creating pool's bio prison"; |
2199 | err_p = ERR_PTR(-ENOMEM); | 2198 | err_p = ERR_PTR(-ENOMEM); |