author     Mike Snitzer <snitzer@redhat.com>        2012-10-12 16:02:13 -0400
committer  Alasdair G Kergon <agk@redhat.com>       2012-10-12 16:02:13 -0400
commit     4f81a4176297db57c7ef3b2893092dd837c1e2a8
tree       80a52e60b29377dc82c7f4c902ca0053479d6909 /drivers/md/dm-thin.c
parent     44feb387f6f5584535bd6e3ad7ccfdce715d7dba
dm thin: move bio_prison code to separate module
The bio prison code will be useful to other future DM targets, so
move it to a separate module.
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Diffstat (limited to 'drivers/md/dm-thin.c')
-rw-r--r--  drivers/md/dm-thin.c | 407
1 file changed, 3 insertions(+), 404 deletions(-)
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 22a22a701e16..058acf3a5ba7 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -5,6 +5,7 @@
  */
 
 #include "dm-thin-metadata.h"
+#include "dm-bio-prison.h"
 #include "dm.h"
 
 #include <linux/device-mapper.h>
@@ -21,7 +22,6 @@
  * Tunable constants
  */
 #define ENDIO_HOOK_POOL_SIZE 1024
-#define DEFERRED_SET_SIZE 64
 #define MAPPING_POOL_SIZE 1024
 #define PRISON_CELLS 1024
 #define COMMIT_PERIOD HZ
@@ -99,404 +99,6 @@
 /*----------------------------------------------------------------*/
 
 /*
- * Sometimes we can't deal with a bio straight away. We put them in prison
- * where they can't cause any mischief. Bios are put in a cell identified
- * by a key, multiple bios can be in the same cell. When the cell is
- * subsequently unlocked the bios become available.
- */
-struct dm_bio_prison;
-
-struct dm_cell_key {
-	int virtual;
-	dm_thin_id dev;
-	dm_block_t block;
-};
-
-struct dm_bio_prison_cell {
-	struct hlist_node list;
-	struct dm_bio_prison *prison;
-	struct dm_cell_key key;
-	struct bio *holder;
-	struct bio_list bios;
-};
-
-struct dm_bio_prison {
-	spinlock_t lock;
-	mempool_t *cell_pool;
-
-	unsigned nr_buckets;
-	unsigned hash_mask;
-	struct hlist_head *cells;
-};
-
-static uint32_t calc_nr_buckets(unsigned nr_cells)
-{
-	uint32_t n = 128;
-
-	nr_cells /= 4;
-	nr_cells = min(nr_cells, 8192u);
-
-	while (n < nr_cells)
-		n <<= 1;
-
-	return n;
-}
-
-static struct kmem_cache *_cell_cache;
-
-/*
- * @nr_cells should be the number of cells you want in use _concurrently_.
- * Don't confuse it with the number of distinct keys.
- */
-static struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells)
-{
-	unsigned i;
-	uint32_t nr_buckets = calc_nr_buckets(nr_cells);
-	size_t len = sizeof(struct dm_bio_prison) +
-		(sizeof(struct hlist_head) * nr_buckets);
-	struct dm_bio_prison *prison = kmalloc(len, GFP_KERNEL);
-
-	if (!prison)
-		return NULL;
-
-	spin_lock_init(&prison->lock);
-	prison->cell_pool = mempool_create_slab_pool(nr_cells, _cell_cache);
-	if (!prison->cell_pool) {
-		kfree(prison);
-		return NULL;
-	}
-
-	prison->nr_buckets = nr_buckets;
-	prison->hash_mask = nr_buckets - 1;
-	prison->cells = (struct hlist_head *) (prison + 1);
-	for (i = 0; i < nr_buckets; i++)
-		INIT_HLIST_HEAD(prison->cells + i);
-
-	return prison;
-}
-
-static void dm_bio_prison_destroy(struct dm_bio_prison *prison)
-{
-	mempool_destroy(prison->cell_pool);
-	kfree(prison);
-}
-
-static uint32_t hash_key(struct dm_bio_prison *prison, struct dm_cell_key *key)
-{
-	const unsigned long BIG_PRIME = 4294967291UL;
-	uint64_t hash = key->block * BIG_PRIME;
-
-	return (uint32_t) (hash & prison->hash_mask);
-}
-
-static int keys_equal(struct dm_cell_key *lhs, struct dm_cell_key *rhs)
-{
-	return (lhs->virtual == rhs->virtual) &&
-	       (lhs->dev == rhs->dev) &&
-	       (lhs->block == rhs->block);
-}
-
-static struct dm_bio_prison_cell *__search_bucket(struct hlist_head *bucket,
-						  struct dm_cell_key *key)
-{
-	struct dm_bio_prison_cell *cell;
-	struct hlist_node *tmp;
-
-	hlist_for_each_entry(cell, tmp, bucket, list)
-		if (keys_equal(&cell->key, key))
-			return cell;
-
-	return NULL;
-}
-
-/*
- * This may block if a new cell needs allocating. You must ensure that
- * cells will be unlocked even if the calling thread is blocked.
- *
- * Returns 1 if the cell was already held, 0 if @inmate is the new holder.
- */
-static int dm_bio_detain(struct dm_bio_prison *prison, struct dm_cell_key *key,
-			 struct bio *inmate, struct dm_bio_prison_cell **ref)
-{
-	int r = 1;
-	unsigned long flags;
-	uint32_t hash = hash_key(prison, key);
-	struct dm_bio_prison_cell *cell, *cell2;
-
-	BUG_ON(hash > prison->nr_buckets);
-
-	spin_lock_irqsave(&prison->lock, flags);
-
-	cell = __search_bucket(prison->cells + hash, key);
-	if (cell) {
-		bio_list_add(&cell->bios, inmate);
-		goto out;
-	}
-
-	/*
-	 * Allocate a new cell
-	 */
-	spin_unlock_irqrestore(&prison->lock, flags);
-	cell2 = mempool_alloc(prison->cell_pool, GFP_NOIO);
-	spin_lock_irqsave(&prison->lock, flags);
-
-	/*
-	 * We've been unlocked, so we have to double check that
-	 * nobody else has inserted this cell in the meantime.
-	 */
-	cell = __search_bucket(prison->cells + hash, key);
-	if (cell) {
-		mempool_free(cell2, prison->cell_pool);
-		bio_list_add(&cell->bios, inmate);
-		goto out;
-	}
-
-	/*
-	 * Use new cell.
-	 */
-	cell = cell2;
-
-	cell->prison = prison;
-	memcpy(&cell->key, key, sizeof(cell->key));
-	cell->holder = inmate;
-	bio_list_init(&cell->bios);
-	hlist_add_head(&cell->list, prison->cells + hash);
-
-	r = 0;
-
-out:
-	spin_unlock_irqrestore(&prison->lock, flags);
-
-	*ref = cell;
-
-	return r;
-}
-
-/*
- * @inmates must have been initialised prior to this call
- */
-static void __cell_release(struct dm_bio_prison_cell *cell, struct bio_list *inmates)
-{
-	struct dm_bio_prison *prison = cell->prison;
-
-	hlist_del(&cell->list);
-
-	if (inmates) {
-		bio_list_add(inmates, cell->holder);
-		bio_list_merge(inmates, &cell->bios);
-	}
-
-	mempool_free(cell, prison->cell_pool);
-}
-
-static void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios)
-{
-	unsigned long flags;
-	struct dm_bio_prison *prison = cell->prison;
-
-	spin_lock_irqsave(&prison->lock, flags);
-	__cell_release(cell, bios);
-	spin_unlock_irqrestore(&prison->lock, flags);
-}
-
-/*
- * There are a couple of places where we put a bio into a cell briefly
- * before taking it out again. In these situations we know that no other
- * bio may be in the cell. This function releases the cell, and also does
- * a sanity check.
- */
-static void __cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio)
-{
-	BUG_ON(cell->holder != bio);
-	BUG_ON(!bio_list_empty(&cell->bios));
-
-	__cell_release(cell, NULL);
-}
-
-static void dm_cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio)
-{
-	unsigned long flags;
-	struct dm_bio_prison *prison = cell->prison;
-
-	spin_lock_irqsave(&prison->lock, flags);
-	__cell_release_singleton(cell, bio);
-	spin_unlock_irqrestore(&prison->lock, flags);
-}
-
-/*
- * Sometimes we don't want the holder, just the additional bios.
- */
-static void __cell_release_no_holder(struct dm_bio_prison_cell *cell,
-				     struct bio_list *inmates)
-{
-	struct dm_bio_prison *prison = cell->prison;
-
-	hlist_del(&cell->list);
-	bio_list_merge(inmates, &cell->bios);
-
-	mempool_free(cell, prison->cell_pool);
-}
-
-static void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell,
-				      struct bio_list *inmates)
-{
-	unsigned long flags;
-	struct dm_bio_prison *prison = cell->prison;
-
-	spin_lock_irqsave(&prison->lock, flags);
-	__cell_release_no_holder(cell, inmates);
-	spin_unlock_irqrestore(&prison->lock, flags);
-}
-
-static void dm_cell_error(struct dm_bio_prison_cell *cell)
-{
-	struct dm_bio_prison *prison = cell->prison;
-	struct bio_list bios;
-	struct bio *bio;
-	unsigned long flags;
-
-	bio_list_init(&bios);
-
-	spin_lock_irqsave(&prison->lock, flags);
-	__cell_release(cell, &bios);
-	spin_unlock_irqrestore(&prison->lock, flags);
-
-	while ((bio = bio_list_pop(&bios)))
-		bio_io_error(bio);
-}
-
-/*----------------------------------------------------------------*/
-
-/*
- * We use the deferred set to keep track of pending reads to shared blocks.
- * We do this to ensure the new mapping caused by a write isn't performed
- * until these prior reads have completed. Otherwise the insertion of the
- * new mapping could free the old block that the read bios are mapped to.
- */
-
-struct dm_deferred_set;
-struct dm_deferred_entry {
-	struct dm_deferred_set *ds;
-	unsigned count;
-	struct list_head work_items;
-};
-
-struct dm_deferred_set {
-	spinlock_t lock;
-	unsigned current_entry;
-	unsigned sweeper;
-	struct dm_deferred_entry entries[DEFERRED_SET_SIZE];
-};
-
-static struct dm_deferred_set *dm_deferred_set_create(void)
-{
-	int i;
-	struct dm_deferred_set *ds;
-
-	ds = kmalloc(sizeof(*ds), GFP_KERNEL);
-	if (!ds)
-		return NULL;
-
-	spin_lock_init(&ds->lock);
-	ds->current_entry = 0;
-	ds->sweeper = 0;
-	for (i = 0; i < DEFERRED_SET_SIZE; i++) {
-		ds->entries[i].ds = ds;
-		ds->entries[i].count = 0;
-		INIT_LIST_HEAD(&ds->entries[i].work_items);
-	}
-
-	return ds;
-}
-
-static void dm_deferred_set_destroy(struct dm_deferred_set *ds)
-{
-	kfree(ds);
-}
-
-static struct dm_deferred_entry *dm_deferred_entry_inc(struct dm_deferred_set *ds)
-{
-	unsigned long flags;
-	struct dm_deferred_entry *entry;
-
-	spin_lock_irqsave(&ds->lock, flags);
-	entry = ds->entries + ds->current_entry;
-	entry->count++;
-	spin_unlock_irqrestore(&ds->lock, flags);
-
-	return entry;
-}
-
-static unsigned ds_next(unsigned index)
-{
-	return (index + 1) % DEFERRED_SET_SIZE;
-}
-
-static void __sweep(struct dm_deferred_set *ds, struct list_head *head)
-{
-	while ((ds->sweeper != ds->current_entry) &&
-	       !ds->entries[ds->sweeper].count) {
-		list_splice_init(&ds->entries[ds->sweeper].work_items, head);
-		ds->sweeper = ds_next(ds->sweeper);
-	}
-
-	if ((ds->sweeper == ds->current_entry) && !ds->entries[ds->sweeper].count)
-		list_splice_init(&ds->entries[ds->sweeper].work_items, head);
-}
-
-static void dm_deferred_entry_dec(struct dm_deferred_entry *entry, struct list_head *head)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&entry->ds->lock, flags);
-	BUG_ON(!entry->count);
-	--entry->count;
-	__sweep(entry->ds, head);
-	spin_unlock_irqrestore(&entry->ds->lock, flags);
-}
-
-/*
- * Returns 1 if deferred or 0 if no pending items to delay job.
- */
-static int dm_deferred_set_add_work(struct dm_deferred_set *ds, struct list_head *work)
-{
-	int r = 1;
-	unsigned long flags;
-	unsigned next_entry;
-
-	spin_lock_irqsave(&ds->lock, flags);
-	if ((ds->sweeper == ds->current_entry) &&
-	    !ds->entries[ds->current_entry].count)
-		r = 0;
-	else {
-		list_add(work, &ds->entries[ds->current_entry].work_items);
-		next_entry = ds_next(ds->current_entry);
-		if (!ds->entries[next_entry].count)
-			ds->current_entry = next_entry;
-	}
-	spin_unlock_irqrestore(&ds->lock, flags);
-
-	return r;
-}
-
-static int __init dm_bio_prison_init(void)
-{
-	_cell_cache = KMEM_CACHE(dm_bio_prison_cell, 0);
-	if (!_cell_cache)
-		return -ENOMEM;
-
-	return 0;
-}
-
-static void __exit dm_bio_prison_exit(void)
-{
-	kmem_cache_destroy(_cell_cache);
-	_cell_cache = NULL;
-}
-
-/*----------------------------------------------------------------*/
-
-/*
  * Key building.
  */
 static void build_data_key(struct dm_thin_device *td,
@@ -2852,7 +2454,7 @@ static struct target_type pool_target = {
 	.name = "thin-pool",
 	.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
 		    DM_TARGET_IMMUTABLE,
-	.version = {1, 4, 0},
+	.version = {1, 5, 0},
 	.module = THIS_MODULE,
 	.ctr = pool_ctr,
 	.dtr = pool_dtr,
@@ -3143,7 +2745,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 static struct target_type thin_target = {
 	.name = "thin",
-	.version = {1, 4, 0},
+	.version = {1, 5, 0},
 	.module = THIS_MODULE,
 	.ctr = thin_ctr,
 	.dtr = thin_dtr,
@@ -3173,8 +2775,6 @@ static int __init dm_thin_init(void)
 
 	r = -ENOMEM;
 
-	dm_bio_prison_init();
-
 	_new_mapping_cache = KMEM_CACHE(dm_thin_new_mapping, 0);
 	if (!_new_mapping_cache)
 		goto bad_new_mapping_cache;
@@ -3200,7 +2800,6 @@ static void dm_thin_exit(void)
 	dm_unregister_target(&thin_target);
 	dm_unregister_target(&pool_target);
 
-	dm_bio_prison_exit();
	kmem_cache_destroy(_new_mapping_cache);
 	kmem_cache_destroy(_endio_hook_cache);
 }
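
For context, here is a minimal sketch of the detain/release pattern that the code being moved implements. It is illustrative only: the function example_process_bio() and its parameters are hypothetical, and it assumes the split-out dm-bio-prison module exposes the routines under the names used in dm-thin.c above (dm_bio_detain(), dm_cell_release(), struct dm_cell_key, ...), which in this pre-split version are still static to dm-thin.c.

/*
 * Illustrative sketch, not part of the patch: how a DM target would park
 * duplicate I/O to the same block behind a single holder bio.
 */
#include "dm-bio-prison.h"

static void example_process_bio(struct dm_bio_prison *prison,
				dm_thin_id dev, dm_block_t block,
				struct bio *bio)
{
	struct dm_bio_prison_cell *cell;
	struct bio_list released;
	struct dm_cell_key key = {
		.virtual = 0,
		.dev = dev,
		.block = block,
	};

	/*
	 * dm_bio_detain() returns 1 if another bio already holds the cell
	 * for this key; @bio is then queued inside the cell and will be
	 * dealt with when the holder releases it, so we stop here.
	 */
	if (dm_bio_detain(prison, &key, bio, &cell))
		return;

	/* We are the holder: do the real work for @bio (remap, copy, ...). */

	/*
	 * When the work completes, release the cell.  The holder and any
	 * bios detained behind it come back on @released for resubmission.
	 */
	bio_list_init(&released);
	dm_cell_release(cell, &released);
	/* ... requeue everything on @released ... */
}

After this patch the same calls are expected to come from the shared dm-bio-prison module rather than from the static helpers removed above.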