 drivers/md/Kconfig         |   8
 drivers/md/Makefile        |   1
 drivers/md/dm-bio-prison.c | 415
 drivers/md/dm-bio-prison.h |  72
 drivers/md/dm-thin.c       | 407
 5 files changed, 499 insertions(+), 404 deletions(-)
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index d949b781f6f8..91a02eeeb319 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -216,6 +216,13 @@ config DM_BUFIO
 	  as a cache, holding recently-read blocks in memory and performing
 	  delayed writes.
 
+config DM_BIO_PRISON
+	tristate
+	depends on BLK_DEV_DM && EXPERIMENTAL
+	---help---
+	  Some bio locking schemes used by other device-mapper targets
+	  including thin provisioning.
+
 source "drivers/md/persistent-data/Kconfig"
 
 config DM_CRYPT
@@ -247,6 +254,7 @@ config DM_THIN_PROVISIONING
 	tristate "Thin provisioning target (EXPERIMENTAL)"
 	depends on BLK_DEV_DM && EXPERIMENTAL
 	select DM_PERSISTENT_DATA
+	select DM_BIO_PRISON
 	---help---
 	  Provides thin provisioning and snapshots that share a data store.
 
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 8b2e0dffe82e..94dce8b49324 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -29,6 +29,7 @@ obj-$(CONFIG_MD_FAULTY)	+= faulty.o
 obj-$(CONFIG_BLK_DEV_MD)	+= md-mod.o
 obj-$(CONFIG_BLK_DEV_DM)	+= dm-mod.o
 obj-$(CONFIG_DM_BUFIO)		+= dm-bufio.o
+obj-$(CONFIG_DM_BIO_PRISON)	+= dm-bio-prison.o
 obj-$(CONFIG_DM_CRYPT)		+= dm-crypt.o
 obj-$(CONFIG_DM_DELAY)		+= dm-delay.o
 obj-$(CONFIG_DM_FLAKEY)		+= dm-flakey.o
diff --git a/drivers/md/dm-bio-prison.c b/drivers/md/dm-bio-prison.c
new file mode 100644
index 000000000000..e4e841567459
--- /dev/null
+++ b/drivers/md/dm-bio-prison.c
@@ -0,0 +1,415 @@
+/*
+ * Copyright (C) 2012 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+#include "dm-bio-prison.h"
+
+#include <linux/spinlock.h>
+#include <linux/mempool.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+/*----------------------------------------------------------------*/
+
+struct dm_bio_prison_cell {
+	struct hlist_node list;
+	struct dm_bio_prison *prison;
+	struct dm_cell_key key;
+	struct bio *holder;
+	struct bio_list bios;
+};
+
+struct dm_bio_prison {
+	spinlock_t lock;
+	mempool_t *cell_pool;
+
+	unsigned nr_buckets;
+	unsigned hash_mask;
+	struct hlist_head *cells;
+};
+
+/*----------------------------------------------------------------*/
+
+static uint32_t calc_nr_buckets(unsigned nr_cells)
+{
+	uint32_t n = 128;
+
+	nr_cells /= 4;
+	nr_cells = min(nr_cells, 8192u);
+
+	while (n < nr_cells)
+		n <<= 1;
+
+	return n;
+}
+
+static struct kmem_cache *_cell_cache;
+
+/*
+ * @nr_cells should be the number of cells you want in use _concurrently_.
+ * Don't confuse it with the number of distinct keys.
+ */
+struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells)
+{
+	unsigned i;
+	uint32_t nr_buckets = calc_nr_buckets(nr_cells);
+	size_t len = sizeof(struct dm_bio_prison) +
+			(sizeof(struct hlist_head) * nr_buckets);
+	struct dm_bio_prison *prison = kmalloc(len, GFP_KERNEL);
+
+	if (!prison)
+		return NULL;
+
+	spin_lock_init(&prison->lock);
+	prison->cell_pool = mempool_create_slab_pool(nr_cells, _cell_cache);
+	if (!prison->cell_pool) {
+		kfree(prison);
+		return NULL;
+	}
+
+	prison->nr_buckets = nr_buckets;
+	prison->hash_mask = nr_buckets - 1;
+	prison->cells = (struct hlist_head *) (prison + 1);
+	for (i = 0; i < nr_buckets; i++)
+		INIT_HLIST_HEAD(prison->cells + i);
+
+	return prison;
+}
+EXPORT_SYMBOL_GPL(dm_bio_prison_create);
+
+void dm_bio_prison_destroy(struct dm_bio_prison *prison)
+{
+	mempool_destroy(prison->cell_pool);
+	kfree(prison);
+}
+EXPORT_SYMBOL_GPL(dm_bio_prison_destroy);
+
+static uint32_t hash_key(struct dm_bio_prison *prison, struct dm_cell_key *key)
+{
+	const unsigned long BIG_PRIME = 4294967291UL;
+	uint64_t hash = key->block * BIG_PRIME;
+
+	return (uint32_t) (hash & prison->hash_mask);
+}
+
+static int keys_equal(struct dm_cell_key *lhs, struct dm_cell_key *rhs)
+{
+	return (lhs->virtual == rhs->virtual) &&
+	       (lhs->dev == rhs->dev) &&
+	       (lhs->block == rhs->block);
+}
+
+static struct dm_bio_prison_cell *__search_bucket(struct hlist_head *bucket,
+						  struct dm_cell_key *key)
+{
+	struct dm_bio_prison_cell *cell;
+	struct hlist_node *tmp;
+
+	hlist_for_each_entry(cell, tmp, bucket, list)
+		if (keys_equal(&cell->key, key))
+			return cell;
+
+	return NULL;
+}
+
+/*
+ * This may block if a new cell needs allocating. You must ensure that
+ * cells will be unlocked even if the calling thread is blocked.
+ *
+ * Returns 1 if the cell was already held, 0 if @inmate is the new holder.
+ */
+int dm_bio_detain(struct dm_bio_prison *prison, struct dm_cell_key *key,
+		  struct bio *inmate, struct dm_bio_prison_cell **ref)
+{
+	int r = 1;
+	unsigned long flags;
+	uint32_t hash = hash_key(prison, key);
+	struct dm_bio_prison_cell *cell, *cell2;
+
+	BUG_ON(hash > prison->nr_buckets);
+
+	spin_lock_irqsave(&prison->lock, flags);
+
+	cell = __search_bucket(prison->cells + hash, key);
+	if (cell) {
+		bio_list_add(&cell->bios, inmate);
+		goto out;
+	}
+
+	/*
+	 * Allocate a new cell
+	 */
+	spin_unlock_irqrestore(&prison->lock, flags);
+	cell2 = mempool_alloc(prison->cell_pool, GFP_NOIO);
+	spin_lock_irqsave(&prison->lock, flags);
+
+	/*
+	 * We've been unlocked, so we have to double check that
+	 * nobody else has inserted this cell in the meantime.
+	 */
+	cell = __search_bucket(prison->cells + hash, key);
+	if (cell) {
+		mempool_free(cell2, prison->cell_pool);
+		bio_list_add(&cell->bios, inmate);
+		goto out;
+	}
+
+	/*
+	 * Use new cell.
+	 */
+	cell = cell2;
+
+	cell->prison = prison;
+	memcpy(&cell->key, key, sizeof(cell->key));
+	cell->holder = inmate;
+	bio_list_init(&cell->bios);
+	hlist_add_head(&cell->list, prison->cells + hash);
+
+	r = 0;
+
+out:
+	spin_unlock_irqrestore(&prison->lock, flags);
+
+	*ref = cell;
+
+	return r;
+}
+EXPORT_SYMBOL_GPL(dm_bio_detain);
+
+/*
+ * @inmates must have been initialised prior to this call
+ */
+static void __cell_release(struct dm_bio_prison_cell *cell, struct bio_list *inmates)
+{
+	struct dm_bio_prison *prison = cell->prison;
+
+	hlist_del(&cell->list);
+
+	if (inmates) {
+		bio_list_add(inmates, cell->holder);
+		bio_list_merge(inmates, &cell->bios);
+	}
+
+	mempool_free(cell, prison->cell_pool);
+}
+
+void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios)
+{
+	unsigned long flags;
+	struct dm_bio_prison *prison = cell->prison;
+
+	spin_lock_irqsave(&prison->lock, flags);
+	__cell_release(cell, bios);
+	spin_unlock_irqrestore(&prison->lock, flags);
+}
+EXPORT_SYMBOL_GPL(dm_cell_release);
+
+/*
+ * There are a couple of places where we put a bio into a cell briefly
+ * before taking it out again. In these situations we know that no other
+ * bio may be in the cell. This function releases the cell, and also does
+ * a sanity check.
+ */
+static void __cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio)
+{
+	BUG_ON(cell->holder != bio);
+	BUG_ON(!bio_list_empty(&cell->bios));
+
+	__cell_release(cell, NULL);
+}
+
+void dm_cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio)
+{
+	unsigned long flags;
+	struct dm_bio_prison *prison = cell->prison;
+
+	spin_lock_irqsave(&prison->lock, flags);
+	__cell_release_singleton(cell, bio);
+	spin_unlock_irqrestore(&prison->lock, flags);
+}
+EXPORT_SYMBOL_GPL(dm_cell_release_singleton);
+
+/*
+ * Sometimes we don't want the holder, just the additional bios.
+ */
+static void __cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates)
+{
+	struct dm_bio_prison *prison = cell->prison;
+
+	hlist_del(&cell->list);
+	bio_list_merge(inmates, &cell->bios);
+
+	mempool_free(cell, prison->cell_pool);
+}
+
+void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates)
+{
+	unsigned long flags;
+	struct dm_bio_prison *prison = cell->prison;
+
+	spin_lock_irqsave(&prison->lock, flags);
+	__cell_release_no_holder(cell, inmates);
+	spin_unlock_irqrestore(&prison->lock, flags);
+}
+EXPORT_SYMBOL_GPL(dm_cell_release_no_holder);
+
+void dm_cell_error(struct dm_bio_prison_cell *cell)
+{
+	struct dm_bio_prison *prison = cell->prison;
+	struct bio_list bios;
+	struct bio *bio;
+	unsigned long flags;
+
+	bio_list_init(&bios);
+
+	spin_lock_irqsave(&prison->lock, flags);
+	__cell_release(cell, &bios);
+	spin_unlock_irqrestore(&prison->lock, flags);
+
+	while ((bio = bio_list_pop(&bios)))
+		bio_io_error(bio);
+}
+EXPORT_SYMBOL_GPL(dm_cell_error);
+
+/*----------------------------------------------------------------*/
+
+#define DEFERRED_SET_SIZE 64
+
+struct dm_deferred_entry {
+	struct dm_deferred_set *ds;
+	unsigned count;
+	struct list_head work_items;
+};
+
+struct dm_deferred_set {
+	spinlock_t lock;
+	unsigned current_entry;
+	unsigned sweeper;
+	struct dm_deferred_entry entries[DEFERRED_SET_SIZE];
+};
+
+struct dm_deferred_set *dm_deferred_set_create(void)
+{
+	int i;
+	struct dm_deferred_set *ds;
+
+	ds = kmalloc(sizeof(*ds), GFP_KERNEL);
+	if (!ds)
+		return NULL;
+
+	spin_lock_init(&ds->lock);
+	ds->current_entry = 0;
+	ds->sweeper = 0;
+	for (i = 0; i < DEFERRED_SET_SIZE; i++) {
+		ds->entries[i].ds = ds;
+		ds->entries[i].count = 0;
+		INIT_LIST_HEAD(&ds->entries[i].work_items);
+	}
+
+	return ds;
+}
+EXPORT_SYMBOL_GPL(dm_deferred_set_create);
+
+void dm_deferred_set_destroy(struct dm_deferred_set *ds)
+{
+	kfree(ds);
+}
+EXPORT_SYMBOL_GPL(dm_deferred_set_destroy);
+
+struct dm_deferred_entry *dm_deferred_entry_inc(struct dm_deferred_set *ds)
+{
+	unsigned long flags;
+	struct dm_deferred_entry *entry;
+
+	spin_lock_irqsave(&ds->lock, flags);
+	entry = ds->entries + ds->current_entry;
+	entry->count++;
+	spin_unlock_irqrestore(&ds->lock, flags);
+
+	return entry;
+}
+EXPORT_SYMBOL_GPL(dm_deferred_entry_inc);
+
+static unsigned ds_next(unsigned index)
+{
+	return (index + 1) % DEFERRED_SET_SIZE;
+}
+
+static void __sweep(struct dm_deferred_set *ds, struct list_head *head)
+{
+	while ((ds->sweeper != ds->current_entry) &&
+	       !ds->entries[ds->sweeper].count) {
+		list_splice_init(&ds->entries[ds->sweeper].work_items, head);
+		ds->sweeper = ds_next(ds->sweeper);
+	}
+
+	if ((ds->sweeper == ds->current_entry) && !ds->entries[ds->sweeper].count)
+		list_splice_init(&ds->entries[ds->sweeper].work_items, head);
+}
+
+void dm_deferred_entry_dec(struct dm_deferred_entry *entry, struct list_head *head)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&entry->ds->lock, flags);
+	BUG_ON(!entry->count);
+	--entry->count;
+	__sweep(entry->ds, head);
+	spin_unlock_irqrestore(&entry->ds->lock, flags);
+}
+EXPORT_SYMBOL_GPL(dm_deferred_entry_dec);
+
+/*
+ * Returns 1 if deferred or 0 if no pending items to delay job.
+ */
+int dm_deferred_set_add_work(struct dm_deferred_set *ds, struct list_head *work)
+{
+	int r = 1;
+	unsigned long flags;
+	unsigned next_entry;
+
+	spin_lock_irqsave(&ds->lock, flags);
+	if ((ds->sweeper == ds->current_entry) &&
+	    !ds->entries[ds->current_entry].count)
+		r = 0;
+	else {
+		list_add(work, &ds->entries[ds->current_entry].work_items);
+		next_entry = ds_next(ds->current_entry);
+		if (!ds->entries[next_entry].count)
+			ds->current_entry = next_entry;
+	}
+	spin_unlock_irqrestore(&ds->lock, flags);
+
+	return r;
+}
+EXPORT_SYMBOL_GPL(dm_deferred_set_add_work);
+
+/*----------------------------------------------------------------*/
+
+static int __init dm_bio_prison_init(void)
+{
+	_cell_cache = KMEM_CACHE(dm_bio_prison_cell, 0);
+	if (!_cell_cache)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void __exit dm_bio_prison_exit(void)
+{
+	kmem_cache_destroy(_cell_cache);
+	_cell_cache = NULL;
+}
+
+/*
+ * module hooks
+ */
+module_init(dm_bio_prison_init);
+module_exit(dm_bio_prison_exit);
+
+MODULE_DESCRIPTION(DM_NAME " bio prison");
+MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm-bio-prison.h b/drivers/md/dm-bio-prison.h
new file mode 100644
index 000000000000..4e0ac376700a
--- /dev/null
+++ b/drivers/md/dm-bio-prison.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2011-2012 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef DM_BIO_PRISON_H
+#define DM_BIO_PRISON_H
+
+#include "persistent-data/dm-block-manager.h" /* FIXME: for dm_block_t */
+#include "dm-thin-metadata.h" /* FIXME: for dm_thin_id */
+
+#include <linux/list.h>
+#include <linux/bio.h>
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Sometimes we can't deal with a bio straight away. We put them in prison
+ * where they can't cause any mischief. Bios are put in a cell identified
+ * by a key, multiple bios can be in the same cell. When the cell is
+ * subsequently unlocked the bios become available.
+ */
+struct dm_bio_prison;
+struct dm_bio_prison_cell;
+
+/* FIXME: this needs to be more abstract */
+struct dm_cell_key {
+	int virtual;
+	dm_thin_id dev;
+	dm_block_t block;
+};
+
+struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells);
+void dm_bio_prison_destroy(struct dm_bio_prison *prison);
+
+/*
+ * This may block if a new cell needs allocating. You must ensure that
+ * cells will be unlocked even if the calling thread is blocked.
+ *
+ * Returns 1 if the cell was already held, 0 if @inmate is the new holder.
+ */
+int dm_bio_detain(struct dm_bio_prison *prison, struct dm_cell_key *key,
+		  struct bio *inmate, struct dm_bio_prison_cell **ref);
+
+void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios);
+void dm_cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio); // FIXME: bio arg not needed
+void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates);
+void dm_cell_error(struct dm_bio_prison_cell *cell);
+
+/*----------------------------------------------------------------*/
+
+/*
+ * We use the deferred set to keep track of pending reads to shared blocks.
+ * We do this to ensure the new mapping caused by a write isn't performed
+ * until these prior reads have completed. Otherwise the insertion of the
+ * new mapping could free the old block that the read bios are mapped to.
+ */
+
+struct dm_deferred_set;
+struct dm_deferred_entry;
+
+struct dm_deferred_set *dm_deferred_set_create(void);
+void dm_deferred_set_destroy(struct dm_deferred_set *ds);
+
+struct dm_deferred_entry *dm_deferred_entry_inc(struct dm_deferred_set *ds);
+void dm_deferred_entry_dec(struct dm_deferred_entry *entry, struct list_head *head);
+int dm_deferred_set_add_work(struct dm_deferred_set *ds, struct list_head *work);
+
+/*----------------------------------------------------------------*/
+
+#endif
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 22a22a701e16..058acf3a5ba7 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -5,6 +5,7 @@
  */
 
 #include "dm-thin-metadata.h"
+#include "dm-bio-prison.h"
 #include "dm.h"
 
 #include <linux/device-mapper.h>
@@ -21,7 +22,6 @@
  * Tunable constants
  */
 #define ENDIO_HOOK_POOL_SIZE 1024
-#define DEFERRED_SET_SIZE 64
 #define MAPPING_POOL_SIZE 1024
 #define PRISON_CELLS 1024
 #define COMMIT_PERIOD HZ
@@ -99,404 +99,6 @@
 /*----------------------------------------------------------------*/
 
 /*
- * Sometimes we can't deal with a bio straight away. We put them in prison
- * where they can't cause any mischief. Bios are put in a cell identified
- * by a key, multiple bios can be in the same cell. When the cell is
- * subsequently unlocked the bios become available.
- */
-struct dm_bio_prison;
-
-struct dm_cell_key {
-	int virtual;
-	dm_thin_id dev;
-	dm_block_t block;
-};
-
-struct dm_bio_prison_cell {
-	struct hlist_node list;
-	struct dm_bio_prison *prison;
-	struct dm_cell_key key;
-	struct bio *holder;
-	struct bio_list bios;
-};
-
-struct dm_bio_prison {
-	spinlock_t lock;
-	mempool_t *cell_pool;
-
-	unsigned nr_buckets;
-	unsigned hash_mask;
-	struct hlist_head *cells;
-};
-
-static uint32_t calc_nr_buckets(unsigned nr_cells)
-{
-	uint32_t n = 128;
-
-	nr_cells /= 4;
-	nr_cells = min(nr_cells, 8192u);
-
-	while (n < nr_cells)
-		n <<= 1;
-
-	return n;
-}
-
-static struct kmem_cache *_cell_cache;
-
-/*
- * @nr_cells should be the number of cells you want in use _concurrently_.
- * Don't confuse it with the number of distinct keys.
- */
-static struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells)
-{
-	unsigned i;
-	uint32_t nr_buckets = calc_nr_buckets(nr_cells);
-	size_t len = sizeof(struct dm_bio_prison) +
-			(sizeof(struct hlist_head) * nr_buckets);
-	struct dm_bio_prison *prison = kmalloc(len, GFP_KERNEL);
-
-	if (!prison)
-		return NULL;
-
-	spin_lock_init(&prison->lock);
-	prison->cell_pool = mempool_create_slab_pool(nr_cells, _cell_cache);
-	if (!prison->cell_pool) {
-		kfree(prison);
-		return NULL;
-	}
-
-	prison->nr_buckets = nr_buckets;
-	prison->hash_mask = nr_buckets - 1;
-	prison->cells = (struct hlist_head *) (prison + 1);
-	for (i = 0; i < nr_buckets; i++)
-		INIT_HLIST_HEAD(prison->cells + i);
-
-	return prison;
-}
-
-static void dm_bio_prison_destroy(struct dm_bio_prison *prison)
-{
-	mempool_destroy(prison->cell_pool);
-	kfree(prison);
-}
-
-static uint32_t hash_key(struct dm_bio_prison *prison, struct dm_cell_key *key)
-{
-	const unsigned long BIG_PRIME = 4294967291UL;
-	uint64_t hash = key->block * BIG_PRIME;
-
-	return (uint32_t) (hash & prison->hash_mask);
-}
-
-static int keys_equal(struct dm_cell_key *lhs, struct dm_cell_key *rhs)
-{
-	return (lhs->virtual == rhs->virtual) &&
-	       (lhs->dev == rhs->dev) &&
-	       (lhs->block == rhs->block);
-}
-
-static struct dm_bio_prison_cell *__search_bucket(struct hlist_head *bucket,
-						  struct dm_cell_key *key)
-{
-	struct dm_bio_prison_cell *cell;
-	struct hlist_node *tmp;
-
-	hlist_for_each_entry(cell, tmp, bucket, list)
-		if (keys_equal(&cell->key, key))
-			return cell;
-
-	return NULL;
-}
-
-/*
- * This may block if a new cell needs allocating. You must ensure that
- * cells will be unlocked even if the calling thread is blocked.
- *
- * Returns 1 if the cell was already held, 0 if @inmate is the new holder.
- */
-static int dm_bio_detain(struct dm_bio_prison *prison, struct dm_cell_key *key,
-			 struct bio *inmate, struct dm_bio_prison_cell **ref)
-{
-	int r = 1;
-	unsigned long flags;
-	uint32_t hash = hash_key(prison, key);
-	struct dm_bio_prison_cell *cell, *cell2;
-
-	BUG_ON(hash > prison->nr_buckets);
-
-	spin_lock_irqsave(&prison->lock, flags);
-
-	cell = __search_bucket(prison->cells + hash, key);
-	if (cell) {
-		bio_list_add(&cell->bios, inmate);
-		goto out;
-	}
-
-	/*
-	 * Allocate a new cell
-	 */
-	spin_unlock_irqrestore(&prison->lock, flags);
-	cell2 = mempool_alloc(prison->cell_pool, GFP_NOIO);
-	spin_lock_irqsave(&prison->lock, flags);
-
-	/*
-	 * We've been unlocked, so we have to double check that
-	 * nobody else has inserted this cell in the meantime.
-	 */
-	cell = __search_bucket(prison->cells + hash, key);
-	if (cell) {
-		mempool_free(cell2, prison->cell_pool);
-		bio_list_add(&cell->bios, inmate);
-		goto out;
-	}
-
-	/*
-	 * Use new cell.
-	 */
-	cell = cell2;
-
-	cell->prison = prison;
-	memcpy(&cell->key, key, sizeof(cell->key));
-	cell->holder = inmate;
-	bio_list_init(&cell->bios);
-	hlist_add_head(&cell->list, prison->cells + hash);
-
-	r = 0;
-
-out:
-	spin_unlock_irqrestore(&prison->lock, flags);
-
-	*ref = cell;
-
-	return r;
-}
-
-/*
- * @inmates must have been initialised prior to this call
- */
-static void __cell_release(struct dm_bio_prison_cell *cell, struct bio_list *inmates)
-{
-	struct dm_bio_prison *prison = cell->prison;
-
-	hlist_del(&cell->list);
-
-	if (inmates) {
-		bio_list_add(inmates, cell->holder);
-		bio_list_merge(inmates, &cell->bios);
-	}
-
-	mempool_free(cell, prison->cell_pool);
-}
-
-static void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios)
-{
-	unsigned long flags;
-	struct dm_bio_prison *prison = cell->prison;
-
-	spin_lock_irqsave(&prison->lock, flags);
-	__cell_release(cell, bios);
-	spin_unlock_irqrestore(&prison->lock, flags);
-}
-
-/*
- * There are a couple of places where we put a bio into a cell briefly
- * before taking it out again. In these situations we know that no other
- * bio may be in the cell. This function releases the cell, and also does
- * a sanity check.
- */
-static void __cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio)
-{
-	BUG_ON(cell->holder != bio);
-	BUG_ON(!bio_list_empty(&cell->bios));
-
-	__cell_release(cell, NULL);
-}
-
-static void dm_cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio)
-{
-	unsigned long flags;
-	struct dm_bio_prison *prison = cell->prison;
-
-	spin_lock_irqsave(&prison->lock, flags);
-	__cell_release_singleton(cell, bio);
-	spin_unlock_irqrestore(&prison->lock, flags);
-}
-
-/*
- * Sometimes we don't want the holder, just the additional bios.
- */
-static void __cell_release_no_holder(struct dm_bio_prison_cell *cell,
-				     struct bio_list *inmates)
-{
-	struct dm_bio_prison *prison = cell->prison;
-
-	hlist_del(&cell->list);
-	bio_list_merge(inmates, &cell->bios);
-
-	mempool_free(cell, prison->cell_pool);
-}
-
-static void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell,
-				      struct bio_list *inmates)
-{
-	unsigned long flags;
-	struct dm_bio_prison *prison = cell->prison;
-
-	spin_lock_irqsave(&prison->lock, flags);
-	__cell_release_no_holder(cell, inmates);
-	spin_unlock_irqrestore(&prison->lock, flags);
-}
-
-static void dm_cell_error(struct dm_bio_prison_cell *cell)
-{
-	struct dm_bio_prison *prison = cell->prison;
-	struct bio_list bios;
-	struct bio *bio;
-	unsigned long flags;
-
-	bio_list_init(&bios);
-
-	spin_lock_irqsave(&prison->lock, flags);
-	__cell_release(cell, &bios);
-	spin_unlock_irqrestore(&prison->lock, flags);
-
-	while ((bio = bio_list_pop(&bios)))
-		bio_io_error(bio);
-}
-
-/*----------------------------------------------------------------*/
-
-/*
- * We use the deferred set to keep track of pending reads to shared blocks.
- * We do this to ensure the new mapping caused by a write isn't performed
- * until these prior reads have completed. Otherwise the insertion of the
- * new mapping could free the old block that the read bios are mapped to.
- */
-
-struct dm_deferred_set;
-struct dm_deferred_entry {
-	struct dm_deferred_set *ds;
-	unsigned count;
-	struct list_head work_items;
-};
-
-struct dm_deferred_set {
-	spinlock_t lock;
-	unsigned current_entry;
-	unsigned sweeper;
-	struct dm_deferred_entry entries[DEFERRED_SET_SIZE];
-};
-
-static struct dm_deferred_set *dm_deferred_set_create(void)
-{
-	int i;
-	struct dm_deferred_set *ds;
-
-	ds = kmalloc(sizeof(*ds), GFP_KERNEL);
-	if (!ds)
-		return NULL;
-
-	spin_lock_init(&ds->lock);
-	ds->current_entry = 0;
-	ds->sweeper = 0;
-	for (i = 0; i < DEFERRED_SET_SIZE; i++) {
-		ds->entries[i].ds = ds;
-		ds->entries[i].count = 0;
-		INIT_LIST_HEAD(&ds->entries[i].work_items);
-	}
-
-	return ds;
-}
-
-static void dm_deferred_set_destroy(struct dm_deferred_set *ds)
-{
-	kfree(ds);
-}
-
-static struct dm_deferred_entry *dm_deferred_entry_inc(struct dm_deferred_set *ds)
-{
-	unsigned long flags;
-	struct dm_deferred_entry *entry;
-
-	spin_lock_irqsave(&ds->lock, flags);
-	entry = ds->entries + ds->current_entry;
-	entry->count++;
-	spin_unlock_irqrestore(&ds->lock, flags);
-
-	return entry;
-}
-
-static unsigned ds_next(unsigned index)
-{
-	return (index + 1) % DEFERRED_SET_SIZE;
-}
-
-static void __sweep(struct dm_deferred_set *ds, struct list_head *head)
-{
-	while ((ds->sweeper != ds->current_entry) &&
-	       !ds->entries[ds->sweeper].count) {
-		list_splice_init(&ds->entries[ds->sweeper].work_items, head);
-		ds->sweeper = ds_next(ds->sweeper);
-	}
-
-	if ((ds->sweeper == ds->current_entry) && !ds->entries[ds->sweeper].count)
-		list_splice_init(&ds->entries[ds->sweeper].work_items, head);
-}
-
-static void dm_deferred_entry_dec(struct dm_deferred_entry *entry, struct list_head *head)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&entry->ds->lock, flags);
-	BUG_ON(!entry->count);
-	--entry->count;
-	__sweep(entry->ds, head);
-	spin_unlock_irqrestore(&entry->ds->lock, flags);
-}
-
-/*
- * Returns 1 if deferred or 0 if no pending items to delay job.
- */
-static int dm_deferred_set_add_work(struct dm_deferred_set *ds, struct list_head *work)
-{
-	int r = 1;
-	unsigned long flags;
-	unsigned next_entry;
-
-	spin_lock_irqsave(&ds->lock, flags);
-	if ((ds->sweeper == ds->current_entry) &&
-	    !ds->entries[ds->current_entry].count)
-		r = 0;
-	else {
-		list_add(work, &ds->entries[ds->current_entry].work_items);
-		next_entry = ds_next(ds->current_entry);
-		if (!ds->entries[next_entry].count)
-			ds->current_entry = next_entry;
-	}
-	spin_unlock_irqrestore(&ds->lock, flags);
-
-	return r;
-}
-
-static int __init dm_bio_prison_init(void)
-{
-	_cell_cache = KMEM_CACHE(dm_bio_prison_cell, 0);
-	if (!_cell_cache)
-		return -ENOMEM;
-
-	return 0;
-}
-
-static void __exit dm_bio_prison_exit(void)
-{
-	kmem_cache_destroy(_cell_cache);
-	_cell_cache = NULL;
-}
-
-/*----------------------------------------------------------------*/
-
-/*
  * Key building.
  */
 static void build_data_key(struct dm_thin_device *td,
@@ -2852,7 +2454,7 @@ static struct target_type pool_target = {
 	.name = "thin-pool",
 	.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
 		    DM_TARGET_IMMUTABLE,
-	.version = {1, 4, 0},
+	.version = {1, 5, 0},
 	.module = THIS_MODULE,
 	.ctr = pool_ctr,
 	.dtr = pool_dtr,
@@ -3143,7 +2745,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 static struct target_type thin_target = {
 	.name = "thin",
-	.version = {1, 4, 0},
+	.version = {1, 5, 0},
 	.module = THIS_MODULE,
 	.ctr = thin_ctr,
 	.dtr = thin_dtr,
@@ -3173,8 +2775,6 @@ static int __init dm_thin_init(void)
 
 	r = -ENOMEM;
 
-	dm_bio_prison_init();
-
 	_new_mapping_cache = KMEM_CACHE(dm_thin_new_mapping, 0);
 	if (!_new_mapping_cache)
 		goto bad_new_mapping_cache;
@@ -3200,7 +2800,6 @@ static void dm_thin_exit(void)
 	dm_unregister_target(&thin_target);
 	dm_unregister_target(&pool_target);
 
-	dm_bio_prison_exit();
 	kmem_cache_destroy(_new_mapping_cache);
 	kmem_cache_destroy(_endio_hook_cache);
 }
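
Usage note (not part of the commit): a minimal sketch of how a target built on the now-exported API might lock a block and later release it. The names example_prison, example_map and example_work_complete are invented for illustration; only the calls declared in dm-bio-prison.h are assumed.

/*
 * Illustrative sketch only -- not part of the patch. A hypothetical target
 * detains bios against a key while work on a block is outstanding, then
 * releases the whole cell when that work completes.
 */
#include <linux/device-mapper.h>
#include "dm-bio-prison.h"

static struct dm_bio_prison *example_prison;	/* hypothetical: created in the target ctr */

static int example_map(struct bio *bio, dm_thin_id dev, dm_block_t block)
{
	struct dm_bio_prison_cell *cell;
	struct dm_cell_key key = {
		.virtual = 0,
		.dev = dev,
		.block = block,
	};

	/*
	 * dm_bio_detain() returns 1 if the block was already locked: the bio
	 * is queued in the existing cell and surfaces again on release.
	 */
	if (dm_bio_detain(example_prison, &key, bio, &cell))
		return DM_MAPIO_SUBMITTED;

	/* We are the holder: start whatever work the lock protects (copy, zero, ...). */
	return DM_MAPIO_SUBMITTED;
}

static void example_work_complete(struct dm_bio_prison_cell *cell)
{
	struct bio_list bios;

	bio_list_init(&bios);
	dm_cell_release(cell, &bios);	/* holder plus any detained waiters */
	/* remap or requeue the released bios here */
}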