 Documentation/device-mapper/cache-policies.txt |   5 +
 drivers/md/Kconfig                             |   8 +
 drivers/md/Makefile                            |   2 +
 drivers/md/dm-cache-policy-cleaner.c           | 464 ++++++++++++++++++++++++
 4 files changed, 479 insertions(+), 0 deletions(-)
diff --git a/Documentation/device-mapper/cache-policies.txt b/Documentation/device-mapper/cache-policies.txt
index 731879f97b80..d7c440b444cc 100644
--- a/Documentation/device-mapper/cache-policies.txt
+++ b/Documentation/device-mapper/cache-policies.txt
@@ -53,6 +53,11 @@ since spindles tend to have good bandwidth. The io_tracker counts
 contiguous I/Os to try to spot when the io is in one of these sequential
 modes.
 
+cleaner
+-------
+
+The cleaner writes back all dirty blocks in a cache to decommission it.
+
 Examples
 ========
 
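To illustrate what decommissioning looks like against this policy: below is a minimal sketch of the drain loop a cache core could run, asking the policy for writeback work until it reports -ENOENT. The copy_block() helper is hypothetical and stands in for the core's real migration machinery; only writeback_work() and clear_dirty() come from the policy interface added by this patch.

    /* Hypothetical drain loop -- illustration only, not part of this patch. */
    static void drain_dirty_blocks(struct dm_cache_policy *p)
    {
            dm_oblock_t oblock;
            dm_cblock_t cblock;

            /* -ENOENT from writeback_work() means no dirty blocks remain. */
            while (!p->writeback_work(p, &oblock, &cblock)) {
                    copy_block(cblock, oblock);  /* hypothetical: copy cache block to origin */
                    p->clear_dirty(p, oblock);   /* entry moves to the policy's clean list */
            }
    }
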
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 1a96cbc7afda..e30b490055aa 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -291,6 +291,14 @@ config DM_CACHE_MQ
 	 This is meant to be a general purpose policy.  It prioritises
 	 reads over writes.
 
+config DM_CACHE_CLEANER
+	tristate "Cleaner Cache Policy (EXPERIMENTAL)"
+	depends on DM_CACHE
+	default y
+	---help---
+	  A simple cache policy that writes back all data to the
+	  origin.  Used when decommissioning a dm-cache.
+
 config DM_MIRROR
 	tristate "Mirror target"
 	depends on BLK_DEV_DM
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index adc8710c2408..7ceeaefc0e95 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -13,6 +13,7 @@ dm-log-userspace-y \
 dm-thin-pool-y	+= dm-thin.o dm-thin-metadata.o
 dm-cache-y	+= dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o
 dm-cache-mq-y	+= dm-cache-policy-mq.o
+dm-cache-cleaner-y += dm-cache-policy-cleaner.o
 md-mod-y	+= md.o bitmap.o
 raid456-y	+= raid5.o
 
@@ -48,6 +49,7 @@ obj-$(CONFIG_DM_THIN_PROVISIONING) += dm-thin-pool.o
 obj-$(CONFIG_DM_VERITY)		+= dm-verity.o
 obj-$(CONFIG_DM_CACHE)		+= dm-cache.o
 obj-$(CONFIG_DM_CACHE_MQ)	+= dm-cache-mq.o
+obj-$(CONFIG_DM_CACHE_CLEANER)	+= dm-cache-cleaner.o
 
 ifeq ($(CONFIG_DM_UEVENT),y)
 dm-mod-objs			+= dm-uevent.o
diff --git a/drivers/md/dm-cache-policy-cleaner.c b/drivers/md/dm-cache-policy-cleaner.c
new file mode 100644
index 000000000000..cc05d70b3cb8
--- /dev/null
+++ b/drivers/md/dm-cache-policy-cleaner.c
@@ -0,0 +1,464 @@
+/*
+ * Copyright (C) 2012 Red Hat. All rights reserved.
+ *
+ * Writeback cache policy that flushes out dirty cache blocks.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-cache-policy.h"
+#include "dm.h"
+
+#include <linux/hash.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+/*----------------------------------------------------------------*/
+
+#define DM_MSG_PREFIX "cache cleaner"
+#define CLEANER_VERSION "1.0.0"
+
+/* Cache entry struct. */
+struct wb_cache_entry {
+	struct list_head list;
+	struct hlist_node hlist;
+
+	dm_oblock_t oblock;
+	dm_cblock_t cblock;
+	bool dirty:1;
+	bool pending:1;
+};
+
+struct hash {
+	struct hlist_head *table;
+	dm_block_t hash_bits;
+	unsigned nr_buckets;
+};
+
+struct policy {
+	struct dm_cache_policy policy;
+	spinlock_t lock;
+
+	struct list_head free;
+	struct list_head clean;
+	struct list_head clean_pending;
+	struct list_head dirty;
+
+	/*
+	 * We know exactly how many cblocks will be needed,
+	 * so we can allocate them up front.
+	 */
+	dm_cblock_t cache_size, nr_cblocks_allocated;
+	struct wb_cache_entry *cblocks;
+	struct hash chash;
+};
+
+/*----------------------------------------------------------------------------*/
+
+/*
+ * Low-level functions.
+ */
+static unsigned next_power(unsigned n, unsigned min)
+{
+	return roundup_pow_of_two(max(n, min));
+}
+
+static struct policy *to_policy(struct dm_cache_policy *p)
+{
+	return container_of(p, struct policy, policy);
+}
+
+static struct list_head *list_pop(struct list_head *q)
+{
+	struct list_head *r = q->next;
+
+	list_del(r);
+
+	return r;
+}
+
+/*----------------------------------------------------------------------------*/
+
+/* Allocate/free various resources. */
+static int alloc_hash(struct hash *hash, unsigned elts)
+{
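+	/*
+	 * Roughly one bucket per 16 entries, rounded up to a power of
+	 * two, with a floor of 16 buckets; e.g. a 65536-block cache
+	 * gets 4096 buckets, giving hash_bits of 12.
+	 */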
+	hash->nr_buckets = next_power(elts >> 4, 16);
+	hash->hash_bits = ffs(hash->nr_buckets) - 1;
+	hash->table = vzalloc(sizeof(*hash->table) * hash->nr_buckets);
+
+	return hash->table ? 0 : -ENOMEM;
+}
+
+static void free_hash(struct hash *hash)
+{
+	vfree(hash->table);
+}
+
+static int alloc_cache_blocks_with_hash(struct policy *p, dm_cblock_t cache_size)
+{
+	int r = -ENOMEM;
+
+	p->cblocks = vzalloc(sizeof(*p->cblocks) * from_cblock(cache_size));
+	if (p->cblocks) {
+		unsigned u = from_cblock(cache_size);
+
+		while (u--)
+			list_add(&p->cblocks[u].list, &p->free);
+
+		p->nr_cblocks_allocated = 0;
+
+		/* Cache entries hash. */
+		r = alloc_hash(&p->chash, from_cblock(cache_size));
+		if (r)
+			vfree(p->cblocks);
+	}
+
+	return r;
+}
+
+static void free_cache_blocks_and_hash(struct policy *p)
+{
+	free_hash(&p->chash);
+	vfree(p->cblocks);
+}
+
+static struct wb_cache_entry *alloc_cache_entry(struct policy *p)
+{
+	struct wb_cache_entry *e;
+
+	BUG_ON(from_cblock(p->nr_cblocks_allocated) >= from_cblock(p->cache_size));
+
+	e = list_entry(list_pop(&p->free), struct wb_cache_entry, list);
+	p->nr_cblocks_allocated = to_cblock(from_cblock(p->nr_cblocks_allocated) + 1);
+
+	return e;
+}
+
+/*----------------------------------------------------------------------------*/
+
+/* Hash functions (lookup, insert, remove). */
+static struct wb_cache_entry *lookup_cache_entry(struct policy *p, dm_oblock_t oblock)
+{
+	struct hash *hash = &p->chash;
+	unsigned h = hash_64(from_oblock(oblock), hash->hash_bits);
+	struct wb_cache_entry *cur;
+	struct hlist_head *bucket = &hash->table[h];
+
+	hlist_for_each_entry(cur, bucket, hlist) {
+		if (cur->oblock == oblock) {
+			/* Move to the front of the bucket for faster access. */
+			hlist_del(&cur->hlist);
+			hlist_add_head(&cur->hlist, bucket);
+			return cur;
+		}
+	}
+
+	return NULL;
+}
+
+static void insert_cache_hash_entry(struct policy *p, struct wb_cache_entry *e)
+{
+	unsigned h = hash_64(from_oblock(e->oblock), p->chash.hash_bits);
+
+	hlist_add_head(&e->hlist, &p->chash.table[h]);
+}
+
+static void remove_cache_hash_entry(struct wb_cache_entry *e)
+{
+	hlist_del(&e->hlist);
+}
+
+/* Public interface (see dm-cache-policy.h). */
+static int wb_map(struct dm_cache_policy *pe, dm_oblock_t oblock,
+		  bool can_block, bool can_migrate, bool discarded_oblock,
+		  struct bio *bio, struct policy_result *result)
+{
+	struct policy *p = to_policy(pe);
+	struct wb_cache_entry *e;
+	unsigned long flags;
+
+	result->op = POLICY_MISS;
+
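+	/*
+	 * Callers on the fast path may not be allowed to sleep; for
+	 * them, try the lock and return -EWOULDBLOCK so the caller can
+	 * retry from a context that can block.
+	 */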
+	if (can_block)
+		spin_lock_irqsave(&p->lock, flags);
+
+	else if (!spin_trylock_irqsave(&p->lock, flags))
+		return -EWOULDBLOCK;
+
+	e = lookup_cache_entry(p, oblock);
+	if (e) {
+		result->op = POLICY_HIT;
+		result->cblock = e->cblock;
+
+	}
+
+	spin_unlock_irqrestore(&p->lock, flags);
+
+	return 0;
+}
+
+static int wb_lookup(struct dm_cache_policy *pe, dm_oblock_t oblock, dm_cblock_t *cblock)
+{
+	int r;
+	struct policy *p = to_policy(pe);
+	struct wb_cache_entry *e;
+	unsigned long flags;
+
+	if (!spin_trylock_irqsave(&p->lock, flags))
+		return -EWOULDBLOCK;
+
+	e = lookup_cache_entry(p, oblock);
+	if (e) {
+		*cblock = e->cblock;
+		r = 0;
+
+	} else
+		r = -ENOENT;
+
+	spin_unlock_irqrestore(&p->lock, flags);
+
+	return r;
+}
+
+static void __set_clear_dirty(struct dm_cache_policy *pe, dm_oblock_t oblock, bool set)
+{
+	struct policy *p = to_policy(pe);
+	struct wb_cache_entry *e;
+
+	e = lookup_cache_entry(p, oblock);
+	BUG_ON(!e);
+
+	if (set) {
+		if (!e->dirty) {
+			e->dirty = true;
+			list_move(&e->list, &p->dirty);
+		}
+
+	} else {
+		if (e->dirty) {
+			e->pending = false;
+			e->dirty = false;
+			list_move(&e->list, &p->clean);
+		}
+	}
+}
+
+static void wb_set_dirty(struct dm_cache_policy *pe, dm_oblock_t oblock)
+{
+	struct policy *p = to_policy(pe);
+	unsigned long flags;
+
+	spin_lock_irqsave(&p->lock, flags);
+	__set_clear_dirty(pe, oblock, true);
+	spin_unlock_irqrestore(&p->lock, flags);
+}
+
+static void wb_clear_dirty(struct dm_cache_policy *pe, dm_oblock_t oblock)
+{
+	struct policy *p = to_policy(pe);
+	unsigned long flags;
+
+	spin_lock_irqsave(&p->lock, flags);
+	__set_clear_dirty(pe, oblock, false);
+	spin_unlock_irqrestore(&p->lock, flags);
+}
+
+static void add_cache_entry(struct policy *p, struct wb_cache_entry *e)
+{
+	insert_cache_hash_entry(p, e);
+	if (e->dirty)
+		list_add(&e->list, &p->dirty);
+	else
+		list_add(&e->list, &p->clean);
+}
+
+static int wb_load_mapping(struct dm_cache_policy *pe,
+			   dm_oblock_t oblock, dm_cblock_t cblock,
+			   uint32_t hint, bool hint_valid)
+{
+	int r;
+	struct policy *p = to_policy(pe);
+	struct wb_cache_entry *e = alloc_cache_entry(p);
+
+	if (e) {
+		e->cblock = cblock;
+		e->oblock = oblock;
+		e->dirty = false; /* blocks default to clean */
+		add_cache_entry(p, e);
+		r = 0;
+
+	} else
+		r = -ENOMEM;
+
+	return r;
+}
+
+static void wb_destroy(struct dm_cache_policy *pe)
+{
+	struct policy *p = to_policy(pe);
+
+	free_cache_blocks_and_hash(p);
+	kfree(p);
+}
+
+static struct wb_cache_entry *__wb_force_remove_mapping(struct policy *p, dm_oblock_t oblock)
+{
+	struct wb_cache_entry *r = lookup_cache_entry(p, oblock);
+
+	BUG_ON(!r);
+
+	remove_cache_hash_entry(r);
+	list_del(&r->list);
+
+	return r;
+}
+
+static void wb_remove_mapping(struct dm_cache_policy *pe, dm_oblock_t oblock)
+{
+	struct policy *p = to_policy(pe);
+	struct wb_cache_entry *e;
+	unsigned long flags;
+
+	spin_lock_irqsave(&p->lock, flags);
+	e = __wb_force_remove_mapping(p, oblock);
+	list_add_tail(&e->list, &p->free);
+	BUG_ON(!from_cblock(p->nr_cblocks_allocated));
+	p->nr_cblocks_allocated = to_cblock(from_cblock(p->nr_cblocks_allocated) - 1);
+	spin_unlock_irqrestore(&p->lock, flags);
+}
+
+static void wb_force_mapping(struct dm_cache_policy *pe,
+			     dm_oblock_t current_oblock, dm_oblock_t oblock)
+{
+	struct policy *p = to_policy(pe);
+	struct wb_cache_entry *e;
+	unsigned long flags;
+
+	spin_lock_irqsave(&p->lock, flags);
+	e = __wb_force_remove_mapping(p, current_oblock);
+	e->oblock = oblock;
+	add_cache_entry(p, e);
+	spin_unlock_irqrestore(&p->lock, flags);
+}
+
+static struct wb_cache_entry *get_next_dirty_entry(struct policy *p)
+{
+	struct list_head *l;
+	struct wb_cache_entry *r;
+
+	if (list_empty(&p->dirty))
+		return NULL;
+
+	l = list_pop(&p->dirty);
+	r = container_of(l, struct wb_cache_entry, list);
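+
+	/*
+	 * Park the entry on clean_pending: it stays marked dirty until
+	 * the target has copied it back and calls clear_dirty(), which
+	 * then moves it to the clean list.
+	 */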
+	list_add(l, &p->clean_pending);
+
+	return r;
+}
+
+static int wb_writeback_work(struct dm_cache_policy *pe,
+			     dm_oblock_t *oblock,
+			     dm_cblock_t *cblock)
+{
+	int r = -ENOENT;
+	struct policy *p = to_policy(pe);
+	struct wb_cache_entry *e;
+	unsigned long flags;
+
+	spin_lock_irqsave(&p->lock, flags);
+
+	e = get_next_dirty_entry(p);
+	if (e) {
+		*oblock = e->oblock;
+		*cblock = e->cblock;
+		r = 0;
+	}
+
+	spin_unlock_irqrestore(&p->lock, flags);
+
+	return r;
+}
+
+static dm_cblock_t wb_residency(struct dm_cache_policy *pe)
+{
+	return to_policy(pe)->nr_cblocks_allocated;
+}
+
+/* Init the policy plugin interface function pointers. */
+static void init_policy_functions(struct policy *p)
+{
+	p->policy.destroy = wb_destroy;
+	p->policy.map = wb_map;
+	p->policy.lookup = wb_lookup;
+	p->policy.set_dirty = wb_set_dirty;
+	p->policy.clear_dirty = wb_clear_dirty;
+	p->policy.load_mapping = wb_load_mapping;
+	p->policy.walk_mappings = NULL;
+	p->policy.remove_mapping = wb_remove_mapping;
+	p->policy.writeback_work = wb_writeback_work;
+	p->policy.force_mapping = wb_force_mapping;
+	p->policy.residency = wb_residency;
+	p->policy.tick = NULL;
+}
+
+static struct dm_cache_policy *wb_create(dm_cblock_t cache_size,
+					 sector_t origin_size,
+					 sector_t cache_block_size)
+{
+	int r;
+	struct policy *p = kzalloc(sizeof(*p), GFP_KERNEL);
+
+	if (!p)
+		return NULL;
+
+	init_policy_functions(p);
+	INIT_LIST_HEAD(&p->free);
+	INIT_LIST_HEAD(&p->clean);
+	INIT_LIST_HEAD(&p->clean_pending);
+	INIT_LIST_HEAD(&p->dirty);
+
+	p->cache_size = cache_size;
+	spin_lock_init(&p->lock);
+
+	/* Allocate cache entry structs and add them to free list. */
+	r = alloc_cache_blocks_with_hash(p, cache_size);
+	if (!r)
+		return &p->policy;
+
+	kfree(p);
+
+	return NULL;
+}
+/*----------------------------------------------------------------------------*/
+
+static struct dm_cache_policy_type wb_policy_type = {
+	.name = "cleaner",
+	.hint_size = 0,
+	.owner = THIS_MODULE,
+	.create = wb_create
+};
+
+static int __init wb_init(void)
+{
+	int r = dm_cache_policy_register(&wb_policy_type);
+
+	if (r < 0)
+		DMERR("register failed %d", r);
+	else
+		DMINFO("version " CLEANER_VERSION " loaded");
+
+	return r;
+}
+
+static void __exit wb_exit(void)
+{
+	dm_cache_policy_unregister(&wb_policy_type);
+}
+
+module_init(wb_init);
+module_exit(wb_exit);
+
+MODULE_AUTHOR("Heinz Mauelshagen <dm-devel@redhat.com>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("cleaner cache policy");
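
To make the hash sizing in alloc_hash() concrete, here is a small stand-alone userspace sketch (not part of the patch) that mirrors the arithmetic; roundup_pow_of_two() is open-coded because the kernel helper isn't available outside the tree:

    #include <stdio.h>
    #include <strings.h>    /* ffs() */

    /* Userspace stand-in for the kernel's roundup_pow_of_two(). */
    static unsigned roundup_pow_of_two(unsigned n)
    {
            unsigned p = 1;

            while (p < n)
                    p <<= 1;

            return p;
    }

    static unsigned next_power(unsigned n, unsigned min)
    {
            return roundup_pow_of_two(n > min ? n : min);
    }

    int main(void)
    {
            unsigned sizes[] = { 64, 4096, 65536, 1048576 };
            unsigned i;

            for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
                    unsigned buckets = next_power(sizes[i] >> 4, 16);

                    /* hash_bits = ffs(buckets) - 1, exactly as in alloc_hash(). */
                    printf("cache blocks %8u -> %6u buckets (hash_bits %d)\n",
                           sizes[i], buckets, ffs(buckets) - 1);
            }

            return 0;
    }

For example, a 65536-block cache gets 4096 buckets with hash_bits of 12.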