Diffstat (limited to 'drivers')

 -rw-r--r--  drivers/md/Kconfig                              |  22
 -rw-r--r--  drivers/md/dm-cache-metadata.c                  | 104
 -rw-r--r--  drivers/md/dm-cache-metadata.h                  |   5
 -rw-r--r--  drivers/md/dm-cache-policy-internal.h           |   7
 -rw-r--r--  drivers/md/dm-cache-policy-mq.c                 | 681
 -rw-r--r--  drivers/md/dm-cache-policy.c                    |   4
 -rw-r--r--  drivers/md/dm-cache-policy.h                    |  21
 -rw-r--r--  drivers/md/dm-cache-target.c                    | 687
 -rw-r--r--  drivers/md/dm-crypt.c                           | 214
 -rw-r--r--  drivers/md/dm-ioctl.c                           |  36
 -rw-r--r--  drivers/md/dm-mpath.c                           |  34
 -rw-r--r--  drivers/md/dm-table.c                           |  23
 -rw-r--r--  drivers/md/dm.c                                 |  47
 -rw-r--r--  drivers/md/dm.h                                 |  13
 -rw-r--r--  drivers/md/persistent-data/dm-array.c           |   5
 -rw-r--r--  drivers/md/persistent-data/dm-space-map-disk.c  |  18

 16 files changed, 1466 insertions, 455 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 30b426ed744b..f2ccbc3b9fe4 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -297,6 +297,17 @@ config DM_MIRROR | |||
297 | Allow volume managers to mirror logical volumes, also | 297 | Allow volume managers to mirror logical volumes, also |
298 | needed for live data migration tools such as 'pvmove'. | 298 | needed for live data migration tools such as 'pvmove'. |
299 | 299 | ||
300 | config DM_LOG_USERSPACE | ||
301 | tristate "Mirror userspace logging" | ||
302 | depends on DM_MIRROR && NET | ||
303 | select CONNECTOR | ||
304 | ---help--- | ||
305 | The userspace logging module provides a mechanism for | ||
306 | relaying the dm-dirty-log API to userspace. Log designs | ||
307 | which are more suited to userspace implementation (e.g. | ||
308 | shared storage logs) or experimental logs can be implemented | ||
309 | by leveraging this framework. | ||
310 | |||
300 | config DM_RAID | 311 | config DM_RAID |
301 | tristate "RAID 1/4/5/6/10 target" | 312 | tristate "RAID 1/4/5/6/10 target" |
302 | depends on BLK_DEV_DM | 313 | depends on BLK_DEV_DM |
@@ -323,17 +334,6 @@ config DM_RAID | |||
323 | RAID-5, RAID-6 distributes the syndromes across the drives | 334 | RAID-5, RAID-6 distributes the syndromes across the drives |
324 | in one of the available parity distribution methods. | 335 | in one of the available parity distribution methods. |
325 | 336 | ||
326 | config DM_LOG_USERSPACE | ||
327 | tristate "Mirror userspace logging" | ||
328 | depends on DM_MIRROR && NET | ||
329 | select CONNECTOR | ||
330 | ---help--- | ||
331 | The userspace logging module provides a mechanism for | ||
332 | relaying the dm-dirty-log API to userspace. Log designs | ||
333 | which are more suited to userspace implementation (e.g. | ||
334 | shared storage logs) or experimental logs can be implemented | ||
335 | by leveraging this framework. | ||
336 | |||
337 | config DM_ZERO | 337 | config DM_ZERO |
338 | tristate "Zero target" | 338 | tristate "Zero target" |
339 | depends on BLK_DEV_DM | 339 | depends on BLK_DEV_DM |
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index 1af7255bbffb..9ef0752e8a08 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -20,7 +20,13 @@ | |||
20 | 20 | ||
21 | #define CACHE_SUPERBLOCK_MAGIC 06142003 | 21 | #define CACHE_SUPERBLOCK_MAGIC 06142003 |
22 | #define CACHE_SUPERBLOCK_LOCATION 0 | 22 | #define CACHE_SUPERBLOCK_LOCATION 0 |
23 | #define CACHE_VERSION 1 | 23 | |
24 | /* | ||
25 | * defines a range of metadata versions that this module can handle. | ||
26 | */ | ||
27 | #define MIN_CACHE_VERSION 1 | ||
28 | #define MAX_CACHE_VERSION 1 | ||
29 | |||
24 | #define CACHE_METADATA_CACHE_SIZE 64 | 30 | #define CACHE_METADATA_CACHE_SIZE 64 |
25 | 31 | ||
26 | /* | 32 | /* |
@@ -134,6 +140,18 @@ static void sb_prepare_for_write(struct dm_block_validator *v, | |||
134 | SUPERBLOCK_CSUM_XOR)); | 140 | SUPERBLOCK_CSUM_XOR)); |
135 | } | 141 | } |
136 | 142 | ||
143 | static int check_metadata_version(struct cache_disk_superblock *disk_super) | ||
144 | { | ||
145 | uint32_t metadata_version = le32_to_cpu(disk_super->version); | ||
146 | if (metadata_version < MIN_CACHE_VERSION || metadata_version > MAX_CACHE_VERSION) { | ||
147 | DMERR("Cache metadata version %u found, but only versions between %u and %u supported.", | ||
148 | metadata_version, MIN_CACHE_VERSION, MAX_CACHE_VERSION); | ||
149 | return -EINVAL; | ||
150 | } | ||
151 | |||
152 | return 0; | ||
153 | } | ||
154 | |||
137 | static int sb_check(struct dm_block_validator *v, | 155 | static int sb_check(struct dm_block_validator *v, |
138 | struct dm_block *b, | 156 | struct dm_block *b, |
139 | size_t sb_block_size) | 157 | size_t sb_block_size) |
@@ -164,7 +182,7 @@ static int sb_check(struct dm_block_validator *v, | |||
164 | return -EILSEQ; | 182 | return -EILSEQ; |
165 | } | 183 | } |
166 | 184 | ||
167 | return 0; | 185 | return check_metadata_version(disk_super); |
168 | } | 186 | } |
169 | 187 | ||
170 | static struct dm_block_validator sb_validator = { | 188 | static struct dm_block_validator sb_validator = { |
@@ -198,7 +216,7 @@ static int superblock_lock(struct dm_cache_metadata *cmd, | |||
198 | 216 | ||
199 | /*----------------------------------------------------------------*/ | 217 | /*----------------------------------------------------------------*/ |
200 | 218 | ||
201 | static int __superblock_all_zeroes(struct dm_block_manager *bm, int *result) | 219 | static int __superblock_all_zeroes(struct dm_block_manager *bm, bool *result) |
202 | { | 220 | { |
203 | int r; | 221 | int r; |
204 | unsigned i; | 222 | unsigned i; |
@@ -214,10 +232,10 @@ static int __superblock_all_zeroes(struct dm_block_manager *bm, int *result) | |||
214 | return r; | 232 | return r; |
215 | 233 | ||
216 | data_le = dm_block_data(b); | 234 | data_le = dm_block_data(b); |
217 | *result = 1; | 235 | *result = true; |
218 | for (i = 0; i < sb_block_size; i++) { | 236 | for (i = 0; i < sb_block_size; i++) { |
219 | if (data_le[i] != zero) { | 237 | if (data_le[i] != zero) { |
220 | *result = 0; | 238 | *result = false; |
221 | break; | 239 | break; |
222 | } | 240 | } |
223 | } | 241 | } |
@@ -270,7 +288,7 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd) | |||
270 | disk_super->flags = 0; | 288 | disk_super->flags = 0; |
271 | memset(disk_super->uuid, 0, sizeof(disk_super->uuid)); | 289 | memset(disk_super->uuid, 0, sizeof(disk_super->uuid)); |
272 | disk_super->magic = cpu_to_le64(CACHE_SUPERBLOCK_MAGIC); | 290 | disk_super->magic = cpu_to_le64(CACHE_SUPERBLOCK_MAGIC); |
273 | disk_super->version = cpu_to_le32(CACHE_VERSION); | 291 | disk_super->version = cpu_to_le32(MAX_CACHE_VERSION); |
274 | memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name)); | 292 | memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name)); |
275 | memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version)); | 293 | memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version)); |
276 | disk_super->policy_hint_size = 0; | 294 | disk_super->policy_hint_size = 0; |
@@ -411,7 +429,8 @@ bad: | |||
411 | static int __open_or_format_metadata(struct dm_cache_metadata *cmd, | 429 | static int __open_or_format_metadata(struct dm_cache_metadata *cmd, |
412 | bool format_device) | 430 | bool format_device) |
413 | { | 431 | { |
414 | int r, unformatted; | 432 | int r; |
433 | bool unformatted = false; | ||
415 | 434 | ||
416 | r = __superblock_all_zeroes(cmd->bm, &unformatted); | 435 | r = __superblock_all_zeroes(cmd->bm, &unformatted); |
417 | if (r) | 436 | if (r) |
@@ -666,19 +685,85 @@ void dm_cache_metadata_close(struct dm_cache_metadata *cmd) | |||
666 | kfree(cmd); | 685 | kfree(cmd); |
667 | } | 686 | } |
668 | 687 | ||
688 | /* | ||
689 | * Checks that the given cache block is either unmapped or clean. | ||
690 | */ | ||
691 | static int block_unmapped_or_clean(struct dm_cache_metadata *cmd, dm_cblock_t b, | ||
692 | bool *result) | ||
693 | { | ||
694 | int r; | ||
695 | __le64 value; | ||
696 | dm_oblock_t ob; | ||
697 | unsigned flags; | ||
698 | |||
699 | r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(b), &value); | ||
700 | if (r) { | ||
701 | DMERR("block_unmapped_or_clean failed"); | ||
702 | return r; | ||
703 | } | ||
704 | |||
705 | unpack_value(value, &ob, &flags); | ||
706 | *result = !((flags & M_VALID) && (flags & M_DIRTY)); | ||
707 | |||
708 | return 0; | ||
709 | } | ||
710 | |||
711 | static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd, | ||
712 | dm_cblock_t begin, dm_cblock_t end, | ||
713 | bool *result) | ||
714 | { | ||
715 | int r; | ||
716 | *result = true; | ||
717 | |||
718 | while (begin != end) { | ||
719 | r = block_unmapped_or_clean(cmd, begin, result); | ||
720 | if (r) | ||
721 | return r; | ||
722 | |||
723 | if (!*result) { | ||
724 | DMERR("cache block %llu is dirty", | ||
725 | (unsigned long long) from_cblock(begin)); | ||
726 | return 0; | ||
727 | } | ||
728 | |||
729 | begin = to_cblock(from_cblock(begin) + 1); | ||
730 | } | ||
731 | |||
732 | return 0; | ||
733 | } | ||
734 | |||
669 | int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size) | 735 | int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size) |
670 | { | 736 | { |
671 | int r; | 737 | int r; |
738 | bool clean; | ||
672 | __le64 null_mapping = pack_value(0, 0); | 739 | __le64 null_mapping = pack_value(0, 0); |
673 | 740 | ||
674 | down_write(&cmd->root_lock); | 741 | down_write(&cmd->root_lock); |
675 | __dm_bless_for_disk(&null_mapping); | 742 | __dm_bless_for_disk(&null_mapping); |
743 | |||
744 | if (from_cblock(new_cache_size) < from_cblock(cmd->cache_blocks)) { | ||
745 | r = blocks_are_unmapped_or_clean(cmd, new_cache_size, cmd->cache_blocks, &clean); | ||
746 | if (r) { | ||
747 | __dm_unbless_for_disk(&null_mapping); | ||
748 | goto out; | ||
749 | } | ||
750 | |||
751 | if (!clean) { | ||
752 | DMERR("unable to shrink cache due to dirty blocks"); | ||
753 | r = -EINVAL; | ||
754 | __dm_unbless_for_disk(&null_mapping); | ||
755 | goto out; | ||
756 | } | ||
757 | } | ||
758 | |||
676 | r = dm_array_resize(&cmd->info, cmd->root, from_cblock(cmd->cache_blocks), | 759 | r = dm_array_resize(&cmd->info, cmd->root, from_cblock(cmd->cache_blocks), |
677 | from_cblock(new_cache_size), | 760 | from_cblock(new_cache_size), |
678 | &null_mapping, &cmd->root); | 761 | &null_mapping, &cmd->root); |
679 | if (!r) | 762 | if (!r) |
680 | cmd->cache_blocks = new_cache_size; | 763 | cmd->cache_blocks = new_cache_size; |
681 | cmd->changed = true; | 764 | cmd->changed = true; |
765 | |||
766 | out: | ||
682 | up_write(&cmd->root_lock); | 767 | up_write(&cmd->root_lock); |
683 | 768 | ||
684 | return r; | 769 | return r; |
@@ -1182,3 +1267,8 @@ int dm_cache_save_hint(struct dm_cache_metadata *cmd, dm_cblock_t cblock, | |||
1182 | 1267 | ||
1183 | return r; | 1268 | return r; |
1184 | } | 1269 | } |
1270 | |||
1271 | int dm_cache_metadata_all_clean(struct dm_cache_metadata *cmd, bool *result) | ||
1272 | { | ||
1273 | return blocks_are_unmapped_or_clean(cmd, 0, cmd->cache_blocks, result); | ||
1274 | } | ||
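Note on the resize path above: dm_cache_resize() now refuses to shrink a cache that still holds dirty blocks, and dm_cache_metadata_all_clean() exposes the same check to callers. The sketch below shows one way a caller might combine the two; the try_shrink_cache() wrapper and its -EBUSY convention are illustrative assumptions, not part of this patch.

	/* Hypothetical caller sketch; not taken from this commit. */
	static int try_shrink_cache(struct dm_cache_metadata *cmd,
				    dm_cblock_t new_size)
	{
		bool clean;
		int r = dm_cache_metadata_all_clean(cmd, &clean);
		if (r)
			return r;

		if (!clean)
			return -EBUSY;	/* write back dirty blocks first */

		/* Still returns -EINVAL if dirty blocks appeared meanwhile. */
		return dm_cache_resize(cmd, new_size);
	}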
diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h
index f45cef21f3d0..cd906f14f98d 100644
--- a/drivers/md/dm-cache-metadata.h
+++ b/drivers/md/dm-cache-metadata.h
@@ -137,6 +137,11 @@ int dm_cache_begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy * | |||
137 | int dm_cache_save_hint(struct dm_cache_metadata *cmd, | 137 | int dm_cache_save_hint(struct dm_cache_metadata *cmd, |
138 | dm_cblock_t cblock, uint32_t hint); | 138 | dm_cblock_t cblock, uint32_t hint); |
139 | 139 | ||
140 | /* | ||
141 | * Query method. Are all the blocks in the cache clean? | ||
142 | */ | ||
143 | int dm_cache_metadata_all_clean(struct dm_cache_metadata *cmd, bool *result); | ||
144 | |||
140 | /*----------------------------------------------------------------*/ | 145 | /*----------------------------------------------------------------*/ |
141 | 146 | ||
142 | #endif /* DM_CACHE_METADATA_H */ | 147 | #endif /* DM_CACHE_METADATA_H */ |
diff --git a/drivers/md/dm-cache-policy-internal.h b/drivers/md/dm-cache-policy-internal.h
index 0928abdc49f0..2256a1f24f73 100644
--- a/drivers/md/dm-cache-policy-internal.h
+++ b/drivers/md/dm-cache-policy-internal.h
@@ -61,7 +61,12 @@ static inline int policy_writeback_work(struct dm_cache_policy *p, | |||
61 | 61 | ||
62 | static inline void policy_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock) | 62 | static inline void policy_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock) |
63 | { | 63 | { |
64 | return p->remove_mapping(p, oblock); | 64 | p->remove_mapping(p, oblock); |
65 | } | ||
66 | |||
67 | static inline int policy_remove_cblock(struct dm_cache_policy *p, dm_cblock_t cblock) | ||
68 | { | ||
69 | return p->remove_cblock(p, cblock); | ||
65 | } | 70 | } |
66 | 71 | ||
67 | static inline void policy_force_mapping(struct dm_cache_policy *p, | 72 | static inline void policy_force_mapping(struct dm_cache_policy *p, |
diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c
index 4296155090b2..416b7b752a6e 100644
--- a/drivers/md/dm-cache-policy-mq.c
+++ b/drivers/md/dm-cache-policy-mq.c
@@ -26,19 +26,6 @@ static unsigned next_power(unsigned n, unsigned min) | |||
26 | 26 | ||
27 | /*----------------------------------------------------------------*/ | 27 | /*----------------------------------------------------------------*/ |
28 | 28 | ||
29 | static unsigned long *alloc_bitset(unsigned nr_entries) | ||
30 | { | ||
31 | size_t s = sizeof(unsigned long) * dm_div_up(nr_entries, BITS_PER_LONG); | ||
32 | return vzalloc(s); | ||
33 | } | ||
34 | |||
35 | static void free_bitset(unsigned long *bits) | ||
36 | { | ||
37 | vfree(bits); | ||
38 | } | ||
39 | |||
40 | /*----------------------------------------------------------------*/ | ||
41 | |||
42 | /* | 29 | /* |
43 | * Large, sequential ios are probably better left on the origin device since | 30 | * Large, sequential ios are probably better left on the origin device since |
44 | * spindles tend to have good bandwidth. | 31 | * spindles tend to have good bandwidth. |
@@ -151,6 +138,21 @@ static void queue_init(struct queue *q) | |||
151 | } | 138 | } |
152 | 139 | ||
153 | /* | 140 | /* |
141 | * Checks to see if the queue is empty. | ||
142 | * FIXME: reduce cpu usage. | ||
143 | */ | ||
144 | static bool queue_empty(struct queue *q) | ||
145 | { | ||
146 | unsigned i; | ||
147 | |||
148 | for (i = 0; i < NR_QUEUE_LEVELS; i++) | ||
149 | if (!list_empty(q->qs + i)) | ||
150 | return false; | ||
151 | |||
152 | return true; | ||
153 | } | ||
154 | |||
155 | /* | ||
154 | * Insert an entry to the back of the given level. | 156 | * Insert an entry to the back of the given level. |
155 | */ | 157 | */ |
156 | static void queue_push(struct queue *q, unsigned level, struct list_head *elt) | 158 | static void queue_push(struct queue *q, unsigned level, struct list_head *elt) |
@@ -218,17 +220,116 @@ struct entry { | |||
218 | struct hlist_node hlist; | 220 | struct hlist_node hlist; |
219 | struct list_head list; | 221 | struct list_head list; |
220 | dm_oblock_t oblock; | 222 | dm_oblock_t oblock; |
221 | dm_cblock_t cblock; /* valid iff in_cache */ | ||
222 | 223 | ||
223 | /* | 224 | /* |
224 | * FIXME: pack these better | 225 | * FIXME: pack these better |
225 | */ | 226 | */ |
226 | bool in_cache:1; | 227 | bool dirty:1; |
227 | unsigned hit_count; | 228 | unsigned hit_count; |
228 | unsigned generation; | 229 | unsigned generation; |
229 | unsigned tick; | 230 | unsigned tick; |
230 | }; | 231 | }; |
231 | 232 | ||
233 | /* | ||
234 | * Rather than storing the cblock in an entry, we allocate all entries in | ||
235 | * an array, and infer the cblock from the entry position. | ||
236 | * | ||
237 | * Free entries are linked together into a list. | ||
238 | */ | ||
239 | struct entry_pool { | ||
240 | struct entry *entries, *entries_end; | ||
241 | struct list_head free; | ||
242 | unsigned nr_allocated; | ||
243 | }; | ||
244 | |||
245 | static int epool_init(struct entry_pool *ep, unsigned nr_entries) | ||
246 | { | ||
247 | unsigned i; | ||
248 | |||
249 | ep->entries = vzalloc(sizeof(struct entry) * nr_entries); | ||
250 | if (!ep->entries) | ||
251 | return -ENOMEM; | ||
252 | |||
253 | ep->entries_end = ep->entries + nr_entries; | ||
254 | |||
255 | INIT_LIST_HEAD(&ep->free); | ||
256 | for (i = 0; i < nr_entries; i++) | ||
257 | list_add(&ep->entries[i].list, &ep->free); | ||
258 | |||
259 | ep->nr_allocated = 0; | ||
260 | |||
261 | return 0; | ||
262 | } | ||
263 | |||
264 | static void epool_exit(struct entry_pool *ep) | ||
265 | { | ||
266 | vfree(ep->entries); | ||
267 | } | ||
268 | |||
269 | static struct entry *alloc_entry(struct entry_pool *ep) | ||
270 | { | ||
271 | struct entry *e; | ||
272 | |||
273 | if (list_empty(&ep->free)) | ||
274 | return NULL; | ||
275 | |||
276 | e = list_entry(list_pop(&ep->free), struct entry, list); | ||
277 | INIT_LIST_HEAD(&e->list); | ||
278 | INIT_HLIST_NODE(&e->hlist); | ||
279 | ep->nr_allocated++; | ||
280 | |||
281 | return e; | ||
282 | } | ||
283 | |||
284 | /* | ||
285 | * This assumes the cblock hasn't already been allocated. | ||
286 | */ | ||
287 | static struct entry *alloc_particular_entry(struct entry_pool *ep, dm_cblock_t cblock) | ||
288 | { | ||
289 | struct entry *e = ep->entries + from_cblock(cblock); | ||
290 | list_del(&e->list); | ||
291 | |||
292 | INIT_LIST_HEAD(&e->list); | ||
293 | INIT_HLIST_NODE(&e->hlist); | ||
294 | ep->nr_allocated++; | ||
295 | |||
296 | return e; | ||
297 | } | ||
298 | |||
299 | static void free_entry(struct entry_pool *ep, struct entry *e) | ||
300 | { | ||
301 | BUG_ON(!ep->nr_allocated); | ||
302 | ep->nr_allocated--; | ||
303 | INIT_HLIST_NODE(&e->hlist); | ||
304 | list_add(&e->list, &ep->free); | ||
305 | } | ||
306 | |||
307 | /* | ||
308 | * Returns NULL if the entry is free. | ||
309 | */ | ||
310 | static struct entry *epool_find(struct entry_pool *ep, dm_cblock_t cblock) | ||
311 | { | ||
312 | struct entry *e = ep->entries + from_cblock(cblock); | ||
313 | return !hlist_unhashed(&e->hlist) ? e : NULL; | ||
314 | } | ||
315 | |||
316 | static bool epool_empty(struct entry_pool *ep) | ||
317 | { | ||
318 | return list_empty(&ep->free); | ||
319 | } | ||
320 | |||
321 | static bool in_pool(struct entry_pool *ep, struct entry *e) | ||
322 | { | ||
323 | return e >= ep->entries && e < ep->entries_end; | ||
324 | } | ||
325 | |||
326 | static dm_cblock_t infer_cblock(struct entry_pool *ep, struct entry *e) | ||
327 | { | ||
328 | return to_cblock(e - ep->entries); | ||
329 | } | ||
330 | |||
331 | /*----------------------------------------------------------------*/ | ||
332 | |||
232 | struct mq_policy { | 333 | struct mq_policy { |
233 | struct dm_cache_policy policy; | 334 | struct dm_cache_policy policy; |
234 | 335 | ||
@@ -238,13 +339,22 @@ struct mq_policy { | |||
238 | struct io_tracker tracker; | 339 | struct io_tracker tracker; |
239 | 340 | ||
240 | /* | 341 | /* |
241 | * We maintain two queues of entries. The cache proper contains | 342 | * Entries come from two pools, one of pre-cache entries, and one |
242 | * the currently active mappings. Whereas the pre_cache tracks | 343 | * for the cache proper. |
243 | * blocks that are being hit frequently and potential candidates | 344 | */ |
244 | * for promotion to the cache. | 345 | struct entry_pool pre_cache_pool; |
346 | struct entry_pool cache_pool; | ||
347 | |||
348 | /* | ||
349 | * We maintain three queues of entries. The cache proper, | ||
350 | * consisting of a clean and dirty queue, contains the currently | ||
351 | * active mappings. Whereas the pre_cache tracks blocks that | ||
352 | * are being hit frequently and potential candidates for promotion | ||
353 | * to the cache. | ||
245 | */ | 354 | */ |
246 | struct queue pre_cache; | 355 | struct queue pre_cache; |
247 | struct queue cache; | 356 | struct queue cache_clean; |
357 | struct queue cache_dirty; | ||
248 | 358 | ||
249 | /* | 359 | /* |
250 | * Keeps track of time, incremented by the core. We use this to | 360 | * Keeps track of time, incremented by the core. We use this to |
@@ -282,25 +392,6 @@ struct mq_policy { | |||
282 | unsigned promote_threshold; | 392 | unsigned promote_threshold; |
283 | 393 | ||
284 | /* | 394 | /* |
285 | * We need cache_size entries for the cache, and choose to have | ||
286 | * cache_size entries for the pre_cache too. One motivation for | ||
287 | * using the same size is to make the hit counts directly | ||
288 | * comparable between pre_cache and cache. | ||
289 | */ | ||
290 | unsigned nr_entries; | ||
291 | unsigned nr_entries_allocated; | ||
292 | struct list_head free; | ||
293 | |||
294 | /* | ||
295 | * Cache blocks may be unallocated. We store this info in a | ||
296 | * bitset. | ||
297 | */ | ||
298 | unsigned long *allocation_bitset; | ||
299 | unsigned nr_cblocks_allocated; | ||
300 | unsigned find_free_nr_words; | ||
301 | unsigned find_free_last_word; | ||
302 | |||
303 | /* | ||
304 | * The hash table allows us to quickly find an entry by origin | 395 | * The hash table allows us to quickly find an entry by origin |
305 | * block. Both pre_cache and cache entries are in here. | 396 | * block. Both pre_cache and cache entries are in here. |
306 | */ | 397 | */ |
@@ -310,49 +401,6 @@ struct mq_policy { | |||
310 | }; | 401 | }; |
311 | 402 | ||
312 | /*----------------------------------------------------------------*/ | 403 | /*----------------------------------------------------------------*/ |
313 | /* Free/alloc mq cache entry structures. */ | ||
314 | static void takeout_queue(struct list_head *lh, struct queue *q) | ||
315 | { | ||
316 | unsigned level; | ||
317 | |||
318 | for (level = 0; level < NR_QUEUE_LEVELS; level++) | ||
319 | list_splice(q->qs + level, lh); | ||
320 | } | ||
321 | |||
322 | static void free_entries(struct mq_policy *mq) | ||
323 | { | ||
324 | struct entry *e, *tmp; | ||
325 | |||
326 | takeout_queue(&mq->free, &mq->pre_cache); | ||
327 | takeout_queue(&mq->free, &mq->cache); | ||
328 | |||
329 | list_for_each_entry_safe(e, tmp, &mq->free, list) | ||
330 | kmem_cache_free(mq_entry_cache, e); | ||
331 | } | ||
332 | |||
333 | static int alloc_entries(struct mq_policy *mq, unsigned elts) | ||
334 | { | ||
335 | unsigned u = mq->nr_entries; | ||
336 | |||
337 | INIT_LIST_HEAD(&mq->free); | ||
338 | mq->nr_entries_allocated = 0; | ||
339 | |||
340 | while (u--) { | ||
341 | struct entry *e = kmem_cache_zalloc(mq_entry_cache, GFP_KERNEL); | ||
342 | |||
343 | if (!e) { | ||
344 | free_entries(mq); | ||
345 | return -ENOMEM; | ||
346 | } | ||
347 | |||
348 | |||
349 | list_add(&e->list, &mq->free); | ||
350 | } | ||
351 | |||
352 | return 0; | ||
353 | } | ||
354 | |||
355 | /*----------------------------------------------------------------*/ | ||
356 | 404 | ||
357 | /* | 405 | /* |
358 | * Simple hash table implementation. Should replace with the standard hash | 406 | * Simple hash table implementation. Should replace with the standard hash |
@@ -388,96 +436,14 @@ static void hash_remove(struct entry *e) | |||
388 | 436 | ||
389 | /*----------------------------------------------------------------*/ | 437 | /*----------------------------------------------------------------*/ |
390 | 438 | ||
391 | /* | ||
392 | * Allocates a new entry structure. The memory is allocated in one lump, | ||
393 | * so we just handing it out here. Returns NULL if all entries have | ||
394 | * already been allocated. Cannot fail otherwise. | ||
395 | */ | ||
396 | static struct entry *alloc_entry(struct mq_policy *mq) | ||
397 | { | ||
398 | struct entry *e; | ||
399 | |||
400 | if (mq->nr_entries_allocated >= mq->nr_entries) { | ||
401 | BUG_ON(!list_empty(&mq->free)); | ||
402 | return NULL; | ||
403 | } | ||
404 | |||
405 | e = list_entry(list_pop(&mq->free), struct entry, list); | ||
406 | INIT_LIST_HEAD(&e->list); | ||
407 | INIT_HLIST_NODE(&e->hlist); | ||
408 | |||
409 | mq->nr_entries_allocated++; | ||
410 | return e; | ||
411 | } | ||
412 | |||
413 | /*----------------------------------------------------------------*/ | ||
414 | |||
415 | /* | ||
416 | * Mark cache blocks allocated or not in the bitset. | ||
417 | */ | ||
418 | static void alloc_cblock(struct mq_policy *mq, dm_cblock_t cblock) | ||
419 | { | ||
420 | BUG_ON(from_cblock(cblock) > from_cblock(mq->cache_size)); | ||
421 | BUG_ON(test_bit(from_cblock(cblock), mq->allocation_bitset)); | ||
422 | |||
423 | set_bit(from_cblock(cblock), mq->allocation_bitset); | ||
424 | mq->nr_cblocks_allocated++; | ||
425 | } | ||
426 | |||
427 | static void free_cblock(struct mq_policy *mq, dm_cblock_t cblock) | ||
428 | { | ||
429 | BUG_ON(from_cblock(cblock) > from_cblock(mq->cache_size)); | ||
430 | BUG_ON(!test_bit(from_cblock(cblock), mq->allocation_bitset)); | ||
431 | |||
432 | clear_bit(from_cblock(cblock), mq->allocation_bitset); | ||
433 | mq->nr_cblocks_allocated--; | ||
434 | } | ||
435 | |||
436 | static bool any_free_cblocks(struct mq_policy *mq) | 439 | static bool any_free_cblocks(struct mq_policy *mq) |
437 | { | 440 | { |
438 | return mq->nr_cblocks_allocated < from_cblock(mq->cache_size); | 441 | return !epool_empty(&mq->cache_pool); |
439 | } | 442 | } |
440 | 443 | ||
441 | /* | 444 | static bool any_clean_cblocks(struct mq_policy *mq) |
442 | * Fills result out with a cache block that isn't in use, or return | ||
443 | * -ENOSPC. This does _not_ mark the cblock as allocated, the caller is | ||
444 | * reponsible for that. | ||
445 | */ | ||
446 | static int __find_free_cblock(struct mq_policy *mq, unsigned begin, unsigned end, | ||
447 | dm_cblock_t *result, unsigned *last_word) | ||
448 | { | 445 | { |
449 | int r = -ENOSPC; | 446 | return !queue_empty(&mq->cache_clean); |
450 | unsigned w; | ||
451 | |||
452 | for (w = begin; w < end; w++) { | ||
453 | /* | ||
454 | * ffz is undefined if no zero exists | ||
455 | */ | ||
456 | if (mq->allocation_bitset[w] != ~0UL) { | ||
457 | *last_word = w; | ||
458 | *result = to_cblock((w * BITS_PER_LONG) + ffz(mq->allocation_bitset[w])); | ||
459 | if (from_cblock(*result) < from_cblock(mq->cache_size)) | ||
460 | r = 0; | ||
461 | |||
462 | break; | ||
463 | } | ||
464 | } | ||
465 | |||
466 | return r; | ||
467 | } | ||
468 | |||
469 | static int find_free_cblock(struct mq_policy *mq, dm_cblock_t *result) | ||
470 | { | ||
471 | int r; | ||
472 | |||
473 | if (!any_free_cblocks(mq)) | ||
474 | return -ENOSPC; | ||
475 | |||
476 | r = __find_free_cblock(mq, mq->find_free_last_word, mq->find_free_nr_words, result, &mq->find_free_last_word); | ||
477 | if (r == -ENOSPC && mq->find_free_last_word) | ||
478 | r = __find_free_cblock(mq, 0, mq->find_free_last_word, result, &mq->find_free_last_word); | ||
479 | |||
480 | return r; | ||
481 | } | 447 | } |
482 | 448 | ||
483 | /*----------------------------------------------------------------*/ | 449 | /*----------------------------------------------------------------*/ |
@@ -496,33 +462,35 @@ static unsigned queue_level(struct entry *e) | |||
496 | return min((unsigned) ilog2(e->hit_count), NR_QUEUE_LEVELS - 1u); | 462 | return min((unsigned) ilog2(e->hit_count), NR_QUEUE_LEVELS - 1u); |
497 | } | 463 | } |
498 | 464 | ||
465 | static bool in_cache(struct mq_policy *mq, struct entry *e) | ||
466 | { | ||
467 | return in_pool(&mq->cache_pool, e); | ||
468 | } | ||
469 | |||
499 | /* | 470 | /* |
500 | * Inserts the entry into the pre_cache or the cache. Ensures the cache | 471 | * Inserts the entry into the pre_cache or the cache. Ensures the cache |
501 | * block is marked as allocated if necc. Inserts into the hash table. Sets the | 472 | * block is marked as allocated if necc. Inserts into the hash table. |
502 | * tick which records when the entry was last moved about. | 473 | * Sets the tick which records when the entry was last moved about. |
503 | */ | 474 | */ |
504 | static void push(struct mq_policy *mq, struct entry *e) | 475 | static void push(struct mq_policy *mq, struct entry *e) |
505 | { | 476 | { |
506 | e->tick = mq->tick; | 477 | e->tick = mq->tick; |
507 | hash_insert(mq, e); | 478 | hash_insert(mq, e); |
508 | 479 | ||
509 | if (e->in_cache) { | 480 | if (in_cache(mq, e)) |
510 | alloc_cblock(mq, e->cblock); | 481 | queue_push(e->dirty ? &mq->cache_dirty : &mq->cache_clean, |
511 | queue_push(&mq->cache, queue_level(e), &e->list); | 482 | queue_level(e), &e->list); |
512 | } else | 483 | else |
513 | queue_push(&mq->pre_cache, queue_level(e), &e->list); | 484 | queue_push(&mq->pre_cache, queue_level(e), &e->list); |
514 | } | 485 | } |
515 | 486 | ||
516 | /* | 487 | /* |
517 | * Removes an entry from pre_cache or cache. Removes from the hash table. | 488 | * Removes an entry from pre_cache or cache. Removes from the hash table. |
518 | * Frees off the cache block if necc. | ||
519 | */ | 489 | */ |
520 | static void del(struct mq_policy *mq, struct entry *e) | 490 | static void del(struct mq_policy *mq, struct entry *e) |
521 | { | 491 | { |
522 | queue_remove(&e->list); | 492 | queue_remove(&e->list); |
523 | hash_remove(e); | 493 | hash_remove(e); |
524 | if (e->in_cache) | ||
525 | free_cblock(mq, e->cblock); | ||
526 | } | 494 | } |
527 | 495 | ||
528 | /* | 496 | /* |
@@ -531,14 +499,14 @@ static void del(struct mq_policy *mq, struct entry *e) | |||
531 | */ | 499 | */ |
532 | static struct entry *pop(struct mq_policy *mq, struct queue *q) | 500 | static struct entry *pop(struct mq_policy *mq, struct queue *q) |
533 | { | 501 | { |
534 | struct entry *e = container_of(queue_pop(q), struct entry, list); | 502 | struct entry *e; |
503 | struct list_head *h = queue_pop(q); | ||
535 | 504 | ||
536 | if (e) { | 505 | if (!h) |
537 | hash_remove(e); | 506 | return NULL; |
538 | 507 | ||
539 | if (e->in_cache) | 508 | e = container_of(h, struct entry, list); |
540 | free_cblock(mq, e->cblock); | 509 | hash_remove(e); |
541 | } | ||
542 | 510 | ||
543 | return e; | 511 | return e; |
544 | } | 512 | } |
@@ -556,7 +524,8 @@ static bool updated_this_tick(struct mq_policy *mq, struct entry *e) | |||
556 | * of the entries. | 524 | * of the entries. |
557 | * | 525 | * |
558 | * At the moment the threshold is taken by averaging the hit counts of some | 526 | * At the moment the threshold is taken by averaging the hit counts of some |
559 | * of the entries in the cache (the first 20 entries of the first level). | 527 | * of the entries in the cache (the first 20 entries across all levels in |
528 | * ascending order, giving preference to the clean entries at each level). | ||
560 | * | 529 | * |
561 | * We can be much cleverer than this though. For example, each promotion | 530 | * We can be much cleverer than this though. For example, each promotion |
562 | * could bump up the threshold helping to prevent churn. Much more to do | 531 | * could bump up the threshold helping to prevent churn. Much more to do |
@@ -571,14 +540,21 @@ static void check_generation(struct mq_policy *mq) | |||
571 | struct list_head *head; | 540 | struct list_head *head; |
572 | struct entry *e; | 541 | struct entry *e; |
573 | 542 | ||
574 | if ((mq->hit_count >= mq->generation_period) && | 543 | if ((mq->hit_count >= mq->generation_period) && (epool_empty(&mq->cache_pool))) { |
575 | (mq->nr_cblocks_allocated == from_cblock(mq->cache_size))) { | ||
576 | |||
577 | mq->hit_count = 0; | 544 | mq->hit_count = 0; |
578 | mq->generation++; | 545 | mq->generation++; |
579 | 546 | ||
580 | for (level = 0; level < NR_QUEUE_LEVELS && count < MAX_TO_AVERAGE; level++) { | 547 | for (level = 0; level < NR_QUEUE_LEVELS && count < MAX_TO_AVERAGE; level++) { |
581 | head = mq->cache.qs + level; | 548 | head = mq->cache_clean.qs + level; |
549 | list_for_each_entry(e, head, list) { | ||
550 | nr++; | ||
551 | total += e->hit_count; | ||
552 | |||
553 | if (++count >= MAX_TO_AVERAGE) | ||
554 | break; | ||
555 | } | ||
556 | |||
557 | head = mq->cache_dirty.qs + level; | ||
582 | list_for_each_entry(e, head, list) { | 558 | list_for_each_entry(e, head, list) { |
583 | nr++; | 559 | nr++; |
584 | total += e->hit_count; | 560 | total += e->hit_count; |
@@ -631,19 +607,30 @@ static void requeue_and_update_tick(struct mq_policy *mq, struct entry *e) | |||
631 | * - set the hit count to a hard coded value other than 1, eg, is it better | 607 | * - set the hit count to a hard coded value other than 1, eg, is it better |
632 | * if it goes in at level 2? | 608 | * if it goes in at level 2? |
633 | */ | 609 | */ |
634 | static dm_cblock_t demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock) | 610 | static int demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock) |
635 | { | 611 | { |
636 | dm_cblock_t result; | 612 | struct entry *demoted = pop(mq, &mq->cache_clean); |
637 | struct entry *demoted = pop(mq, &mq->cache); | 613 | |
614 | if (!demoted) | ||
615 | /* | ||
616 | * We could get a block from mq->cache_dirty, but that | ||
617 | * would add extra latency to the triggering bio as it | ||
618 | * waits for the writeback. Better to not promote this | ||
619 | * time and hope there's a clean block next time this block | ||
620 | * is hit. | ||
621 | */ | ||
622 | return -ENOSPC; | ||
638 | 623 | ||
639 | BUG_ON(!demoted); | ||
640 | result = demoted->cblock; | ||
641 | *oblock = demoted->oblock; | 624 | *oblock = demoted->oblock; |
642 | demoted->in_cache = false; | 625 | free_entry(&mq->cache_pool, demoted); |
643 | demoted->hit_count = 1; | 626 | |
644 | push(mq, demoted); | 627 | /* |
628 | * We used to put the demoted block into the pre-cache, but I think | ||
629 | * it's simpler to just let it work it's way up from zero again. | ||
630 | * Stops blocks flickering in and out of the cache. | ||
631 | */ | ||
645 | 632 | ||
646 | return result; | 633 | return 0; |
647 | } | 634 | } |
648 | 635 | ||
649 | /* | 636 | /* |
@@ -662,17 +649,18 @@ static dm_cblock_t demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock) | |||
662 | static unsigned adjusted_promote_threshold(struct mq_policy *mq, | 649 | static unsigned adjusted_promote_threshold(struct mq_policy *mq, |
663 | bool discarded_oblock, int data_dir) | 650 | bool discarded_oblock, int data_dir) |
664 | { | 651 | { |
665 | if (discarded_oblock && any_free_cblocks(mq) && data_dir == WRITE) | 652 | if (data_dir == READ) |
653 | return mq->promote_threshold + READ_PROMOTE_THRESHOLD; | ||
654 | |||
655 | if (discarded_oblock && (any_free_cblocks(mq) || any_clean_cblocks(mq))) { | ||
666 | /* | 656 | /* |
667 | * We don't need to do any copying at all, so give this a | 657 | * We don't need to do any copying at all, so give this a |
668 | * very low threshold. In practice this only triggers | 658 | * very low threshold. |
669 | * during initial population after a format. | ||
670 | */ | 659 | */ |
671 | return DISCARDED_PROMOTE_THRESHOLD; | 660 | return DISCARDED_PROMOTE_THRESHOLD; |
661 | } | ||
672 | 662 | ||
673 | return data_dir == READ ? | 663 | return mq->promote_threshold + WRITE_PROMOTE_THRESHOLD; |
674 | (mq->promote_threshold + READ_PROMOTE_THRESHOLD) : | ||
675 | (mq->promote_threshold + WRITE_PROMOTE_THRESHOLD); | ||
676 | } | 664 | } |
677 | 665 | ||
678 | static bool should_promote(struct mq_policy *mq, struct entry *e, | 666 | static bool should_promote(struct mq_policy *mq, struct entry *e, |
@@ -688,34 +676,49 @@ static int cache_entry_found(struct mq_policy *mq, | |||
688 | { | 676 | { |
689 | requeue_and_update_tick(mq, e); | 677 | requeue_and_update_tick(mq, e); |
690 | 678 | ||
691 | if (e->in_cache) { | 679 | if (in_cache(mq, e)) { |
692 | result->op = POLICY_HIT; | 680 | result->op = POLICY_HIT; |
693 | result->cblock = e->cblock; | 681 | result->cblock = infer_cblock(&mq->cache_pool, e); |
694 | } | 682 | } |
695 | 683 | ||
696 | return 0; | 684 | return 0; |
697 | } | 685 | } |
698 | 686 | ||
699 | /* | 687 | /* |
700 | * Moves and entry from the pre_cache to the cache. The main work is | 688 | * Moves an entry from the pre_cache to the cache. The main work is |
701 | * finding which cache block to use. | 689 | * finding which cache block to use. |
702 | */ | 690 | */ |
703 | static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e, | 691 | static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e, |
704 | struct policy_result *result) | 692 | struct policy_result *result) |
705 | { | 693 | { |
706 | dm_cblock_t cblock; | 694 | int r; |
695 | struct entry *new_e; | ||
707 | 696 | ||
708 | if (find_free_cblock(mq, &cblock) == -ENOSPC) { | 697 | /* Ensure there's a free cblock in the cache */ |
698 | if (epool_empty(&mq->cache_pool)) { | ||
709 | result->op = POLICY_REPLACE; | 699 | result->op = POLICY_REPLACE; |
710 | cblock = demote_cblock(mq, &result->old_oblock); | 700 | r = demote_cblock(mq, &result->old_oblock); |
701 | if (r) { | ||
702 | result->op = POLICY_MISS; | ||
703 | return 0; | ||
704 | } | ||
711 | } else | 705 | } else |
712 | result->op = POLICY_NEW; | 706 | result->op = POLICY_NEW; |
713 | 707 | ||
714 | result->cblock = e->cblock = cblock; | 708 | new_e = alloc_entry(&mq->cache_pool); |
709 | BUG_ON(!new_e); | ||
710 | |||
711 | new_e->oblock = e->oblock; | ||
712 | new_e->dirty = false; | ||
713 | new_e->hit_count = e->hit_count; | ||
714 | new_e->generation = e->generation; | ||
715 | new_e->tick = e->tick; | ||
715 | 716 | ||
716 | del(mq, e); | 717 | del(mq, e); |
717 | e->in_cache = true; | 718 | free_entry(&mq->pre_cache_pool, e); |
718 | push(mq, e); | 719 | push(mq, new_e); |
720 | |||
721 | result->cblock = infer_cblock(&mq->cache_pool, new_e); | ||
719 | 722 | ||
720 | return 0; | 723 | return 0; |
721 | } | 724 | } |
@@ -743,7 +746,7 @@ static int pre_cache_entry_found(struct mq_policy *mq, struct entry *e, | |||
743 | static void insert_in_pre_cache(struct mq_policy *mq, | 746 | static void insert_in_pre_cache(struct mq_policy *mq, |
744 | dm_oblock_t oblock) | 747 | dm_oblock_t oblock) |
745 | { | 748 | { |
746 | struct entry *e = alloc_entry(mq); | 749 | struct entry *e = alloc_entry(&mq->pre_cache_pool); |
747 | 750 | ||
748 | if (!e) | 751 | if (!e) |
749 | /* | 752 | /* |
@@ -757,7 +760,7 @@ static void insert_in_pre_cache(struct mq_policy *mq, | |||
757 | return; | 760 | return; |
758 | } | 761 | } |
759 | 762 | ||
760 | e->in_cache = false; | 763 | e->dirty = false; |
761 | e->oblock = oblock; | 764 | e->oblock = oblock; |
762 | e->hit_count = 1; | 765 | e->hit_count = 1; |
763 | e->generation = mq->generation; | 766 | e->generation = mq->generation; |
@@ -767,30 +770,36 @@ static void insert_in_pre_cache(struct mq_policy *mq, | |||
767 | static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock, | 770 | static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock, |
768 | struct policy_result *result) | 771 | struct policy_result *result) |
769 | { | 772 | { |
773 | int r; | ||
770 | struct entry *e; | 774 | struct entry *e; |
771 | dm_cblock_t cblock; | ||
772 | 775 | ||
773 | if (find_free_cblock(mq, &cblock) == -ENOSPC) { | 776 | if (epool_empty(&mq->cache_pool)) { |
774 | result->op = POLICY_MISS; | 777 | result->op = POLICY_REPLACE; |
775 | insert_in_pre_cache(mq, oblock); | 778 | r = demote_cblock(mq, &result->old_oblock); |
776 | return; | 779 | if (unlikely(r)) { |
777 | } | 780 | result->op = POLICY_MISS; |
781 | insert_in_pre_cache(mq, oblock); | ||
782 | return; | ||
783 | } | ||
778 | 784 | ||
779 | e = alloc_entry(mq); | 785 | /* |
780 | if (unlikely(!e)) { | 786 | * This will always succeed, since we've just demoted. |
781 | result->op = POLICY_MISS; | 787 | */ |
782 | return; | 788 | e = alloc_entry(&mq->cache_pool); |
789 | BUG_ON(!e); | ||
790 | |||
791 | } else { | ||
792 | e = alloc_entry(&mq->cache_pool); | ||
793 | result->op = POLICY_NEW; | ||
783 | } | 794 | } |
784 | 795 | ||
785 | e->oblock = oblock; | 796 | e->oblock = oblock; |
786 | e->cblock = cblock; | 797 | e->dirty = false; |
787 | e->in_cache = true; | ||
788 | e->hit_count = 1; | 798 | e->hit_count = 1; |
789 | e->generation = mq->generation; | 799 | e->generation = mq->generation; |
790 | push(mq, e); | 800 | push(mq, e); |
791 | 801 | ||
792 | result->op = POLICY_NEW; | 802 | result->cblock = infer_cblock(&mq->cache_pool, e); |
793 | result->cblock = e->cblock; | ||
794 | } | 803 | } |
795 | 804 | ||
796 | static int no_entry_found(struct mq_policy *mq, dm_oblock_t oblock, | 805 | static int no_entry_found(struct mq_policy *mq, dm_oblock_t oblock, |
@@ -821,13 +830,16 @@ static int map(struct mq_policy *mq, dm_oblock_t oblock, | |||
821 | int r = 0; | 830 | int r = 0; |
822 | struct entry *e = hash_lookup(mq, oblock); | 831 | struct entry *e = hash_lookup(mq, oblock); |
823 | 832 | ||
824 | if (e && e->in_cache) | 833 | if (e && in_cache(mq, e)) |
825 | r = cache_entry_found(mq, e, result); | 834 | r = cache_entry_found(mq, e, result); |
835 | |||
826 | else if (iot_pattern(&mq->tracker) == PATTERN_SEQUENTIAL) | 836 | else if (iot_pattern(&mq->tracker) == PATTERN_SEQUENTIAL) |
827 | result->op = POLICY_MISS; | 837 | result->op = POLICY_MISS; |
838 | |||
828 | else if (e) | 839 | else if (e) |
829 | r = pre_cache_entry_found(mq, e, can_migrate, discarded_oblock, | 840 | r = pre_cache_entry_found(mq, e, can_migrate, discarded_oblock, |
830 | data_dir, result); | 841 | data_dir, result); |
842 | |||
831 | else | 843 | else |
832 | r = no_entry_found(mq, oblock, can_migrate, discarded_oblock, | 844 | r = no_entry_found(mq, oblock, can_migrate, discarded_oblock, |
833 | data_dir, result); | 845 | data_dir, result); |
@@ -854,9 +866,9 @@ static void mq_destroy(struct dm_cache_policy *p) | |||
854 | { | 866 | { |
855 | struct mq_policy *mq = to_mq_policy(p); | 867 | struct mq_policy *mq = to_mq_policy(p); |
856 | 868 | ||
857 | free_bitset(mq->allocation_bitset); | ||
858 | kfree(mq->table); | 869 | kfree(mq->table); |
859 | free_entries(mq); | 870 | epool_exit(&mq->cache_pool); |
871 | epool_exit(&mq->pre_cache_pool); | ||
860 | kfree(mq); | 872 | kfree(mq); |
861 | } | 873 | } |
862 | 874 | ||
@@ -904,8 +916,8 @@ static int mq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t | |||
904 | return -EWOULDBLOCK; | 916 | return -EWOULDBLOCK; |
905 | 917 | ||
906 | e = hash_lookup(mq, oblock); | 918 | e = hash_lookup(mq, oblock); |
907 | if (e && e->in_cache) { | 919 | if (e && in_cache(mq, e)) { |
908 | *cblock = e->cblock; | 920 | *cblock = infer_cblock(&mq->cache_pool, e); |
909 | r = 0; | 921 | r = 0; |
910 | } else | 922 | } else |
911 | r = -ENOENT; | 923 | r = -ENOENT; |
@@ -915,6 +927,36 @@ static int mq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t | |||
915 | return r; | 927 | return r; |
916 | } | 928 | } |
917 | 929 | ||
930 | static void __mq_set_clear_dirty(struct mq_policy *mq, dm_oblock_t oblock, bool set) | ||
931 | { | ||
932 | struct entry *e; | ||
933 | |||
934 | e = hash_lookup(mq, oblock); | ||
935 | BUG_ON(!e || !in_cache(mq, e)); | ||
936 | |||
937 | del(mq, e); | ||
938 | e->dirty = set; | ||
939 | push(mq, e); | ||
940 | } | ||
941 | |||
942 | static void mq_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock) | ||
943 | { | ||
944 | struct mq_policy *mq = to_mq_policy(p); | ||
945 | |||
946 | mutex_lock(&mq->lock); | ||
947 | __mq_set_clear_dirty(mq, oblock, true); | ||
948 | mutex_unlock(&mq->lock); | ||
949 | } | ||
950 | |||
951 | static void mq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock) | ||
952 | { | ||
953 | struct mq_policy *mq = to_mq_policy(p); | ||
954 | |||
955 | mutex_lock(&mq->lock); | ||
956 | __mq_set_clear_dirty(mq, oblock, false); | ||
957 | mutex_unlock(&mq->lock); | ||
958 | } | ||
959 | |||
918 | static int mq_load_mapping(struct dm_cache_policy *p, | 960 | static int mq_load_mapping(struct dm_cache_policy *p, |
919 | dm_oblock_t oblock, dm_cblock_t cblock, | 961 | dm_oblock_t oblock, dm_cblock_t cblock, |
920 | uint32_t hint, bool hint_valid) | 962 | uint32_t hint, bool hint_valid) |
@@ -922,13 +964,9 @@ static int mq_load_mapping(struct dm_cache_policy *p, | |||
922 | struct mq_policy *mq = to_mq_policy(p); | 964 | struct mq_policy *mq = to_mq_policy(p); |
923 | struct entry *e; | 965 | struct entry *e; |
924 | 966 | ||
925 | e = alloc_entry(mq); | 967 | e = alloc_particular_entry(&mq->cache_pool, cblock); |
926 | if (!e) | ||
927 | return -ENOMEM; | ||
928 | |||
929 | e->cblock = cblock; | ||
930 | e->oblock = oblock; | 968 | e->oblock = oblock; |
931 | e->in_cache = true; | 969 | e->dirty = false; /* this gets corrected in a minute */ |
932 | e->hit_count = hint_valid ? hint : 1; | 970 | e->hit_count = hint_valid ? hint : 1; |
933 | e->generation = mq->generation; | 971 | e->generation = mq->generation; |
934 | push(mq, e); | 972 | push(mq, e); |
@@ -936,57 +974,126 @@ static int mq_load_mapping(struct dm_cache_policy *p, | |||
936 | return 0; | 974 | return 0; |
937 | } | 975 | } |
938 | 976 | ||
977 | static int mq_save_hints(struct mq_policy *mq, struct queue *q, | ||
978 | policy_walk_fn fn, void *context) | ||
979 | { | ||
980 | int r; | ||
981 | unsigned level; | ||
982 | struct entry *e; | ||
983 | |||
984 | for (level = 0; level < NR_QUEUE_LEVELS; level++) | ||
985 | list_for_each_entry(e, q->qs + level, list) { | ||
986 | r = fn(context, infer_cblock(&mq->cache_pool, e), | ||
987 | e->oblock, e->hit_count); | ||
988 | if (r) | ||
989 | return r; | ||
990 | } | ||
991 | |||
992 | return 0; | ||
993 | } | ||
994 | |||
939 | static int mq_walk_mappings(struct dm_cache_policy *p, policy_walk_fn fn, | 995 | static int mq_walk_mappings(struct dm_cache_policy *p, policy_walk_fn fn, |
940 | void *context) | 996 | void *context) |
941 | { | 997 | { |
942 | struct mq_policy *mq = to_mq_policy(p); | 998 | struct mq_policy *mq = to_mq_policy(p); |
943 | int r = 0; | 999 | int r = 0; |
944 | struct entry *e; | ||
945 | unsigned level; | ||
946 | 1000 | ||
947 | mutex_lock(&mq->lock); | 1001 | mutex_lock(&mq->lock); |
948 | 1002 | ||
949 | for (level = 0; level < NR_QUEUE_LEVELS; level++) | 1003 | r = mq_save_hints(mq, &mq->cache_clean, fn, context); |
950 | list_for_each_entry(e, &mq->cache.qs[level], list) { | 1004 | if (!r) |
951 | r = fn(context, e->cblock, e->oblock, e->hit_count); | 1005 | r = mq_save_hints(mq, &mq->cache_dirty, fn, context); |
952 | if (r) | ||
953 | goto out; | ||
954 | } | ||
955 | 1006 | ||
956 | out: | ||
957 | mutex_unlock(&mq->lock); | 1007 | mutex_unlock(&mq->lock); |
958 | 1008 | ||
959 | return r; | 1009 | return r; |
960 | } | 1010 | } |
961 | 1011 | ||
1012 | static void __remove_mapping(struct mq_policy *mq, dm_oblock_t oblock) | ||
1013 | { | ||
1014 | struct entry *e; | ||
1015 | |||
1016 | e = hash_lookup(mq, oblock); | ||
1017 | BUG_ON(!e || !in_cache(mq, e)); | ||
1018 | |||
1019 | del(mq, e); | ||
1020 | free_entry(&mq->cache_pool, e); | ||
1021 | } | ||
1022 | |||
962 | static void mq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock) | 1023 | static void mq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock) |
963 | { | 1024 | { |
964 | struct mq_policy *mq = to_mq_policy(p); | 1025 | struct mq_policy *mq = to_mq_policy(p); |
965 | struct entry *e; | ||
966 | 1026 | ||
967 | mutex_lock(&mq->lock); | 1027 | mutex_lock(&mq->lock); |
1028 | __remove_mapping(mq, oblock); | ||
1029 | mutex_unlock(&mq->lock); | ||
1030 | } | ||
968 | 1031 | ||
969 | e = hash_lookup(mq, oblock); | 1032 | static int __remove_cblock(struct mq_policy *mq, dm_cblock_t cblock) |
1033 | { | ||
1034 | struct entry *e = epool_find(&mq->cache_pool, cblock); | ||
970 | 1035 | ||
971 | BUG_ON(!e || !e->in_cache); | 1036 | if (!e) |
1037 | return -ENODATA; | ||
972 | 1038 | ||
973 | del(mq, e); | 1039 | del(mq, e); |
974 | e->in_cache = false; | 1040 | free_entry(&mq->cache_pool, e); |
975 | push(mq, e); | ||
976 | 1041 | ||
1042 | return 0; | ||
1043 | } | ||
1044 | |||
1045 | static int mq_remove_cblock(struct dm_cache_policy *p, dm_cblock_t cblock) | ||
1046 | { | ||
1047 | int r; | ||
1048 | struct mq_policy *mq = to_mq_policy(p); | ||
1049 | |||
1050 | mutex_lock(&mq->lock); | ||
1051 | r = __remove_cblock(mq, cblock); | ||
977 | mutex_unlock(&mq->lock); | 1052 | mutex_unlock(&mq->lock); |
1053 | |||
1054 | return r; | ||
978 | } | 1055 | } |
979 | 1056 | ||
980 | static void force_mapping(struct mq_policy *mq, | 1057 | static int __mq_writeback_work(struct mq_policy *mq, dm_oblock_t *oblock, |
981 | dm_oblock_t current_oblock, dm_oblock_t new_oblock) | 1058 | dm_cblock_t *cblock) |
982 | { | 1059 | { |
983 | struct entry *e = hash_lookup(mq, current_oblock); | 1060 | struct entry *e = pop(mq, &mq->cache_dirty); |
984 | 1061 | ||
985 | BUG_ON(!e || !e->in_cache); | 1062 | if (!e) |
1063 | return -ENODATA; | ||
986 | 1064 | ||
987 | del(mq, e); | 1065 | *oblock = e->oblock; |
988 | e->oblock = new_oblock; | 1066 | *cblock = infer_cblock(&mq->cache_pool, e); |
1067 | e->dirty = false; | ||
989 | push(mq, e); | 1068 | push(mq, e); |
1069 | |||
1070 | return 0; | ||
1071 | } | ||
1072 | |||
1073 | static int mq_writeback_work(struct dm_cache_policy *p, dm_oblock_t *oblock, | ||
1074 | dm_cblock_t *cblock) | ||
1075 | { | ||
1076 | int r; | ||
1077 | struct mq_policy *mq = to_mq_policy(p); | ||
1078 | |||
1079 | mutex_lock(&mq->lock); | ||
1080 | r = __mq_writeback_work(mq, oblock, cblock); | ||
1081 | mutex_unlock(&mq->lock); | ||
1082 | |||
1083 | return r; | ||
1084 | } | ||
1085 | |||
1086 | static void __force_mapping(struct mq_policy *mq, | ||
1087 | dm_oblock_t current_oblock, dm_oblock_t new_oblock) | ||
1088 | { | ||
1089 | struct entry *e = hash_lookup(mq, current_oblock); | ||
1090 | |||
1091 | if (e && in_cache(mq, e)) { | ||
1092 | del(mq, e); | ||
1093 | e->oblock = new_oblock; | ||
1094 | e->dirty = true; | ||
1095 | push(mq, e); | ||
1096 | } | ||
990 | } | 1097 | } |
991 | 1098 | ||
992 | static void mq_force_mapping(struct dm_cache_policy *p, | 1099 | static void mq_force_mapping(struct dm_cache_policy *p, |
@@ -995,16 +1102,20 @@ static void mq_force_mapping(struct dm_cache_policy *p, | |||
995 | struct mq_policy *mq = to_mq_policy(p); | 1102 | struct mq_policy *mq = to_mq_policy(p); |
996 | 1103 | ||
997 | mutex_lock(&mq->lock); | 1104 | mutex_lock(&mq->lock); |
998 | force_mapping(mq, current_oblock, new_oblock); | 1105 | __force_mapping(mq, current_oblock, new_oblock); |
999 | mutex_unlock(&mq->lock); | 1106 | mutex_unlock(&mq->lock); |
1000 | } | 1107 | } |
1001 | 1108 | ||
1002 | static dm_cblock_t mq_residency(struct dm_cache_policy *p) | 1109 | static dm_cblock_t mq_residency(struct dm_cache_policy *p) |
1003 | { | 1110 | { |
1111 | dm_cblock_t r; | ||
1004 | struct mq_policy *mq = to_mq_policy(p); | 1112 | struct mq_policy *mq = to_mq_policy(p); |
1005 | 1113 | ||
1006 | /* FIXME: lock mutex, not sure we can block here */ | 1114 | mutex_lock(&mq->lock); |
1007 | return to_cblock(mq->nr_cblocks_allocated); | 1115 | r = to_cblock(mq->cache_pool.nr_allocated); |
1116 | mutex_unlock(&mq->lock); | ||
1117 | |||
1118 | return r; | ||
1008 | } | 1119 | } |
1009 | 1120 | ||
1010 | static void mq_tick(struct dm_cache_policy *p) | 1121 | static void mq_tick(struct dm_cache_policy *p) |
@@ -1057,10 +1168,13 @@ static void init_policy_functions(struct mq_policy *mq) | |||
1057 | mq->policy.destroy = mq_destroy; | 1168 | mq->policy.destroy = mq_destroy; |
1058 | mq->policy.map = mq_map; | 1169 | mq->policy.map = mq_map; |
1059 | mq->policy.lookup = mq_lookup; | 1170 | mq->policy.lookup = mq_lookup; |
1171 | mq->policy.set_dirty = mq_set_dirty; | ||
1172 | mq->policy.clear_dirty = mq_clear_dirty; | ||
1060 | mq->policy.load_mapping = mq_load_mapping; | 1173 | mq->policy.load_mapping = mq_load_mapping; |
1061 | mq->policy.walk_mappings = mq_walk_mappings; | 1174 | mq->policy.walk_mappings = mq_walk_mappings; |
1062 | mq->policy.remove_mapping = mq_remove_mapping; | 1175 | mq->policy.remove_mapping = mq_remove_mapping; |
1063 | mq->policy.writeback_work = NULL; | 1176 | mq->policy.remove_cblock = mq_remove_cblock; |
1177 | mq->policy.writeback_work = mq_writeback_work; | ||
1064 | mq->policy.force_mapping = mq_force_mapping; | 1178 | mq->policy.force_mapping = mq_force_mapping; |
1065 | mq->policy.residency = mq_residency; | 1179 | mq->policy.residency = mq_residency; |
1066 | mq->policy.tick = mq_tick; | 1180 | mq->policy.tick = mq_tick; |
@@ -1072,7 +1186,6 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size, | |||
1072 | sector_t origin_size, | 1186 | sector_t origin_size, |
1073 | sector_t cache_block_size) | 1187 | sector_t cache_block_size) |
1074 | { | 1188 | { |
1075 | int r; | ||
1076 | struct mq_policy *mq = kzalloc(sizeof(*mq), GFP_KERNEL); | 1189 | struct mq_policy *mq = kzalloc(sizeof(*mq), GFP_KERNEL); |
1077 | 1190 | ||
1078 | if (!mq) | 1191 | if (!mq) |
@@ -1080,8 +1193,18 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size, | |||
1080 | 1193 | ||
1081 | init_policy_functions(mq); | 1194 | init_policy_functions(mq); |
1082 | iot_init(&mq->tracker, SEQUENTIAL_THRESHOLD_DEFAULT, RANDOM_THRESHOLD_DEFAULT); | 1195 | iot_init(&mq->tracker, SEQUENTIAL_THRESHOLD_DEFAULT, RANDOM_THRESHOLD_DEFAULT); |
1083 | |||
1084 | mq->cache_size = cache_size; | 1196 | mq->cache_size = cache_size; |
1197 | |||
1198 | if (epool_init(&mq->pre_cache_pool, from_cblock(cache_size))) { | ||
1199 | DMERR("couldn't initialize pool of pre-cache entries"); | ||
1200 | goto bad_pre_cache_init; | ||
1201 | } | ||
1202 | |||
1203 | if (epool_init(&mq->cache_pool, from_cblock(cache_size))) { | ||
1204 | DMERR("couldn't initialize pool of cache entries"); | ||
1205 | goto bad_cache_init; | ||
1206 | } | ||
1207 | |||
1085 | mq->tick_protected = 0; | 1208 | mq->tick_protected = 0; |
1086 | mq->tick = 0; | 1209 | mq->tick = 0; |
1087 | mq->hit_count = 0; | 1210 | mq->hit_count = 0; |
@@ -1089,20 +1212,12 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size, | |||
1089 | mq->promote_threshold = 0; | 1212 | mq->promote_threshold = 0; |
1090 | mutex_init(&mq->lock); | 1213 | mutex_init(&mq->lock); |
1091 | spin_lock_init(&mq->tick_lock); | 1214 | spin_lock_init(&mq->tick_lock); |
1092 | mq->find_free_nr_words = dm_div_up(from_cblock(mq->cache_size), BITS_PER_LONG); | ||
1093 | mq->find_free_last_word = 0; | ||
1094 | 1215 | ||
1095 | queue_init(&mq->pre_cache); | 1216 | queue_init(&mq->pre_cache); |
1096 | queue_init(&mq->cache); | 1217 | queue_init(&mq->cache_clean); |
1097 | mq->generation_period = max((unsigned) from_cblock(cache_size), 1024U); | 1218 | queue_init(&mq->cache_dirty); |
1098 | 1219 | ||
1099 | mq->nr_entries = 2 * from_cblock(cache_size); | 1220 | mq->generation_period = max((unsigned) from_cblock(cache_size), 1024U); |
1100 | r = alloc_entries(mq, mq->nr_entries); | ||
1101 | if (r) | ||
1102 | goto bad_cache_alloc; | ||
1103 | |||
1104 | mq->nr_entries_allocated = 0; | ||
1105 | mq->nr_cblocks_allocated = 0; | ||
1106 | 1221 | ||
1107 | mq->nr_buckets = next_power(from_cblock(cache_size) / 2, 16); | 1222 | mq->nr_buckets = next_power(from_cblock(cache_size) / 2, 16); |
1108 | mq->hash_bits = ffs(mq->nr_buckets) - 1; | 1223 | mq->hash_bits = ffs(mq->nr_buckets) - 1; |
@@ -1110,17 +1225,13 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size, | |||
1110 | if (!mq->table) | 1225 | if (!mq->table) |
1111 | goto bad_alloc_table; | 1226 | goto bad_alloc_table; |
1112 | 1227 | ||
1113 | mq->allocation_bitset = alloc_bitset(from_cblock(cache_size)); | ||
1114 | if (!mq->allocation_bitset) | ||
1115 | goto bad_alloc_bitset; | ||
1116 | |||
1117 | return &mq->policy; | 1228 | return &mq->policy; |
1118 | 1229 | ||
1119 | bad_alloc_bitset: | ||
1120 | kfree(mq->table); | ||
1121 | bad_alloc_table: | 1230 | bad_alloc_table: |
1122 | free_entries(mq); | 1231 | epool_exit(&mq->cache_pool); |
1123 | bad_cache_alloc: | 1232 | bad_cache_init: |
1233 | epool_exit(&mq->pre_cache_pool); | ||
1234 | bad_pre_cache_init: | ||
1124 | kfree(mq); | 1235 | kfree(mq); |
1125 | 1236 | ||
1126 | return NULL; | 1237 | return NULL; |
@@ -1130,7 +1241,7 @@ bad_cache_alloc: | |||
1130 | 1241 | ||
1131 | static struct dm_cache_policy_type mq_policy_type = { | 1242 | static struct dm_cache_policy_type mq_policy_type = { |
1132 | .name = "mq", | 1243 | .name = "mq", |
1133 | .version = {1, 0, 0}, | 1244 | .version = {1, 1, 0}, |
1134 | .hint_size = 4, | 1245 | .hint_size = 4, |
1135 | .owner = THIS_MODULE, | 1246 | .owner = THIS_MODULE, |
1136 | .create = mq_create | 1247 | .create = mq_create |
@@ -1138,7 +1249,7 @@ static struct dm_cache_policy_type mq_policy_type = { | |||
1138 | 1249 | ||
1139 | static struct dm_cache_policy_type default_policy_type = { | 1250 | static struct dm_cache_policy_type default_policy_type = { |
1140 | .name = "default", | 1251 | .name = "default", |
1141 | .version = {1, 0, 0}, | 1252 | .version = {1, 1, 0}, |
1142 | .hint_size = 4, | 1253 | .hint_size = 4, |
1143 | .owner = THIS_MODULE, | 1254 | .owner = THIS_MODULE, |
1144 | .create = mq_create | 1255 | .create = mq_create |
diff --git a/drivers/md/dm-cache-policy.c b/drivers/md/dm-cache-policy.c index 21c03c570c06..d80057968407 100644 --- a/drivers/md/dm-cache-policy.c +++ b/drivers/md/dm-cache-policy.c | |||
@@ -119,13 +119,13 @@ struct dm_cache_policy *dm_cache_policy_create(const char *name, | |||
119 | type = get_policy(name); | 119 | type = get_policy(name); |
120 | if (!type) { | 120 | if (!type) { |
121 | DMWARN("unknown policy type"); | 121 | DMWARN("unknown policy type"); |
122 | return NULL; | 122 | return ERR_PTR(-EINVAL); |
123 | } | 123 | } |
124 | 124 | ||
125 | p = type->create(cache_size, origin_size, cache_block_size); | 125 | p = type->create(cache_size, origin_size, cache_block_size); |
126 | if (!p) { | 126 | if (!p) { |
127 | put_policy(type); | 127 | put_policy(type); |
128 | return NULL; | 128 | return ERR_PTR(-ENOMEM); |
129 | } | 129 | } |
130 | p->private = type; | 130 | p->private = type; |
131 | 131 | ||
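dm_cache_policy_create() now reports failures as ERR_PTR-encoded errnos (-EINVAL for an unknown policy name, -ENOMEM when the policy's create routine fails) instead of returning NULL, so callers have to move from NULL checks to IS_ERR()/PTR_ERR(), as create_cache_policy() in dm-cache-target.c does later in this patch. A minimal sketch of the new calling convention (variable names illustrative):

	struct dm_cache_policy *p;

	p = dm_cache_policy_create(name, cache_size, origin_sectors, sectors_per_block);
	if (IS_ERR(p))
		return PTR_ERR(p);	/* propagates -EINVAL, -ENOMEM, ... */

	cache->policy = p;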
diff --git a/drivers/md/dm-cache-policy.h b/drivers/md/dm-cache-policy.h index 33369ca9614f..052c00a84a5c 100644 --- a/drivers/md/dm-cache-policy.h +++ b/drivers/md/dm-cache-policy.h | |||
@@ -135,9 +135,6 @@ struct dm_cache_policy { | |||
135 | */ | 135 | */ |
136 | int (*lookup)(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock); | 136 | int (*lookup)(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock); |
137 | 137 | ||
138 | /* | ||
139 | * oblock must be a mapped block. Must not block. | ||
140 | */ | ||
141 | void (*set_dirty)(struct dm_cache_policy *p, dm_oblock_t oblock); | 138 | void (*set_dirty)(struct dm_cache_policy *p, dm_oblock_t oblock); |
142 | void (*clear_dirty)(struct dm_cache_policy *p, dm_oblock_t oblock); | 139 | void (*clear_dirty)(struct dm_cache_policy *p, dm_oblock_t oblock); |
143 | 140 | ||
@@ -159,8 +156,24 @@ struct dm_cache_policy { | |||
159 | void (*force_mapping)(struct dm_cache_policy *p, dm_oblock_t current_oblock, | 156 | void (*force_mapping)(struct dm_cache_policy *p, dm_oblock_t current_oblock, |
160 | dm_oblock_t new_oblock); | 157 | dm_oblock_t new_oblock); |
161 | 158 | ||
162 | int (*writeback_work)(struct dm_cache_policy *p, dm_oblock_t *oblock, dm_cblock_t *cblock); | 159 | /* |
160 | * This is called via the invalidate_cblocks message. It is | ||
161 | * possible the particular cblock has already been removed due to a | ||
162 | * write io in passthrough mode. In which case this should return | ||
163 | * -ENODATA. | ||
164 | */ | ||
165 | int (*remove_cblock)(struct dm_cache_policy *p, dm_cblock_t cblock); | ||
163 | 166 | ||
167 | /* | ||
168 | * Provide a dirty block to be written back by the core target. | ||
169 | * | ||
170 | * Returns: | ||
171 | * | ||
172 | * 0 and @cblock,@oblock: block to write back provided | ||
173 | * | ||
174 | * -ENODATA: no dirty blocks available | ||
175 | */ | ||
176 | int (*writeback_work)(struct dm_cache_policy *p, dm_oblock_t *oblock, dm_cblock_t *cblock); | ||
164 | 177 | ||
165 | /* | 178 | /* |
166 | * How full is the cache? | 179 | * How full is the cache? |
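The two hooks documented above are what the core target builds on below: remove_cblock() drops a single mapping by cblock and returns -ENODATA when a passthrough write has already removed it, while writeback_work() hands out one dirty (oblock, cblock) pair per call and returns -ENODATA once nothing is dirty. A rough sketch of a consumer treating -ENODATA as "nothing to do"; writeback_one_block() is illustrative, the policy_*() wrappers come from dm-cache-policy-internal.h:

	int r;
	dm_oblock_t oblock;
	dm_cblock_t cblock;

	while (!policy_writeback_work(cache->policy, &oblock, &cblock))
		writeback_one_block(cache, oblock, cblock);	/* copy cblock -> oblock, clear dirty */
	/* the loop ends when writeback_work() returns -ENODATA */

	r = policy_remove_cblock(cache->policy, cblock);
	if (r == -ENODATA)
		r = 0;	/* already unmapped; harmless */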
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 29569768ffbf..9efcf1059b99 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c | |||
@@ -61,6 +61,34 @@ static void free_bitset(unsigned long *bits) | |||
61 | 61 | ||
62 | /*----------------------------------------------------------------*/ | 62 | /*----------------------------------------------------------------*/ |
63 | 63 | ||
64 | /* | ||
65 | * There are a couple of places where we let a bio run, but want to do some | ||
66 | * work before calling its endio function. We do this by temporarily | ||
67 | * changing the endio fn. | ||
68 | */ | ||
69 | struct dm_hook_info { | ||
70 | bio_end_io_t *bi_end_io; | ||
71 | void *bi_private; | ||
72 | }; | ||
73 | |||
74 | static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio, | ||
75 | bio_end_io_t *bi_end_io, void *bi_private) | ||
76 | { | ||
77 | h->bi_end_io = bio->bi_end_io; | ||
78 | h->bi_private = bio->bi_private; | ||
79 | |||
80 | bio->bi_end_io = bi_end_io; | ||
81 | bio->bi_private = bi_private; | ||
82 | } | ||
83 | |||
84 | static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio) | ||
85 | { | ||
86 | bio->bi_end_io = h->bi_end_io; | ||
87 | bio->bi_private = h->bi_private; | ||
88 | } | ||
89 | |||
90 | /*----------------------------------------------------------------*/ | ||
91 | |||
64 | #define PRISON_CELLS 1024 | 92 | #define PRISON_CELLS 1024 |
65 | #define MIGRATION_POOL_SIZE 128 | 93 | #define MIGRATION_POOL_SIZE 128 |
66 | #define COMMIT_PERIOD HZ | 94 | #define COMMIT_PERIOD HZ |
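dm_hook_bio()/dm_unhook_bio() are a save/override/restore pair for a bio's completion callback; writethrough_endio() and the new overwrite_endio() below both rely on them. A minimal usage sketch (my_endio and the submission fragment are illustrative, not part of this patch):

	static void my_endio(struct bio *bio, int err)
	{
		struct per_bio_data *pb = bio->bi_private;	/* set by dm_hook_bio() */

		dm_unhook_bio(&pb->hook_info, bio);	/* restore the saved endio/private */
		/* ... extra completion work ... */
		bio_endio(bio, err);			/* now completes via the original endio */
	}

	/* submission side, with pb and bio from the caller's context */
	dm_hook_bio(&pb->hook_info, bio, my_endio, pb);
	generic_make_request(bio);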
@@ -76,14 +104,37 @@ static void free_bitset(unsigned long *bits) | |||
76 | /* | 104 | /* |
77 | * FIXME: the cache is read/write for the time being. | 105 | * FIXME: the cache is read/write for the time being. |
78 | */ | 106 | */ |
79 | enum cache_mode { | 107 | enum cache_metadata_mode { |
80 | CM_WRITE, /* metadata may be changed */ | 108 | CM_WRITE, /* metadata may be changed */ |
81 | CM_READ_ONLY, /* metadata may not be changed */ | 109 | CM_READ_ONLY, /* metadata may not be changed */ |
82 | }; | 110 | }; |
83 | 111 | ||
112 | enum cache_io_mode { | ||
113 | /* | ||
114 | * Data is written to cached blocks only. These blocks are marked | ||
115 | * dirty. If you lose the cache device you will lose data. | ||
116 | * Potential performance increase for both reads and writes. | ||
117 | */ | ||
118 | CM_IO_WRITEBACK, | ||
119 | |||
120 | /* | ||
121 | * Data is written to both cache and origin. Blocks are never | ||
122 | * dirty. Potential performance benefit for reads only. | ||
123 | */ | ||
124 | CM_IO_WRITETHROUGH, | ||
125 | |||
126 | /* | ||
127 | * A degraded mode useful for various cache coherency situations | ||
128 | * (eg, rolling back snapshots). Reads and writes always go to the | ||
129 | * origin. If a write goes to a cached oblock, then the cache | ||
130 | * block is invalidated. | ||
131 | */ | ||
132 | CM_IO_PASSTHROUGH | ||
133 | }; | ||
134 | |||
84 | struct cache_features { | 135 | struct cache_features { |
85 | enum cache_mode mode; | 136 | enum cache_metadata_mode mode; |
86 | bool write_through:1; | 137 | enum cache_io_mode io_mode; |
87 | }; | 138 | }; |
88 | 139 | ||
89 | struct cache_stats { | 140 | struct cache_stats { |
@@ -99,6 +150,25 @@ struct cache_stats { | |||
99 | atomic_t discard_count; | 150 | atomic_t discard_count; |
100 | }; | 151 | }; |
101 | 152 | ||
153 | /* | ||
154 | * Defines a range of cblocks: begin to (end - 1) are in the range; end is | ||
155 | * the one-past-the-end value. | ||
156 | */ | ||
157 | struct cblock_range { | ||
158 | dm_cblock_t begin; | ||
159 | dm_cblock_t end; | ||
160 | }; | ||
161 | |||
162 | struct invalidation_request { | ||
163 | struct list_head list; | ||
164 | struct cblock_range *cblocks; | ||
165 | |||
166 | atomic_t complete; | ||
167 | int err; | ||
168 | |||
169 | wait_queue_head_t result_wait; | ||
170 | }; | ||
171 | |||
102 | struct cache { | 172 | struct cache { |
103 | struct dm_target *ti; | 173 | struct dm_target *ti; |
104 | struct dm_target_callbacks callbacks; | 174 | struct dm_target_callbacks callbacks; |
@@ -148,6 +218,10 @@ struct cache { | |||
148 | wait_queue_head_t migration_wait; | 218 | wait_queue_head_t migration_wait; |
149 | atomic_t nr_migrations; | 219 | atomic_t nr_migrations; |
150 | 220 | ||
221 | wait_queue_head_t quiescing_wait; | ||
222 | atomic_t quiescing; | ||
223 | atomic_t quiescing_ack; | ||
224 | |||
151 | /* | 225 | /* |
152 | * cache_size entries, dirty if set | 226 | * cache_size entries, dirty if set |
153 | */ | 227 | */ |
@@ -186,7 +260,7 @@ struct cache { | |||
186 | 260 | ||
187 | bool need_tick_bio:1; | 261 | bool need_tick_bio:1; |
188 | bool sized:1; | 262 | bool sized:1; |
189 | bool quiescing:1; | 263 | bool invalidate:1; |
190 | bool commit_requested:1; | 264 | bool commit_requested:1; |
191 | bool loaded_mappings:1; | 265 | bool loaded_mappings:1; |
192 | bool loaded_discards:1; | 266 | bool loaded_discards:1; |
@@ -197,6 +271,12 @@ struct cache { | |||
197 | struct cache_features features; | 271 | struct cache_features features; |
198 | 272 | ||
199 | struct cache_stats stats; | 273 | struct cache_stats stats; |
274 | |||
275 | /* | ||
276 | * Invalidation fields. | ||
277 | */ | ||
278 | spinlock_t invalidation_lock; | ||
279 | struct list_head invalidation_requests; | ||
200 | }; | 280 | }; |
201 | 281 | ||
202 | struct per_bio_data { | 282 | struct per_bio_data { |
@@ -211,7 +291,7 @@ struct per_bio_data { | |||
211 | */ | 291 | */ |
212 | struct cache *cache; | 292 | struct cache *cache; |
213 | dm_cblock_t cblock; | 293 | dm_cblock_t cblock; |
214 | bio_end_io_t *saved_bi_end_io; | 294 | struct dm_hook_info hook_info; |
215 | struct dm_bio_details bio_details; | 295 | struct dm_bio_details bio_details; |
216 | }; | 296 | }; |
217 | 297 | ||
@@ -228,6 +308,8 @@ struct dm_cache_migration { | |||
228 | bool writeback:1; | 308 | bool writeback:1; |
229 | bool demote:1; | 309 | bool demote:1; |
230 | bool promote:1; | 310 | bool promote:1; |
311 | bool requeue_holder:1; | ||
312 | bool invalidate:1; | ||
231 | 313 | ||
232 | struct dm_bio_prison_cell *old_ocell; | 314 | struct dm_bio_prison_cell *old_ocell; |
233 | struct dm_bio_prison_cell *new_ocell; | 315 | struct dm_bio_prison_cell *new_ocell; |
@@ -533,9 +615,24 @@ static void save_stats(struct cache *cache) | |||
533 | #define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache)) | 615 | #define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache)) |
534 | #define PB_DATA_SIZE_WT (sizeof(struct per_bio_data)) | 616 | #define PB_DATA_SIZE_WT (sizeof(struct per_bio_data)) |
535 | 617 | ||
618 | static bool writethrough_mode(struct cache_features *f) | ||
619 | { | ||
620 | return f->io_mode == CM_IO_WRITETHROUGH; | ||
621 | } | ||
622 | |||
623 | static bool writeback_mode(struct cache_features *f) | ||
624 | { | ||
625 | return f->io_mode == CM_IO_WRITEBACK; | ||
626 | } | ||
627 | |||
628 | static bool passthrough_mode(struct cache_features *f) | ||
629 | { | ||
630 | return f->io_mode == CM_IO_PASSTHROUGH; | ||
631 | } | ||
632 | |||
536 | static size_t get_per_bio_data_size(struct cache *cache) | 633 | static size_t get_per_bio_data_size(struct cache *cache) |
537 | { | 634 | { |
538 | return cache->features.write_through ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB; | 635 | return writethrough_mode(&cache->features) ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB; |
539 | } | 636 | } |
540 | 637 | ||
541 | static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size) | 638 | static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size) |
@@ -605,6 +702,7 @@ static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio, | |||
605 | static void remap_to_cache_dirty(struct cache *cache, struct bio *bio, | 702 | static void remap_to_cache_dirty(struct cache *cache, struct bio *bio, |
606 | dm_oblock_t oblock, dm_cblock_t cblock) | 703 | dm_oblock_t oblock, dm_cblock_t cblock) |
607 | { | 704 | { |
705 | check_if_tick_bio_needed(cache, bio); | ||
608 | remap_to_cache(cache, bio, cblock); | 706 | remap_to_cache(cache, bio, cblock); |
609 | if (bio_data_dir(bio) == WRITE) { | 707 | if (bio_data_dir(bio) == WRITE) { |
610 | set_dirty(cache, oblock, cblock); | 708 | set_dirty(cache, oblock, cblock); |
@@ -662,7 +760,8 @@ static void defer_writethrough_bio(struct cache *cache, struct bio *bio) | |||
662 | static void writethrough_endio(struct bio *bio, int err) | 760 | static void writethrough_endio(struct bio *bio, int err) |
663 | { | 761 | { |
664 | struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT); | 762 | struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT); |
665 | bio->bi_end_io = pb->saved_bi_end_io; | 763 | |
764 | dm_unhook_bio(&pb->hook_info, bio); | ||
666 | 765 | ||
667 | if (err) { | 766 | if (err) { |
668 | bio_endio(bio, err); | 767 | bio_endio(bio, err); |
@@ -693,9 +792,8 @@ static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio, | |||
693 | 792 | ||
694 | pb->cache = cache; | 793 | pb->cache = cache; |
695 | pb->cblock = cblock; | 794 | pb->cblock = cblock; |
696 | pb->saved_bi_end_io = bio->bi_end_io; | 795 | dm_hook_bio(&pb->hook_info, bio, writethrough_endio, NULL); |
697 | dm_bio_record(&pb->bio_details, bio); | 796 | dm_bio_record(&pb->bio_details, bio); |
698 | bio->bi_end_io = writethrough_endio; | ||
699 | 797 | ||
700 | remap_to_origin_clear_discard(pb->cache, bio, oblock); | 798 | remap_to_origin_clear_discard(pb->cache, bio, oblock); |
701 | } | 799 | } |
@@ -748,8 +846,9 @@ static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell, | |||
748 | 846 | ||
749 | static void cleanup_migration(struct dm_cache_migration *mg) | 847 | static void cleanup_migration(struct dm_cache_migration *mg) |
750 | { | 848 | { |
751 | dec_nr_migrations(mg->cache); | 849 | struct cache *cache = mg->cache; |
752 | free_migration(mg); | 850 | free_migration(mg); |
851 | dec_nr_migrations(cache); | ||
753 | } | 852 | } |
754 | 853 | ||
755 | static void migration_failure(struct dm_cache_migration *mg) | 854 | static void migration_failure(struct dm_cache_migration *mg) |
@@ -765,13 +864,13 @@ static void migration_failure(struct dm_cache_migration *mg) | |||
765 | DMWARN_LIMIT("demotion failed; couldn't copy block"); | 864 | DMWARN_LIMIT("demotion failed; couldn't copy block"); |
766 | policy_force_mapping(cache->policy, mg->new_oblock, mg->old_oblock); | 865 | policy_force_mapping(cache->policy, mg->new_oblock, mg->old_oblock); |
767 | 866 | ||
768 | cell_defer(cache, mg->old_ocell, mg->promote ? 0 : 1); | 867 | cell_defer(cache, mg->old_ocell, mg->promote ? false : true); |
769 | if (mg->promote) | 868 | if (mg->promote) |
770 | cell_defer(cache, mg->new_ocell, 1); | 869 | cell_defer(cache, mg->new_ocell, true); |
771 | } else { | 870 | } else { |
772 | DMWARN_LIMIT("promotion failed; couldn't copy block"); | 871 | DMWARN_LIMIT("promotion failed; couldn't copy block"); |
773 | policy_remove_mapping(cache->policy, mg->new_oblock); | 872 | policy_remove_mapping(cache->policy, mg->new_oblock); |
774 | cell_defer(cache, mg->new_ocell, 1); | 873 | cell_defer(cache, mg->new_ocell, true); |
775 | } | 874 | } |
776 | 875 | ||
777 | cleanup_migration(mg); | 876 | cleanup_migration(mg); |
@@ -823,7 +922,7 @@ static void migration_success_post_commit(struct dm_cache_migration *mg) | |||
823 | return; | 922 | return; |
824 | 923 | ||
825 | } else if (mg->demote) { | 924 | } else if (mg->demote) { |
826 | cell_defer(cache, mg->old_ocell, mg->promote ? 0 : 1); | 925 | cell_defer(cache, mg->old_ocell, mg->promote ? false : true); |
827 | 926 | ||
828 | if (mg->promote) { | 927 | if (mg->promote) { |
829 | mg->demote = false; | 928 | mg->demote = false; |
@@ -832,11 +931,19 @@ static void migration_success_post_commit(struct dm_cache_migration *mg) | |||
832 | list_add_tail(&mg->list, &cache->quiesced_migrations); | 931 | list_add_tail(&mg->list, &cache->quiesced_migrations); |
833 | spin_unlock_irqrestore(&cache->lock, flags); | 932 | spin_unlock_irqrestore(&cache->lock, flags); |
834 | 933 | ||
835 | } else | 934 | } else { |
935 | if (mg->invalidate) | ||
936 | policy_remove_mapping(cache->policy, mg->old_oblock); | ||
836 | cleanup_migration(mg); | 937 | cleanup_migration(mg); |
938 | } | ||
837 | 939 | ||
838 | } else { | 940 | } else { |
839 | cell_defer(cache, mg->new_ocell, true); | 941 | if (mg->requeue_holder) |
942 | cell_defer(cache, mg->new_ocell, true); | ||
943 | else { | ||
944 | bio_endio(mg->new_ocell->holder, 0); | ||
945 | cell_defer(cache, mg->new_ocell, false); | ||
946 | } | ||
840 | clear_dirty(cache, mg->new_oblock, mg->cblock); | 947 | clear_dirty(cache, mg->new_oblock, mg->cblock); |
841 | cleanup_migration(mg); | 948 | cleanup_migration(mg); |
842 | } | 949 | } |
@@ -881,8 +988,46 @@ static void issue_copy_real(struct dm_cache_migration *mg) | |||
881 | r = dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, mg); | 988 | r = dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, mg); |
882 | } | 989 | } |
883 | 990 | ||
884 | if (r < 0) | 991 | if (r < 0) { |
992 | DMERR_LIMIT("issuing migration failed"); | ||
885 | migration_failure(mg); | 993 | migration_failure(mg); |
994 | } | ||
995 | } | ||
996 | |||
997 | static void overwrite_endio(struct bio *bio, int err) | ||
998 | { | ||
999 | struct dm_cache_migration *mg = bio->bi_private; | ||
1000 | struct cache *cache = mg->cache; | ||
1001 | size_t pb_data_size = get_per_bio_data_size(cache); | ||
1002 | struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); | ||
1003 | unsigned long flags; | ||
1004 | |||
1005 | if (err) | ||
1006 | mg->err = true; | ||
1007 | |||
1008 | spin_lock_irqsave(&cache->lock, flags); | ||
1009 | list_add_tail(&mg->list, &cache->completed_migrations); | ||
1010 | dm_unhook_bio(&pb->hook_info, bio); | ||
1011 | mg->requeue_holder = false; | ||
1012 | spin_unlock_irqrestore(&cache->lock, flags); | ||
1013 | |||
1014 | wake_worker(cache); | ||
1015 | } | ||
1016 | |||
1017 | static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio) | ||
1018 | { | ||
1019 | size_t pb_data_size = get_per_bio_data_size(mg->cache); | ||
1020 | struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); | ||
1021 | |||
1022 | dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg); | ||
1023 | remap_to_cache_dirty(mg->cache, bio, mg->new_oblock, mg->cblock); | ||
1024 | generic_make_request(bio); | ||
1025 | } | ||
1026 | |||
1027 | static bool bio_writes_complete_block(struct cache *cache, struct bio *bio) | ||
1028 | { | ||
1029 | return (bio_data_dir(bio) == WRITE) && | ||
1030 | (bio->bi_size == (cache->sectors_per_block << SECTOR_SHIFT)); | ||
886 | } | 1031 | } |
887 | 1032 | ||
888 | static void avoid_copy(struct dm_cache_migration *mg) | 1033 | static void avoid_copy(struct dm_cache_migration *mg) |
@@ -899,9 +1044,17 @@ static void issue_copy(struct dm_cache_migration *mg) | |||
899 | if (mg->writeback || mg->demote) | 1044 | if (mg->writeback || mg->demote) |
900 | avoid = !is_dirty(cache, mg->cblock) || | 1045 | avoid = !is_dirty(cache, mg->cblock) || |
901 | is_discarded_oblock(cache, mg->old_oblock); | 1046 | is_discarded_oblock(cache, mg->old_oblock); |
902 | else | 1047 | else { |
1048 | struct bio *bio = mg->new_ocell->holder; | ||
1049 | |||
903 | avoid = is_discarded_oblock(cache, mg->new_oblock); | 1050 | avoid = is_discarded_oblock(cache, mg->new_oblock); |
904 | 1051 | ||
1052 | if (!avoid && bio_writes_complete_block(cache, bio)) { | ||
1053 | issue_overwrite(mg, bio); | ||
1054 | return; | ||
1055 | } | ||
1056 | } | ||
1057 | |||
905 | avoid ? avoid_copy(mg) : issue_copy_real(mg); | 1058 | avoid ? avoid_copy(mg) : issue_copy_real(mg); |
906 | } | 1059 | } |
907 | 1060 | ||
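issue_copy() now short-circuits a promotion whose trigger bio writes the entire cache block: instead of a kcopyd copy followed by requeueing the held bio, the bio is hooked with overwrite_endio() and remapped straight to the cache block, and requeue_holder is cleared so the holder is completed rather than resubmitted. Restated as a sketch (bio here is mg->new_ocell->holder, as above):

	/* promotion path only */
	if (is_discarded_oblock(cache, mg->new_oblock))
		avoid_copy(mg);			/* nothing worth copying */
	else if (bio_writes_complete_block(cache, bio))
		issue_overwrite(mg, bio);	/* the write itself populates the block */
	else
		issue_copy_real(mg);		/* fall back to kcopyd */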
@@ -991,6 +1144,8 @@ static void promote(struct cache *cache, struct prealloc *structs, | |||
991 | mg->writeback = false; | 1144 | mg->writeback = false; |
992 | mg->demote = false; | 1145 | mg->demote = false; |
993 | mg->promote = true; | 1146 | mg->promote = true; |
1147 | mg->requeue_holder = true; | ||
1148 | mg->invalidate = false; | ||
994 | mg->cache = cache; | 1149 | mg->cache = cache; |
995 | mg->new_oblock = oblock; | 1150 | mg->new_oblock = oblock; |
996 | mg->cblock = cblock; | 1151 | mg->cblock = cblock; |
@@ -1012,6 +1167,8 @@ static void writeback(struct cache *cache, struct prealloc *structs, | |||
1012 | mg->writeback = true; | 1167 | mg->writeback = true; |
1013 | mg->demote = false; | 1168 | mg->demote = false; |
1014 | mg->promote = false; | 1169 | mg->promote = false; |
1170 | mg->requeue_holder = true; | ||
1171 | mg->invalidate = false; | ||
1015 | mg->cache = cache; | 1172 | mg->cache = cache; |
1016 | mg->old_oblock = oblock; | 1173 | mg->old_oblock = oblock; |
1017 | mg->cblock = cblock; | 1174 | mg->cblock = cblock; |
@@ -1035,6 +1192,8 @@ static void demote_then_promote(struct cache *cache, struct prealloc *structs, | |||
1035 | mg->writeback = false; | 1192 | mg->writeback = false; |
1036 | mg->demote = true; | 1193 | mg->demote = true; |
1037 | mg->promote = true; | 1194 | mg->promote = true; |
1195 | mg->requeue_holder = true; | ||
1196 | mg->invalidate = false; | ||
1038 | mg->cache = cache; | 1197 | mg->cache = cache; |
1039 | mg->old_oblock = old_oblock; | 1198 | mg->old_oblock = old_oblock; |
1040 | mg->new_oblock = new_oblock; | 1199 | mg->new_oblock = new_oblock; |
@@ -1047,6 +1206,33 @@ static void demote_then_promote(struct cache *cache, struct prealloc *structs, | |||
1047 | quiesce_migration(mg); | 1206 | quiesce_migration(mg); |
1048 | } | 1207 | } |
1049 | 1208 | ||
1209 | /* | ||
1210 | * Invalidate a cache entry. No writeback occurs; any changes in the cache | ||
1211 | * block are thrown away. | ||
1212 | */ | ||
1213 | static void invalidate(struct cache *cache, struct prealloc *structs, | ||
1214 | dm_oblock_t oblock, dm_cblock_t cblock, | ||
1215 | struct dm_bio_prison_cell *cell) | ||
1216 | { | ||
1217 | struct dm_cache_migration *mg = prealloc_get_migration(structs); | ||
1218 | |||
1219 | mg->err = false; | ||
1220 | mg->writeback = false; | ||
1221 | mg->demote = true; | ||
1222 | mg->promote = false; | ||
1223 | mg->requeue_holder = true; | ||
1224 | mg->invalidate = true; | ||
1225 | mg->cache = cache; | ||
1226 | mg->old_oblock = oblock; | ||
1227 | mg->cblock = cblock; | ||
1228 | mg->old_ocell = cell; | ||
1229 | mg->new_ocell = NULL; | ||
1230 | mg->start_jiffies = jiffies; | ||
1231 | |||
1232 | inc_nr_migrations(cache); | ||
1233 | quiesce_migration(mg); | ||
1234 | } | ||
1235 | |||
1050 | /*---------------------------------------------------------------- | 1236 | /*---------------------------------------------------------------- |
1051 | * bio processing | 1237 | * bio processing |
1052 | *--------------------------------------------------------------*/ | 1238 | *--------------------------------------------------------------*/ |
@@ -1109,13 +1295,6 @@ static bool spare_migration_bandwidth(struct cache *cache) | |||
1109 | return current_volume < cache->migration_threshold; | 1295 | return current_volume < cache->migration_threshold; |
1110 | } | 1296 | } |
1111 | 1297 | ||
1112 | static bool is_writethrough_io(struct cache *cache, struct bio *bio, | ||
1113 | dm_cblock_t cblock) | ||
1114 | { | ||
1115 | return bio_data_dir(bio) == WRITE && | ||
1116 | cache->features.write_through && !is_dirty(cache, cblock); | ||
1117 | } | ||
1118 | |||
1119 | static void inc_hit_counter(struct cache *cache, struct bio *bio) | 1298 | static void inc_hit_counter(struct cache *cache, struct bio *bio) |
1120 | { | 1299 | { |
1121 | atomic_inc(bio_data_dir(bio) == READ ? | 1300 | atomic_inc(bio_data_dir(bio) == READ ? |
@@ -1128,6 +1307,15 @@ static void inc_miss_counter(struct cache *cache, struct bio *bio) | |||
1128 | &cache->stats.read_miss : &cache->stats.write_miss); | 1307 | &cache->stats.read_miss : &cache->stats.write_miss); |
1129 | } | 1308 | } |
1130 | 1309 | ||
1310 | static void issue_cache_bio(struct cache *cache, struct bio *bio, | ||
1311 | struct per_bio_data *pb, | ||
1312 | dm_oblock_t oblock, dm_cblock_t cblock) | ||
1313 | { | ||
1314 | pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); | ||
1315 | remap_to_cache_dirty(cache, bio, oblock, cblock); | ||
1316 | issue(cache, bio); | ||
1317 | } | ||
1318 | |||
1131 | static void process_bio(struct cache *cache, struct prealloc *structs, | 1319 | static void process_bio(struct cache *cache, struct prealloc *structs, |
1132 | struct bio *bio) | 1320 | struct bio *bio) |
1133 | { | 1321 | { |
@@ -1139,7 +1327,8 @@ static void process_bio(struct cache *cache, struct prealloc *structs, | |||
1139 | size_t pb_data_size = get_per_bio_data_size(cache); | 1327 | size_t pb_data_size = get_per_bio_data_size(cache); |
1140 | struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); | 1328 | struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); |
1141 | bool discarded_block = is_discarded_oblock(cache, block); | 1329 | bool discarded_block = is_discarded_oblock(cache, block); |
1142 | bool can_migrate = discarded_block || spare_migration_bandwidth(cache); | 1330 | bool passthrough = passthrough_mode(&cache->features); |
1331 | bool can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache)); | ||
1143 | 1332 | ||
1144 | /* | 1333 | /* |
1145 | * Check to see if that block is currently migrating. | 1334 | * Check to see if that block is currently migrating. |
@@ -1160,15 +1349,39 @@ static void process_bio(struct cache *cache, struct prealloc *structs, | |||
1160 | 1349 | ||
1161 | switch (lookup_result.op) { | 1350 | switch (lookup_result.op) { |
1162 | case POLICY_HIT: | 1351 | case POLICY_HIT: |
1163 | inc_hit_counter(cache, bio); | 1352 | if (passthrough) { |
1164 | pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); | 1353 | inc_miss_counter(cache, bio); |
1165 | 1354 | ||
1166 | if (is_writethrough_io(cache, bio, lookup_result.cblock)) | 1355 | /* |
1167 | remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); | 1356 | * Passthrough always maps to the origin, |
1168 | else | 1357 | * invalidating any cache blocks that are written |
1169 | remap_to_cache_dirty(cache, bio, block, lookup_result.cblock); | 1358 | * to. |
1359 | */ | ||
1360 | |||
1361 | if (bio_data_dir(bio) == WRITE) { | ||
1362 | atomic_inc(&cache->stats.demotion); | ||
1363 | invalidate(cache, structs, block, lookup_result.cblock, new_ocell); | ||
1364 | release_cell = false; | ||
1365 | |||
1366 | } else { | ||
1367 | /* FIXME: factor out issue_origin() */ | ||
1368 | pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); | ||
1369 | remap_to_origin_clear_discard(cache, bio, block); | ||
1370 | issue(cache, bio); | ||
1371 | } | ||
1372 | } else { | ||
1373 | inc_hit_counter(cache, bio); | ||
1374 | |||
1375 | if (bio_data_dir(bio) == WRITE && | ||
1376 | writethrough_mode(&cache->features) && | ||
1377 | !is_dirty(cache, lookup_result.cblock)) { | ||
1378 | pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); | ||
1379 | remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); | ||
1380 | issue(cache, bio); | ||
1381 | } else | ||
1382 | issue_cache_bio(cache, bio, pb, block, lookup_result.cblock); | ||
1383 | } | ||
1170 | 1384 | ||
1171 | issue(cache, bio); | ||
1172 | break; | 1385 | break; |
1173 | 1386 | ||
1174 | case POLICY_MISS: | 1387 | case POLICY_MISS: |
@@ -1227,15 +1440,17 @@ static int need_commit_due_to_time(struct cache *cache) | |||
1227 | 1440 | ||
1228 | static int commit_if_needed(struct cache *cache) | 1441 | static int commit_if_needed(struct cache *cache) |
1229 | { | 1442 | { |
1230 | if (dm_cache_changed_this_transaction(cache->cmd) && | 1443 | int r = 0; |
1231 | (cache->commit_requested || need_commit_due_to_time(cache))) { | 1444 | |
1445 | if ((cache->commit_requested || need_commit_due_to_time(cache)) && | ||
1446 | dm_cache_changed_this_transaction(cache->cmd)) { | ||
1232 | atomic_inc(&cache->stats.commit_count); | 1447 | atomic_inc(&cache->stats.commit_count); |
1233 | cache->last_commit_jiffies = jiffies; | ||
1234 | cache->commit_requested = false; | 1448 | cache->commit_requested = false; |
1235 | return dm_cache_commit(cache->cmd, false); | 1449 | r = dm_cache_commit(cache->cmd, false); |
1450 | cache->last_commit_jiffies = jiffies; | ||
1236 | } | 1451 | } |
1237 | 1452 | ||
1238 | return 0; | 1453 | return r; |
1239 | } | 1454 | } |
1240 | 1455 | ||
1241 | static void process_deferred_bios(struct cache *cache) | 1456 | static void process_deferred_bios(struct cache *cache) |
@@ -1344,36 +1559,88 @@ static void writeback_some_dirty_blocks(struct cache *cache) | |||
1344 | } | 1559 | } |
1345 | 1560 | ||
1346 | /*---------------------------------------------------------------- | 1561 | /*---------------------------------------------------------------- |
1347 | * Main worker loop | 1562 | * Invalidations. |
1563 | * Dropping something from the cache *without* writing back. | ||
1348 | *--------------------------------------------------------------*/ | 1564 | *--------------------------------------------------------------*/ |
1349 | static void start_quiescing(struct cache *cache) | 1565 | |
1566 | static void process_invalidation_request(struct cache *cache, struct invalidation_request *req) | ||
1350 | { | 1567 | { |
1351 | unsigned long flags; | 1568 | int r = 0; |
1569 | uint64_t begin = from_cblock(req->cblocks->begin); | ||
1570 | uint64_t end = from_cblock(req->cblocks->end); | ||
1352 | 1571 | ||
1353 | spin_lock_irqsave(&cache->lock, flags); | 1572 | while (begin != end) { |
1354 | cache->quiescing = 1; | 1573 | r = policy_remove_cblock(cache->policy, to_cblock(begin)); |
1355 | spin_unlock_irqrestore(&cache->lock, flags); | 1574 | if (!r) { |
1575 | r = dm_cache_remove_mapping(cache->cmd, to_cblock(begin)); | ||
1576 | if (r) | ||
1577 | break; | ||
1578 | |||
1579 | } else if (r == -ENODATA) { | ||
1580 | /* harmless, already unmapped */ | ||
1581 | r = 0; | ||
1582 | |||
1583 | } else { | ||
1584 | DMERR("policy_remove_cblock failed"); | ||
1585 | break; | ||
1586 | } | ||
1587 | |||
1588 | begin++; | ||
1589 | } | ||
1590 | |||
1591 | cache->commit_requested = true; | ||
1592 | |||
1593 | req->err = r; | ||
1594 | atomic_set(&req->complete, 1); | ||
1595 | |||
1596 | wake_up(&req->result_wait); | ||
1356 | } | 1597 | } |
1357 | 1598 | ||
1358 | static void stop_quiescing(struct cache *cache) | 1599 | static void process_invalidation_requests(struct cache *cache) |
1359 | { | 1600 | { |
1360 | unsigned long flags; | 1601 | struct list_head list; |
1602 | struct invalidation_request *req, *tmp; | ||
1361 | 1603 | ||
1362 | spin_lock_irqsave(&cache->lock, flags); | 1604 | INIT_LIST_HEAD(&list); |
1363 | cache->quiescing = 0; | 1605 | spin_lock(&cache->invalidation_lock); |
1364 | spin_unlock_irqrestore(&cache->lock, flags); | 1606 | list_splice_init(&cache->invalidation_requests, &list); |
1607 | spin_unlock(&cache->invalidation_lock); | ||
1608 | |||
1609 | list_for_each_entry_safe (req, tmp, &list, list) | ||
1610 | process_invalidation_request(cache, req); | ||
1365 | } | 1611 | } |
1366 | 1612 | ||
1613 | /*---------------------------------------------------------------- | ||
1614 | * Main worker loop | ||
1615 | *--------------------------------------------------------------*/ | ||
1367 | static bool is_quiescing(struct cache *cache) | 1616 | static bool is_quiescing(struct cache *cache) |
1368 | { | 1617 | { |
1369 | int r; | 1618 | return atomic_read(&cache->quiescing); |
1370 | unsigned long flags; | 1619 | } |
1371 | 1620 | ||
1372 | spin_lock_irqsave(&cache->lock, flags); | 1621 | static void ack_quiescing(struct cache *cache) |
1373 | r = cache->quiescing; | 1622 | { |
1374 | spin_unlock_irqrestore(&cache->lock, flags); | 1623 | if (is_quiescing(cache)) { |
1624 | atomic_inc(&cache->quiescing_ack); | ||
1625 | wake_up(&cache->quiescing_wait); | ||
1626 | } | ||
1627 | } | ||
1375 | 1628 | ||
1376 | return r; | 1629 | static void wait_for_quiescing_ack(struct cache *cache) |
1630 | { | ||
1631 | wait_event(cache->quiescing_wait, atomic_read(&cache->quiescing_ack)); | ||
1632 | } | ||
1633 | |||
1634 | static void start_quiescing(struct cache *cache) | ||
1635 | { | ||
1636 | atomic_inc(&cache->quiescing); | ||
1637 | wait_for_quiescing_ack(cache); | ||
1638 | } | ||
1639 | |||
1640 | static void stop_quiescing(struct cache *cache) | ||
1641 | { | ||
1642 | atomic_set(&cache->quiescing, 0); | ||
1643 | atomic_set(&cache->quiescing_ack, 0); | ||
1377 | } | 1644 | } |
1378 | 1645 | ||
1379 | static void wait_for_migrations(struct cache *cache) | 1646 | static void wait_for_migrations(struct cache *cache) |
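Quiescing switches from a spinlock-protected bool to a flag/ack handshake: start_quiescing() raises cache->quiescing and then sleeps until the worker, at the end of a full do_worker() pass, bumps quiescing_ack, so when start_quiescing() returns the worker is guaranteed to have seen the flag and stopped processing deferred bios and invalidation requests. The ordering, boiled down (this is exactly what the helpers above and ack_quiescing() in do_worker() implement):

	/* suspender side (start_quiescing) */
	atomic_inc(&cache->quiescing);
	wait_event(cache->quiescing_wait, atomic_read(&cache->quiescing_ack));

	/* worker side, once per do_worker() pass (ack_quiescing) */
	if (atomic_read(&cache->quiescing)) {
		atomic_inc(&cache->quiescing_ack);
		wake_up(&cache->quiescing_wait);
	}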
@@ -1412,7 +1679,8 @@ static int more_work(struct cache *cache) | |||
1412 | !bio_list_empty(&cache->deferred_writethrough_bios) || | 1679 | !bio_list_empty(&cache->deferred_writethrough_bios) || |
1413 | !list_empty(&cache->quiesced_migrations) || | 1680 | !list_empty(&cache->quiesced_migrations) || |
1414 | !list_empty(&cache->completed_migrations) || | 1681 | !list_empty(&cache->completed_migrations) || |
1415 | !list_empty(&cache->need_commit_migrations); | 1682 | !list_empty(&cache->need_commit_migrations) || |
1683 | cache->invalidate; | ||
1416 | } | 1684 | } |
1417 | 1685 | ||
1418 | static void do_worker(struct work_struct *ws) | 1686 | static void do_worker(struct work_struct *ws) |
@@ -1420,16 +1688,16 @@ static void do_worker(struct work_struct *ws) | |||
1420 | struct cache *cache = container_of(ws, struct cache, worker); | 1688 | struct cache *cache = container_of(ws, struct cache, worker); |
1421 | 1689 | ||
1422 | do { | 1690 | do { |
1423 | if (!is_quiescing(cache)) | 1691 | if (!is_quiescing(cache)) { |
1692 | writeback_some_dirty_blocks(cache); | ||
1693 | process_deferred_writethrough_bios(cache); | ||
1424 | process_deferred_bios(cache); | 1694 | process_deferred_bios(cache); |
1695 | process_invalidation_requests(cache); | ||
1696 | } | ||
1425 | 1697 | ||
1426 | process_migrations(cache, &cache->quiesced_migrations, issue_copy); | 1698 | process_migrations(cache, &cache->quiesced_migrations, issue_copy); |
1427 | process_migrations(cache, &cache->completed_migrations, complete_migration); | 1699 | process_migrations(cache, &cache->completed_migrations, complete_migration); |
1428 | 1700 | ||
1429 | writeback_some_dirty_blocks(cache); | ||
1430 | |||
1431 | process_deferred_writethrough_bios(cache); | ||
1432 | |||
1433 | if (commit_if_needed(cache)) { | 1701 | if (commit_if_needed(cache)) { |
1434 | process_deferred_flush_bios(cache, false); | 1702 | process_deferred_flush_bios(cache, false); |
1435 | 1703 | ||
@@ -1442,6 +1710,9 @@ static void do_worker(struct work_struct *ws) | |||
1442 | process_migrations(cache, &cache->need_commit_migrations, | 1710 | process_migrations(cache, &cache->need_commit_migrations, |
1443 | migration_success_post_commit); | 1711 | migration_success_post_commit); |
1444 | } | 1712 | } |
1713 | |||
1714 | ack_quiescing(cache); | ||
1715 | |||
1445 | } while (more_work(cache)); | 1716 | } while (more_work(cache)); |
1446 | } | 1717 | } |
1447 | 1718 | ||
@@ -1715,7 +1986,7 @@ static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as, | |||
1715 | static void init_features(struct cache_features *cf) | 1986 | static void init_features(struct cache_features *cf) |
1716 | { | 1987 | { |
1717 | cf->mode = CM_WRITE; | 1988 | cf->mode = CM_WRITE; |
1718 | cf->write_through = false; | 1989 | cf->io_mode = CM_IO_WRITEBACK; |
1719 | } | 1990 | } |
1720 | 1991 | ||
1721 | static int parse_features(struct cache_args *ca, struct dm_arg_set *as, | 1992 | static int parse_features(struct cache_args *ca, struct dm_arg_set *as, |
@@ -1740,10 +2011,13 @@ static int parse_features(struct cache_args *ca, struct dm_arg_set *as, | |||
1740 | arg = dm_shift_arg(as); | 2011 | arg = dm_shift_arg(as); |
1741 | 2012 | ||
1742 | if (!strcasecmp(arg, "writeback")) | 2013 | if (!strcasecmp(arg, "writeback")) |
1743 | cf->write_through = false; | 2014 | cf->io_mode = CM_IO_WRITEBACK; |
1744 | 2015 | ||
1745 | else if (!strcasecmp(arg, "writethrough")) | 2016 | else if (!strcasecmp(arg, "writethrough")) |
1746 | cf->write_through = true; | 2017 | cf->io_mode = CM_IO_WRITETHROUGH; |
2018 | |||
2019 | else if (!strcasecmp(arg, "passthrough")) | ||
2020 | cf->io_mode = CM_IO_PASSTHROUGH; | ||
1747 | 2021 | ||
1748 | else { | 2022 | else { |
1749 | *error = "Unrecognised cache feature requested"; | 2023 | *error = "Unrecognised cache feature requested"; |
@@ -1872,14 +2146,15 @@ static int set_config_values(struct cache *cache, int argc, const char **argv) | |||
1872 | static int create_cache_policy(struct cache *cache, struct cache_args *ca, | 2146 | static int create_cache_policy(struct cache *cache, struct cache_args *ca, |
1873 | char **error) | 2147 | char **error) |
1874 | { | 2148 | { |
1875 | cache->policy = dm_cache_policy_create(ca->policy_name, | 2149 | struct dm_cache_policy *p = dm_cache_policy_create(ca->policy_name, |
1876 | cache->cache_size, | 2150 | cache->cache_size, |
1877 | cache->origin_sectors, | 2151 | cache->origin_sectors, |
1878 | cache->sectors_per_block); | 2152 | cache->sectors_per_block); |
1879 | if (!cache->policy) { | 2153 | if (IS_ERR(p)) { |
1880 | *error = "Error creating cache's policy"; | 2154 | *error = "Error creating cache's policy"; |
1881 | return -ENOMEM; | 2155 | return PTR_ERR(p); |
1882 | } | 2156 | } |
2157 | cache->policy = p; | ||
1883 | 2158 | ||
1884 | return 0; | 2159 | return 0; |
1885 | } | 2160 | } |
@@ -1995,6 +2270,22 @@ static int cache_create(struct cache_args *ca, struct cache **result) | |||
1995 | } | 2270 | } |
1996 | cache->cmd = cmd; | 2271 | cache->cmd = cmd; |
1997 | 2272 | ||
2273 | if (passthrough_mode(&cache->features)) { | ||
2274 | bool all_clean; | ||
2275 | |||
2276 | r = dm_cache_metadata_all_clean(cache->cmd, &all_clean); | ||
2277 | if (r) { | ||
2278 | *error = "dm_cache_metadata_all_clean() failed"; | ||
2279 | goto bad; | ||
2280 | } | ||
2281 | |||
2282 | if (!all_clean) { | ||
2283 | *error = "Cannot enter passthrough mode unless all blocks are clean"; | ||
2284 | r = -EINVAL; | ||
2285 | goto bad; | ||
2286 | } | ||
2287 | } | ||
2288 | |||
1998 | spin_lock_init(&cache->lock); | 2289 | spin_lock_init(&cache->lock); |
1999 | bio_list_init(&cache->deferred_bios); | 2290 | bio_list_init(&cache->deferred_bios); |
2000 | bio_list_init(&cache->deferred_flush_bios); | 2291 | bio_list_init(&cache->deferred_flush_bios); |
@@ -2005,6 +2296,10 @@ static int cache_create(struct cache_args *ca, struct cache **result) | |||
2005 | atomic_set(&cache->nr_migrations, 0); | 2296 | atomic_set(&cache->nr_migrations, 0); |
2006 | init_waitqueue_head(&cache->migration_wait); | 2297 | init_waitqueue_head(&cache->migration_wait); |
2007 | 2298 | ||
2299 | init_waitqueue_head(&cache->quiescing_wait); | ||
2300 | atomic_set(&cache->quiescing, 0); | ||
2301 | atomic_set(&cache->quiescing_ack, 0); | ||
2302 | |||
2008 | r = -ENOMEM; | 2303 | r = -ENOMEM; |
2009 | cache->nr_dirty = 0; | 2304 | cache->nr_dirty = 0; |
2010 | cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size)); | 2305 | cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size)); |
@@ -2064,7 +2359,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) | |||
2064 | 2359 | ||
2065 | cache->need_tick_bio = true; | 2360 | cache->need_tick_bio = true; |
2066 | cache->sized = false; | 2361 | cache->sized = false; |
2067 | cache->quiescing = false; | 2362 | cache->invalidate = false; |
2068 | cache->commit_requested = false; | 2363 | cache->commit_requested = false; |
2069 | cache->loaded_mappings = false; | 2364 | cache->loaded_mappings = false; |
2070 | cache->loaded_discards = false; | 2365 | cache->loaded_discards = false; |
@@ -2078,6 +2373,9 @@ static int cache_create(struct cache_args *ca, struct cache **result) | |||
2078 | atomic_set(&cache->stats.commit_count, 0); | 2373 | atomic_set(&cache->stats.commit_count, 0); |
2079 | atomic_set(&cache->stats.discard_count, 0); | 2374 | atomic_set(&cache->stats.discard_count, 0); |
2080 | 2375 | ||
2376 | spin_lock_init(&cache->invalidation_lock); | ||
2377 | INIT_LIST_HEAD(&cache->invalidation_requests); | ||
2378 | |||
2081 | *result = cache; | 2379 | *result = cache; |
2082 | return 0; | 2380 | return 0; |
2083 | 2381 | ||
@@ -2207,17 +2505,37 @@ static int cache_map(struct dm_target *ti, struct bio *bio) | |||
2207 | return DM_MAPIO_SUBMITTED; | 2505 | return DM_MAPIO_SUBMITTED; |
2208 | } | 2506 | } |
2209 | 2507 | ||
2508 | r = DM_MAPIO_REMAPPED; | ||
2210 | switch (lookup_result.op) { | 2509 | switch (lookup_result.op) { |
2211 | case POLICY_HIT: | 2510 | case POLICY_HIT: |
2212 | inc_hit_counter(cache, bio); | 2511 | if (passthrough_mode(&cache->features)) { |
2213 | pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); | 2512 | if (bio_data_dir(bio) == WRITE) { |
2513 | /* | ||
2514 | * We need to invalidate this block, so | ||
2515 | * defer for the worker thread. | ||
2516 | */ | ||
2517 | cell_defer(cache, cell, true); | ||
2518 | r = DM_MAPIO_SUBMITTED; | ||
2519 | |||
2520 | } else { | ||
2521 | pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); | ||
2522 | inc_miss_counter(cache, bio); | ||
2523 | remap_to_origin_clear_discard(cache, bio, block); | ||
2524 | |||
2525 | cell_defer(cache, cell, false); | ||
2526 | } | ||
2214 | 2527 | ||
2215 | if (is_writethrough_io(cache, bio, lookup_result.cblock)) | 2528 | } else { |
2216 | remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); | 2529 | inc_hit_counter(cache, bio); |
2217 | else | ||
2218 | remap_to_cache_dirty(cache, bio, block, lookup_result.cblock); | ||
2219 | 2530 | ||
2220 | cell_defer(cache, cell, false); | 2531 | if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) && |
2532 | !is_dirty(cache, lookup_result.cblock)) | ||
2533 | remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); | ||
2534 | else | ||
2535 | remap_to_cache_dirty(cache, bio, block, lookup_result.cblock); | ||
2536 | |||
2537 | cell_defer(cache, cell, false); | ||
2538 | } | ||
2221 | break; | 2539 | break; |
2222 | 2540 | ||
2223 | case POLICY_MISS: | 2541 | case POLICY_MISS: |
@@ -2242,10 +2560,10 @@ static int cache_map(struct dm_target *ti, struct bio *bio) | |||
2242 | DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__, | 2560 | DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__, |
2243 | (unsigned) lookup_result.op); | 2561 | (unsigned) lookup_result.op); |
2244 | bio_io_error(bio); | 2562 | bio_io_error(bio); |
2245 | return DM_MAPIO_SUBMITTED; | 2563 | r = DM_MAPIO_SUBMITTED; |
2246 | } | 2564 | } |
2247 | 2565 | ||
2248 | return DM_MAPIO_REMAPPED; | 2566 | return r; |
2249 | } | 2567 | } |
2250 | 2568 | ||
2251 | static int cache_end_io(struct dm_target *ti, struct bio *bio, int error) | 2569 | static int cache_end_io(struct dm_target *ti, struct bio *bio, int error) |
@@ -2406,26 +2724,71 @@ static int load_discard(void *context, sector_t discard_block_size, | |||
2406 | return 0; | 2724 | return 0; |
2407 | } | 2725 | } |
2408 | 2726 | ||
2727 | static dm_cblock_t get_cache_dev_size(struct cache *cache) | ||
2728 | { | ||
2729 | sector_t size = get_dev_size(cache->cache_dev); | ||
2730 | (void) sector_div(size, cache->sectors_per_block); | ||
2731 | return to_cblock(size); | ||
2732 | } | ||
2733 | |||
2734 | static bool can_resize(struct cache *cache, dm_cblock_t new_size) | ||
2735 | { | ||
2736 | if (from_cblock(new_size) > from_cblock(cache->cache_size)) | ||
2737 | return true; | ||
2738 | |||
2739 | /* | ||
2740 | * We can't drop a dirty block when shrinking the cache. | ||
2741 | */ | ||
2742 | while (from_cblock(new_size) < from_cblock(cache->cache_size)) { | ||
2743 | new_size = to_cblock(from_cblock(new_size) + 1); | ||
2744 | if (is_dirty(cache, new_size)) { | ||
2745 | DMERR("unable to shrink cache; cache block %llu is dirty", | ||
2746 | (unsigned long long) from_cblock(new_size)); | ||
2747 | return false; | ||
2748 | } | ||
2749 | } | ||
2750 | |||
2751 | return true; | ||
2752 | } | ||
2753 | |||
2754 | static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size) | ||
2755 | { | ||
2756 | int r; | ||
2757 | |||
2758 | r = dm_cache_resize(cache->cmd, cache->cache_size); | ||
2759 | if (r) { | ||
2760 | DMERR("could not resize cache metadata"); | ||
2761 | return r; | ||
2762 | } | ||
2763 | |||
2764 | cache->cache_size = new_size; | ||
2765 | |||
2766 | return 0; | ||
2767 | } | ||
2768 | |||
2409 | static int cache_preresume(struct dm_target *ti) | 2769 | static int cache_preresume(struct dm_target *ti) |
2410 | { | 2770 | { |
2411 | int r = 0; | 2771 | int r = 0; |
2412 | struct cache *cache = ti->private; | 2772 | struct cache *cache = ti->private; |
2413 | sector_t actual_cache_size = get_dev_size(cache->cache_dev); | 2773 | dm_cblock_t csize = get_cache_dev_size(cache); |
2414 | (void) sector_div(actual_cache_size, cache->sectors_per_block); | ||
2415 | 2774 | ||
2416 | /* | 2775 | /* |
2417 | * Check to see if the cache has resized. | 2776 | * Check to see if the cache has resized. |
2418 | */ | 2777 | */ |
2419 | if (from_cblock(cache->cache_size) != actual_cache_size || !cache->sized) { | 2778 | if (!cache->sized) { |
2420 | cache->cache_size = to_cblock(actual_cache_size); | 2779 | r = resize_cache_dev(cache, csize); |
2421 | 2780 | if (r) | |
2422 | r = dm_cache_resize(cache->cmd, cache->cache_size); | ||
2423 | if (r) { | ||
2424 | DMERR("could not resize cache metadata"); | ||
2425 | return r; | 2781 | return r; |
2426 | } | ||
2427 | 2782 | ||
2428 | cache->sized = true; | 2783 | cache->sized = true; |
2784 | |||
2785 | } else if (csize != cache->cache_size) { | ||
2786 | if (!can_resize(cache, csize)) | ||
2787 | return -EINVAL; | ||
2788 | |||
2789 | r = resize_cache_dev(cache, csize); | ||
2790 | if (r) | ||
2791 | return r; | ||
2429 | } | 2792 | } |
2430 | 2793 | ||
2431 | if (!cache->loaded_mappings) { | 2794 | if (!cache->loaded_mappings) { |
@@ -2518,10 +2881,19 @@ static void cache_status(struct dm_target *ti, status_type_t type, | |||
2518 | (unsigned long long) from_cblock(residency), | 2881 | (unsigned long long) from_cblock(residency), |
2519 | cache->nr_dirty); | 2882 | cache->nr_dirty); |
2520 | 2883 | ||
2521 | if (cache->features.write_through) | 2884 | if (writethrough_mode(&cache->features)) |
2522 | DMEMIT("1 writethrough "); | 2885 | DMEMIT("1 writethrough "); |
2523 | else | 2886 | |
2524 | DMEMIT("0 "); | 2887 | else if (passthrough_mode(&cache->features)) |
2888 | DMEMIT("1 passthrough "); | ||
2889 | |||
2890 | else if (writeback_mode(&cache->features)) | ||
2891 | DMEMIT("1 writeback "); | ||
2892 | |||
2893 | else { | ||
2894 | DMERR("internal error: unknown io mode: %d", (int) cache->features.io_mode); | ||
2895 | goto err; | ||
2896 | } | ||
2525 | 2897 | ||
2526 | DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold); | 2898 | DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold); |
2527 | if (sz < maxlen) { | 2899 | if (sz < maxlen) { |
@@ -2553,7 +2925,128 @@ err: | |||
2553 | } | 2925 | } |
2554 | 2926 | ||
2555 | /* | 2927 | /* |
2556 | * Supports <key> <value>. | 2928 | * A cache block range can take two forms: |
2929 | * | ||
2930 | * i) A single cblock, e.g. '3456' | ||
2931 | * ii) A begin and end cblock with a hyphen between, e.g. 123-234 | ||
2932 | */ | ||
2933 | static int parse_cblock_range(struct cache *cache, const char *str, | ||
2934 | struct cblock_range *result) | ||
2935 | { | ||
2936 | char dummy; | ||
2937 | uint64_t b, e; | ||
2938 | int r; | ||
2939 | |||
2940 | /* | ||
2941 | * Try and parse form (ii) first. | ||
2942 | */ | ||
2943 | r = sscanf(str, "%llu-%llu%c", &b, &e, &dummy); | ||
2944 | if (r < 0) | ||
2945 | return r; | ||
2946 | |||
2947 | if (r == 2) { | ||
2948 | result->begin = to_cblock(b); | ||
2949 | result->end = to_cblock(e); | ||
2950 | return 0; | ||
2951 | } | ||
2952 | |||
2953 | /* | ||
2954 | * That didn't work, try form (i). | ||
2955 | */ | ||
2956 | r = sscanf(str, "%llu%c", &b, &dummy); | ||
2957 | if (r < 0) | ||
2958 | return r; | ||
2959 | |||
2960 | if (r == 1) { | ||
2961 | result->begin = to_cblock(b); | ||
2962 | result->end = to_cblock(from_cblock(result->begin) + 1u); | ||
2963 | return 0; | ||
2964 | } | ||
2965 | |||
2966 | DMERR("invalid cblock range '%s'", str); | ||
2967 | return -EINVAL; | ||
2968 | } | ||
2969 | |||
2970 | static int validate_cblock_range(struct cache *cache, struct cblock_range *range) | ||
2971 | { | ||
2972 | uint64_t b = from_cblock(range->begin); | ||
2973 | uint64_t e = from_cblock(range->end); | ||
2974 | uint64_t n = from_cblock(cache->cache_size); | ||
2975 | |||
2976 | if (b >= n) { | ||
2977 | DMERR("begin cblock out of range: %llu >= %llu", b, n); | ||
2978 | return -EINVAL; | ||
2979 | } | ||
2980 | |||
2981 | if (e > n) { | ||
2982 | DMERR("end cblock out of range: %llu > %llu", e, n); | ||
2983 | return -EINVAL; | ||
2984 | } | ||
2985 | |||
2986 | if (b >= e) { | ||
2987 | DMERR("invalid cblock range: %llu >= %llu", b, e); | ||
2988 | return -EINVAL; | ||
2989 | } | ||
2990 | |||
2991 | return 0; | ||
2992 | } | ||
2993 | |||
2994 | static int request_invalidation(struct cache *cache, struct cblock_range *range) | ||
2995 | { | ||
2996 | struct invalidation_request req; | ||
2997 | |||
2998 | INIT_LIST_HEAD(&req.list); | ||
2999 | req.cblocks = range; | ||
3000 | atomic_set(&req.complete, 0); | ||
3001 | req.err = 0; | ||
3002 | init_waitqueue_head(&req.result_wait); | ||
3003 | |||
3004 | spin_lock(&cache->invalidation_lock); | ||
3005 | list_add(&req.list, &cache->invalidation_requests); | ||
3006 | spin_unlock(&cache->invalidation_lock); | ||
3007 | wake_worker(cache); | ||
3008 | |||
3009 | wait_event(req.result_wait, atomic_read(&req.complete)); | ||
3010 | return req.err; | ||
3011 | } | ||
3012 | |||
3013 | static int process_invalidate_cblocks_message(struct cache *cache, unsigned count, | ||
3014 | const char **cblock_ranges) | ||
3015 | { | ||
3016 | int r = 0; | ||
3017 | unsigned i; | ||
3018 | struct cblock_range range; | ||
3019 | |||
3020 | if (!passthrough_mode(&cache->features)) { | ||
3021 | DMERR("cache has to be in passthrough mode for invalidation"); | ||
3022 | return -EPERM; | ||
3023 | } | ||
3024 | |||
3025 | for (i = 0; i < count; i++) { | ||
3026 | r = parse_cblock_range(cache, cblock_ranges[i], &range); | ||
3027 | if (r) | ||
3028 | break; | ||
3029 | |||
3030 | r = validate_cblock_range(cache, &range); | ||
3031 | if (r) | ||
3032 | break; | ||
3033 | |||
3034 | /* | ||
3035 | * Pass begin and end origin blocks to the worker and wake it. | ||
3036 | */ | ||
3037 | r = request_invalidation(cache, &range); | ||
3038 | if (r) | ||
3039 | break; | ||
3040 | } | ||
3041 | |||
3042 | return r; | ||
3043 | } | ||
3044 | |||
3045 | /* | ||
3046 | * Supports | ||
3047 | * "<key> <value>" | ||
3048 | * and | ||
3049 | * "invalidate_cblocks [(<begin>)|(<begin>-<end>)]* | ||
2557 | * | 3050 | * |
2558 | * The key migration_threshold is supported by the cache target core. | 3051 | * The key migration_threshold is supported by the cache target core. |
2559 | */ | 3052 | */ |
@@ -2561,6 +3054,12 @@ static int cache_message(struct dm_target *ti, unsigned argc, char **argv) | |||
2561 | { | 3054 | { |
2562 | struct cache *cache = ti->private; | 3055 | struct cache *cache = ti->private; |
2563 | 3056 | ||
3057 | if (!argc) | ||
3058 | return -EINVAL; | ||
3059 | |||
3060 | if (!strcasecmp(argv[0], "invalidate_cblocks")) | ||
3061 | return process_invalidate_cblocks_message(cache, argc - 1, (const char **) argv + 1); | ||
3062 | |||
2564 | if (argc != 2) | 3063 | if (argc != 2) |
2565 | return -EINVAL; | 3064 | return -EINVAL; |
2566 | 3065 | ||
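Invalidation is therefore driven entirely from userspace while the device is in passthrough mode. Each message argument is either a single cblock or a half-open range (parse_cblock_range() treats '<begin>-<end>' as dropping begin through end - 1), and the message only returns once the worker has removed the mappings and requested a commit. A hedged usage example, with the device name purely illustrative:

	dmsetup message my-cache 0 invalidate_cblocks 2345 3456-4567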
@@ -2630,7 +3129,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) | |||
2630 | 3129 | ||
2631 | static struct target_type cache_target = { | 3130 | static struct target_type cache_target = { |
2632 | .name = "cache", | 3131 | .name = "cache", |
2633 | .version = {1, 1, 1}, | 3132 | .version = {1, 2, 0}, |
2634 | .module = THIS_MODULE, | 3133 | .module = THIS_MODULE, |
2635 | .ctr = cache_ctr, | 3134 | .ctr = cache_ctr, |
2636 | .dtr = cache_dtr, | 3135 | .dtr = cache_dtr, |
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 0fce0bc1a957..50ea7ed24dce 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c | |||
@@ -2,6 +2,7 @@ | |||
2 | * Copyright (C) 2003 Christophe Saout <christophe@saout.de> | 2 | * Copyright (C) 2003 Christophe Saout <christophe@saout.de> |
3 | * Copyright (C) 2004 Clemens Fruhwirth <clemens@endorphin.org> | 3 | * Copyright (C) 2004 Clemens Fruhwirth <clemens@endorphin.org> |
4 | * Copyright (C) 2006-2009 Red Hat, Inc. All rights reserved. | 4 | * Copyright (C) 2006-2009 Red Hat, Inc. All rights reserved. |
5 | * Copyright (C) 2013 Milan Broz <gmazyland@gmail.com> | ||
5 | * | 6 | * |
6 | * This file is released under the GPL. | 7 | * This file is released under the GPL. |
7 | */ | 8 | */ |
@@ -98,6 +99,13 @@ struct iv_lmk_private { | |||
98 | u8 *seed; | 99 | u8 *seed; |
99 | }; | 100 | }; |
100 | 101 | ||
102 | #define TCW_WHITENING_SIZE 16 | ||
103 | struct iv_tcw_private { | ||
104 | struct crypto_shash *crc32_tfm; | ||
105 | u8 *iv_seed; | ||
106 | u8 *whitening; | ||
107 | }; | ||
108 | |||
101 | /* | 109 | /* |
102 | * Crypt: maps a linear range of a block device | 110 | * Crypt: maps a linear range of a block device |
103 | * and encrypts / decrypts at the same time. | 111 | * and encrypts / decrypts at the same time. |
@@ -139,6 +147,7 @@ struct crypt_config { | |||
139 | struct iv_essiv_private essiv; | 147 | struct iv_essiv_private essiv; |
140 | struct iv_benbi_private benbi; | 148 | struct iv_benbi_private benbi; |
141 | struct iv_lmk_private lmk; | 149 | struct iv_lmk_private lmk; |
150 | struct iv_tcw_private tcw; | ||
142 | } iv_gen_private; | 151 | } iv_gen_private; |
143 | sector_t iv_offset; | 152 | sector_t iv_offset; |
144 | unsigned int iv_size; | 153 | unsigned int iv_size; |
@@ -171,7 +180,8 @@ struct crypt_config { | |||
171 | 180 | ||
172 | unsigned long flags; | 181 | unsigned long flags; |
173 | unsigned int key_size; | 182 | unsigned int key_size; |
174 | unsigned int key_parts; | 183 | unsigned int key_parts; /* independent parts in key buffer */ |
184 | unsigned int key_extra_size; /* additional keys length */ | ||
175 | u8 key[0]; | 185 | u8 key[0]; |
176 | }; | 186 | }; |
177 | 187 | ||
@@ -230,6 +240,16 @@ static struct crypto_ablkcipher *any_tfm(struct crypt_config *cc) | |||
230 | * version 3: the same as version 2 with additional IV seed | 240 | * version 3: the same as version 2 with additional IV seed |
231 | * (it uses 65 keys, last key is used as IV seed) | 241 | * (it uses 65 keys, last key is used as IV seed) |
232 | * | 242 | * |
243 | * tcw: Compatible implementation of the block chaining mode used | ||
244 | * by the TrueCrypt device encryption system (prior to version 4.1). | ||
245 | * For more info see: http://www.truecrypt.org | ||
246 | * It operates on full 512 byte sectors and uses CBC | ||
247 | * with an IV derived from the initial key and the sector number. | ||
248 | * In addition, a whitening value is applied to every sector; the whitening | ||
249 | * is derived from the initial key and the sector number, mixed using CRC32. | ||
250 | * Note that this encryption scheme is vulnerable to watermarking attacks | ||
251 | * and should only be used to access old, compatible containers. | ||
252 | * | ||
233 | * plumb: unimplemented, see: | 253 | * plumb: unimplemented, see: |
234 | * http://article.gmane.org/gmane.linux.kernel.device-mapper.dm-crypt/454 | 254 | * http://article.gmane.org/gmane.linux.kernel.device-mapper.dm-crypt/454 |
235 | */ | 255 | */ |
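On the mapping-table side the new generator is selected through the IV part of the cipher spec, e.g. 'aes-cbc-tcw'. The key material handed to the target then has to carry what crypt_iv_tcw_init() peels off its tail: the cipher key itself, followed by iv_size bytes of IV seed and 16 bytes of whitening (32 + 16 + 16 = 64 bytes for AES-256-CBC). A hedged example table line, with the device purely illustrative and the key shown only as a placeholder:

	0 2097152 crypt aes-cbc-tcw <128 hex digits: key + iv_seed + whitening> 0 /dev/sdb 0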
@@ -530,7 +550,7 @@ static int crypt_iv_lmk_one(struct crypt_config *cc, u8 *iv, | |||
530 | char ctx[crypto_shash_descsize(lmk->hash_tfm)]; | 550 | char ctx[crypto_shash_descsize(lmk->hash_tfm)]; |
531 | } sdesc; | 551 | } sdesc; |
532 | struct md5_state md5state; | 552 | struct md5_state md5state; |
533 | u32 buf[4]; | 553 | __le32 buf[4]; |
534 | int i, r; | 554 | int i, r; |
535 | 555 | ||
536 | sdesc.desc.tfm = lmk->hash_tfm; | 556 | sdesc.desc.tfm = lmk->hash_tfm; |
@@ -608,6 +628,153 @@ static int crypt_iv_lmk_post(struct crypt_config *cc, u8 *iv, | |||
608 | return r; | 628 | return r; |
609 | } | 629 | } |
610 | 630 | ||
631 | static void crypt_iv_tcw_dtr(struct crypt_config *cc) | ||
632 | { | ||
633 | struct iv_tcw_private *tcw = &cc->iv_gen_private.tcw; | ||
634 | |||
635 | kzfree(tcw->iv_seed); | ||
636 | tcw->iv_seed = NULL; | ||
637 | kzfree(tcw->whitening); | ||
638 | tcw->whitening = NULL; | ||
639 | |||
640 | if (tcw->crc32_tfm && !IS_ERR(tcw->crc32_tfm)) | ||
641 | crypto_free_shash(tcw->crc32_tfm); | ||
642 | tcw->crc32_tfm = NULL; | ||
643 | } | ||
644 | |||
645 | static int crypt_iv_tcw_ctr(struct crypt_config *cc, struct dm_target *ti, | ||
646 | const char *opts) | ||
647 | { | ||
648 | struct iv_tcw_private *tcw = &cc->iv_gen_private.tcw; | ||
649 | |||
650 | if (cc->key_size <= (cc->iv_size + TCW_WHITENING_SIZE)) { | ||
651 | ti->error = "Wrong key size for TCW"; | ||
652 | return -EINVAL; | ||
653 | } | ||
654 | |||
655 | tcw->crc32_tfm = crypto_alloc_shash("crc32", 0, 0); | ||
656 | if (IS_ERR(tcw->crc32_tfm)) { | ||
657 | ti->error = "Error initializing CRC32 in TCW"; | ||
658 | return PTR_ERR(tcw->crc32_tfm); | ||
659 | } | ||
660 | |||
661 | tcw->iv_seed = kzalloc(cc->iv_size, GFP_KERNEL); | ||
662 | tcw->whitening = kzalloc(TCW_WHITENING_SIZE, GFP_KERNEL); | ||
663 | if (!tcw->iv_seed || !tcw->whitening) { | ||
664 | crypt_iv_tcw_dtr(cc); | ||
665 | ti->error = "Error allocating seed storage in TCW"; | ||
666 | return -ENOMEM; | ||
667 | } | ||
668 | |||
669 | return 0; | ||
670 | } | ||
671 | |||
672 | static int crypt_iv_tcw_init(struct crypt_config *cc) | ||
673 | { | ||
674 | struct iv_tcw_private *tcw = &cc->iv_gen_private.tcw; | ||
675 | int key_offset = cc->key_size - cc->iv_size - TCW_WHITENING_SIZE; | ||
676 | |||
677 | memcpy(tcw->iv_seed, &cc->key[key_offset], cc->iv_size); | ||
678 | memcpy(tcw->whitening, &cc->key[key_offset + cc->iv_size], | ||
679 | TCW_WHITENING_SIZE); | ||
680 | |||
681 | return 0; | ||
682 | } | ||
683 | |||
684 | static int crypt_iv_tcw_wipe(struct crypt_config *cc) | ||
685 | { | ||
686 | struct iv_tcw_private *tcw = &cc->iv_gen_private.tcw; | ||
687 | |||
688 | memset(tcw->iv_seed, 0, cc->iv_size); | ||
689 | memset(tcw->whitening, 0, TCW_WHITENING_SIZE); | ||
690 | |||
691 | return 0; | ||
692 | } | ||
693 | |||
694 | static int crypt_iv_tcw_whitening(struct crypt_config *cc, | ||
695 | struct dm_crypt_request *dmreq, | ||
696 | u8 *data) | ||
697 | { | ||
698 | struct iv_tcw_private *tcw = &cc->iv_gen_private.tcw; | ||
699 | u64 sector = cpu_to_le64((u64)dmreq->iv_sector); | ||
700 | u8 buf[TCW_WHITENING_SIZE]; | ||
701 | struct { | ||
702 | struct shash_desc desc; | ||
703 | char ctx[crypto_shash_descsize(tcw->crc32_tfm)]; | ||
704 | } sdesc; | ||
705 | int i, r; | ||
706 | |||
707 | /* xor whitening with sector number */ | ||
708 | memcpy(buf, tcw->whitening, TCW_WHITENING_SIZE); | ||
709 | crypto_xor(buf, (u8 *)§or, 8); | ||
710 | crypto_xor(&buf[8], (u8 *)§or, 8); | ||
711 | |||
712 | /* calculate crc32 for every 32bit part and xor it */ | ||
713 | sdesc.desc.tfm = tcw->crc32_tfm; | ||
714 | sdesc.desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; | ||
715 | for (i = 0; i < 4; i++) { | ||
716 | r = crypto_shash_init(&sdesc.desc); | ||
717 | if (r) | ||
718 | goto out; | ||
719 | r = crypto_shash_update(&sdesc.desc, &buf[i * 4], 4); | ||
720 | if (r) | ||
721 | goto out; | ||
722 | r = crypto_shash_final(&sdesc.desc, &buf[i * 4]); | ||
723 | if (r) | ||
724 | goto out; | ||
725 | } | ||
726 | crypto_xor(&buf[0], &buf[12], 4); | ||
727 | crypto_xor(&buf[4], &buf[8], 4); | ||
728 | |||
729 | /* apply whitening (8 bytes) to whole sector */ | ||
730 | for (i = 0; i < ((1 << SECTOR_SHIFT) / 8); i++) | ||
731 | crypto_xor(data + i * 8, buf, 8); | ||
732 | out: | ||
733 | memset(buf, 0, sizeof(buf)); | ||
734 | return r; | ||
735 | } | ||
736 | |||
737 | static int crypt_iv_tcw_gen(struct crypt_config *cc, u8 *iv, | ||
738 | struct dm_crypt_request *dmreq) | ||
739 | { | ||
740 | struct iv_tcw_private *tcw = &cc->iv_gen_private.tcw; | ||
741 | u64 sector = cpu_to_le64((u64)dmreq->iv_sector); | ||
742 | u8 *src; | ||
743 | int r = 0; | ||
744 | |||
745 | /* Remove whitening from ciphertext */ | ||
746 | if (bio_data_dir(dmreq->ctx->bio_in) != WRITE) { | ||
747 | src = kmap_atomic(sg_page(&dmreq->sg_in)); | ||
748 | r = crypt_iv_tcw_whitening(cc, dmreq, src + dmreq->sg_in.offset); | ||
749 | kunmap_atomic(src); | ||
750 | } | ||
751 | |||
752 | /* Calculate IV */ | ||
753 | memcpy(iv, tcw->iv_seed, cc->iv_size); | ||
754 | crypto_xor(iv, (u8 *)§or, 8); | ||
755 | if (cc->iv_size > 8) | ||
756 | crypto_xor(&iv[8], (u8 *)§or, cc->iv_size - 8); | ||
757 | |||
758 | return r; | ||
759 | } | ||
760 | |||
761 | static int crypt_iv_tcw_post(struct crypt_config *cc, u8 *iv, | ||
762 | struct dm_crypt_request *dmreq) | ||
763 | { | ||
764 | u8 *dst; | ||
765 | int r; | ||
766 | |||
767 | if (bio_data_dir(dmreq->ctx->bio_in) != WRITE) | ||
768 | return 0; | ||
769 | |||
770 | /* Apply whitening on ciphertext */ | ||
771 | dst = kmap_atomic(sg_page(&dmreq->sg_out)); | ||
772 | r = crypt_iv_tcw_whitening(cc, dmreq, dst + dmreq->sg_out.offset); | ||
773 | kunmap_atomic(dst); | ||
774 | |||
775 | return r; | ||
776 | } | ||
777 | |||
611 | static struct crypt_iv_operations crypt_iv_plain_ops = { | 778 | static struct crypt_iv_operations crypt_iv_plain_ops = { |
612 | .generator = crypt_iv_plain_gen | 779 | .generator = crypt_iv_plain_gen |
613 | }; | 780 | }; |
@@ -643,6 +810,15 @@ static struct crypt_iv_operations crypt_iv_lmk_ops = { | |||
643 | .post = crypt_iv_lmk_post | 810 | .post = crypt_iv_lmk_post |
644 | }; | 811 | }; |
645 | 812 | ||
813 | static struct crypt_iv_operations crypt_iv_tcw_ops = { | ||
814 | .ctr = crypt_iv_tcw_ctr, | ||
815 | .dtr = crypt_iv_tcw_dtr, | ||
816 | .init = crypt_iv_tcw_init, | ||
817 | .wipe = crypt_iv_tcw_wipe, | ||
818 | .generator = crypt_iv_tcw_gen, | ||
819 | .post = crypt_iv_tcw_post | ||
820 | }; | ||
821 | |||
646 | static void crypt_convert_init(struct crypt_config *cc, | 822 | static void crypt_convert_init(struct crypt_config *cc, |
647 | struct convert_context *ctx, | 823 | struct convert_context *ctx, |
648 | struct bio *bio_out, struct bio *bio_in, | 824 | struct bio *bio_out, struct bio *bio_in, |
@@ -1274,9 +1450,12 @@ static int crypt_alloc_tfms(struct crypt_config *cc, char *ciphermode) | |||
1274 | 1450 | ||
1275 | static int crypt_setkey_allcpus(struct crypt_config *cc) | 1451 | static int crypt_setkey_allcpus(struct crypt_config *cc) |
1276 | { | 1452 | { |
1277 | unsigned subkey_size = cc->key_size >> ilog2(cc->tfms_count); | 1453 | unsigned subkey_size; |
1278 | int err = 0, i, r; | 1454 | int err = 0, i, r; |
1279 | 1455 | ||
1456 | /* Ignore extra keys (which are used for IV etc) */ | ||
1457 | subkey_size = (cc->key_size - cc->key_extra_size) >> ilog2(cc->tfms_count); | ||
1458 | |||
1280 | for (i = 0; i < cc->tfms_count; i++) { | 1459 | for (i = 0; i < cc->tfms_count; i++) { |
1281 | r = crypto_ablkcipher_setkey(cc->tfms[i], | 1460 | r = crypto_ablkcipher_setkey(cc->tfms[i], |
1282 | cc->key + (i * subkey_size), | 1461 | cc->key + (i * subkey_size), |
@@ -1409,6 +1588,7 @@ static int crypt_ctr_cipher(struct dm_target *ti, | |||
1409 | return -EINVAL; | 1588 | return -EINVAL; |
1410 | } | 1589 | } |
1411 | cc->key_parts = cc->tfms_count; | 1590 | cc->key_parts = cc->tfms_count; |
1591 | cc->key_extra_size = 0; | ||
1412 | 1592 | ||
1413 | cc->cipher = kstrdup(cipher, GFP_KERNEL); | 1593 | cc->cipher = kstrdup(cipher, GFP_KERNEL); |
1414 | if (!cc->cipher) | 1594 | if (!cc->cipher) |
@@ -1460,13 +1640,6 @@ static int crypt_ctr_cipher(struct dm_target *ti, | |||
1460 | goto bad; | 1640 | goto bad; |
1461 | } | 1641 | } |
1462 | 1642 | ||
1463 | /* Initialize and set key */ | ||
1464 | ret = crypt_set_key(cc, key); | ||
1465 | if (ret < 0) { | ||
1466 | ti->error = "Error decoding and setting key"; | ||
1467 | goto bad; | ||
1468 | } | ||
1469 | |||
1470 | /* Initialize IV */ | 1643 | /* Initialize IV */ |
1471 | cc->iv_size = crypto_ablkcipher_ivsize(any_tfm(cc)); | 1644 | cc->iv_size = crypto_ablkcipher_ivsize(any_tfm(cc)); |
1472 | if (cc->iv_size) | 1645 | if (cc->iv_size) |
@@ -1493,18 +1666,33 @@ static int crypt_ctr_cipher(struct dm_target *ti, | |||
1493 | cc->iv_gen_ops = &crypt_iv_null_ops; | 1666 | cc->iv_gen_ops = &crypt_iv_null_ops; |
1494 | else if (strcmp(ivmode, "lmk") == 0) { | 1667 | else if (strcmp(ivmode, "lmk") == 0) { |
1495 | cc->iv_gen_ops = &crypt_iv_lmk_ops; | 1668 | cc->iv_gen_ops = &crypt_iv_lmk_ops; |
1496 | /* Version 2 and 3 is recognised according | 1669 | /* |
1670 | * Versions 2 and 3 are recognised according | ||
1497 | * to length of provided multi-key string. | 1671 | * to length of provided multi-key string. |
1498 | * If present (version 3), last key is used as IV seed. | 1672 | * If present (version 3), last key is used as IV seed. |
1673 | * All keys (including IV seed) are always the same size. | ||
1499 | */ | 1674 | */ |
1500 | if (cc->key_size % cc->key_parts) | 1675 | if (cc->key_size % cc->key_parts) { |
1501 | cc->key_parts++; | 1676 | cc->key_parts++; |
1677 | cc->key_extra_size = cc->key_size / cc->key_parts; | ||
1678 | } | ||
1679 | } else if (strcmp(ivmode, "tcw") == 0) { | ||
1680 | cc->iv_gen_ops = &crypt_iv_tcw_ops; | ||
1681 | cc->key_parts += 2; /* IV + whitening */ | ||
1682 | cc->key_extra_size = cc->iv_size + TCW_WHITENING_SIZE; | ||
1502 | } else { | 1683 | } else { |
1503 | ret = -EINVAL; | 1684 | ret = -EINVAL; |
1504 | ti->error = "Invalid IV mode"; | 1685 | ti->error = "Invalid IV mode"; |
1505 | goto bad; | 1686 | goto bad; |
1506 | } | 1687 | } |
1507 | 1688 | ||
1689 | /* Initialize and set key */ | ||
1690 | ret = crypt_set_key(cc, key); | ||
1691 | if (ret < 0) { | ||
1692 | ti->error = "Error decoding and setting key"; | ||
1693 | goto bad; | ||
1694 | } | ||
1695 | |||
1508 | /* Allocate IV */ | 1696 | /* Allocate IV */ |
1509 | if (cc->iv_gen_ops && cc->iv_gen_ops->ctr) { | 1697 | if (cc->iv_gen_ops && cc->iv_gen_ops->ctr) { |
1510 | ret = cc->iv_gen_ops->ctr(cc, ti, ivopts); | 1698 | ret = cc->iv_gen_ops->ctr(cc, ti, ivopts); |
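
To make the key_parts/key_extra_size handling above concrete, here is a small worked example. The figures assume "aes-cbc-tcw" with AES-256 (iv_size 16, TCW_WHITENING_SIZE 16) and a single cipher instance; they are illustrative assumptions, not values taken from this patch.

#include <stdio.h>

/* Worked example of the key split for an assumed "aes-cbc-tcw" mapping:
 * the table key carries cipher key + IV seed + whitening value. */
int main(void)
{
	unsigned key_size = 64;		/* bytes decoded from the mapping table */
	unsigned iv_size = 16;		/* AES block size */
	unsigned whitening_size = 16;	/* TCW_WHITENING_SIZE */
	unsigned tfms_count = 1;	/* single cipher instance */

	unsigned key_extra_size = iv_size + whitening_size;		/* 32 */
	unsigned subkey_size = (key_size - key_extra_size) / tfms_count;

	/* 32 bytes go to crypto_ablkcipher_setkey(), i.e. AES-256; bytes
	 * 32..47 become the IV seed and 48..63 the whitening value, as
	 * consumed by crypt_iv_tcw_init() above. */
	printf("key_extra_size=%u subkey_size=%u\n",
	       key_extra_size, subkey_size);
	return 0;
}
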
@@ -1817,7 +2005,7 @@ static int crypt_iterate_devices(struct dm_target *ti, | |||
1817 | 2005 | ||
1818 | static struct target_type crypt_target = { | 2006 | static struct target_type crypt_target = { |
1819 | .name = "crypt", | 2007 | .name = "crypt", |
1820 | .version = {1, 12, 1}, | 2008 | .version = {1, 13, 0}, |
1821 | .module = THIS_MODULE, | 2009 | .module = THIS_MODULE, |
1822 | .ctr = crypt_ctr, | 2010 | .ctr = crypt_ctr, |
1823 | .dtr = crypt_dtr, | 2011 | .dtr = crypt_dtr, |
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index afe08146f73e..51521429fb59 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c | |||
@@ -57,7 +57,7 @@ struct vers_iter { | |||
57 | static struct list_head _name_buckets[NUM_BUCKETS]; | 57 | static struct list_head _name_buckets[NUM_BUCKETS]; |
58 | static struct list_head _uuid_buckets[NUM_BUCKETS]; | 58 | static struct list_head _uuid_buckets[NUM_BUCKETS]; |
59 | 59 | ||
60 | static void dm_hash_remove_all(int keep_open_devices); | 60 | static void dm_hash_remove_all(bool keep_open_devices, bool mark_deferred, bool only_deferred); |
61 | 61 | ||
62 | /* | 62 | /* |
63 | * Guards access to both hash tables. | 63 | * Guards access to both hash tables. |
@@ -86,7 +86,7 @@ static int dm_hash_init(void) | |||
86 | 86 | ||
87 | static void dm_hash_exit(void) | 87 | static void dm_hash_exit(void) |
88 | { | 88 | { |
89 | dm_hash_remove_all(0); | 89 | dm_hash_remove_all(false, false, false); |
90 | } | 90 | } |
91 | 91 | ||
92 | /*----------------------------------------------------------------- | 92 | /*----------------------------------------------------------------- |
@@ -276,7 +276,7 @@ static struct dm_table *__hash_remove(struct hash_cell *hc) | |||
276 | return table; | 276 | return table; |
277 | } | 277 | } |
278 | 278 | ||
279 | static void dm_hash_remove_all(int keep_open_devices) | 279 | static void dm_hash_remove_all(bool keep_open_devices, bool mark_deferred, bool only_deferred) |
280 | { | 280 | { |
281 | int i, dev_skipped; | 281 | int i, dev_skipped; |
282 | struct hash_cell *hc; | 282 | struct hash_cell *hc; |
@@ -293,7 +293,8 @@ retry: | |||
293 | md = hc->md; | 293 | md = hc->md; |
294 | dm_get(md); | 294 | dm_get(md); |
295 | 295 | ||
296 | if (keep_open_devices && dm_lock_for_deletion(md)) { | 296 | if (keep_open_devices && |
297 | dm_lock_for_deletion(md, mark_deferred, only_deferred)) { | ||
297 | dm_put(md); | 298 | dm_put(md); |
298 | dev_skipped++; | 299 | dev_skipped++; |
299 | continue; | 300 | continue; |
@@ -450,6 +451,11 @@ static struct mapped_device *dm_hash_rename(struct dm_ioctl *param, | |||
450 | return md; | 451 | return md; |
451 | } | 452 | } |
452 | 453 | ||
454 | void dm_deferred_remove(void) | ||
455 | { | ||
456 | dm_hash_remove_all(true, false, true); | ||
457 | } | ||
458 | |||
453 | /*----------------------------------------------------------------- | 459 | /*----------------------------------------------------------------- |
454 | * Implementation of the ioctl commands | 460 | * Implementation of the ioctl commands |
455 | *---------------------------------------------------------------*/ | 461 | *---------------------------------------------------------------*/ |
@@ -461,7 +467,7 @@ typedef int (*ioctl_fn)(struct dm_ioctl *param, size_t param_size); | |||
461 | 467 | ||
462 | static int remove_all(struct dm_ioctl *param, size_t param_size) | 468 | static int remove_all(struct dm_ioctl *param, size_t param_size) |
463 | { | 469 | { |
464 | dm_hash_remove_all(1); | 470 | dm_hash_remove_all(true, !!(param->flags & DM_DEFERRED_REMOVE), false); |
465 | param->data_size = 0; | 471 | param->data_size = 0; |
466 | return 0; | 472 | return 0; |
467 | } | 473 | } |
@@ -683,6 +689,9 @@ static void __dev_status(struct mapped_device *md, struct dm_ioctl *param) | |||
683 | if (dm_suspended_md(md)) | 689 | if (dm_suspended_md(md)) |
684 | param->flags |= DM_SUSPEND_FLAG; | 690 | param->flags |= DM_SUSPEND_FLAG; |
685 | 691 | ||
692 | if (dm_test_deferred_remove_flag(md)) | ||
693 | param->flags |= DM_DEFERRED_REMOVE; | ||
694 | |||
686 | param->dev = huge_encode_dev(disk_devt(disk)); | 695 | param->dev = huge_encode_dev(disk_devt(disk)); |
687 | 696 | ||
688 | /* | 697 | /* |
@@ -832,8 +841,13 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size) | |||
832 | /* | 841 | /* |
833 | * Ensure the device is not open and nothing further can open it. | 842 | * Ensure the device is not open and nothing further can open it. |
834 | */ | 843 | */ |
835 | r = dm_lock_for_deletion(md); | 844 | r = dm_lock_for_deletion(md, !!(param->flags & DM_DEFERRED_REMOVE), false); |
836 | if (r) { | 845 | if (r) { |
846 | if (r == -EBUSY && param->flags & DM_DEFERRED_REMOVE) { | ||
847 | up_write(&_hash_lock); | ||
848 | dm_put(md); | ||
849 | return 0; | ||
850 | } | ||
837 | DMDEBUG_LIMIT("unable to remove open device %s", hc->name); | 851 | DMDEBUG_LIMIT("unable to remove open device %s", hc->name); |
838 | up_write(&_hash_lock); | 852 | up_write(&_hash_lock); |
839 | dm_put(md); | 853 | dm_put(md); |
@@ -848,6 +862,8 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size) | |||
848 | dm_table_destroy(t); | 862 | dm_table_destroy(t); |
849 | } | 863 | } |
850 | 864 | ||
865 | param->flags &= ~DM_DEFERRED_REMOVE; | ||
866 | |||
851 | if (!dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr)) | 867 | if (!dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr)) |
852 | param->flags |= DM_UEVENT_GENERATED_FLAG; | 868 | param->flags |= DM_UEVENT_GENERATED_FLAG; |
853 | 869 | ||
@@ -1469,6 +1485,14 @@ static int message_for_md(struct mapped_device *md, unsigned argc, char **argv, | |||
1469 | if (**argv != '@') | 1485 | if (**argv != '@') |
1470 | return 2; /* no '@' prefix, deliver to target */ | 1486 | return 2; /* no '@' prefix, deliver to target */ |
1471 | 1487 | ||
1488 | if (!strcasecmp(argv[0], "@cancel_deferred_remove")) { | ||
1489 | if (argc != 1) { | ||
1490 | DMERR("Invalid arguments for @cancel_deferred_remove"); | ||
1491 | return -EINVAL; | ||
1492 | } | ||
1493 | return dm_cancel_deferred_remove(md); | ||
1494 | } | ||
1495 | |||
1472 | r = dm_stats_message(md, argc, argv, result, maxlen); | 1496 | r = dm_stats_message(md, argc, argv, result, maxlen); |
1473 | if (r < 2) | 1497 | if (r < 2) |
1474 | return r; | 1498 | return r; |
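
For context, a userspace caller might drive the new DM_DEFERRED_REMOVE path roughly as follows. This assumes lvm2's libdevmapper with deferred-remove support (dm_task_deferred_remove()) and a made-up device name, so treat it as a sketch of the calling convention rather than the canonical interface.

#include <libdevmapper.h>

/* Sketch: request removal of "testdev", allowing it to be deferred if the
 * device is still open.  dm_task_deferred_remove() is assumed to set
 * DM_DEFERRED_REMOVE in the ioctl flags, so dev_remove() above returns
 * success and the device goes away once its last opener closes it. */
int remove_deferred_sketch(void)
{
	struct dm_task *dmt;
	int ok = 0;

	dmt = dm_task_create(DM_DEVICE_REMOVE);
	if (!dmt)
		return 0;

	if (!dm_task_set_name(dmt, "testdev"))
		goto out;
	if (!dm_task_deferred_remove(dmt))
		goto out;

	ok = dm_task_run(dmt);
out:
	dm_task_destroy(dmt);
	return ok;
}

A pending deferred remove can later be withdrawn with the @cancel_deferred_remove message handled above; a sketch of that follows the dm.c changes below.
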
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index de570a558764..6eb9dc9ef8f3 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c | |||
@@ -87,6 +87,7 @@ struct multipath { | |||
87 | unsigned queue_if_no_path:1; /* Queue I/O if last path fails? */ | 87 | unsigned queue_if_no_path:1; /* Queue I/O if last path fails? */ |
88 | unsigned saved_queue_if_no_path:1; /* Saved state during suspension */ | 88 | unsigned saved_queue_if_no_path:1; /* Saved state during suspension */ |
89 | unsigned retain_attached_hw_handler:1; /* If there's already a hw_handler present, don't change it. */ | 89 | unsigned retain_attached_hw_handler:1; /* If there's already a hw_handler present, don't change it. */ |
90 | unsigned pg_init_disabled:1; /* pg_init is not currently allowed */ | ||
90 | 91 | ||
91 | unsigned pg_init_retries; /* Number of times to retry pg_init */ | 92 | unsigned pg_init_retries; /* Number of times to retry pg_init */ |
92 | unsigned pg_init_count; /* Number of times pg_init called */ | 93 | unsigned pg_init_count; /* Number of times pg_init called */ |
@@ -390,13 +391,16 @@ static int map_io(struct multipath *m, struct request *clone, | |||
390 | if (was_queued) | 391 | if (was_queued) |
391 | m->queue_size--; | 392 | m->queue_size--; |
392 | 393 | ||
393 | if ((pgpath && m->queue_io) || | 394 | if (m->pg_init_required) { |
394 | (!pgpath && m->queue_if_no_path)) { | 395 | if (!m->pg_init_in_progress) |
396 | queue_work(kmultipathd, &m->process_queued_ios); | ||
397 | r = DM_MAPIO_REQUEUE; | ||
398 | } else if ((pgpath && m->queue_io) || | ||
399 | (!pgpath && m->queue_if_no_path)) { | ||
395 | /* Queue for the daemon to resubmit */ | 400 | /* Queue for the daemon to resubmit */ |
396 | list_add_tail(&clone->queuelist, &m->queued_ios); | 401 | list_add_tail(&clone->queuelist, &m->queued_ios); |
397 | m->queue_size++; | 402 | m->queue_size++; |
398 | if ((m->pg_init_required && !m->pg_init_in_progress) || | 403 | if (!m->queue_io) |
399 | !m->queue_io) | ||
400 | queue_work(kmultipathd, &m->process_queued_ios); | 404 | queue_work(kmultipathd, &m->process_queued_ios); |
401 | pgpath = NULL; | 405 | pgpath = NULL; |
402 | r = DM_MAPIO_SUBMITTED; | 406 | r = DM_MAPIO_SUBMITTED; |
@@ -497,7 +501,8 @@ static void process_queued_ios(struct work_struct *work) | |||
497 | (!pgpath && !m->queue_if_no_path)) | 501 | (!pgpath && !m->queue_if_no_path)) |
498 | must_queue = 0; | 502 | must_queue = 0; |
499 | 503 | ||
500 | if (m->pg_init_required && !m->pg_init_in_progress && pgpath) | 504 | if (m->pg_init_required && !m->pg_init_in_progress && pgpath && |
505 | !m->pg_init_disabled) | ||
501 | __pg_init_all_paths(m); | 506 | __pg_init_all_paths(m); |
502 | 507 | ||
503 | spin_unlock_irqrestore(&m->lock, flags); | 508 | spin_unlock_irqrestore(&m->lock, flags); |
@@ -942,10 +947,20 @@ static void multipath_wait_for_pg_init_completion(struct multipath *m) | |||
942 | 947 | ||
943 | static void flush_multipath_work(struct multipath *m) | 948 | static void flush_multipath_work(struct multipath *m) |
944 | { | 949 | { |
950 | unsigned long flags; | ||
951 | |||
952 | spin_lock_irqsave(&m->lock, flags); | ||
953 | m->pg_init_disabled = 1; | ||
954 | spin_unlock_irqrestore(&m->lock, flags); | ||
955 | |||
945 | flush_workqueue(kmpath_handlerd); | 956 | flush_workqueue(kmpath_handlerd); |
946 | multipath_wait_for_pg_init_completion(m); | 957 | multipath_wait_for_pg_init_completion(m); |
947 | flush_workqueue(kmultipathd); | 958 | flush_workqueue(kmultipathd); |
948 | flush_work(&m->trigger_event); | 959 | flush_work(&m->trigger_event); |
960 | |||
961 | spin_lock_irqsave(&m->lock, flags); | ||
962 | m->pg_init_disabled = 0; | ||
963 | spin_unlock_irqrestore(&m->lock, flags); | ||
949 | } | 964 | } |
950 | 965 | ||
951 | static void multipath_dtr(struct dm_target *ti) | 966 | static void multipath_dtr(struct dm_target *ti) |
@@ -1164,7 +1179,7 @@ static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath) | |||
1164 | 1179 | ||
1165 | spin_lock_irqsave(&m->lock, flags); | 1180 | spin_lock_irqsave(&m->lock, flags); |
1166 | 1181 | ||
1167 | if (m->pg_init_count <= m->pg_init_retries) | 1182 | if (m->pg_init_count <= m->pg_init_retries && !m->pg_init_disabled) |
1168 | m->pg_init_required = 1; | 1183 | m->pg_init_required = 1; |
1169 | else | 1184 | else |
1170 | limit_reached = 1; | 1185 | limit_reached = 1; |
@@ -1665,6 +1680,11 @@ static int multipath_busy(struct dm_target *ti) | |||
1665 | 1680 | ||
1666 | spin_lock_irqsave(&m->lock, flags); | 1681 | spin_lock_irqsave(&m->lock, flags); |
1667 | 1682 | ||
1683 | /* pg_init in progress, requeue until done */ | ||
1684 | if (m->pg_init_in_progress) { | ||
1685 | busy = 1; | ||
1686 | goto out; | ||
1687 | } | ||
1668 | /* Guess which priority_group will be used at next mapping time */ | 1688 | /* Guess which priority_group will be used at next mapping time */ |
1669 | if (unlikely(!m->current_pgpath && m->next_pg)) | 1689 | if (unlikely(!m->current_pgpath && m->next_pg)) |
1670 | pg = m->next_pg; | 1690 | pg = m->next_pg; |
@@ -1714,7 +1734,7 @@ out: | |||
1714 | *---------------------------------------------------------------*/ | 1734 | *---------------------------------------------------------------*/ |
1715 | static struct target_type multipath_target = { | 1735 | static struct target_type multipath_target = { |
1716 | .name = "multipath", | 1736 | .name = "multipath", |
1717 | .version = {1, 5, 1}, | 1737 | .version = {1, 6, 0}, |
1718 | .module = THIS_MODULE, | 1738 | .module = THIS_MODULE, |
1719 | .ctr = multipath_ctr, | 1739 | .ctr = multipath_ctr, |
1720 | .dtr = multipath_dtr, | 1740 | .dtr = multipath_dtr, |
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 8f8783533ac7..465f08ca62b1 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c | |||
@@ -545,14 +545,28 @@ static int adjoin(struct dm_table *table, struct dm_target *ti) | |||
545 | 545 | ||
546 | /* | 546 | /* |
547 | * Used to dynamically allocate the arg array. | 547 | * Used to dynamically allocate the arg array. |
548 | * | ||
549 | * We do first allocation with GFP_NOIO because dm-mpath and dm-thin must | ||
550 | * process messages even if some device is suspended. These messages have a | ||
551 | * small fixed number of arguments. | ||
552 | * | ||
553 | * On the other hand, dm-switch needs to process bulk data using messages and | ||
554 | * excessive use of GFP_NOIO could cause trouble. | ||
548 | */ | 555 | */ |
549 | static char **realloc_argv(unsigned *array_size, char **old_argv) | 556 | static char **realloc_argv(unsigned *array_size, char **old_argv) |
550 | { | 557 | { |
551 | char **argv; | 558 | char **argv; |
552 | unsigned new_size; | 559 | unsigned new_size; |
560 | gfp_t gfp; | ||
553 | 561 | ||
554 | new_size = *array_size ? *array_size * 2 : 64; | 562 | if (*array_size) { |
555 | argv = kmalloc(new_size * sizeof(*argv), GFP_KERNEL); | 563 | new_size = *array_size * 2; |
564 | gfp = GFP_KERNEL; | ||
565 | } else { | ||
566 | new_size = 8; | ||
567 | gfp = GFP_NOIO; | ||
568 | } | ||
569 | argv = kmalloc(new_size * sizeof(*argv), gfp); | ||
556 | if (argv) { | 570 | if (argv) { |
557 | memcpy(argv, old_argv, *array_size * sizeof(*argv)); | 571 | memcpy(argv, old_argv, *array_size * sizeof(*argv)); |
558 | *array_size = new_size; | 572 | *array_size = new_size; |
@@ -1548,8 +1562,11 @@ int dm_table_resume_targets(struct dm_table *t) | |||
1548 | continue; | 1562 | continue; |
1549 | 1563 | ||
1550 | r = ti->type->preresume(ti); | 1564 | r = ti->type->preresume(ti); |
1551 | if (r) | 1565 | if (r) { |
1566 | DMERR("%s: %s: preresume failed, error = %d", | ||
1567 | dm_device_name(t->md), ti->type->name, r); | ||
1552 | return r; | 1568 | return r; |
1569 | } | ||
1553 | } | 1570 | } |
1554 | 1571 | ||
1555 | for (i = 0; i < t->num_targets; i++) { | 1572 | for (i = 0; i < t->num_targets; i++) { |
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index b3e26c7d1417..0704c523a76b 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
@@ -49,6 +49,11 @@ static unsigned int _major = 0; | |||
49 | static DEFINE_IDR(_minor_idr); | 49 | static DEFINE_IDR(_minor_idr); |
50 | 50 | ||
51 | static DEFINE_SPINLOCK(_minor_lock); | 51 | static DEFINE_SPINLOCK(_minor_lock); |
52 | |||
53 | static void do_deferred_remove(struct work_struct *w); | ||
54 | |||
55 | static DECLARE_WORK(deferred_remove_work, do_deferred_remove); | ||
56 | |||
52 | /* | 57 | /* |
53 | * For bio-based dm. | 58 | * For bio-based dm. |
54 | * One of these is allocated per bio. | 59 | * One of these is allocated per bio. |
@@ -116,6 +121,7 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo); | |||
116 | #define DMF_DELETING 4 | 121 | #define DMF_DELETING 4 |
117 | #define DMF_NOFLUSH_SUSPENDING 5 | 122 | #define DMF_NOFLUSH_SUSPENDING 5 |
118 | #define DMF_MERGE_IS_OPTIONAL 6 | 123 | #define DMF_MERGE_IS_OPTIONAL 6 |
124 | #define DMF_DEFERRED_REMOVE 7 | ||
119 | 125 | ||
120 | /* | 126 | /* |
121 | * A dummy definition to make RCU happy. | 127 | * A dummy definition to make RCU happy. |
@@ -299,6 +305,8 @@ out_free_io_cache: | |||
299 | 305 | ||
300 | static void local_exit(void) | 306 | static void local_exit(void) |
301 | { | 307 | { |
308 | flush_scheduled_work(); | ||
309 | |||
302 | kmem_cache_destroy(_rq_tio_cache); | 310 | kmem_cache_destroy(_rq_tio_cache); |
303 | kmem_cache_destroy(_io_cache); | 311 | kmem_cache_destroy(_io_cache); |
304 | unregister_blkdev(_major, _name); | 312 | unregister_blkdev(_major, _name); |
@@ -404,7 +412,10 @@ static void dm_blk_close(struct gendisk *disk, fmode_t mode) | |||
404 | 412 | ||
405 | spin_lock(&_minor_lock); | 413 | spin_lock(&_minor_lock); |
406 | 414 | ||
407 | atomic_dec(&md->open_count); | 415 | if (atomic_dec_and_test(&md->open_count) && |
416 | (test_bit(DMF_DEFERRED_REMOVE, &md->flags))) | ||
417 | schedule_work(&deferred_remove_work); | ||
418 | |||
408 | dm_put(md); | 419 | dm_put(md); |
409 | 420 | ||
410 | spin_unlock(&_minor_lock); | 421 | spin_unlock(&_minor_lock); |
@@ -418,14 +429,18 @@ int dm_open_count(struct mapped_device *md) | |||
418 | /* | 429 | /* |
419 | * Guarantees nothing is using the device before it's deleted. | 430 | * Guarantees nothing is using the device before it's deleted. |
420 | */ | 431 | */ |
421 | int dm_lock_for_deletion(struct mapped_device *md) | 432 | int dm_lock_for_deletion(struct mapped_device *md, bool mark_deferred, bool only_deferred) |
422 | { | 433 | { |
423 | int r = 0; | 434 | int r = 0; |
424 | 435 | ||
425 | spin_lock(&_minor_lock); | 436 | spin_lock(&_minor_lock); |
426 | 437 | ||
427 | if (dm_open_count(md)) | 438 | if (dm_open_count(md)) { |
428 | r = -EBUSY; | 439 | r = -EBUSY; |
440 | if (mark_deferred) | ||
441 | set_bit(DMF_DEFERRED_REMOVE, &md->flags); | ||
442 | } else if (only_deferred && !test_bit(DMF_DEFERRED_REMOVE, &md->flags)) | ||
443 | r = -EEXIST; | ||
429 | else | 444 | else |
430 | set_bit(DMF_DELETING, &md->flags); | 445 | set_bit(DMF_DELETING, &md->flags); |
431 | 446 | ||
@@ -434,6 +449,27 @@ int dm_lock_for_deletion(struct mapped_device *md) | |||
434 | return r; | 449 | return r; |
435 | } | 450 | } |
436 | 451 | ||
452 | int dm_cancel_deferred_remove(struct mapped_device *md) | ||
453 | { | ||
454 | int r = 0; | ||
455 | |||
456 | spin_lock(&_minor_lock); | ||
457 | |||
458 | if (test_bit(DMF_DELETING, &md->flags)) | ||
459 | r = -EBUSY; | ||
460 | else | ||
461 | clear_bit(DMF_DEFERRED_REMOVE, &md->flags); | ||
462 | |||
463 | spin_unlock(&_minor_lock); | ||
464 | |||
465 | return r; | ||
466 | } | ||
467 | |||
468 | static void do_deferred_remove(struct work_struct *w) | ||
469 | { | ||
470 | dm_deferred_remove(); | ||
471 | } | ||
472 | |||
437 | sector_t dm_get_size(struct mapped_device *md) | 473 | sector_t dm_get_size(struct mapped_device *md) |
438 | { | 474 | { |
439 | return get_capacity(md->disk); | 475 | return get_capacity(md->disk); |
@@ -2894,6 +2930,11 @@ int dm_suspended_md(struct mapped_device *md) | |||
2894 | return test_bit(DMF_SUSPENDED, &md->flags); | 2930 | return test_bit(DMF_SUSPENDED, &md->flags); |
2895 | } | 2931 | } |
2896 | 2932 | ||
2933 | int dm_test_deferred_remove_flag(struct mapped_device *md) | ||
2934 | { | ||
2935 | return test_bit(DMF_DEFERRED_REMOVE, &md->flags); | ||
2936 | } | ||
2937 | |||
2897 | int dm_suspended(struct dm_target *ti) | 2938 | int dm_suspended(struct dm_target *ti) |
2898 | { | 2939 | { |
2899 | return dm_suspended_md(dm_table_get_md(ti->table)); | 2940 | return dm_suspended_md(dm_table_get_md(ti->table)); |
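
dm_cancel_deferred_remove() above is reached through the @cancel_deferred_remove message handled in dm-ioctl.c. One way userspace could send it is sketched below; the libdevmapper calls and the device name are assumptions for illustration, not part of this patch.

#include <libdevmapper.h>

/* Sketch: cancel a pending deferred remove by sending the
 * @cancel_deferred_remove message to the device (sector 0, handled by
 * message_for_md() above rather than by a target). */
int cancel_deferred_remove_sketch(const char *name)
{
	struct dm_task *dmt;
	int ok = 0;

	dmt = dm_task_create(DM_DEVICE_TARGET_MSG);
	if (!dmt)
		return 0;

	if (!dm_task_set_name(dmt, name))
		goto out;
	if (!dm_task_set_sector(dmt, 0))
		goto out;
	if (!dm_task_set_message(dmt, "@cancel_deferred_remove"))
		goto out;

	/* The ioctl fails if deletion has already started; see
	 * dm_cancel_deferred_remove() above. */
	ok = dm_task_run(dmt);
out:
	dm_task_destroy(dmt);
	return ok;
}

From the command line the equivalent would be a "dmsetup message <dev> 0 @cancel_deferred_remove" invocation.
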
diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 1d1ad7b7e527..c57ba550f69e 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h | |||
@@ -129,6 +129,16 @@ int dm_deleting_md(struct mapped_device *md); | |||
129 | int dm_suspended_md(struct mapped_device *md); | 129 | int dm_suspended_md(struct mapped_device *md); |
130 | 130 | ||
131 | /* | 131 | /* |
132 | * Test if the device is scheduled for deferred remove. | ||
133 | */ | ||
134 | int dm_test_deferred_remove_flag(struct mapped_device *md); | ||
135 | |||
136 | /* | ||
137 | * Try to remove devices marked for deferred removal. | ||
138 | */ | ||
139 | void dm_deferred_remove(void); | ||
140 | |||
141 | /* | ||
132 | * The device-mapper can be driven through one of two interfaces; | 142 | * The device-mapper can be driven through one of two interfaces; |
133 | * ioctl or filesystem, depending which patch you have applied. | 143 | * ioctl or filesystem, depending which patch you have applied. |
134 | */ | 144 | */ |
@@ -158,7 +168,8 @@ void dm_stripe_exit(void); | |||
158 | void dm_destroy(struct mapped_device *md); | 168 | void dm_destroy(struct mapped_device *md); |
159 | void dm_destroy_immediate(struct mapped_device *md); | 169 | void dm_destroy_immediate(struct mapped_device *md); |
160 | int dm_open_count(struct mapped_device *md); | 170 | int dm_open_count(struct mapped_device *md); |
161 | int dm_lock_for_deletion(struct mapped_device *md); | 171 | int dm_lock_for_deletion(struct mapped_device *md, bool mark_deferred, bool only_deferred); |
172 | int dm_cancel_deferred_remove(struct mapped_device *md); | ||
162 | int dm_request_based(struct mapped_device *md); | 173 | int dm_request_based(struct mapped_device *md); |
163 | sector_t dm_get_size(struct mapped_device *md); | 174 | sector_t dm_get_size(struct mapped_device *md); |
164 | struct dm_stats *dm_get_stats(struct mapped_device *md); | 175 | struct dm_stats *dm_get_stats(struct mapped_device *md); |
diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c index 172147eb1d40..af96e24ec328 100644 --- a/drivers/md/persistent-data/dm-array.c +++ b/drivers/md/persistent-data/dm-array.c | |||
@@ -509,15 +509,18 @@ static int grow_add_tail_block(struct resize *resize) | |||
509 | static int grow_needs_more_blocks(struct resize *resize) | 509 | static int grow_needs_more_blocks(struct resize *resize) |
510 | { | 510 | { |
511 | int r; | 511 | int r; |
512 | unsigned old_nr_blocks = resize->old_nr_full_blocks; | ||
512 | 513 | ||
513 | if (resize->old_nr_entries_in_last_block > 0) { | 514 | if (resize->old_nr_entries_in_last_block > 0) { |
515 | old_nr_blocks++; | ||
516 | |||
514 | r = grow_extend_tail_block(resize, resize->max_entries); | 517 | r = grow_extend_tail_block(resize, resize->max_entries); |
515 | if (r) | 518 | if (r) |
516 | return r; | 519 | return r; |
517 | } | 520 | } |
518 | 521 | ||
519 | r = insert_full_ablocks(resize->info, resize->size_of_block, | 522 | r = insert_full_ablocks(resize->info, resize->size_of_block, |
520 | resize->old_nr_full_blocks, | 523 | old_nr_blocks, |
521 | resize->new_nr_full_blocks, | 524 | resize->new_nr_full_blocks, |
522 | resize->max_entries, resize->value, | 525 | resize->max_entries, resize->value, |
523 | &resize->root); | 526 | &resize->root); |
diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c index e735a6d5a793..cfbf9617e465 100644 --- a/drivers/md/persistent-data/dm-space-map-disk.c +++ b/drivers/md/persistent-data/dm-space-map-disk.c | |||
@@ -140,26 +140,10 @@ static int sm_disk_inc_block(struct dm_space_map *sm, dm_block_t b) | |||
140 | 140 | ||
141 | static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b) | 141 | static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b) |
142 | { | 142 | { |
143 | int r; | ||
144 | uint32_t old_count; | ||
145 | enum allocation_event ev; | 143 | enum allocation_event ev; |
146 | struct sm_disk *smd = container_of(sm, struct sm_disk, sm); | 144 | struct sm_disk *smd = container_of(sm, struct sm_disk, sm); |
147 | 145 | ||
148 | r = sm_ll_dec(&smd->ll, b, &ev); | 146 | return sm_ll_dec(&smd->ll, b, &ev); |
149 | if (!r && (ev == SM_FREE)) { | ||
150 | /* | ||
151 | * It's only free if it's also free in the last | ||
152 | * transaction. | ||
153 | */ | ||
154 | r = sm_ll_lookup(&smd->old_ll, b, &old_count); | ||
155 | if (r) | ||
156 | return r; | ||
157 | |||
158 | if (!old_count) | ||
159 | smd->nr_allocated_this_transaction--; | ||
160 | } | ||
161 | |||
162 | return r; | ||
163 | } | 147 | } |
164 | 148 | ||
165 | static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b) | 149 | static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b) |